hyperstreamdb 0.1.8__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/.gitignore +1 -0
  2. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/Cargo.lock +3 -1
  3. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/Cargo.toml +4 -2
  4. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/PKG-INFO +24 -11
  5. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/README.md +18 -4
  6. hyperstreamdb-0.1.11/benchmark_results/BENCHMARK_REPORT.md +50 -0
  7. hyperstreamdb-0.1.11/benchmark_results/benchmark_results.csv +24 -0
  8. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/build.rs +1 -0
  9. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/COMPREHENSIVE_GUIDE.md +45 -9
  10. hyperstreamdb-0.1.11/docs/CONCURRENCY.md +38 -0
  11. hyperstreamdb-0.1.11/docs/CONFIGURATION.md +51 -0
  12. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/GPU_SETUP_GUIDE.md +12 -14
  13. hyperstreamdb-0.1.11/docs/INSTALLATION.md +551 -0
  14. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/PYTHON_VECTOR_API.md +10 -10
  15. hyperstreamdb-0.1.11/docs/catalog_usage.md +102 -0
  16. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/integrations/java_jni.md +3 -2
  17. hyperstreamdb-0.1.11/docs/requirements.txt +9 -0
  18. hyperstreamdb-0.1.11/docs/source/api/python.rst +40 -0
  19. hyperstreamdb-0.1.11/docs/source/api/rust.rst +11 -0
  20. hyperstreamdb-0.1.11/docs/source/conf.py +73 -0
  21. hyperstreamdb-0.1.11/docs/source/index.rst +50 -0
  22. hyperstreamdb-0.1.11/docs/source/roadmap.md +14 -0
  23. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/pyproject.toml +8 -11
  24. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/python/hyperstreamdb/__init__.py +195 -46
  25. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/spark-hyperstream/pom.xml +1 -0
  26. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/cache.rs +13 -6
  27. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/gpu.rs +5 -31
  28. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/manifest.rs +29 -29
  29. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/planner.rs +9 -9
  30. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/query.rs +21 -1
  31. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/reader.rs +57 -5
  32. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/segment.rs +33 -14
  33. hyperstreamdb-0.1.11/src/core/sql/pgvector_rewriter.rs +187 -0
  34. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/sql/session.rs +24 -17
  35. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/sql/vector_udf.rs +13 -3
  36. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/table.rs +198 -25
  37. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/wal.rs +18 -15
  38. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/lib.rs +7 -2
  39. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/python_binding.rs +282 -104
  40. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/python_distance.rs +84 -84
  41. hyperstreamdb-0.1.11/src/python_gpu_context.rs +164 -0
  42. hyperstreamdb-0.1.11/tests/check_mmh3.py +16 -0
  43. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/datafusion_rust_test.rs +5 -5
  44. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/integration_test_hnsw_ivf_native.rs +1 -1
  45. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-hyperstream/pom.xml +1 -0
  46. hyperstreamdb-0.1.8/benchmark_results/BENCHMARK_REPORT.md +0 -35
  47. hyperstreamdb-0.1.8/benchmark_results/benchmark_results.csv +0 -9
  48. hyperstreamdb-0.1.8/docs/catalog_usage.md +0 -107
  49. hyperstreamdb-0.1.8/src/core/sql/pgvector_rewriter.rs +0 -150
  50. hyperstreamdb-0.1.8/src/python_gpu_context.rs +0 -395
  51. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/.gitattributes +0 -0
  52. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/.hypothesis/constants/32b327793848e7d8 +0 -0
  53. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/.hypothesis/constants/67b0a8ccf18bf5d2 +0 -0
  54. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/.hypothesis/constants/84828557b4ee7be4 +0 -0
  55. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/.instructions.md +0 -0
  56. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/DORIS_OPTIMIZATION_PATTERNS.md +0 -0
  57. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/LICENSE-APACHE +0 -0
  58. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/LICENSE-MIT +0 -0
  59. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/RUN_COMPLIANCE_TESTS.sh +0 -0
  60. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/STEERING.md +0 -0
  61. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/THIRDPARTY_NOTICES.md +0 -0
  62. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/benches/bench_table.rs +0 -0
  63. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/benches/performance.rs +0 -0
  64. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/benchmark_results/benchmark_charts.png +0 -0
  65. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/book.toml +0 -0
  66. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/build-connectors.sh +0 -0
  67. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/build_out.txt +0 -0
  68. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/check_iceberg_compliance.py +0 -0
  69. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/clippy_output.txt +0 -0
  70. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docker-compose-minio-nessie.yml +0 -0
  71. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docker-compose.yml +0 -0
  72. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/BENCHMARKING.md +0 -0
  73. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/ICEBERG_V2_V3_API.md +0 -0
  74. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/PGVECTOR_SQL_GUIDE.md +0 -0
  75. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/VECTOR_CONFIGURATION.md +0 -0
  76. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/api_reference.md +0 -0
  77. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/architecture.md +0 -0
  78. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/index.md +0 -0
  79. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/integrations/README.md +0 -0
  80. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/integrations/python.md +0 -0
  81. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/integrations/spark.md +0 -0
  82. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/docs/integrations/trino.md +0 -0
  83. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/fix_cache.patch +0 -0
  84. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/fix_nb.py +0 -0
  85. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/fix_schema.patch +0 -0
  86. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/proptest-regressions/core/index/gpu.txt +0 -0
  87. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/proptest-regressions/core/sql/vector_literal.txt +0 -0
  88. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/proptest-regressions/core/sql/vector_udf.txt +0 -0
  89. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/python/hyperstreamdb/embeddings.py +0 -0
  90. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/spark-hyperstream/.bloop/bloop.settings.json +0 -0
  91. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/spark-hyperstream/.bloop/spark-hyperstream-test.json +0 -0
  92. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/spark-hyperstream/.bloop/spark-hyperstream.json +0 -0
  93. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/DefaultSource.java +0 -0
  94. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartition.java +0 -0
  95. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReader.java +0 -0
  96. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReaderFactory.java +0 -0
  97. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamScanBuilder.java +0 -0
  98. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamTable.java +0 -0
  99. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/bin/gateway.rs +0 -0
  100. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/bin/hdb.rs +0 -0
  101. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/bin/iceberg_rest.rs +0 -0
  102. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/bin/probe_datafusion.rs +0 -0
  103. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/bin/setup_test_data.rs +0 -0
  104. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/bin/verify_layered_indexing.rs +0 -0
  105. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/catalog/config.rs +0 -0
  106. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/catalog/glue.rs +0 -0
  107. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/catalog/hive.rs +0 -0
  108. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/catalog/jdbc.rs +0 -0
  109. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/catalog/mod.rs +0 -0
  110. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/catalog/nessie.rs +0 -0
  111. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/catalog/rest.rs +0 -0
  112. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/catalog/unity.rs +0 -0
  113. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/clustering.rs +0 -0
  114. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/compaction.rs +0 -0
  115. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/embeddings.rs +0 -0
  116. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/ffi.rs +0 -0
  117. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/iceberg/iceberg_delete.rs +0 -0
  118. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/iceberg.rs +0 -0
  119. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/cuda/cosine_distance.cu +0 -0
  120. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/cuda/hamming_distance.cu +0 -0
  121. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/cuda/inner_product.cu +0 -0
  122. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/cuda/jaccard_distance.cu +0 -0
  123. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/cuda/kmeans_assignment.cu +0 -0
  124. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/cuda/l1_distance.cu +0 -0
  125. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/cuda/l2_distance.cu +0 -0
  126. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/distance.rs +0 -0
  127. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_ivf.rs +0 -0
  128. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_rs/annhdf5.rs +0 -0
  129. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_rs/api.rs +0 -0
  130. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_rs/dist.rs +0 -0
  131. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_rs/flatten.rs +0 -0
  132. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_rs/hnsw.rs +0 -0
  133. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_rs/hnswio.rs +0 -0
  134. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_rs/libext.rs +0 -0
  135. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_rs/mod.rs +0 -0
  136. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_rs/prelude.rs +0 -0
  137. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/hnsw_rs/test.rs +0 -0
  138. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/ivf.rs +0 -0
  139. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/memory.rs +0 -0
  140. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/mod.rs +0 -0
  141. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/mps/cosine_distance.metal +0 -0
  142. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/mps/hamming_distance.metal +0 -0
  143. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/mps/inner_product.metal +0 -0
  144. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/mps/jaccard_distance.metal +0 -0
  145. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/mps/kmeans_assignment.metal +0 -0
  146. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/mps/l1_distance.metal +0 -0
  147. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/mps/l2_distance.metal +0 -0
  148. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/opencl/cosine_distance.cl +0 -0
  149. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/opencl/hamming_distance.cl +0 -0
  150. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/opencl/inner_product.cl +0 -0
  151. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/opencl/jaccard_distance.cl +0 -0
  152. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/opencl/kmeans_assignment.cl +0 -0
  153. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/opencl/l1_distance.cl +0 -0
  154. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/opencl/l2_distance.cl +0 -0
  155. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/pq.rs +0 -0
  156. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/index/tokenizer.rs +0 -0
  157. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/maintenance.rs +0 -0
  158. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/merge.rs +0 -0
  159. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/metadata.rs +0 -0
  160. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/mod.rs +0 -0
  161. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/nessie.rs +0 -0
  162. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/puffin.rs +0 -0
  163. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/sql/mod.rs +0 -0
  164. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/sql/optimizer.rs +0 -0
  165. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/sql/physical_plan/index_join.rs +0 -0
  166. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/sql/physical_plan.rs +0 -0
  167. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/sql/vector_literal.rs +0 -0
  168. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/sql/vector_operators.rs +0 -0
  169. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/storage.rs +0 -0
  170. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/core/table.rs.orig +0 -0
  171. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/enterprise/continuous_indexing.rs +0 -0
  172. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/enterprise/license.rs +0 -0
  173. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/enterprise/mod.rs +0 -0
  174. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/index.rs.old +0 -0
  175. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/telemetry/metrics.rs +0 -0
  176. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/telemetry/mod.rs +0 -0
  177. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/src/telemetry/tracing.rs +0 -0
  178. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/task.md +0 -0
  179. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/all_types_index_test.rs +0 -0
  180. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/bin/generate_iceberg_manifests.rs +0 -0
  181. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/bin/verify_iceberg_read_check.rs +0 -0
  182. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/data/download_nyc_taxi.sh +0 -0
  183. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/data/generate_embeddings.py +0 -0
  184. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/data/generate_wikipedia.py +0 -0
  185. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/data/start_nessie.sh +0 -0
  186. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/debug_murmur3.rs +0 -0
  187. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/fuzz_murmur3.rs +0 -0
  188. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/performance/README.md +0 -0
  189. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/prototype_merge.py +0 -0
  190. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/schema_evolution_test.rs +0 -0
  191. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_catalog_commit.rs +0 -0
  192. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_compliance.rs +0 -0
  193. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_delete_correctness.rs +0 -0
  194. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_iceberg_python_delete.sh +0 -0
  195. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_iceberg_rest.sh +0 -0
  196. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_iceberg_rest_create.sh +0 -0
  197. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_iceberg_rest_delete.sh +0 -0
  198. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_iceberg_rest_remove_index.sh +0 -0
  199. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_iceberg_rest_update.sh +0 -0
  200. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_metadata_creation.rs +0 -0
  201. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_mor_reads.rs +0 -0
  202. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_mor_writes.rs +0 -0
  203. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_partition_transforms.rs +0 -0
  204. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_partitioned_writes.rs +0 -0
  205. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_puffin_index.sh +0 -0
  206. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_rest_updates.sh +0 -0
  207. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/tests/verify_schema_compat.rs +0 -0
  208. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config/.DS_Store +0 -0
  209. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config/catalog/glue_catalog.properties +0 -0
  210. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config/catalog/hyperstreamdb.properties +0 -0
  211. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config/catalog/iceberg.properties +0 -0
  212. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config/catalog/memory.properties +0 -0
  213. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config/catalog/postgres.properties +0 -0
  214. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config/config.properties +0 -0
  215. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config/entrypoint.sh +0 -0
  216. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config/jvm.config +0 -0
  217. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config/node.properties +0 -0
  218. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-config.zip +0 -0
  219. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBColumnHandle.java +0 -0
  220. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBConnectorFactory.java +0 -0
  221. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBMetadata.java +0 -0
  222. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSource.java +0 -0
  223. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSourceProvider.java +0 -0
  224. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPlugin.java +0 -0
  225. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplit.java +0 -0
  226. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplitManager.java +0 -0
  227. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBTableHandle.java +0 -0
  228. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/update_schema_patch.py +0 -0
  229. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/update_schema_patch2.py +0 -0
  230. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/verify_docstrings.py +0 -0
  231. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/verify_fluent_api.py +0 -0
  232. {hyperstreamdb-0.1.8 → hyperstreamdb-0.1.11}/verify_unified_ingest.py +0 -0
@@ -103,3 +103,4 @@ groq_api_key.txt
103
103
  rag_db/
104
104
  news_db/
105
105
 
106
+ .hypothesis/
@@ -3755,7 +3755,7 @@ dependencies = [
3755
3755
 
3756
3756
  [[package]]
3757
3757
  name = "hyperstreamdb"
3758
- version = "0.1.8"
3758
+ version = "0.1.11"
3759
3759
  dependencies = [
3760
3760
  "ahash 0.8.12",
3761
3761
  "anyhow",
@@ -3782,6 +3782,8 @@ dependencies = [
3782
3782
  "cust",
3783
3783
  "datafusion",
3784
3784
  "datafusion-expr-common",
3785
+ "datafusion-functions",
3786
+ "datafusion-functions-aggregate",
3785
3787
  "datafusion-functions-aggregate-common",
3786
3788
  "dirs",
3787
3789
  "env_logger",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "hyperstreamdb"
3
- version = "0.1.8"
3
+ version = "0.1.11"
4
4
  edition = "2021"
5
5
  license = "MIT AND Apache-2.0"
6
6
  description = "HyperStreamDB - Serverless Index-Streaming Database with Overlay Indexing and Vector Search"
@@ -114,6 +114,8 @@ once_cell = "1.19"
114
114
  async-trait = "0.1.89"
115
115
  datafusion = "52.0.0"
116
116
  datafusion-expr-common = "52.0.0"
117
+ datafusion-functions = "52.0.0"
118
+ datafusion-functions-aggregate = "52.0.0"
117
119
  datafusion-functions-aggregate-common = "52.0.0"
118
120
  async-stream = "0.3.6"
119
121
  smartcore = "0.3" # For k-means clustering (IVF index)
@@ -158,7 +160,7 @@ features = ["invocation"]
158
160
  [dependencies.pyo3]
159
161
  version = "0.26.0"
160
162
  optional = true
161
- features = ["extension-module"]
163
+ features = ["extension-module", "abi3-py310"]
162
164
 
163
165
  [dependencies.numpy]
164
166
  version = "0.26.0"
@@ -1,15 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hyperstreamdb
3
- Version: 0.1.8
3
+ Version: 0.1.11
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Programming Language :: Rust
7
7
  Classifier: Programming Language :: Python :: 3
8
- Classifier: Programming Language :: Python :: 3.8
9
- Classifier: Programming Language :: Python :: 3.9
10
8
  Classifier: Programming Language :: Python :: 3.10
11
9
  Classifier: Programming Language :: Python :: 3.11
12
10
  Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Classifier: Programming Language :: Python :: 3.14
13
13
  Classifier: Programming Language :: Python :: Implementation :: CPython
14
14
  Classifier: Programming Language :: Python :: Implementation :: PyPy
15
15
  Classifier: Topic :: Database
@@ -19,7 +19,7 @@ Requires-Dist: numpy>=1.20.0
19
19
  Requires-Dist: mps ; extra == 'all-gpu'
20
20
  Requires-Dist: cuda ; extra == 'all-gpu'
21
21
  Requires-Dist: rocm ; extra == 'all-gpu'
22
- Requires-Dist: intel-gpu ; extra == 'all-gpu'
22
+ Requires-Dist: intel ; extra == 'all-gpu'
23
23
  Requires-Dist: pytest>=7.0 ; extra == 'dev'
24
24
  Requires-Dist: pytest-asyncio>=0.21 ; extra == 'dev'
25
25
  Requires-Dist: maturin>=1.7 ; extra == 'dev'
@@ -27,8 +27,7 @@ Requires-Dist: maturin>=1.7 ; extra == 'src'
27
27
  Provides-Extra: all_gpu
28
28
  Provides-Extra: cuda
29
29
  Provides-Extra: dev
30
- Provides-Extra: intel_cpu
31
- Provides-Extra: intel_gpu
30
+ Provides-Extra: intel
32
31
  Provides-Extra: mps
33
32
  Provides-Extra: rocm
34
33
  Provides-Extra: src
@@ -39,7 +38,7 @@ Keywords: database,vector,search,indexing,parquet,iceberg
39
38
  Home-Page: https://github.com/rla3rd/hyperstreamdb
40
39
  Author: HyperStream Team
41
40
  License: MIT AND Apache-2.0
42
- Requires-Python: >=3.8
41
+ Requires-Python: >=3.10
43
42
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
44
43
  Project-URL: Homepage, https://github.com/rla3rd/hyperstreamdb
45
44
  Project-URL: Repository, https://github.com/rla3rd/hyperstreamdb
@@ -125,6 +124,10 @@ maturin develop
125
124
 
126
125
  # Or install from PyPI (coming soon)
127
126
  pip install hyperstreamdb
127
+
128
+ # Windows Users
129
+ # HyperStreamDB is optimized for Linux/POSIX environments.
130
+ # Windows users should use WSL2 (Windows Subsystem for Linux).
128
131
  ```
129
132
 
130
133
  ### GPU Acceleration (Optional)
@@ -342,10 +345,10 @@ distance = hdb.hamming_distance_packed(binary1, binary2)
342
345
  ```
343
346
 
344
347
  **Supported GPU Backends:**
345
- - **CUDA** - NVIDIA GPUs (Linux, Windows)
348
+ - **CUDA** - NVIDIA GPUs (Linux, Windows via WSL2)
346
349
  - **ROCm** - AMD GPUs (Linux)
347
350
  - **Metal (MPS)** - Apple Silicon (macOS)
348
- - **OpenCL** - Intel GPUs (Linux, Windows)
351
+ - **OpenCL** - Intel GPUs (Linux, Windows via WSL2)
349
352
  - **CPU** - Fallback for all platforms
350
353
 
351
354
  **Supported Distance Metrics:**
@@ -416,18 +419,28 @@ python tests/integration/test_nyc_taxi.py
416
419
  ```
417
420
 
418
421
  **Performance Targets:**
419
- - **Scalar Ingest**: >100K rows/sec ✅
422
+ - **Scalar Ingest**: >10K rows/sec ✅
420
423
  - **Vector Ingest (768D)**: >4,000 rows/sec ✅ (April 2026)
421
424
  - **Query (indexed)**: <100ms p99 ⏱️
422
425
  - **Vector search**: <50ms for k=10 on 10M vectors ⏱️
423
426
  - **Compaction**: <5min for 10GB ⏱️
424
427
 
425
- **Benchmarking Environment:**
428
+ **Benchmarking Environment: Lenovo T480**
426
429
  - **System**: Lenovo T480
427
430
  - **CPU**: Intel(R) Core(TM) i5-8350U CPU @ 1.70GHz
428
431
  - **RAM**: 64GB
429
432
  - **OS**: Linux
430
433
 
434
+ **Benchmarking Environment: Apple M4 Max**
435
+ - **System**: MacBook Pro (M4 Max, 16-core CPU, 40-core GPU)
436
+ - **Memory**: 128GB Unified Memory
437
+ - **OS**: macOS (Arm64)
438
+ - **Optimizations**: `target-cpu=native` (NEON SIMD)
439
+ - **Results (100K vectors, 768D)**:
440
+ - **Vector Ingest**: 16,707 rows/sec (CPU) ✅
441
+ - **Vector Search (k=10)**: 819ms (CPU / NEON) ✅
442
+ - **Vector Search (k=10)**: 860ms (MPS GPU) ⏱️
443
+
431
444
  ### Phase 2: Nessie Integration (Next)
432
445
 
433
446
  **Catalog Strategy:**
@@ -79,6 +79,10 @@ maturin develop
79
79
 
80
80
  # Or install from PyPI (coming soon)
81
81
  pip install hyperstreamdb
82
+
83
+ # Windows Users
84
+ # HyperStreamDB is optimized for Linux/POSIX environments.
85
+ # Windows users should use WSL2 (Windows Subsystem for Linux).
82
86
  ```
83
87
 
84
88
  ### GPU Acceleration (Optional)
@@ -296,10 +300,10 @@ distance = hdb.hamming_distance_packed(binary1, binary2)
296
300
  ```
297
301
 
298
302
  **Supported GPU Backends:**
299
- - **CUDA** - NVIDIA GPUs (Linux, Windows)
303
+ - **CUDA** - NVIDIA GPUs (Linux, Windows via WSL2)
300
304
  - **ROCm** - AMD GPUs (Linux)
301
305
  - **Metal (MPS)** - Apple Silicon (macOS)
302
- - **OpenCL** - Intel GPUs (Linux, Windows)
306
+ - **OpenCL** - Intel GPUs (Linux, Windows via WSL2)
303
307
  - **CPU** - Fallback for all platforms
304
308
 
305
309
  **Supported Distance Metrics:**
@@ -370,18 +374,28 @@ python tests/integration/test_nyc_taxi.py
370
374
  ```
371
375
 
372
376
  **Performance Targets:**
373
- - **Scalar Ingest**: >100K rows/sec ✅
377
+ - **Scalar Ingest**: >10K rows/sec ✅
374
378
  - **Vector Ingest (768D)**: >4,000 rows/sec ✅ (April 2026)
375
379
  - **Query (indexed)**: <100ms p99 ⏱️
376
380
  - **Vector search**: <50ms for k=10 on 10M vectors ⏱️
377
381
  - **Compaction**: <5min for 10GB ⏱️
378
382
 
379
- **Benchmarking Environment:**
383
+ **Benchmarking Environment: Lenovo T480**
380
384
  - **System**: Lenovo T480
381
385
  - **CPU**: Intel(R) Core(TM) i5-8350U CPU @ 1.70GHz
382
386
  - **RAM**: 64GB
383
387
  - **OS**: Linux
384
388
 
389
+ **Benchmarking Environment: Apple M4 Max**
390
+ - **System**: MacBook Pro (M4 Max, 16-core CPU, 40-core GPU)
391
+ - **Memory**: 128GB Unified Memory
392
+ - **OS**: macOS (Arm64)
393
+ - **Optimizations**: `target-cpu=native` (NEON SIMD)
394
+ - **Results (100K vectors, 768D)**:
395
+ - **Vector Ingest**: 16,707 rows/sec (CPU) ✅
396
+ - **Vector Search (k=10)**: 819ms (CPU / NEON) ✅
397
+ - **Vector Search (k=10)**: 860ms (MPS GPU) ⏱️
398
+
385
399
  ### Phase 2: Nessie Integration (Next)
386
400
 
387
401
  **Catalog Strategy:**
@@ -0,0 +1,50 @@
1
+ # Competitive Benchmark Report - HyperStreamDB
2
+
3
+ **Generated:** 2026-04-04 13:56:56.467083
4
+
5
+ ## Ingest Performance
6
+
7
+ | System | Operation | Dataset Size | Latency (ms) | Throughput (rows/sec) | Storage (MB) | Hardware | Device |
8
+ |:---------------------|:------------|---------------:|---------------:|------------------------:|---------------:|:-------------------------------------|:---------|
9
+ | HyperStreamDB | ingest | 1000 | 268.945 | 3718.23 | 7.18574 | Generic Baseline | cpu |
10
+ | DuckDB (Raw Parquet) | ingest | 1000 | 70.8115 | 14122 | 3.55513 | Generic Baseline | cpu |
11
+ | LanceDB | ingest | 1000 | 28.9528 | 34538.9 | 2.98259 | Generic Baseline | cpu |
12
+ | HyperStreamDB | ingest | 1000 | 801.391 | 1247.83 | 7.11231 | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
13
+ | HyperStreamDB | ingest | 1000 | 818.65 | 1221.52 | 7.13006 | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
14
+ | HyperStreamDB | ingest | 1000 | 820.246 | 1219.15 | 7.11529 | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
15
+ | HyperStreamDB | ingest | 1000 | 290.842 | 3438.29 | 7.1134 | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
16
+ | HyperStreamDB | ingest | 1000 | 412.61 | 2423.59 | 7.12722 | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
17
+
18
+ ## Vector Search Performance
19
+
20
+ | System | Operation | Dataset Size | Latency (ms) | Throughput (rows/sec) | Storage (MB) | Hardware | Device |
21
+ |:--------------|:-------------------|---------------:|---------------:|------------------------:|---------------:|:-------------------------------------|:---------|
22
+ | HyperStreamDB | vector_search_k10 | 1000 | 28.3022 | nan | nan | Generic Baseline | cpu |
23
+ | LanceDB | vector_search_k10 | 1000 | 6.41737 | nan | nan | Generic Baseline | cpu |
24
+ | HyperStreamDB | vector_search_k10 | 1000 | 109.424 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
25
+ | HyperStreamDB | vector_search_k10 | 1000 | 109.012 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
26
+ | HyperStreamDB | vector_search_k10 | 1000 | 108.445 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
27
+ | HyperStreamDB | vector_search_k10 | 1000 | 16.1742 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
28
+ | HyperStreamDB | vector_search_k10 | 1000 | 14.6868 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
29
+ | HyperStreamDB | vector_search_k100 | 1000 | 25.9826 | nan | nan | Generic Baseline | cpu |
30
+ | LanceDB | vector_search_k100 | 1000 | 7.24833 | nan | nan | Generic Baseline | cpu |
31
+ | HyperStreamDB | vector_search_k100 | 1000 | 148.144 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
32
+ | HyperStreamDB | vector_search_k100 | 1000 | 148.415 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
33
+ | HyperStreamDB | vector_search_k100 | 1000 | 152.582 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
34
+ | HyperStreamDB | vector_search_k100 | 1000 | 19.9033 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
35
+ | HyperStreamDB | vector_search_k100 | 1000 | 16.9979 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
36
+
37
+ ## Hybrid Query Performance
38
+
39
+ ## Key Findings
40
+
41
+ ### HyperStreamDB Advantages
42
+
43
+ 1. **Native Hybrid Queries**: Only system with scalar + vector in single query
44
+ 2. **Iceberg Compatibility**: Standard data lake format
45
+ 3. **Multi-Catalog Support**: Hive, Glue, Unity, REST, Nessie
46
+ 4. **100% Iceberg v3 Compliance**: All required features implemented
47
+
48
+ ### Competitive Position
49
+
50
+ - Vector search: 10.0x slower than LanceDB
@@ -0,0 +1,24 @@
1
+ System,Operation,Dataset Size,Latency (ms),Throughput (rows/sec),Storage (MB),Hardware,Device
2
+ HyperStreamDB,ingest,1000,268.94545555114746,3718.2260542410327,7.185737609863281,Generic Baseline,cpu
3
+ DuckDB (Raw Parquet),ingest,1000,70.81151008605957,14121.997946162524,3.555130958557129,Generic Baseline,cpu
4
+ LanceDB,ingest,1000,28.952836990356445,34538.929650765414,2.9825878143310547,Generic Baseline,cpu
5
+ HyperStreamDB,vector_search_k10,1000,28.3022403717041,,,Generic Baseline,cpu
6
+ LanceDB,vector_search_k10,1000,6.417369842529297,,,Generic Baseline,cpu
7
+ HyperStreamDB,vector_search_k100,1000,25.98259449005127,,,Generic Baseline,cpu
8
+ LanceDB,vector_search_k100,1000,7.248330116271973,,,Generic Baseline,cpu
9
+ DuckDB,scalar_query,1000,3.293037414550781,,,Generic Baseline,cpu
10
+ HyperStreamDB,ingest,1000,801.3906478881836,1247.8308832717098,7.112313270568848,AMD Ryzen 9 5900XT 16-Core Processor,cpu
11
+ HyperStreamDB,vector_search_k10,1000,109.423828125,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
12
+ HyperStreamDB,vector_search_k100,1000,148.14441204071045,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
13
+ HyperStreamDB,ingest,1000,818.6497688293457,1221.5235844138597,7.130064010620117,AMD Ryzen 9 5900XT 16-Core Processor,cpu
14
+ HyperStreamDB,vector_search_k10,1000,109.01196002960204,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
15
+ HyperStreamDB,vector_search_k100,1000,148.41535091400146,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
16
+ HyperStreamDB,ingest,1000,820.2464580535889,1219.1457703736496,7.115290641784668,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
17
+ HyperStreamDB,vector_search_k10,1000,108.44509601593018,,,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
18
+ HyperStreamDB,vector_search_k100,1000,152.58185863494873,,,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
19
+ HyperStreamDB,ingest,1000,290.84205627441406,3438.2922910450206,7.113402366638184,AMD Ryzen 9 5900XT 16-Core Processor,cpu
20
+ HyperStreamDB,vector_search_k10,1000,16.17424488067627,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
21
+ HyperStreamDB,vector_search_k100,1000,19.90334987640381,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
22
+ HyperStreamDB,ingest,1000,412.6102924346924,2423.5944142335693,7.127219200134277,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
23
+ HyperStreamDB,vector_search_k10,1000,14.686751365661621,,,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
24
+ HyperStreamDB,vector_search_k100,1000,16.997885704040527,,,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
@@ -36,6 +36,7 @@ fn main() {
36
36
  "l1_distance",
37
37
  "hamming_distance",
38
38
  "jaccard_distance",
39
+ "kmeans_assignment",
39
40
  ];
40
41
 
41
42
  // Compile each kernel
@@ -1,6 +1,6 @@
1
1
  # HyperStreamDB Comprehensive Guide
2
2
 
3
- **Version:** 0.1.8 (Alpha)
3
+ **Version:** 0.1.11 (Alpha)
4
4
  **Last Updated:** 2026-04-03
5
5
 
6
6
  HyperStreamDB is a serverless, hybrid-search database optimized for high-performance vector and scalar queries directly on data lakes (S3, GCS, Azure, Local).
@@ -145,20 +145,56 @@ See [pgvector SQL Guide](PGVECTOR_SQL_GUIDE.md) for complete documentation.
145
145
 
146
146
  ### 3.2 Hardware Acceleration
147
147
  The indexing engine supports hardware acceleration for multiple backends:
148
- * **CUDA**: NVIDIA GPUs (Linux/Windows)
148
+ * **CUDA**: NVIDIA GPUs (Linux, Windows via WSL2)
149
149
  * **Metal**: Apple Silicon (MPS)
150
150
  * **ROCm**: AMD GPUs
151
151
  * **Intel**: AVX-512 optimizations
152
152
 
153
153
  Enable via `Cargo.toml` features or environment detection.
154
154
 
155
- ### 3.3 Multi-Catalog Support
156
- HyperStreamDB supports enterprise catalog integrations:
157
- * **Nessie**: Git-like versioning for data.
158
- * **Unity Catalog**: Databricks integration.
159
- * **AWS Glue**: Native AWS metadata.
160
- * **Hive Metastore**: Legacy Hadoop compatibility.
161
- * **REST**: Iceberg-compatible REST catalog.
155
+ ### 3.3 Multi-Catalog Support
156
+
157
+ HyperStreamDB is designed to integrate seamlessly with standard data catalogs to provide discovery, cross-table atomicity, and consistent metadata across the enterprise. We support a variety of industry-standard protocols.
158
+
159
+ Below is a detailed example using the Hive Metastore, followed by short-form examples for other supported catalogs. Full integration guides for each will be provided in future updates.
160
+
161
+ ### Hive Metastore (Detailed Example)
162
+
163
+ Connecting to a Hive Metastore allows you to resolve table names to storage locations automatically.
164
+
165
+ ```python
166
+ import hyperstreamdb as hdb
167
+
168
+ # Load a table from Hive Metastore
169
+ table = hdb.Table.from_hive(
170
+ address="thrift://localhost:9083",
171
+ namespace="default",
172
+ table="my_analytics_table"
173
+ )
174
+
175
+ # Read through the catalog-resolved table; any writes made via `table` are committed back to Hive atomically
176
+ df = table.to_pandas(filter="status = 'active'")
177
+ ```
178
+
179
+ ### AWS Glue, Nessie, REST, and Unity Catalogs
180
+
181
+ HyperStreamDB also provides native support for modern cloud catalogs. Each is configured in the same way as the Hive example:
182
+
183
+ ```python
184
+ # AWS Glue (Native AWS Integration)
185
+ table = hdb.Table.from_glue(namespace="prod", table="users")
186
+
187
+ # Project Nessie (Git-like Versioning)
188
+ table = hdb.Table.from_nessie(nessie_url, namespace="dev", table="experiments")
189
+
190
+ # Iceberg REST Catalog (Standard API)
191
+ table = hdb.Table.from_rest(rest_url, namespace="marketing", table="campaigns")
192
+
193
+ # Unity Catalog (Databricks Ecosystem)
194
+ table = hdb.Table.from_unity(unity_url, namespace="main", table="gold_data")
195
+ ```
196
+
197
+ For more details on advanced configurations and authentication (Kerberos, SASL, IAM), see the [Configuration Guide](./CONFIGURATION.md) or the [Catalog Usage Guide](./catalog_usage.md).
162
198
 
163
199
  ---
164
200
 
@@ -0,0 +1,38 @@
1
+ # Concurrency and Atomic Commits
2
+
3
+ HyperStreamDB is designed for high-concurrency environments where multiple clients may be reading from and writing to the same table simultaneously.
4
+
5
+ ## Optimistic Concurrency Control (OCC)
6
+
7
+ HyperStreamDB employs **Optimistic Concurrency Control** to ensure ACID compliance without the need for heavyweight central locks in most cases.
8
+
9
+ ### Snapshot Versioning
10
+ Every table state is represented by a specific version of the manifest file (e.g., `_manifest/v100.json`). These files are immutable once written.
11
+
12
+ ### The Commit Protocol
13
+ When a client (writer) wants to commit changes:
14
+ 1. **Read Latest**: The client reads the current latest version (e.g., `v100`).
15
+ 2. **Prepare**: The client calculates the new state (`v101`) based on the changes (e.g., added or removed segments).
16
+ 3. **Atomic Swap**: The client attempts to write the new manifest file `v101.json` using an **atomic "create-if-not-exists"** primitive.
17
+
18
+ ### Conflict Resolution
19
+ If another client successfully committed `v101.json` while the first client was preparing its changes:
20
+ - The first client's write operation will fail with an `AlreadyExists` or conflict error.
21
+ - HyperStreamDB automatically **retries** the commit (up to 100 times).
22
+ - In each retry, the client re-reads the *new* latest version, merges its changes again, and attempts to commit the *next* version (e.g., `v102`).
23
+ - A randomized **exponential backoff** is used between retries to reduce contention.
24
+
25
+ ## Catalog-Level Locking
26
+
27
+ While OCC works on local file systems and on object stores that offer an atomic "create-if-not-exists" write (such as Azure Blob Storage or Google Cloud Storage using precondition headers), not every provider guarantees one: **AWS S3** only added conditional writes (`If-None-Match`) in August 2024, and many S3-compatible services still lack them.
28
+
29
+ In these cases, HyperStreamDB leverages **Iceberg-compatible catalogs** to provide the necessary atomicity:
30
+
31
+ - **AWS Glue**: Uses the Glue Catalog's built-in versioning and optimistic locking.
32
+ - **Nessie**: Provides Git-like branching and merging with cross-table atomic commits.
33
+ - **Hive Metastore**: Uses a relational database backend (like PostgreSQL or MySQL) to provide transactionally safe updates to the `metadata_location` parameter.
34
+ - **REST Catalog**: Delegates atomicity to a centralized REST server (e.g., Tabular, Polaris).
35
+
36
+ ## Read Isolation
37
+
38
+ Readers in HyperStreamDB always see a **consistent snapshot** of the table. Once a reader loads a particular version (e.g., `v100`), it will continue to see that state even if newer versions are committed by other clients. This provides **Snapshot Isolation**, which is ideal for long-running analytical queries.
@@ -0,0 +1,51 @@
1
+ # Configuration Guide
2
+
3
+ HyperStreamDB is designed to be highly configurable through environment variables and a centralized configuration file.
4
+
5
+ ## Environment Variables
6
+
7
+ These variables control the core behavior of the system, including memory management, caching, and storage paths.
8
+
9
+ | Variable | Description | Default |
10
+ |----------|-------------|---------|
11
+ | `HYPERSTREAM_CACHE_GB` | Memory limit for the hybrid vector index (HNSW-IVF) in GB. | `2` |
12
+ | `HYPERSTREAM_BLOCK_CACHE_GB` | Memory limit for the decoded RecordBatch block cache in GB. | `4` |
13
+ | `HYPERSTREAM_DISK_CACHE_DIR` | Directory used for caching segmented index files on local disk. | `/tmp/hdb_cache` |
14
+ | `HYPERSTREAM_WAL_DIR` | Directory for the Write-Ahead Log (WAL) used for fault tolerance. | `{table_uri}/_wal` |
15
+ | `HYPERSTREAM_CONFIG` | Path to a centralized `hyperstream.toml` configuration file. | None |
16
+ | `JAEGER_ENABLED` | Enable distributed tracing via Jaeger (requires `opentelemetry` feature). | `false` |
17
+
18
+ ## The hyperstream.toml File
19
+
20
+ You can use a TOML file to manage complex configurations, especially for catalogs and multi-cloud storage.
21
+
22
+ HyperStreamDB looks for this file in the following order:
23
+ 1. Environment variable `HYPERSTREAM_CONFIG`
24
+ 2. `./hyperstream.toml` (current directory)
25
+ 3. `~/.hyperstream/config.toml`
26
+
27
+ ### Example Configuration
28
+
29
+ ```toml
30
+ [storage]
31
+ type = "s3"
32
+ bucket = "my-data-lake"
33
+ region = "us-east-1"
34
+
35
+ [cache]
36
+ memory_limit_gb = 8
37
+ disk_cache_enabled = true
38
+ disk_cache_path = "/mnt/fast-ssd/hdb_cache"
39
+
40
+ [catalog]
41
+ type = "nessie"
42
+ url = "http://nessie:19120/api/v2"
43
+ ref = "main"
44
+ ```
45
+
46
+ ## Storage Credentials
47
+
48
+ HyperStreamDB uses the standard `object_store` crate, which automatically picks up credentials from:
49
+ - **AWS**: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION`, or IAM Roles.
50
+ - **GCP**: `GOOGLE_APPLICATION_CREDENTIALS` (JSON key file path).
51
+ - **Azure**: `AZURE_STORAGE_ACCOUNT`, `AZURE_STORAGE_KEY`.
@@ -9,7 +9,7 @@ HyperStreamDB supports GPU acceleration for vector distance computations across
9
9
  - **NVIDIA CUDA** - For NVIDIA GPUs (GeForce, Quadro, Tesla)
10
10
  - **AMD ROCm** - For AMD Radeon GPUs
11
11
  - **Apple Metal (MPS)** - For Apple Silicon Macs
12
- - **Intel OpenCL** - For Intel integrated and discrete GPUs
12
+ - **Intel OpenCL** - For Intel integrated and discrete GPUs (Linux, WSL2)
13
13
 
14
14
  GPU acceleration provides 10x+ speedup for batch distance operations on large vector databases (100,000+ vectors).
15
15
 
@@ -69,15 +69,16 @@ nvidia-smi
69
69
  nvcc --version
70
70
  ```
71
71
 
72
- ### Installation on Windows
72
+ ### Installation on Windows (via WSL2)
73
73
 
74
- 1. Download CUDA Toolkit from [NVIDIA website](https://developer.nvidia.com/cuda-downloads)
75
- 2. Run the installer (cuda_12.3.0_windows.exe)
76
- 3. Follow the installation wizard
77
- 4. Verify installation:
78
- ```cmd
74
+ Windows users should use **WSL2** (Windows Subsystem for Linux) to run HyperStreamDB with GPU support.
75
+
76
+ 1. Install WSL2 and Ubuntu (e.g., `wsl --install -d Ubuntu-22.04`)
77
+ 2. Install NVIDIA Windows Driver (this provides the necessary kernel-mode interface for WSL2)
78
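+ 3. Within the WSL2 Ubuntu environment, follow the **Linux installation** instructions above, but install only the CUDA Toolkit — do not install a Linux NVIDIA driver inside WSL2, because the Windows driver from step 2 is the one WSL2 uses.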
79
+ 4. Verify from within WSL:
80
+ ```bash
79
81
  nvidia-smi
80
- nvcc --version
81
82
  ```
82
83
 
83
84
  ### Verification
@@ -236,7 +237,7 @@ print(f"Computed {len(distances)} distances on Apple GPU")
236
237
  - **GPU**: Intel Iris Xe or newer (integrated or discrete)
237
238
  - Recommended: Arc A-series discrete GPUs
238
239
  - **Driver**: Intel Graphics Driver with OpenCL support
239
- - **OS**: Linux or Windows
240
+ - **OS**: Linux, or Windows via WSL2
240
241
 
241
242
  ### Supported GPUs
242
243
 
@@ -260,12 +261,9 @@ sudo apt-get install opencl-headers
260
261
  clinfo
261
262
  ```
262
263
 
263
- ### Installation on Windows
264
+ ### Installation on Windows (via WSL2)
264
265
 
265
- 1. Download latest Intel Graphics Driver from [Intel Download Center](https://www.intel.com/content/www/us/en/download-center/home.html)
266
- 2. Run the installer
267
- 3. OpenCL support is included in modern Intel drivers
268
- 4. Verify with `clinfo` (install from [GitHub](https://github.com/Oblomov/clinfo))
266
+ Windows users should install the Intel OpenCL runtime within their WSL2 distribution following the Linux installation steps above.
269
267
 
270
268
  ### Verification
271
269