hyperstreamdb 0.1.6__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/.gitignore +1 -0
  2. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/Cargo.lock +3 -1
  3. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/Cargo.toml +7 -5
  4. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/PKG-INFO +32 -11
  5. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/README.md +28 -7
  6. hyperstreamdb-0.1.10/benchmark_results/BENCHMARK_REPORT.md +50 -0
  7. hyperstreamdb-0.1.10/benchmark_results/benchmark_charts.png +0 -0
  8. hyperstreamdb-0.1.10/benchmark_results/benchmark_results.csv +24 -0
  9. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/build.rs +1 -0
  10. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/BENCHMARKING.md +27 -1
  11. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/COMPREHENSIVE_GUIDE.md +71 -22
  12. hyperstreamdb-0.1.10/docs/CONCURRENCY.md +38 -0
  13. hyperstreamdb-0.1.10/docs/CONFIGURATION.md +51 -0
  14. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/GPU_SETUP_GUIDE.md +12 -14
  15. hyperstreamdb-0.1.10/docs/INSTALLATION.md +551 -0
  16. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/PYTHON_VECTOR_API.md +10 -10
  17. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/architecture.md +7 -0
  18. hyperstreamdb-0.1.10/docs/catalog_usage.md +102 -0
  19. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/integrations/java_jni.md +3 -2
  20. hyperstreamdb-0.1.10/docs/requirements.txt +9 -0
  21. hyperstreamdb-0.1.10/docs/source/api/python.rst +40 -0
  22. hyperstreamdb-0.1.10/docs/source/api/rust.rst +11 -0
  23. hyperstreamdb-0.1.10/docs/source/conf.py +73 -0
  24. hyperstreamdb-0.1.10/docs/source/index.rst +50 -0
  25. hyperstreamdb-0.1.10/docs/source/roadmap.md +14 -0
  26. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/pyproject.toml +6 -6
  27. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/python/hyperstreamdb/__init__.py +195 -46
  28. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/spark-hyperstream/pom.xml +1 -0
  29. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/bin/iceberg_rest.rs +1 -0
  30. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/cache.rs +13 -6
  31. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/distance.rs +60 -15
  32. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/gpu.rs +118 -46
  33. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_ivf.rs +62 -33
  34. hyperstreamdb-0.1.10/src/core/index/ivf.rs +283 -0
  35. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/pq.rs +7 -10
  36. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/manifest.rs +133 -107
  37. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/planner.rs +9 -9
  38. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/query.rs +21 -1
  39. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/reader.rs +57 -5
  40. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/segment.rs +205 -197
  41. hyperstreamdb-0.1.10/src/core/sql/pgvector_rewriter.rs +187 -0
  42. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/sql/physical_plan.rs +20 -24
  43. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/sql/session.rs +24 -17
  44. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/sql/vector_udf.rs +13 -3
  45. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/table.rs +218 -29
  46. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/wal.rs +18 -15
  47. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/lib.rs +9 -4
  48. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/python_binding.rs +284 -111
  49. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/python_distance.rs +84 -84
  50. hyperstreamdb-0.1.10/src/python_gpu_context.rs +164 -0
  51. hyperstreamdb-0.1.10/tests/check_mmh3.py +16 -0
  52. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/datafusion_rust_test.rs +5 -5
  53. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/integration_test_hnsw_ivf_native.rs +1 -1
  54. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-hyperstream/pom.xml +1 -0
  55. hyperstreamdb-0.1.6/docs/catalog_usage.md +0 -107
  56. hyperstreamdb-0.1.6/src/core/index/ivf.rs +0 -357
  57. hyperstreamdb-0.1.6/src/core/sql/pgvector_rewriter.rs +0 -150
  58. hyperstreamdb-0.1.6/src/python_gpu_context.rs +0 -395
  59. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/.gitattributes +0 -0
  60. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/.hypothesis/constants/32b327793848e7d8 +0 -0
  61. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/.hypothesis/constants/67b0a8ccf18bf5d2 +0 -0
  62. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/.hypothesis/constants/84828557b4ee7be4 +0 -0
  63. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/.instructions.md +0 -0
  64. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/DORIS_OPTIMIZATION_PATTERNS.md +0 -0
  65. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/LICENSE-APACHE +0 -0
  66. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/LICENSE-MIT +0 -0
  67. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/RUN_COMPLIANCE_TESTS.sh +0 -0
  68. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/STEERING.md +0 -0
  69. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/THIRDPARTY_NOTICES.md +0 -0
  70. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/benches/bench_table.rs +0 -0
  71. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/benches/performance.rs +0 -0
  72. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/book.toml +0 -0
  73. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/build-connectors.sh +0 -0
  74. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/build_out.txt +0 -0
  75. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/check_iceberg_compliance.py +0 -0
  76. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/clippy_output.txt +0 -0
  77. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docker-compose-minio-nessie.yml +0 -0
  78. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docker-compose.yml +0 -0
  79. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/ICEBERG_V2_V3_API.md +0 -0
  80. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/PGVECTOR_SQL_GUIDE.md +0 -0
  81. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/VECTOR_CONFIGURATION.md +0 -0
  82. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/api_reference.md +0 -0
  83. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/index.md +0 -0
  84. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/integrations/README.md +0 -0
  85. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/integrations/python.md +0 -0
  86. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/integrations/spark.md +0 -0
  87. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/docs/integrations/trino.md +0 -0
  88. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/fix_cache.patch +0 -0
  89. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/fix_nb.py +0 -0
  90. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/fix_schema.patch +0 -0
  91. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/proptest-regressions/core/index/gpu.txt +0 -0
  92. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/proptest-regressions/core/sql/vector_literal.txt +0 -0
  93. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/proptest-regressions/core/sql/vector_udf.txt +0 -0
  94. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/python/hyperstreamdb/embeddings.py +0 -0
  95. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/spark-hyperstream/.bloop/bloop.settings.json +0 -0
  96. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/spark-hyperstream/.bloop/spark-hyperstream-test.json +0 -0
  97. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/spark-hyperstream/.bloop/spark-hyperstream.json +0 -0
  98. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/DefaultSource.java +0 -0
  99. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartition.java +0 -0
  100. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReader.java +0 -0
  101. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReaderFactory.java +0 -0
  102. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamScanBuilder.java +0 -0
  103. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamTable.java +0 -0
  104. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/bin/gateway.rs +0 -0
  105. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/bin/hdb.rs +0 -0
  106. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/bin/probe_datafusion.rs +0 -0
  107. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/bin/setup_test_data.rs +0 -0
  108. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/bin/verify_layered_indexing.rs +0 -0
  109. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/catalog/config.rs +0 -0
  110. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/catalog/glue.rs +0 -0
  111. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/catalog/hive.rs +0 -0
  112. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/catalog/jdbc.rs +0 -0
  113. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/catalog/mod.rs +0 -0
  114. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/catalog/nessie.rs +0 -0
  115. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/catalog/rest.rs +0 -0
  116. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/catalog/unity.rs +0 -0
  117. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/clustering.rs +0 -0
  118. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/compaction.rs +0 -0
  119. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/embeddings.rs +0 -0
  120. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/ffi.rs +0 -0
  121. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/iceberg/iceberg_delete.rs +0 -0
  122. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/iceberg.rs +0 -0
  123. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/cuda/cosine_distance.cu +0 -0
  124. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/cuda/hamming_distance.cu +0 -0
  125. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/cuda/inner_product.cu +0 -0
  126. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/cuda/jaccard_distance.cu +0 -0
  127. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/cuda/kmeans_assignment.cu +0 -0
  128. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/cuda/l1_distance.cu +0 -0
  129. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/cuda/l2_distance.cu +0 -0
  130. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_rs/annhdf5.rs +0 -0
  131. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_rs/api.rs +0 -0
  132. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_rs/dist.rs +0 -0
  133. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_rs/flatten.rs +0 -0
  134. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_rs/hnsw.rs +0 -0
  135. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_rs/hnswio.rs +0 -0
  136. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_rs/libext.rs +0 -0
  137. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_rs/mod.rs +0 -0
  138. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_rs/prelude.rs +0 -0
  139. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/hnsw_rs/test.rs +0 -0
  140. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/memory.rs +0 -0
  141. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/mod.rs +0 -0
  142. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/mps/cosine_distance.metal +0 -0
  143. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/mps/hamming_distance.metal +0 -0
  144. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/mps/inner_product.metal +0 -0
  145. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/mps/jaccard_distance.metal +0 -0
  146. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/mps/kmeans_assignment.metal +0 -0
  147. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/mps/l1_distance.metal +0 -0
  148. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/mps/l2_distance.metal +0 -0
  149. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/opencl/cosine_distance.cl +0 -0
  150. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/opencl/hamming_distance.cl +0 -0
  151. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/opencl/inner_product.cl +0 -0
  152. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/opencl/jaccard_distance.cl +0 -0
  153. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/opencl/kmeans_assignment.cl +0 -0
  154. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/opencl/l1_distance.cl +0 -0
  155. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/opencl/l2_distance.cl +0 -0
  156. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/index/tokenizer.rs +0 -0
  157. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/maintenance.rs +0 -0
  158. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/merge.rs +0 -0
  159. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/metadata.rs +0 -0
  160. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/mod.rs +0 -0
  161. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/nessie.rs +0 -0
  162. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/puffin.rs +0 -0
  163. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/sql/mod.rs +0 -0
  164. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/sql/optimizer.rs +0 -0
  165. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/sql/physical_plan/index_join.rs +0 -0
  166. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/sql/vector_literal.rs +0 -0
  167. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/sql/vector_operators.rs +0 -0
  168. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/storage.rs +0 -0
  169. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/core/table.rs.orig +0 -0
  170. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/enterprise/continuous_indexing.rs +0 -0
  171. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/enterprise/license.rs +0 -0
  172. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/enterprise/mod.rs +0 -0
  173. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/index.rs.old +0 -0
  174. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/telemetry/metrics.rs +0 -0
  175. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/telemetry/mod.rs +0 -0
  176. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/src/telemetry/tracing.rs +0 -0
  177. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/task.md +0 -0
  178. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/all_types_index_test.rs +0 -0
  179. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/bin/generate_iceberg_manifests.rs +0 -0
  180. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/bin/verify_iceberg_read_check.rs +0 -0
  181. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/data/download_nyc_taxi.sh +0 -0
  182. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/data/generate_embeddings.py +0 -0
  183. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/data/generate_wikipedia.py +0 -0
  184. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/data/start_nessie.sh +0 -0
  185. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/debug_murmur3.rs +0 -0
  186. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/fuzz_murmur3.rs +0 -0
  187. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/performance/README.md +0 -0
  188. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/prototype_merge.py +0 -0
  189. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/schema_evolution_test.rs +0 -0
  190. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_catalog_commit.rs +0 -0
  191. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_compliance.rs +0 -0
  192. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_delete_correctness.rs +0 -0
  193. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_iceberg_python_delete.sh +0 -0
  194. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_iceberg_rest.sh +0 -0
  195. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_iceberg_rest_create.sh +0 -0
  196. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_iceberg_rest_delete.sh +0 -0
  197. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_iceberg_rest_remove_index.sh +0 -0
  198. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_iceberg_rest_update.sh +0 -0
  199. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_metadata_creation.rs +0 -0
  200. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_mor_reads.rs +0 -0
  201. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_mor_writes.rs +0 -0
  202. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_partition_transforms.rs +0 -0
  203. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_partitioned_writes.rs +0 -0
  204. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_puffin_index.sh +0 -0
  205. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_rest_updates.sh +0 -0
  206. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/tests/verify_schema_compat.rs +0 -0
  207. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config/.DS_Store +0 -0
  208. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config/catalog/glue_catalog.properties +0 -0
  209. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config/catalog/hyperstreamdb.properties +0 -0
  210. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config/catalog/iceberg.properties +0 -0
  211. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config/catalog/memory.properties +0 -0
  212. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config/catalog/postgres.properties +0 -0
  213. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config/config.properties +0 -0
  214. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config/entrypoint.sh +0 -0
  215. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config/jvm.config +0 -0
  216. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config/node.properties +0 -0
  217. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-config.zip +0 -0
  218. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBColumnHandle.java +0 -0
  219. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBConnectorFactory.java +0 -0
  220. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBMetadata.java +0 -0
  221. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSource.java +0 -0
  222. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSourceProvider.java +0 -0
  223. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPlugin.java +0 -0
  224. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplit.java +0 -0
  225. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplitManager.java +0 -0
  226. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBTableHandle.java +0 -0
  227. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/update_schema_patch.py +0 -0
  228. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/update_schema_patch2.py +0 -0
  229. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/verify_docstrings.py +0 -0
  230. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/verify_fluent_api.py +0 -0
  231. {hyperstreamdb-0.1.6 → hyperstreamdb-0.1.10}/verify_unified_ingest.py +0 -0
@@ -103,3 +103,4 @@ groq_api_key.txt
103
103
  rag_db/
104
104
  news_db/
105
105
 
106
+ .hypothesis/
@@ -3755,7 +3755,7 @@ dependencies = [
3755
3755
 
3756
3756
  [[package]]
3757
3757
  name = "hyperstreamdb"
3758
- version = "0.1.6"
3758
+ version = "0.1.10"
3759
3759
  dependencies = [
3760
3760
  "ahash 0.8.12",
3761
3761
  "anyhow",
@@ -3782,6 +3782,8 @@ dependencies = [
3782
3782
  "cust",
3783
3783
  "datafusion",
3784
3784
  "datafusion-expr-common",
3785
+ "datafusion-functions",
3786
+ "datafusion-functions-aggregate",
3785
3787
  "datafusion-functions-aggregate-common",
3786
3788
  "dirs",
3787
3789
  "env_logger",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "hyperstreamdb"
3
- version = "0.1.6"
3
+ version = "0.1.10"
4
4
  edition = "2021"
5
5
  license = "MIT AND Apache-2.0"
6
6
  description = "HyperStreamDB - Serverless Index-Streaming Database with Overlay Indexing and Vector Search"
@@ -32,10 +32,10 @@ crate-type = ["cdylib", "rlib"]
32
32
  [features]
33
33
  default = ["candle"]
34
34
  candle = []
35
- cuda = ["dep:cust", "intel_gpu"]
36
- rocm = ["dep:opencl3", "intel_gpu"] # Fallback to OpenCL for now
35
+ cuda = ["dep:cust", "intel"]
36
+ rocm = ["dep:opencl3", "intel"] # Fallback to OpenCL for now
37
37
  mps = ["metal"]
38
- intel_gpu = ["dep:opencl3"]
38
+ intel = ["dep:opencl3"]
39
39
  enterprise = []
40
40
  python = ["dep:pyo3", "pyo3/extension-module", "dep:numpy", "dep:pythonize"]
41
41
  java = ["dep:jni"]
@@ -114,6 +114,8 @@ once_cell = "1.19"
114
114
  async-trait = "0.1.89"
115
115
  datafusion = "52.0.0"
116
116
  datafusion-expr-common = "52.0.0"
117
+ datafusion-functions = "52.0.0"
118
+ datafusion-functions-aggregate = "52.0.0"
117
119
  datafusion-functions-aggregate-common = "52.0.0"
118
120
  async-stream = "0.3.6"
119
121
  smartcore = "0.3" # For k-means clustering (IVF index)
@@ -158,7 +160,7 @@ features = ["invocation"]
158
160
  [dependencies.pyo3]
159
161
  version = "0.26.0"
160
162
  optional = true
161
- features = ["extension-module"]
163
+ features = ["extension-module", "abi3-py310"]
162
164
 
163
165
  [dependencies.numpy]
164
166
  version = "0.26.0"
@@ -1,15 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hyperstreamdb
3
- Version: 0.1.6
3
+ Version: 0.1.10
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Programming Language :: Rust
7
7
  Classifier: Programming Language :: Python :: 3
8
- Classifier: Programming Language :: Python :: 3.8
9
- Classifier: Programming Language :: Python :: 3.9
10
8
  Classifier: Programming Language :: Python :: 3.10
11
9
  Classifier: Programming Language :: Python :: 3.11
12
10
  Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Classifier: Programming Language :: Python :: 3.14
13
13
  Classifier: Programming Language :: Python :: Implementation :: CPython
14
14
  Classifier: Programming Language :: Python :: Implementation :: PyPy
15
15
  Classifier: Topic :: Database
@@ -39,7 +39,7 @@ Keywords: database,vector,search,indexing,parquet,iceberg
39
39
  Home-Page: https://github.com/rla3rd/hyperstreamdb
40
40
  Author: HyperStream Team
41
41
  License: MIT AND Apache-2.0
42
- Requires-Python: >=3.8
42
+ Requires-Python: >=3.10
43
43
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
44
44
  Project-URL: Homepage, https://github.com/rla3rd/hyperstreamdb
45
45
  Project-URL: Repository, https://github.com/rla3rd/hyperstreamdb
@@ -125,6 +125,10 @@ maturin develop
125
125
 
126
126
  # Or install from PyPI (coming soon)
127
127
  pip install hyperstreamdb
128
+
129
+ # Windows Users
130
+ # HyperStreamDB is optimized for Linux/POSIX environments.
131
+ # Windows users should use WSL2 (Windows Subsystem for Linux).
128
132
  ```
129
133
 
130
134
  ### GPU Acceleration (Optional)
@@ -342,10 +346,10 @@ distance = hdb.hamming_distance_packed(binary1, binary2)
342
346
  ```
343
347
 
344
348
  **Supported GPU Backends:**
345
- - **CUDA** - NVIDIA GPUs (Linux, Windows)
349
+ - **CUDA** - NVIDIA GPUs (Linux, Windows via WSL2)
346
350
  - **ROCm** - AMD GPUs (Linux)
347
351
  - **Metal (MPS)** - Apple Silicon (macOS)
348
- - **OpenCL** - Intel GPUs (Linux, Windows)
352
+ - **OpenCL** - Intel GPUs (Linux, Windows via WSL2)
349
353
  - **CPU** - Fallback for all platforms
350
354
 
351
355
  **Supported Distance Metrics:**
@@ -415,11 +419,28 @@ cargo bench
415
419
  python tests/integration/test_nyc_taxi.py
416
420
  ```
417
421
 
418
- **Performance Targets:**
419
- - Ingest: >100K rows/sec ⏱️
420
- - Query (indexed): <100ms p99 ⏱️
421
- - Vector search: <50ms for k=10 on 10M vectors ⏱️
422
- - Compaction: <5min for 10GB ⏱️
422
+ **Performance Targets:**
423
+ - **Scalar Ingest**: >10K rows/sec
424
+ - **Vector Ingest (768D)**: >4,000 rows/sec ✅ (April 2026)
425
+ - **Query (indexed)**: <100ms p99 ⏱️
426
+ - **Vector search**: <50ms for k=10 on 10M vectors ⏱️
427
+ - **Compaction**: <5min for 10GB ⏱️
428
+
429
+ **Benchmarking Environment: Lenovo T480**
430
+ - **System**: Lenovo T480
431
+ - **CPU**: Intel(R) Core(TM) i5-8350U CPU @ 1.70GHz
432
+ - **RAM**: 64GB
433
+ - **OS**: Linux
434
+
435
+ **Benchmarking Environment: Apple M4 Max**
436
+ - **System**: MacBook Pro (M4 Max, 16-core CPU, 40-core GPU)
437
+ - **Memory**: 128GB Unified Memory
438
+ - **OS**: macOS (Arm64)
439
+ - **Optimizations**: `target-cpu=native` (NEON SIMD)
440
+ - **Results (100K vectors, 768D)**:
441
+ - **Vector Ingest**: 16,707 rows/sec (CPU) ✅
442
+ - **Vector Search (k=10)**: 819ms (CPU / NEON) ✅
443
+ - **Vector Search (k=10)**: 860ms (MPS GPU) ⏱️
423
444
 
424
445
  ### Phase 2: Nessie Integration (Next)
425
446
 
@@ -79,6 +79,10 @@ maturin develop
79
79
 
80
80
  # Or install from PyPI (coming soon)
81
81
  pip install hyperstreamdb
82
+
83
+ # Windows Users
84
+ # HyperStreamDB is optimized for Linux/POSIX environments.
85
+ # Windows users should use WSL2 (Windows Subsystem for Linux).
82
86
  ```
83
87
 
84
88
  ### GPU Acceleration (Optional)
@@ -296,10 +300,10 @@ distance = hdb.hamming_distance_packed(binary1, binary2)
296
300
  ```
297
301
 
298
302
  **Supported GPU Backends:**
299
- - **CUDA** - NVIDIA GPUs (Linux, Windows)
303
+ - **CUDA** - NVIDIA GPUs (Linux, Windows via WSL2)
300
304
  - **ROCm** - AMD GPUs (Linux)
301
305
  - **Metal (MPS)** - Apple Silicon (macOS)
302
- - **OpenCL** - Intel GPUs (Linux, Windows)
306
+ - **OpenCL** - Intel GPUs (Linux, Windows via WSL2)
303
307
  - **CPU** - Fallback for all platforms
304
308
 
305
309
  **Supported Distance Metrics:**
@@ -369,11 +373,28 @@ cargo bench
369
373
  python tests/integration/test_nyc_taxi.py
370
374
  ```
371
375
 
372
- **Performance Targets:**
373
- - Ingest: >100K rows/sec ⏱️
374
- - Query (indexed): <100ms p99 ⏱️
375
- - Vector search: <50ms for k=10 on 10M vectors ⏱️
376
- - Compaction: <5min for 10GB ⏱️
376
+ **Performance Targets:**
377
+ - **Scalar Ingest**: >10K rows/sec
378
+ - **Vector Ingest (768D)**: >4,000 rows/sec ✅ (April 2026)
379
+ - **Query (indexed)**: <100ms p99 ⏱️
380
+ - **Vector search**: <50ms for k=10 on 10M vectors ⏱️
381
+ - **Compaction**: <5min for 10GB ⏱️
382
+
383
+ **Benchmarking Environment: Lenovo T480**
384
+ - **System**: Lenovo T480
385
+ - **CPU**: Intel(R) Core(TM) i5-8350U CPU @ 1.70GHz
386
+ - **RAM**: 64GB
387
+ - **OS**: Linux
388
+
389
+ **Benchmarking Environment: Apple M4 Max**
390
+ - **System**: MacBook Pro (M4 Max, 16-core CPU, 40-core GPU)
391
+ - **Memory**: 128GB Unified Memory
392
+ - **OS**: macOS (Arm64)
393
+ - **Optimizations**: `target-cpu=native` (NEON SIMD)
394
+ - **Results (100K vectors, 768D)**:
395
+ - **Vector Ingest**: 16,707 rows/sec (CPU) ✅
396
+ - **Vector Search (k=10)**: 819ms (CPU / NEON) ✅
397
+ - **Vector Search (k=10)**: 860ms (MPS GPU) ⏱️
377
398
 
378
399
  ### Phase 2: Nessie Integration (Next)
379
400
 
@@ -0,0 +1,50 @@
1
+ # Competitive Benchmark Report - HyperStreamDB
2
+
3
+ **Generated:** 2026-04-04 13:56:56.467083
4
+
5
+ ## Ingest Performance
6
+
7
+ | System | Operation | Dataset Size | Latency (ms) | Throughput (rows/sec) | Storage (MB) | Hardware | Device |
8
+ |:---------------------|:------------|---------------:|---------------:|------------------------:|---------------:|:-------------------------------------|:---------|
9
+ | HyperStreamDB | ingest | 1000 | 268.945 | 3718.23 | 7.18574 | Generic Baseline | cpu |
10
+ | DuckDB (Raw Parquet) | ingest | 1000 | 70.8115 | 14122 | 3.55513 | Generic Baseline | cpu |
11
+ | LanceDB | ingest | 1000 | 28.9528 | 34538.9 | 2.98259 | Generic Baseline | cpu |
12
+ | HyperStreamDB | ingest | 1000 | 801.391 | 1247.83 | 7.11231 | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
13
+ | HyperStreamDB | ingest | 1000 | 818.65 | 1221.52 | 7.13006 | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
14
+ | HyperStreamDB | ingest | 1000 | 820.246 | 1219.15 | 7.11529 | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
15
+ | HyperStreamDB | ingest | 1000 | 290.842 | 3438.29 | 7.1134 | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
16
+ | HyperStreamDB | ingest | 1000 | 412.61 | 2423.59 | 7.12722 | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
17
+
18
+ ## Vector Search Performance
19
+
20
+ | System | Operation | Dataset Size | Latency (ms) | Throughput (rows/sec) | Storage (MB) | Hardware | Device |
21
+ |:--------------|:-------------------|---------------:|---------------:|------------------------:|---------------:|:-------------------------------------|:---------|
22
+ | HyperStreamDB | vector_search_k10 | 1000 | 28.3022 | nan | nan | Generic Baseline | cpu |
23
+ | LanceDB | vector_search_k10 | 1000 | 6.41737 | nan | nan | Generic Baseline | cpu |
24
+ | HyperStreamDB | vector_search_k10 | 1000 | 109.424 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
25
+ | HyperStreamDB | vector_search_k10 | 1000 | 109.012 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
26
+ | HyperStreamDB | vector_search_k10 | 1000 | 108.445 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
27
+ | HyperStreamDB | vector_search_k10 | 1000 | 16.1742 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
28
+ | HyperStreamDB | vector_search_k10 | 1000 | 14.6868 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
29
+ | HyperStreamDB | vector_search_k100 | 1000 | 25.9826 | nan | nan | Generic Baseline | cpu |
30
+ | LanceDB | vector_search_k100 | 1000 | 7.24833 | nan | nan | Generic Baseline | cpu |
31
+ | HyperStreamDB | vector_search_k100 | 1000 | 148.144 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
32
+ | HyperStreamDB | vector_search_k100 | 1000 | 148.415 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
33
+ | HyperStreamDB | vector_search_k100 | 1000 | 152.582 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
34
+ | HyperStreamDB | vector_search_k100 | 1000 | 19.9033 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cpu |
35
+ | HyperStreamDB | vector_search_k100 | 1000 | 16.9979 | nan | nan | AMD Ryzen 9 5900XT 16-Core Processor | cuda:0 |
36
+
37
+ ## Hybrid Query Performance
38
+
39
+ ## Key Findings
40
+
41
+ ### HyperStreamDB Advantages
42
+
43
+ 1. **Native Hybrid Queries**: Only system with scalar + vector in single query
44
+ 2. **Iceberg Compatibility**: Standard data lake format
45
+ 3. **Multi-Catalog Support**: Hive, Glue, Unity, REST, Nessie
46
+ 4. **100% Iceberg v3 Compliance**: All required features implemented
47
+
48
+ ### Competitive Position
49
+
50
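+ - Vector search (k=10): 10.0x slower than LanceDB (mean latency across all HyperStreamDB runs above, mixed hardware/devices, vs. the single LanceDB baseline run; on the like-for-like Generic Baseline row the gap is about 4.4x)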
@@ -0,0 +1,24 @@
1
+ System,Operation,Dataset Size,Latency (ms),Throughput (rows/sec),Storage (MB),Hardware,Device
2
+ HyperStreamDB,ingest,1000,268.94545555114746,3718.2260542410327,7.185737609863281,Generic Baseline,cpu
3
+ DuckDB (Raw Parquet),ingest,1000,70.81151008605957,14121.997946162524,3.555130958557129,Generic Baseline,cpu
4
+ LanceDB,ingest,1000,28.952836990356445,34538.929650765414,2.9825878143310547,Generic Baseline,cpu
5
+ HyperStreamDB,vector_search_k10,1000,28.3022403717041,,,Generic Baseline,cpu
6
+ LanceDB,vector_search_k10,1000,6.417369842529297,,,Generic Baseline,cpu
7
+ HyperStreamDB,vector_search_k100,1000,25.98259449005127,,,Generic Baseline,cpu
8
+ LanceDB,vector_search_k100,1000,7.248330116271973,,,Generic Baseline,cpu
9
+ DuckDB,scalar_query,1000,3.293037414550781,,,Generic Baseline,cpu
10
+ HyperStreamDB,ingest,1000,801.3906478881836,1247.8308832717098,7.112313270568848,AMD Ryzen 9 5900XT 16-Core Processor,cpu
11
+ HyperStreamDB,vector_search_k10,1000,109.423828125,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
12
+ HyperStreamDB,vector_search_k100,1000,148.14441204071045,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
13
+ HyperStreamDB,ingest,1000,818.6497688293457,1221.5235844138597,7.130064010620117,AMD Ryzen 9 5900XT 16-Core Processor,cpu
14
+ HyperStreamDB,vector_search_k10,1000,109.01196002960204,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
15
+ HyperStreamDB,vector_search_k100,1000,148.41535091400146,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
16
+ HyperStreamDB,ingest,1000,820.2464580535889,1219.1457703736496,7.115290641784668,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
17
+ HyperStreamDB,vector_search_k10,1000,108.44509601593018,,,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
18
+ HyperStreamDB,vector_search_k100,1000,152.58185863494873,,,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
19
+ HyperStreamDB,ingest,1000,290.84205627441406,3438.2922910450206,7.113402366638184,AMD Ryzen 9 5900XT 16-Core Processor,cpu
20
+ HyperStreamDB,vector_search_k10,1000,16.17424488067627,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
21
+ HyperStreamDB,vector_search_k100,1000,19.90334987640381,,,AMD Ryzen 9 5900XT 16-Core Processor,cpu
22
+ HyperStreamDB,ingest,1000,412.6102924346924,2423.5944142335693,7.127219200134277,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
23
+ HyperStreamDB,vector_search_k10,1000,14.686751365661621,,,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
24
+ HyperStreamDB,vector_search_k100,1000,16.997885704040527,,,AMD Ryzen 9 5900XT 16-Core Processor,cuda:0
@@ -36,6 +36,7 @@ fn main() {
36
36
  "l1_distance",
37
37
  "hamming_distance",
38
38
  "jaccard_distance",
39
+ "kmeans_assignment",
39
40
  ];
40
41
 
41
42
  // Compile each kernel
@@ -77,4 +77,30 @@ cargo bench --bench bench_table
77
77
  The results are automatically statistically analyzed by Criterion, providing p50, p95, and p99 metrics with outlier detection.
78
78
 
79
79
  ---
80
- **Last Updated**: January 26, 2026
80
+ ---
81
+
82
+ ## 5. Ingestion Performance (April 2026 Update)
83
+
84
+ Following a major optimization of the HNSW-IVF indexing pipeline, HyperStreamDB now features high-throughput vector ingestion that rivals industry-standard engines like LanceDB.
85
+
86
+ ### Key Architectural Improvements:
87
+ 1. **Delayed Indexing (Async):** Ingestion is now non-blocking. Vectors are written to Parquet immediately, while indexing happens in the background using a 32-core optimized worker pool.
88
+ 2. **Mini-Batch K-Means:** IVF centroid training is now 10x faster thanks to a sub-sampled training strategy whose cost scales with the sample size $S$ rather than the full dataset size $N$ ($O(S)$ vs. $O(N)$, with $S \ll N$).
89
+ 3. **Parallel PQ Training:** Product Quantization subspaces are trained fully in parallel, saturating all available CPU threads.
90
+ 4. **Runtime SIMD Dispatch:** Automatic AVX2/FMA detection at runtime ensures peak performance even on generic binary builds.
91
+
92
+ ### Throughput Comparison (768-Dimensional Vectors)
93
+ Measurements taken on a 32-core Linux environment with 10k row batches.
94
+
95
+ | Feature | Baseline (Jan 2026) | **Optimized (April 2026)** | Speedup |
96
+ | :--- | :---: | :---: | :---: |
97
+ | **Ingestion Throughput** | 360 rows/sec | **4,013 rows/sec** | **11.1x** |
98
+ | **Indexing Latency (10k rows)** | 27.8s | **1.8s** | **15.4x** |
99
+ | **Write Availability** | Blocking | **Instant (Async)** | ∞ |
100
+
101
+ ### Competitive Landscape: HyperStreamDB vs LanceDB
102
+ While LanceDB is a highly mature engine, HyperStreamDB's native Iceberg integration and parallel HNSW construction provide comparable performance for local-first vector workloads.
103
+
104
+ - **HyperStreamDB (768D)**: **4,013 rows/sec** (on multi-core CPU)
105
+
106
+ **Last Updated**: April 3, 2026
@@ -1,7 +1,7 @@
1
1
  # HyperStreamDB Comprehensive Guide
2
2
 
3
- **Version:** 0.1.0 (Alpha)
4
- **Last Updated:** 2026-01-27
3
+ **Version:** 0.1.10 (Alpha)
4
+ **Last Updated:** 2026-04-03
5
5
 
6
6
  HyperStreamDB is a serverless, hybrid-search database optimized for high-performance vector and scalar queries directly on data lakes (S3, GCS, Azure, Local).
7
7
 
@@ -37,29 +37,42 @@ pip install .
37
37
  ```python
38
38
  import hyperstreamdb as hdb
39
39
  import pyarrow as pa
40
+ import pandas as pd
41
+ import numpy as np
40
42
 
41
- # 1. Create a Table
43
+ # 1. Create a Table with AG News Schema
42
44
  schema = pa.schema([
43
45
  ('id', pa.int32()),
44
- ('content', pa.string()),
45
- ('embedding', pa.list_(pa.float32(), 768))
46
+ ('label', pa.int32()), # 1:World, 2:Sports, 3:Business, 4:Sci/Tech
47
+ ('title', pa.string()),
48
+ ('description', pa.string()),
49
+ ('embedding', pa.list_(pa.float32(), 384)) # SBERT/all-MiniLM-L6-v2 size
46
50
  ])
47
- table = hdb.Table.create("file:///tmp/my_table", schema)
48
51
 
49
- # 2. Ingest Data
50
- data = generate_batch(1000) # Returns RecordBatch
51
- table.write(data)
52
+ table = hdb.Table.create("file:///tmp/ag_news", schema)
53
+
54
+ # 2. Ingest Real Data (Example: AG News Sample)
55
+ df = pd.DataFrame({
56
+ 'id': [1, 2],
57
+ 'label': [3, 4],
58
+ 'title': ["Wall St. Bears Claw Back", "SpaceX Launches New Falcon"],
59
+ 'description': ["Stocks fell today as inflation concerns...", "The private space company successfully..."],
60
+ 'embedding': [np.random.rand(384).tolist() for _ in range(2)]
61
+ })
62
+
63
+ table.write(df)
52
64
  table.commit()
53
65
 
54
- # 3. Query (Scalar + Vector)
55
- # Find nearest neighbors to 'query_vec' where content contains "AI"
66
+ # 3. Hybrid Search (Scalar + Vector)
67
+ # Search for "Space" related news in "Sci/Tech" category (label=4)
68
+ query_vec = np.random.rand(384).tolist()
56
69
  results = table.search(
57
70
  vector_column="embedding",
58
71
  query_vector=query_vec,
59
- k=10,
60
- filter="content LIKE '%AI%'"
72
+ k=5,
73
+ filter="label = 4 AND description LIKE '%Space%'"
61
74
  )
62
- print(results.to_pandas())
75
+ print(results.to_pandas()[['title', 'description']])
63
76
  ```
64
77
 
65
78
  ---
@@ -132,20 +145,56 @@ See [pgvector SQL Guide](PGVECTOR_SQL_GUIDE.md) for complete documentation.
132
145
 
133
146
  ### 3.2 Hardware Acceleration
134
147
  The indexing engine supports hardware acceleration for multiple backends:
135
- * **CUDA**: NVIDIA GPUs (Linux/Windows)
148
+ * **CUDA**: NVIDIA GPUs (Linux, Windows via WSL2)
136
149
  * **Metal**: Apple Silicon (MPS)
137
150
  * **ROCm**: AMD GPUs
138
151
  * **Intel**: AVX-512 optimizations
139
152
 
140
153
  Enable via `Cargo.toml` features or environment detection.
141
154
 
142
- ### 3.3 Multi-Catalog Support
143
- HyperStreamDB supports enterprise catalog integrations:
144
- * **Nessie**: Git-like versioning for data.
145
- * **Unity Catalog**: Databricks integration.
146
- * **AWS Glue**: Native AWS metadata.
147
- * **Hive Metastore**: Legacy Hadoop compatibility.
148
- * **REST**: Iceberg-compatible REST catalog.
155
+ ### 3.3 Multi-Catalog Support
156
+
157
+ HyperStreamDB is designed to integrate seamlessly with standard data catalogs to provide discovery, cross-table atomicity, and consistent metadata across the enterprise. We support a variety of industry-standard protocols.
158
+
159
+ Below is a detailed example using the Hive Metastore, followed by short-form examples for other supported catalogs. Full integration guides for each will be provided in future updates.
160
+
161
+ ### Hive Metastore (Detailed Example)
162
+
163
+ Connecting to a Hive Metastore allows you to resolve table names to storage locations automatically.
164
+
165
+ ```python
166
+ import hyperstreamdb as hdb
167
+
168
+ # Load a table from Hive Metastore
169
+ table = hdb.Table.from_hive(
170
+ address="thrift://localhost:9083",
171
+ namespace="default",
172
+ table="my_analytics_table"
173
+ )
174
+
175
+ # Read the rows matching a filter; any later writes are committed atomically back to Hive
176
+ df = table.to_pandas(filter="status = 'active'")
177
+ ```
178
+
179
+ ### AWS Glue, Nessie, REST, and Unity Catalogs
180
+
181
+ HyperStreamDB also provides native support for modern cloud catalogs. These can be configured similarly to the Hive example:
182
+
183
+ ```python
184
+ # AWS Glue (Native AWS Integration)
185
+ table = hdb.Table.from_glue(namespace="prod", table="users")
186
+
187
+ # Project Nessie (Git-like Versioning)
188
+ table = hdb.Table.from_nessie(nessie_url, namespace="dev", table="experiments")
189
+
190
+ # Iceberg REST Catalog (Standard API)
191
+ table = hdb.Table.from_rest(rest_url, namespace="marketing", table="campaigns")
192
+
193
+ # Unity Catalog (Databricks Ecosystem)
194
+ table = hdb.Table.from_unity(unity_url, namespace="main", table="gold_data")
195
+ ```
196
+
197
+ For more details on advanced configurations and authentication (Kerberos, SASL, IAM), see the [Configuration Guide](./CONFIGURATION.md) or the [Catalog Usage Guide](./catalog_usage.md).
149
198
 
150
199
  ---
151
200
 
@@ -0,0 +1,38 @@
1
+ # Concurrency and Atomic Commits
2
+
3
+ HyperStreamDB is designed for high-concurrency environments where multiple clients may be reading from and writing to the same table simultaneously.
4
+
5
+ ## Optimistic Concurrency Control (OCC)
6
+
7
+ HyperStreamDB employs **Optimistic Concurrency Control** to ensure ACID compliance without the need for heavyweight central locks in most cases.
8
+
9
+ ### Snapshot Versioning
10
+ Every table state is represented by a specific version of the manifest file (e.g., `_manifest/v100.json`). These files are immutable once written.
11
+
12
+ ### The Commit Protocol
13
+ When a client (writer) wants to commit changes:
14
+ 1. **Read Latest**: The client reads the current latest version (e.g., `v100`).
15
+ 2. **Prepare**: The client calculates the new state (`v101`) based on the changes (e.g., added or removed segments).
16
+ 3. **Atomic Swap**: The client attempts to write the new manifest file `v101.json` using an **atomic "create-if-not-exists"** primitive.
17
+
18
+ ### Conflict Resolution
19
+ If another client successfully committed `v101.json` while the first client was preparing its changes:
20
+ - The first client's write operation will fail with an `AlreadyExists` or conflict error.
21
+ - HyperStreamDB automatically **retries** the commit (up to 100 times).
22
+ - In each retry, the client re-reads the *new* latest version, merges its changes again, and attempts to commit the *next* version (e.g., `v102`).
23
+ - A randomized **exponential backoff** is used between retries to reduce contention.
24
+
25
+ ## Catalog-Level Locking
26
+
27
+ While OCC works perfectly on local file systems and some cloud storage providers (like Azure Blob or Google Cloud Storage with certain settings), some providers like **AWS S3** do not natively support atomic "create-if-not-exists" with strong consistency for all operations.
28
+
29
+ In these cases, HyperStreamDB leverages **Iceberg-compatible catalogs** to provide the necessary atomicity:
30
+
31
+ - **AWS Glue**: Uses the Glue Catalog's built-in versioning and optimistic locking.
32
+ - **Nessie**: Provides Git-like branching and merging with cross-table atomic commits.
33
+ - **Hive Metastore**: Uses a relational database backend (like PostgreSQL or MySQL) to provide transactionally safe updates to the `metadata_location` parameter.
34
+ - **REST Catalog**: Delegates atomicity to a centralized REST server (e.g., Tabular, Polaris).
35
+
36
+ ## Read Isolation
37
+
38
+ Readers in HyperStreamDB always see a **consistent snapshot** of the table. Once a reader loads a particular version (e.g., `v100`), it will continue to see that state even if newer versions are committed by other clients. This provides **Snapshot Isolation**, which is ideal for long-running analytical queries.
@@ -0,0 +1,51 @@
1
+ # Configuration Guide
2
+
3
+ HyperStreamDB is designed to be highly configurable through environment variables and a centralized configuration file.
4
+
5
+ ## Environment Variables
6
+
7
+ These variables control the core behavior of the system, including memory management, caching, and storage paths.
8
+
9
+ | Variable | Description | Default |
10
+ |----------|-------------|---------|
11
+ | `HYPERSTREAM_CACHE_GB` | Memory limit for the hybrid vector index (HNSW-IVF) in GB. | `2` |
12
+ | `HYPERSTREAM_BLOCK_CACHE_GB` | Memory limit for the decoded RecordBatch block cache in GB. | `4` |
13
+ | `HYPERSTREAM_DISK_CACHE_DIR` | Directory used for caching segmented index files on local disk. | `/tmp/hdb_cache` |
14
+ | `HYPERSTREAM_WAL_DIR` | Directory for the Write-Ahead Log (WAL) used for fault tolerance. | `{table_uri}/_wal` |
15
+ | `HYPERSTREAM_CONFIG` | Path to a centralized `hyperstream.toml` configuration file. | None |
16
+ | `JAEGER_ENABLED` | Enable distributed tracing via Jaeger (requires `opentelemetry` feature). | `false` |
17
+
18
+ ## The hyperstream.toml File
19
+
20
+ You can use a TOML file to manage complex configurations, especially for catalogs and multi-cloud storage.
21
+
22
+ HyperStreamDB looks for this file in the following order:
23
+ 1. Environment variable `HYPERSTREAM_CONFIG`
24
+ 2. `./hyperstream.toml` (current directory)
25
+ 3. `~/.hyperstream/config.toml`
26
+
27
+ ### Example Configuration
28
+
29
+ ```toml
30
+ [storage]
31
+ type = "s3"
32
+ bucket = "my-data-lake"
33
+ region = "us-east-1"
34
+
35
+ [cache]
36
+ memory_limit_gb = 8
37
+ disk_cache_enabled = true
38
+ disk_cache_path = "/mnt/fast-ssd/hdb_cache"
39
+
40
+ [catalog]
41
+ type = "nessie"
42
+ url = "http://nessie:19120/api/v2"
43
+ ref = "main"
44
+ ```
45
+
46
+ ## Storage Credentials
47
+
48
+ HyperStreamDB uses the standard `object_store` crate, which automatically picks up credentials from:
49
+ - **AWS**: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION`, or IAM Roles.
50
+ - **GCP**: `GOOGLE_APPLICATION_CREDENTIALS` (JSON key file path).
51
+ - **Azure**: `AZURE_STORAGE_ACCOUNT`, `AZURE_STORAGE_KEY`.
@@ -9,7 +9,7 @@ HyperStreamDB supports GPU acceleration for vector distance computations across
9
9
  - **NVIDIA CUDA** - For NVIDIA GPUs (GeForce, Quadro, Tesla)
10
10
  - **AMD ROCm** - For AMD Radeon GPUs
11
11
  - **Apple Metal (MPS)** - For Apple Silicon Macs
12
- - **Intel OpenCL** - For Intel integrated and discrete GPUs
12
+ - **Intel OpenCL** - For Intel integrated and discrete GPUs (Linux, WSL2)
13
13
 
14
14
  GPU acceleration provides 10x+ speedup for batch distance operations on large vector databases (100,000+ vectors).
15
15
 
@@ -69,15 +69,16 @@ nvidia-smi
69
69
  nvcc --version
70
70
  ```
71
71
 
72
- ### Installation on Windows
72
+ ### Installation on Windows (via WSL2)
73
73
 
74
- 1. Download CUDA Toolkit from [NVIDIA website](https://developer.nvidia.com/cuda-downloads)
75
- 2. Run the installer (cuda_12.3.0_windows.exe)
76
- 3. Follow the installation wizard
77
- 4. Verify installation:
78
- ```cmd
74
+ Windows users should use **WSL2** (Windows Subsystem for Linux) to run HyperStreamDB with GPU support.
75
+
76
+ 1. Install WSL2 and Ubuntu (e.g., `wsl --install -d Ubuntu-22.04`)
77
+ 2. Install NVIDIA Windows Driver (this provides the necessary kernel-mode interface for WSL2)
78
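+ 3. Within the WSL2 Ubuntu environment, follow the **Linux installation** instructions above, but install only the CUDA Toolkit. Do not install a Linux NVIDIA driver inside WSL2; the Windows driver from step 2 is the one WSL2 uses.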
79
+ 4. Verify from within WSL:
80
+ ```bash
79
81
  nvidia-smi
80
- nvcc --version
81
82
  ```
82
83
 
83
84
  ### Verification
@@ -236,7 +237,7 @@ print(f"Computed {len(distances)} distances on Apple GPU")
236
237
  - **GPU**: Intel Iris Xe or newer (integrated or discrete)
237
238
  - Recommended: Arc A-series discrete GPUs
238
239
  - **Driver**: Intel Graphics Driver with OpenCL support
239
- - **OS**: Linux or Windows
240
+ - **OS**: Linux, or Windows via WSL2
240
241
 
241
242
  ### Supported GPUs
242
243
 
@@ -260,12 +261,9 @@ sudo apt-get install opencl-headers
260
261
  clinfo
261
262
  ```
262
263
 
263
- ### Installation on Windows
264
+ ### Installation on Windows (via WSL2)
264
265
 
265
- 1. Download latest Intel Graphics Driver from [Intel Download Center](https://www.intel.com/content/www/us/en/download-center/home.html)
266
- 2. Run the installer
267
- 3. OpenCL support is included in modern Intel drivers
268
- 4. Verify with `clinfo` (install from [GitHub](https://github.com/Oblomov/clinfo))
266
+ Windows users should install the Intel OpenCL runtime within their WSL2 distribution following the Linux installation steps above.
269
267
 
270
268
  ### Verification
271
269