hyperstreamdb 0.2.6__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301)
  1. hyperstreamdb-0.3.1/.ipynb_checkpoints/Untitled-checkpoint.ipynb +6 -0
  2. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/Cargo.lock +2 -1
  3. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/Cargo.toml +2 -1
  4. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/PKG-INFO +31 -9
  5. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/README.md +30 -8
  6. hyperstreamdb-0.3.1/Untitled.ipynb +33 -0
  7. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benches/performance.rs +4 -4
  8. hyperstreamdb-0.3.1/benchmark_results/multi_filter_vector_20260409_231713.json +14 -0
  9. hyperstreamdb-0.3.1/benchmark_results/multi_filter_vector_20260409_231713.md +7 -0
  10. hyperstreamdb-0.3.1/docs/.nojekyll +0 -0
  11. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/PYTHON_VECTOR_API.md +3 -0
  12. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/VECTOR_CONFIGURATION.md +20 -15
  13. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/api_reference.md +14 -5
  14. hyperstreamdb-0.3.1/docs/source/_static/HyperStreamDB.png +0 -0
  15. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/python/hyperstreamdb/__init__.py +41 -14
  16. hyperstreamdb-0.3.1/split_table.py +19 -0
  17. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/cache.rs +8 -0
  18. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/distance.rs +88 -0
  19. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_ivf.rs +160 -85
  20. hyperstreamdb-0.3.1/src/core/index/mod.rs +134 -0
  21. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/pq.rs +42 -2
  22. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/tokenizer.rs +1 -0
  23. hyperstreamdb-0.3.1/src/core/index/turboquant.rs +192 -0
  24. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/manifest.rs +429 -117
  25. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/mod.rs +1 -0
  26. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/planner.rs +38 -1
  27. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/query.rs +17 -3
  28. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/reader.rs +367 -28
  29. hyperstreamdb-0.3.1/src/core/search/mod.rs +87 -0
  30. hyperstreamdb-0.3.1/src/core/search/rrf.rs +66 -0
  31. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/segment.rs +182 -86
  32. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/sql/mod.rs +44 -4
  33. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/sql/physical_plan.rs +19 -4
  34. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/storage.rs +1 -1
  35. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/table/builder.rs +24 -8
  36. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/table/fluent.rs +13 -1
  37. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/table/mod.rs +412 -104
  38. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/table/read.rs +232 -15
  39. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/table/schema.rs +1 -0
  40. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/table/write.rs +157 -24
  41. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/lib.rs +7 -0
  42. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/python_binding.rs +300 -183
  43. hyperstreamdb-0.3.1/task.md +33 -0
  44. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/integration_test_hnsw_ivf_native.rs +2 -2
  45. hyperstreamdb-0.3.1/tests/verify_all_algos.py +83 -0
  46. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_mor_reads.rs +2 -2
  47. hyperstreamdb-0.2.6/src/core/index/mod.rs +0 -54
  48. hyperstreamdb-0.2.6/task.md +0 -112
  49. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/.gitattributes +0 -0
  50. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/.gitignore +0 -0
  51. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/.hypothesis/constants/32b327793848e7d8 +0 -0
  52. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/.hypothesis/constants/67b0a8ccf18bf5d2 +0 -0
  53. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/.hypothesis/constants/84828557b4ee7be4 +0 -0
  54. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/.instructions.md +0 -0
  55. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/CNAME +0 -0
  56. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/DORIS_OPTIMIZATION_PATTERNS.md +0 -0
  57. {hyperstreamdb-0.2.6/docs/source/_static → hyperstreamdb-0.3.1}/HyperStreamDB.png +0 -0
  58. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/LICENSE-APACHE +0 -0
  59. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/LICENSE-MIT +0 -0
  60. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/RUN_COMPLIANCE_TESTS.sh +0 -0
  61. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/STEERING.md +0 -0
  62. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/THIRDPARTY_NOTICES.md +0 -0
  63. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benches/bench_table.rs +0 -0
  64. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/BENCHMARK_REPORT.md +0 -0
  65. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/benchmark_charts.png +0 -0
  66. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/benchmark_results.csv +0 -0
  67. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/concurrent_queries_20260409_214245.json +0 -0
  68. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/concurrent_queries_20260409_214245.md +0 -0
  69. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/filtered_search_comparison_20260409_222607.json +0 -0
  70. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/filtered_search_comparison_20260409_222607.md +0 -0
  71. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/filtered_vector_search_20260409_214355.json +0 -0
  72. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/filtered_vector_search_20260409_214355.md +0 -0
  73. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/filtered_vector_search_20260409_220418.json +0 -0
  74. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/filtered_vector_search_20260409_220418.md +0 -0
  75. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/filtered_vector_search_20260409_222053.json +0 -0
  76. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/filtered_vector_search_20260409_222053.md +0 -0
  77. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/filtered_vector_search_20260409_225907.json +0 -0
  78. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/filtered_vector_search_20260409_225907.md +0 -0
  79. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/full_scan_baseline_20260409_222303.json +0 -0
  80. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/full_scan_baseline_20260409_222303.md +0 -0
  81. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/high_selectivity_filter_20260409_222302.json +0 -0
  82. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/high_selectivity_filter_20260409_222302.md +0 -0
  83. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/ingestion_comparison_20260409_222516.json +0 -0
  84. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/ingestion_comparison_20260409_222516.md +0 -0
  85. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/multi_filter_vector_20260409_214428.json +0 -0
  86. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/multi_filter_vector_20260409_214428.md +0 -0
  87. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/multi_filter_vector_20260409_220450.json +0 -0
  88. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/multi_filter_vector_20260409_220450.md +0 -0
  89. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/multi_filter_vector_20260409_222131.json +0 -0
  90. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/multi_filter_vector_20260409_222131.md +0 -0
  91. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/multi_filter_vector_20260409_225938.json +0 -0
  92. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/multi_filter_vector_20260409_225938.md +0 -0
  93. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/post_vs_pre_filter_20260409_214501.json +0 -0
  94. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/post_vs_pre_filter_20260409_214501.md +0 -0
  95. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/post_vs_pre_filter_20260409_220524.json +0 -0
  96. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/post_vs_pre_filter_20260409_220524.md +0 -0
  97. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/post_vs_pre_filter_20260409_222204.json +0 -0
  98. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/post_vs_pre_filter_20260409_222204.md +0 -0
  99. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/post_vs_pre_filter_20260409_230010.json +0 -0
  100. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/post_vs_pre_filter_20260409_230010.md +0 -0
  101. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/query_comparison_20260409_222541.json +0 -0
  102. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/query_comparison_20260409_222541.md +0 -0
  103. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/range_query_20260409_222302.json +0 -0
  104. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/range_query_20260409_222302.md +0 -0
  105. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/search_filtered_high_selectivity_20260409_214144.json +0 -0
  106. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/search_filtered_high_selectivity_20260409_214144.md +0 -0
  107. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/search_unfiltered_20260409_214028.json +0 -0
  108. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/benchmark_results/search_unfiltered_20260409_214028.md +0 -0
  109. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/book.toml +0 -0
  110. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/broken_binaries_all.txt +0 -0
  111. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/broken_bins.txt +0 -0
  112. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/build-connectors.sh +0 -0
  113. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/build.rs +0 -0
  114. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/check_iceberg_compliance.py +0 -0
  115. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/compliance_output.txt +0 -0
  116. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/critical_code_review.md +0 -0
  117. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/debug_log.txt +0 -0
  118. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/demo_basics_run.txt +0 -0
  119. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/demo_basics_v2.txt +0 -0
  120. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docker-compose-minio-nessie.yml +0 -0
  121. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docker-compose.yml +0 -0
  122. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/BENCHMARKING.md +0 -0
  123. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/COMPREHENSIVE_GUIDE.md +0 -0
  124. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/CONCURRENCY.md +0 -0
  125. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/CONFIGURATION.md +0 -0
  126. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/GPU_SETUP_GUIDE.md +0 -0
  127. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/ICEBERG_V2_V3_API.md +0 -0
  128. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/INSTALLATION.md +0 -0
  129. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/PGVECTOR_SQL_GUIDE.md +0 -0
  130. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/architecture.md +0 -0
  131. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/catalog_usage.md +0 -0
  132. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/index.md +0 -0
  133. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/integrations/README.md +0 -0
  134. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/integrations/java_jni.md +0 -0
  135. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/integrations/python.md +0 -0
  136. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/integrations/spark.md +0 -0
  137. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/integrations/trino.md +0 -0
  138. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/requirements.txt +0 -0
  139. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/source/api/python.rst +0 -0
  140. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/source/api/rust.rst +0 -0
  141. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/source/conf.py +0 -0
  142. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/source/index.rst +0 -0
  143. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/docs/source/roadmap.md +0 -0
  144. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/fix_nb.py +0 -0
  145. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/proptest-regressions/core/index/gpu.txt +0 -0
  146. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/proptest-regressions/core/sql/vector_literal.txt +0 -0
  147. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/proptest-regressions/core/sql/vector_udf.txt +0 -0
  148. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/pyproject.toml +0 -0
  149. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/python/hyperstreamdb/embeddings.py +0 -0
  150. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/python_test_output.txt +0 -0
  151. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/python_test_output_v2.txt +0 -0
  152. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/python_test_output_v3.txt +0 -0
  153. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/rust_check_all_warnings.txt +0 -0
  154. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/rust_test_output.txt +0 -0
  155. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/rust_warnings.txt +0 -0
  156. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/scratch/check_os_error.rs +0 -0
  157. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/simd_test_results.txt +0 -0
  158. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/spark-hyperstream/.bloop/bloop.settings.json +0 -0
  159. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/spark-hyperstream/.bloop/spark-hyperstream-test.json +0 -0
  160. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/spark-hyperstream/.bloop/spark-hyperstream.json +0 -0
  161. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/spark-hyperstream/pom.xml +0 -0
  162. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/DefaultSource.java +0 -0
  163. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartition.java +0 -0
  164. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReader.java +0 -0
  165. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReaderFactory.java +0 -0
  166. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamScanBuilder.java +0 -0
  167. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamTable.java +0 -0
  168. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/bin/gateway.rs +0 -0
  169. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/bin/hdb.rs +0 -0
  170. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/bin/iceberg_rest.rs +0 -0
  171. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/bin/probe_datafusion.rs +0 -0
  172. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/bin/setup_test_data.rs +0 -0
  173. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/bin/verify_layered_indexing.rs +0 -0
  174. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/catalog/config.rs +0 -0
  175. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/catalog/glue.rs +0 -0
  176. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/catalog/hive.rs +0 -0
  177. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/catalog/jdbc.rs +0 -0
  178. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/catalog/mod.rs +0 -0
  179. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/catalog/nessie.rs +0 -0
  180. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/catalog/rest.rs +0 -0
  181. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/catalog/unity.rs +0 -0
  182. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/clustering.rs +0 -0
  183. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/compaction.rs +0 -0
  184. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/embeddings.rs +0 -0
  185. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/ffi.rs +0 -0
  186. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/iceberg/iceberg_delete.rs +0 -0
  187. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/iceberg.rs +0 -0
  188. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/cuda/cosine_distance.cu +0 -0
  189. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/cuda/hamming_distance.cu +0 -0
  190. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/cuda/inner_product.cu +0 -0
  191. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/cuda/jaccard_distance.cu +0 -0
  192. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/cuda/kmeans_assignment.cu +0 -0
  193. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/cuda/l1_distance.cu +0 -0
  194. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/cuda/l2_distance.cu +0 -0
  195. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/gpu.rs +0 -0
  196. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_rs/annhdf5.rs +0 -0
  197. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_rs/api.rs +0 -0
  198. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_rs/dist.rs +0 -0
  199. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_rs/flatten.rs +0 -0
  200. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_rs/hnsw.rs +0 -0
  201. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_rs/hnswio.rs +0 -0
  202. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_rs/libext.rs +0 -0
  203. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_rs/mod.rs +0 -0
  204. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_rs/prelude.rs +0 -0
  205. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/hnsw_rs/test.rs +0 -0
  206. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/ivf.rs +0 -0
  207. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/memory.rs +0 -0
  208. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/mps/cosine_distance.metal +0 -0
  209. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/mps/hamming_distance.metal +0 -0
  210. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/mps/inner_product.metal +0 -0
  211. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/mps/jaccard_distance.metal +0 -0
  212. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/mps/kmeans_assignment.metal +0 -0
  213. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/mps/l1_distance.metal +0 -0
  214. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/mps/l2_distance.metal +0 -0
  215. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/opencl/cosine_distance.cl +0 -0
  216. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/opencl/hamming_distance.cl +0 -0
  217. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/opencl/inner_product.cl +0 -0
  218. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/opencl/jaccard_distance.cl +0 -0
  219. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/opencl/kmeans_assignment.cl +0 -0
  220. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/opencl/l1_distance.cl +0 -0
  221. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/opencl/l2_distance.cl +0 -0
  222. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/index/wgpu_kernel.wgsl +0 -0
  223. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/license.rs +0 -0
  224. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/maintenance.rs +0 -0
  225. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/merge.rs +0 -0
  226. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/metadata.rs +0 -0
  227. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/nessie.rs +0 -0
  228. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/puffin.rs +0 -0
  229. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/sql/optimizer.rs +0 -0
  230. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/sql/pgvector_rewriter.rs +0 -0
  231. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/sql/physical_plan/index_join.rs +0 -0
  232. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/sql/session.rs +0 -0
  233. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/sql/vector_literal.rs +0 -0
  234. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/sql/vector_operators.rs +0 -0
  235. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/sql/vector_udf.rs +0 -0
  236. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/core/wal.rs +0 -0
  237. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/enterprise/continuous_indexing.rs +0 -0
  238. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/enterprise/license.rs +0 -0
  239. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/enterprise/mod.rs +0 -0
  240. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/index.rs.old +0 -0
  241. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/python_distance.rs +0 -0
  242. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/python_gpu_context.rs +0 -0
  243. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/telemetry/metrics.rs +0 -0
  244. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/telemetry/mod.rs +0 -0
  245. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/src/telemetry/tracing.rs +0 -0
  246. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/all_types_index_test.rs +0 -0
  247. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/bin/generate_iceberg_manifests.rs +0 -0
  248. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/bin/verify_iceberg_read_check.rs +0 -0
  249. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/check_mmh3.py +0 -0
  250. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/data/download_nyc_taxi.sh +0 -0
  251. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/data/generate_embeddings.py +0 -0
  252. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/data/generate_wikipedia.py +0 -0
  253. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/data/start_nessie.sh +0 -0
  254. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/datafusion_rust_test.rs +0 -0
  255. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/debug_murmur3.rs +0 -0
  256. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/fuzz_murmur3.rs +0 -0
  257. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/performance/README.md +0 -0
  258. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/prototype_merge.py +0 -0
  259. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/schema_evolution_test.rs +0 -0
  260. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_catalog_commit.rs +0 -0
  261. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_compliance.rs +0 -0
  262. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_delete_correctness.rs +0 -0
  263. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_iceberg_python_delete.sh +0 -0
  264. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_iceberg_rest.sh +0 -0
  265. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_iceberg_rest_create.sh +0 -0
  266. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_iceberg_rest_delete.sh +0 -0
  267. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_iceberg_rest_remove_index.sh +0 -0
  268. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_iceberg_rest_update.sh +0 -0
  269. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_metadata_creation.rs +0 -0
  270. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_mor_writes.rs +0 -0
  271. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_partition_transforms.rs +0 -0
  272. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_partitioned_writes.rs +0 -0
  273. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_puffin_index.sh +0 -0
  274. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_rest_updates.sh +0 -0
  275. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/tests/verify_schema_compat.rs +0 -0
  276. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config/.DS_Store +0 -0
  277. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config/catalog/glue_catalog.properties +0 -0
  278. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config/catalog/hyperstreamdb.properties +0 -0
  279. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config/catalog/iceberg.properties +0 -0
  280. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config/catalog/memory.properties +0 -0
  281. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config/catalog/postgres.properties +0 -0
  282. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config/config.properties +0 -0
  283. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config/entrypoint.sh +0 -0
  284. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config/jvm.config +0 -0
  285. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config/node.properties +0 -0
  286. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-config.zip +0 -0
  287. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-hyperstream/pom.xml +0 -0
  288. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBColumnHandle.java +0 -0
  289. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBConnectorFactory.java +0 -0
  290. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBMetadata.java +0 -0
  291. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSource.java +0 -0
  292. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSourceProvider.java +0 -0
  293. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPlugin.java +0 -0
  294. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplit.java +0 -0
  295. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplitManager.java +0 -0
  296. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBTableHandle.java +0 -0
  297. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/update_schema_patch.py +0 -0
  298. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/update_schema_patch2.py +0 -0
  299. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/verify_docstrings.py +0 -0
  300. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/verify_fluent_api.py +0 -0
  301. {hyperstreamdb-0.2.6 → hyperstreamdb-0.3.1}/verify_unified_ingest.py +0 -0
@@ -0,0 +1,6 @@
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
@@ -3375,7 +3375,7 @@ dependencies = [
3375
3375
 
3376
3376
  [[package]]
3377
3377
  name = "hyperstreamdb"
3378
- version = "0.2.6"
3378
+ version = "0.3.1"
3379
3379
  dependencies = [
3380
3380
  "ahash 0.8.12",
3381
3381
  "anyhow",
@@ -3397,6 +3397,7 @@ dependencies = [
3397
3397
  "cpu-time",
3398
3398
  "criterion",
3399
3399
  "cudarc",
3400
+ "dashmap",
3400
3401
  "datafusion",
3401
3402
  "datafusion-expr-common",
3402
3403
  "datafusion-functions",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "hyperstreamdb"
3
- version = "0.2.6"
3
+ version = "0.3.1"
4
4
  edition = "2021"
5
5
  license = "MIT AND Apache-2.0"
6
6
  description = "HyperStreamDB - Serverless Index-Streaming Database with Overlay Indexing and Vector Search"
@@ -84,6 +84,7 @@ pilota = "0.11"
84
84
  # Hardware Acceleration (moved to bottom of file)
85
85
  # indexing
86
86
  roaring = "0.10.2" # For scalar bitmaps
87
+ dashmap = "6.0" # Concurrent HashMaps
87
88
  # hnsw_rs removed and internalized to src/core/index/hnsw_rs
88
89
  bincode = "1.3"
89
90
  cpu-time = "1.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hyperstreamdb
3
- Version: 0.2.6
3
+ Version: 0.3.1
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Programming Language :: Rust
@@ -62,12 +62,8 @@ A production-ready indexed data lake format that combines the transactional guar
62
62
  | **Time Travel** | ✅ Yes | ✅ Yes |
63
63
  | **Scalar Indexes** | ❌ No | ✅ RoaringBitmap |
64
64
  | **Boolean Indexes** | ❌ No | ✅ Native Boolean |
65
- | **Vector Search** | ❌ No | ✅ HNSW |
66
- | **pgvector SQL** | ❌ No | ✅ Full Compatibility |
67
- | **GPU Acceleration** | ❌ No | ✅ CUDA/ROCm/XPU/Metal |
68
- | **Torch Alignment** | ❌ No | ✅ ROCm-as-CUDA |
69
- | **Python Vector API** | ❌ No | ✅ NumPy-compatible |
70
- | **Fluent Query API** | ❌ No | ✅ Method Chaining |
65
+ | **TurboQuant** | ❌ No | ✅ TQ8 & TQ4 (8-bit/4-bit) |
66
+ | **Fluent Indexing API** | ❌ No | ✅ Method Chaining |
71
67
  | **Hybrid Queries** | ❌ No | ✅ Scalar + Vector |
72
68
  | **Native SQL** | ❌ No | ✅ DataFusion |
73
69
  | **Index-Optimized Joins** | ❌ No | ✅ Index Nested Loop |
@@ -204,10 +200,12 @@ table = hdb.Table("s3://bucket/my-table")
204
200
  import pandas as pd
205
201
  df = pd.DataFrame({
206
202
  "id": [1, 2, 3],
207
- "text": ["hello", "world", "test"],
208
203
  "embedding": [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
209
204
  })
210
- table.write_pandas(df)
205
+ table.insert(df) # Convenient alias for write_pandas
206
+
207
+ # Create high-performance vector index (TQ8 - 4x compression)
208
+ table.add_index("embedding", "hnsw_tq8")
211
209
 
212
210
  # Query with filters (uses indexes!) - Fluent API
213
211
  results = table.query().filter("id > 1").execute()
@@ -300,6 +298,30 @@ async fn main() -> anyhow::Result<()> {
300
298
  - **Performance**: Same underlying optimized execution as traditional APIs
301
299
  - **Interoperable**: Mix with SQL queries and traditional `to_pandas()` calls
302
300
  - **GPU Acceleration**: Automatic GPU context propagation for vector operations
301
+ - **TurboQuant Optimized**: Seamless integration with 8-bit/4-bit quantization
302
+
303
+ ### TurboQuant Quantization (TQ8 / TQ4)
304
+
305
+ HyperStreamDB features **TurboQuant**, an optimized quantization engine that reduces vector storage costs while maintaining high search accuracy:
306
+
307
+ - **TQ8 (8-bit)**: 4x compression vs. float32. Near-lossless accuracy (typically >99% recall retention). Ideal for general-purpose RAG.
308
+ - **TQ4 (4-bit)**: 8x compression vs. float32. Maximum efficiency for massive datasets where storage cost is the primary bottleneck.
309
+
310
+ ```python
311
+ # Use enterprise defaults (HNSW-TQ8)
312
+ table.add_index("embedding", "hnsw_tq8")
313
+
314
+ # High-compression mode
315
+ table.add_index("embedding", "hnsw_tq4")
316
+
317
+ # Custom HNSW-PQ configuration
318
+ table.add_index("embedding", {
319
+ "type": "hnsw_pq",
320
+ "complexity": 32,
321
+ "quality": 300,
322
+ "compression": 32 # PQ subspaces
323
+ })
324
+ ```
303
325
 
304
326
  ### Python Vector Distance API with GPU Acceleration
305
327
 
@@ -17,12 +17,8 @@ A production-ready indexed data lake format that combines the transactional guar
17
17
  | **Time Travel** | ✅ Yes | ✅ Yes |
18
18
  | **Scalar Indexes** | ❌ No | ✅ RoaringBitmap |
19
19
  | **Boolean Indexes** | ❌ No | ✅ Native Boolean |
20
- | **Vector Search** | ❌ No | ✅ HNSW |
21
- | **pgvector SQL** | ❌ No | ✅ Full Compatibility |
22
- | **GPU Acceleration** | ❌ No | ✅ CUDA/ROCm/XPU/Metal |
23
- | **Torch Alignment** | ❌ No | ✅ ROCm-as-CUDA |
24
- | **Python Vector API** | ❌ No | ✅ NumPy-compatible |
25
- | **Fluent Query API** | ❌ No | ✅ Method Chaining |
20
+ | **TurboQuant** | ❌ No | ✅ TQ8 & TQ4 (8-bit/4-bit) |
21
+ | **Fluent Indexing API** | ❌ No | ✅ Method Chaining |
26
22
  | **Hybrid Queries** | ❌ No | ✅ Scalar + Vector |
27
23
  | **Native SQL** | ❌ No | ✅ DataFusion |
28
24
  | **Index-Optimized Joins** | ❌ No | ✅ Index Nested Loop |
@@ -159,10 +155,12 @@ table = hdb.Table("s3://bucket/my-table")
159
155
  import pandas as pd
160
156
  df = pd.DataFrame({
161
157
  "id": [1, 2, 3],
162
- "text": ["hello", "world", "test"],
163
158
  "embedding": [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
164
159
  })
165
- table.write_pandas(df)
160
+ table.insert(df) # Convenient alias for write()
161
+
162
+ # Create high-performance vector index (TQ8 - 4x compression)
163
+ table.add_index("embedding", "hnsw_tq8")
166
164
 
167
165
  # Query with filters (uses indexes!) - Fluent API
168
166
  results = table.query().filter("id > 1").execute()
@@ -255,6 +253,30 @@ async fn main() -> anyhow::Result<()> {
255
253
  - **Performance**: Same underlying optimized execution as traditional APIs
256
254
  - **Interoperable**: Mix with SQL queries and traditional `to_pandas()` calls
257
255
  - **GPU Acceleration**: Automatic GPU context propagation for vector operations
256
+ - **TurboQuant Optimized**: Seamless integration with 8-bit/4-bit quantization
257
+
258
+ ### TurboQuant Quantization (TQ8 / TQ4)
259
+
260
+ HyperStreamDB features **TurboQuant**, an optimized quantization engine that reduces vector storage costs while maintaining high search accuracy:
261
+
262
+ - **TQ8 (8-bit)**: 4x compression vs. float32. Near-lossless accuracy (typically >99% recall retention). Ideal for general-purpose RAG.
263
+ - **TQ4 (4-bit)**: 8x compression vs. float32. Maximum efficiency for massive datasets where storage cost is the primary bottleneck.
264
+
265
+ ```python
266
+ # Use enterprise defaults (HNSW-TQ8)
267
+ table.add_index("embedding", "hnsw_tq8")
268
+
269
+ # High-compression mode
270
+ table.add_index("embedding", "hnsw_tq4")
271
+
272
+ # Custom HNSW-PQ configuration
273
+ table.add_index("embedding", {
274
+ "type": "hnsw_pq",
275
+ "complexity": 32,
276
+ "quality": 300,
277
+ "compression": 32 # PQ subspaces
278
+ })
279
+ ```
258
280
 
259
281
  ### Python Vector Distance API with GPU Acceleration
260
282
 
@@ -0,0 +1,33 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "c2ee4517-3acc-4aaf-aace-60dfb489e44d",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": []
10
+ }
11
+ ],
12
+ "metadata": {
13
+ "kernelspec": {
14
+ "display_name": "Python 3 (ipykernel)",
15
+ "language": "python",
16
+ "name": "python3"
17
+ },
18
+ "language_info": {
19
+ "codemirror_mode": {
20
+ "name": "ipython",
21
+ "version": 3
22
+ },
23
+ "file_extension": ".py",
24
+ "mimetype": "text/x-python",
25
+ "name": "python",
26
+ "nbconvert_exporter": "python",
27
+ "pygments_lexer": "ipython3",
28
+ "version": "3.12.3"
29
+ }
30
+ },
31
+ "nbformat": 4,
32
+ "nbformat_minor": 5
33
+ }
@@ -42,7 +42,7 @@ fn bench_query_indexed(c: &mut Criterion) {
42
42
  .with_columns_to_index(vec!["id".to_string()]);
43
43
  let writer = HybridSegmentWriter::new(writer_config);
44
44
  writer.write_batch(&batch).unwrap();
45
- writer.build_indexes(&batch).unwrap();
45
+ writer.build_indexes(&batch, 0).unwrap();
46
46
  let entry = writer.to_manifest_entry();
47
47
 
48
48
  // For Reader: Use relative path logic since store is rooted at tmp_dir
@@ -81,7 +81,7 @@ fn bench_vector_search(c: &mut Criterion) {
81
81
 
82
82
  let writer = HybridSegmentWriter::new(config);
83
83
  writer.write_batch(&batch).unwrap();
84
- writer.build_indexes(&batch).unwrap();
84
+ writer.build_indexes(&batch, 0).unwrap();
85
85
 
86
86
  // Generate a random query vector
87
87
  let mut rng = rand::thread_rng();
@@ -128,7 +128,7 @@ fn bench_hybrid_search(c: &mut Criterion) {
128
128
 
129
129
  let writer = HybridSegmentWriter::new(config);
130
130
  writer.write_batch(&batch).unwrap();
131
- writer.build_indexes(&batch).unwrap();
131
+ writer.build_indexes(&batch, 0).unwrap();
132
132
 
133
133
  let mut rng = rand::thread_rng();
134
134
  let query_vec: Vec<f32> = (0..vec_dim).map(|_| rng.gen()).collect();
@@ -192,7 +192,7 @@ fn bench_high_selectivity(c: &mut Criterion) {
192
192
  .with_columns_to_index(vec!["id".to_string()]);
193
193
  let writer = HybridSegmentWriter::new(config);
194
194
  writer.write_batch(&batch).unwrap();
195
- writer.build_indexes(&batch).unwrap();
195
+ writer.build_indexes(&batch, 0).unwrap();
196
196
  let entry = writer.to_manifest_entry();
197
197
 
198
198
  // For Reader: Use relative path logic
@@ -0,0 +1,14 @@
1
+ [
2
+ {
3
+ "name": "multi_filter_vector",
4
+ "throughput": null,
5
+ "latency_p50_ms": 19.483089447021484,
6
+ "latency_p95_ms": 22.400259971618652,
7
+ "latency_p99_ms": 22.65956401824951,
8
+ "latency_mean_ms": 20.386934280395508,
9
+ "latency_min_ms": 18.953323364257812,
10
+ "latency_max_ms": 22.724390029907227,
11
+ "elapsed_sec": 0.06162667274475098,
12
+ "memory_delta_mb": 0.21875
13
+ }
14
+ ]
@@ -0,0 +1,7 @@
1
+ ## Benchmark Results
2
+
3
+ **Hardware & OS:** Linux 6.8.0-106-generic | x86_64 (8 threads) | 63 GB RAM
4
+
5
+ | name | throughput | latency_p50_ms | latency_p95_ms | latency_p99_ms | latency_mean_ms | latency_min_ms | latency_max_ms | elapsed_sec | memory_delta_mb |
6
+ | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
7
+ | multi_filter_vector | None | 19.48 | 22.40 | 22.66 | 20.39 | 18.95 | 22.72 | 0.06 | 0.22 |
File without changes
@@ -4,6 +4,9 @@
4
4
 
5
5
  HyperStreamDB provides a comprehensive Python API for vector distance computations with GPU acceleration support across multiple hardware backends. This API allows you to compute distances between vectors directly from Python without writing SQL queries, with optional GPU acceleration for high-performance batch operations.
6
6
 
7
+ > [!NOTE]
8
+ > This guide covers **standalone** distance functions for CPU/GPU. For persistent vector storage and search with TurboQuant indexing (TQ8/TQ4), please see [Vector Configuration](VECTOR_CONFIGURATION.md).
9
+
7
10
  ## Supported Distance Metrics
8
11
 
9
12
  The API supports six distance metrics:
@@ -234,32 +234,37 @@ ORDER BY distance
234
234
  LIMIT 10;
235
235
  ```
236
236
 
237
- ### Index Build Parameters
238
-
239
- Index build parameters are set during index creation:
237
+ Index build parameters are set during index creation using the fluent `add_index` method:
240
238
 
241
239
  ```python
242
240
  import hyperstreamdb as hdb
243
241
 
244
242
  table = hdb.Table("s3://bucket/my-table")
245
243
 
246
- # Create HNSW index with custom parameters
247
- table.create_index(
244
+ # TurboQuant 8-bit quantization (Recommended Default)
245
+ # 4x compression, near-lossless accuracy
246
+ table.add_index("embedding", "hnsw_tq8")
247
+
248
+ # TurboQuant 4-bit quantization
249
+ # 8x compression, maximum efficiency
250
+ table.add_index("embedding", "hnsw_tq4")
251
+
252
+ # Custom HNSW parameters
253
+ table.add_index(
248
254
  column="embedding",
249
- index_type="hnsw",
250
- params={
251
- "m": 16, # Number of connections per layer
252
- "ef_construction": 200, # Build-time beam width
255
+ index_config={
256
+ "type": "hnsw",
257
+ "complexity": 16, # Max connections per node (formerly 'm')
258
+ "quality": 200, # Construction beam width (formerly 'ef_construction')
253
259
  }
254
260
  )
255
261
 
256
- # Create IVF index with custom parameters
257
- table.create_index(
262
+ # Product Quantization (PQ)
263
+ table.add_index(
258
264
  column="embedding",
259
- index_type="ivf",
260
- params={
261
- "n_lists": 1000, # Number of clusters
262
- "n_probes": 10, # Default search probes
265
+ index_config={
266
+ "type": "hnsw_pq",
267
+ "compression": 32 # PQ subspaces (formerly 'subspaces')
263
268
  }
264
269
  )
265
270
  ```
@@ -224,13 +224,22 @@ See [GPU Setup Guide](GPU_SETUP_GUIDE.md) for installation and configuration.
224
224
  ```python
225
225
  import hyperstreamdb as hdb
226
226
 
227
- # Configure HNSW index parameters
228
227
  table = hdb.Table("s3://bucket/table")
229
- table.create_vector_index(
228
+
229
+ # TurboQuant 8-bit (4x compression)
230
+ table.add_index("embedding", "hnsw_tq8")
231
+
232
+ # TurboQuant 4-bit (8x compression)
233
+ table.add_index("embedding", "hnsw_tq4")
234
+
235
+ # Custom HNSW configuration
236
+ table.add_index(
230
237
  column="embedding",
231
- metric="cosine",
232
- m=16, # Number of connections per layer
233
- ef_construction=200 # Size of dynamic candidate list
238
+ index_config={
239
+ "type": "hnsw",
240
+ "complexity": 16, # connections per node
241
+ "quality": 200 # construction search width
242
+ }
234
243
  )
235
244
  ```
236
245
 
@@ -96,12 +96,20 @@ except ImportError:
96
96
  from typing import List, Optional, Union, Dict, Any
97
97
  import os
98
98
 
99
class IndexType:
    """
    HyperStreamDB Indexing Algorithms.

    String constants that can be passed wherever an index algorithm name is
    expected (for example ``Table.add_index`` or ``Table.set_index_columns``).
    """
    # Vector index names.
    HNSW = "hnsw"
    # HNSW with TurboQuant 8-bit / 4-bit quantization, and HNSW with
    # Product Quantization, as used in the package documentation.
    HNSW_TQ8 = "hnsw_tq8"
    HNSW_TQ4 = "hnsw_tq4"
    HNSW_PQ = "hnsw_pq"
    # Remaining index names.
    BM25 = "bm25"
    BLOOM = "bloom"
    BITMAP = "bitmap"
    INVERTED = "inverted"
108
+
99
109
  def _resolve_uri(uri: str) -> str:
100
- """Resolve a URI to an absolute path if it's a local relative path."""
101
- if "://" not in uri and not uri.startswith("/"):
102
- return os.path.abspath(uri)
110
+ if not uri.startswith(("s3://", "file://", "az://", "gs://", "http://", "https://")):
111
+ return f"file://{os.abspath(uri)}" if hasattr(os, "abspath") else uri
103
112
  return uri
104
-
105
113
  class Query:
106
114
  """
107
115
  Fluent Query interface for HyperStreamDB.
@@ -251,6 +259,10 @@ class Table:
251
259
  pass
252
260
  raise TypeError(f"Unsupported data type for write: {type(data)}")
253
261
 
262
def insert(self, data: Any, device: Optional[Any] = None):
    """
    Write ``data`` to the table under the name ``insert``.

    Exists for compatibility with common vector DB APIs; it forwards to
    ``write(data, device=device)`` and returns whatever ``write`` returns.
    """
    written = self.write(data, device=device)
    return written
265
+
254
266
  def write_pandas(self, df: pd.DataFrame, device: Optional[Any] = None):
255
267
  """High-level Pandas ingestion with auto-vectorization."""
256
268
  return self._write_pandas(df, device=device)
@@ -583,24 +595,39 @@ class Table:
583
595
  def set_index_config(self, column: str, enabled: bool = True, tokenizer: Optional[str] = None, device: Optional[str] = None):
584
596
  """
585
597
  Set indexing configuration for a specific column.
598
+ (Legacy compatibility wrapper)
599
+ """
600
+ if not enabled:
601
+ return self.drop_index(column)
602
+
603
+ config = {"type": "hnsw"}
604
+ if tokenizer: config["tokenizer"] = tokenizer
605
+ if device: config["build_device"] = device
606
+ return self.add_index(column, config)
607
+
608
def set_index_columns(self, config: Dict[str, Union[str, List[Union[str, Dict[str, Any]]], Dict[str, Any]]]):
    """
    Update indexing specifications for multiple columns at once.

    ``config`` maps a column name to its index specification: a single
    algorithm name, a configuration dictionary, or a list mixing both.
    The mapping is forwarded unchanged to the underlying table object.

    Example:
        table.set_index_columns({
            "embedding": IndexType.HNSW,
            "content": ["hnsw", "bm25"],
            "category": "bitmap"
        })
    """
    outcome = self._inner.set_index_columns(config)
    return outcome
594
621
 
595
- def add_index(self, column: str, tokenizer: Optional[str] = None, device: Optional[str] = None):
622
def add_index(self, column: str, algorithm: Union[str, Dict[str, Any]] = "hnsw",
              index_config: Optional[Union[str, Dict[str, Any]]] = None):
    """
    Add an indexing strategy to a column.

    Args:
        column: Name of the column to index.
        algorithm: Either an algorithm name (e.g. ``"hnsw"``, ``"hnsw_tq8"``)
            or a configuration dictionary such as
            ``{"type": "hnsw", "complexity": 16, "quality": 200}``.
        index_config: Keyword alias for ``algorithm``. The package
            documentation calls ``add_index(column=..., index_config={...})``;
            when given, it takes precedence over ``algorithm``.

    Returns:
        Whatever the underlying table object's ``add_index`` returns.
    """
    if index_config is not None:
        algorithm = index_config
    return self._inner.add_index(column, algorithm)
600
627
 
601
628
  def drop_index(self, column: str):
602
629
  """
603
- Drop an index from a column (convenience wrapper).
630
+ Remove all indexing strategies from a column.
604
631
  """
605
632
  return self._inner.drop_index(column)
606
633
 
@@ -0,0 +1,19 @@
1
+ import re
2
+ import os
3
+
4
+ with open("src/core/table/mod.rs", "r") as f:
5
+ mod_lines = f.readlines()
6
+
7
+ def extract_between(start_str, end_str):
8
+ start_idx = -1
9
+ end_idx = -1
10
+ for i, line in enumerate(mod_lines):
11
+ if start_str in line and start_idx == -1:
12
+ # check if it's the right indentation (4 spaces)
13
+ if line.startswith(" pub ") or line.startswith(" async fn ") or line.startswith(" fn "):
14
+ start_idx = i
15
+ if end_str in line and start_idx != -1:
16
+ # find the end of that block
17
+ pass # this is hard in python.
18
+
19
+ # Let's just use rustc --pretty !!
@@ -11,6 +11,7 @@ use crate::core::index::hnsw_rs::hnsw::Hnsw;
11
11
  use crate::core::index::hnsw_rs::dist::DistL2;
12
12
  use arrow::record_batch::RecordBatch;
13
13
  use parquet::file::metadata::ParquetMetaData;
14
+ use parquet::bloom_filter::Sbbf;
14
15
  use std::path::PathBuf;
15
16
  use object_store::ObjectStore;
16
17
  use anyhow::Result;
@@ -175,6 +176,13 @@ pub static PARQUET_META_CACHE: Lazy<Cache<String, (Arc<ParquetMetaData>, usize)>
175
176
  .build()
176
177
  });
177
178
 
179
+ pub static BLOOM_FILTER_CACHE: Lazy<Cache<String, Arc<Sbbf>>> = Lazy::new(|| {
180
+ Cache::builder()
181
+ .max_capacity(2048) // Roughly 250MB if each is 128KB
182
+ .time_to_idle(Duration::from_secs(60 * 30))
183
+ .build()
184
+ });
185
+
178
186
  /// Doris-inspired Block Cache for decoded RecordBatches.
179
187
  /// Bypasses Parquet decoding/decompression for frequently accessed blocks.
180
188
  pub static BLOCK_CACHE: Lazy<Cache<String, Arc<RecordBatch>>> = Lazy::new(|| {
@@ -274,3 +274,91 @@ pub fn sparse_l2_distance_squared(
274
274
 
275
275
  sum
276
276
  }
277
+
278
/// Squared L2 distance between two quantized `u8` vectors.
///
/// Returns the sum of squared element differences; no square root is taken.
/// When the slices differ in length only their common prefix is compared.
///
/// Partial sums are accumulated per fixed-size block and combined in 64 bits,
/// so the result cannot overflow for very long inputs (a plain `i32`
/// accumulator overflows after 33,025 maximally different elements).
#[inline(always)]
pub fn l2_distance_u8(a: &[u8], b: &[u8]) -> f32 {
    // 32_768 * 255^2 = 2_130_739_200 < i32::MAX, so a block's partial sum
    // always fits in an i32 and the inner loop stays in 32-bit arithmetic.
    const BLOCK: usize = 32_768;

    let len = a.len().min(b.len());
    let total: i64 = a[..len]
        .chunks(BLOCK)
        .zip(b[..len].chunks(BLOCK))
        .map(|(block_a, block_b)| {
            let partial: i32 = block_a
                .iter()
                .zip(block_b)
                .map(|(&x, &y)| {
                    let diff = i32::from(x) - i32::from(y);
                    diff * diff
                })
                .sum();
            i64::from(partial)
        })
        .sum();
    total as f32
}
288
+
289
/// Asymmetric Distance Calculation (ADC) for quantized vectors.
///
/// Computes the squared L2 distance (no square root) between a float32
/// `query` and a quantized `encoded` vector. Each code `e` is decoded as
/// `e * (1.0 / scale) + offset` before being compared with the query value.
///
/// When the slices differ in length only their common prefix is compared.
/// `scale` must be non-zero; a zero scale makes the decoded values non-finite.
#[inline(always)]
pub fn l2_distance_adc(query: &[f32], encoded: &[u8], offset: f32, scale: f32) -> f32 {
    let inv_scale = 1.0 / scale;
    let squared_error = |q: f32, e: u8| -> f32 {
        let decoded = (e as f32 * inv_scale) + offset;
        let diff = q - decoded;
        diff * diff
    };

    // Truncate to the common prefix first so the 8-wide chunks and the tails of
    // both slices line up even when the caller passes slices of unequal length.
    let len = query.len().min(encoded.len());
    let (query, encoded) = (&query[..len], &encoded[..len]);

    // Fixed-width chunks keep the hot loop at a constant trip count.
    let query_chunks = query.chunks_exact(8);
    let code_chunks = encoded.chunks_exact(8);
    let query_tail = query_chunks.remainder();
    let code_tail = code_chunks.remainder();

    let mut sum = 0.0f32;
    for (q_chunk, e_chunk) in query_chunks.zip(code_chunks) {
        for (&q, &e) in q_chunk.iter().zip(e_chunk) {
            sum += squared_error(q, e);
        }
    }
    for (&q, &e) in query_tail.iter().zip(code_tail) {
        sum += squared_error(q, e);
    }
    sum
}
317
+
318
/// Squared L2 distance between two vectors of packed 4-bit codes.
///
/// Every byte carries two codes, one in its low nibble and one in its high
/// nibble. Bytes are compared pairwise (stopping at the shorter slice), and the
/// squared differences of both nibbles are summed; no square root is taken.
#[inline(always)]
pub fn l2_distance_u4(a: &[u8], b: &[u8]) -> f32 {
    let squared_gap = |p: u8, q: u8| -> i32 {
        let delta = i32::from(p) - i32::from(q);
        delta * delta
    };

    let total = a.iter().zip(b.iter()).fold(0i32, |acc, (&lhs, &rhs)| {
        let low = squared_gap(lhs & 0x0F, rhs & 0x0F);
        let high = squared_gap(lhs >> 4, rhs >> 4);
        acc + low + high
    });
    total as f32
}
333
+
334
+ #[derive(Debug, Clone, Copy)]
335
+ pub struct DistL2u8;
336
+
337
+ impl DistL2u8 {
338
+ #[inline(always)]
339
+ pub fn distance(&self, a: &[u8], b: &[u8]) -> f32 {
340
+ l2_distance_u8(a, b)
341
+ }
342
+ }
343
+
344
+ impl super::hnsw_rs::dist::Distance<u8> for DistL2u8 {
345
+ fn eval(&self, va: &[u8], vb: &[u8]) -> f32 {
346
+ self.distance(va, vb)
347
+ }
348
+ }
349
+
350
+ #[derive(Debug, Clone, Copy)]
351
+ pub struct DistL2u4;
352
+
353
+ impl DistL2u4 {
354
+ #[inline(always)]
355
+ pub fn distance(&self, a: &[u8], b: &[u8]) -> f32 {
356
+ l2_distance_u4(a, b)
357
+ }
358
+ }
359
+
360
+ impl super::hnsw_rs::dist::Distance<u8> for DistL2u4 {
361
+ fn eval(&self, va: &[u8], vb: &[u8]) -> f32 {
362
+ self.distance(va, vb)
363
+ }
364
+ }