hyperstreamdb 0.2.5__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hyperstreamdb-0.3.0/.ipynb_checkpoints/Untitled-checkpoint.ipynb +6 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/Cargo.lock +2 -1
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/Cargo.toml +2 -1
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/PKG-INFO +31 -9
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/README.md +30 -8
- hyperstreamdb-0.3.0/Untitled.ipynb +33 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benches/performance.rs +4 -4
- hyperstreamdb-0.3.0/benchmark_results/multi_filter_vector_20260409_231713.json +14 -0
- hyperstreamdb-0.3.0/benchmark_results/multi_filter_vector_20260409_231713.md +7 -0
- hyperstreamdb-0.3.0/docs/.nojekyll +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/PYTHON_VECTOR_API.md +3 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/VECTOR_CONFIGURATION.md +20 -15
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/api_reference.md +14 -5
- hyperstreamdb-0.3.0/docs/source/_static/HyperStreamDB.png +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/python/hyperstreamdb/__init__.py +41 -14
- hyperstreamdb-0.3.0/split_table.py +19 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/cache.rs +24 -1
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/distance.rs +88 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/gpu.rs +19 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_ivf.rs +197 -87
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_rs/flatten.rs +8 -11
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_rs/hnswio.rs +24 -39
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/memory.rs +41 -3
- hyperstreamdb-0.3.0/src/core/index/mod.rs +134 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/pq.rs +42 -2
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/tokenizer.rs +1 -0
- hyperstreamdb-0.3.0/src/core/index/turboquant.rs +192 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/manifest.rs +429 -117
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/mod.rs +1 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/planner.rs +38 -1
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/query.rs +17 -3
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/reader.rs +367 -28
- hyperstreamdb-0.3.0/src/core/search/mod.rs +87 -0
- hyperstreamdb-0.3.0/src/core/search/rrf.rs +66 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/segment.rs +185 -88
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/sql/mod.rs +44 -4
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/sql/physical_plan.rs +19 -4
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/storage.rs +1 -1
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/table/builder.rs +24 -8
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/table/fluent.rs +13 -1
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/table/mod.rs +412 -104
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/table/read.rs +232 -15
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/table/schema.rs +1 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/table/write.rs +157 -24
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/lib.rs +7 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/python_binding.rs +300 -183
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/telemetry/tracing.rs +17 -4
- hyperstreamdb-0.3.0/task.md +33 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/integration_test_hnsw_ivf_native.rs +2 -2
- hyperstreamdb-0.3.0/tests/verify_all_algos.py +83 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_mor_reads.rs +2 -2
- hyperstreamdb-0.2.5/src/core/index/mod.rs +0 -54
- hyperstreamdb-0.2.5/task.md +0 -112
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/.gitattributes +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/.gitignore +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/.hypothesis/constants/32b327793848e7d8 +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/.hypothesis/constants/67b0a8ccf18bf5d2 +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/.hypothesis/constants/84828557b4ee7be4 +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/.instructions.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/CNAME +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/DORIS_OPTIMIZATION_PATTERNS.md +0 -0
- {hyperstreamdb-0.2.5/docs/source/_static → hyperstreamdb-0.3.0}/HyperStreamDB.png +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/LICENSE-APACHE +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/LICENSE-MIT +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/RUN_COMPLIANCE_TESTS.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/STEERING.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/THIRDPARTY_NOTICES.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benches/bench_table.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/BENCHMARK_REPORT.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/benchmark_charts.png +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/benchmark_results.csv +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/concurrent_queries_20260409_214245.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/concurrent_queries_20260409_214245.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/filtered_search_comparison_20260409_222607.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/filtered_search_comparison_20260409_222607.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/filtered_vector_search_20260409_214355.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/filtered_vector_search_20260409_214355.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/filtered_vector_search_20260409_220418.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/filtered_vector_search_20260409_220418.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/filtered_vector_search_20260409_222053.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/filtered_vector_search_20260409_222053.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/filtered_vector_search_20260409_225907.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/filtered_vector_search_20260409_225907.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/full_scan_baseline_20260409_222303.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/full_scan_baseline_20260409_222303.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/high_selectivity_filter_20260409_222302.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/high_selectivity_filter_20260409_222302.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/ingestion_comparison_20260409_222516.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/ingestion_comparison_20260409_222516.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/multi_filter_vector_20260409_214428.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/multi_filter_vector_20260409_214428.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/multi_filter_vector_20260409_220450.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/multi_filter_vector_20260409_220450.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/multi_filter_vector_20260409_222131.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/multi_filter_vector_20260409_222131.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/multi_filter_vector_20260409_225938.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/multi_filter_vector_20260409_225938.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/post_vs_pre_filter_20260409_214501.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/post_vs_pre_filter_20260409_214501.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/post_vs_pre_filter_20260409_220524.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/post_vs_pre_filter_20260409_220524.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/post_vs_pre_filter_20260409_222204.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/post_vs_pre_filter_20260409_222204.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/post_vs_pre_filter_20260409_230010.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/post_vs_pre_filter_20260409_230010.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/query_comparison_20260409_222541.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/query_comparison_20260409_222541.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/range_query_20260409_222302.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/range_query_20260409_222302.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/search_filtered_high_selectivity_20260409_214144.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/search_filtered_high_selectivity_20260409_214144.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/search_unfiltered_20260409_214028.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/benchmark_results/search_unfiltered_20260409_214028.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/book.toml +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/broken_binaries_all.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/broken_bins.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/build-connectors.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/build.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/check_iceberg_compliance.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/compliance_output.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/critical_code_review.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/debug_log.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/demo_basics_run.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/demo_basics_v2.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docker-compose-minio-nessie.yml +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docker-compose.yml +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/BENCHMARKING.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/COMPREHENSIVE_GUIDE.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/CONCURRENCY.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/CONFIGURATION.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/GPU_SETUP_GUIDE.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/ICEBERG_V2_V3_API.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/INSTALLATION.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/PGVECTOR_SQL_GUIDE.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/architecture.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/catalog_usage.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/index.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/integrations/README.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/integrations/java_jni.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/integrations/python.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/integrations/spark.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/integrations/trino.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/requirements.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/source/api/python.rst +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/source/api/rust.rst +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/source/conf.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/source/index.rst +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/docs/source/roadmap.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/fix_nb.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/proptest-regressions/core/index/gpu.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/proptest-regressions/core/sql/vector_literal.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/proptest-regressions/core/sql/vector_udf.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/pyproject.toml +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/python/hyperstreamdb/embeddings.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/python_test_output.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/python_test_output_v2.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/python_test_output_v3.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/rust_check_all_warnings.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/rust_test_output.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/rust_warnings.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/scratch/check_os_error.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/simd_test_results.txt +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/spark-hyperstream/.bloop/bloop.settings.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/spark-hyperstream/.bloop/spark-hyperstream-test.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/spark-hyperstream/.bloop/spark-hyperstream.json +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/spark-hyperstream/pom.xml +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/DefaultSource.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartition.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReader.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReaderFactory.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamScanBuilder.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamTable.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/bin/gateway.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/bin/hdb.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/bin/iceberg_rest.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/bin/probe_datafusion.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/bin/setup_test_data.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/bin/verify_layered_indexing.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/catalog/config.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/catalog/glue.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/catalog/hive.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/catalog/jdbc.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/catalog/mod.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/catalog/nessie.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/catalog/rest.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/catalog/unity.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/clustering.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/compaction.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/embeddings.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/ffi.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/iceberg/iceberg_delete.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/iceberg.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/cuda/cosine_distance.cu +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/cuda/hamming_distance.cu +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/cuda/inner_product.cu +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/cuda/jaccard_distance.cu +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/cuda/kmeans_assignment.cu +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/cuda/l1_distance.cu +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/cuda/l2_distance.cu +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_rs/annhdf5.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_rs/api.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_rs/dist.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_rs/hnsw.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_rs/libext.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_rs/mod.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_rs/prelude.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/hnsw_rs/test.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/ivf.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/mps/cosine_distance.metal +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/mps/hamming_distance.metal +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/mps/inner_product.metal +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/mps/jaccard_distance.metal +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/mps/kmeans_assignment.metal +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/mps/l1_distance.metal +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/mps/l2_distance.metal +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/opencl/cosine_distance.cl +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/opencl/hamming_distance.cl +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/opencl/inner_product.cl +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/opencl/jaccard_distance.cl +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/opencl/kmeans_assignment.cl +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/opencl/l1_distance.cl +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/opencl/l2_distance.cl +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/index/wgpu_kernel.wgsl +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/license.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/maintenance.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/merge.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/metadata.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/nessie.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/puffin.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/sql/optimizer.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/sql/pgvector_rewriter.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/sql/physical_plan/index_join.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/sql/session.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/sql/vector_literal.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/sql/vector_operators.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/sql/vector_udf.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/core/wal.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/enterprise/continuous_indexing.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/enterprise/license.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/enterprise/mod.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/index.rs.old +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/python_distance.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/python_gpu_context.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/telemetry/metrics.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/src/telemetry/mod.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/all_types_index_test.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/bin/generate_iceberg_manifests.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/bin/verify_iceberg_read_check.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/check_mmh3.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/data/download_nyc_taxi.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/data/generate_embeddings.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/data/generate_wikipedia.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/data/start_nessie.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/datafusion_rust_test.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/debug_murmur3.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/fuzz_murmur3.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/performance/README.md +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/prototype_merge.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/schema_evolution_test.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_catalog_commit.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_compliance.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_delete_correctness.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_iceberg_python_delete.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_iceberg_rest.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_iceberg_rest_create.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_iceberg_rest_delete.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_iceberg_rest_remove_index.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_iceberg_rest_update.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_metadata_creation.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_mor_writes.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_partition_transforms.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_partitioned_writes.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_puffin_index.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_rest_updates.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/tests/verify_schema_compat.rs +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config/.DS_Store +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config/catalog/glue_catalog.properties +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config/catalog/hyperstreamdb.properties +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config/catalog/iceberg.properties +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config/catalog/memory.properties +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config/catalog/postgres.properties +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config/config.properties +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config/entrypoint.sh +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config/jvm.config +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config/node.properties +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-config.zip +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-hyperstream/pom.xml +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBColumnHandle.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBConnectorFactory.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBMetadata.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSource.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSourceProvider.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPlugin.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplit.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplitManager.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBTableHandle.java +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/update_schema_patch.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/update_schema_patch2.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/verify_docstrings.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/verify_fluent_api.py +0 -0
- {hyperstreamdb-0.2.5 → hyperstreamdb-0.3.0}/verify_unified_ingest.py +0 -0
|
@@ -3375,7 +3375,7 @@ dependencies = [
|
|
|
3375
3375
|
|
|
3376
3376
|
[[package]]
|
|
3377
3377
|
name = "hyperstreamdb"
|
|
3378
|
-
version = "0.2.5"
|
|
3378
|
+
version = "0.3.0"
|
|
3379
3379
|
dependencies = [
|
|
3380
3380
|
"ahash 0.8.12",
|
|
3381
3381
|
"anyhow",
|
|
@@ -3397,6 +3397,7 @@ dependencies = [
|
|
|
3397
3397
|
"cpu-time",
|
|
3398
3398
|
"criterion",
|
|
3399
3399
|
"cudarc",
|
|
3400
|
+
"dashmap",
|
|
3400
3401
|
"datafusion",
|
|
3401
3402
|
"datafusion-expr-common",
|
|
3402
3403
|
"datafusion-functions",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "hyperstreamdb"
|
|
3
|
-
version = "0.2.5"
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
license = "MIT AND Apache-2.0"
|
|
6
6
|
description = "HyperStreamDB - Serverless Index-Streaming Database with Overlay Indexing and Vector Search"
|
|
@@ -84,6 +84,7 @@ pilota = "0.11"
|
|
|
84
84
|
# Hardware Acceleration (moved to bottom of file)
|
|
85
85
|
# indexing
|
|
86
86
|
roaring = "0.10.2" # For scalar bitmaps
|
|
87
|
+
dashmap = "6.0" # Concurrent HashMaps
|
|
87
88
|
# hnsw_rs removed and internalized to src/core/index/hnsw_rs
|
|
88
89
|
bincode = "1.3"
|
|
89
90
|
cpu-time = "1.0"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hyperstreamdb
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Classifier: Development Status :: 3 - Alpha
|
|
5
5
|
Classifier: Intended Audience :: Developers
|
|
6
6
|
Classifier: Programming Language :: Rust
|
|
@@ -62,12 +62,8 @@ A production-ready indexed data lake format that combines the transactional guar
|
|
|
62
62
|
| **Time Travel** | ✅ Yes | ✅ Yes |
|
|
63
63
|
| **Scalar Indexes** | ❌ No | ✅ RoaringBitmap |
|
|
64
64
|
| **Boolean Indexes** | ❌ No | ✅ Native Boolean |
|
|
65
|
-
| **
|
|
66
|
-
| **
|
|
67
|
-
| **GPU Acceleration** | ❌ No | ✅ CUDA/ROCm/XPU/Metal |
|
|
68
|
-
| **Torch Alignment** | ❌ No | ✅ ROCm-as-CUDA |
|
|
69
|
-
| **Python Vector API** | ❌ No | ✅ NumPy-compatible |
|
|
70
|
-
| **Fluent Query API** | ❌ No | ✅ Method Chaining |
|
|
65
|
+
| **TurboQuant** | ❌ No | ✅ TQ8 & TQ4 (8-bit/4-bit) |
|
|
66
|
+
| **Fluent Indexing API** | ❌ No | ✅ Method Chaining |
|
|
71
67
|
| **Hybrid Queries** | ❌ No | ✅ Scalar + Vector |
|
|
72
68
|
| **Native SQL** | ❌ No | ✅ DataFusion |
|
|
73
69
|
| **Index-Optimized Joins** | ❌ No | ✅ Index Nested Loop |
|
|
@@ -204,10 +200,12 @@ table = hdb.Table("s3://bucket/my-table")
|
|
|
204
200
|
import pandas as pd
|
|
205
201
|
df = pd.DataFrame({
|
|
206
202
|
"id": [1, 2, 3],
|
|
207
|
-
"text": ["hello", "world", "test"],
|
|
208
203
|
"embedding": [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
|
|
209
204
|
})
|
|
210
|
-
table.
|
|
205
|
+
table.insert(df) # Convenient alias for write_pandas
|
|
206
|
+
|
|
207
|
+
# Create high-performance vector index (TQ8 - 4x compression)
|
|
208
|
+
table.add_index("embedding", "hnsw_tq8")
|
|
211
209
|
|
|
212
210
|
# Query with filters (uses indexes!) - Fluent API
|
|
213
211
|
results = table.query().filter("id > 1").execute()
|
|
@@ -300,6 +298,30 @@ async fn main() -> anyhow::Result<()> {
|
|
|
300
298
|
- **Performance**: Same underlying optimized execution as traditional APIs
|
|
301
299
|
- **Interoperable**: Mix with SQL queries and traditional `to_pandas()` calls
|
|
302
300
|
- **GPU Acceleration**: Automatic GPU context propagation for vector operations
|
|
301
|
+
- **TurboQuant Optimized**: Seamless integration with 8-bit/4-bit quantization
|
|
302
|
+
|
|
303
|
+
### TurboQuant Quantization (TQ8 / TQ4)
|
|
304
|
+
|
|
305
|
+
HyperStreamDB features **TurboQuant**, an optimized quantization engine that reduces vector storage costs while maintaining high search accuracy:
|
|
306
|
+
|
|
307
|
+
- **TQ8 (8-bit)**: 4x compression vs. float32. Near-lossless accuracy (typically >99% recall retention). Ideal for general-purpose RAG.
|
|
308
|
+
- **TQ4 (4-bit)**: 8x compression vs. float32. Maximum efficiency for massive datasets where storage cost is the primary bottleneck.
|
|
309
|
+
|
|
310
|
+
```python
|
|
311
|
+
# Use enterprise defaults (HNSW-TQ8)
|
|
312
|
+
table.add_index("embedding", "hnsw_tq8")
|
|
313
|
+
|
|
314
|
+
# High-compression mode
|
|
315
|
+
table.add_index("embedding", "hnsw_tq4")
|
|
316
|
+
|
|
317
|
+
# Custom HNSW-PQ configuration
|
|
318
|
+
table.add_index("embedding", {
|
|
319
|
+
"type": "hnsw_pq",
|
|
320
|
+
"complexity": 32,
|
|
321
|
+
"quality": 300,
|
|
322
|
+
"compression": 32 # PQ subspaces
|
|
323
|
+
})
|
|
324
|
+
```
|
|
303
325
|
|
|
304
326
|
### Python Vector Distance API with GPU Acceleration
|
|
305
327
|
|
|
@@ -17,12 +17,8 @@ A production-ready indexed data lake format that combines the transactional guar
|
|
|
17
17
|
| **Time Travel** | ✅ Yes | ✅ Yes |
|
|
18
18
|
| **Scalar Indexes** | ❌ No | ✅ RoaringBitmap |
|
|
19
19
|
| **Boolean Indexes** | ❌ No | ✅ Native Boolean |
|
|
20
|
-
| **
|
|
21
|
-
| **
|
|
22
|
-
| **GPU Acceleration** | ❌ No | ✅ CUDA/ROCm/XPU/Metal |
|
|
23
|
-
| **Torch Alignment** | ❌ No | ✅ ROCm-as-CUDA |
|
|
24
|
-
| **Python Vector API** | ❌ No | ✅ NumPy-compatible |
|
|
25
|
-
| **Fluent Query API** | ❌ No | ✅ Method Chaining |
|
|
20
|
+
| **TurboQuant** | ❌ No | ✅ TQ8 & TQ4 (8-bit/4-bit) |
|
|
21
|
+
| **Fluent Indexing API** | ❌ No | ✅ Method Chaining |
|
|
26
22
|
| **Hybrid Queries** | ❌ No | ✅ Scalar + Vector |
|
|
27
23
|
| **Native SQL** | ❌ No | ✅ DataFusion |
|
|
28
24
|
| **Index-Optimized Joins** | ❌ No | ✅ Index Nested Loop |
|
|
@@ -159,10 +155,12 @@ table = hdb.Table("s3://bucket/my-table")
|
|
|
159
155
|
import pandas as pd
|
|
160
156
|
df = pd.DataFrame({
|
|
161
157
|
"id": [1, 2, 3],
|
|
162
|
-
"text": ["hello", "world", "test"],
|
|
163
158
|
"embedding": [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
|
|
164
159
|
})
|
|
165
|
-
table.
|
|
160
|
+
table.insert(df) # Convenient alias for write_pandas
|
|
161
|
+
|
|
162
|
+
# Create high-performance vector index (TQ8 - 4x compression)
|
|
163
|
+
table.add_index("embedding", "hnsw_tq8")
|
|
166
164
|
|
|
167
165
|
# Query with filters (uses indexes!) - Fluent API
|
|
168
166
|
results = table.query().filter("id > 1").execute()
|
|
@@ -255,6 +253,30 @@ async fn main() -> anyhow::Result<()> {
|
|
|
255
253
|
- **Performance**: Same underlying optimized execution as traditional APIs
|
|
256
254
|
- **Interoperable**: Mix with SQL queries and traditional `to_pandas()` calls
|
|
257
255
|
- **GPU Acceleration**: Automatic GPU context propagation for vector operations
|
|
256
|
+
- **TurboQuant Optimized**: Seamless integration with 8-bit/4-bit quantization
|
|
257
|
+
|
|
258
|
+
### TurboQuant Quantization (TQ8 / TQ4)
|
|
259
|
+
|
|
260
|
+
HyperStreamDB features **TurboQuant**, an optimized quantization engine that reduces vector storage costs while maintaining high search accuracy:
|
|
261
|
+
|
|
262
|
+
- **TQ8 (8-bit)**: 4x compression vs. float32. Near-lossless accuracy (typically >99% recall retention). Ideal for general-purpose RAG.
|
|
263
|
+
- **TQ4 (4-bit)**: 8x compression vs. float32. Maximum efficiency for massive datasets where storage cost is the primary bottleneck.
|
|
264
|
+
|
|
265
|
+
```python
|
|
266
|
+
# Recommended default: HNSW with TurboQuant 8-bit quantization (TQ8)
|
|
267
|
+
table.add_index("embedding", "hnsw_tq8")
|
|
268
|
+
|
|
269
|
+
# High-compression mode
|
|
270
|
+
table.add_index("embedding", "hnsw_tq4")
|
|
271
|
+
|
|
272
|
+
# Custom HNSW-PQ configuration
|
|
273
|
+
table.add_index("embedding", {
|
|
274
|
+
"type": "hnsw_pq",
|
|
275
|
+
"complexity": 32,
|
|
276
|
+
"quality": 300,
|
|
277
|
+
"compression": 32 # PQ subspaces
|
|
278
|
+
})
|
|
279
|
+
```
|
|
258
280
|
|
|
259
281
|
### Python Vector Distance API with GPU Acceleration
|
|
260
282
|
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "code",
|
|
5
|
+
"execution_count": null,
|
|
6
|
+
"id": "c2ee4517-3acc-4aaf-aace-60dfb489e44d",
|
|
7
|
+
"metadata": {},
|
|
8
|
+
"outputs": [],
|
|
9
|
+
"source": []
|
|
10
|
+
}
|
|
11
|
+
],
|
|
12
|
+
"metadata": {
|
|
13
|
+
"kernelspec": {
|
|
14
|
+
"display_name": "Python 3 (ipykernel)",
|
|
15
|
+
"language": "python",
|
|
16
|
+
"name": "python3"
|
|
17
|
+
},
|
|
18
|
+
"language_info": {
|
|
19
|
+
"codemirror_mode": {
|
|
20
|
+
"name": "ipython",
|
|
21
|
+
"version": 3
|
|
22
|
+
},
|
|
23
|
+
"file_extension": ".py",
|
|
24
|
+
"mimetype": "text/x-python",
|
|
25
|
+
"name": "python",
|
|
26
|
+
"nbconvert_exporter": "python",
|
|
27
|
+
"pygments_lexer": "ipython3",
|
|
28
|
+
"version": "3.12.3"
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"nbformat": 4,
|
|
32
|
+
"nbformat_minor": 5
|
|
33
|
+
}
|
|
@@ -42,7 +42,7 @@ fn bench_query_indexed(c: &mut Criterion) {
|
|
|
42
42
|
.with_columns_to_index(vec!["id".to_string()]);
|
|
43
43
|
let writer = HybridSegmentWriter::new(writer_config);
|
|
44
44
|
writer.write_batch(&batch).unwrap();
|
|
45
|
-
writer.build_indexes(&batch).unwrap();
|
|
45
|
+
writer.build_indexes(&batch, 0).unwrap();
|
|
46
46
|
let entry = writer.to_manifest_entry();
|
|
47
47
|
|
|
48
48
|
// For Reader: Use relative path logic since store is rooted at tmp_dir
|
|
@@ -81,7 +81,7 @@ fn bench_vector_search(c: &mut Criterion) {
|
|
|
81
81
|
|
|
82
82
|
let writer = HybridSegmentWriter::new(config);
|
|
83
83
|
writer.write_batch(&batch).unwrap();
|
|
84
|
-
writer.build_indexes(&batch).unwrap();
|
|
84
|
+
writer.build_indexes(&batch, 0).unwrap();
|
|
85
85
|
|
|
86
86
|
// Generate a random query vector
|
|
87
87
|
let mut rng = rand::thread_rng();
|
|
@@ -128,7 +128,7 @@ fn bench_hybrid_search(c: &mut Criterion) {
|
|
|
128
128
|
|
|
129
129
|
let writer = HybridSegmentWriter::new(config);
|
|
130
130
|
writer.write_batch(&batch).unwrap();
|
|
131
|
-
writer.build_indexes(&batch).unwrap();
|
|
131
|
+
writer.build_indexes(&batch, 0).unwrap();
|
|
132
132
|
|
|
133
133
|
let mut rng = rand::thread_rng();
|
|
134
134
|
let query_vec: Vec<f32> = (0..vec_dim).map(|_| rng.gen()).collect();
|
|
@@ -192,7 +192,7 @@ fn bench_high_selectivity(c: &mut Criterion) {
|
|
|
192
192
|
.with_columns_to_index(vec!["id".to_string()]);
|
|
193
193
|
let writer = HybridSegmentWriter::new(config);
|
|
194
194
|
writer.write_batch(&batch).unwrap();
|
|
195
|
-
writer.build_indexes(&batch).unwrap();
|
|
195
|
+
writer.build_indexes(&batch, 0).unwrap();
|
|
196
196
|
let entry = writer.to_manifest_entry();
|
|
197
197
|
|
|
198
198
|
// For Reader: Use relative path logic
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"name": "multi_filter_vector",
|
|
4
|
+
"throughput": null,
|
|
5
|
+
"latency_p50_ms": 19.483089447021484,
|
|
6
|
+
"latency_p95_ms": 22.400259971618652,
|
|
7
|
+
"latency_p99_ms": 22.65956401824951,
|
|
8
|
+
"latency_mean_ms": 20.386934280395508,
|
|
9
|
+
"latency_min_ms": 18.953323364257812,
|
|
10
|
+
"latency_max_ms": 22.724390029907227,
|
|
11
|
+
"elapsed_sec": 0.06162667274475098,
|
|
12
|
+
"memory_delta_mb": 0.21875
|
|
13
|
+
}
|
|
14
|
+
]
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
## Benchmark Results
|
|
2
|
+
|
|
3
|
+
**Hardware & OS:** Linux 6.8.0-106-generic | x86_64 (8 threads) | 63 GB RAM
|
|
4
|
+
|
|
5
|
+
| name | throughput | latency_p50_ms | latency_p95_ms | latency_p99_ms | latency_mean_ms | latency_min_ms | latency_max_ms | elapsed_sec | memory_delta_mb |
|
|
6
|
+
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
|
7
|
+
| multi_filter_vector | None | 19.48 | 22.40 | 22.66 | 20.39 | 18.95 | 22.72 | 0.06 | 0.22 |
|
|
File without changes
|
|
@@ -4,6 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
HyperStreamDB provides a comprehensive Python API for vector distance computations with GPU acceleration support across multiple hardware backends. This API allows you to compute distances between vectors directly from Python without writing SQL queries, with optional GPU acceleration for high-performance batch operations.
|
|
6
6
|
|
|
7
|
+
> [!NOTE]
|
|
8
|
+
> This guide covers **standalone** distance functions for CPU/GPU. For persistent vector storage and search with TurboQuant indexing (TQ8/TQ4), please see [Vector Configuration](VECTOR_CONFIGURATION.md).
|
|
9
|
+
|
|
7
10
|
## Supported Distance Metrics
|
|
8
11
|
|
|
9
12
|
The API supports six distance metrics:
|
|
@@ -234,32 +234,37 @@ ORDER BY distance
|
|
|
234
234
|
LIMIT 10;
|
|
235
235
|
```
|
|
236
236
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
Index build parameters are set during index creation:
|
|
237
|
+
Index build parameters are set during index creation using the fluent `add_index` method:
|
|
240
238
|
|
|
241
239
|
```python
|
|
242
240
|
import hyperstreamdb as hdb
|
|
243
241
|
|
|
244
242
|
table = hdb.Table("s3://bucket/my-table")
|
|
245
243
|
|
|
246
|
-
#
|
|
247
|
-
|
|
244
|
+
# TurboQuant 8-bit quantization (Recommended Default)
|
|
245
|
+
# 4x compression, near-lossless accuracy
|
|
246
|
+
table.add_index("embedding", "hnsw_tq8")
|
|
247
|
+
|
|
248
|
+
# TurboQuant 4-bit quantization
|
|
249
|
+
# 8x compression, maximum efficiency
|
|
250
|
+
table.add_index("embedding", "hnsw_tq4")
|
|
251
|
+
|
|
252
|
+
# Custom HNSW parameters
|
|
253
|
+
table.add_index(
|
|
248
254
|
column="embedding",
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
"
|
|
252
|
-
"
|
|
255
|
+
index_config={
|
|
256
|
+
"type": "hnsw",
|
|
257
|
+
"complexity": 16, # Max connections per node (formerly 'm')
|
|
258
|
+
"quality": 200, # Construction beam width (formerly 'ef_construction')
|
|
253
259
|
}
|
|
254
260
|
)
|
|
255
261
|
|
|
256
|
-
#
|
|
257
|
-
table.
|
|
262
|
+
# Product Quantization (PQ)
|
|
263
|
+
table.add_index(
|
|
258
264
|
column="embedding",
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
"
|
|
262
|
-
"n_probes": 10, # Default search probes
|
|
265
|
+
index_config={
|
|
266
|
+
"type": "hnsw_pq",
|
|
267
|
+
"compression": 32 # PQ subspaces (formerly 'subspaces')
|
|
263
268
|
}
|
|
264
269
|
)
|
|
265
270
|
```
|
|
@@ -224,13 +224,22 @@ See [GPU Setup Guide](GPU_SETUP_GUIDE.md) for installation and configuration.
|
|
|
224
224
|
```python
|
|
225
225
|
import hyperstreamdb as hdb
|
|
226
226
|
|
|
227
|
-
# Configure HNSW index parameters
|
|
228
227
|
table = hdb.Table("s3://bucket/table")
|
|
229
|
-
|
|
228
|
+
|
|
229
|
+
# TurboQuant 8-bit (4x compression)
|
|
230
|
+
table.add_index("embedding", "hnsw_tq8")
|
|
231
|
+
|
|
232
|
+
# TurboQuant 4-bit (8x compression)
|
|
233
|
+
table.add_index("embedding", "hnsw_tq4")
|
|
234
|
+
|
|
235
|
+
# Custom HNSW configuration
|
|
236
|
+
table.add_index(
|
|
230
237
|
column="embedding",
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
238
|
+
index_config={
|
|
239
|
+
"type": "hnsw",
|
|
240
|
+
"complexity": 16, # connections per node
|
|
241
|
+
"quality": 200 # construction search width
|
|
242
|
+
}
|
|
234
243
|
)
|
|
235
244
|
```
|
|
236
245
|
|
|
Binary file
|
|
@@ -96,12 +96,20 @@ except ImportError:
|
|
|
96
96
|
from typing import List, Optional, Union, Dict, Any
|
|
97
97
|
import os
|
|
98
98
|
|
|
99
|
+
class IndexType:
    """
    HyperStreamDB indexing algorithms.

    Each attribute is the plain string identifier that can be passed as the
    ``algorithm`` argument of ``Table.add_index`` or used as a value in
    ``Table.set_index_columns``.
    """
    # Index identifiers that were already exposed.
    HNSW = "hnsw"
    BM25 = "bm25"
    BLOOM = "bloom"
    BITMAP = "bitmap"
    INVERTED = "inverted"

    # Quantized HNSW variants referenced by the README / vector configuration
    # guide ("hnsw_tq8", "hnsw_tq4", "hnsw_pq"); exposed here so callers do not
    # have to spell the raw strings.
    HNSW_TQ8 = "hnsw_tq8"
    HNSW_TQ4 = "hnsw_tq4"
    HNSW_PQ = "hnsw_pq"
|
|
108
|
+
|
|
99
109
|
def _resolve_uri(uri: str) -> str:
|
|
100
|
-
"""
|
|
101
|
-
|
|
102
|
-
return os.path.abspath(uri)
|
|
110
|
+
if not uri.startswith(("s3://", "file://", "az://", "gs://", "http://", "https://")):
|
|
111
|
+
return f"file://{os.abspath(uri)}" if hasattr(os, "abspath") else uri
|
|
103
112
|
return uri
|
|
104
|
-
|
|
105
113
|
class Query:
|
|
106
114
|
"""
|
|
107
115
|
Fluent Query interface for HyperStreamDB.
|
|
@@ -251,6 +259,10 @@ class Table:
|
|
|
251
259
|
pass
|
|
252
260
|
raise TypeError(f"Unsupported data type for write: {type(data)}")
|
|
253
261
|
|
|
262
|
+
def insert(self, data: Any, device: Optional[Any] = None):
    """
    Insert ``data`` into the table.

    Thin alias that forwards to ``write()``, offered for users coming from
    other vector database APIs. ``device`` is passed through unchanged.
    """
    outcome = self.write(data, device=device)
    return outcome
|
|
265
|
+
|
|
254
266
|
def write_pandas(self, df: pd.DataFrame, device: Optional[Any] = None):
|
|
255
267
|
"""High-level Pandas ingestion with auto-vectorization."""
|
|
256
268
|
return self._write_pandas(df, device=device)
|
|
@@ -583,24 +595,39 @@ class Table:
|
|
|
583
595
|
def set_index_config(self, column: str, enabled: bool = True, tokenizer: Optional[str] = None, device: Optional[str] = None):
    """
    Legacy wrapper for configuring the index of a single column.

    When ``enabled`` is false the call is forwarded to ``drop_index``.
    Otherwise an ``"hnsw"`` configuration is built, optionally carrying the
    ``tokenizer`` and the ``device`` (stored under ``"build_device"``), and
    handed to ``add_index``.
    """
    if enabled:
        options = {"type": "hnsw"}
        # Only truthy optional settings are copied into the configuration.
        for key, value in (("tokenizer", tokenizer), ("build_device", device)):
            if value:
                options[key] = value
        return self.add_index(column, options)
    return self.drop_index(column)
|
|
607
|
+
|
|
608
|
+
def set_index_columns(self, config: Dict[str, Union[str, List[Union[str, Dict[str, Any]]], Dict[str, Any]]]):
    """
    Apply indexing specifications to several columns in one call.

    ``config`` maps a column name to its specification: a single algorithm
    name, a configuration dictionary, or a list mixing both. The mapping is
    forwarded as-is to the underlying table object.

    Example:
        table.set_index_columns({
            "embedding": IndexType.HNSW,
            "content": ["hnsw", "bm25"],
            "category": "bitmap"
        })
    """
    backend = self._inner
    return backend.set_index_columns(config)
|
|
594
621
|
|
|
595
|
-
def add_index(self, column: str,
|
|
622
|
+
def add_index(self, column: str, algorithm: Union[str, Dict[str, Any]] = "hnsw", index_config: Optional[Union[str, Dict[str, Any]]] = None):
    """
    Add an indexing strategy to a column.

    Args:
        column: Name of the column to index.
        algorithm: Algorithm name (e.g. ``"hnsw"``) or a configuration
            dictionary such as ``{"type": "hnsw", "complexity": 16}``.
        index_config: Keyword alias for ``algorithm``, accepted so that the
            documented call form ``add_index(column=..., index_config={...})``
            works. When provided (not ``None``) it takes precedence over
            ``algorithm``.

    Returns:
        Whatever the underlying table object's ``add_index`` returns.
    """
    spec = algorithm if index_config is None else index_config
    return self._inner.add_index(column, spec)
|
|
600
627
|
|
|
601
628
|
def drop_index(self, column: str):
    """
    Drop every indexing strategy configured for ``column``.

    The request is delegated to the underlying table object and its result
    is returned unchanged.
    """
    backend = self._inner
    return backend.drop_index(column)
|
|
606
633
|
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Scratch script for locating function blocks inside src/core/table/mod.rs.
# NOTE(review): this looks unfinished -- `re` and `os` are imported but not
# used, and extract_between() never records an end index or returns a value.
import re
import os

# Load the Rust module once, as a list of lines, at import time.
with open("src/core/table/mod.rs", "r") as f:
    mod_lines = f.readlines()

def extract_between(start_str, end_str):
    # Scans mod_lines for the first line containing `start_str` that also
    # starts with one of the expected item prefixes, then looks for `end_str`.
    # NOTE(review): `end_idx` is initialised but never updated and nothing is
    # returned, so calling this currently has no observable effect.
    start_idx = -1
    end_idx = -1
    for i, line in enumerate(mod_lines):
        if start_str in line and start_idx == -1:
            # check if it's the right indentation (4 spaces)
            if line.startswith(" pub ") or line.startswith(" async fn ") or line.startswith(" fn "):
                start_idx = i
        if end_str in line and start_idx != -1:
            # find the end of that block
            pass # this is hard in python.

# Let's just use rustc --pretty !!
|
|
@@ -11,6 +11,7 @@ use crate::core::index::hnsw_rs::hnsw::Hnsw;
|
|
|
11
11
|
use crate::core::index::hnsw_rs::dist::DistL2;
|
|
12
12
|
use arrow::record_batch::RecordBatch;
|
|
13
13
|
use parquet::file::metadata::ParquetMetaData;
|
|
14
|
+
use parquet::bloom_filter::Sbbf;
|
|
14
15
|
use std::path::PathBuf;
|
|
15
16
|
use object_store::ObjectStore;
|
|
16
17
|
use anyhow::Result;
|
|
@@ -59,7 +60,22 @@ impl DiskCache {
|
|
|
59
60
|
}
|
|
60
61
|
|
|
61
62
|
let b = self.store.get(&object_store::path::Path::from(path)).await?.bytes().await?;
|
|
62
|
-
|
|
63
|
+
|
|
64
|
+
// Atomic write: write to unique temp file then rename
|
|
65
|
+
let thread_id = format!("{:?}", std::thread::current().id());
|
|
66
|
+
let temp_name = format!("{}.{}.{}.tmp", hash, std::process::id(), thread_id);
|
|
67
|
+
let temp_path = cache_dir.join(temp_name);
|
|
68
|
+
|
|
69
|
+
if let Ok(mut f) = std::fs::File::create(&temp_path) {
|
|
70
|
+
use std::io::Write;
|
|
71
|
+
if f.write_all(&b).is_ok() {
|
|
72
|
+
// rename is atomic on POSIX
|
|
73
|
+
let _ = std::fs::rename(&temp_path, &cache_path);
|
|
74
|
+
} else {
|
|
75
|
+
let _ = std::fs::remove_file(&temp_path);
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
63
79
|
Ok(b)
|
|
64
80
|
} else {
|
|
65
81
|
let res = self.store.get(&object_store::path::Path::from(path)).await
|
|
@@ -160,6 +176,13 @@ pub static PARQUET_META_CACHE: Lazy<Cache<String, (Arc<ParquetMetaData>, usize)>
|
|
|
160
176
|
.build()
|
|
161
177
|
});
|
|
162
178
|
|
|
179
|
+
pub static BLOOM_FILTER_CACHE: Lazy<Cache<String, Arc<Sbbf>>> = Lazy::new(|| {
|
|
180
|
+
Cache::builder()
|
|
181
|
+
.max_capacity(2048) // Roughly 250MB if each is 128KB
|
|
182
|
+
.time_to_idle(Duration::from_secs(60 * 30))
|
|
183
|
+
.build()
|
|
184
|
+
});
|
|
185
|
+
|
|
163
186
|
/// Doris-inspired Block Cache for decoded RecordBatches.
|
|
164
187
|
/// Bypasses Parquet decoding/decompression for frequently accessed blocks.
|
|
165
188
|
pub static BLOCK_CACHE: Lazy<Cache<String, Arc<RecordBatch>>> = Lazy::new(|| {
|
|
@@ -274,3 +274,91 @@ pub fn sparse_l2_distance_squared(
|
|
|
274
274
|
|
|
275
275
|
sum
|
|
276
276
|
}
|
|
277
|
+
|
|
278
|
+
/// Squared L2 distance between two quantized `u8` vectors.
///
/// Returns the sum of squared element-wise differences (no square root is
/// taken), converted to `f32`. If the slices differ in length, only the
/// common prefix is compared.
///
/// The sum is accumulated in `i64`: each term can reach `255 * 255`, so a
/// 32-bit accumulator would overflow for long vectors with large differences.
#[inline(always)]
pub fn l2_distance_u8(a: &[u8], b: &[u8]) -> f32 {
    let sum: i64 = a
        .iter()
        .zip(b.iter())
        .map(|(&x, &y)| {
            let diff = i64::from(x) - i64::from(y);
            diff * diff
        })
        .sum();
    sum as f32
}
|
|
288
|
+
|
|
289
|
+
/// Asymmetric Distance Calculation (ADC) for quantized vectors.
///
/// Computes the squared L2 distance (no square root) between a float32
/// `query` and a quantized `encoded` vector. Each code `e` is decoded as
/// `e as f32 * (1.0 / scale) + offset` before being compared with the
/// corresponding query component.
///
/// If the slices differ in length, only the common prefix is compared.
/// `scale` is expected to be non-zero; a zero scale makes the reciprocal
/// infinite.
#[inline(always)]
pub fn l2_distance_adc(query: &[f32], encoded: &[u8], offset: f32, scale: f32) -> f32 {
    // Truncate both inputs to the shared length first. Otherwise the two
    // `chunks_exact` remainders could start at different offsets and pair up
    // elements that do not belong to the same dimension.
    let len = query.len().min(encoded.len());
    let (query, encoded) = (&query[..len], &encoded[..len]);

    let inv_scale = 1.0 / scale;
    let mut sum = 0.0f32;

    // Fixed-size chunks of 8 keep the inner loop free of bounds checks.
    let chunks_q = query.chunks_exact(8);
    let chunks_e = encoded.chunks_exact(8);
    let rem_q = chunks_q.remainder();
    let rem_e = chunks_e.remainder();

    for (q_chunk, e_chunk) in chunks_q.zip(chunks_e) {
        for (&q, &e) in q_chunk.iter().zip(e_chunk.iter()) {
            let decoded = (e as f32 * inv_scale) + offset;
            let diff = q - decoded;
            sum += diff * diff;
        }
    }

    // Tail elements that did not fill a whole chunk.
    for (&q, &e) in rem_q.iter().zip(rem_e.iter()) {
        let decoded = (e as f32 * inv_scale) + offset;
        let diff = q - decoded;
        sum += diff * diff;
    }
    sum
}
|
|
317
|
+
|
|
318
|
+
/// Squared L2 distance between two packed 4-bit (u4) quantized vectors.
///
/// Every byte holds two 4-bit codes, one in the low nibble and one in the
/// high nibble. The squared differences of both nibbles are summed over all
/// byte pairs and the total is returned as `f32` (no square root is taken).
#[inline(always)]
pub fn l2_distance_u4(a: &[u8], b: &[u8]) -> f32 {
    let total: i32 = a
        .iter()
        .zip(b.iter())
        .map(|(&packed_a, &packed_b)| {
            let low = i32::from(packed_a & 0x0F) - i32::from(packed_b & 0x0F);
            let high = i32::from((packed_a >> 4) & 0x0F) - i32::from((packed_b >> 4) & 0x0F);
            low * low + high * high
        })
        .sum();
    total as f32
}
|
|
333
|
+
|
|
334
|
+
#[derive(Debug, Clone, Copy)]
|
|
335
|
+
pub struct DistL2u8;
|
|
336
|
+
|
|
337
|
+
impl DistL2u8 {
|
|
338
|
+
#[inline(always)]
|
|
339
|
+
pub fn distance(&self, a: &[u8], b: &[u8]) -> f32 {
|
|
340
|
+
l2_distance_u8(a, b)
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
impl super::hnsw_rs::dist::Distance<u8> for DistL2u8 {
|
|
345
|
+
fn eval(&self, va: &[u8], vb: &[u8]) -> f32 {
|
|
346
|
+
self.distance(va, vb)
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
#[derive(Debug, Clone, Copy)]
|
|
351
|
+
pub struct DistL2u4;
|
|
352
|
+
|
|
353
|
+
impl DistL2u4 {
|
|
354
|
+
#[inline(always)]
|
|
355
|
+
pub fn distance(&self, a: &[u8], b: &[u8]) -> f32 {
|
|
356
|
+
l2_distance_u4(a, b)
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
impl super::hnsw_rs::dist::Distance<u8> for DistL2u4 {
|
|
361
|
+
fn eval(&self, va: &[u8], vb: &[u8]) -> f32 {
|
|
362
|
+
self.distance(va, vb)
|
|
363
|
+
}
|
|
364
|
+
}
|
|
@@ -424,6 +424,25 @@ impl ComputeContext {
|
|
|
424
424
|
}
|
|
425
425
|
|
|
426
426
|
pub fn auto_detect() -> Self {
|
|
427
|
+
{
|
|
428
|
+
let read = GLOBAL_GPU_CONTEXT.read().unwrap();
|
|
429
|
+
if let Some(ctx) = &*read {
|
|
430
|
+
return ctx.clone();
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
let mut write = GLOBAL_GPU_CONTEXT.write().unwrap();
|
|
435
|
+
// Check again after acquiring lock
|
|
436
|
+
if let Some(ctx) = &*write {
|
|
437
|
+
return ctx.clone();
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
let ctx = Self::do_auto_detect();
|
|
441
|
+
*write = Some(ctx.clone());
|
|
442
|
+
ctx
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
fn do_auto_detect() -> Self {
|
|
427
446
|
#[cfg(feature = "cuda")]
|
|
428
447
|
if let Ok(b) = CudaBackend::new(0) { return Self { backend: ComputeBackend::Cuda, device_id: 0, implementation: Some(Arc::new(b)) }; }
|
|
429
448
|
#[cfg(all(target_os = "macos", feature = "mps"))]
|