hyperstreamdb 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/Cargo.lock +14 -1
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/Cargo.toml +2 -2
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/PKG-INFO +1 -1
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/cache.rs +16 -1
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/compaction.rs +1 -1
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/ffi.rs +4 -4
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/iceberg.rs +17 -12
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/gpu.rs +19 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_ivf.rs +37 -2
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_rs/flatten.rs +8 -11
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_rs/hnswio.rs +24 -39
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/memory.rs +41 -3
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/manifest.rs +8 -4
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/merge.rs +3 -3
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/segment.rs +52 -53
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/table/mod.rs +4 -4
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/table/read.rs +2 -2
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/table/schema.rs +6 -6
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/table/write.rs +13 -13
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/python_binding.rs +3 -2
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/python_gpu_context.rs +1 -1
- hyperstreamdb-0.2.6/src/telemetry/tracing.rs +97 -0
- hyperstreamdb-0.2.4/src/telemetry/tracing.rs +0 -46
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/.gitattributes +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/.gitignore +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/.hypothesis/constants/32b327793848e7d8 +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/.hypothesis/constants/67b0a8ccf18bf5d2 +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/.hypothesis/constants/84828557b4ee7be4 +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/.instructions.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/CNAME +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/DORIS_OPTIMIZATION_PATTERNS.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/LICENSE-APACHE +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/LICENSE-MIT +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/README.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/RUN_COMPLIANCE_TESTS.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/STEERING.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/THIRDPARTY_NOTICES.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benches/bench_table.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benches/performance.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/BENCHMARK_REPORT.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/benchmark_charts.png +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/benchmark_results.csv +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/concurrent_queries_20260409_214245.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/concurrent_queries_20260409_214245.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/filtered_search_comparison_20260409_222607.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/filtered_search_comparison_20260409_222607.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/filtered_vector_search_20260409_214355.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/filtered_vector_search_20260409_214355.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/filtered_vector_search_20260409_220418.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/filtered_vector_search_20260409_220418.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/filtered_vector_search_20260409_222053.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/filtered_vector_search_20260409_222053.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/filtered_vector_search_20260409_225907.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/filtered_vector_search_20260409_225907.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/full_scan_baseline_20260409_222303.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/full_scan_baseline_20260409_222303.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/high_selectivity_filter_20260409_222302.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/high_selectivity_filter_20260409_222302.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/ingestion_comparison_20260409_222516.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/ingestion_comparison_20260409_222516.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/multi_filter_vector_20260409_214428.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/multi_filter_vector_20260409_214428.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/multi_filter_vector_20260409_220450.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/multi_filter_vector_20260409_220450.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/multi_filter_vector_20260409_222131.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/multi_filter_vector_20260409_222131.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/multi_filter_vector_20260409_225938.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/multi_filter_vector_20260409_225938.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/post_vs_pre_filter_20260409_214501.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/post_vs_pre_filter_20260409_214501.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/post_vs_pre_filter_20260409_220524.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/post_vs_pre_filter_20260409_220524.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/post_vs_pre_filter_20260409_222204.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/post_vs_pre_filter_20260409_222204.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/post_vs_pre_filter_20260409_230010.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/post_vs_pre_filter_20260409_230010.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/query_comparison_20260409_222541.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/query_comparison_20260409_222541.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/range_query_20260409_222302.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/range_query_20260409_222302.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/search_filtered_high_selectivity_20260409_214144.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/search_filtered_high_selectivity_20260409_214144.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/search_unfiltered_20260409_214028.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/benchmark_results/search_unfiltered_20260409_214028.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/book.toml +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/broken_binaries_all.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/broken_bins.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/build-connectors.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/build.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/check_iceberg_compliance.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/compliance_output.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/critical_code_review.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/debug_log.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/demo_basics_run.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/demo_basics_v2.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docker-compose-minio-nessie.yml +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docker-compose.yml +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/BENCHMARKING.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/COMPREHENSIVE_GUIDE.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/CONCURRENCY.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/CONFIGURATION.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/GPU_SETUP_GUIDE.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/ICEBERG_V2_V3_API.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/INSTALLATION.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/PGVECTOR_SQL_GUIDE.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/PYTHON_VECTOR_API.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/VECTOR_CONFIGURATION.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/api_reference.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/architecture.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/catalog_usage.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/index.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/integrations/README.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/integrations/java_jni.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/integrations/python.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/integrations/spark.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/integrations/trino.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/requirements.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/source/_static/HyperStreamDB.png +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/source/api/python.rst +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/source/api/rust.rst +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/source/conf.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/source/index.rst +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/docs/source/roadmap.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/fix_nb.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/proptest-regressions/core/index/gpu.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/proptest-regressions/core/sql/vector_literal.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/proptest-regressions/core/sql/vector_udf.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/pyproject.toml +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/python/hyperstreamdb/__init__.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/python/hyperstreamdb/embeddings.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/python_test_output.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/python_test_output_v2.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/python_test_output_v3.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/rust_check_all_warnings.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/rust_test_output.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/rust_warnings.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/scratch/check_os_error.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/simd_test_results.txt +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/spark-hyperstream/.bloop/bloop.settings.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/spark-hyperstream/.bloop/spark-hyperstream-test.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/spark-hyperstream/.bloop/spark-hyperstream.json +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/spark-hyperstream/pom.xml +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/DefaultSource.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartition.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReader.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReaderFactory.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamScanBuilder.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamTable.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/bin/gateway.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/bin/hdb.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/bin/iceberg_rest.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/bin/probe_datafusion.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/bin/setup_test_data.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/bin/verify_layered_indexing.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/catalog/config.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/catalog/glue.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/catalog/hive.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/catalog/jdbc.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/catalog/mod.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/catalog/nessie.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/catalog/rest.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/catalog/unity.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/clustering.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/embeddings.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/iceberg/iceberg_delete.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/cuda/cosine_distance.cu +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/cuda/hamming_distance.cu +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/cuda/inner_product.cu +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/cuda/jaccard_distance.cu +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/cuda/kmeans_assignment.cu +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/cuda/l1_distance.cu +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/cuda/l2_distance.cu +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/distance.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_rs/annhdf5.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_rs/api.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_rs/dist.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_rs/hnsw.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_rs/libext.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_rs/mod.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_rs/prelude.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/hnsw_rs/test.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/ivf.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/mod.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/mps/cosine_distance.metal +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/mps/hamming_distance.metal +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/mps/inner_product.metal +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/mps/jaccard_distance.metal +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/mps/kmeans_assignment.metal +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/mps/l1_distance.metal +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/mps/l2_distance.metal +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/opencl/cosine_distance.cl +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/opencl/hamming_distance.cl +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/opencl/inner_product.cl +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/opencl/jaccard_distance.cl +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/opencl/kmeans_assignment.cl +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/opencl/l1_distance.cl +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/opencl/l2_distance.cl +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/pq.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/tokenizer.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/index/wgpu_kernel.wgsl +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/license.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/maintenance.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/metadata.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/mod.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/nessie.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/planner.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/puffin.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/query.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/reader.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/sql/mod.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/sql/optimizer.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/sql/pgvector_rewriter.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/sql/physical_plan/index_join.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/sql/physical_plan.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/sql/session.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/sql/vector_literal.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/sql/vector_operators.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/sql/vector_udf.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/storage.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/table/builder.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/table/fluent.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/core/wal.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/enterprise/continuous_indexing.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/enterprise/license.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/enterprise/mod.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/index.rs.old +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/lib.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/python_distance.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/telemetry/metrics.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/src/telemetry/mod.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/task.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/all_types_index_test.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/bin/generate_iceberg_manifests.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/bin/verify_iceberg_read_check.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/check_mmh3.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/data/download_nyc_taxi.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/data/generate_embeddings.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/data/generate_wikipedia.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/data/start_nessie.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/datafusion_rust_test.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/debug_murmur3.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/fuzz_murmur3.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/integration_test_hnsw_ivf_native.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/performance/README.md +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/prototype_merge.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/schema_evolution_test.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_catalog_commit.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_compliance.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_delete_correctness.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_iceberg_python_delete.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_iceberg_rest.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_iceberg_rest_create.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_iceberg_rest_delete.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_iceberg_rest_remove_index.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_iceberg_rest_update.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_metadata_creation.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_mor_reads.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_mor_writes.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_partition_transforms.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_partitioned_writes.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_puffin_index.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_rest_updates.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/tests/verify_schema_compat.rs +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config/.DS_Store +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config/catalog/glue_catalog.properties +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config/catalog/hyperstreamdb.properties +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config/catalog/iceberg.properties +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config/catalog/memory.properties +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config/catalog/postgres.properties +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config/config.properties +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config/entrypoint.sh +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config/jvm.config +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config/node.properties +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-config.zip +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-hyperstream/pom.xml +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBColumnHandle.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBConnectorFactory.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBMetadata.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSource.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSourceProvider.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPlugin.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplit.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplitManager.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBTableHandle.java +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/update_schema_patch.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/update_schema_patch2.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/verify_docstrings.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/verify_fluent_api.py +0 -0
- {hyperstreamdb-0.2.4 → hyperstreamdb-0.2.6}/verify_unified_ingest.py +0 -0
|
@@ -3375,7 +3375,7 @@ dependencies = [
|
|
|
3375
3375
|
|
|
3376
3376
|
[[package]]
|
|
3377
3377
|
name = "hyperstreamdb"
|
|
3378
|
-
version = "0.2.4"
|
|
3378
|
+
version = "0.2.6"
|
|
3379
3379
|
dependencies = [
|
|
3380
3380
|
"ahash 0.8.12",
|
|
3381
3381
|
"anyhow",
|
|
@@ -7141,6 +7141,16 @@ dependencies = [
|
|
|
7141
7141
|
"web-time",
|
|
7142
7142
|
]
|
|
7143
7143
|
|
|
7144
|
+
[[package]]
|
|
7145
|
+
name = "tracing-serde"
|
|
7146
|
+
version = "0.2.0"
|
|
7147
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
7148
|
+
checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1"
|
|
7149
|
+
dependencies = [
|
|
7150
|
+
"serde",
|
|
7151
|
+
"tracing-core",
|
|
7152
|
+
]
|
|
7153
|
+
|
|
7144
7154
|
[[package]]
|
|
7145
7155
|
name = "tracing-subscriber"
|
|
7146
7156
|
version = "0.3.23"
|
|
@@ -7151,12 +7161,15 @@ dependencies = [
|
|
|
7151
7161
|
"nu-ansi-term",
|
|
7152
7162
|
"once_cell",
|
|
7153
7163
|
"regex-automata",
|
|
7164
|
+
"serde",
|
|
7165
|
+
"serde_json",
|
|
7154
7166
|
"sharded-slab",
|
|
7155
7167
|
"smallvec",
|
|
7156
7168
|
"thread_local",
|
|
7157
7169
|
"tracing",
|
|
7158
7170
|
"tracing-core",
|
|
7159
7171
|
"tracing-log",
|
|
7172
|
+
"tracing-serde",
|
|
7160
7173
|
]
|
|
7161
7174
|
|
|
7162
7175
|
[[package]]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "hyperstreamdb"
|
|
3
|
-
version = "0.2.4"
|
|
3
|
+
version = "0.2.6"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
license = "MIT AND Apache-2.0"
|
|
6
6
|
description = "HyperStreamDB - Serverless Index-Streaming Database with Overlay Indexing and Vector Search"
|
|
@@ -102,7 +102,7 @@ anyhow = "1.0"
|
|
|
102
102
|
|
|
103
103
|
# logging
|
|
104
104
|
tracing = "0.1"
|
|
105
|
-
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
|
105
|
+
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
|
|
106
106
|
url = "2.5"
|
|
107
107
|
uuid = { version = "1.10", features = ["v4", "fast-rng", "macro-diagnostics"] }
|
|
108
108
|
chrono = "0.4"
|
|
@@ -59,7 +59,22 @@ impl DiskCache {
|
|
|
59
59
|
}
|
|
60
60
|
|
|
61
61
|
let b = self.store.get(&object_store::path::Path::from(path)).await?.bytes().await?;
|
|
62
|
-
|
|
62
|
+
|
|
63
|
+
// Atomic write: write to unique temp file then rename
|
|
64
|
+
let thread_id = format!("{:?}", std::thread::current().id());
|
|
65
|
+
let temp_name = format!("{}.{}.{}.tmp", hash, std::process::id(), thread_id);
|
|
66
|
+
let temp_path = cache_dir.join(temp_name);
|
|
67
|
+
|
|
68
|
+
if let Ok(mut f) = std::fs::File::create(&temp_path) {
|
|
69
|
+
use std::io::Write;
|
|
70
|
+
if f.write_all(&b).is_ok() {
|
|
71
|
+
// rename is atomic on POSIX
|
|
72
|
+
let _ = std::fs::rename(&temp_path, &cache_path);
|
|
73
|
+
} else {
|
|
74
|
+
let _ = std::fs::remove_file(&temp_path);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
63
78
|
Ok(b)
|
|
64
79
|
} else {
|
|
65
80
|
let res = self.store.get(&object_store::path::Path::from(path)).await
|
|
@@ -510,7 +510,7 @@ mod tests {
|
|
|
510
510
|
// - Manifests v1, v2, v3
|
|
511
511
|
// - The 3 original data files (since they are ONLY in v1-v3)
|
|
512
512
|
let deleted = table.vacuum_async(1).await?;
|
|
513
|
-
|
|
513
|
+
tracing::info!("Vacuum deleted {} files", deleted);
|
|
514
514
|
|
|
515
515
|
// We expect at least 3 data files + 3 manifest files = 6 files deleted
|
|
516
516
|
assert!(deleted >= 6);
|
|
@@ -108,7 +108,7 @@ pub extern "system" fn Java_com_hyperstreamdb_trino_HyperStreamDBPageSource_open
|
|
|
108
108
|
Box::into_raw(Box::new(session)) as jlong
|
|
109
109
|
},
|
|
110
110
|
Err(e) => {
|
|
111
|
-
|
|
111
|
+
tracing::error!("FFI Error opening session: {}", e);
|
|
112
112
|
0
|
|
113
113
|
}
|
|
114
114
|
}
|
|
@@ -196,7 +196,7 @@ pub extern "system" fn Java_com_hyperstreamdb_trino_HyperStreamDBSplitManager_ge
|
|
|
196
196
|
}
|
|
197
197
|
},
|
|
198
198
|
Err(e) => {
|
|
199
|
-
|
|
199
|
+
tracing::error!("FFI Error creating table: {}", e);
|
|
200
200
|
"[]".to_string()
|
|
201
201
|
}
|
|
202
202
|
};
|
|
@@ -234,7 +234,7 @@ pub extern "system" fn Java_com_hyperstreamdb_spark_HyperStreamScanBuilder_listD
|
|
|
234
234
|
}
|
|
235
235
|
},
|
|
236
236
|
Err(e) => {
|
|
237
|
-
|
|
237
|
+
tracing::error!("FFI Error creating table: {}", e);
|
|
238
238
|
"[]".to_string()
|
|
239
239
|
}
|
|
240
240
|
};
|
|
@@ -267,7 +267,7 @@ fn open_session_helper(mut env: JNIEnv, path: JString) -> jlong {
|
|
|
267
267
|
match HyperStreamSession::new(&path_str) {
|
|
268
268
|
Ok(session) => Box::into_raw(Box::new(session)) as jlong,
|
|
269
269
|
Err(e) => {
|
|
270
|
-
|
|
270
|
+
tracing::error!("FFI Error opening session: {}", e);
|
|
271
271
|
0
|
|
272
272
|
}
|
|
273
273
|
}
|
|
@@ -1240,19 +1240,24 @@ impl IcebergWriter {
|
|
|
1240
1240
|
s if s.starts_with("truncate[") => r#"["null", "string"]"#,
|
|
1241
1241
|
"identity" => {
|
|
1242
1242
|
let source_id = field.source_ids.first().copied().or(field.source_id);
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
"Int64" | "long" => r#"["null", "long"]"#,
|
|
1248
|
-
"Float32" | "float" => r#"["null", "float"]"#,
|
|
1249
|
-
"Float64" | "double" => r#"["null", "double"]"#,
|
|
1250
|
-
"Boolean" | "bool" | "boolean" => r#"["null", "boolean"]"#,
|
|
1251
|
-
"string" | "utf8" | "utf-8" | "String" | "Utf8" => r#"["null", "string"]"#,
|
|
1252
|
-
_ => r#"["null", "string"]"#,
|
|
1253
|
-
}
|
|
1243
|
+
// Robustness: Prioritize Name-based resolution, fallback to ID (Name-First strategy)
|
|
1244
|
+
let resolved_field = schema.fields.iter().find(|sf| sf.name == field.name).cloned().or_else(|| {
|
|
1245
|
+
if let Some(id) = source_id {
|
|
1246
|
+
schema.fields.iter().find(|f| f.id == id).cloned()
|
|
1254
1247
|
} else {
|
|
1255
|
-
|
|
1248
|
+
None
|
|
1249
|
+
}
|
|
1250
|
+
});
|
|
1251
|
+
|
|
1252
|
+
if let Some(f) = resolved_field {
|
|
1253
|
+
match f.type_str.as_str() {
|
|
1254
|
+
"Int32" | "int" => r#"["null", "int"]"#,
|
|
1255
|
+
"Int64" | "long" => r#"["null", "long"]"#,
|
|
1256
|
+
"Float32" | "float" => r#"["null", "float"]"#,
|
|
1257
|
+
"Float64" | "double" => r#"["null", "double"]"#,
|
|
1258
|
+
"Boolean" | "bool" | "boolean" => r#"["null", "boolean"]"#,
|
|
1259
|
+
"string" | "utf8" | "utf-8" | "String" | "Utf8" => r#"["null", "string"]"#,
|
|
1260
|
+
_ => r#"["null", "string"]"#,
|
|
1256
1261
|
}
|
|
1257
1262
|
} else {
|
|
1258
1263
|
r#"["null", "string"]"#
|
|
@@ -424,6 +424,25 @@ impl ComputeContext {
|
|
|
424
424
|
}
|
|
425
425
|
|
|
426
426
|
pub fn auto_detect() -> Self {
|
|
427
|
+
{
|
|
428
|
+
let read = GLOBAL_GPU_CONTEXT.read().unwrap();
|
|
429
|
+
if let Some(ctx) = &*read {
|
|
430
|
+
return ctx.clone();
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
let mut write = GLOBAL_GPU_CONTEXT.write().unwrap();
|
|
435
|
+
// Check again after acquiring lock
|
|
436
|
+
if let Some(ctx) = &*write {
|
|
437
|
+
return ctx.clone();
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
let ctx = Self::do_auto_detect();
|
|
441
|
+
*write = Some(ctx.clone());
|
|
442
|
+
ctx
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
fn do_auto_detect() -> Self {
|
|
427
446
|
#[cfg(feature = "cuda")]
|
|
428
447
|
if let Ok(b) = CudaBackend::new(0) { return Self { backend: ComputeBackend::Cuda, device_id: 0, implementation: Some(Arc::new(b)) }; }
|
|
429
448
|
#[cfg(all(target_os = "macos", feature = "mps"))]
|
|
@@ -645,7 +645,7 @@ impl HnswIvfIndex {
|
|
|
645
645
|
|
|
646
646
|
let disk_cache = DiskCache::new(store.clone());
|
|
647
647
|
|
|
648
|
-
let root_path = if base_path.contains("://") {
|
|
648
|
+
let mut root_path = if base_path.contains("://") {
|
|
649
649
|
if let Ok(url) = url::Url::parse(base_path) {
|
|
650
650
|
url.path().trim_start_matches('/').to_string()
|
|
651
651
|
} else {
|
|
@@ -655,6 +655,13 @@ impl HnswIvfIndex {
|
|
|
655
655
|
base_path.to_string()
|
|
656
656
|
};
|
|
657
657
|
|
|
658
|
+
// Robustness: If the path points to a specific cluster shard or graph file, strip it to get the base
|
|
659
|
+
if let Some(idx) = root_path.find(".cluster_") {
|
|
660
|
+
root_path = root_path[..idx].to_string();
|
|
661
|
+
} else if let Some(idx) = root_path.find(".hnsw.graph") {
|
|
662
|
+
root_path = root_path[..idx].to_string();
|
|
663
|
+
}
|
|
664
|
+
|
|
658
665
|
let centroids_path = format!("{}.centroids.parquet", root_path);
|
|
659
666
|
let centroids_bytes = disk_cache.get_bytes(&centroids_path).await?;
|
|
660
667
|
|
|
@@ -814,7 +821,14 @@ impl HnswIvfIndex {
|
|
|
814
821
|
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
|
|
815
822
|
use std::fs::File;
|
|
816
823
|
|
|
817
|
-
let
|
|
824
|
+
let mut base_path_str = base_path.to_string();
|
|
825
|
+
if let Some(idx) = base_path_str.find(".cluster_") {
|
|
826
|
+
base_path_str = base_path_str[..idx].to_string();
|
|
827
|
+
} else if let Some(idx) = base_path_str.find(".hnsw.graph") {
|
|
828
|
+
base_path_str = base_path_str[..idx].to_string();
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
let centroids_path = format!("{}.centroids.parquet", base_path_str);
|
|
818
832
|
let file = File::open(&centroids_path)?;
|
|
819
833
|
let builder = ParquetRecordBatchReaderBuilder::try_new(file)?;
|
|
820
834
|
let reader = builder.build()?;
|
|
@@ -991,4 +1005,25 @@ mod tests {
|
|
|
991
1005
|
assert!(filter.contains(*id as u32), "Result ID {} was not in the filter!", id);
|
|
992
1006
|
}
|
|
993
1007
|
}
|
|
1008
|
+
|
|
1009
|
+
#[test]
|
|
1010
|
+
fn test_hnsw_ivf_path_robustness() {
|
|
1011
|
+
let base_path_str = "/tmp/test_robust";
|
|
1012
|
+
|
|
1013
|
+
let paths = vec![
|
|
1014
|
+
format!("{}.cluster_0.hnsw.graph", base_path_str),
|
|
1015
|
+
format!("{}.cluster_99.hnsw.graph", base_path_str),
|
|
1016
|
+
format!("{}.hnsw.graph", base_path_str),
|
|
1017
|
+
];
|
|
1018
|
+
|
|
1019
|
+
for p in paths {
|
|
1020
|
+
let mut root = p.clone();
|
|
1021
|
+
if let Some(idx) = root.find(".cluster_") {
|
|
1022
|
+
root = root[..idx].to_string();
|
|
1023
|
+
} else if let Some(idx) = root.find(".hnsw.graph") {
|
|
1024
|
+
root = root[..idx].to_string();
|
|
1025
|
+
}
|
|
1026
|
+
assert_eq!(root, base_path_str, "Failed to strip suffix from {}", p);
|
|
1027
|
+
}
|
|
1028
|
+
}
|
|
994
1029
|
}
|
|
@@ -205,17 +205,15 @@ fn test_dump_reload_graph_flatten() {
|
|
|
205
205
|
let neighborhood_before_dump = FlatNeighborhood::from(&hnsw);
|
|
206
206
|
let nbg_2_before = neighborhood_before_dump.get_neighbours(2).unwrap();
|
|
207
207
|
println!("voisins du point 2 {:?}", nbg_2_before);
|
|
208
|
-
// dump in a file.
|
|
209
|
-
let
|
|
210
|
-
let
|
|
211
|
-
|
|
212
|
-
|
|
208
|
+
// dump in a file. Use tempdir for parallel safety.
|
|
209
|
+
let temp_dir = tempfile::tempdir().unwrap();
|
|
210
|
+
let fname = temp_dir.path().join("dumpreloadtestflat");
|
|
211
|
+
let fname_str = fname.to_str().unwrap().to_string();
|
|
212
|
+
let _res = hnsw.file_dump(&fname_str);
|
|
213
|
+
|
|
213
214
|
// reload
|
|
214
215
|
log::debug!("\n\n hnsw reload");
|
|
215
|
-
|
|
216
|
-
// from now on we test with DistL1
|
|
217
|
-
let graphfname = String::from("dumpreloadtestflat.hnsw.graph");
|
|
218
|
-
let graphpath = PathBuf::from(graphfname);
|
|
216
|
+
let graphpath = PathBuf::from(format!("{}.hnsw.graph", fname_str));
|
|
219
217
|
let graphfileres = OpenOptions::new().read(true).open(&graphpath);
|
|
220
218
|
if graphfileres.is_err() {
|
|
221
219
|
println!("test_dump_reload: could not open file {:?}", graphpath.as_os_str());
|
|
@@ -223,8 +221,7 @@ fn test_dump_reload_graph_flatten() {
|
|
|
223
221
|
}
|
|
224
222
|
let graphfile = graphfileres.unwrap();
|
|
225
223
|
//
|
|
226
|
-
let
|
|
227
|
-
let datapath = PathBuf::from(datafname);
|
|
224
|
+
let datapath = PathBuf::from(format!("{}.hnsw.data", fname_str));
|
|
228
225
|
let datafileres = OpenOptions::new().read(true).open(&datapath);
|
|
229
226
|
if datafileres.is_err() {
|
|
230
227
|
println!("test_dump_reload : could not open file {:?}", datapath.as_os_str());
|
|
@@ -783,19 +783,15 @@ fn test_dump_reload_1() {
|
|
|
783
783
|
for i in 0..data.len() {
|
|
784
784
|
hnsw.insert((&data[i], i));
|
|
785
785
|
}
|
|
786
|
-
//
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
let
|
|
790
|
-
let _res = hnsw.file_dump(&
|
|
791
|
-
|
|
792
|
-
//
|
|
786
|
+
// dump in a file. Use tempdir for parallel safety.
|
|
787
|
+
let temp_dir = tempfile::tempdir().unwrap();
|
|
788
|
+
let fname = temp_dir.path().join("dumpreloadtest1");
|
|
789
|
+
let fname_str = fname.to_str().unwrap().to_string();
|
|
790
|
+
let _res = hnsw.file_dump(&fname_str);
|
|
791
|
+
|
|
793
792
|
// reload
|
|
794
793
|
log::debug!("\n\n hnsw reload");
|
|
795
|
-
|
|
796
|
-
// from now on we test with DistL1
|
|
797
|
-
let graphfname = String::from("dumpreloadtest1.hnsw.graph");
|
|
798
|
-
let graphpath = PathBuf::from(graphfname);
|
|
794
|
+
let graphpath = PathBuf::from(format!("{}.hnsw.graph", fname_str));
|
|
799
795
|
let graphfileres = OpenOptions::new().read(true).open(&graphpath);
|
|
800
796
|
if graphfileres.is_err() {
|
|
801
797
|
println!("test_dump_reload: could not open file {:?}", graphpath.as_os_str());
|
|
@@ -803,8 +799,7 @@ fn test_dump_reload_1() {
|
|
|
803
799
|
}
|
|
804
800
|
let graphfile = graphfileres.unwrap();
|
|
805
801
|
//
|
|
806
|
-
let
|
|
807
|
-
let datapath = PathBuf::from(datafname);
|
|
802
|
+
let datapath = PathBuf::from(format!("{}.hnsw.data", fname_str));
|
|
808
803
|
let datafileres = OpenOptions::new().read(true).open(&datapath);
|
|
809
804
|
if datafileres.is_err() {
|
|
810
805
|
println!("test_dump_reload : could not open file {:?}", datapath.as_os_str());
|
|
@@ -852,19 +847,15 @@ fn test_dump_reload_myfn() {
|
|
|
852
847
|
for i in 0..data.len() {
|
|
853
848
|
hnsw.insert((&data[i], i));
|
|
854
849
|
}
|
|
855
|
-
//
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
let
|
|
859
|
-
let _res = hnsw.file_dump(&
|
|
860
|
-
|
|
861
|
-
//
|
|
850
|
+
// dump in a file. Use tempdir for parallel safety.
|
|
851
|
+
let temp_dir = tempfile::tempdir().unwrap();
|
|
852
|
+
let fname = temp_dir.path().join("dumpreloadtest_myfn");
|
|
853
|
+
let fname_str = fname.to_str().unwrap().to_string();
|
|
854
|
+
let _res = hnsw.file_dump(&fname_str);
|
|
855
|
+
|
|
862
856
|
// reload
|
|
863
857
|
log::debug!("\n\n hnsw reload");
|
|
864
|
-
|
|
865
|
-
// from now on we test with DistL1
|
|
866
|
-
let graphfname = String::from("dumpreloadtest_myfn.hnsw.graph");
|
|
867
|
-
let graphpath = PathBuf::from(graphfname);
|
|
858
|
+
let graphpath = PathBuf::from(format!("{}.hnsw.graph", fname_str));
|
|
868
859
|
let graphfileres = OpenOptions::new().read(true).open(&graphpath);
|
|
869
860
|
if graphfileres.is_err() {
|
|
870
861
|
println!("test_dump_reload: could not open file {:?}", graphpath.as_os_str());
|
|
@@ -872,8 +863,7 @@ fn test_dump_reload_myfn() {
|
|
|
872
863
|
}
|
|
873
864
|
let graphfile = graphfileres.unwrap();
|
|
874
865
|
//
|
|
875
|
-
let
|
|
876
|
-
let datapath = PathBuf::from(datafname);
|
|
866
|
+
let datapath = PathBuf::from(format!("{}.hnsw.data", fname_str));
|
|
877
867
|
let datafileres = OpenOptions::new().read(true).open(&datapath);
|
|
878
868
|
if datafileres.is_err() {
|
|
879
869
|
println!("test_dump_reload : could not open file {:?}", datapath.as_os_str());
|
|
@@ -918,19 +908,15 @@ fn test_dump_reload_graph_only() {
|
|
|
918
908
|
for i in 0..data.len() {
|
|
919
909
|
hnsw.insert((&data[i], i));
|
|
920
910
|
}
|
|
921
|
-
//
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
let
|
|
925
|
-
let _res = hnsw.file_dump(&
|
|
926
|
-
|
|
927
|
-
//
|
|
911
|
+
// dump in a file. Use tempdir for parallel safety.
|
|
912
|
+
let temp_dir = tempfile::tempdir().unwrap();
|
|
913
|
+
let fname = temp_dir.path().join("dumpreloadtestgraph");
|
|
914
|
+
let fname_str = fname.to_str().unwrap().to_string();
|
|
915
|
+
let _res = hnsw.file_dump(&fname_str);
|
|
916
|
+
|
|
928
917
|
// reload
|
|
929
918
|
log::debug!("\n\n hnsw reload");
|
|
930
|
-
|
|
931
|
-
// from now on we test with DistL1
|
|
932
|
-
let graphfname = String::from("dumpreloadtestgraph.hnsw.graph");
|
|
933
|
-
let graphpath = PathBuf::from(graphfname);
|
|
919
|
+
let graphpath = PathBuf::from(format!("{}.hnsw.graph", fname_str));
|
|
934
920
|
let graphfileres = OpenOptions::new().read(true).open(&graphpath);
|
|
935
921
|
if graphfileres.is_err() {
|
|
936
922
|
println!("test_dump_reload: could not open file {:?}", graphpath.as_os_str());
|
|
@@ -938,8 +924,7 @@ fn test_dump_reload_graph_only() {
|
|
|
938
924
|
}
|
|
939
925
|
let graphfile = graphfileres.unwrap();
|
|
940
926
|
//
|
|
941
|
-
let
|
|
942
|
-
let datapath = PathBuf::from(datafname);
|
|
927
|
+
let datapath = PathBuf::from(format!("{}.hnsw.data", fname_str));
|
|
943
928
|
let datafileres = OpenOptions::new().read(true).open(&datapath);
|
|
944
929
|
if datafileres.is_err() {
|
|
945
930
|
println!("test_dump_reload : could not open file {:?}", datapath.as_os_str());
|
|
@@ -125,13 +125,16 @@ impl InMemoryVectorIndex {
|
|
|
125
125
|
let values = fsl.values().as_any().downcast_ref::<Float32Array>()
|
|
126
126
|
.context("Expected Float32Array values in FixedSizeListArray")?;
|
|
127
127
|
|
|
128
|
-
//
|
|
129
|
-
|
|
128
|
+
// Respect slicing: only copy the range of the value array that belongs to this FSL slice
|
|
129
|
+
let start_offset = fsl.offset() * self.dim;
|
|
130
|
+
let len = fsl.len() * self.dim;
|
|
131
|
+
let slice = &values.values()[start_offset..start_offset + len];
|
|
132
|
+
|
|
133
|
+
self.vectors.extend_from_slice(slice);
|
|
130
134
|
self.count += fsl.len();
|
|
131
135
|
} else if let Some(list) = col.as_any().downcast_ref::<ListArray>() {
|
|
132
136
|
for i in 0..list.len() {
|
|
133
137
|
if list.is_null(i) {
|
|
134
|
-
// Fill with zeros to maintain alignment or handle nulls
|
|
135
138
|
self.vectors.extend(std::iter::repeat_n(0.0, self.dim));
|
|
136
139
|
} else {
|
|
137
140
|
let vector_array = list.value(i);
|
|
@@ -244,4 +247,39 @@ mod tests {
|
|
|
244
247
|
assert_eq!(results[0].0, 0); // v1 is closest
|
|
245
248
|
assert_eq!(results[1].0, 1); // v2 is second closest
|
|
246
249
|
}
|
|
250
|
+
|
|
251
|
+
#[test]
|
|
252
|
+
fn test_memory_index_sliced_array() {
|
|
253
|
+
let dim = 4;
|
|
254
|
+
let mut index = InMemoryVectorIndex::new(dim);
|
|
255
|
+
|
|
256
|
+
let schema = Arc::new(Schema::new(vec![
|
|
257
|
+
Field::new("vec", DataType::FixedSizeList(
|
|
258
|
+
Arc::new(Field::new("item", DataType::Float32, true)),
|
|
259
|
+
dim as i32
|
|
260
|
+
), true),
|
|
261
|
+
]));
|
|
262
|
+
|
|
263
|
+
let v1 = vec![Some(1.0), Some(0.0), Some(0.0), Some(0.0)];
|
|
264
|
+
let v2 = vec![Some(0.0), Some(1.0), Some(0.0), Some(0.0)];
|
|
265
|
+
let v3 = vec![Some(0.0), Some(0.0), Some(1.0), Some(0.0)];
|
|
266
|
+
|
|
267
|
+
let array = FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
|
268
|
+
vec![Some(v1), Some(v2.clone()), Some(v3)],
|
|
269
|
+
dim as i32
|
|
270
|
+
);
|
|
271
|
+
|
|
272
|
+
// Slice the array to only include v2
|
|
273
|
+
let sliced_array = array.slice(1, 1);
|
|
274
|
+
let batch = RecordBatch::try_new(schema, vec![Arc::new(sliced_array)]).unwrap();
|
|
275
|
+
index.insert_batch(&batch, "vec", 0).unwrap();
|
|
276
|
+
|
|
277
|
+
assert_eq!(index.count, 1);
|
|
278
|
+
assert_eq!(index.vectors.len(), dim);
|
|
279
|
+
|
|
280
|
+
let query = crate::core::index::VectorValue::Float32(v2.into_iter().map(|x| x.unwrap()).collect());
|
|
281
|
+
let results = index.search(&query, 1, None);
|
|
282
|
+
assert_eq!(results.len(), 1);
|
|
283
|
+
assert_eq!(results[0].1, 0.0); // Exact match
|
|
284
|
+
}
|
|
247
285
|
}
|
|
@@ -1628,15 +1628,19 @@ impl PartitionSpec {
|
|
|
1628
1628
|
for i in 0..batch.num_rows() {
|
|
1629
1629
|
let mut key = Vec::with_capacity(self.fields.len());
|
|
1630
1630
|
for field in &self.fields {
|
|
1631
|
-
|
|
1631
|
+
// Determine source columns
|
|
1632
1632
|
let source_ids = field.get_source_ids();
|
|
1633
1633
|
let mut cols = Vec::new();
|
|
1634
1634
|
|
|
1635
|
-
//
|
|
1635
|
+
// Prioritize finding by Name (most intuitive for users)
|
|
1636
|
+
let mut found = false;
|
|
1636
1637
|
if let Ok(idx) = batch.schema().index_of(&field.name) {
|
|
1637
1638
|
cols.push(batch.column(idx));
|
|
1638
|
-
|
|
1639
|
-
|
|
1639
|
+
found = true;
|
|
1640
|
+
}
|
|
1641
|
+
|
|
1642
|
+
// Fallback to Iceberg IDs only if name lookup failed
|
|
1643
|
+
if !found {
|
|
1640
1644
|
for id in &source_ids {
|
|
1641
1645
|
let idx = batch.schema().fields().iter().position(|f| {
|
|
1642
1646
|
f.metadata().get("iceberg.id")
|
|
@@ -126,7 +126,7 @@ impl MergePlanner {
|
|
|
126
126
|
) -> Result<Vec<(Option<SegmentId>, SegmentId)>>
|
|
127
127
|
where F: Fn(&SegmentId, &str) -> Result<Option<RoaringBitmap>>
|
|
128
128
|
{
|
|
129
|
-
|
|
129
|
+
tracing::info!("Executing Merge on {} candidates", candidate_segments.len());
|
|
130
130
|
|
|
131
131
|
let mut updates_by_segment: HashMap<SegmentId, Vec<usize>> = HashMap::new();
|
|
132
132
|
let mut unmatched_rows: Vec<usize> = Vec::new();
|
|
@@ -198,7 +198,7 @@ impl MergePlanner {
|
|
|
198
198
|
|
|
199
199
|
// 2. Process Updates (Copy-on-Write)
|
|
200
200
|
for (seg_id, source_row_indices) in updates_by_segment {
|
|
201
|
-
|
|
201
|
+
tracing::info!("Updating Segment {} with {} rows", seg_id, source_row_indices.len());
|
|
202
202
|
|
|
203
203
|
// A. Read Original Segment
|
|
204
204
|
// Use empty base path for reader configuration because store is already rooted at base_path
|
|
@@ -276,7 +276,7 @@ impl MergePlanner {
|
|
|
276
276
|
|
|
277
277
|
// 3. Process Inserts (Unmatched)
|
|
278
278
|
if !unmatched_rows.is_empty() {
|
|
279
|
-
|
|
279
|
+
tracing::info!("Inserting {} new rows", unmatched_rows.len());
|
|
280
280
|
let indices_arr = arrow::array::UInt32Array::from(unmatched_rows.iter().map(|&x| x as u32).collect::<Vec<u32>>());
|
|
281
281
|
let inserts_batch = arrow::compute::take_record_batch(source_batch, &indices_arr)?;
|
|
282
282
|
|