hyperstreamdb 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/Cargo.lock +1 -1
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/Cargo.toml +1 -1
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/PKG-INFO +1 -1
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/cache.rs +32 -3
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/reader.rs +4 -5
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/segment.rs +2 -2
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/table/builder.rs +36 -15
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/table/read.rs +74 -25
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/wal.rs +31 -25
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/python_binding.rs +102 -100
- hyperstreamdb-0.3.2/tests/integration_test_hnsw_ivf_native.rs +288 -0
- hyperstreamdb-0.3.2/tests/stability.rs +70 -0
- hyperstreamdb-0.3.0/tests/integration_test_hnsw_ivf_native.rs +0 -138
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/.gitattributes +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/.gitignore +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/.hypothesis/constants/32b327793848e7d8 +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/.hypothesis/constants/67b0a8ccf18bf5d2 +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/.hypothesis/constants/84828557b4ee7be4 +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/.instructions.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/.ipynb_checkpoints/Untitled-checkpoint.ipynb +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/CNAME +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/DORIS_OPTIMIZATION_PATTERNS.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/HyperStreamDB.png +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/LICENSE-APACHE +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/LICENSE-MIT +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/README.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/RUN_COMPLIANCE_TESTS.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/STEERING.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/THIRDPARTY_NOTICES.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/Untitled.ipynb +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benches/bench_table.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benches/performance.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/BENCHMARK_REPORT.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/benchmark_charts.png +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/benchmark_results.csv +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/concurrent_queries_20260409_214245.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/concurrent_queries_20260409_214245.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/filtered_search_comparison_20260409_222607.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/filtered_search_comparison_20260409_222607.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_214355.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_214355.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_220418.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_220418.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_222053.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_222053.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_225907.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_225907.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/full_scan_baseline_20260409_222303.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/full_scan_baseline_20260409_222303.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/high_selectivity_filter_20260409_222302.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/high_selectivity_filter_20260409_222302.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/ingestion_comparison_20260409_222516.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/ingestion_comparison_20260409_222516.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_214428.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_214428.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_220450.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_220450.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_222131.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_222131.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_225938.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_225938.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_231713.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_231713.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_214501.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_214501.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_220524.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_220524.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_222204.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_222204.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_230010.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_230010.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/query_comparison_20260409_222541.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/query_comparison_20260409_222541.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/range_query_20260409_222302.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/range_query_20260409_222302.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/search_filtered_high_selectivity_20260409_214144.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/search_filtered_high_selectivity_20260409_214144.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/search_unfiltered_20260409_214028.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/benchmark_results/search_unfiltered_20260409_214028.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/book.toml +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/broken_binaries_all.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/broken_bins.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/build-connectors.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/build.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/check_iceberg_compliance.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/compliance_output.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/critical_code_review.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/debug_log.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/demo_basics_run.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/demo_basics_v2.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docker-compose-minio-nessie.yml +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docker-compose.yml +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/.nojekyll +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/BENCHMARKING.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/COMPREHENSIVE_GUIDE.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/CONCURRENCY.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/CONFIGURATION.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/GPU_SETUP_GUIDE.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/ICEBERG_V2_V3_API.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/INSTALLATION.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/PGVECTOR_SQL_GUIDE.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/PYTHON_VECTOR_API.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/VECTOR_CONFIGURATION.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/api_reference.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/architecture.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/catalog_usage.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/index.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/integrations/README.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/integrations/java_jni.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/integrations/python.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/integrations/spark.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/integrations/trino.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/requirements.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/source/_static/HyperStreamDB.png +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/source/api/python.rst +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/source/api/rust.rst +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/source/conf.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/source/index.rst +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/docs/source/roadmap.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/fix_nb.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/proptest-regressions/core/index/gpu.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/proptest-regressions/core/sql/vector_literal.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/proptest-regressions/core/sql/vector_udf.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/pyproject.toml +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/python/hyperstreamdb/__init__.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/python/hyperstreamdb/embeddings.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/python_test_output.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/python_test_output_v2.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/python_test_output_v3.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/rust_check_all_warnings.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/rust_test_output.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/rust_warnings.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/scratch/check_os_error.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/simd_test_results.txt +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/spark-hyperstream/.bloop/bloop.settings.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/spark-hyperstream/.bloop/spark-hyperstream-test.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/spark-hyperstream/.bloop/spark-hyperstream.json +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/spark-hyperstream/pom.xml +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/DefaultSource.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartition.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReader.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReaderFactory.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamScanBuilder.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamTable.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/split_table.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/bin/gateway.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/bin/hdb.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/bin/iceberg_rest.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/bin/probe_datafusion.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/bin/setup_test_data.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/bin/verify_layered_indexing.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/catalog/config.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/catalog/glue.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/catalog/hive.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/catalog/jdbc.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/catalog/mod.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/catalog/nessie.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/catalog/rest.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/catalog/unity.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/clustering.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/compaction.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/embeddings.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/ffi.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/iceberg/iceberg_delete.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/iceberg.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/cuda/cosine_distance.cu +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/cuda/hamming_distance.cu +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/cuda/inner_product.cu +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/cuda/jaccard_distance.cu +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/cuda/kmeans_assignment.cu +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/cuda/l1_distance.cu +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/cuda/l2_distance.cu +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/distance.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/gpu.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_ivf.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/annhdf5.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/api.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/dist.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/flatten.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/hnsw.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/hnswio.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/libext.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/mod.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/prelude.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/test.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/ivf.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/memory.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/mod.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/mps/cosine_distance.metal +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/mps/hamming_distance.metal +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/mps/inner_product.metal +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/mps/jaccard_distance.metal +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/mps/kmeans_assignment.metal +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/mps/l1_distance.metal +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/mps/l2_distance.metal +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/opencl/cosine_distance.cl +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/opencl/hamming_distance.cl +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/opencl/inner_product.cl +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/opencl/jaccard_distance.cl +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/opencl/kmeans_assignment.cl +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/opencl/l1_distance.cl +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/opencl/l2_distance.cl +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/pq.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/tokenizer.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/turboquant.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/index/wgpu_kernel.wgsl +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/license.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/maintenance.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/manifest.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/merge.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/metadata.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/mod.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/nessie.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/planner.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/puffin.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/query.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/search/mod.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/search/rrf.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/sql/mod.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/sql/optimizer.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/sql/pgvector_rewriter.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/sql/physical_plan/index_join.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/sql/physical_plan.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/sql/session.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/sql/vector_literal.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/sql/vector_operators.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/sql/vector_udf.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/storage.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/table/fluent.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/table/mod.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/table/schema.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/core/table/write.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/enterprise/continuous_indexing.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/enterprise/license.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/enterprise/mod.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/index.rs.old +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/lib.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/python_distance.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/python_gpu_context.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/telemetry/metrics.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/telemetry/mod.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/src/telemetry/tracing.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/task.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/all_types_index_test.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/bin/generate_iceberg_manifests.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/bin/verify_iceberg_read_check.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/check_mmh3.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/data/download_nyc_taxi.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/data/generate_embeddings.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/data/generate_wikipedia.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/data/start_nessie.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/datafusion_rust_test.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/debug_murmur3.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/fuzz_murmur3.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/performance/README.md +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/prototype_merge.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/schema_evolution_test.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_all_algos.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_catalog_commit.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_compliance.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_delete_correctness.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_iceberg_python_delete.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_iceberg_rest.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_iceberg_rest_create.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_iceberg_rest_delete.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_iceberg_rest_remove_index.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_iceberg_rest_update.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_metadata_creation.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_mor_reads.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_mor_writes.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_partition_transforms.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_partitioned_writes.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_puffin_index.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_rest_updates.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/tests/verify_schema_compat.rs +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config/.DS_Store +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config/catalog/glue_catalog.properties +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config/catalog/hyperstreamdb.properties +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config/catalog/iceberg.properties +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config/catalog/memory.properties +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config/catalog/postgres.properties +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config/config.properties +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config/entrypoint.sh +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config/jvm.config +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config/node.properties +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-config.zip +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-hyperstream/pom.xml +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBColumnHandle.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBConnectorFactory.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBMetadata.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSource.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSourceProvider.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPlugin.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplit.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplitManager.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBTableHandle.java +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/update_schema_patch.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/update_schema_patch2.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/verify_docstrings.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/verify_fluent_api.py +0 -0
- {hyperstreamdb-0.3.0 → hyperstreamdb-0.3.2}/verify_unified_ingest.py +0 -0
|
@@ -125,8 +125,22 @@ pub static INDEX_CACHE: Lazy<Cache<String, Arc<RoaringBitmap>>> = Lazy::new(|| {
|
|
|
125
125
|
});
|
|
126
126
|
|
|
127
127
|
pub static BYTE_CACHE: Lazy<Cache<String, Arc<Vec<u8>>>> = Lazy::new(|| {
|
|
128
|
+
let cache_gb: u64 = std::env::var("HYPERSTREAM_CACHE_GB")
|
|
129
|
+
.unwrap_or_else(|_| "2".to_string())
|
|
130
|
+
.parse()
|
|
131
|
+
.unwrap_or(2);
|
|
132
|
+
|
|
133
|
+
// Allocate 10% of global cache to Byte Cache (max 512MB)
|
|
134
|
+
let limit_bytes = (cache_gb * 1024 * 1024 * 1024 / 10).min(512 * 1024 * 1024);
|
|
135
|
+
let max_kb = limit_bytes / 1024;
|
|
136
|
+
|
|
137
|
+
tracing::info!("Initializing Byte Cache with {} MB limit", limit_bytes / (1024 * 1024));
|
|
138
|
+
|
|
128
139
|
Cache::builder()
|
|
129
|
-
.
|
|
140
|
+
.weigher(|_key, value: &Arc<Vec<u8>>| -> u32 {
|
|
141
|
+
(value.len() / 1024) as u32
|
|
142
|
+
})
|
|
143
|
+
.max_capacity(max_kb)
|
|
130
144
|
.time_to_idle(Duration::from_secs(60 * 30)) // 30 mins
|
|
131
145
|
.build()
|
|
132
146
|
});
|
|
@@ -163,9 +177,24 @@ pub static HNSW_IVF_CACHE: Lazy<Cache<String, Arc<HnswIvfIndex>>> = Lazy::new(||
|
|
|
163
177
|
});
|
|
164
178
|
|
|
165
179
|
pub static INVERTED_INDEX_CACHE: Lazy<Cache<String, Arc<Vec<RecordBatch>>>> = Lazy::new(|| {
|
|
180
|
+
let cache_gb: u64 = std::env::var("HYPERSTREAM_CACHE_GB")
|
|
181
|
+
.unwrap_or_else(|_| "2".to_string())
|
|
182
|
+
.parse()
|
|
183
|
+
.unwrap_or(2);
|
|
184
|
+
|
|
185
|
+
// Allocate 25% of global cache to Inverted Index Cache
|
|
186
|
+
let limit_bytes = cache_gb * 1024 * 1024 * 1024 / 4;
|
|
187
|
+
let max_kb = limit_bytes / 1024;
|
|
188
|
+
|
|
189
|
+
tracing::info!("Initializing Inverted Index Cache with {} MB limit", limit_bytes / (1024 * 1024));
|
|
190
|
+
|
|
166
191
|
Cache::builder()
|
|
167
|
-
.
|
|
168
|
-
|
|
192
|
+
.weigher(|_key, value: &Arc<Vec<RecordBatch>>| -> u32 {
|
|
193
|
+
let bytes: usize = value.iter().map(|b| b.get_array_memory_size()).sum();
|
|
194
|
+
(bytes / 1024) as u32
|
|
195
|
+
})
|
|
196
|
+
.max_capacity(max_kb)
|
|
197
|
+
.time_to_idle(Duration::from_secs(60 * 15))
|
|
169
198
|
.build()
|
|
170
199
|
});
|
|
171
200
|
|
|
@@ -1635,11 +1635,10 @@ impl HybridReader {
|
|
|
1635
1635
|
};
|
|
1636
1636
|
|
|
1637
1637
|
// Load HNSW-IVF index
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
};
|
|
1638
|
+
// NOTE: blob_type records the *algorithm* (e.g. "hnsw_tq8"), not the storage format.
|
|
1639
|
+
// The writer always uses the multi-file layout (.centroids.parquet, .cluster_N.hnsw.*),
|
|
1640
|
+
// so we always load via load_async_with_cache_key regardless of blob_type.
|
|
1641
|
+
let hnsw_ivf = HnswIvfIndex::load_async_with_cache_key(self.store.clone(), &idx_path_str, &cache_key).await?;
|
|
1643
1642
|
|
|
1644
1643
|
// Search with HNSW-IVF
|
|
1645
1644
|
let query_clone = query.clone();
|
|
@@ -1551,8 +1551,8 @@ mod tests {
|
|
|
1551
1551
|
assert!(std::path::Path::new(&format!("{}.id.inv.parquet", base)).exists(), "Inverted index for id should exist");
|
|
1552
1552
|
|
|
1553
1553
|
// Vector Index (embedding) - HNSW-IVF saves centroids and cluster graphs
|
|
1554
|
-
assert!(std::path::Path::new(&format!("{}.embedding.centroids.parquet", base)).exists(), "Vector index centroids should exist");
|
|
1555
|
-
assert!(std::path::Path::new(&format!("{}.embedding.cluster_0.hnsw.graph", base)).exists(), "Vector index graph should exist");
|
|
1554
|
+
assert!(std::path::Path::new(&format!("{}.embedding.tq8.centroids.parquet", base)).exists(), "Vector index centroids should exist");
|
|
1555
|
+
assert!(std::path::Path::new(&format!("{}.embedding.tq8.cluster_0.hnsw.graph", base)).exists(), "Vector index graph should exist");
|
|
1556
1556
|
|
|
1557
1557
|
Ok(())
|
|
1558
1558
|
}
|
|
@@ -24,18 +24,37 @@ use super::Table;
|
|
|
24
24
|
/// rebuilds the in-memory vector index from recovered data.
|
|
25
25
|
/// Returns (aligned_buffer, optional_memory_index, promoted_schema).
|
|
26
26
|
pub(crate) fn recover_wal_state(
|
|
27
|
-
|
|
27
|
+
recovered_stream: Box<dyn Iterator<Item = Result<RecordBatch>>>,
|
|
28
28
|
mut schema_val: SchemaRef,
|
|
29
29
|
) -> (Vec<RecordBatch>, Option<InMemoryVectorIndex>, SchemaRef) {
|
|
30
|
-
|
|
30
|
+
let mut aligned_buffer = Vec::new();
|
|
31
|
+
let mut total_rows = 0;
|
|
32
|
+
|
|
33
|
+
// 1. First pass: Collect batches and merge schema
|
|
34
|
+
let mut batches = Vec::new();
|
|
35
|
+
for batch_res in recovered_stream {
|
|
36
|
+
match batch_res {
|
|
37
|
+
Ok(batch) => {
|
|
38
|
+
// Safely merge schema
|
|
39
|
+
match arrow::datatypes::Schema::try_merge(vec![schema_val.as_ref().clone(), batch.schema().as_ref().clone()]) {
|
|
40
|
+
Ok(s) => schema_val = Arc::new(s),
|
|
41
|
+
Err(e) => tracing::warn!("Failed to merge WAL batch schema: {}", e),
|
|
42
|
+
}
|
|
43
|
+
batches.push(batch);
|
|
44
|
+
}
|
|
45
|
+
Err(e) => tracing::error!("WAL Replay Error: {}", e),
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
if batches.is_empty() {
|
|
31
50
|
return (Vec::new(), None, schema_val);
|
|
32
51
|
}
|
|
33
52
|
|
|
34
|
-
tracing::info!("Recovering {} batches from WAL...",
|
|
53
|
+
tracing::info!("Recovering {} batches from WAL...", batches.len());
|
|
35
54
|
|
|
36
55
|
// Use first batch schema if current schema is empty
|
|
37
56
|
if schema_val.fields().is_empty() {
|
|
38
|
-
if let Some(first) =
|
|
57
|
+
if let Some(first) = batches.first() {
|
|
39
58
|
schema_val = first.schema();
|
|
40
59
|
}
|
|
41
60
|
}
|
|
@@ -43,7 +62,7 @@ pub(crate) fn recover_wal_state(
|
|
|
43
62
|
// Safely attempt to merge all WAL schemas to capture any column additions
|
|
44
63
|
// or type evolutions instead of fragile field count comparisons.
|
|
45
64
|
let mut merged_schema = schema_val.as_ref().clone();
|
|
46
|
-
for batch in &
|
|
65
|
+
for batch in &batches {
|
|
47
66
|
match arrow::datatypes::Schema::try_merge(vec![merged_schema.clone(), batch.schema().as_ref().clone()]) {
|
|
48
67
|
Ok(s) => merged_schema = s,
|
|
49
68
|
Err(e) => tracing::warn!("Failed to merge WAL batch schema: {}", e),
|
|
@@ -52,8 +71,8 @@ pub(crate) fn recover_wal_state(
|
|
|
52
71
|
let schema_val = std::sync::Arc::new(merged_schema);
|
|
53
72
|
|
|
54
73
|
// Align all recovered batches to the widest schema
|
|
55
|
-
|
|
56
|
-
if b.schema() != schema_val {
|
|
74
|
+
for b in batches {
|
|
75
|
+
let aligned = if b.schema() != schema_val {
|
|
57
76
|
let mut cols = Vec::with_capacity(schema_val.fields().len());
|
|
58
77
|
for field in schema_val.fields() {
|
|
59
78
|
let col = if let Some(c) = b.column_by_name(field.name()) {
|
|
@@ -66,8 +85,9 @@ pub(crate) fn recover_wal_state(
|
|
|
66
85
|
RecordBatch::try_new(schema_val.clone(), cols).unwrap_or(b)
|
|
67
86
|
} else {
|
|
68
87
|
b
|
|
69
|
-
}
|
|
70
|
-
|
|
88
|
+
};
|
|
89
|
+
aligned_buffer.push(aligned);
|
|
90
|
+
}
|
|
71
91
|
|
|
72
92
|
// Rebuild in-memory vector index from recovered data.
|
|
73
93
|
// Look for an "embedding" column (the most common convention), supporting
|
|
@@ -94,10 +114,9 @@ pub(crate) fn recover_wal_state(
|
|
|
94
114
|
|
|
95
115
|
if let Some(d) = dim {
|
|
96
116
|
let mut idx = InMemoryVectorIndex::new(d);
|
|
97
|
-
let mut offset = 0;
|
|
98
117
|
for batch in &aligned_buffer {
|
|
99
|
-
let _ = idx.insert_batch(batch, col_name,
|
|
100
|
-
|
|
118
|
+
let _ = idx.insert_batch(batch, col_name, total_rows);
|
|
119
|
+
total_rows += batch.num_rows();
|
|
101
120
|
}
|
|
102
121
|
mem_index = Some(idx);
|
|
103
122
|
}
|
|
@@ -231,13 +250,15 @@ impl TableBuilder {
|
|
|
231
250
|
let _ = wal.spawn_worker();
|
|
232
251
|
|
|
233
252
|
// Replay WAL (Recovery)
|
|
234
|
-
let
|
|
253
|
+
let recovered_stream = wal.replay_stream().unwrap_or_else(|e| {
|
|
235
254
|
tracing::warn!("WAL Recovery Warning: {}" , e);
|
|
236
|
-
|
|
255
|
+
Box::new(std::iter::empty())
|
|
237
256
|
});
|
|
238
257
|
|
|
258
|
+
let (_, recovered_paths) = wal.replay().unwrap_or_else(|_| (vec![], vec![])); // For paths cleanup only
|
|
259
|
+
|
|
239
260
|
let (initial_buffer, initial_mem_index, schema_val) = recover_wal_state(
|
|
240
|
-
|
|
261
|
+
recovered_stream, schema_val,
|
|
241
262
|
);
|
|
242
263
|
|
|
243
264
|
let table = Table {
|
|
@@ -18,6 +18,7 @@ use crate::SegmentConfig;
|
|
|
18
18
|
|
|
19
19
|
use super::Table;
|
|
20
20
|
use crate::core::search::{HybridSearchCoordinator, KeywordSearchParams, ScoredResult};
|
|
21
|
+
use futures::stream::BoxStream;
|
|
21
22
|
|
|
22
23
|
impl Table {
|
|
23
24
|
|
|
@@ -45,6 +46,21 @@ impl Table {
|
|
|
45
46
|
self.read_with_config_async(filter_str, vector_filter, columns, self.query_config.clone()).await
|
|
46
47
|
}
|
|
47
48
|
|
|
49
|
+
pub async fn read_stream_async(&self, filter_str: Option<&str>, vector_filter: Option<VectorSearchParams>, columns: Option<&[&str]>) -> Result<BoxStream<'static, Result<RecordBatch>>> {
|
|
50
|
+
self.read_with_config_stream_async(filter_str, vector_filter, columns, self.query_config.clone()).await
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
pub async fn read_with_config_stream_async(&self, filter_str: Option<&str>, vector_filter: Option<VectorSearchParams>, columns: Option<&[&str]>, config: QueryConfig) -> Result<BoxStream<'static, Result<RecordBatch>>> {
|
|
54
|
+
let expr = match filter_str {
|
|
55
|
+
Some(f) => {
|
|
56
|
+
let schema = self.arrow_schema();
|
|
57
|
+
Some(FilterExpr::parse_sql(f, schema).await?)
|
|
58
|
+
}
|
|
59
|
+
_ => None,
|
|
60
|
+
};
|
|
61
|
+
self.read_expr_stream_async(expr, vector_filter, columns, config, filter_str).await
|
|
62
|
+
}
|
|
63
|
+
|
|
48
64
|
pub fn query(&self) -> TableQuery<'_> {
|
|
49
65
|
TableQuery::new(self)
|
|
50
66
|
}
|
|
@@ -277,6 +293,19 @@ impl Table {
|
|
|
277
293
|
config: QueryConfig,
|
|
278
294
|
filter_str: Option<&str>,
|
|
279
295
|
) -> Result<Vec<RecordBatch>> {
|
|
296
|
+
let stream = self.read_expr_stream_async(expr, vector_filter, columns, config, filter_str).await?;
|
|
297
|
+
let results: Vec<Result<RecordBatch>> = stream.collect().await;
|
|
298
|
+
results.into_iter().collect()
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
pub async fn read_expr_stream_async(
|
|
302
|
+
&self,
|
|
303
|
+
expr: Option<FilterExpr>,
|
|
304
|
+
vector_filter: Option<VectorSearchParams>,
|
|
305
|
+
columns: Option<&[&str]>,
|
|
306
|
+
config: QueryConfig,
|
|
307
|
+
filter_str: Option<&str>,
|
|
308
|
+
) -> Result<BoxStream<'static, Result<RecordBatch>>> {
|
|
280
309
|
use futures::StreamExt;
|
|
281
310
|
|
|
282
311
|
let manifest_manager = ManifestManager::new(self.store.clone(), "", &self.uri);
|
|
@@ -375,7 +404,9 @@ impl Table {
|
|
|
375
404
|
|
|
376
405
|
// Convert ScoredResults back to RecordBatches by fetching from Parquet
|
|
377
406
|
// This is a simplified version of the final row-fetcher
|
|
378
|
-
|
|
407
|
+
let results = self.fetch_results_by_id(scored_results, columns).await?;
|
|
408
|
+
return Ok(futures::stream::iter(results.into_iter().map(Ok)).boxed());
|
|
409
|
+
|
|
379
410
|
}
|
|
380
411
|
}
|
|
381
412
|
|
|
@@ -472,7 +503,7 @@ impl Table {
|
|
|
472
503
|
}
|
|
473
504
|
}
|
|
474
505
|
|
|
475
|
-
return Ok(results);
|
|
506
|
+
return Ok(futures::stream::iter(results.into_iter().map(Ok)).boxed());
|
|
476
507
|
}
|
|
477
508
|
|
|
478
509
|
// Extract Iceberg schema from the already-loaded manifest to avoid
|
|
@@ -490,35 +521,52 @@ impl Table {
|
|
|
490
521
|
let concurrency = config.max_parallel_readers.unwrap_or_else(|| {
|
|
491
522
|
std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4)
|
|
492
523
|
});
|
|
524
|
+
|
|
525
|
+
struct ReadCtx {
|
|
526
|
+
table: Table,
|
|
527
|
+
expr: Option<Arc<FilterExpr>>,
|
|
528
|
+
schema: Option<Arc<crate::core::manifest::Schema>>,
|
|
529
|
+
gpu: Option<crate::core::index::gpu::ComputeContext>,
|
|
530
|
+
columns: Option<Vec<String>>,
|
|
531
|
+
version: u64,
|
|
532
|
+
}
|
|
533
|
+
let read_ctx = Arc::new(ReadCtx {
|
|
534
|
+
table: self.clone(),
|
|
535
|
+
expr: expr_arc.clone(),
|
|
536
|
+
schema: iceberg_schema_arc,
|
|
537
|
+
gpu: current_gpu_context,
|
|
538
|
+
columns: columns.map(|c| c.iter().map(|s| s.to_string()).collect()),
|
|
539
|
+
version: version as u64,
|
|
540
|
+
});
|
|
541
|
+
|
|
493
542
|
let stream = futures::stream::iter(entries_to_read)
|
|
494
|
-
.map(
|
|
495
|
-
let
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
543
|
+
.map({
|
|
544
|
+
let read_ctx = read_ctx.clone();
|
|
545
|
+
move |entry| {
|
|
546
|
+
let ctx = read_ctx.clone();
|
|
547
|
+
async move {
|
|
548
|
+
if let Some(c) = ctx.gpu.clone() {
|
|
549
|
+
crate::core::index::gpu::set_global_gpu_context(Some(c));
|
|
550
|
+
}
|
|
551
|
+
let cols_refs: Option<Vec<&str>> = ctx.columns.as_ref().map(|v| v.iter().map(|s| s.as_str()).collect());
|
|
552
|
+
ctx.table.read_segment_expr(
|
|
553
|
+
&entry, ctx.expr.as_deref(), ctx.version, cols_refs.as_deref(),
|
|
554
|
+
ctx.schema.as_deref(),
|
|
555
|
+
).await
|
|
501
556
|
}
|
|
502
|
-
self.read_segment_expr(
|
|
503
|
-
&entry, expr_clone.as_deref(), version, columns,
|
|
504
|
-
schema_clone.as_deref(),
|
|
505
|
-
).await
|
|
506
557
|
}
|
|
507
558
|
})
|
|
508
559
|
.buffer_unordered(concurrency);
|
|
509
560
|
|
|
510
|
-
let
|
|
511
|
-
let mut all_batches = Vec::new();
|
|
512
|
-
for (i, res) in results.into_iter().enumerate() {
|
|
561
|
+
let results_stream = stream.flat_map(|res| {
|
|
513
562
|
match res {
|
|
514
|
-
Ok(b_vec) =>
|
|
515
|
-
|
|
516
|
-
},
|
|
517
|
-
Err(e) => tracing::error!("Error reading batch {}: {}", i, e),
|
|
563
|
+
Ok(b_vec) => futures::stream::iter(b_vec.into_iter().map(Ok)).boxed(),
|
|
564
|
+
Err(e) => futures::stream::once(async move { Err(e) }).boxed(),
|
|
518
565
|
}
|
|
519
|
-
}
|
|
566
|
+
});
|
|
520
567
|
|
|
521
568
|
// --- Read from In-Memory Write Buffer ---
|
|
569
|
+
let mut mem_batches = Vec::new();
|
|
522
570
|
{
|
|
523
571
|
let buffer = self.write_buffer.read().unwrap();
|
|
524
572
|
if !buffer.is_empty() {
|
|
@@ -536,7 +584,7 @@ impl Table {
|
|
|
536
584
|
|
|
537
585
|
if let Ok(filtered) = planner.filter_expr(&batch_to_scan, e) {
|
|
538
586
|
if filtered.num_rows() > 0 {
|
|
539
|
-
|
|
587
|
+
mem_batches.push(Ok(filtered));
|
|
540
588
|
}
|
|
541
589
|
}
|
|
542
590
|
}
|
|
@@ -547,18 +595,19 @@ impl Table {
|
|
|
547
595
|
.filter_map(|name| batch.schema().index_of(name).ok())
|
|
548
596
|
.collect();
|
|
549
597
|
if let Ok(projected) = batch.project(&indices) {
|
|
550
|
-
|
|
598
|
+
mem_batches.push(Ok(projected));
|
|
551
599
|
}
|
|
552
600
|
} else {
|
|
553
|
-
|
|
601
|
+
mem_batches.push(Ok(batch.clone()));
|
|
554
602
|
}
|
|
555
603
|
}
|
|
556
604
|
}
|
|
557
605
|
}
|
|
558
606
|
}
|
|
559
607
|
|
|
608
|
+
let mem_stream = futures::stream::iter(mem_batches);
|
|
560
609
|
|
|
561
|
-
Ok(
|
|
610
|
+
Ok(results_stream.chain(mem_stream).boxed())
|
|
562
611
|
}
|
|
563
612
|
|
|
564
613
|
pub async fn read_filter_async(
|
|
@@ -175,15 +175,13 @@ impl WriteAheadLog {
|
|
|
175
175
|
}
|
|
176
176
|
}
|
|
177
177
|
|
|
178
|
-
/// Replay all log files in the WAL directory and return
|
|
179
|
-
/// This should be
|
|
180
|
-
pub fn
|
|
178
|
+
/// Replay all log files in the WAL directory and return an iterator of batches.
|
|
179
|
+
/// This should be used on startup for memory-efficient recovery.
|
|
180
|
+
pub fn replay_stream(&self) -> Result<Box<dyn Iterator<Item = Result<RecordBatch>>>> {
|
|
181
181
|
if !self.dir.exists() {
|
|
182
|
-
return Ok(
|
|
182
|
+
return Ok(Box::new(std::iter::empty()));
|
|
183
183
|
}
|
|
184
184
|
|
|
185
|
-
let mut all_batches = Vec::new();
|
|
186
|
-
|
|
187
185
|
// 1. List all .arrow files in the directory
|
|
188
186
|
let entries = std::fs::read_dir(&self.dir)?;
|
|
189
187
|
let mut wal_files = Vec::new();
|
|
@@ -196,10 +194,10 @@ impl WriteAheadLog {
|
|
|
196
194
|
}
|
|
197
195
|
}
|
|
198
196
|
|
|
199
|
-
// Sort for deterministic replay
|
|
197
|
+
// Sort for deterministic replay
|
|
200
198
|
wal_files.sort();
|
|
201
199
|
|
|
202
|
-
let mut
|
|
200
|
+
let mut all_iterators = Vec::new();
|
|
203
201
|
|
|
204
202
|
for path in wal_files {
|
|
205
203
|
let file = File::open(&path)?;
|
|
@@ -208,27 +206,35 @@ impl WriteAheadLog {
|
|
|
208
206
|
}
|
|
209
207
|
|
|
210
208
|
let reader = BufReader::new(file);
|
|
211
|
-
let ipc_reader = StreamReader::try_new(reader, None)
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
209
|
+
let ipc_reader = StreamReader::try_new(reader, None)?;
|
|
210
|
+
all_iterators.push(ipc_reader);
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
Ok(Box::new(all_iterators.into_iter().flatten().map(|res| res.map_err(anyhow::Error::from))))
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
/// Replay all log files in the WAL directory and return all batches.
|
|
217
|
+
/// Legacy method, consider using replay_stream for large logs.
|
|
218
|
+
pub fn replay(&self) -> Result<(Vec<RecordBatch>, Vec<String>)> {
|
|
219
|
+
let stream = self.replay_stream()?;
|
|
220
|
+
let mut batches = Vec::new();
|
|
221
|
+
for b in stream {
|
|
222
|
+
batches.push(b?);
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
// Return paths for cleanup (simplified for now)
|
|
226
|
+
let mut paths = Vec::new();
|
|
227
|
+
if self.dir.exists() {
|
|
228
|
+
for entry in std::fs::read_dir(&self.dir)? {
|
|
229
|
+
let entry = entry?;
|
|
230
|
+
let path = entry.path();
|
|
231
|
+
if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("arrow") {
|
|
232
|
+
paths.push(path.to_str().unwrap().to_string());
|
|
227
233
|
}
|
|
228
234
|
}
|
|
229
235
|
}
|
|
230
236
|
|
|
231
|
-
Ok((
|
|
237
|
+
Ok((batches, paths))
|
|
232
238
|
}
|
|
233
239
|
|
|
234
240
|
/// Initialize the writer with a schema.
|