hyperstreamdb 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/Cargo.lock +1 -1
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/Cargo.toml +1 -1
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/PKG-INFO +1 -1
- hyperstreamdb-0.2.1/src/core/table/builder.rs +279 -0
- hyperstreamdb-0.2.1/src/core/table/fluent.rs +64 -0
- hyperstreamdb-0.2.0/src/core/table.rs → hyperstreamdb-0.2.1/src/core/table/mod.rs +131 -2008
- hyperstreamdb-0.2.1/src/core/table/read.rs +669 -0
- hyperstreamdb-0.2.1/src/core/table/schema.rs +260 -0
- hyperstreamdb-0.2.1/src/core/table/write.rs +673 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/.gitattributes +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/.gitignore +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/.hypothesis/constants/32b327793848e7d8 +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/.hypothesis/constants/67b0a8ccf18bf5d2 +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/.hypothesis/constants/84828557b4ee7be4 +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/.instructions.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/DORIS_OPTIMIZATION_PATTERNS.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/LICENSE-APACHE +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/LICENSE-MIT +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/README.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/RUN_COMPLIANCE_TESTS.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/STEERING.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/THIRDPARTY_NOTICES.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benches/bench_table.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benches/performance.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/BENCHMARK_REPORT.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/benchmark_charts.png +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/benchmark_results.csv +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/concurrent_queries_20260409_214245.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/concurrent_queries_20260409_214245.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/filtered_search_comparison_20260409_222607.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/filtered_search_comparison_20260409_222607.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/filtered_vector_search_20260409_214355.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/filtered_vector_search_20260409_214355.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/filtered_vector_search_20260409_220418.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/filtered_vector_search_20260409_220418.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/filtered_vector_search_20260409_222053.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/filtered_vector_search_20260409_222053.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/filtered_vector_search_20260409_225907.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/filtered_vector_search_20260409_225907.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/full_scan_baseline_20260409_222303.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/full_scan_baseline_20260409_222303.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/high_selectivity_filter_20260409_222302.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/high_selectivity_filter_20260409_222302.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/ingestion_comparison_20260409_222516.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/ingestion_comparison_20260409_222516.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/multi_filter_vector_20260409_214428.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/multi_filter_vector_20260409_214428.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/multi_filter_vector_20260409_220450.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/multi_filter_vector_20260409_220450.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/multi_filter_vector_20260409_222131.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/multi_filter_vector_20260409_222131.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/multi_filter_vector_20260409_225938.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/multi_filter_vector_20260409_225938.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/post_vs_pre_filter_20260409_214501.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/post_vs_pre_filter_20260409_214501.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/post_vs_pre_filter_20260409_220524.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/post_vs_pre_filter_20260409_220524.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/post_vs_pre_filter_20260409_222204.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/post_vs_pre_filter_20260409_222204.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/post_vs_pre_filter_20260409_230010.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/post_vs_pre_filter_20260409_230010.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/query_comparison_20260409_222541.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/query_comparison_20260409_222541.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/range_query_20260409_222302.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/range_query_20260409_222302.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/search_filtered_high_selectivity_20260409_214144.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/search_filtered_high_selectivity_20260409_214144.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/search_unfiltered_20260409_214028.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/benchmark_results/search_unfiltered_20260409_214028.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/book.toml +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/broken_binaries_all.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/broken_bins.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/build-connectors.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/build.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/check_iceberg_compliance.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/compliance_output.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/critical_code_review.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/debug_log.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/demo_basics_run.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/demo_basics_v2.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docker-compose-minio-nessie.yml +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docker-compose.yml +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/BENCHMARKING.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/COMPREHENSIVE_GUIDE.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/CONCURRENCY.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/CONFIGURATION.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/GPU_SETUP_GUIDE.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/ICEBERG_V2_V3_API.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/INSTALLATION.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/PGVECTOR_SQL_GUIDE.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/PYTHON_VECTOR_API.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/VECTOR_CONFIGURATION.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/api_reference.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/architecture.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/catalog_usage.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/index.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/integrations/README.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/integrations/java_jni.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/integrations/python.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/integrations/spark.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/integrations/trino.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/requirements.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/source/api/python.rst +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/source/api/rust.rst +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/source/conf.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/source/index.rst +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/docs/source/roadmap.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/fix_nb.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/proptest-regressions/core/index/gpu.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/proptest-regressions/core/sql/vector_literal.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/proptest-regressions/core/sql/vector_udf.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/pyproject.toml +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/python/hyperstreamdb/__init__.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/python/hyperstreamdb/embeddings.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/python_test_output.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/python_test_output_v2.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/python_test_output_v3.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/rust_check_all_warnings.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/rust_test_output.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/rust_warnings.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/scratch/check_os_error.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/simd_test_results.txt +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/spark-hyperstream/.bloop/bloop.settings.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/spark-hyperstream/.bloop/spark-hyperstream-test.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/spark-hyperstream/.bloop/spark-hyperstream.json +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/spark-hyperstream/pom.xml +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/DefaultSource.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartition.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReader.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReaderFactory.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamScanBuilder.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamTable.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/bin/gateway.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/bin/hdb.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/bin/iceberg_rest.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/bin/probe_datafusion.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/bin/setup_test_data.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/bin/verify_layered_indexing.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/cache.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/catalog/config.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/catalog/glue.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/catalog/hive.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/catalog/jdbc.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/catalog/mod.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/catalog/nessie.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/catalog/rest.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/catalog/unity.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/clustering.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/compaction.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/embeddings.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/ffi.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/iceberg/iceberg_delete.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/iceberg.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/cuda/cosine_distance.cu +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/cuda/hamming_distance.cu +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/cuda/inner_product.cu +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/cuda/jaccard_distance.cu +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/cuda/kmeans_assignment.cu +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/cuda/l1_distance.cu +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/cuda/l2_distance.cu +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/distance.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/gpu.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_ivf.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_rs/annhdf5.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_rs/api.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_rs/dist.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_rs/flatten.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_rs/hnsw.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_rs/hnswio.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_rs/libext.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_rs/mod.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_rs/prelude.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/hnsw_rs/test.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/ivf.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/memory.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/mod.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/mps/cosine_distance.metal +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/mps/hamming_distance.metal +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/mps/inner_product.metal +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/mps/jaccard_distance.metal +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/mps/kmeans_assignment.metal +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/mps/l1_distance.metal +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/mps/l2_distance.metal +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/opencl/cosine_distance.cl +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/opencl/hamming_distance.cl +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/opencl/inner_product.cl +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/opencl/jaccard_distance.cl +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/opencl/kmeans_assignment.cl +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/opencl/l1_distance.cl +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/opencl/l2_distance.cl +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/pq.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/index/tokenizer.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/license.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/maintenance.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/manifest.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/merge.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/metadata.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/mod.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/nessie.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/planner.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/puffin.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/query.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/reader.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/segment.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/sql/mod.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/sql/optimizer.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/sql/pgvector_rewriter.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/sql/physical_plan/index_join.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/sql/physical_plan.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/sql/session.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/sql/vector_literal.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/sql/vector_operators.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/sql/vector_udf.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/storage.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/core/wal.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/enterprise/continuous_indexing.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/enterprise/license.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/enterprise/mod.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/index.rs.old +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/lib.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/python_binding.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/python_distance.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/python_gpu_context.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/telemetry/metrics.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/telemetry/mod.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/src/telemetry/tracing.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/task.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/all_types_index_test.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/bin/generate_iceberg_manifests.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/bin/verify_iceberg_read_check.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/check_mmh3.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/data/download_nyc_taxi.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/data/generate_embeddings.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/data/generate_wikipedia.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/data/start_nessie.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/datafusion_rust_test.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/debug_murmur3.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/fuzz_murmur3.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/integration_test_hnsw_ivf_native.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/performance/README.md +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/prototype_merge.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/schema_evolution_test.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_catalog_commit.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_compliance.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_delete_correctness.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_iceberg_python_delete.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_iceberg_rest.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_iceberg_rest_create.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_iceberg_rest_delete.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_iceberg_rest_remove_index.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_iceberg_rest_update.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_metadata_creation.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_mor_reads.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_mor_writes.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_partition_transforms.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_partitioned_writes.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_puffin_index.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_rest_updates.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/tests/verify_schema_compat.rs +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config/.DS_Store +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config/catalog/glue_catalog.properties +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config/catalog/hyperstreamdb.properties +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config/catalog/iceberg.properties +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config/catalog/memory.properties +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config/catalog/postgres.properties +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config/config.properties +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config/entrypoint.sh +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config/jvm.config +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config/node.properties +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-config.zip +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-hyperstream/pom.xml +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBColumnHandle.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBConnectorFactory.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBMetadata.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSource.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSourceProvider.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPlugin.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplit.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplitManager.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBTableHandle.java +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/update_schema_patch.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/update_schema_patch2.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/verify_docstrings.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/verify_fluent_api.py +0 -0
- {hyperstreamdb-0.2.0 → hyperstreamdb-0.2.1}/verify_unified_ingest.py +0 -0
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
// Copyright (c) 2026 Richard Albright. All rights reserved.
|
|
2
|
+
|
|
3
|
+
use anyhow::Result;
|
|
4
|
+
use std::sync::Arc;
|
|
5
|
+
use object_store::ObjectStore;
|
|
6
|
+
use tokio::runtime::Runtime;
|
|
7
|
+
use std::collections::HashMap;
|
|
8
|
+
use tokio::sync::Mutex;
|
|
9
|
+
use tracing;
|
|
10
|
+
use arrow::record_batch::RecordBatch;
|
|
11
|
+
use arrow::datatypes::{Schema, SchemaRef};
|
|
12
|
+
use arrow::array::Array;
|
|
13
|
+
use crate::core::catalog::Catalog;
|
|
14
|
+
use crate::core::storage::create_object_store;
|
|
15
|
+
use crate::core::manifest::ManifestManager;
|
|
16
|
+
use crate::core::query::QueryConfig;
|
|
17
|
+
use crate::core::wal::WriteAheadLog;
|
|
18
|
+
use crate::core::index::memory::InMemoryVectorIndex;
|
|
19
|
+
|
|
20
|
+
use super::Table;
|
|
21
|
+
|
|
22
|
+
/// Shared WAL recovery logic used by both sync and async Table constructors.
|
|
23
|
+
/// Promotes schema to the widest version, aligns all recovered batches, and
|
|
24
|
+
/// rebuilds the in-memory vector index from recovered data.
|
|
25
|
+
/// Returns (aligned_buffer, optional_memory_index, promoted_schema).
|
|
26
|
+
pub(crate) fn recover_wal_state(
|
|
27
|
+
recovered_batches: Vec<RecordBatch>,
|
|
28
|
+
mut schema_val: SchemaRef,
|
|
29
|
+
) -> (Vec<RecordBatch>, Option<InMemoryVectorIndex>, SchemaRef) {
|
|
30
|
+
if recovered_batches.is_empty() {
|
|
31
|
+
return (Vec::new(), None, schema_val);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
tracing::info!("Recovering {} batches from WAL...", recovered_batches.len());
|
|
35
|
+
|
|
36
|
+
// Use first batch schema if current schema is empty
|
|
37
|
+
if schema_val.fields().is_empty() {
|
|
38
|
+
if let Some(first) = recovered_batches.first() {
|
|
39
|
+
schema_val = first.schema();
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Safely attempt to merge all WAL schemas to capture any column additions
|
|
44
|
+
// or type evolutions instead of fragile field count comparisons.
|
|
45
|
+
let mut merged_schema = schema_val.as_ref().clone();
|
|
46
|
+
for batch in &recovered_batches {
|
|
47
|
+
match arrow::datatypes::Schema::try_merge(vec![merged_schema.clone(), batch.schema().as_ref().clone()]) {
|
|
48
|
+
Ok(s) => merged_schema = s,
|
|
49
|
+
Err(e) => tracing::warn!("Failed to merge WAL batch schema: {}", e),
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
let schema_val = std::sync::Arc::new(merged_schema);
|
|
53
|
+
|
|
54
|
+
// Align all recovered batches to the widest schema
|
|
55
|
+
let aligned_buffer: Vec<RecordBatch> = recovered_batches.into_iter().map(|b| {
|
|
56
|
+
if b.schema() != schema_val {
|
|
57
|
+
let mut cols = Vec::with_capacity(schema_val.fields().len());
|
|
58
|
+
for field in schema_val.fields() {
|
|
59
|
+
let col = if let Some(c) = b.column_by_name(field.name()) {
|
|
60
|
+
c.clone()
|
|
61
|
+
} else {
|
|
62
|
+
arrow::array::new_null_array(field.data_type(), b.num_rows())
|
|
63
|
+
};
|
|
64
|
+
cols.push(col);
|
|
65
|
+
}
|
|
66
|
+
RecordBatch::try_new(schema_val.clone(), cols).unwrap_or(b)
|
|
67
|
+
} else {
|
|
68
|
+
b
|
|
69
|
+
}
|
|
70
|
+
}).collect();
|
|
71
|
+
|
|
72
|
+
// Rebuild in-memory vector index from recovered data.
|
|
73
|
+
// Look for an "embedding" column (the most common convention), supporting
|
|
74
|
+
// both FixedSizeList and variable-length List arrays.
|
|
75
|
+
let col_name = aligned_buffer.first().and_then(|b| {
|
|
76
|
+
b.schema().fields().iter()
|
|
77
|
+
.find(|f| f.name() == "embedding")
|
|
78
|
+
.map(|f| f.name().clone())
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
let mut mem_index = None;
|
|
82
|
+
if let Some(ref col_name) = col_name {
|
|
83
|
+
if let Some(first) = aligned_buffer.first() {
|
|
84
|
+
if let Some(col) = first.column_by_name(col_name) {
|
|
85
|
+
let dim = if let Some(fsl) = col.as_any().downcast_ref::<arrow::array::FixedSizeListArray>() {
|
|
86
|
+
Some(fsl.value_length() as usize)
|
|
87
|
+
} else if let Some(list) = col.as_any().downcast_ref::<arrow::array::ListArray>() {
|
|
88
|
+
(0..list.len()).find_map(|i| {
|
|
89
|
+
if list.is_null(i) { None } else {
|
|
90
|
+
list.value(i).as_any().downcast_ref::<arrow::array::Float32Array>().map(|v| v.len())
|
|
91
|
+
}
|
|
92
|
+
})
|
|
93
|
+
} else { None };
|
|
94
|
+
|
|
95
|
+
if let Some(d) = dim {
|
|
96
|
+
let mut idx = InMemoryVectorIndex::new(d);
|
|
97
|
+
let mut offset = 0;
|
|
98
|
+
for batch in &aligned_buffer {
|
|
99
|
+
let _ = idx.insert_batch(batch, col_name, offset);
|
|
100
|
+
offset += batch.num_rows();
|
|
101
|
+
}
|
|
102
|
+
mem_index = Some(idx);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
(aligned_buffer, mem_index, schema_val)
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// ============================================================================
|
|
112
|
+
// Table Builder
|
|
113
|
+
// ============================================================================
|
|
114
|
+
|
|
115
|
+
pub struct TableBuilder {
|
|
116
|
+
uri: String,
|
|
117
|
+
catalog: Option<Arc<dyn Catalog>>,
|
|
118
|
+
catalog_namespace: Option<String>,
|
|
119
|
+
catalog_table_name: Option<String>,
|
|
120
|
+
runtime: Option<Arc<Runtime>>,
|
|
121
|
+
index_all: bool,
|
|
122
|
+
default_device: Option<String>,
|
|
123
|
+
query_config: QueryConfig,
|
|
124
|
+
data_store: Option<Arc<dyn ObjectStore>>,
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
impl TableBuilder {
|
|
128
|
+
pub fn new(uri: impl Into<String>) -> Self {
|
|
129
|
+
Self {
|
|
130
|
+
uri: uri.into(),
|
|
131
|
+
catalog: None,
|
|
132
|
+
catalog_namespace: None,
|
|
133
|
+
catalog_table_name: None,
|
|
134
|
+
runtime: None,
|
|
135
|
+
index_all: true,
|
|
136
|
+
default_device: None,
|
|
137
|
+
query_config: QueryConfig::default(),
|
|
138
|
+
data_store: None,
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
pub fn with_catalog(
|
|
143
|
+
mut self,
|
|
144
|
+
catalog: Arc<dyn Catalog>,
|
|
145
|
+
namespace: &str,
|
|
146
|
+
table_name: &str,
|
|
147
|
+
) -> Self {
|
|
148
|
+
self.catalog = Some(catalog);
|
|
149
|
+
self.catalog_namespace = Some(namespace.to_string());
|
|
150
|
+
self.catalog_table_name = Some(table_name.to_string());
|
|
151
|
+
self
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
pub fn with_runtime(mut self, rt: Arc<Runtime>) -> Self {
|
|
155
|
+
self.runtime = Some(rt);
|
|
156
|
+
self
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
pub fn with_index_all(mut self, index_all: bool) -> Self {
|
|
160
|
+
self.index_all = index_all;
|
|
161
|
+
self
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
/// Records `device` as the builder's default device, copying the string.
pub fn with_default_device(mut self, device: &str) -> Self {
    let device = String::from(device);
    self.default_device = Some(device);
    self
}
|
|
168
|
+
|
|
169
|
+
pub fn with_query_config(mut self, config: QueryConfig) -> Self {
|
|
170
|
+
self.query_config = config;
|
|
171
|
+
self
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
pub fn with_data_store(mut self, store: Arc<dyn ObjectStore>) -> Self {
|
|
175
|
+
self.data_store = Some(store);
|
|
176
|
+
self
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
pub async fn build_async(self) -> Result<Table> {
|
|
180
|
+
// Normalize URI to absolute path if it is local
|
|
181
|
+
let uri = if !self.uri.contains("://") || self.uri.starts_with("file://") {
|
|
182
|
+
let path = self.uri.strip_prefix("file://").unwrap_or(&self.uri);
|
|
183
|
+
let abs_path = std::fs::canonicalize(path).unwrap_or_else(|_| {
|
|
184
|
+
if let Ok(current) = std::env::current_dir() {
|
|
185
|
+
current.join(path)
|
|
186
|
+
} else {
|
|
187
|
+
std::path::PathBuf::from(path)
|
|
188
|
+
}
|
|
189
|
+
});
|
|
190
|
+
format!("file://{}", abs_path.display())
|
|
191
|
+
} else {
|
|
192
|
+
self.uri.clone()
|
|
193
|
+
};
|
|
194
|
+
|
|
195
|
+
if let Some((base, prefix, ns, table)) = Table::detect_iceberg_rest(&uri) {
|
|
196
|
+
return Box::pin(Table::new_from_rest(base, prefix, ns, table, &uri)).await;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
let store = create_object_store(&uri)?;
|
|
200
|
+
|
|
201
|
+
let manifest_manager = ManifestManager::new(store.clone(), "", &uri);
|
|
202
|
+
let (manifest, version) = manifest_manager.load_latest().await.unwrap_or_default();
|
|
203
|
+
let schema_val = if version > 0 {
|
|
204
|
+
Table::load_initial_schema(store.clone(), &uri).await
|
|
205
|
+
} else {
|
|
206
|
+
Arc::new(Schema::new(Vec::<arrow::datatypes::Field>::new()))
|
|
207
|
+
};
|
|
208
|
+
let partition_spec = Arc::new(manifest.partition_spec.clone());
|
|
209
|
+
|
|
210
|
+
// Initialize WAL
|
|
211
|
+
let wal_dir = if uri.starts_with("file://") {
|
|
212
|
+
let path = uri.strip_prefix("file://").unwrap();
|
|
213
|
+
std::path::PathBuf::from(path).join("_wal")
|
|
214
|
+
} else {
|
|
215
|
+
let safe_uri = uri.replace("://", "_").replace("/", "_");
|
|
216
|
+
std::env::temp_dir().join("hyperstream_wal").join(safe_uri)
|
|
217
|
+
};
|
|
218
|
+
|
|
219
|
+
if !wal_dir.exists() {
|
|
220
|
+
std::fs::create_dir_all(&wal_dir).unwrap_or_default();
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
let mut wal = WriteAheadLog::new(wal_dir);
|
|
224
|
+
let _ = wal.spawn_worker();
|
|
225
|
+
|
|
226
|
+
// Replay WAL (Recovery)
|
|
227
|
+
let (recovered_batches, recovered_paths) = wal.replay().unwrap_or_else(|e| {
|
|
228
|
+
tracing::warn!("WAL Recovery Warning: {}" , e);
|
|
229
|
+
(Vec::new(), Vec::new())
|
|
230
|
+
});
|
|
231
|
+
|
|
232
|
+
let (initial_buffer, initial_mem_index, schema_val) = recover_wal_state(
|
|
233
|
+
recovered_batches, schema_val,
|
|
234
|
+
);
|
|
235
|
+
|
|
236
|
+
let table = Table {
|
|
237
|
+
uri: uri.clone(),
|
|
238
|
+
store,
|
|
239
|
+
data_store: self.data_store,
|
|
240
|
+
rt: self.runtime,
|
|
241
|
+
query_config: self.query_config,
|
|
242
|
+
index_all: self.index_all,
|
|
243
|
+
index_columns: Arc::new(std::sync::RwLock::new(Vec::new())),
|
|
244
|
+
index_configs: Arc::new(std::sync::RwLock::new(HashMap::new())),
|
|
245
|
+
default_device: Arc::new(std::sync::RwLock::new(self.default_device)),
|
|
246
|
+
schema: Arc::new(std::sync::RwLock::new(schema_val)),
|
|
247
|
+
write_buffer: Arc::new(std::sync::RwLock::new(initial_buffer)),
|
|
248
|
+
memory_index: Arc::new(std::sync::RwLock::new(initial_mem_index)),
|
|
249
|
+
wal: Arc::new(Mutex::new(wal)),
|
|
250
|
+
background_tasks: Arc::new(Mutex::new(Vec::new())),
|
|
251
|
+
catalog: self.catalog,
|
|
252
|
+
catalog_namespace: self.catalog_namespace,
|
|
253
|
+
catalog_table_name: self.catalog_table_name,
|
|
254
|
+
sort_order: Arc::new(std::sync::RwLock::new(None)),
|
|
255
|
+
sort_order_columns: Arc::new(std::sync::RwLock::new(None)),
|
|
256
|
+
#[cfg(feature = "enterprise")]
|
|
257
|
+
enterprise_license: None,
|
|
258
|
+
primary_key: Arc::new(std::sync::RwLock::new(Vec::new())),
|
|
259
|
+
autocommit: Arc::new(std::sync::atomic::AtomicBool::new(true)),
|
|
260
|
+
recovered_wal_paths: Arc::new(std::sync::Mutex::new(recovered_paths)),
|
|
261
|
+
partition_spec,
|
|
262
|
+
};
|
|
263
|
+
|
|
264
|
+
table.sync_primary_key_from_schema_async().await.ok();
|
|
265
|
+
Ok(table)
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
pub fn build(mut self) -> Result<Table> {
|
|
269
|
+
let rt = match self.runtime {
|
|
270
|
+
Some(ref r) => r.clone(),
|
|
271
|
+
None => {
|
|
272
|
+
let r = Arc::new(Runtime::new()?);
|
|
273
|
+
self.runtime = Some(r.clone());
|
|
274
|
+
r
|
|
275
|
+
}
|
|
276
|
+
};
|
|
277
|
+
rt.block_on(self.build_async())
|
|
278
|
+
}
|
|
279
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
// Copyright (c) 2026 Richard Albright. All rights reserved.
|
|
2
|
+
|
|
3
|
+
use anyhow::Result;
|
|
4
|
+
use arrow::record_batch::RecordBatch;
|
|
5
|
+
use crate::core::planner::VectorSearchParams;
|
|
6
|
+
use crate::core::index::gpu::{set_global_gpu_context, ComputeContext};
|
|
7
|
+
|
|
8
|
+
use super::Table;
|
|
9
|
+
|
|
10
|
+
pub struct TableQuery<'a> {
|
|
11
|
+
pub table: &'a Table,
|
|
12
|
+
pub filter_str: Option<String>,
|
|
13
|
+
pub vector_filter: Option<VectorSearchParams>,
|
|
14
|
+
pub columns: Option<Vec<String>>,
|
|
15
|
+
pub context: Option<ComputeContext>,
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
impl<'a> TableQuery<'a> {
|
|
19
|
+
pub fn new(table: &'a Table) -> Self {
|
|
20
|
+
Self {
|
|
21
|
+
table,
|
|
22
|
+
filter_str: None,
|
|
23
|
+
vector_filter: None,
|
|
24
|
+
columns: None,
|
|
25
|
+
context: None,
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
pub fn filter(mut self, expr: &str) -> Self {
|
|
30
|
+
if let Some(ref mut f) = self.filter_str {
|
|
31
|
+
*f = format!("({}) AND ({})", f, expr);
|
|
32
|
+
} else {
|
|
33
|
+
self.filter_str = Some(expr.to_string());
|
|
34
|
+
}
|
|
35
|
+
self
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
pub fn vector_search(mut self, column: &str, query: crate::core::index::VectorValue, k: usize) -> Self {
|
|
39
|
+
self.vector_filter = Some(VectorSearchParams::new(column, query, k));
|
|
40
|
+
self
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
pub fn select(mut self, columns: Vec<String>) -> Self {
|
|
44
|
+
self.columns = Some(columns);
|
|
45
|
+
self
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
pub fn with_context(mut self, context: ComputeContext) -> Self {
|
|
49
|
+
self.context = Some(context);
|
|
50
|
+
self
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
pub async fn to_batches(self) -> Result<Vec<RecordBatch>> {
|
|
54
|
+
let cols_refs: Option<Vec<&str>> = self.columns.as_ref().map(|c| c.iter().map(|s| s.as_str()).collect());
|
|
55
|
+
let cols_slice: Option<&[&str]> = cols_refs.as_deref();
|
|
56
|
+
|
|
57
|
+
// Inject context if provided
|
|
58
|
+
if let Some(ctx) = self.context {
|
|
59
|
+
set_global_gpu_context(Some(ctx));
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
self.table.read_async(self.filter_str.as_deref(), self.vector_filter, cols_slice).await
|
|
63
|
+
}
|
|
64
|
+
}
|