hyperstreamdb 0.3.1__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (301) hide show
  1. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/Cargo.lock +1 -1
  2. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/Cargo.toml +1 -1
  3. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/PKG-INFO +1 -1
  4. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/cache.rs +32 -3
  5. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/reader.rs +4 -5
  6. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/segment.rs +2 -2
  7. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/table/builder.rs +36 -15
  8. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/table/read.rs +74 -25
  9. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/wal.rs +31 -25
  10. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/python_binding.rs +102 -100
  11. hyperstreamdb-0.3.2/tests/integration_test_hnsw_ivf_native.rs +288 -0
  12. hyperstreamdb-0.3.2/tests/stability.rs +70 -0
  13. hyperstreamdb-0.3.1/tests/integration_test_hnsw_ivf_native.rs +0 -138
  14. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/.gitattributes +0 -0
  15. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/.gitignore +0 -0
  16. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/.hypothesis/constants/32b327793848e7d8 +0 -0
  17. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/.hypothesis/constants/67b0a8ccf18bf5d2 +0 -0
  18. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/.hypothesis/constants/84828557b4ee7be4 +0 -0
  19. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/.instructions.md +0 -0
  20. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/.ipynb_checkpoints/Untitled-checkpoint.ipynb +0 -0
  21. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/CNAME +0 -0
  22. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/DORIS_OPTIMIZATION_PATTERNS.md +0 -0
  23. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/HyperStreamDB.png +0 -0
  24. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/LICENSE-APACHE +0 -0
  25. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/LICENSE-MIT +0 -0
  26. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/README.md +0 -0
  27. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/RUN_COMPLIANCE_TESTS.sh +0 -0
  28. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/STEERING.md +0 -0
  29. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/THIRDPARTY_NOTICES.md +0 -0
  30. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/Untitled.ipynb +0 -0
  31. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benches/bench_table.rs +0 -0
  32. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benches/performance.rs +0 -0
  33. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/BENCHMARK_REPORT.md +0 -0
  34. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/benchmark_charts.png +0 -0
  35. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/benchmark_results.csv +0 -0
  36. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/concurrent_queries_20260409_214245.json +0 -0
  37. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/concurrent_queries_20260409_214245.md +0 -0
  38. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/filtered_search_comparison_20260409_222607.json +0 -0
  39. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/filtered_search_comparison_20260409_222607.md +0 -0
  40. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_214355.json +0 -0
  41. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_214355.md +0 -0
  42. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_220418.json +0 -0
  43. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_220418.md +0 -0
  44. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_222053.json +0 -0
  45. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_222053.md +0 -0
  46. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_225907.json +0 -0
  47. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/filtered_vector_search_20260409_225907.md +0 -0
  48. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/full_scan_baseline_20260409_222303.json +0 -0
  49. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/full_scan_baseline_20260409_222303.md +0 -0
  50. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/high_selectivity_filter_20260409_222302.json +0 -0
  51. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/high_selectivity_filter_20260409_222302.md +0 -0
  52. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/ingestion_comparison_20260409_222516.json +0 -0
  53. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/ingestion_comparison_20260409_222516.md +0 -0
  54. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_214428.json +0 -0
  55. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_214428.md +0 -0
  56. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_220450.json +0 -0
  57. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_220450.md +0 -0
  58. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_222131.json +0 -0
  59. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_222131.md +0 -0
  60. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_225938.json +0 -0
  61. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_225938.md +0 -0
  62. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_231713.json +0 -0
  63. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/multi_filter_vector_20260409_231713.md +0 -0
  64. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_214501.json +0 -0
  65. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_214501.md +0 -0
  66. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_220524.json +0 -0
  67. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_220524.md +0 -0
  68. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_222204.json +0 -0
  69. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_222204.md +0 -0
  70. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_230010.json +0 -0
  71. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/post_vs_pre_filter_20260409_230010.md +0 -0
  72. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/query_comparison_20260409_222541.json +0 -0
  73. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/query_comparison_20260409_222541.md +0 -0
  74. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/range_query_20260409_222302.json +0 -0
  75. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/range_query_20260409_222302.md +0 -0
  76. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/search_filtered_high_selectivity_20260409_214144.json +0 -0
  77. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/search_filtered_high_selectivity_20260409_214144.md +0 -0
  78. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/search_unfiltered_20260409_214028.json +0 -0
  79. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/benchmark_results/search_unfiltered_20260409_214028.md +0 -0
  80. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/book.toml +0 -0
  81. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/broken_binaries_all.txt +0 -0
  82. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/broken_bins.txt +0 -0
  83. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/build-connectors.sh +0 -0
  84. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/build.rs +0 -0
  85. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/check_iceberg_compliance.py +0 -0
  86. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/compliance_output.txt +0 -0
  87. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/critical_code_review.md +0 -0
  88. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/debug_log.txt +0 -0
  89. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/demo_basics_run.txt +0 -0
  90. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/demo_basics_v2.txt +0 -0
  91. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docker-compose-minio-nessie.yml +0 -0
  92. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docker-compose.yml +0 -0
  93. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/.nojekyll +0 -0
  94. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/BENCHMARKING.md +0 -0
  95. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/COMPREHENSIVE_GUIDE.md +0 -0
  96. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/CONCURRENCY.md +0 -0
  97. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/CONFIGURATION.md +0 -0
  98. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/GPU_SETUP_GUIDE.md +0 -0
  99. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/ICEBERG_V2_V3_API.md +0 -0
  100. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/INSTALLATION.md +0 -0
  101. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/PGVECTOR_SQL_GUIDE.md +0 -0
  102. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/PYTHON_VECTOR_API.md +0 -0
  103. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/VECTOR_CONFIGURATION.md +0 -0
  104. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/api_reference.md +0 -0
  105. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/architecture.md +0 -0
  106. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/catalog_usage.md +0 -0
  107. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/index.md +0 -0
  108. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/integrations/README.md +0 -0
  109. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/integrations/java_jni.md +0 -0
  110. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/integrations/python.md +0 -0
  111. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/integrations/spark.md +0 -0
  112. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/integrations/trino.md +0 -0
  113. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/requirements.txt +0 -0
  114. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/source/_static/HyperStreamDB.png +0 -0
  115. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/source/api/python.rst +0 -0
  116. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/source/api/rust.rst +0 -0
  117. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/source/conf.py +0 -0
  118. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/source/index.rst +0 -0
  119. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/docs/source/roadmap.md +0 -0
  120. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/fix_nb.py +0 -0
  121. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/proptest-regressions/core/index/gpu.txt +0 -0
  122. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/proptest-regressions/core/sql/vector_literal.txt +0 -0
  123. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/proptest-regressions/core/sql/vector_udf.txt +0 -0
  124. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/pyproject.toml +0 -0
  125. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/python/hyperstreamdb/__init__.py +0 -0
  126. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/python/hyperstreamdb/embeddings.py +0 -0
  127. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/python_test_output.txt +0 -0
  128. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/python_test_output_v2.txt +0 -0
  129. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/python_test_output_v3.txt +0 -0
  130. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/rust_check_all_warnings.txt +0 -0
  131. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/rust_test_output.txt +0 -0
  132. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/rust_warnings.txt +0 -0
  133. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/scratch/check_os_error.rs +0 -0
  134. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/simd_test_results.txt +0 -0
  135. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/spark-hyperstream/.bloop/bloop.settings.json +0 -0
  136. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/spark-hyperstream/.bloop/spark-hyperstream-test.json +0 -0
  137. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/spark-hyperstream/.bloop/spark-hyperstream.json +0 -0
  138. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/spark-hyperstream/pom.xml +0 -0
  139. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/DefaultSource.java +0 -0
  140. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartition.java +0 -0
  141. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReader.java +0 -0
  142. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamPartitionReaderFactory.java +0 -0
  143. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamScanBuilder.java +0 -0
  144. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/spark-hyperstream/src/main/java/com/hyperstreamdb/spark/HyperStreamTable.java +0 -0
  145. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/split_table.py +0 -0
  146. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/bin/gateway.rs +0 -0
  147. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/bin/hdb.rs +0 -0
  148. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/bin/iceberg_rest.rs +0 -0
  149. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/bin/probe_datafusion.rs +0 -0
  150. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/bin/setup_test_data.rs +0 -0
  151. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/bin/verify_layered_indexing.rs +0 -0
  152. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/catalog/config.rs +0 -0
  153. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/catalog/glue.rs +0 -0
  154. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/catalog/hive.rs +0 -0
  155. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/catalog/jdbc.rs +0 -0
  156. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/catalog/mod.rs +0 -0
  157. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/catalog/nessie.rs +0 -0
  158. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/catalog/rest.rs +0 -0
  159. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/catalog/unity.rs +0 -0
  160. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/clustering.rs +0 -0
  161. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/compaction.rs +0 -0
  162. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/embeddings.rs +0 -0
  163. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/ffi.rs +0 -0
  164. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/iceberg/iceberg_delete.rs +0 -0
  165. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/iceberg.rs +0 -0
  166. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/cuda/cosine_distance.cu +0 -0
  167. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/cuda/hamming_distance.cu +0 -0
  168. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/cuda/inner_product.cu +0 -0
  169. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/cuda/jaccard_distance.cu +0 -0
  170. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/cuda/kmeans_assignment.cu +0 -0
  171. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/cuda/l1_distance.cu +0 -0
  172. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/cuda/l2_distance.cu +0 -0
  173. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/distance.rs +0 -0
  174. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/gpu.rs +0 -0
  175. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_ivf.rs +0 -0
  176. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/annhdf5.rs +0 -0
  177. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/api.rs +0 -0
  178. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/dist.rs +0 -0
  179. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/flatten.rs +0 -0
  180. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/hnsw.rs +0 -0
  181. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/hnswio.rs +0 -0
  182. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/libext.rs +0 -0
  183. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/mod.rs +0 -0
  184. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/prelude.rs +0 -0
  185. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/hnsw_rs/test.rs +0 -0
  186. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/ivf.rs +0 -0
  187. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/memory.rs +0 -0
  188. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/mod.rs +0 -0
  189. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/mps/cosine_distance.metal +0 -0
  190. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/mps/hamming_distance.metal +0 -0
  191. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/mps/inner_product.metal +0 -0
  192. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/mps/jaccard_distance.metal +0 -0
  193. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/mps/kmeans_assignment.metal +0 -0
  194. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/mps/l1_distance.metal +0 -0
  195. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/mps/l2_distance.metal +0 -0
  196. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/opencl/cosine_distance.cl +0 -0
  197. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/opencl/hamming_distance.cl +0 -0
  198. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/opencl/inner_product.cl +0 -0
  199. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/opencl/jaccard_distance.cl +0 -0
  200. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/opencl/kmeans_assignment.cl +0 -0
  201. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/opencl/l1_distance.cl +0 -0
  202. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/opencl/l2_distance.cl +0 -0
  203. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/pq.rs +0 -0
  204. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/tokenizer.rs +0 -0
  205. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/turboquant.rs +0 -0
  206. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/index/wgpu_kernel.wgsl +0 -0
  207. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/license.rs +0 -0
  208. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/maintenance.rs +0 -0
  209. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/manifest.rs +0 -0
  210. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/merge.rs +0 -0
  211. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/metadata.rs +0 -0
  212. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/mod.rs +0 -0
  213. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/nessie.rs +0 -0
  214. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/planner.rs +0 -0
  215. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/puffin.rs +0 -0
  216. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/query.rs +0 -0
  217. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/search/mod.rs +0 -0
  218. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/search/rrf.rs +0 -0
  219. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/sql/mod.rs +0 -0
  220. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/sql/optimizer.rs +0 -0
  221. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/sql/pgvector_rewriter.rs +0 -0
  222. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/sql/physical_plan/index_join.rs +0 -0
  223. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/sql/physical_plan.rs +0 -0
  224. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/sql/session.rs +0 -0
  225. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/sql/vector_literal.rs +0 -0
  226. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/sql/vector_operators.rs +0 -0
  227. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/sql/vector_udf.rs +0 -0
  228. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/storage.rs +0 -0
  229. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/table/fluent.rs +0 -0
  230. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/table/mod.rs +0 -0
  231. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/table/schema.rs +0 -0
  232. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/core/table/write.rs +0 -0
  233. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/enterprise/continuous_indexing.rs +0 -0
  234. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/enterprise/license.rs +0 -0
  235. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/enterprise/mod.rs +0 -0
  236. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/index.rs.old +0 -0
  237. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/lib.rs +0 -0
  238. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/python_distance.rs +0 -0
  239. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/python_gpu_context.rs +0 -0
  240. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/telemetry/metrics.rs +0 -0
  241. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/telemetry/mod.rs +0 -0
  242. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/src/telemetry/tracing.rs +0 -0
  243. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/task.md +0 -0
  244. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/all_types_index_test.rs +0 -0
  245. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/bin/generate_iceberg_manifests.rs +0 -0
  246. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/bin/verify_iceberg_read_check.rs +0 -0
  247. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/check_mmh3.py +0 -0
  248. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/data/download_nyc_taxi.sh +0 -0
  249. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/data/generate_embeddings.py +0 -0
  250. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/data/generate_wikipedia.py +0 -0
  251. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/data/start_nessie.sh +0 -0
  252. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/datafusion_rust_test.rs +0 -0
  253. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/debug_murmur3.rs +0 -0
  254. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/fuzz_murmur3.rs +0 -0
  255. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/performance/README.md +0 -0
  256. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/prototype_merge.py +0 -0
  257. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/schema_evolution_test.rs +0 -0
  258. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_all_algos.py +0 -0
  259. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_catalog_commit.rs +0 -0
  260. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_compliance.rs +0 -0
  261. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_delete_correctness.rs +0 -0
  262. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_iceberg_python_delete.sh +0 -0
  263. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_iceberg_rest.sh +0 -0
  264. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_iceberg_rest_create.sh +0 -0
  265. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_iceberg_rest_delete.sh +0 -0
  266. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_iceberg_rest_remove_index.sh +0 -0
  267. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_iceberg_rest_update.sh +0 -0
  268. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_metadata_creation.rs +0 -0
  269. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_mor_reads.rs +0 -0
  270. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_mor_writes.rs +0 -0
  271. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_partition_transforms.rs +0 -0
  272. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_partitioned_writes.rs +0 -0
  273. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_puffin_index.sh +0 -0
  274. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_rest_updates.sh +0 -0
  275. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/tests/verify_schema_compat.rs +0 -0
  276. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config/.DS_Store +0 -0
  277. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config/catalog/glue_catalog.properties +0 -0
  278. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config/catalog/hyperstreamdb.properties +0 -0
  279. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config/catalog/iceberg.properties +0 -0
  280. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config/catalog/memory.properties +0 -0
  281. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config/catalog/postgres.properties +0 -0
  282. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config/config.properties +0 -0
  283. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config/entrypoint.sh +0 -0
  284. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config/jvm.config +0 -0
  285. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config/node.properties +0 -0
  286. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-config.zip +0 -0
  287. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-hyperstream/pom.xml +0 -0
  288. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBColumnHandle.java +0 -0
  289. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBConnectorFactory.java +0 -0
  290. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBMetadata.java +0 -0
  291. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSource.java +0 -0
  292. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPageSourceProvider.java +0 -0
  293. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBPlugin.java +0 -0
  294. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplit.java +0 -0
  295. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBSplitManager.java +0 -0
  296. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/trino-hyperstream/src/main/java/com/hyperstreamdb/trino/HyperStreamDBTableHandle.java +0 -0
  297. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/update_schema_patch.py +0 -0
  298. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/update_schema_patch2.py +0 -0
  299. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/verify_docstrings.py +0 -0
  300. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/verify_fluent_api.py +0 -0
  301. {hyperstreamdb-0.3.1 → hyperstreamdb-0.3.2}/verify_unified_ingest.py +0 -0
@@ -3375,7 +3375,7 @@ dependencies = [
3375
3375
 
3376
3376
  [[package]]
3377
3377
  name = "hyperstreamdb"
3378
- version = "0.3.1"
3378
+ version = "0.3.2"
3379
3379
  dependencies = [
3380
3380
  "ahash 0.8.12",
3381
3381
  "anyhow",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "hyperstreamdb"
3
- version = "0.3.1"
3
+ version = "0.3.2"
4
4
  edition = "2021"
5
5
  license = "MIT AND Apache-2.0"
6
6
  description = "HyperStreamDB - Serverless Index-Streaming Database with Overlay Indexing and Vector Search"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hyperstreamdb
3
- Version: 0.3.1
3
+ Version: 0.3.2
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: Intended Audience :: Developers
6
6
  Classifier: Programming Language :: Rust
@@ -125,8 +125,22 @@ pub static INDEX_CACHE: Lazy<Cache<String, Arc<RoaringBitmap>>> = Lazy::new(|| {
125
125
  });
126
126
 
127
127
  pub static BYTE_CACHE: Lazy<Cache<String, Arc<Vec<u8>>>> = Lazy::new(|| {
128
+ let cache_gb: u64 = std::env::var("HYPERSTREAM_CACHE_GB")
129
+ .unwrap_or_else(|_| "2".to_string())
130
+ .parse()
131
+ .unwrap_or(2);
132
+
133
+ // Allocate 10% of global cache to Byte Cache (max 512MB)
134
+ let limit_bytes = (cache_gb * 1024 * 1024 * 1024 / 10).min(512 * 1024 * 1024);
135
+ let max_kb = limit_bytes / 1024;
136
+
137
+ tracing::info!("Initializing Byte Cache with {} MB limit", limit_bytes / (1024 * 1024));
138
+
128
139
  Cache::builder()
129
- .max_capacity(1000) // 1000 small files (manifests, etc)
140
+ .weigher(|_key, value: &Arc<Vec<u8>>| -> u32 {
141
+ (value.len() / 1024) as u32
142
+ })
143
+ .max_capacity(max_kb)
130
144
  .time_to_idle(Duration::from_secs(60 * 30)) // 30 mins
131
145
  .build()
132
146
  });
@@ -163,9 +177,24 @@ pub static HNSW_IVF_CACHE: Lazy<Cache<String, Arc<HnswIvfIndex>>> = Lazy::new(||
163
177
  });
164
178
 
165
179
  pub static INVERTED_INDEX_CACHE: Lazy<Cache<String, Arc<Vec<RecordBatch>>>> = Lazy::new(|| {
180
+ let cache_gb: u64 = std::env::var("HYPERSTREAM_CACHE_GB")
181
+ .unwrap_or_else(|_| "2".to_string())
182
+ .parse()
183
+ .unwrap_or(2);
184
+
185
+ // Allocate 25% of global cache to Inverted Index Cache
186
+ let limit_bytes = cache_gb * 1024 * 1024 * 1024 / 4;
187
+ let max_kb = limit_bytes / 1024;
188
+
189
+ tracing::info!("Initializing Inverted Index Cache with {} MB limit", limit_bytes / (1024 * 1024));
190
+
166
191
  Cache::builder()
167
- .max_capacity(1000) // Cache 1000 decoded inverted index files
168
- .time_to_idle(Duration::from_secs(60 * 5))
192
+ .weigher(|_key, value: &Arc<Vec<RecordBatch>>| -> u32 {
193
+ let bytes: usize = value.iter().map(|b| b.get_array_memory_size()).sum();
194
+ (bytes / 1024) as u32
195
+ })
196
+ .max_capacity(max_kb)
197
+ .time_to_idle(Duration::from_secs(60 * 15))
169
198
  .build()
170
199
  });
171
200
 
@@ -1635,11 +1635,10 @@ impl HybridReader {
1635
1635
  };
1636
1636
 
1637
1637
  // Load HNSW-IVF index
1638
- let hnsw_ivf = if idx_info.blob_type.is_some() {
1639
- HnswIvfIndex::load_puffin_async(self.store.clone(), &idx_path_str).await?
1640
- } else {
1641
- HnswIvfIndex::load_async_with_cache_key(self.store.clone(), &idx_path_str, &cache_key).await?
1642
- };
1638
+ // NOTE: blob_type records the *algorithm* (e.g. "hnsw_tq8"), not the storage format.
1639
+ // The writer always uses the multi-file layout (.centroids.parquet, .cluster_N.hnsw.*),
1640
+ // so we always load via load_async_with_cache_key regardless of blob_type.
1641
+ let hnsw_ivf = HnswIvfIndex::load_async_with_cache_key(self.store.clone(), &idx_path_str, &cache_key).await?;
1643
1642
 
1644
1643
  // Search with HNSW-IVF
1645
1644
  let query_clone = query.clone();
@@ -1551,8 +1551,8 @@ mod tests {
1551
1551
  assert!(std::path::Path::new(&format!("{}.id.inv.parquet", base)).exists(), "Inverted index for id should exist");
1552
1552
 
1553
1553
  // Vector Index (embedding) - HNSW-IVF saves centroids and cluster graphs
1554
- assert!(std::path::Path::new(&format!("{}.embedding.centroids.parquet", base)).exists(), "Vector index centroids should exist");
1555
- assert!(std::path::Path::new(&format!("{}.embedding.cluster_0.hnsw.graph", base)).exists(), "Vector index graph should exist");
1554
+ assert!(std::path::Path::new(&format!("{}.embedding.tq8.centroids.parquet", base)).exists(), "Vector index centroids should exist");
1555
+ assert!(std::path::Path::new(&format!("{}.embedding.tq8.cluster_0.hnsw.graph", base)).exists(), "Vector index graph should exist");
1556
1556
 
1557
1557
  Ok(())
1558
1558
  }
@@ -24,18 +24,37 @@ use super::Table;
24
24
  /// rebuilds the in-memory vector index from recovered data.
25
25
  /// Returns (aligned_buffer, optional_memory_index, promoted_schema).
26
26
  pub(crate) fn recover_wal_state(
27
- recovered_batches: Vec<RecordBatch>,
27
+ recovered_stream: Box<dyn Iterator<Item = Result<RecordBatch>>>,
28
28
  mut schema_val: SchemaRef,
29
29
  ) -> (Vec<RecordBatch>, Option<InMemoryVectorIndex>, SchemaRef) {
30
- if recovered_batches.is_empty() {
30
+ let mut aligned_buffer = Vec::new();
31
+ let mut total_rows = 0;
32
+
33
+ // 1. First pass: Collect batches and merge schema
34
+ let mut batches = Vec::new();
35
+ for batch_res in recovered_stream {
36
+ match batch_res {
37
+ Ok(batch) => {
38
+ // Safely merge schema
39
+ match arrow::datatypes::Schema::try_merge(vec![schema_val.as_ref().clone(), batch.schema().as_ref().clone()]) {
40
+ Ok(s) => schema_val = Arc::new(s),
41
+ Err(e) => tracing::warn!("Failed to merge WAL batch schema: {}", e),
42
+ }
43
+ batches.push(batch);
44
+ }
45
+ Err(e) => tracing::error!("WAL Replay Error: {}", e),
46
+ }
47
+ }
48
+
49
+ if batches.is_empty() {
31
50
  return (Vec::new(), None, schema_val);
32
51
  }
33
52
 
34
- tracing::info!("Recovering {} batches from WAL...", recovered_batches.len());
53
+ tracing::info!("Recovering {} batches from WAL...", batches.len());
35
54
 
36
55
  // Use first batch schema if current schema is empty
37
56
  if schema_val.fields().is_empty() {
38
- if let Some(first) = recovered_batches.first() {
57
+ if let Some(first) = batches.first() {
39
58
  schema_val = first.schema();
40
59
  }
41
60
  }
@@ -43,7 +62,7 @@ pub(crate) fn recover_wal_state(
43
62
  // Safely attempt to merge all WAL schemas to capture any column additions
44
63
  // or type evolutions instead of fragile field count comparisons.
45
64
  let mut merged_schema = schema_val.as_ref().clone();
46
- for batch in &recovered_batches {
65
+ for batch in &batches {
47
66
  match arrow::datatypes::Schema::try_merge(vec![merged_schema.clone(), batch.schema().as_ref().clone()]) {
48
67
  Ok(s) => merged_schema = s,
49
68
  Err(e) => tracing::warn!("Failed to merge WAL batch schema: {}", e),
@@ -52,8 +71,8 @@ pub(crate) fn recover_wal_state(
52
71
  let schema_val = std::sync::Arc::new(merged_schema);
53
72
 
54
73
  // Align all recovered batches to the widest schema
55
- let aligned_buffer: Vec<RecordBatch> = recovered_batches.into_iter().map(|b| {
56
- if b.schema() != schema_val {
74
+ for b in batches {
75
+ let aligned = if b.schema() != schema_val {
57
76
  let mut cols = Vec::with_capacity(schema_val.fields().len());
58
77
  for field in schema_val.fields() {
59
78
  let col = if let Some(c) = b.column_by_name(field.name()) {
@@ -66,8 +85,9 @@ pub(crate) fn recover_wal_state(
66
85
  RecordBatch::try_new(schema_val.clone(), cols).unwrap_or(b)
67
86
  } else {
68
87
  b
69
- }
70
- }).collect();
88
+ };
89
+ aligned_buffer.push(aligned);
90
+ }
71
91
 
72
92
  // Rebuild in-memory vector index from recovered data.
73
93
  // Look for an "embedding" column (the most common convention), supporting
@@ -94,10 +114,9 @@ pub(crate) fn recover_wal_state(
94
114
 
95
115
  if let Some(d) = dim {
96
116
  let mut idx = InMemoryVectorIndex::new(d);
97
- let mut offset = 0;
98
117
  for batch in &aligned_buffer {
99
- let _ = idx.insert_batch(batch, col_name, offset);
100
- offset += batch.num_rows();
118
+ let _ = idx.insert_batch(batch, col_name, total_rows);
119
+ total_rows += batch.num_rows();
101
120
  }
102
121
  mem_index = Some(idx);
103
122
  }
@@ -231,13 +250,15 @@ impl TableBuilder {
231
250
  let _ = wal.spawn_worker();
232
251
 
233
252
  // Replay WAL (Recovery)
234
- let (recovered_batches, recovered_paths) = wal.replay().unwrap_or_else(|e| {
253
+ let recovered_stream = wal.replay_stream().unwrap_or_else(|e| {
235
254
  tracing::warn!("WAL Recovery Warning: {}" , e);
236
- (Vec::new(), Vec::new())
255
+ Box::new(std::iter::empty())
237
256
  });
238
257
 
258
+ let (_, recovered_paths) = wal.replay().unwrap_or_else(|_| (vec![], vec![])); // For paths cleanup only
259
+
239
260
  let (initial_buffer, initial_mem_index, schema_val) = recover_wal_state(
240
- recovered_batches, schema_val,
261
+ recovered_stream, schema_val,
241
262
  );
242
263
 
243
264
  let table = Table {
@@ -18,6 +18,7 @@ use crate::SegmentConfig;
18
18
 
19
19
  use super::Table;
20
20
  use crate::core::search::{HybridSearchCoordinator, KeywordSearchParams, ScoredResult};
21
+ use futures::stream::BoxStream;
21
22
 
22
23
  impl Table {
23
24
 
@@ -45,6 +46,21 @@ impl Table {
45
46
  self.read_with_config_async(filter_str, vector_filter, columns, self.query_config.clone()).await
46
47
  }
47
48
 
49
+ pub async fn read_stream_async(&self, filter_str: Option<&str>, vector_filter: Option<VectorSearchParams>, columns: Option<&[&str]>) -> Result<BoxStream<'static, Result<RecordBatch>>> {
50
+ self.read_with_config_stream_async(filter_str, vector_filter, columns, self.query_config.clone()).await
51
+ }
52
+
53
+ pub async fn read_with_config_stream_async(&self, filter_str: Option<&str>, vector_filter: Option<VectorSearchParams>, columns: Option<&[&str]>, config: QueryConfig) -> Result<BoxStream<'static, Result<RecordBatch>>> {
54
+ let expr = match filter_str {
55
+ Some(f) => {
56
+ let schema = self.arrow_schema();
57
+ Some(FilterExpr::parse_sql(f, schema).await?)
58
+ }
59
+ _ => None,
60
+ };
61
+ self.read_expr_stream_async(expr, vector_filter, columns, config, filter_str).await
62
+ }
63
+
48
64
  pub fn query(&self) -> TableQuery<'_> {
49
65
  TableQuery::new(self)
50
66
  }
@@ -277,6 +293,19 @@ impl Table {
277
293
  config: QueryConfig,
278
294
  filter_str: Option<&str>,
279
295
  ) -> Result<Vec<RecordBatch>> {
296
+ let stream = self.read_expr_stream_async(expr, vector_filter, columns, config, filter_str).await?;
297
+ let results: Vec<Result<RecordBatch>> = stream.collect().await;
298
+ results.into_iter().collect()
299
+ }
300
+
301
+ pub async fn read_expr_stream_async(
302
+ &self,
303
+ expr: Option<FilterExpr>,
304
+ vector_filter: Option<VectorSearchParams>,
305
+ columns: Option<&[&str]>,
306
+ config: QueryConfig,
307
+ filter_str: Option<&str>,
308
+ ) -> Result<BoxStream<'static, Result<RecordBatch>>> {
280
309
  use futures::StreamExt;
281
310
 
282
311
  let manifest_manager = ManifestManager::new(self.store.clone(), "", &self.uri);
@@ -375,7 +404,9 @@ impl Table {
375
404
 
376
405
  // Convert ScoredResults back to RecordBatches by fetching from Parquet
377
406
  // This is a simplified version of the final row-fetcher
378
- return self.fetch_results_by_id(scored_results, columns).await;
407
+ let results = self.fetch_results_by_id(scored_results, columns).await?;
408
+ return Ok(futures::stream::iter(results.into_iter().map(Ok)).boxed());
409
+
379
410
  }
380
411
  }
381
412
 
@@ -472,7 +503,7 @@ impl Table {
472
503
  }
473
504
  }
474
505
 
475
- return Ok(results);
506
+ return Ok(futures::stream::iter(results.into_iter().map(Ok)).boxed());
476
507
  }
477
508
 
478
509
  // Extract Iceberg schema from the already-loaded manifest to avoid
@@ -490,35 +521,52 @@ impl Table {
490
521
  let concurrency = config.max_parallel_readers.unwrap_or_else(|| {
491
522
  std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4)
492
523
  });
524
+
525
+ struct ReadCtx {
526
+ table: Table,
527
+ expr: Option<Arc<FilterExpr>>,
528
+ schema: Option<Arc<crate::core::manifest::Schema>>,
529
+ gpu: Option<crate::core::index::gpu::ComputeContext>,
530
+ columns: Option<Vec<String>>,
531
+ version: u64,
532
+ }
533
+ let read_ctx = Arc::new(ReadCtx {
534
+ table: self.clone(),
535
+ expr: expr_arc.clone(),
536
+ schema: iceberg_schema_arc,
537
+ gpu: current_gpu_context,
538
+ columns: columns.map(|c| c.iter().map(|s| s.to_string()).collect()),
539
+ version: version as u64,
540
+ });
541
+
493
542
  let stream = futures::stream::iter(entries_to_read)
494
- .map(|entry| {
495
- let expr_clone = expr_arc.clone();
496
- let schema_clone = iceberg_schema_arc.clone();
497
- let ctx = current_gpu_context.clone();
498
- async move {
499
- if let Some(c) = ctx {
500
- crate::core::index::gpu::set_global_gpu_context(Some(c));
543
+ .map({
544
+ let read_ctx = read_ctx.clone();
545
+ move |entry| {
546
+ let ctx = read_ctx.clone();
547
+ async move {
548
+ if let Some(c) = ctx.gpu.clone() {
549
+ crate::core::index::gpu::set_global_gpu_context(Some(c));
550
+ }
551
+ let cols_refs: Option<Vec<&str>> = ctx.columns.as_ref().map(|v| v.iter().map(|s| s.as_str()).collect());
552
+ ctx.table.read_segment_expr(
553
+ &entry, ctx.expr.as_deref(), ctx.version, cols_refs.as_deref(),
554
+ ctx.schema.as_deref(),
555
+ ).await
501
556
  }
502
- self.read_segment_expr(
503
- &entry, expr_clone.as_deref(), version, columns,
504
- schema_clone.as_deref(),
505
- ).await
506
557
  }
507
558
  })
508
559
  .buffer_unordered(concurrency);
509
560
 
510
- let results: Vec<Result<Vec<RecordBatch>>> = stream.collect().await;
511
- let mut all_batches = Vec::new();
512
- for (i, res) in results.into_iter().enumerate() {
561
+ let results_stream = stream.flat_map(|res| {
513
562
  match res {
514
- Ok(b_vec) => {
515
- all_batches.extend(b_vec);
516
- },
517
- Err(e) => tracing::error!("Error reading batch {}: {}", i, e),
563
+ Ok(b_vec) => futures::stream::iter(b_vec.into_iter().map(Ok)).boxed(),
564
+ Err(e) => futures::stream::once(async move { Err(e) }).boxed(),
518
565
  }
519
- }
566
+ });
520
567
 
521
568
  // --- Read from In-Memory Write Buffer ---
569
+ let mut mem_batches = Vec::new();
522
570
  {
523
571
  let buffer = self.write_buffer.read().unwrap();
524
572
  if !buffer.is_empty() {
@@ -536,7 +584,7 @@ impl Table {
536
584
 
537
585
  if let Ok(filtered) = planner.filter_expr(&batch_to_scan, e) {
538
586
  if filtered.num_rows() > 0 {
539
- all_batches.push(filtered);
587
+ mem_batches.push(Ok(filtered));
540
588
  }
541
589
  }
542
590
  }
@@ -547,18 +595,19 @@ impl Table {
547
595
  .filter_map(|name| batch.schema().index_of(name).ok())
548
596
  .collect();
549
597
  if let Ok(projected) = batch.project(&indices) {
550
- all_batches.push(projected);
598
+ mem_batches.push(Ok(projected));
551
599
  }
552
600
  } else {
553
- all_batches.push(batch.clone());
601
+ mem_batches.push(Ok(batch.clone()));
554
602
  }
555
603
  }
556
604
  }
557
605
  }
558
606
  }
559
607
 
608
+ let mem_stream = futures::stream::iter(mem_batches);
560
609
 
561
- Ok(all_batches)
610
+ Ok(results_stream.chain(mem_stream).boxed())
562
611
  }
563
612
 
564
613
  pub async fn read_filter_async(
@@ -175,15 +175,13 @@ impl WriteAheadLog {
175
175
  }
176
176
  }
177
177
 
178
- /// Replay all log files in the WAL directory and return all batches.
179
- /// This should be called on startup.
180
- pub fn replay(&self) -> Result<(Vec<RecordBatch>, Vec<String>)> {
178
+ /// Replay all log files in the WAL directory and return an iterator of batches.
179
+ /// This should be used on startup for memory-efficient recovery.
180
+ pub fn replay_stream(&self) -> Result<Box<dyn Iterator<Item = Result<RecordBatch>>>> {
181
181
  if !self.dir.exists() {
182
- return Ok((Vec::new(), Vec::new()));
182
+ return Ok(Box::new(std::iter::empty()));
183
183
  }
184
184
 
185
- let mut all_batches = Vec::new();
186
-
187
185
  // 1. List all .arrow files in the directory
188
186
  let entries = std::fs::read_dir(&self.dir)?;
189
187
  let mut wal_files = Vec::new();
@@ -196,10 +194,10 @@ impl WriteAheadLog {
196
194
  }
197
195
  }
198
196
 
199
- // Sort for deterministic replay (optional but good)
197
+ // Sort for deterministic replay
200
198
  wal_files.sort();
201
199
 
202
- let mut replayed_paths = Vec::new();
200
+ let mut all_iterators = Vec::new();
203
201
 
204
202
  for path in wal_files {
205
203
  let file = File::open(&path)?;
@@ -208,27 +206,35 @@ impl WriteAheadLog {
208
206
  }
209
207
 
210
208
  let reader = BufReader::new(file);
211
- let ipc_reader = StreamReader::try_new(reader, None);
212
-
213
- match ipc_reader {
214
- Ok(reader) => {
215
- let mut count = 0;
216
- for batch in reader {
217
- all_batches.push(batch?);
218
- count += 1;
219
- }
220
- if count > 0 {
221
- println!("WAL: Replayed {} batches from {:?}", count, path);
222
- replayed_paths.push(path.to_str().unwrap().to_string());
223
- }
224
- },
225
- Err(e) => {
226
- println!("WAL Recovery Warning: Could not read log {:?}: {}", path, e);
209
+ let ipc_reader = StreamReader::try_new(reader, None)?;
210
+ all_iterators.push(ipc_reader);
211
+ }
212
+
213
+ Ok(Box::new(all_iterators.into_iter().flatten().map(|res| res.map_err(anyhow::Error::from))))
214
+ }
215
+
216
+ /// Replay all log files in the WAL directory and return all batches.
217
+ /// Legacy method, consider using replay_stream for large logs.
218
+ pub fn replay(&self) -> Result<(Vec<RecordBatch>, Vec<String>)> {
219
+ let stream = self.replay_stream()?;
220
+ let mut batches = Vec::new();
221
+ for b in stream {
222
+ batches.push(b?);
223
+ }
224
+
225
+ // Return paths for cleanup (simplified for now)
226
+ let mut paths = Vec::new();
227
+ if self.dir.exists() {
228
+ for entry in std::fs::read_dir(&self.dir)? {
229
+ let entry = entry?;
230
+ let path = entry.path();
231
+ if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("arrow") {
232
+ paths.push(path.to_str().unwrap().to_string());
227
233
  }
228
234
  }
229
235
  }
230
236
 
231
- Ok((all_batches, replayed_paths))
237
+ Ok((batches, paths))
232
238
  }
233
239
 
234
240
  /// Initialize the writer with a schema.