nidx-binding 6.3.1.post46__tar.gz → 6.3.1.post48__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/Cargo.lock +11 -11
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/Cargo.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/PKG-INFO +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_binding/Cargo.toml +2 -2
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_binding/src/lib.rs +13 -7
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/Cargo.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/fuzzy_query.rs +7 -3
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/lib.rs +9 -10
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/reader.rs +28 -16
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/resource_indexer.rs +6 -2
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/schema.rs +5 -4
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/search_query.rs +49 -37
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/search_response.rs +30 -14
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/set_query.rs +3 -8
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/stop_words.rs +19 -6
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/tests/common/mod.rs +1 -4
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/tests/reader.rs +28 -18
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_protos/Cargo.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_protos/pyproject.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/Cargo.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/src/graph_query_parser.rs +40 -36
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/src/io_maps.rs +4 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/src/lib.rs +25 -19
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/src/reader.rs +35 -14
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/src/resource_indexer.rs +10 -3
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/src/schema.rs +27 -11
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/tests/common/mod.rs +12 -9
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/tests/test_graph_search.rs +157 -133
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/tests/test_reader.rs +4 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_relation/tests/test_writer.rs +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_tantivy/Cargo.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_tantivy/src/index_reader.rs +38 -14
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_tantivy/src/lib.rs +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_tests/Cargo.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_tests/src/graph.rs +6 -2
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_tests/src/lib.rs +14 -10
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/Cargo.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/src/lib.rs +11 -6
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/src/prefilter.rs +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/src/reader.rs +33 -29
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/src/resource_indexer.rs +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/src/schema.rs +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/src/search_query.rs +26 -16
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/tests/common/mod.rs +11 -7
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/tests/test_flow.rs +14 -6
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/tests/test_search.rs +14 -8
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_types/Cargo.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/Cargo.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/config.rs +3 -7
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_point/disk_hnsw.rs +13 -11
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_point/mod.rs +68 -31
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_point/node.rs +5 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_point/ops_hnsw.rs +8 -2
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_point/params.rs +1 -5
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_point/ram_hnsw.rs +10 -8
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_point/tests.rs +38 -14
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_point_provider/reader.rs +64 -49
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_types/data_store.rs +17 -7
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_types/trie_ram.rs +6 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/formula/mod.rs +1 -4
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/indexer.rs +29 -21
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/inverted_index/fst_index.rs +3 -3
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/inverted_index/map.rs +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/inverted_index.rs +19 -10
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/lib.rs +3 -6
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/tests/common/mod.rs +1 -4
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/tests/test_basic_search.rs +109 -36
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/tests/test_hidden.rs +16 -12
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/pyproject.toml +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/api/grpc.rs +39 -25
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/api/shards.rs +6 -2
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/api.rs +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/control.rs +3 -7
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/import_export.rs +35 -13
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/indexer.rs +42 -28
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/main.rs +8 -5
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/metadata/index.rs +24 -11
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/metadata/index_request.rs +7 -6
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/metadata/merge_job.rs +12 -8
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/metadata/segment.rs +11 -5
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/metadata/shard.rs +13 -5
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/metadata.rs +15 -11
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/scheduler/audit_task.rs +4 -2
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/scheduler/log_merge.rs +12 -3
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/scheduler/merge_task.rs +20 -16
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/scheduler/purge_tasks.rs +6 -6
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/scheduler/vector_merge.rs +7 -3
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/scheduler.rs +7 -4
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/searcher/grpc.rs +5 -3
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/searcher/index_cache.rs +11 -6
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/searcher/query_language.rs +23 -20
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/searcher/query_planner.rs +23 -19
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/searcher/shard_search.rs +4 -3
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/searcher/shard_selector.rs +1 -4
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/searcher/shard_suggest.rs +8 -6
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/searcher/sync.rs +87 -34
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/searcher.rs +12 -3
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/segment_store.rs +1 -4
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/settings.rs +18 -16
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/telemetry/duration_layer.rs +2 -2
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/telemetry/log_format.rs +9 -6
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/telemetry/middleware.rs +11 -8
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/telemetry.rs +11 -8
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/tool.rs +4 -10
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/worker.rs +24 -10
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/common/mod.rs +15 -5
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_date_range_search.rs +1 -6
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_search_filtering.rs +73 -21
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_search_relations.rs +33 -9
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_searcher_cluster.rs +10 -4
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_security_search.rs +3 -7
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_shards.rs +13 -9
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_shards_api.rs +19 -10
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_suggest.rs +57 -13
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_synced_searcher.rs +12 -7
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_vector_normalization.rs +6 -2
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_vectorsets.rs +17 -27
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-0cfce9b29547f8f5bafa6e440f86103be7b8c4ad2fd92db9ac223f4efbe23d10.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-1a561eed00f3dbe868bf5030059793300209179dc8fb73e4b57a54b5e81262fe.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-1d3fca2682e25a01143da92285297f134a6a105a96f64d87e0db3abb219855e4.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-249b3b57c27a71baa823f1fe0f0bba9c9af36f61c28f731e58beea60ec48e687.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-24cb6b683daa42d7125f862e25943ab4be7bf275cd8739f8da4859d701795e1a.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-263c8fce6db5b03bbd012fafdba6943cbee6ed7eb8976cdef4f5b01dde7ca6fd.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-2a5d92fb1638df830a4477a7cdf24e6db6b43034b7bbe74fdfb63e8afe2c4071.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-2b065a363f58caed60e3706603c1260dbf5a4c795604a5b68edda22eb07fec1b.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-3fc3cb39934683de8cd475ce1368c8373453eb1e01f81587d66b9d14b109ce6e.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-48f33b77b7c1633467b0b2efcaa1d3c207e7757e4f1d83b40d15e6ca365f7771.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-4ae09f2c08e2f324bee01bb8487a8f37678a1c5e9d327339235c50d4921a8949.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-4d7a76fa413c9ef0ce2a47ac7bb7e01d3e6a2aabded9487d21010a53efee8852.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-4fcbdd6657c7dc9b60b3a563dd41711b3dbcf72ce063427b7a01f8cddf34c244.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-577109ac00ccfbd38ecaccab94116f2f46a4caf5612afa372cded197123c1e08.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-5db25f97d8578d6d78f2f6bd4b72cc82a9b1b82805c6422d967ac63b20d99db4.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-5ec3233a3a23e926055056d46bdde17836a633066dbb5f349502648cd3ea9a60.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-66edb6ea424d8681927dcddb6bac5f1239175f4775d1f40417ba15054b0c6f19.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-6f9c6d201c1b5712efb68c363bffd3e0169c11f2a8f925e8cd4e8808599ff7b4.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-733c3ebacc86f444bf5e2dd79ade660c291e88a00fc09b722f6e2e191545874c.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-77a245aed9f6f8b9fc643efbd4b1e2e8e99bbba085e3e75a4f29321cd7b2a25c.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-7a3bf27c330c468a596e8a297cf7d8b192e31e67ecc5177c1267f579e8e247c7.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-7a7e59e47b30b12237511fd3d7da2d17b0471ad2b006af48d6a6f587c779692b.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-7dcbb33312cc9f11ae3a6d73b1ace017a9f19a8bf8f10304fc57977c8efeadff.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-7efa7c0d747afc4b6aed0586ff846c27839c3213ff7ee9f30c89b0d0f17e60e3.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-8493140d788604d498a4e48da4158708572ccc9d60185290a00d549cc84533db.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-8493bb0059b013eaca42fd10cd7d04f0d06a8acaed379eff0d23f3229edde9ee.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-87996b3d6c7a2195438d7038015b06949102bce8c7b8cd8db1f83aaf23cbe489.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-8f096d8171b89f9615d18f95d696dc9e4fb3674e103161a713cdc806f7a68506.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-917732a56ee04bf3a6e127319dda8225210869c82f9828d878162394dba4e078.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-95fe4ef93ee90733db1b67ed7987f80b5aac792f1590b979c68b418d1599eb98.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-9b67658569b343d8b4b61ae0a7dc721f367f2ba33c7b69b9e68bfd5c9bff5206.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-9c8062ea55d070afef68309e58fa987eb37fda44e1efbf68c8ba2af7846cc968.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-a06e1d9f6f95e4c4c2b98310ebddcc9d963cc033582bf2e945e8bf3a301b4247.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-a55265c9b07bd1399961a6f1e757201fd0eebe868ddaf96437111113d80fce92.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-a891a37be5c2d7cce775c2dd33726b0318fd3839beab222a1b22bc6174604207.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-a945191bb4b3e37d6823ed3ad499339d007d69983105de8567777d9daf517b28.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-abe9f7832f2bd799ac44008da031e8d8ab52d4f5fbfc2a7e3974e8873bae55b2.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-aca588cca57a85e4d7fcc40c23cd87e57d53d11ca550d78e7e3d5e39e524fcd3.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-b02f8aafc00a7724510772ac41269e368c5bccf03ef7b4590e0ef6fd1a1bf64f.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-b742e17cabe2d64617e9aa64bafc782172f7a4f8023d1b54f952a0fb39f6b2b8.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-b94e349dbc0daec57f8f8f6e9e2dffb06100b1bb2b41d297c9f3b191da37a83d.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-bd9afa22994aba671dbf7b5f89b53c2ee02f53c0442a81265786a6d52d08512f.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-be60554eca98a5899efc6b49785cecd6444a6d39afed9e4a884ce2dbf162012c.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-bf49702b506c9a1650ece1f8e8d9f14834a902f8caefafe30ded55e2790f2188.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-bfb8ae2e860e451b0868a7b1b50a451d998b2105f9fdfd307b26a1775d145e9f.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-bfcd21ed704cd305db5c17fcdec7d92aa4ac501913c9c9514d8ff92928c0c7e7.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-c3ab694650f49a75b146fb877a92e48c4f20f0d99f70f8ec859fbb763b01a1e5.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-c55542bb9fae544d87fae6f30e0fe8a9088d12075f4442ab4fe2fcd05e472234.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-cb29a6556d35ac630ee0aa885dd7341cf9573bd3efd216ff8a887b87686b03db.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-d0a1f341a89f5f14696b10baa72db9d95551c2b7e5fc67308fd52dc03dd98a92.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-d2ad0a0ca2649c9e4873cfcc1fc66d2d07cc45d0f65c560b06d7b5f592f4fa8a.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-d6cfe78eb635ba0b89ca4021a4dc8182d18ab5b197f30149cd28488eba4c1df5.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-d729b56dea00e49dcdba8cf0001e2811da27351eabe98212db3b589f18fc6f32.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-d9658bfd4e7170b41d03f2ddf2446d0bf54171c0d39d53bf20af2b8437f2ec48.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-dbba7b3d3289425bae711aedbf73fbc3699f857f86f84d95c3b556d05c5658b0.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-dcb96b649d6d63a58efd5d445453a4f3d7869a56ff714b69bedf3d616a0473ca.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-ebd876fbf5362a5900e75bc05f2f11c73c406ef7da4e95097fc6a1c3d1b8bc54.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-eef5cc6bce1cc14eba8f3e68971724ef181e88cffcedd74673615f2026b89a62.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/.sqlx/query-ef56d5fefc5774040d1ee397beadb475f6af02768c22f0e583c74062e2e821ce.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/README.md +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/migrations/20241007163501_initial.sql +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/migrations/20241211120039_merge_job_priority.sql +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/migrations/20241211121159_basic_indexes.sql +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/migrations/20241212151105_check_segment_records.sql +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/migrations/20250110145554_in_flight_messages.sql +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/query_io.rs +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/request_types.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/README.md +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/ar.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/az.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/bn.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/ca.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/ch.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/da.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/de.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/el.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/en.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/es.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/eu.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/extract.py +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/fi.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/fr.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/he.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/hu.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/id.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/it.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/kk.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/ne.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/nl.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/no.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/pt.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/ro.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/ru.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/sl.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/sv.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/tg.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/stop_words/tr.json +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_protos/build.py +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_protos/build.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_protos/src/lib.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_protos/src/nidx.proto +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/src/query_io.rs +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/src/request_types.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_text/tests/test_streaming.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_types/src/lib.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_types/src/prefilter.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_types/src/query_language.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_point_provider/mod.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_types/mod.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/data_types/trie.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/query_io.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/request_types.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/utils.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/vector_types/dense_f32.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_vector/src/vector_types/mod.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/errors.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/grpc_server.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/lib.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/metadata/deletion.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/metrics.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/scheduler/metrics_task.rs +1 -1
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/src/searcher/streams.rs +0 -0
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/common/services.rs +2 -2
- {nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/tests/test_search_sorting.rs +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
# This file is automatically @generated by Cargo.
|
2
2
|
# It is not intended for manual editing.
|
3
|
-
version =
|
3
|
+
version = 4
|
4
4
|
|
5
5
|
[[package]]
|
6
6
|
name = "addr2line"
|
@@ -2838,9 +2838,9 @@ dependencies = [
|
|
2838
2838
|
|
2839
2839
|
[[package]]
|
2840
2840
|
name = "pyo3"
|
2841
|
-
version = "0.
|
2841
|
+
version = "0.23.5"
|
2842
2842
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2843
|
-
checksum = "
|
2843
|
+
checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
|
2844
2844
|
dependencies = [
|
2845
2845
|
"cfg-if",
|
2846
2846
|
"indoc",
|
@@ -2856,9 +2856,9 @@ dependencies = [
|
|
2856
2856
|
|
2857
2857
|
[[package]]
|
2858
2858
|
name = "pyo3-build-config"
|
2859
|
-
version = "0.
|
2859
|
+
version = "0.23.5"
|
2860
2860
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2861
|
-
checksum = "
|
2861
|
+
checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb"
|
2862
2862
|
dependencies = [
|
2863
2863
|
"once_cell",
|
2864
2864
|
"target-lexicon",
|
@@ -2866,9 +2866,9 @@ dependencies = [
|
|
2866
2866
|
|
2867
2867
|
[[package]]
|
2868
2868
|
name = "pyo3-ffi"
|
2869
|
-
version = "0.
|
2869
|
+
version = "0.23.5"
|
2870
2870
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2871
|
-
checksum = "
|
2871
|
+
checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d"
|
2872
2872
|
dependencies = [
|
2873
2873
|
"libc",
|
2874
2874
|
"pyo3-build-config",
|
@@ -2876,9 +2876,9 @@ dependencies = [
|
|
2876
2876
|
|
2877
2877
|
[[package]]
|
2878
2878
|
name = "pyo3-macros"
|
2879
|
-
version = "0.
|
2879
|
+
version = "0.23.5"
|
2880
2880
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2881
|
-
checksum = "
|
2881
|
+
checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da"
|
2882
2882
|
dependencies = [
|
2883
2883
|
"proc-macro2",
|
2884
2884
|
"pyo3-macros-backend",
|
@@ -2888,9 +2888,9 @@ dependencies = [
|
|
2888
2888
|
|
2889
2889
|
[[package]]
|
2890
2890
|
name = "pyo3-macros-backend"
|
2891
|
-
version = "0.
|
2891
|
+
version = "0.23.5"
|
2892
2892
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
2893
|
-
checksum = "
|
2893
|
+
checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028"
|
2894
2894
|
dependencies = [
|
2895
2895
|
"heck",
|
2896
2896
|
"proc-macro2",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nidx_binding
|
3
|
-
Version: 6.3.1.
|
3
|
+
Version: 6.3.1.post48
|
4
4
|
Classifier: Programming Language :: Rust
|
5
5
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
6
6
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
@@ -1,7 +1,7 @@
|
|
1
1
|
[package]
|
2
2
|
name = "nidx_binding"
|
3
3
|
version = "0.1.0"
|
4
|
-
edition = "
|
4
|
+
edition = "2024"
|
5
5
|
license = "AGPL-3.0"
|
6
6
|
|
7
7
|
[lib]
|
@@ -12,7 +12,7 @@ crate-type = ["cdylib"]
|
|
12
12
|
anyhow = "1.0.93"
|
13
13
|
nidx = { version = "0.1.0", path = "..", default-features = false }
|
14
14
|
nidx_protos = { version = "0.1.0", path = "../nidx_protos" }
|
15
|
-
pyo3 = "0.
|
15
|
+
pyo3 = "0.23"
|
16
16
|
tempfile = "3.14.0"
|
17
17
|
tokio = "1.41.1"
|
18
18
|
tokio-util = { version = "0.7.12", features = ["io", "io-util", "compat"] }
|
@@ -23,21 +23,21 @@ use nidx::worker;
|
|
23
23
|
use pyo3::exceptions::PyException;
|
24
24
|
use pyo3::prelude::*;
|
25
25
|
|
26
|
+
use nidx::Settings;
|
26
27
|
use nidx::api::grpc::ApiServer;
|
27
28
|
use nidx::grpc_server::GrpcServer;
|
28
29
|
use nidx::indexer::process_index_message;
|
29
30
|
use nidx::searcher::grpc::SearchServer;
|
30
31
|
use nidx::searcher::{SyncStatus, SyncedSearcher};
|
31
32
|
use nidx::settings::EnvSettings;
|
32
|
-
use nidx::Settings;
|
33
|
-
use nidx_protos::prost::*;
|
34
33
|
use nidx_protos::IndexMessage;
|
34
|
+
use nidx_protos::prost::*;
|
35
35
|
use std::collections::HashMap;
|
36
|
-
use std::sync::atomic::AtomicI64;
|
37
36
|
use std::sync::Arc;
|
38
|
-
use
|
37
|
+
use std::sync::atomic::AtomicI64;
|
38
|
+
use tempfile::{TempDir, tempdir};
|
39
39
|
use tokio::runtime::Runtime;
|
40
|
-
use tokio::sync::mpsc::{
|
40
|
+
use tokio::sync::mpsc::{Sender, channel};
|
41
41
|
use tokio::sync::watch;
|
42
42
|
use tokio_util::sync::CancellationToken;
|
43
43
|
|
@@ -112,9 +112,15 @@ impl NidxBinding {
|
|
112
112
|
pub fn wait_for_sync(&mut self) {
|
113
113
|
self.runtime.as_ref().unwrap().block_on(async {
|
114
114
|
// Wait for a new sync to start
|
115
|
-
self.sync_watcher
|
115
|
+
self.sync_watcher
|
116
|
+
.wait_for(|s| matches!(s, SyncStatus::Syncing))
|
117
|
+
.await
|
118
|
+
.unwrap();
|
116
119
|
// Wait for it to finish
|
117
|
-
self.sync_watcher
|
120
|
+
self.sync_watcher
|
121
|
+
.wait_for(|s| matches!(s, SyncStatus::Synced))
|
122
|
+
.await
|
123
|
+
.unwrap();
|
118
124
|
});
|
119
125
|
}
|
120
126
|
}
|
@@ -44,12 +44,12 @@ use std::io;
|
|
44
44
|
use std::ops::Range;
|
45
45
|
use std::sync::Arc;
|
46
46
|
|
47
|
-
use levenshtein_automata::{Distance, LevenshteinAutomatonBuilder
|
47
|
+
use levenshtein_automata::{DFA, Distance, LevenshteinAutomatonBuilder};
|
48
48
|
use once_cell::sync::Lazy;
|
49
|
+
use tantivy::TantivyError::InvalidArgument;
|
49
50
|
use tantivy::query::{BitSetDocSet, ConstScorer, EnableScoring, Explanation, Query, Scorer, Weight};
|
50
51
|
use tantivy::schema::{Field, IndexRecordOption, Term};
|
51
52
|
use tantivy::termdict::{TermDictionary, TermStreamer};
|
52
|
-
use tantivy::TantivyError::InvalidArgument;
|
53
53
|
use tantivy::{DocId, DocSet, Score, SegmentReader, TantivyError};
|
54
54
|
use tantivy_common::BitSet;
|
55
55
|
use tantivy_fst::Automaton;
|
@@ -224,7 +224,11 @@ impl FuzzyTermQuery {
|
|
224
224
|
} else {
|
225
225
|
automaton_builder.build_dfa(term_text)
|
226
226
|
};
|
227
|
-
Ok(AutomatonWeight::new(
|
227
|
+
Ok(AutomatonWeight::new(
|
228
|
+
self.term.field(),
|
229
|
+
DfaWrapper(automaton),
|
230
|
+
self.termc.clone(),
|
231
|
+
))
|
228
232
|
}
|
229
233
|
None => Err(InvalidArgument(format!(
|
230
234
|
"Levenshtein distance of {} is not allowed. Choose a value in the {:?} range",
|
@@ -31,20 +31,20 @@ mod stop_words;
|
|
31
31
|
|
32
32
|
use nidx_protos::{ParagraphItem, ParagraphSearchResponse, StreamRequest};
|
33
33
|
use nidx_tantivy::{
|
34
|
-
index_reader::{open_index_with_deletions, DeletionQueryBuilder},
|
35
34
|
TantivyIndexer, TantivyMeta, TantivySegmentMetadata,
|
35
|
+
index_reader::{DeletionQueryBuilder, open_index_with_deletions},
|
36
36
|
};
|
37
|
-
use nidx_types::{prefilter::PrefilterResult
|
37
|
+
use nidx_types::{OpenIndexMetadata, prefilter::PrefilterResult};
|
38
38
|
use reader::ParagraphReaderService;
|
39
39
|
use resource_indexer::index_paragraphs;
|
40
40
|
use schema::ParagraphSchema;
|
41
41
|
use std::path::Path;
|
42
42
|
use tantivy::{
|
43
|
+
Term,
|
43
44
|
directory::MmapDirectory,
|
44
45
|
indexer::merge_indices,
|
45
46
|
query::{Query, TermSetQuery},
|
46
47
|
schema::{Field, Schema},
|
47
|
-
Term,
|
48
48
|
};
|
49
49
|
use tracing::instrument;
|
50
50
|
|
@@ -61,11 +61,7 @@ impl DeletionQueryBuilder for ParagraphDeletionQueryBuilder {
|
|
61
61
|
Box::new(TermSetQuery::new(keys.map(|k| {
|
62
62
|
// Our keys can be resource or field ids, match the corresponding tantivy field
|
63
63
|
let is_field = k.len() > 32;
|
64
|
-
let tantivy_field = if is_field {
|
65
|
-
self.field
|
66
|
-
} else {
|
67
|
-
self.resource
|
68
|
-
};
|
64
|
+
let tantivy_field = if is_field { self.field } else { self.resource };
|
69
65
|
Term::from_field_bytes(tantivy_field, k.as_bytes())
|
70
66
|
})))
|
71
67
|
}
|
@@ -134,7 +130,10 @@ impl ParagraphSearcher {
|
|
134
130
|
reader: ParagraphReaderService {
|
135
131
|
index: index.clone(),
|
136
132
|
schema: ParagraphSchema::new(),
|
137
|
-
reader: index
|
133
|
+
reader: index
|
134
|
+
.reader_builder()
|
135
|
+
.reload_policy(tantivy::ReloadPolicy::Manual)
|
136
|
+
.try_into()?,
|
138
137
|
},
|
139
138
|
})
|
140
139
|
}
|
@@ -157,7 +156,7 @@ impl ParagraphSearcher {
|
|
157
156
|
self.reader.suggest(request, prefilter)
|
158
157
|
}
|
159
158
|
|
160
|
-
pub fn iterator(&self, request: &StreamRequest) -> anyhow::Result<impl Iterator<Item = ParagraphItem
|
159
|
+
pub fn iterator(&self, request: &StreamRequest) -> anyhow::Result<impl Iterator<Item = ParagraphItem> + use<>> {
|
161
160
|
self.reader.iterator(request)
|
162
161
|
}
|
163
162
|
}
|
@@ -26,14 +26,14 @@ use nidx_protos::{OrderBy, ParagraphItem, ParagraphSearchResponse, StreamRequest
|
|
26
26
|
use nidx_types::prefilter::PrefilterResult;
|
27
27
|
use tantivy::collector::{Collector, Count, FacetCollector, TopDocs};
|
28
28
|
use tantivy::query::{AllQuery, Query, QueryParser};
|
29
|
-
use tantivy::{
|
29
|
+
use tantivy::{DateTime, Order, schema::*};
|
30
30
|
use tantivy::{DocAddress, Index, IndexReader};
|
31
31
|
use tracing::*;
|
32
32
|
|
33
33
|
use super::schema::ParagraphSchema;
|
34
34
|
use crate::request_types::{ParagraphSearchRequest, ParagraphSuggestRequest};
|
35
|
-
use crate::search_query::{search_query, streaming_query, suggest_query
|
36
|
-
use crate::search_response::{
|
35
|
+
use crate::search_query::{SharedTermC, search_query, streaming_query, suggest_query};
|
36
|
+
use crate::search_response::{SearchBm25Response, SearchFacetsResponse, SearchIntResponse, extract_labels};
|
37
37
|
|
38
38
|
const FUZZY_DISTANCE: u8 = 1;
|
39
39
|
const NUMBER_OF_RESULTS_SUGGEST: usize = 10;
|
@@ -46,7 +46,10 @@ pub struct ParagraphReaderService {
|
|
46
46
|
|
47
47
|
impl Debug for ParagraphReaderService {
|
48
48
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
49
|
-
f.debug_struct("TextService")
|
49
|
+
f.debug_struct("TextService")
|
50
|
+
.field("index", &self.index)
|
51
|
+
.field("schema", &self.schema)
|
52
|
+
.finish()
|
50
53
|
}
|
51
54
|
}
|
52
55
|
|
@@ -94,7 +97,7 @@ impl ParagraphReaderService {
|
|
94
97
|
}))
|
95
98
|
}
|
96
99
|
|
97
|
-
pub fn iterator(&self, request: &StreamRequest) -> anyhow::Result<impl Iterator<Item = ParagraphItem
|
100
|
+
pub fn iterator(&self, request: &StreamRequest) -> anyhow::Result<impl Iterator<Item = ParagraphItem> + use<>> {
|
98
101
|
let producer = BatchProducer {
|
99
102
|
offset: 0,
|
100
103
|
total: self.count()?,
|
@@ -136,7 +139,11 @@ impl ParagraphReaderService {
|
|
136
139
|
let v = time.elapsed().as_millis();
|
137
140
|
debug!("{id:?} - Searching: starts at {v} ms");
|
138
141
|
|
139
|
-
let advanced = request
|
142
|
+
let advanced = request
|
143
|
+
.advanced_query
|
144
|
+
.as_ref()
|
145
|
+
.map(|query| parser.parse_query(query))
|
146
|
+
.transpose()?;
|
140
147
|
#[rustfmt::skip] let (original, termc, fuzzied) = search_query(
|
141
148
|
&parser,
|
142
149
|
&text,
|
@@ -245,7 +252,10 @@ impl Iterator for BatchProducer {
|
|
245
252
|
return None;
|
246
253
|
};
|
247
254
|
let mut items = vec![];
|
248
|
-
for doc in top_docs
|
255
|
+
for doc in top_docs
|
256
|
+
.into_iter()
|
257
|
+
.flat_map(|i| self.searcher.doc::<TantivyDocument>(i.1))
|
258
|
+
{
|
249
259
|
let id = doc
|
250
260
|
.get_first(self.paragraph_field)
|
251
261
|
.expect("document doesn't appear to have uuid.")
|
@@ -254,10 +264,7 @@ impl Iterator for BatchProducer {
|
|
254
264
|
.to_string();
|
255
265
|
|
256
266
|
let labels = extract_labels(doc.get_all(self.facet_field));
|
257
|
-
items.push(ParagraphItem {
|
258
|
-
id,
|
259
|
-
labels,
|
260
|
-
});
|
267
|
+
items.push(ParagraphItem { id, labels });
|
261
268
|
}
|
262
269
|
self.offset += Self::BATCH;
|
263
270
|
let v = time.elapsed().as_millis();
|
@@ -290,7 +297,9 @@ impl<'a> Searcher<'a> {
|
|
290
297
|
OrderType::Desc => Order::Desc,
|
291
298
|
OrderType::Asc => Order::Asc,
|
292
299
|
};
|
293
|
-
TopDocs::with_limit(limit)
|
300
|
+
TopDocs::with_limit(limit)
|
301
|
+
.and_offset(offset)
|
302
|
+
.order_by_fast_field(order_field, order_direction)
|
294
303
|
}
|
295
304
|
fn do_search(
|
296
305
|
&self,
|
@@ -300,10 +309,13 @@ impl<'a> Searcher<'a> {
|
|
300
309
|
min_score: f32,
|
301
310
|
) -> anyhow::Result<ParagraphSearchResponse> {
|
302
311
|
let searcher = service.reader.searcher();
|
303
|
-
let facet_collector = self
|
304
|
-
|
305
|
-
|
306
|
-
|
312
|
+
let facet_collector = self
|
313
|
+
.facets
|
314
|
+
.iter()
|
315
|
+
.fold(FacetCollector::for_field("facets"), |mut collector, facet| {
|
316
|
+
collector.add_facet(Facet::from(facet));
|
317
|
+
collector
|
318
|
+
});
|
307
319
|
if self.only_faceted {
|
308
320
|
// No query search, just facets
|
309
321
|
let facets_count = searcher.search(&query, &facet_collector).unwrap();
|
{nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/resource_indexer.rs
RENAMED
@@ -49,8 +49,12 @@ pub fn index_paragraphs(
|
|
49
49
|
};
|
50
50
|
|
51
51
|
let empty_paragraph = HashMap::with_capacity(0);
|
52
|
-
let inspect_paragraph =
|
53
|
-
|
52
|
+
let inspect_paragraph = |field: &str| {
|
53
|
+
resource
|
54
|
+
.paragraphs
|
55
|
+
.get(field)
|
56
|
+
.map_or_else(|| &empty_paragraph, |i| &i.paragraphs)
|
57
|
+
};
|
54
58
|
|
55
59
|
let resource_labels = resource
|
56
60
|
.labels
|
@@ -17,13 +17,13 @@
|
|
17
17
|
// You should have received a copy of the GNU Affero General Public License
|
18
18
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
//
|
20
|
-
use nidx_protos::prost::*;
|
21
20
|
use nidx_protos::ParagraphMetadata;
|
22
|
-
use
|
23
|
-
use tantivy::schema::{FacetOptions, Field, NumericOptions, Schema, STORED, STRING, TEXT};
|
21
|
+
use nidx_protos::prost::*;
|
24
22
|
use tantivy::DateOptions;
|
25
23
|
use tantivy::DateTime;
|
26
24
|
use tantivy::TantivyDocument;
|
25
|
+
use tantivy::schema::Value;
|
26
|
+
use tantivy::schema::{FacetOptions, Field, NumericOptions, STORED, STRING, Schema, TEXT};
|
27
27
|
|
28
28
|
#[derive(Debug, Clone)]
|
29
29
|
pub struct ParagraphSchema {
|
@@ -70,7 +70,8 @@ impl ParagraphSchema {
|
|
70
70
|
|
71
71
|
/// Returns the paragraph metadata for the given document, if any.
|
72
72
|
pub fn metadata(&self, doc: &TantivyDocument) -> Option<ParagraphMetadata> {
|
73
|
-
doc.get_first(self.metadata)
|
73
|
+
doc.get_first(self.metadata)
|
74
|
+
.and_then(|value| ParagraphMetadata::decode(value.as_bytes()?).ok())
|
74
75
|
}
|
75
76
|
}
|
76
77
|
|
@@ -66,11 +66,7 @@ impl TermCollector {
|
|
66
66
|
let term_dict = index.terms();
|
67
67
|
let mut term_s = vec![];
|
68
68
|
let found = term_dict.ord_to_term(term, &mut term_s).unwrap_or(false);
|
69
|
-
let elem = if found {
|
70
|
-
term_s
|
71
|
-
} else {
|
72
|
-
vec![]
|
73
|
-
};
|
69
|
+
let elem = if found { term_s } else { vec![] };
|
74
70
|
match String::from_utf8(elem).ok() {
|
75
71
|
Some(v) if v.len() > 2 => terms.push(v),
|
76
72
|
_ => (),
|
@@ -123,9 +119,11 @@ fn queryp_map(queries: Vec<QueryP>, distance: u8, as_prefix: Option<usize>, term
|
|
123
119
|
}
|
124
120
|
|
125
121
|
fn flat_bool_query(query: BooleanQuery, collector: (usize, Vec<QueryP>)) -> (usize, Vec<QueryP>) {
|
126
|
-
query
|
127
|
-
|
128
|
-
|
122
|
+
query
|
123
|
+
.clauses()
|
124
|
+
.iter()
|
125
|
+
.map(|(occur, subq)| (*occur, subq.box_clone()))
|
126
|
+
.fold(collector, |(mut id, mut c), (occur, subq)| {
|
129
127
|
if subq.is::<BooleanQuery>() {
|
130
128
|
let subq: Box<BooleanQuery> = subq.downcast().unwrap();
|
131
129
|
flat_bool_query(*subq, (id, c))
|
@@ -137,8 +135,7 @@ fn flat_bool_query(query: BooleanQuery, collector: (usize, Vec<QueryP>)) -> (usi
|
|
137
135
|
c.push((occur, subq));
|
138
136
|
(id, c)
|
139
137
|
}
|
140
|
-
}
|
141
|
-
)
|
138
|
+
})
|
142
139
|
}
|
143
140
|
|
144
141
|
fn flat_and_adapt(query: Box<dyn Query>, prefixed: bool, distance: u8, termc: SharedTermC) -> Vec<QueryP> {
|
@@ -155,16 +152,7 @@ fn flat_and_adapt(query: Box<dyn Query>, prefixed: bool, distance: u8, termc: Sh
|
|
155
152
|
let as_prefix = 1;
|
156
153
|
(queries, as_prefix)
|
157
154
|
};
|
158
|
-
queryp_map(
|
159
|
-
queries,
|
160
|
-
distance,
|
161
|
-
if prefixed {
|
162
|
-
Some(as_prefix)
|
163
|
-
} else {
|
164
|
-
None
|
165
|
-
},
|
166
|
-
termc,
|
167
|
-
)
|
155
|
+
queryp_map(queries, distance, if prefixed { Some(as_prefix) } else { None }, termc)
|
168
156
|
}
|
169
157
|
|
170
158
|
fn fuzzied_queries(query: Box<dyn Query>, prefixed: bool, distance: u8, termc: SharedTermC) -> Vec<QueryP> {
|
@@ -192,9 +180,12 @@ fn parse_query(parser: &QueryParser, text: &str) -> Box<dyn Query> {
|
|
192
180
|
/// The last term of the query is a prefix fuzzy term and must be preserved.
|
193
181
|
fn remove_stop_words(query: &str) -> Cow<'_, str> {
|
194
182
|
match query.rsplit_once(' ') {
|
195
|
-
Some((query, last_term)) =>
|
196
|
-
|
197
|
-
|
183
|
+
Some((query, last_term)) => query
|
184
|
+
.split(' ')
|
185
|
+
.filter(|term| !is_stop_word(&term.to_lowercase()))
|
186
|
+
.chain([last_term])
|
187
|
+
.join(" ")
|
188
|
+
.into(),
|
198
189
|
None => query.into(),
|
199
190
|
}
|
200
191
|
}
|
@@ -222,7 +213,10 @@ fn preprocess_raw_query(query: &str, tc: &mut TermCollector) -> ProcessedQuery {
|
|
222
213
|
let unquote = query[start..qstart].trim();
|
223
214
|
let unquote = remove_stop_words(unquote);
|
224
215
|
|
225
|
-
unquote
|
216
|
+
unquote
|
217
|
+
.split(' ')
|
218
|
+
.filter(|s| !s.is_empty())
|
219
|
+
.for_each(|t| tc.log_eterm(t.to_string()));
|
226
220
|
tc.log_eterm(quote.to_string());
|
227
221
|
|
228
222
|
if !regular_query.is_empty() {
|
@@ -245,7 +239,9 @@ fn preprocess_raw_query(query: &str, tc: &mut TermCollector) -> ProcessedQuery {
|
|
245
239
|
let tail = query[start..].trim();
|
246
240
|
let tail = remove_stop_words(tail);
|
247
241
|
|
248
|
-
tail.split(' ')
|
242
|
+
tail.split(' ')
|
243
|
+
.filter(|s| !s.is_empty())
|
244
|
+
.for_each(|t| tc.log_eterm(t.to_string()));
|
249
245
|
|
250
246
|
if !regular_query.is_empty() {
|
251
247
|
regular_query.push(' ');
|
@@ -270,11 +266,7 @@ fn filter_query(
|
|
270
266
|
filter_or: bool,
|
271
267
|
) -> Option<Box<dyn Query>> {
|
272
268
|
let mut filter_terms = vec![];
|
273
|
-
let operator = if filter_or {
|
274
|
-
Occur::Should
|
275
|
-
} else {
|
276
|
-
Occur::Must
|
277
|
-
};
|
269
|
+
let operator = if filter_or { Occur::Should } else { Occur::Must };
|
278
270
|
|
279
271
|
// Paragraph filter
|
280
272
|
if let Some(formula) = ¶graph_formula {
|
@@ -286,7 +278,9 @@ fn filter_query(
|
|
286
278
|
if let PrefilterResult::Some(field_keys) = prefilter {
|
287
279
|
let set_query = Box::new(SetQuery::new(
|
288
280
|
schema.field_uuid,
|
289
|
-
field_keys
|
281
|
+
field_keys
|
282
|
+
.iter()
|
283
|
+
.map(|x| format!("{}{}", x.resource_id.simple(), x.field_id)),
|
290
284
|
));
|
291
285
|
filter_terms.push((operator, set_query));
|
292
286
|
}
|
@@ -434,8 +428,14 @@ mod tests {
|
|
434
428
|
|
435
429
|
#[test]
|
436
430
|
fn test() {
|
437
|
-
let subqueries0: Vec<_> = vec![dummy_term_query; 12]
|
438
|
-
|
431
|
+
let subqueries0: Vec<_> = vec![dummy_term_query; 12]
|
432
|
+
.into_iter()
|
433
|
+
.map(|f| (Occur::Must, f()))
|
434
|
+
.collect();
|
435
|
+
let subqueries1: Vec<_> = vec![dummy_term_query; 12]
|
436
|
+
.into_iter()
|
437
|
+
.map(|f| (Occur::Must, f()))
|
438
|
+
.collect();
|
439
439
|
let boolean0: Box<dyn Query> = Box::new(BooleanQuery::new(subqueries0));
|
440
440
|
let boolean1: Box<dyn Query> = Box::new(BooleanQuery::new(subqueries1));
|
441
441
|
let nested = BooleanQuery::new(vec![(Occur::Should, boolean0), (Occur::Should, boolean1)]);
|
@@ -448,7 +448,10 @@ mod tests {
|
|
448
448
|
#[test]
|
449
449
|
fn it_removes_stop_word_fterms() {
|
450
450
|
let tests = [
|
451
|
-
(
|
451
|
+
(
|
452
|
+
"nuclia is a database for unstructured data",
|
453
|
+
"nuclia database unstructured data",
|
454
|
+
),
|
452
455
|
(
|
453
456
|
"nuclia is a database for the",
|
454
457
|
// keeps last term even if is a stop word
|
@@ -457,8 +460,14 @@ mod tests {
|
|
457
460
|
("is a for and", "and"),
|
458
461
|
("what does stop is?", "stop is?"),
|
459
462
|
("", ""),
|
460
|
-
(
|
461
|
-
|
463
|
+
(
|
464
|
+
"comment s'appelle le train à grande vitesse",
|
465
|
+
"comment s'appelle train grande vitesse",
|
466
|
+
),
|
467
|
+
(
|
468
|
+
"¿Qué significa la palabra sentence en español?",
|
469
|
+
"¿Qué significa palabra sentence español?",
|
470
|
+
),
|
462
471
|
("Per què les vaques no són de color rosa?", "vaques color rosa?"),
|
463
472
|
("How can I learn to make a flat white?", "learn make flat white?"),
|
464
473
|
("Qué es escalada en bloque?", "escalada bloque?"),
|
@@ -466,7 +475,10 @@ mod tests {
|
|
466
475
|
"Wer hat gesagt: 'Kaffeetrinken ist integraler Bestandteil des Kletterns'?",
|
467
476
|
"Wer gesagt: 'Kaffeetrinken integraler Bestandteil Kletterns'?",
|
468
477
|
),
|
469
|
-
(
|
478
|
+
(
|
479
|
+
"i pistacchi siciliani sono i migliori al mondo?",
|
480
|
+
"pistacchi siciliani migliori mondo?",
|
481
|
+
),
|
470
482
|
];
|
471
483
|
|
472
484
|
for (query, expected_fuzzy_query) in tests {
|
{nidx_binding-6.3.1.post46 → nidx_binding-6.3.1.post48}/nidx_paragraph/src/search_response.rs
RENAMED
@@ -30,7 +30,11 @@ use crate::reader::ParagraphReaderService;
|
|
30
30
|
use crate::search_query::TermCollector;
|
31
31
|
|
32
32
|
pub fn extract_labels<'a>(facets_iterator: impl Iterator<Item = &'a OwnedValue>) -> Vec<String> {
|
33
|
-
facets_iterator
|
33
|
+
facets_iterator
|
34
|
+
.flat_map(|x| x.as_facet())
|
35
|
+
.filter(|x| is_label(x))
|
36
|
+
.map(|x| x.to_path_string())
|
37
|
+
.collect()
|
34
38
|
}
|
35
39
|
|
36
40
|
pub fn is_label(facet: &Facet) -> bool {
|
@@ -56,14 +60,7 @@ pub fn produce_facets(facets: Vec<String>, facets_count: FacetCounts) -> HashMap
|
|
56
60
|
.map(|facet| (&facets_count, facet))
|
57
61
|
.map(|(facets_count, facet)| (facet_count(&facet, facets_count), facet))
|
58
62
|
.filter(|(r, _)| !r.is_empty())
|
59
|
-
.map(|(facetresults, facet)| {
|
60
|
-
(
|
61
|
-
facet,
|
62
|
-
FacetResults {
|
63
|
-
facetresults,
|
64
|
-
},
|
65
|
-
)
|
66
|
-
})
|
63
|
+
.map(|(facetresults, facet)| (facet, FacetResults { facetresults }))
|
67
64
|
.collect()
|
68
65
|
}
|
69
66
|
|
@@ -102,7 +99,10 @@ pub struct SearchFacetsResponse<'a> {
|
|
102
99
|
|
103
100
|
impl<'a> From<SearchFacetsResponse<'a>> for ParagraphSearchResponse {
|
104
101
|
fn from(response: SearchFacetsResponse) -> Self {
|
105
|
-
let facets = response
|
102
|
+
let facets = response
|
103
|
+
.facets_count
|
104
|
+
.map(|count| produce_facets(response.facets, count))
|
105
|
+
.unwrap_or_default();
|
106
106
|
let results: Vec<ParagraphResult> = Vec::with_capacity(0);
|
107
107
|
ParagraphSearchResponse {
|
108
108
|
results,
|
@@ -161,7 +161,12 @@ impl<'a> From<SearchIntResponse<'a>> for ParagraphSearchResponse {
|
|
161
161
|
.unwrap()
|
162
162
|
.to_string();
|
163
163
|
|
164
|
-
let split = doc
|
164
|
+
let split = doc
|
165
|
+
.get_first(schema.split)
|
166
|
+
.unwrap_or(&default_split)
|
167
|
+
.as_str()
|
168
|
+
.unwrap()
|
169
|
+
.to_string();
|
165
170
|
|
166
171
|
let index = doc.get_first(schema.index).unwrap().as_u64().unwrap();
|
167
172
|
let mut terms: Vec<_> = response.termc.get_fterms(doc_address.doc_id).into_iter().collect();
|
@@ -186,7 +191,10 @@ impl<'a> From<SearchIntResponse<'a>> for ParagraphSearchResponse {
|
|
186
191
|
}
|
187
192
|
}
|
188
193
|
|
189
|
-
let facets = response
|
194
|
+
let facets = response
|
195
|
+
.facets_count
|
196
|
+
.map(|count| produce_facets(response.facets, count))
|
197
|
+
.unwrap_or_default();
|
190
198
|
ParagraphSearchResponse {
|
191
199
|
results,
|
192
200
|
facets,
|
@@ -251,7 +259,12 @@ impl<'a> From<SearchBm25Response<'a>> for ParagraphSearchResponse {
|
|
251
259
|
.unwrap()
|
252
260
|
.to_string();
|
253
261
|
|
254
|
-
let split = doc
|
262
|
+
let split = doc
|
263
|
+
.get_first(schema.split)
|
264
|
+
.unwrap_or(&default_split)
|
265
|
+
.as_str()
|
266
|
+
.unwrap()
|
267
|
+
.to_string();
|
255
268
|
|
256
269
|
let index = doc.get_first(schema.index).unwrap().as_u64().unwrap();
|
257
270
|
let mut terms: Vec<_> = response.termc.get_fterms(doc_address.doc_id).into_iter().collect();
|
@@ -276,7 +289,10 @@ impl<'a> From<SearchBm25Response<'a>> for ParagraphSearchResponse {
|
|
276
289
|
}
|
277
290
|
}
|
278
291
|
|
279
|
-
let facets = response
|
292
|
+
let facets = response
|
293
|
+
.facets_count
|
294
|
+
.map(|count| produce_facets(response.facets, count))
|
295
|
+
.unwrap_or_default();
|
280
296
|
ParagraphSearchResponse {
|
281
297
|
results,
|
282
298
|
facets,
|