nidx-binding 6.6.1.post437__tar.gz → 6.6.1.post439__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/Cargo.lock +1 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/PKG-INFO +1 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/pyproject.toml +1 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/Cargo.toml +1 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/config.rs +13 -2
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/mod.rs +59 -20
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/ops_hnsw.rs +13 -7
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/tests.rs +2 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point_provider/reader.rs +89 -9
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1/store.rs +2 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1.rs +3 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v2/paragraph_store.rs +1 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v2/vector_store.rs +7 -3
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v2.rs +3 -6
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/formula/mod.rs +6 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/indexer.rs +15 -2
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/lib.rs +2 -1
- nidx_binding-6.6.1.post439/nidx_vector/src/multivector.rs +50 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/tests/test_basic_search.rs +3 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/tests/test_hidden.rs +5 -1
- nidx_binding-6.6.1.post439/nidx_vector/tests/test_maxsim.rs +161 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/pyproject.toml +1 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/import_export.rs +2 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/indexer.rs +2 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/merge_job.rs +2 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata.rs +2 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/merge_task.rs +2 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/sync.rs +2 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_shards_api.rs +2 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_synced_searcher.rs +2 -1
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.config/nextest.toml +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-0cfce9b29547f8f5bafa6e440f86103be7b8c4ad2fd92db9ac223f4efbe23d10.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-1a561eed00f3dbe868bf5030059793300209179dc8fb73e4b57a54b5e81262fe.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-1d3fca2682e25a01143da92285297f134a6a105a96f64d87e0db3abb219855e4.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-249b3b57c27a71baa823f1fe0f0bba9c9af36f61c28f731e58beea60ec48e687.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-24cb6b683daa42d7125f862e25943ab4be7bf275cd8739f8da4859d701795e1a.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-263c8fce6db5b03bbd012fafdba6943cbee6ed7eb8976cdef4f5b01dde7ca6fd.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-2a5d92fb1638df830a4477a7cdf24e6db6b43034b7bbe74fdfb63e8afe2c4071.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-2b065a363f58caed60e3706603c1260dbf5a4c795604a5b68edda22eb07fec1b.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-3fc3cb39934683de8cd475ce1368c8373453eb1e01f81587d66b9d14b109ce6e.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-48f33b77b7c1633467b0b2efcaa1d3c207e7757e4f1d83b40d15e6ca365f7771.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-4ae09f2c08e2f324bee01bb8487a8f37678a1c5e9d327339235c50d4921a8949.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-4d7a76fa413c9ef0ce2a47ac7bb7e01d3e6a2aabded9487d21010a53efee8852.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-4fcbdd6657c7dc9b60b3a563dd41711b3dbcf72ce063427b7a01f8cddf34c244.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-577109ac00ccfbd38ecaccab94116f2f46a4caf5612afa372cded197123c1e08.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-5db25f97d8578d6d78f2f6bd4b72cc82a9b1b82805c6422d967ac63b20d99db4.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-5ec3233a3a23e926055056d46bdde17836a633066dbb5f349502648cd3ea9a60.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-66edb6ea424d8681927dcddb6bac5f1239175f4775d1f40417ba15054b0c6f19.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-6f9c6d201c1b5712efb68c363bffd3e0169c11f2a8f925e8cd4e8808599ff7b4.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-733c3ebacc86f444bf5e2dd79ade660c291e88a00fc09b722f6e2e191545874c.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-7a3bf27c330c468a596e8a297cf7d8b192e31e67ecc5177c1267f579e8e247c7.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-7a7e59e47b30b12237511fd3d7da2d17b0471ad2b006af48d6a6f587c779692b.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-7dcbb33312cc9f11ae3a6d73b1ace017a9f19a8bf8f10304fc57977c8efeadff.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-7efa7c0d747afc4b6aed0586ff846c27839c3213ff7ee9f30c89b0d0f17e60e3.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-8493140d788604d498a4e48da4158708572ccc9d60185290a00d549cc84533db.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-8493bb0059b013eaca42fd10cd7d04f0d06a8acaed379eff0d23f3229edde9ee.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-87996b3d6c7a2195438d7038015b06949102bce8c7b8cd8db1f83aaf23cbe489.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-8d33717587c6ee8f5fc339a80b1212a73d6c03e45856b1d55457fc8074709dd0.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-8f096d8171b89f9615d18f95d696dc9e4fb3674e103161a713cdc806f7a68506.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-917732a56ee04bf3a6e127319dda8225210869c82f9828d878162394dba4e078.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-95fe4ef93ee90733db1b67ed7987f80b5aac792f1590b979c68b418d1599eb98.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-9b67658569b343d8b4b61ae0a7dc721f367f2ba33c7b69b9e68bfd5c9bff5206.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-9c8062ea55d070afef68309e58fa987eb37fda44e1efbf68c8ba2af7846cc968.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-a06e1d9f6f95e4c4c2b98310ebddcc9d963cc033582bf2e945e8bf3a301b4247.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-a55265c9b07bd1399961a6f1e757201fd0eebe868ddaf96437111113d80fce92.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-a60ec2f66f1e7b84189e5b089f2087a29ff6a64326a3743dea935bbc58ee77fa.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-a891a37be5c2d7cce775c2dd33726b0318fd3839beab222a1b22bc6174604207.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-a945191bb4b3e37d6823ed3ad499339d007d69983105de8567777d9daf517b28.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-abe9f7832f2bd799ac44008da031e8d8ab52d4f5fbfc2a7e3974e8873bae55b2.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-aca588cca57a85e4d7fcc40c23cd87e57d53d11ca550d78e7e3d5e39e524fcd3.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-b02f8aafc00a7724510772ac41269e368c5bccf03ef7b4590e0ef6fd1a1bf64f.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-b742e17cabe2d64617e9aa64bafc782172f7a4f8023d1b54f952a0fb39f6b2b8.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-b94e349dbc0daec57f8f8f6e9e2dffb06100b1bb2b41d297c9f3b191da37a83d.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-bd9afa22994aba671dbf7b5f89b53c2ee02f53c0442a81265786a6d52d08512f.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-be60554eca98a5899efc6b49785cecd6444a6d39afed9e4a884ce2dbf162012c.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-bf49702b506c9a1650ece1f8e8d9f14834a902f8caefafe30ded55e2790f2188.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-bfcd21ed704cd305db5c17fcdec7d92aa4ac501913c9c9514d8ff92928c0c7e7.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-c3ab694650f49a75b146fb877a92e48c4f20f0d99f70f8ec859fbb763b01a1e5.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-c55542bb9fae544d87fae6f30e0fe8a9088d12075f4442ab4fe2fcd05e472234.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-cb29a6556d35ac630ee0aa885dd7341cf9573bd3efd216ff8a887b87686b03db.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-d0a1f341a89f5f14696b10baa72db9d95551c2b7e5fc67308fd52dc03dd98a92.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-d2ad0a0ca2649c9e4873cfcc1fc66d2d07cc45d0f65c560b06d7b5f592f4fa8a.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-d6cfe78eb635ba0b89ca4021a4dc8182d18ab5b197f30149cd28488eba4c1df5.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-d729b56dea00e49dcdba8cf0001e2811da27351eabe98212db3b589f18fc6f32.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-d9658bfd4e7170b41d03f2ddf2446d0bf54171c0d39d53bf20af2b8437f2ec48.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-dbba7b3d3289425bae711aedbf73fbc3699f857f86f84d95c3b556d05c5658b0.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-dcb96b649d6d63a58efd5d445453a4f3d7869a56ff714b69bedf3d616a0473ca.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-ebd876fbf5362a5900e75bc05f2f11c73c406ef7da4e95097fc6a1c3d1b8bc54.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-eef5cc6bce1cc14eba8f3e68971724ef181e88cffcedd74673615f2026b89a62.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-ef56d5fefc5774040d1ee397beadb475f6af02768c22f0e583c74062e2e821ce.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/Cargo.toml +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/README.md +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/migrations/20241007163501_initial.sql +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/migrations/20241211120039_merge_job_priority.sql +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/migrations/20241211121159_basic_indexes.sql +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/migrations/20241212151105_check_segment_records.sql +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/migrations/20250110145554_in_flight_messages.sql +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_binding/Cargo.toml +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_binding/src/lib.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/Cargo.toml +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/fuzzy_query.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/lib.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/query_io.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/reader.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/request_types.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/resource_indexer.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/schema.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/search_query.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/search_response.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/set_query.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/stop_words.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/README.md +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ar.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/az.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/bn.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ca.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ch.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/da.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/de.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/el.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/en.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/es.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/eu.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/extract.py +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/fi.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/fr.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/he.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/hu.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/id.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/it.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/kk.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ne.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/nl.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/no.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/pt.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ro.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ru.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/sl.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/sv.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/tg.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/tr.json +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/tests/common/mod.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/tests/reader.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/Cargo.toml +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/build.py +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/build.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/nidx.proto +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/nodereader.proto +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/noderesources.proto +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/nodewriter.proto +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/src/lib.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/Cargo.toml +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/graph_collector.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/graph_query_parser.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/io_maps.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/lib.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/reader.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/resource_indexer.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/schema.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/top_unique_n.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/tests/common/mod.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/tests/test_graph_query_parser_search.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/tests/test_graph_search.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/tests/test_writer.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tantivy/Cargo.toml +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tantivy/src/index_reader.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tantivy/src/lib.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tantivy/src/utils.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tests/Cargo.toml +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tests/src/graph.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tests/src/lib.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/Cargo.toml +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/lib.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/prefilter.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/query_io.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/reader.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/request_types.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/resource_indexer.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/schema.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/search_query.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/tests/common/mod.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/tests/test_deletions.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/tests/test_flow.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/tests/test_search.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/tests/test_streaming.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_types/Cargo.toml +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_types/src/lib.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_types/src/prefilter.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_types/src/query_language.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/disk_hnsw.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/params.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/ram_hnsw.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point_provider/mod.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1/node.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1/trie.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1/trie_ram.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_types.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/inverted_index/fst_index.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/inverted_index/map.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/inverted_index.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/query_io.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/request_types.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/utils.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/vector_types/dense_f32.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/vector_types/mod.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/tests/common/mod.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/api/grpc.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/api/shards.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/api.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/control.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/errors.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/grpc_server.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/lib.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/main.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/deletion.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/index.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/index_request.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/segment.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/shard.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metrics.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/audit_task.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/log_merge.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/metrics_task.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/purge_tasks.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/vector_merge.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/grpc.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/index_cache.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/query_language.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/query_planner.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/shard_search.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/shard_selector.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/shard_suggest.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/streams.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/segment_store.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/settings.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/telemetry/duration_layer.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/telemetry/log_format.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/telemetry/middleware.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/telemetry.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/tool.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/utilization_tracker.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/worker.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/common/mod.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/common/services.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_date_range_search.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_search_filtering.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_search_relations.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_search_sorting.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_searcher_cluster.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_security_search.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_shards.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_suggest.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_vector_normalization.rs +0 -0
- {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_vectorsets.rs +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nidx_binding
|
3
|
-
Version: 6.6.1.post437
|
3
|
+
Version: 6.6.1.post439
|
4
4
|
Classifier: Programming Language :: Rust
|
5
5
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
6
6
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
@@ -10,7 +10,7 @@ build-backend = "pdm.backend"
|
|
10
10
|
|
11
11
|
[project]
|
12
12
|
name = "nidx_protos"
|
13
|
-
version = "6.6.1.post437"
|
13
|
+
version = "6.6.1.post439"
|
14
14
|
license = "AGPL-3.0-or-later"
|
15
15
|
description = "Protobuf definitions for nucliadb/nidx"
|
16
16
|
authors = [{ name = "Nuclia", email = "nucliadb@nuclia.com" }]
|
@@ -65,9 +65,9 @@ impl VectorType {
|
|
65
65
|
}
|
66
66
|
}
|
67
67
|
|
68
|
-
pub fn dimension(&self) ->
|
68
|
+
pub fn dimension(&self) -> usize {
|
69
69
|
match self {
|
70
|
-
VectorType::DenseF32 { dimension } =>
|
70
|
+
VectorType::DenseF32 { dimension } => *dimension,
|
71
71
|
}
|
72
72
|
}
|
73
73
|
|
@@ -79,6 +79,13 @@ impl VectorType {
|
|
79
79
|
}
|
80
80
|
}
|
81
81
|
|
82
|
+
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
|
83
|
+
pub enum VectorCardinality {
|
84
|
+
#[default]
|
85
|
+
Single,
|
86
|
+
Multi,
|
87
|
+
}
|
88
|
+
|
82
89
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
83
90
|
pub struct VectorConfig {
|
84
91
|
#[serde(default)]
|
@@ -86,6 +93,8 @@ pub struct VectorConfig {
|
|
86
93
|
#[serde(default)]
|
87
94
|
pub normalize_vectors: bool,
|
88
95
|
pub vector_type: VectorType,
|
96
|
+
#[serde(default)]
|
97
|
+
pub vector_cardinality: VectorCardinality,
|
89
98
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
90
99
|
pub flags: Vec<String>,
|
91
100
|
}
|
@@ -114,11 +123,13 @@ impl TryFrom<VectorIndexConfig> for VectorConfig {
|
|
114
123
|
dimension: dim as usize,
|
115
124
|
},
|
116
125
|
};
|
126
|
+
// TODO: Add support for multivectors. It is incompatible with vector normalization for now
|
117
127
|
Ok(VectorConfig {
|
118
128
|
similarity: proto.similarity().into(),
|
119
129
|
normalize_vectors: proto.normalize_vectors,
|
120
130
|
vector_type,
|
121
131
|
flags: vec![],
|
132
|
+
vector_cardinality: VectorCardinality::Single,
|
122
133
|
})
|
123
134
|
}
|
124
135
|
}
|
@@ -30,7 +30,8 @@ use crate::config::{VectorConfig, flags};
|
|
30
30
|
use crate::data_store::{DataStore, DataStoreV1, DataStoreV2, OpenReason, ParagraphRef, VectorRef};
|
31
31
|
use crate::formula::Formula;
|
32
32
|
use crate::inverted_index::{FilterBitSet, InvertedIndexes, build_indexes};
|
33
|
-
use crate::{ParagraphAddr, VectorErr, VectorR, VectorSegmentMeta, VectorSegmentMetadata};
|
33
|
+
use crate::{ParagraphAddr, VectorAddr, VectorErr, VectorR, VectorSegmentMeta, VectorSegmentMetadata};
|
34
|
+
use core::f32;
|
34
35
|
use disk_hnsw::DiskHnsw;
|
35
36
|
use io::{BufWriter, Write};
|
36
37
|
use memmap2::Mmap;
|
@@ -187,7 +188,7 @@ fn merge_indexes<DS: DataStore + 'static>(
|
|
187
188
|
|
188
189
|
let metadata = VectorSegmentMetadata {
|
189
190
|
path: data_point_path.to_path_buf(),
|
190
|
-
records:
|
191
|
+
records: data_store.stored_paragraph_count(),
|
191
192
|
index_metadata: VectorSegmentMeta {
|
192
193
|
tags: operants[0].metadata.index_metadata.tags.clone(),
|
193
194
|
},
|
@@ -208,12 +209,15 @@ fn merge_indexes<DS: DataStore + 'static>(
|
|
208
209
|
|
209
210
|
pub fn create(path: &Path, elems: Vec<Elem>, config: &VectorConfig, tags: HashSet<String>) -> VectorR<OpenDataPoint> {
|
210
211
|
// Check dimensions
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
212
|
+
let dim = config.vector_type.dimension();
|
213
|
+
for e in &elems {
|
214
|
+
for v in &e.vectors {
|
215
|
+
if v.len() != dim {
|
216
|
+
return Err(crate::VectorErr::InconsistentDimensions {
|
217
|
+
index_config: dim,
|
218
|
+
vector: v.len(),
|
219
|
+
});
|
220
|
+
}
|
217
221
|
}
|
218
222
|
}
|
219
223
|
|
@@ -229,6 +233,12 @@ pub fn create(path: &Path, elems: Vec<Elem>, config: &VectorConfig, tags: HashSe
|
|
229
233
|
tags,
|
230
234
|
)
|
231
235
|
} else {
|
236
|
+
// Double check vector cardinality
|
237
|
+
if elems.iter().any(|e| e.vectors.len() != 1) {
|
238
|
+
return Err(crate::VectorErr::InvalidConfiguration(
|
239
|
+
"DataStore v1 not supported with multi-vectors",
|
240
|
+
));
|
241
|
+
}
|
232
242
|
DataStoreV1::create(path, elems, &config.vector_type)?;
|
233
243
|
create_indexes(
|
234
244
|
path,
|
@@ -280,7 +290,7 @@ fn create_indexes<DS: DataStore + 'static>(
|
|
280
290
|
|
281
291
|
let metadata = VectorSegmentMetadata {
|
282
292
|
path: path.to_path_buf(),
|
283
|
-
records:
|
293
|
+
records: data_store.stored_paragraph_count(),
|
284
294
|
index_metadata: VectorSegmentMeta { tags },
|
285
295
|
};
|
286
296
|
|
@@ -360,7 +370,7 @@ impl<DS: DataStore> DataRetriever for Retriever<'_, DS> {
|
|
360
370
|
#[derive(Clone, Debug)]
|
361
371
|
pub struct Elem {
|
362
372
|
pub key: String,
|
363
|
-
pub
|
373
|
+
pub vectors: Vec<Vec<f32>>,
|
364
374
|
pub metadata: Option<Vec<u8>>,
|
365
375
|
pub labels: Vec<String>,
|
366
376
|
}
|
@@ -370,7 +380,21 @@ impl Elem {
|
|
370
380
|
labels,
|
371
381
|
metadata,
|
372
382
|
key,
|
373
|
-
vector,
|
383
|
+
vectors: vec![vector],
|
384
|
+
}
|
385
|
+
}
|
386
|
+
|
387
|
+
pub fn new_multivector(
|
388
|
+
key: String,
|
389
|
+
vectors: Vec<Vec<f32>>,
|
390
|
+
labels: Vec<String>,
|
391
|
+
metadata: Option<Vec<u8>>,
|
392
|
+
) -> Elem {
|
393
|
+
Elem {
|
394
|
+
labels,
|
395
|
+
metadata,
|
396
|
+
key,
|
397
|
+
vectors,
|
374
398
|
}
|
375
399
|
}
|
376
400
|
}
|
@@ -440,6 +464,10 @@ impl OpenDataPoint {
|
|
440
464
|
self.data_store.get_paragraph(id)
|
441
465
|
}
|
442
466
|
|
467
|
+
pub fn get_vector(&self, id: VectorAddr) -> VectorRef {
|
468
|
+
self.data_store.get_vector(id)
|
469
|
+
}
|
470
|
+
|
443
471
|
pub fn search(
|
444
472
|
&self,
|
445
473
|
query: &[f32],
|
@@ -490,12 +518,20 @@ impl OpenDataPoint {
|
|
490
518
|
let mut scored_results = Vec::new();
|
491
519
|
for paragraph_addr in bitset.iter() {
|
492
520
|
let paragraph = data_store.get_paragraph(paragraph_addr);
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
521
|
+
|
522
|
+
// Only return the best vector match per paragraph
|
523
|
+
let best_vector_score = paragraph
|
524
|
+
.vectors(&paragraph_addr)
|
525
|
+
.map(|va| {
|
526
|
+
let address = va.into();
|
527
|
+
let score = retriever.similarity(query_address, address);
|
528
|
+
Cnx(address, score)
|
529
|
+
})
|
530
|
+
.max_by(|v, w| v.1.total_cmp(&w.1))
|
531
|
+
.unwrap();
|
532
|
+
|
533
|
+
if best_vector_score.1 >= min_score {
|
534
|
+
scored_results.push(Reverse(best_vector_score));
|
499
535
|
}
|
500
536
|
}
|
501
537
|
scored_results.sort();
|
@@ -528,7 +564,7 @@ mod test {
|
|
528
564
|
|
529
565
|
use crate::{
|
530
566
|
ParagraphAddr, VectorAddr,
|
531
|
-
config::{Similarity, VectorConfig},
|
567
|
+
config::{Similarity, VectorCardinality, VectorConfig},
|
532
568
|
formula::Formula,
|
533
569
|
vector_types::dense_f32::{dot_similarity, encode_vector},
|
534
570
|
};
|
@@ -612,6 +648,7 @@ mod test {
|
|
612
648
|
vector_type: crate::config::VectorType::DenseF32 { dimension: DIMENSION },
|
613
649
|
normalize_vectors: false,
|
614
650
|
flags: vec![],
|
651
|
+
vector_cardinality: VectorCardinality::Single,
|
615
652
|
};
|
616
653
|
let mut rng = SmallRng::seed_from_u64(1234567890);
|
617
654
|
let temp_dir = tempdir()?;
|
@@ -628,7 +665,7 @@ mod test {
|
|
628
665
|
|
629
666
|
for (i, (elem, mut labels)) in elems.into_iter().enumerate() {
|
630
667
|
let vector = dp.data_store.get_vector(VectorAddr(i as u32));
|
631
|
-
assert_eq!(config.vector_type.encode(&elem.
|
668
|
+
assert_eq!(config.vector_type.encode(&elem.vectors[0]), vector.vector());
|
632
669
|
|
633
670
|
let paragraph = dp.data_store.get_paragraph(ParagraphAddr(i as u32));
|
634
671
|
assert_eq!(elem.key, paragraph.id());
|
@@ -657,6 +694,7 @@ mod test {
|
|
657
694
|
vector_type: crate::config::VectorType::DenseF32 { dimension: DIMENSION },
|
658
695
|
normalize_vectors: false,
|
659
696
|
flags: vec![],
|
697
|
+
vector_cardinality: VectorCardinality::Single,
|
660
698
|
};
|
661
699
|
let mut rng = SmallRng::seed_from_u64(1234567890);
|
662
700
|
|
@@ -684,7 +722,7 @@ mod test {
|
|
684
722
|
|
685
723
|
for (i, (elem, mut labels)) in elems1.into_iter().chain(elems2.into_iter()).enumerate() {
|
686
724
|
let vector = merged_dp.data_store.get_vector(VectorAddr(i as u32));
|
687
|
-
assert_eq!(config.vector_type.encode(&elem.
|
725
|
+
assert_eq!(config.vector_type.encode(&elem.vectors[0]), vector.vector());
|
688
726
|
|
689
727
|
let paragraph = merged_dp.data_store.get_paragraph(ParagraphAddr(i as u32));
|
690
728
|
assert_eq!(elem.key, paragraph.id());
|
@@ -734,6 +772,7 @@ mod test {
|
|
734
772
|
vector_type: crate::config::VectorType::DenseF32 { dimension: DIMENSION },
|
735
773
|
normalize_vectors: false,
|
736
774
|
flags: vec![],
|
775
|
+
vector_cardinality: VectorCardinality::Single,
|
737
776
|
};
|
738
777
|
|
739
778
|
// Create a data point
|
{nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/ops_hnsw.rs
RENAMED
@@ -82,7 +82,7 @@ pub type Neighbours = Vec<(Address, f32)>;
|
|
82
82
|
struct NodeFilter<'a, DR> {
|
83
83
|
retriever: &'a DR,
|
84
84
|
filter: &'a FilterBitSet,
|
85
|
-
|
85
|
+
paragraphs: FxHashSet<ParagraphAddr>,
|
86
86
|
vec_counter: RepCounter<'a>,
|
87
87
|
}
|
88
88
|
|
@@ -93,11 +93,18 @@ impl<DR: DataRetriever> NodeFilter<'_, DR> {
|
|
93
93
|
|
94
94
|
pub fn is_valid(&self, n: Address, score: f32) -> bool {
|
95
95
|
!score.is_nan()
|
96
|
-
//
|
97
|
-
&& !self.
|
98
|
-
//
|
96
|
+
// Reject the candidate if we already have a result for the same paragraph
|
97
|
+
&& !self.paragraphs.contains(&self.retriever.paragraph(n))
|
98
|
+
// Reject the candidate if we already have a result with an identical vector
|
99
99
|
&& self.vec_counter.get(self.retriever.get_vector(n)) == 0
|
100
100
|
}
|
101
|
+
|
102
|
+
/// Adds a result so that further candidates with the same vector
|
103
|
+
/// or paragraph will get rejected.
|
104
|
+
pub fn add_result(&mut self, n: Address) {
|
105
|
+
self.paragraphs.insert(self.retriever.paragraph(n));
|
106
|
+
self.vec_counter.add(self.retriever.get_vector(n));
|
107
|
+
}
|
101
108
|
}
|
102
109
|
|
103
110
|
pub struct HnswOps<'a, DR> {
|
@@ -194,8 +201,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
|
|
194
201
|
|
195
202
|
let paragraph_addr = self.retriever.paragraph(candidate);
|
196
203
|
if filter.is_valid(candidate, candidate_similarity) && filter.passes_formula(paragraph_addr) {
|
197
|
-
|
198
|
-
filter.vec_counter.add(candidate_vector);
|
204
|
+
filter.add_result(candidate);
|
199
205
|
results.push((candidate, candidate_similarity));
|
200
206
|
}
|
201
207
|
|
@@ -354,7 +360,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
|
|
354
360
|
let filter = NodeFilter {
|
355
361
|
filter: with_filter,
|
356
362
|
retriever: self.retriever,
|
357
|
-
|
363
|
+
paragraphs: Default::default(),
|
358
364
|
vec_counter: RepCounter::new(!with_duplicates),
|
359
365
|
};
|
360
366
|
let layer_zero = hnsw.get_layer(0);
|
{nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/tests.rs
RENAMED
@@ -24,7 +24,7 @@ use std::time::Instant;
|
|
24
24
|
use tempfile::tempdir;
|
25
25
|
|
26
26
|
use crate::VectorR;
|
27
|
-
use crate::config::{Similarity, VectorConfig, flags};
|
27
|
+
use crate::config::{Similarity, VectorCardinality, VectorConfig, flags};
|
28
28
|
use crate::data_point::{self, Elem};
|
29
29
|
use crate::data_store::{DataStoreV1, DataStoreV2};
|
30
30
|
use crate::formula::{AtomClause, Clause, Formula};
|
@@ -34,6 +34,7 @@ const CONFIG: VectorConfig = VectorConfig {
|
|
34
34
|
normalize_vectors: false,
|
35
35
|
vector_type: crate::config::VectorType::DenseF32 { dimension: 178 },
|
36
36
|
flags: vec![],
|
37
|
+
vector_cardinality: VectorCardinality::Single,
|
37
38
|
};
|
38
39
|
|
39
40
|
fn create_query() -> Vec<f32> {
|
@@ -18,10 +18,14 @@
|
|
18
18
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
//
|
20
20
|
|
21
|
+
use crate::ParagraphAddr;
|
22
|
+
use crate::config::VectorCardinality;
|
21
23
|
use crate::data_point::OpenDataPoint;
|
22
24
|
use crate::data_point_provider::SearchRequest;
|
23
25
|
use crate::data_point_provider::VectorConfig;
|
24
26
|
use crate::data_store::ParagraphRef;
|
27
|
+
use crate::multivector::extract_multi_vectors;
|
28
|
+
use crate::multivector::maxsim_similarity;
|
25
29
|
use crate::request_types::VectorSearchRequest;
|
26
30
|
use crate::utils;
|
27
31
|
use crate::{VectorErr, VectorR};
|
@@ -30,6 +34,7 @@ use nidx_protos::prost::*;
|
|
30
34
|
use nidx_protos::{DocumentScored, DocumentVectorIdentifier, SentenceMetadata, VectorSearchResponse};
|
31
35
|
use nidx_types::prefilter::PrefilterResult;
|
32
36
|
use nidx_types::query_language::*;
|
37
|
+
use rayon::prelude::*;
|
33
38
|
use std::cmp::Ordering;
|
34
39
|
use std::collections::HashSet;
|
35
40
|
use std::time::Instant;
|
@@ -39,6 +44,8 @@ use tracing::*;
|
|
39
44
|
pub struct ScoredParagraph<'a> {
|
40
45
|
score: f32,
|
41
46
|
paragraph: ParagraphRef<'a>,
|
47
|
+
address: ParagraphAddr,
|
48
|
+
data_point: &'a OpenDataPoint,
|
42
49
|
}
|
43
50
|
impl Eq for ScoredParagraph<'_> {}
|
44
51
|
impl std::hash::Hash for ScoredParagraph<'_> {
|
@@ -72,8 +79,13 @@ impl PartialEq for ScoredParagraph<'_> {
|
|
72
79
|
}
|
73
80
|
|
74
81
|
impl<'a> ScoredParagraph<'a> {
|
75
|
-
pub fn new(paragraph: ParagraphRef<'a>, score: f32) -> Self {
|
76
|
-
Self {
|
82
|
+
pub fn new(data_point: &'a OpenDataPoint, address: ParagraphAddr, paragraph: ParagraphRef<'a>, score: f32) -> Self {
|
83
|
+
Self {
|
84
|
+
data_point,
|
85
|
+
paragraph,
|
86
|
+
score,
|
87
|
+
address,
|
88
|
+
}
|
77
89
|
}
|
78
90
|
pub fn score(&self) -> f32 {
|
79
91
|
self.score
|
@@ -88,6 +100,12 @@ impl<'a> ScoredParagraph<'a> {
|
|
88
100
|
let metadata = self.paragraph.metadata();
|
89
101
|
(!metadata.is_empty()).then_some(metadata)
|
90
102
|
}
|
103
|
+
pub fn vectors(&self) -> Vec<&[u8]> {
|
104
|
+
self.paragraph
|
105
|
+
.vectors(&self.address)
|
106
|
+
.map(|va| self.data_point.get_vector(va).vector())
|
107
|
+
.collect()
|
108
|
+
}
|
91
109
|
}
|
92
110
|
|
93
111
|
// Fixed-sized sorted collection
|
@@ -251,8 +269,9 @@ impl Reader {
|
|
251
269
|
open_data_point.search(query, filter, with_duplicates, no_results, &self.config, min_score);
|
252
270
|
|
253
271
|
for candidate in partial_solution {
|
254
|
-
let
|
255
|
-
let
|
272
|
+
let addr = candidate.paragraph();
|
273
|
+
let paragraph = open_data_point.get_paragraph(addr);
|
274
|
+
let scored_paragraph = ScoredParagraph::new(open_data_point, addr, paragraph, candidate.score());
|
256
275
|
ffsv.add(scored_paragraph, candidate.vector());
|
257
276
|
}
|
258
277
|
}
|
@@ -293,14 +312,72 @@ impl Reader {
|
|
293
312
|
formula.operator = BooleanOperator::Or;
|
294
313
|
}
|
295
314
|
|
296
|
-
let search_request = (total_to_get, request, formula);
|
297
315
|
let v = time.elapsed().as_millis();
|
298
|
-
|
299
|
-
|
300
|
-
|
316
|
+
let result = match self.config.vector_cardinality {
|
317
|
+
VectorCardinality::Single => {
|
318
|
+
let search_request = (total_to_get, request, formula);
|
319
|
+
debug!("{id:?} - Searching: starts at {v} ms");
|
320
|
+
self._search(&search_request, &request.segment_filtering_formula)?
|
321
|
+
}
|
322
|
+
VectorCardinality::Multi => {
|
323
|
+
let search_vectors = extract_multi_vectors(&request.vector, &self.config.vector_type)?;
|
324
|
+
debug!(
|
325
|
+
"{id:?} - Multi-vector searching: starts at {v} ms with {} requests",
|
326
|
+
search_vectors.len()
|
327
|
+
);
|
328
|
+
let encoded_query = search_vectors
|
329
|
+
.iter()
|
330
|
+
.map(|v| self.config.vector_type.encode(v))
|
331
|
+
.collect::<Vec<_>>();
|
332
|
+
|
333
|
+
// Search for each vector in the query
|
334
|
+
let results = search_vectors
|
335
|
+
.into_par_iter()
|
336
|
+
.map(|v| {
|
337
|
+
let mut subreq = request.clone();
|
338
|
+
|
339
|
+
subreq.vector = v;
|
340
|
+
// We are OK with duplicate individual vectors. We always deduplicate by paragraphs anyway (NodeFilter.paragraphs)
|
341
|
+
subreq.with_duplicates = true;
|
342
|
+
// We don't care about min_score in this first pass, we apply min_score on top of maxsim similarity
|
343
|
+
subreq.min_score = f32::MIN;
|
344
|
+
// Request at least a few vectors, since the rerank may offer different results later
|
345
|
+
let total_to_get = total_to_get.max(10);
|
346
|
+
|
347
|
+
let search_request = (total_to_get, &subreq, formula.clone());
|
348
|
+
self._search(&search_request, &request.segment_filtering_formula)
|
349
|
+
})
|
350
|
+
.collect::<Result<Vec<_>, _>>()?;
|
351
|
+
|
352
|
+
let v = time.elapsed().as_millis();
|
353
|
+
debug!("{id:?} - Multi-vector reranking: starts at {v} ms");
|
354
|
+
|
355
|
+
// Remove duplicates, we only want each paragraph once
|
356
|
+
let mut result_paragraphs = results.into_iter().flatten().collect::<Vec<_>>();
|
357
|
+
result_paragraphs.sort_unstable_by_key(|rp| rp.address);
|
358
|
+
result_paragraphs.dedup_by_key(|rp| rp.address);
|
359
|
+
|
360
|
+
// Score each paragraph using maxsim
|
361
|
+
let similarity_function = self.config.similarity_function();
|
362
|
+
let mut results = result_paragraphs
|
363
|
+
.into_par_iter()
|
364
|
+
.filter_map(|mut sp| {
|
365
|
+
sp.score = maxsim_similarity(similarity_function, &encoded_query, &sp.vectors());
|
366
|
+
(sp.score() > request.min_score).then_some(sp)
|
367
|
+
})
|
368
|
+
.collect::<Vec<_>>();
|
369
|
+
|
370
|
+
// Select top_k
|
371
|
+
results.sort_unstable_by(|a, b| b.score().partial_cmp(&a.score()).unwrap());
|
372
|
+
results.truncate(total_to_get);
|
373
|
+
|
374
|
+
results
|
375
|
+
}
|
376
|
+
};
|
301
377
|
|
302
378
|
let v = time.elapsed().as_millis();
|
303
379
|
debug!("{id:?} - Searching: ends at {v} ms");
|
380
|
+
|
304
381
|
debug!("{id:?} - Creating results: starts at {v} ms");
|
305
382
|
|
306
383
|
let documents = result
|
@@ -335,7 +412,7 @@ mod tests {
|
|
335
412
|
use tempfile::TempDir;
|
336
413
|
|
337
414
|
use super::*;
|
338
|
-
use crate::config::{Similarity, VectorConfig, VectorType};
|
415
|
+
use crate::config::{Similarity, VectorCardinality, VectorConfig, VectorType};
|
339
416
|
use crate::data_point;
|
340
417
|
use crate::indexer::{ResourceWrapper, index_resource};
|
341
418
|
|
@@ -348,6 +425,7 @@ mod tests {
|
|
348
425
|
normalize_vectors: false,
|
349
426
|
vector_type: VectorType::DenseF32 { dimension: 3 },
|
350
427
|
flags: vec![],
|
428
|
+
vector_cardinality: VectorCardinality::Single,
|
351
429
|
};
|
352
430
|
let raw_sentences = [
|
353
431
|
(
|
@@ -444,6 +522,7 @@ mod tests {
|
|
444
522
|
normalize_vectors: false,
|
445
523
|
vector_type: VectorType::DenseF32 { dimension: 3 },
|
446
524
|
flags: vec![],
|
525
|
+
vector_cardinality: VectorCardinality::Single,
|
447
526
|
};
|
448
527
|
let raw_sentences = [
|
449
528
|
(
|
@@ -565,6 +644,7 @@ mod tests {
|
|
565
644
|
normalize_vectors: false,
|
566
645
|
vector_type: VectorType::DenseF32 { dimension: 3 },
|
567
646
|
flags: vec![],
|
647
|
+
vector_cardinality: VectorCardinality::Single,
|
568
648
|
};
|
569
649
|
let raw_sentences = [
|
570
650
|
(
|
{nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1/store.rs
RENAMED
@@ -211,13 +211,14 @@ pub fn merge(
|
|
211
211
|
#[cfg(test)]
|
212
212
|
mod tests {
|
213
213
|
use super::*;
|
214
|
-
use crate::data_point::Elem;
|
214
|
+
use crate::{config::VectorCardinality, data_point::Elem};
|
215
215
|
|
216
216
|
const VECTOR_CONFIG: VectorConfig = VectorConfig {
|
217
217
|
vector_type: VectorType::DenseF32 { dimension: 3 },
|
218
218
|
similarity: crate::config::Similarity::Dot,
|
219
219
|
normalize_vectors: false,
|
220
220
|
flags: vec![],
|
221
|
+
vector_cardinality: VectorCardinality::Single,
|
221
222
|
};
|
222
223
|
|
223
224
|
#[test]
|
@@ -137,10 +137,12 @@ impl store::IntoBuffer for Elem {
|
|
137
137
|
let ram_trie = trie_ram::create_trie(&self.labels);
|
138
138
|
let trie_bytes = trie::serialize(ram_trie);
|
139
139
|
|
140
|
+
debug_assert!(self.vectors.len() == 1);
|
141
|
+
|
140
142
|
Node::serialize_into(
|
141
143
|
w,
|
142
144
|
self.key,
|
143
|
-
vector_type.encode(&self.
|
145
|
+
vector_type.encode(&self.vectors[0]),
|
144
146
|
vector_type.vector_alignment(),
|
145
147
|
trie_bytes,
|
146
148
|
self.metadata.as_ref(),
|
@@ -133,16 +133,20 @@ impl VectorStoreWriter {
|
|
133
133
|
})
|
134
134
|
}
|
135
135
|
|
136
|
-
pub fn write(
|
136
|
+
pub fn write(
|
137
|
+
&mut self,
|
138
|
+
paragraph_id: u32,
|
139
|
+
vectors: impl Iterator<Item = impl AsRef<[u8]>>,
|
140
|
+
) -> std::io::Result<(u32, u32)> {
|
137
141
|
let first_addr = self.addr;
|
138
142
|
for v in vectors {
|
139
|
-
self.output.write_all(v)?;
|
143
|
+
self.output.write_all(v.as_ref())?;
|
140
144
|
self.output.write_all(paragraph_id.to_le_bytes().as_slice())?;
|
141
145
|
if self.padding_bytes > 0 {
|
142
146
|
self.output.seek(SeekFrom::Current(self.padding_bytes as i64))?;
|
143
147
|
}
|
148
|
+
self.addr += 1;
|
144
149
|
}
|
145
|
-
self.addr += vectors.len() as u32;
|
146
150
|
let last_addr = self.addr - 1;
|
147
151
|
Ok((first_addr, last_addr))
|
148
152
|
}
|
@@ -47,7 +47,7 @@ impl DataStoreV2 {
|
|
47
47
|
let mut vectors = VectorStoreWriter::new(path, vector_type)?;
|
48
48
|
|
49
49
|
for (idx, elem) in (0..).zip(entries.into_iter()) {
|
50
|
-
let (first_vector, _) = vectors.write(idx,
|
50
|
+
let (first_vector, _) = vectors.write(idx, elem.vectors.iter().map(|v| vector_type.encode(v)))?;
|
51
51
|
paragraphs.write(StoredParagraph::from_elem(&elem, first_vector))?;
|
52
52
|
}
|
53
53
|
|
@@ -70,13 +70,10 @@ impl DataStoreV2 {
|
|
70
70
|
for paragraph_addr in alive {
|
71
71
|
// Retrieve paragraph and vectors
|
72
72
|
let paragraph = store.get_paragraph(paragraph_addr);
|
73
|
-
let p_vectors
|
74
|
-
.vectors(&paragraph_addr)
|
75
|
-
.map(|v| store.get_vector(v).vector())
|
76
|
-
.collect();
|
73
|
+
let p_vectors = paragraph.vectors(&paragraph_addr).map(|v| store.get_vector(v).vector());
|
77
74
|
|
78
75
|
// Write to new store
|
79
|
-
let (first_vector, last_vector) = vectors.write(p_idx, p_vectors
|
76
|
+
let (first_vector, last_vector) = vectors.write(p_idx, p_vectors)?;
|
80
77
|
paragraphs.write_paragraph_ref(paragraph, first_vector, last_vector - first_vector + 1)?;
|
81
78
|
p_idx += 1;
|
82
79
|
}
|
@@ -17,8 +17,9 @@
|
|
17
17
|
// You should have received a copy of the GNU Affero General Public License
|
18
18
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
|
20
|
-
use crate::config::VectorConfig;
|
20
|
+
use crate::config::{VectorCardinality, VectorConfig};
|
21
21
|
use crate::data_point::{self, Elem};
|
22
|
+
use crate::multivector::extract_multi_vectors;
|
22
23
|
use crate::{VectorSegmentMetadata, utils};
|
23
24
|
use nidx_protos::{noderesources, prost::*};
|
24
25
|
use std::collections::HashMap;
|
@@ -124,7 +125,19 @@ pub fn index_resource(
|
|
124
125
|
sentence.vector.clone()
|
125
126
|
};
|
126
127
|
let metadata = sentence.metadata.as_ref().map(|m| m.encode_to_vec());
|
127
|
-
|
128
|
+
|
129
|
+
match config.vector_cardinality {
|
130
|
+
VectorCardinality::Single => elems.push(Elem::new(key, vector, paragraph.labels.clone(), metadata)),
|
131
|
+
VectorCardinality::Multi => {
|
132
|
+
let vectors = extract_multi_vectors(&vector, &config.vector_type)?;
|
133
|
+
elems.push(Elem::new_multivector(
|
134
|
+
key.clone(),
|
135
|
+
vectors,
|
136
|
+
paragraph.labels.clone(),
|
137
|
+
metadata.clone(),
|
138
|
+
));
|
139
|
+
}
|
140
|
+
};
|
128
141
|
}
|
129
142
|
}
|
130
143
|
}
|
@@ -26,6 +26,7 @@ mod data_types;
|
|
26
26
|
mod formula;
|
27
27
|
mod indexer;
|
28
28
|
mod inverted_index;
|
29
|
+
mod multivector;
|
29
30
|
mod query_io;
|
30
31
|
mod request_types;
|
31
32
|
mod utils;
|
@@ -47,7 +48,7 @@ use tracing::instrument;
|
|
47
48
|
pub use indexer::SEGMENT_TAGS;
|
48
49
|
pub use request_types::VectorSearchRequest;
|
49
50
|
|
50
|
-
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)]
|
51
|
+
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash)]
|
51
52
|
pub struct ParagraphAddr(u32);
|
52
53
|
pub struct VectorAddr(u32);
|
53
54
|
|