nidx-binding 6.6.1.post437__tar.gz → 6.6.1.post439__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (257) hide show
  1. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/Cargo.lock +1 -0
  2. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/PKG-INFO +1 -1
  3. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/pyproject.toml +1 -1
  4. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/Cargo.toml +1 -0
  5. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/config.rs +13 -2
  6. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/mod.rs +59 -20
  7. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/ops_hnsw.rs +13 -7
  8. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/tests.rs +2 -1
  9. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point_provider/reader.rs +89 -9
  10. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1/store.rs +2 -1
  11. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1.rs +3 -1
  12. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v2/paragraph_store.rs +1 -1
  13. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v2/vector_store.rs +7 -3
  14. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v2.rs +3 -6
  15. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/formula/mod.rs +6 -0
  16. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/indexer.rs +15 -2
  17. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/lib.rs +2 -1
  18. nidx_binding-6.6.1.post439/nidx_vector/src/multivector.rs +50 -0
  19. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/tests/test_basic_search.rs +3 -0
  20. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/tests/test_hidden.rs +5 -1
  21. nidx_binding-6.6.1.post439/nidx_vector/tests/test_maxsim.rs +161 -0
  22. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/pyproject.toml +1 -1
  23. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/import_export.rs +2 -0
  24. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/indexer.rs +2 -1
  25. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/merge_job.rs +2 -1
  26. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata.rs +2 -1
  27. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/merge_task.rs +2 -1
  28. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/sync.rs +2 -1
  29. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_shards_api.rs +2 -1
  30. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_synced_searcher.rs +2 -1
  31. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.config/nextest.toml +0 -0
  32. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-0cfce9b29547f8f5bafa6e440f86103be7b8c4ad2fd92db9ac223f4efbe23d10.json +0 -0
  33. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-1a561eed00f3dbe868bf5030059793300209179dc8fb73e4b57a54b5e81262fe.json +0 -0
  34. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-1d3fca2682e25a01143da92285297f134a6a105a96f64d87e0db3abb219855e4.json +0 -0
  35. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-249b3b57c27a71baa823f1fe0f0bba9c9af36f61c28f731e58beea60ec48e687.json +0 -0
  36. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-24cb6b683daa42d7125f862e25943ab4be7bf275cd8739f8da4859d701795e1a.json +0 -0
  37. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-263c8fce6db5b03bbd012fafdba6943cbee6ed7eb8976cdef4f5b01dde7ca6fd.json +0 -0
  38. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-2a5d92fb1638df830a4477a7cdf24e6db6b43034b7bbe74fdfb63e8afe2c4071.json +0 -0
  39. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-2b065a363f58caed60e3706603c1260dbf5a4c795604a5b68edda22eb07fec1b.json +0 -0
  40. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-3fc3cb39934683de8cd475ce1368c8373453eb1e01f81587d66b9d14b109ce6e.json +0 -0
  41. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-48f33b77b7c1633467b0b2efcaa1d3c207e7757e4f1d83b40d15e6ca365f7771.json +0 -0
  42. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-4ae09f2c08e2f324bee01bb8487a8f37678a1c5e9d327339235c50d4921a8949.json +0 -0
  43. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-4d7a76fa413c9ef0ce2a47ac7bb7e01d3e6a2aabded9487d21010a53efee8852.json +0 -0
  44. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-4fcbdd6657c7dc9b60b3a563dd41711b3dbcf72ce063427b7a01f8cddf34c244.json +0 -0
  45. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-577109ac00ccfbd38ecaccab94116f2f46a4caf5612afa372cded197123c1e08.json +0 -0
  46. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-5db25f97d8578d6d78f2f6bd4b72cc82a9b1b82805c6422d967ac63b20d99db4.json +0 -0
  47. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-5ec3233a3a23e926055056d46bdde17836a633066dbb5f349502648cd3ea9a60.json +0 -0
  48. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-66edb6ea424d8681927dcddb6bac5f1239175f4775d1f40417ba15054b0c6f19.json +0 -0
  49. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-6f9c6d201c1b5712efb68c363bffd3e0169c11f2a8f925e8cd4e8808599ff7b4.json +0 -0
  50. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-733c3ebacc86f444bf5e2dd79ade660c291e88a00fc09b722f6e2e191545874c.json +0 -0
  51. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-7a3bf27c330c468a596e8a297cf7d8b192e31e67ecc5177c1267f579e8e247c7.json +0 -0
  52. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-7a7e59e47b30b12237511fd3d7da2d17b0471ad2b006af48d6a6f587c779692b.json +0 -0
  53. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-7dcbb33312cc9f11ae3a6d73b1ace017a9f19a8bf8f10304fc57977c8efeadff.json +0 -0
  54. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-7efa7c0d747afc4b6aed0586ff846c27839c3213ff7ee9f30c89b0d0f17e60e3.json +0 -0
  55. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-8493140d788604d498a4e48da4158708572ccc9d60185290a00d549cc84533db.json +0 -0
  56. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-8493bb0059b013eaca42fd10cd7d04f0d06a8acaed379eff0d23f3229edde9ee.json +0 -0
  57. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-87996b3d6c7a2195438d7038015b06949102bce8c7b8cd8db1f83aaf23cbe489.json +0 -0
  58. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-8d33717587c6ee8f5fc339a80b1212a73d6c03e45856b1d55457fc8074709dd0.json +0 -0
  59. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-8f096d8171b89f9615d18f95d696dc9e4fb3674e103161a713cdc806f7a68506.json +0 -0
  60. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-917732a56ee04bf3a6e127319dda8225210869c82f9828d878162394dba4e078.json +0 -0
  61. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-95fe4ef93ee90733db1b67ed7987f80b5aac792f1590b979c68b418d1599eb98.json +0 -0
  62. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-9b67658569b343d8b4b61ae0a7dc721f367f2ba33c7b69b9e68bfd5c9bff5206.json +0 -0
  63. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-9c8062ea55d070afef68309e58fa987eb37fda44e1efbf68c8ba2af7846cc968.json +0 -0
  64. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-a06e1d9f6f95e4c4c2b98310ebddcc9d963cc033582bf2e945e8bf3a301b4247.json +0 -0
  65. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-a55265c9b07bd1399961a6f1e757201fd0eebe868ddaf96437111113d80fce92.json +0 -0
  66. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-a60ec2f66f1e7b84189e5b089f2087a29ff6a64326a3743dea935bbc58ee77fa.json +0 -0
  67. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-a891a37be5c2d7cce775c2dd33726b0318fd3839beab222a1b22bc6174604207.json +0 -0
  68. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-a945191bb4b3e37d6823ed3ad499339d007d69983105de8567777d9daf517b28.json +0 -0
  69. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-abe9f7832f2bd799ac44008da031e8d8ab52d4f5fbfc2a7e3974e8873bae55b2.json +0 -0
  70. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-aca588cca57a85e4d7fcc40c23cd87e57d53d11ca550d78e7e3d5e39e524fcd3.json +0 -0
  71. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-b02f8aafc00a7724510772ac41269e368c5bccf03ef7b4590e0ef6fd1a1bf64f.json +0 -0
  72. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-b742e17cabe2d64617e9aa64bafc782172f7a4f8023d1b54f952a0fb39f6b2b8.json +0 -0
  73. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-b94e349dbc0daec57f8f8f6e9e2dffb06100b1bb2b41d297c9f3b191da37a83d.json +0 -0
  74. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-bd9afa22994aba671dbf7b5f89b53c2ee02f53c0442a81265786a6d52d08512f.json +0 -0
  75. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-be60554eca98a5899efc6b49785cecd6444a6d39afed9e4a884ce2dbf162012c.json +0 -0
  76. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-bf49702b506c9a1650ece1f8e8d9f14834a902f8caefafe30ded55e2790f2188.json +0 -0
  77. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-bfcd21ed704cd305db5c17fcdec7d92aa4ac501913c9c9514d8ff92928c0c7e7.json +0 -0
  78. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-c3ab694650f49a75b146fb877a92e48c4f20f0d99f70f8ec859fbb763b01a1e5.json +0 -0
  79. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-c55542bb9fae544d87fae6f30e0fe8a9088d12075f4442ab4fe2fcd05e472234.json +0 -0
  80. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-cb29a6556d35ac630ee0aa885dd7341cf9573bd3efd216ff8a887b87686b03db.json +0 -0
  81. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-d0a1f341a89f5f14696b10baa72db9d95551c2b7e5fc67308fd52dc03dd98a92.json +0 -0
  82. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-d2ad0a0ca2649c9e4873cfcc1fc66d2d07cc45d0f65c560b06d7b5f592f4fa8a.json +0 -0
  83. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-d6cfe78eb635ba0b89ca4021a4dc8182d18ab5b197f30149cd28488eba4c1df5.json +0 -0
  84. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-d729b56dea00e49dcdba8cf0001e2811da27351eabe98212db3b589f18fc6f32.json +0 -0
  85. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-d9658bfd4e7170b41d03f2ddf2446d0bf54171c0d39d53bf20af2b8437f2ec48.json +0 -0
  86. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-dbba7b3d3289425bae711aedbf73fbc3699f857f86f84d95c3b556d05c5658b0.json +0 -0
  87. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-dcb96b649d6d63a58efd5d445453a4f3d7869a56ff714b69bedf3d616a0473ca.json +0 -0
  88. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-ebd876fbf5362a5900e75bc05f2f11c73c406ef7da4e95097fc6a1c3d1b8bc54.json +0 -0
  89. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-eef5cc6bce1cc14eba8f3e68971724ef181e88cffcedd74673615f2026b89a62.json +0 -0
  90. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/.sqlx/query-ef56d5fefc5774040d1ee397beadb475f6af02768c22f0e583c74062e2e821ce.json +0 -0
  91. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/Cargo.toml +0 -0
  92. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/README.md +0 -0
  93. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/migrations/20241007163501_initial.sql +0 -0
  94. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/migrations/20241211120039_merge_job_priority.sql +0 -0
  95. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/migrations/20241211121159_basic_indexes.sql +0 -0
  96. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/migrations/20241212151105_check_segment_records.sql +0 -0
  97. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/migrations/20250110145554_in_flight_messages.sql +0 -0
  98. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_binding/Cargo.toml +0 -0
  99. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_binding/src/lib.rs +0 -0
  100. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/Cargo.toml +0 -0
  101. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/fuzzy_query.rs +0 -0
  102. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/lib.rs +0 -0
  103. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/query_io.rs +0 -0
  104. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/reader.rs +0 -0
  105. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/request_types.rs +0 -0
  106. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/resource_indexer.rs +0 -0
  107. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/schema.rs +0 -0
  108. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/search_query.rs +0 -0
  109. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/search_response.rs +0 -0
  110. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/set_query.rs +0 -0
  111. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/src/stop_words.rs +0 -0
  112. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/README.md +0 -0
  113. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ar.json +0 -0
  114. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/az.json +0 -0
  115. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/bn.json +0 -0
  116. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ca.json +0 -0
  117. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ch.json +0 -0
  118. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/da.json +0 -0
  119. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/de.json +0 -0
  120. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/el.json +0 -0
  121. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/en.json +0 -0
  122. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/es.json +0 -0
  123. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/eu.json +0 -0
  124. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/extract.py +0 -0
  125. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/fi.json +0 -0
  126. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/fr.json +0 -0
  127. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/he.json +0 -0
  128. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/hu.json +0 -0
  129. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/id.json +0 -0
  130. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/it.json +0 -0
  131. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/kk.json +0 -0
  132. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ne.json +0 -0
  133. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/nl.json +0 -0
  134. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/no.json +0 -0
  135. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/pt.json +0 -0
  136. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ro.json +0 -0
  137. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/ru.json +0 -0
  138. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/sl.json +0 -0
  139. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/sv.json +0 -0
  140. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/tg.json +0 -0
  141. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/stop_words/tr.json +0 -0
  142. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/tests/common/mod.rs +0 -0
  143. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_paragraph/tests/reader.rs +0 -0
  144. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/Cargo.toml +0 -0
  145. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/build.py +0 -0
  146. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/build.rs +0 -0
  147. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/nidx.proto +0 -0
  148. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/nodereader.proto +0 -0
  149. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/noderesources.proto +0 -0
  150. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/nodewriter.proto +0 -0
  151. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_protos/src/lib.rs +0 -0
  152. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/Cargo.toml +0 -0
  153. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/graph_collector.rs +0 -0
  154. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/graph_query_parser.rs +0 -0
  155. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/io_maps.rs +0 -0
  156. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/lib.rs +0 -0
  157. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/reader.rs +0 -0
  158. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/resource_indexer.rs +0 -0
  159. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/schema.rs +0 -0
  160. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/src/top_unique_n.rs +0 -0
  161. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/tests/common/mod.rs +0 -0
  162. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/tests/test_graph_query_parser_search.rs +0 -0
  163. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/tests/test_graph_search.rs +0 -0
  164. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_relation/tests/test_writer.rs +0 -0
  165. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tantivy/Cargo.toml +0 -0
  166. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tantivy/src/index_reader.rs +0 -0
  167. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tantivy/src/lib.rs +0 -0
  168. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tantivy/src/utils.rs +0 -0
  169. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tests/Cargo.toml +0 -0
  170. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tests/src/graph.rs +0 -0
  171. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_tests/src/lib.rs +0 -0
  172. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/Cargo.toml +0 -0
  173. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/lib.rs +0 -0
  174. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/prefilter.rs +0 -0
  175. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/query_io.rs +0 -0
  176. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/reader.rs +0 -0
  177. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/request_types.rs +0 -0
  178. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/resource_indexer.rs +0 -0
  179. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/schema.rs +0 -0
  180. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/src/search_query.rs +0 -0
  181. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/tests/common/mod.rs +0 -0
  182. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/tests/test_deletions.rs +0 -0
  183. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/tests/test_flow.rs +0 -0
  184. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/tests/test_search.rs +0 -0
  185. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_text/tests/test_streaming.rs +0 -0
  186. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_types/Cargo.toml +0 -0
  187. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_types/src/lib.rs +0 -0
  188. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_types/src/prefilter.rs +0 -0
  189. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_types/src/query_language.rs +0 -0
  190. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/disk_hnsw.rs +0 -0
  191. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/params.rs +0 -0
  192. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point/ram_hnsw.rs +0 -0
  193. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_point_provider/mod.rs +0 -0
  194. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1/node.rs +0 -0
  195. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1/trie.rs +0 -0
  196. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store/v1/trie_ram.rs +0 -0
  197. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_store.rs +0 -0
  198. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/data_types.rs +0 -0
  199. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/inverted_index/fst_index.rs +0 -0
  200. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/inverted_index/map.rs +0 -0
  201. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/inverted_index.rs +0 -0
  202. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/query_io.rs +0 -0
  203. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/request_types.rs +0 -0
  204. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/utils.rs +0 -0
  205. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/vector_types/dense_f32.rs +0 -0
  206. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/src/vector_types/mod.rs +0 -0
  207. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/nidx_vector/tests/common/mod.rs +0 -0
  208. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/api/grpc.rs +0 -0
  209. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/api/shards.rs +0 -0
  210. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/api.rs +0 -0
  211. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/control.rs +0 -0
  212. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/errors.rs +0 -0
  213. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/grpc_server.rs +0 -0
  214. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/lib.rs +0 -0
  215. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/main.rs +0 -0
  216. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/deletion.rs +0 -0
  217. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/index.rs +0 -0
  218. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/index_request.rs +0 -0
  219. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/segment.rs +0 -0
  220. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metadata/shard.rs +0 -0
  221. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/metrics.rs +0 -0
  222. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/audit_task.rs +0 -0
  223. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/log_merge.rs +0 -0
  224. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/metrics_task.rs +0 -0
  225. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/purge_tasks.rs +0 -0
  226. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler/vector_merge.rs +0 -0
  227. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/scheduler.rs +0 -0
  228. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/grpc.rs +0 -0
  229. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/index_cache.rs +0 -0
  230. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/query_language.rs +0 -0
  231. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/query_planner.rs +0 -0
  232. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/shard_search.rs +0 -0
  233. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/shard_selector.rs +0 -0
  234. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/shard_suggest.rs +0 -0
  235. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher/streams.rs +0 -0
  236. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/searcher.rs +0 -0
  237. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/segment_store.rs +0 -0
  238. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/settings.rs +0 -0
  239. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/telemetry/duration_layer.rs +0 -0
  240. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/telemetry/log_format.rs +0 -0
  241. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/telemetry/middleware.rs +0 -0
  242. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/telemetry.rs +0 -0
  243. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/tool.rs +0 -0
  244. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/utilization_tracker.rs +0 -0
  245. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/src/worker.rs +0 -0
  246. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/common/mod.rs +0 -0
  247. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/common/services.rs +0 -0
  248. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_date_range_search.rs +0 -0
  249. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_search_filtering.rs +0 -0
  250. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_search_relations.rs +0 -0
  251. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_search_sorting.rs +0 -0
  252. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_searcher_cluster.rs +0 -0
  253. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_security_search.rs +0 -0
  254. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_shards.rs +0 -0
  255. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_suggest.rs +0 -0
  256. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_vector_normalization.rs +0 -0
  257. {nidx_binding-6.6.1.post437 → nidx_binding-6.6.1.post439}/tests/test_vectorsets.rs +0 -0
@@ -2292,6 +2292,7 @@ dependencies = [
2292
2292
  "nidx_protos",
2293
2293
  "nidx_types",
2294
2294
  "rand 0.8.5",
2295
+ "rayon",
2295
2296
  "rstest",
2296
2297
  "serde",
2297
2298
  "simsimd",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nidx_binding
3
- Version: 6.6.1.post437
3
+ Version: 6.6.1.post439
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
@@ -10,7 +10,7 @@ build-backend = "pdm.backend"
10
10
 
11
11
  [project]
12
12
  name = "nidx_protos"
13
- version = "6.6.1.post437"
13
+ version = "6.6.1.post439"
14
14
  license = "AGPL-3.0-or-later"
15
15
  description = "Protobuf definitions for nucliadb/nidx"
16
16
  authors = [{ name = "Nuclia", email = "nucliadb@nuclia.com" }]
@@ -23,6 +23,7 @@ stream-vbyte = "0.4.1"
23
23
  fst = "0.4.7"
24
24
  bit-vec = "0.8.0"
25
25
  bincode = "2.0.1"
26
+ rayon = "1.10.0"
26
27
 
27
28
  [target.'cfg(not(all(target_os="linux",target_arch="aarch64")))'.dependencies]
28
29
  simsimd = "4.3.1"
@@ -65,9 +65,9 @@ impl VectorType {
65
65
  }
66
66
  }
67
67
 
68
- pub fn dimension(&self) -> Option<usize> {
68
+ pub fn dimension(&self) -> usize {
69
69
  match self {
70
- VectorType::DenseF32 { dimension } => Some(*dimension),
70
+ VectorType::DenseF32 { dimension } => *dimension,
71
71
  }
72
72
  }
73
73
 
@@ -79,6 +79,13 @@ impl VectorType {
79
79
  }
80
80
  }
81
81
 
82
+ #[derive(Debug, Default, Serialize, Deserialize, Clone)]
83
+ pub enum VectorCardinality {
84
+ #[default]
85
+ Single,
86
+ Multi,
87
+ }
88
+
82
89
  #[derive(Debug, Serialize, Deserialize, Clone)]
83
90
  pub struct VectorConfig {
84
91
  #[serde(default)]
@@ -86,6 +93,8 @@ pub struct VectorConfig {
86
93
  #[serde(default)]
87
94
  pub normalize_vectors: bool,
88
95
  pub vector_type: VectorType,
96
+ #[serde(default)]
97
+ pub vector_cardinality: VectorCardinality,
89
98
  #[serde(default, skip_serializing_if = "Vec::is_empty")]
90
99
  pub flags: Vec<String>,
91
100
  }
@@ -114,11 +123,13 @@ impl TryFrom<VectorIndexConfig> for VectorConfig {
114
123
  dimension: dim as usize,
115
124
  },
116
125
  };
126
+ // TODO: Add support for multivectors. It is incompatible with vector normalization for now
117
127
  Ok(VectorConfig {
118
128
  similarity: proto.similarity().into(),
119
129
  normalize_vectors: proto.normalize_vectors,
120
130
  vector_type,
121
131
  flags: vec![],
132
+ vector_cardinality: VectorCardinality::Single,
122
133
  })
123
134
  }
124
135
  }
@@ -30,7 +30,8 @@ use crate::config::{VectorConfig, flags};
30
30
  use crate::data_store::{DataStore, DataStoreV1, DataStoreV2, OpenReason, ParagraphRef, VectorRef};
31
31
  use crate::formula::Formula;
32
32
  use crate::inverted_index::{FilterBitSet, InvertedIndexes, build_indexes};
33
- use crate::{ParagraphAddr, VectorErr, VectorR, VectorSegmentMeta, VectorSegmentMetadata};
33
+ use crate::{ParagraphAddr, VectorAddr, VectorErr, VectorR, VectorSegmentMeta, VectorSegmentMetadata};
34
+ use core::f32;
34
35
  use disk_hnsw::DiskHnsw;
35
36
  use io::{BufWriter, Write};
36
37
  use memmap2::Mmap;
@@ -187,7 +188,7 @@ fn merge_indexes<DS: DataStore + 'static>(
187
188
 
188
189
  let metadata = VectorSegmentMetadata {
189
190
  path: data_point_path.to_path_buf(),
190
- records: merged_vectors_count,
191
+ records: data_store.stored_paragraph_count(),
191
192
  index_metadata: VectorSegmentMeta {
192
193
  tags: operants[0].metadata.index_metadata.tags.clone(),
193
194
  },
@@ -208,12 +209,15 @@ fn merge_indexes<DS: DataStore + 'static>(
208
209
 
209
210
  pub fn create(path: &Path, elems: Vec<Elem>, config: &VectorConfig, tags: HashSet<String>) -> VectorR<OpenDataPoint> {
210
211
  // Check dimensions
211
- if let Some(dim) = config.vector_type.dimension() {
212
- if let Some(elem) = elems.iter().find(|elem| elem.vector.len() != dim) {
213
- return Err(crate::VectorErr::InconsistentDimensions {
214
- index_config: dim,
215
- vector: elem.vector.len(),
216
- });
212
+ let dim = config.vector_type.dimension();
213
+ for e in &elems {
214
+ for v in &e.vectors {
215
+ if v.len() != dim {
216
+ return Err(crate::VectorErr::InconsistentDimensions {
217
+ index_config: dim,
218
+ vector: v.len(),
219
+ });
220
+ }
217
221
  }
218
222
  }
219
223
 
@@ -229,6 +233,12 @@ pub fn create(path: &Path, elems: Vec<Elem>, config: &VectorConfig, tags: HashSe
229
233
  tags,
230
234
  )
231
235
  } else {
236
+ // Double check vector cardinality
237
+ if elems.iter().any(|e| e.vectors.len() != 1) {
238
+ return Err(crate::VectorErr::InvalidConfiguration(
239
+ "DataStore v1 not supported with multi-vectors",
240
+ ));
241
+ }
232
242
  DataStoreV1::create(path, elems, &config.vector_type)?;
233
243
  create_indexes(
234
244
  path,
@@ -280,7 +290,7 @@ fn create_indexes<DS: DataStore + 'static>(
280
290
 
281
291
  let metadata = VectorSegmentMetadata {
282
292
  path: path.to_path_buf(),
283
- records: vector_count,
293
+ records: data_store.stored_paragraph_count(),
284
294
  index_metadata: VectorSegmentMeta { tags },
285
295
  };
286
296
 
@@ -360,7 +370,7 @@ impl<DS: DataStore> DataRetriever for Retriever<'_, DS> {
360
370
  #[derive(Clone, Debug)]
361
371
  pub struct Elem {
362
372
  pub key: String,
363
- pub vector: Vec<f32>,
373
+ pub vectors: Vec<Vec<f32>>,
364
374
  pub metadata: Option<Vec<u8>>,
365
375
  pub labels: Vec<String>,
366
376
  }
@@ -370,7 +380,21 @@ impl Elem {
370
380
  labels,
371
381
  metadata,
372
382
  key,
373
- vector,
383
+ vectors: vec![vector],
384
+ }
385
+ }
386
+
387
+ pub fn new_multivector(
388
+ key: String,
389
+ vectors: Vec<Vec<f32>>,
390
+ labels: Vec<String>,
391
+ metadata: Option<Vec<u8>>,
392
+ ) -> Elem {
393
+ Elem {
394
+ labels,
395
+ metadata,
396
+ key,
397
+ vectors,
374
398
  }
375
399
  }
376
400
  }
@@ -440,6 +464,10 @@ impl OpenDataPoint {
440
464
  self.data_store.get_paragraph(id)
441
465
  }
442
466
 
467
+ pub fn get_vector(&self, id: VectorAddr) -> VectorRef {
468
+ self.data_store.get_vector(id)
469
+ }
470
+
443
471
  pub fn search(
444
472
  &self,
445
473
  query: &[f32],
@@ -490,12 +518,20 @@ impl OpenDataPoint {
490
518
  let mut scored_results = Vec::new();
491
519
  for paragraph_addr in bitset.iter() {
492
520
  let paragraph = data_store.get_paragraph(paragraph_addr);
493
- for vector_addr in paragraph.vectors(&paragraph_addr) {
494
- let address = vector_addr.into();
495
- let score = retriever.similarity(query_address, address);
496
- if score >= min_score {
497
- scored_results.push(Reverse(Cnx(address, score)));
498
- }
521
+
522
+ // Only return the best vector match per paragraph
523
+ let best_vector_score = paragraph
524
+ .vectors(&paragraph_addr)
525
+ .map(|va| {
526
+ let address = va.into();
527
+ let score = retriever.similarity(query_address, address);
528
+ Cnx(address, score)
529
+ })
530
+ .max_by(|v, w| v.1.total_cmp(&w.1))
531
+ .unwrap();
532
+
533
+ if best_vector_score.1 >= min_score {
534
+ scored_results.push(Reverse(best_vector_score));
499
535
  }
500
536
  }
501
537
  scored_results.sort();
@@ -528,7 +564,7 @@ mod test {
528
564
 
529
565
  use crate::{
530
566
  ParagraphAddr, VectorAddr,
531
- config::{Similarity, VectorConfig},
567
+ config::{Similarity, VectorCardinality, VectorConfig},
532
568
  formula::Formula,
533
569
  vector_types::dense_f32::{dot_similarity, encode_vector},
534
570
  };
@@ -612,6 +648,7 @@ mod test {
612
648
  vector_type: crate::config::VectorType::DenseF32 { dimension: DIMENSION },
613
649
  normalize_vectors: false,
614
650
  flags: vec![],
651
+ vector_cardinality: VectorCardinality::Single,
615
652
  };
616
653
  let mut rng = SmallRng::seed_from_u64(1234567890);
617
654
  let temp_dir = tempdir()?;
@@ -628,7 +665,7 @@ mod test {
628
665
 
629
666
  for (i, (elem, mut labels)) in elems.into_iter().enumerate() {
630
667
  let vector = dp.data_store.get_vector(VectorAddr(i as u32));
631
- assert_eq!(config.vector_type.encode(&elem.vector), vector.vector());
668
+ assert_eq!(config.vector_type.encode(&elem.vectors[0]), vector.vector());
632
669
 
633
670
  let paragraph = dp.data_store.get_paragraph(ParagraphAddr(i as u32));
634
671
  assert_eq!(elem.key, paragraph.id());
@@ -657,6 +694,7 @@ mod test {
657
694
  vector_type: crate::config::VectorType::DenseF32 { dimension: DIMENSION },
658
695
  normalize_vectors: false,
659
696
  flags: vec![],
697
+ vector_cardinality: VectorCardinality::Single,
660
698
  };
661
699
  let mut rng = SmallRng::seed_from_u64(1234567890);
662
700
 
@@ -684,7 +722,7 @@ mod test {
684
722
 
685
723
  for (i, (elem, mut labels)) in elems1.into_iter().chain(elems2.into_iter()).enumerate() {
686
724
  let vector = merged_dp.data_store.get_vector(VectorAddr(i as u32));
687
- assert_eq!(config.vector_type.encode(&elem.vector), vector.vector());
725
+ assert_eq!(config.vector_type.encode(&elem.vectors[0]), vector.vector());
688
726
 
689
727
  let paragraph = merged_dp.data_store.get_paragraph(ParagraphAddr(i as u32));
690
728
  assert_eq!(elem.key, paragraph.id());
@@ -734,6 +772,7 @@ mod test {
734
772
  vector_type: crate::config::VectorType::DenseF32 { dimension: DIMENSION },
735
773
  normalize_vectors: false,
736
774
  flags: vec![],
775
+ vector_cardinality: VectorCardinality::Single,
737
776
  };
738
777
 
739
778
  // Create a data point
@@ -82,7 +82,7 @@ pub type Neighbours = Vec<(Address, f32)>;
82
82
  struct NodeFilter<'a, DR> {
83
83
  retriever: &'a DR,
84
84
  filter: &'a FilterBitSet,
85
- blocked_addresses: &'a FxHashSet<Address>,
85
+ paragraphs: FxHashSet<ParagraphAddr>,
86
86
  vec_counter: RepCounter<'a>,
87
87
  }
88
88
 
@@ -93,11 +93,18 @@ impl<DR: DataRetriever> NodeFilter<'_, DR> {
93
93
 
94
94
  pub fn is_valid(&self, n: Address, score: f32) -> bool {
95
95
  !score.is_nan()
96
- // The vector is blocked, meaning that its key is part of the current version of the solution
97
- && !self.blocked_addresses.contains(&n)
98
- // The number of times this vector appears is 0
96
+ // Reject the candidate if we already have a result for the same paragraph
97
+ && !self.paragraphs.contains(&self.retriever.paragraph(n))
98
+ // Reject the candidate if we already have a result with an identical vector
99
99
  && self.vec_counter.get(self.retriever.get_vector(n)) == 0
100
100
  }
101
+
102
+ /// Adds a result so that further candidates with the same vector
103
+ /// or paragraph will get rejected.
104
+ pub fn add_result(&mut self, n: Address) {
105
+ self.paragraphs.insert(self.retriever.paragraph(n));
106
+ self.vec_counter.add(self.retriever.get_vector(n));
107
+ }
101
108
  }
102
109
 
103
110
  pub struct HnswOps<'a, DR> {
@@ -194,8 +201,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
194
201
 
195
202
  let paragraph_addr = self.retriever.paragraph(candidate);
196
203
  if filter.is_valid(candidate, candidate_similarity) && filter.passes_formula(paragraph_addr) {
197
- let candidate_vector = self.retriever.get_vector(candidate);
198
- filter.vec_counter.add(candidate_vector);
204
+ filter.add_result(candidate);
199
205
  results.push((candidate, candidate_similarity));
200
206
  }
201
207
 
@@ -354,7 +360,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
354
360
  let filter = NodeFilter {
355
361
  filter: with_filter,
356
362
  retriever: self.retriever,
357
- blocked_addresses: &Default::default(),
363
+ paragraphs: Default::default(),
358
364
  vec_counter: RepCounter::new(!with_duplicates),
359
365
  };
360
366
  let layer_zero = hnsw.get_layer(0);
@@ -24,7 +24,7 @@ use std::time::Instant;
24
24
  use tempfile::tempdir;
25
25
 
26
26
  use crate::VectorR;
27
- use crate::config::{Similarity, VectorConfig, flags};
27
+ use crate::config::{Similarity, VectorCardinality, VectorConfig, flags};
28
28
  use crate::data_point::{self, Elem};
29
29
  use crate::data_store::{DataStoreV1, DataStoreV2};
30
30
  use crate::formula::{AtomClause, Clause, Formula};
@@ -34,6 +34,7 @@ const CONFIG: VectorConfig = VectorConfig {
34
34
  normalize_vectors: false,
35
35
  vector_type: crate::config::VectorType::DenseF32 { dimension: 178 },
36
36
  flags: vec![],
37
+ vector_cardinality: VectorCardinality::Single,
37
38
  };
38
39
 
39
40
  fn create_query() -> Vec<f32> {
@@ -18,10 +18,14 @@
18
18
  // along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  //
20
20
 
21
+ use crate::ParagraphAddr;
22
+ use crate::config::VectorCardinality;
21
23
  use crate::data_point::OpenDataPoint;
22
24
  use crate::data_point_provider::SearchRequest;
23
25
  use crate::data_point_provider::VectorConfig;
24
26
  use crate::data_store::ParagraphRef;
27
+ use crate::multivector::extract_multi_vectors;
28
+ use crate::multivector::maxsim_similarity;
25
29
  use crate::request_types::VectorSearchRequest;
26
30
  use crate::utils;
27
31
  use crate::{VectorErr, VectorR};
@@ -30,6 +34,7 @@ use nidx_protos::prost::*;
30
34
  use nidx_protos::{DocumentScored, DocumentVectorIdentifier, SentenceMetadata, VectorSearchResponse};
31
35
  use nidx_types::prefilter::PrefilterResult;
32
36
  use nidx_types::query_language::*;
37
+ use rayon::prelude::*;
33
38
  use std::cmp::Ordering;
34
39
  use std::collections::HashSet;
35
40
  use std::time::Instant;
@@ -39,6 +44,8 @@ use tracing::*;
39
44
  pub struct ScoredParagraph<'a> {
40
45
  score: f32,
41
46
  paragraph: ParagraphRef<'a>,
47
+ address: ParagraphAddr,
48
+ data_point: &'a OpenDataPoint,
42
49
  }
43
50
  impl Eq for ScoredParagraph<'_> {}
44
51
  impl std::hash::Hash for ScoredParagraph<'_> {
@@ -72,8 +79,13 @@ impl PartialEq for ScoredParagraph<'_> {
72
79
  }
73
80
 
74
81
  impl<'a> ScoredParagraph<'a> {
75
- pub fn new(paragraph: ParagraphRef<'a>, score: f32) -> Self {
76
- Self { paragraph, score }
82
+ pub fn new(data_point: &'a OpenDataPoint, address: ParagraphAddr, paragraph: ParagraphRef<'a>, score: f32) -> Self {
83
+ Self {
84
+ data_point,
85
+ paragraph,
86
+ score,
87
+ address,
88
+ }
77
89
  }
78
90
  pub fn score(&self) -> f32 {
79
91
  self.score
@@ -88,6 +100,12 @@ impl<'a> ScoredParagraph<'a> {
88
100
  let metadata = self.paragraph.metadata();
89
101
  (!metadata.is_empty()).then_some(metadata)
90
102
  }
103
+ pub fn vectors(&self) -> Vec<&[u8]> {
104
+ self.paragraph
105
+ .vectors(&self.address)
106
+ .map(|va| self.data_point.get_vector(va).vector())
107
+ .collect()
108
+ }
91
109
  }
92
110
 
93
111
  // Fixed-sized sorted collection
@@ -251,8 +269,9 @@ impl Reader {
251
269
  open_data_point.search(query, filter, with_duplicates, no_results, &self.config, min_score);
252
270
 
253
271
  for candidate in partial_solution {
254
- let paragraph = open_data_point.get_paragraph(candidate.paragraph());
255
- let scored_paragraph = ScoredParagraph::new(paragraph, candidate.score());
272
+ let addr = candidate.paragraph();
273
+ let paragraph = open_data_point.get_paragraph(addr);
274
+ let scored_paragraph = ScoredParagraph::new(open_data_point, addr, paragraph, candidate.score());
256
275
  ffsv.add(scored_paragraph, candidate.vector());
257
276
  }
258
277
  }
@@ -293,14 +312,72 @@ impl Reader {
293
312
  formula.operator = BooleanOperator::Or;
294
313
  }
295
314
 
296
- let search_request = (total_to_get, request, formula);
297
315
  let v = time.elapsed().as_millis();
298
- debug!("{id:?} - Searching: starts at {v} ms");
299
-
300
- let result = self._search(&search_request, &request.segment_filtering_formula)?;
316
+ let result = match self.config.vector_cardinality {
317
+ VectorCardinality::Single => {
318
+ let search_request = (total_to_get, request, formula);
319
+ debug!("{id:?} - Searching: starts at {v} ms");
320
+ self._search(&search_request, &request.segment_filtering_formula)?
321
+ }
322
+ VectorCardinality::Multi => {
323
+ let search_vectors = extract_multi_vectors(&request.vector, &self.config.vector_type)?;
324
+ debug!(
325
+ "{id:?} - Multi-vector searching: starts at {v} ms with {} requests",
326
+ search_vectors.len()
327
+ );
328
+ let encoded_query = search_vectors
329
+ .iter()
330
+ .map(|v| self.config.vector_type.encode(v))
331
+ .collect::<Vec<_>>();
332
+
333
+ // Search for each vector in the query
334
+ let results = search_vectors
335
+ .into_par_iter()
336
+ .map(|v| {
337
+ let mut subreq = request.clone();
338
+
339
+ subreq.vector = v;
340
+ // We are OK with duplicate individual vectors. We always deduplicate by paragraphs anyway (NodeFilter.paragraphs)
341
+ subreq.with_duplicates = true;
342
+ // We don't care about min_score in this first pass, we apply min_score on top of maxsim similarity
343
+ subreq.min_score = f32::MIN;
344
+ // Request at least a few vectors, since the rerank may offer different results later
345
+ let total_to_get = total_to_get.max(10);
346
+
347
+ let search_request = (total_to_get, &subreq, formula.clone());
348
+ self._search(&search_request, &request.segment_filtering_formula)
349
+ })
350
+ .collect::<Result<Vec<_>, _>>()?;
351
+
352
+ let v = time.elapsed().as_millis();
353
+ debug!("{id:?} - Multi-vector reranking: starts at {v} ms");
354
+
355
+ // Remove duplicates, we only want each paragraph once
356
+ let mut result_paragraphs = results.into_iter().flatten().collect::<Vec<_>>();
357
+ result_paragraphs.sort_unstable_by_key(|rp| rp.address);
358
+ result_paragraphs.dedup_by_key(|rp| rp.address);
359
+
360
+ // Score each paragraph using maxsim
361
+ let similarity_function = self.config.similarity_function();
362
+ let mut results = result_paragraphs
363
+ .into_par_iter()
364
+ .filter_map(|mut sp| {
365
+ sp.score = maxsim_similarity(similarity_function, &encoded_query, &sp.vectors());
366
+ (sp.score() > request.min_score).then_some(sp)
367
+ })
368
+ .collect::<Vec<_>>();
369
+
370
+ // Select top_k
371
+ results.sort_unstable_by(|a, b| b.score().partial_cmp(&a.score()).unwrap());
372
+ results.truncate(total_to_get);
373
+
374
+ results
375
+ }
376
+ };
301
377
 
302
378
  let v = time.elapsed().as_millis();
303
379
  debug!("{id:?} - Searching: ends at {v} ms");
380
+
304
381
  debug!("{id:?} - Creating results: starts at {v} ms");
305
382
 
306
383
  let documents = result
@@ -335,7 +412,7 @@ mod tests {
335
412
  use tempfile::TempDir;
336
413
 
337
414
  use super::*;
338
- use crate::config::{Similarity, VectorConfig, VectorType};
415
+ use crate::config::{Similarity, VectorCardinality, VectorConfig, VectorType};
339
416
  use crate::data_point;
340
417
  use crate::indexer::{ResourceWrapper, index_resource};
341
418
 
@@ -348,6 +425,7 @@ mod tests {
348
425
  normalize_vectors: false,
349
426
  vector_type: VectorType::DenseF32 { dimension: 3 },
350
427
  flags: vec![],
428
+ vector_cardinality: VectorCardinality::Single,
351
429
  };
352
430
  let raw_sentences = [
353
431
  (
@@ -444,6 +522,7 @@ mod tests {
444
522
  normalize_vectors: false,
445
523
  vector_type: VectorType::DenseF32 { dimension: 3 },
446
524
  flags: vec![],
525
+ vector_cardinality: VectorCardinality::Single,
447
526
  };
448
527
  let raw_sentences = [
449
528
  (
@@ -565,6 +644,7 @@ mod tests {
565
644
  normalize_vectors: false,
566
645
  vector_type: VectorType::DenseF32 { dimension: 3 },
567
646
  flags: vec![],
647
+ vector_cardinality: VectorCardinality::Single,
568
648
  };
569
649
  let raw_sentences = [
570
650
  (
@@ -211,13 +211,14 @@ pub fn merge(
211
211
  #[cfg(test)]
212
212
  mod tests {
213
213
  use super::*;
214
- use crate::data_point::Elem;
214
+ use crate::{config::VectorCardinality, data_point::Elem};
215
215
 
216
216
  const VECTOR_CONFIG: VectorConfig = VectorConfig {
217
217
  vector_type: VectorType::DenseF32 { dimension: 3 },
218
218
  similarity: crate::config::Similarity::Dot,
219
219
  normalize_vectors: false,
220
220
  flags: vec![],
221
+ vector_cardinality: VectorCardinality::Single,
221
222
  };
222
223
 
223
224
  #[test]
@@ -137,10 +137,12 @@ impl store::IntoBuffer for Elem {
137
137
  let ram_trie = trie_ram::create_trie(&self.labels);
138
138
  let trie_bytes = trie::serialize(ram_trie);
139
139
 
140
+ debug_assert!(self.vectors.len() == 1);
141
+
140
142
  Node::serialize_into(
141
143
  w,
142
144
  self.key,
143
- vector_type.encode(&self.vector),
145
+ vector_type.encode(&self.vectors[0]),
144
146
  vector_type.vector_alignment(),
145
147
  trie_bytes,
146
148
  self.metadata.as_ref(),
@@ -59,7 +59,7 @@ impl<'a> StoredParagraph<'a> {
59
59
  labels: elem.labels.iter().map(String::as_str).collect(),
60
60
  metadata: elem.metadata.as_ref().map_or(&[], |x| x),
61
61
  first_vector,
62
- num_vectors: 1,
62
+ num_vectors: elem.vectors.len() as u32,
63
63
  }
64
64
  }
65
65
 
@@ -133,16 +133,20 @@ impl VectorStoreWriter {
133
133
  })
134
134
  }
135
135
 
136
- pub fn write(&mut self, paragraph_id: u32, vectors: &[&[u8]]) -> std::io::Result<(u32, u32)> {
136
+ pub fn write(
137
+ &mut self,
138
+ paragraph_id: u32,
139
+ vectors: impl Iterator<Item = impl AsRef<[u8]>>,
140
+ ) -> std::io::Result<(u32, u32)> {
137
141
  let first_addr = self.addr;
138
142
  for v in vectors {
139
- self.output.write_all(v)?;
143
+ self.output.write_all(v.as_ref())?;
140
144
  self.output.write_all(paragraph_id.to_le_bytes().as_slice())?;
141
145
  if self.padding_bytes > 0 {
142
146
  self.output.seek(SeekFrom::Current(self.padding_bytes as i64))?;
143
147
  }
148
+ self.addr += 1;
144
149
  }
145
- self.addr += vectors.len() as u32;
146
150
  let last_addr = self.addr - 1;
147
151
  Ok((first_addr, last_addr))
148
152
  }
@@ -47,7 +47,7 @@ impl DataStoreV2 {
47
47
  let mut vectors = VectorStoreWriter::new(path, vector_type)?;
48
48
 
49
49
  for (idx, elem) in (0..).zip(entries.into_iter()) {
50
- let (first_vector, _) = vectors.write(idx, &[&vector_type.encode(&elem.vector)])?;
50
+ let (first_vector, _) = vectors.write(idx, elem.vectors.iter().map(|v| vector_type.encode(v)))?;
51
51
  paragraphs.write(StoredParagraph::from_elem(&elem, first_vector))?;
52
52
  }
53
53
 
@@ -70,13 +70,10 @@ impl DataStoreV2 {
70
70
  for paragraph_addr in alive {
71
71
  // Retrieve paragraph and vectors
72
72
  let paragraph = store.get_paragraph(paragraph_addr);
73
- let p_vectors: Vec<_> = paragraph
74
- .vectors(&paragraph_addr)
75
- .map(|v| store.get_vector(v).vector())
76
- .collect();
73
+ let p_vectors = paragraph.vectors(&paragraph_addr).map(|v| store.get_vector(v).vector());
77
74
 
78
75
  // Write to new store
79
- let (first_vector, last_vector) = vectors.write(p_idx, p_vectors.as_slice())?;
76
+ let (first_vector, last_vector) = vectors.write(p_idx, p_vectors)?;
80
77
  paragraphs.write_paragraph_ref(paragraph, first_vector, last_vector - first_vector + 1)?;
81
78
  p_idx += 1;
82
79
  }
@@ -96,3 +96,9 @@ impl Formula {
96
96
  self.clauses.push(clause.into())
97
97
  }
98
98
  }
99
+
100
+ impl Default for Formula {
101
+ fn default() -> Self {
102
+ Self::new()
103
+ }
104
+ }
@@ -17,8 +17,9 @@
17
17
  // You should have received a copy of the GNU Affero General Public License
18
18
  // along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
 
20
- use crate::config::VectorConfig;
20
+ use crate::config::{VectorCardinality, VectorConfig};
21
21
  use crate::data_point::{self, Elem};
22
+ use crate::multivector::extract_multi_vectors;
22
23
  use crate::{VectorSegmentMetadata, utils};
23
24
  use nidx_protos::{noderesources, prost::*};
24
25
  use std::collections::HashMap;
@@ -124,7 +125,19 @@ pub fn index_resource(
124
125
  sentence.vector.clone()
125
126
  };
126
127
  let metadata = sentence.metadata.as_ref().map(|m| m.encode_to_vec());
127
- elems.push(Elem::new(key, vector, paragraph.labels.clone(), metadata));
128
+
129
+ match config.vector_cardinality {
130
+ VectorCardinality::Single => elems.push(Elem::new(key, vector, paragraph.labels.clone(), metadata)),
131
+ VectorCardinality::Multi => {
132
+ let vectors = extract_multi_vectors(&vector, &config.vector_type)?;
133
+ elems.push(Elem::new_multivector(
134
+ key.clone(),
135
+ vectors,
136
+ paragraph.labels.clone(),
137
+ metadata.clone(),
138
+ ));
139
+ }
140
+ };
128
141
  }
129
142
  }
130
143
  }
@@ -26,6 +26,7 @@ mod data_types;
26
26
  mod formula;
27
27
  mod indexer;
28
28
  mod inverted_index;
29
+ mod multivector;
29
30
  mod query_io;
30
31
  mod request_types;
31
32
  mod utils;
@@ -47,7 +48,7 @@ use tracing::instrument;
47
48
  pub use indexer::SEGMENT_TAGS;
48
49
  pub use request_types::VectorSearchRequest;
49
50
 
50
- #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)]
51
+ #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash)]
51
52
  pub struct ParagraphAddr(u32);
52
53
  pub struct VectorAddr(u32);
53
54