nidx-binding 6.9.0.post534__tar.gz → 6.9.0.post567__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (263)
  1. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/PKG-INFO +1 -1
  2. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_protos/pyproject.toml +1 -1
  3. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/Cargo.toml +0 -2
  4. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/config.rs +13 -0
  5. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/data_store/v1.rs +12 -0
  6. nidx_binding-6.9.0.post567/nidx_vector/src/data_store/v2/quant_vector_store.rs +106 -0
  7. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/data_store/v2.rs +67 -6
  8. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/data_store.rs +4 -1
  9. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/hnsw/ops_hnsw.rs +63 -17
  10. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/hnsw.rs +1 -0
  11. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/lib.rs +4 -4
  12. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/segment/tests.rs +7 -4
  13. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/segment.rs +115 -67
  14. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/vector_types/dense_f32.rs +11 -51
  15. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/vector_types/mod.rs +1 -0
  16. nidx_binding-6.9.0.post567/nidx_vector/src/vector_types/rabitq.rs +271 -0
  17. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/tests/test_basic_search.rs +8 -8
  18. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/pyproject.toml +1 -1
  19. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.config/nextest.toml +0 -0
  20. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-0cfce9b29547f8f5bafa6e440f86103be7b8c4ad2fd92db9ac223f4efbe23d10.json +0 -0
  21. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-1a561eed00f3dbe868bf5030059793300209179dc8fb73e4b57a54b5e81262fe.json +0 -0
  22. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-1d3fca2682e25a01143da92285297f134a6a105a96f64d87e0db3abb219855e4.json +0 -0
  23. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-249b3b57c27a71baa823f1fe0f0bba9c9af36f61c28f731e58beea60ec48e687.json +0 -0
  24. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-24cb6b683daa42d7125f862e25943ab4be7bf275cd8739f8da4859d701795e1a.json +0 -0
  25. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-263c8fce6db5b03bbd012fafdba6943cbee6ed7eb8976cdef4f5b01dde7ca6fd.json +0 -0
  26. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-2a5d92fb1638df830a4477a7cdf24e6db6b43034b7bbe74fdfb63e8afe2c4071.json +0 -0
  27. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-2b065a363f58caed60e3706603c1260dbf5a4c795604a5b68edda22eb07fec1b.json +0 -0
  28. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-3fc3cb39934683de8cd475ce1368c8373453eb1e01f81587d66b9d14b109ce6e.json +0 -0
  29. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-48f33b77b7c1633467b0b2efcaa1d3c207e7757e4f1d83b40d15e6ca365f7771.json +0 -0
  30. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-4ae09f2c08e2f324bee01bb8487a8f37678a1c5e9d327339235c50d4921a8949.json +0 -0
  31. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-4d7a76fa413c9ef0ce2a47ac7bb7e01d3e6a2aabded9487d21010a53efee8852.json +0 -0
  32. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-4fcbdd6657c7dc9b60b3a563dd41711b3dbcf72ce063427b7a01f8cddf34c244.json +0 -0
  33. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-577109ac00ccfbd38ecaccab94116f2f46a4caf5612afa372cded197123c1e08.json +0 -0
  34. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-5db25f97d8578d6d78f2f6bd4b72cc82a9b1b82805c6422d967ac63b20d99db4.json +0 -0
  35. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-5ec3233a3a23e926055056d46bdde17836a633066dbb5f349502648cd3ea9a60.json +0 -0
  36. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-66edb6ea424d8681927dcddb6bac5f1239175f4775d1f40417ba15054b0c6f19.json +0 -0
  37. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-6f9c6d201c1b5712efb68c363bffd3e0169c11f2a8f925e8cd4e8808599ff7b4.json +0 -0
  38. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-733c3ebacc86f444bf5e2dd79ade660c291e88a00fc09b722f6e2e191545874c.json +0 -0
  39. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-7a3bf27c330c468a596e8a297cf7d8b192e31e67ecc5177c1267f579e8e247c7.json +0 -0
  40. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-7a7e59e47b30b12237511fd3d7da2d17b0471ad2b006af48d6a6f587c779692b.json +0 -0
  41. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-7dcbb33312cc9f11ae3a6d73b1ace017a9f19a8bf8f10304fc57977c8efeadff.json +0 -0
  42. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-7efa7c0d747afc4b6aed0586ff846c27839c3213ff7ee9f30c89b0d0f17e60e3.json +0 -0
  43. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-8493140d788604d498a4e48da4158708572ccc9d60185290a00d549cc84533db.json +0 -0
  44. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-8493bb0059b013eaca42fd10cd7d04f0d06a8acaed379eff0d23f3229edde9ee.json +0 -0
  45. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-87996b3d6c7a2195438d7038015b06949102bce8c7b8cd8db1f83aaf23cbe489.json +0 -0
  46. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-8d33717587c6ee8f5fc339a80b1212a73d6c03e45856b1d55457fc8074709dd0.json +0 -0
  47. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-8f096d8171b89f9615d18f95d696dc9e4fb3674e103161a713cdc806f7a68506.json +0 -0
  48. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-917732a56ee04bf3a6e127319dda8225210869c82f9828d878162394dba4e078.json +0 -0
  49. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-95fe4ef93ee90733db1b67ed7987f80b5aac792f1590b979c68b418d1599eb98.json +0 -0
  50. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-9b67658569b343d8b4b61ae0a7dc721f367f2ba33c7b69b9e68bfd5c9bff5206.json +0 -0
  51. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-9c8062ea55d070afef68309e58fa987eb37fda44e1efbf68c8ba2af7846cc968.json +0 -0
  52. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-a06e1d9f6f95e4c4c2b98310ebddcc9d963cc033582bf2e945e8bf3a301b4247.json +0 -0
  53. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-a55265c9b07bd1399961a6f1e757201fd0eebe868ddaf96437111113d80fce92.json +0 -0
  54. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-a60ec2f66f1e7b84189e5b089f2087a29ff6a64326a3743dea935bbc58ee77fa.json +0 -0
  55. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-a891a37be5c2d7cce775c2dd33726b0318fd3839beab222a1b22bc6174604207.json +0 -0
  56. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-a945191bb4b3e37d6823ed3ad499339d007d69983105de8567777d9daf517b28.json +0 -0
  57. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-abe9f7832f2bd799ac44008da031e8d8ab52d4f5fbfc2a7e3974e8873bae55b2.json +0 -0
  58. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-aca588cca57a85e4d7fcc40c23cd87e57d53d11ca550d78e7e3d5e39e524fcd3.json +0 -0
  59. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-b02f8aafc00a7724510772ac41269e368c5bccf03ef7b4590e0ef6fd1a1bf64f.json +0 -0
  60. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-b742e17cabe2d64617e9aa64bafc782172f7a4f8023d1b54f952a0fb39f6b2b8.json +0 -0
  61. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-b94e349dbc0daec57f8f8f6e9e2dffb06100b1bb2b41d297c9f3b191da37a83d.json +0 -0
  62. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-bd9afa22994aba671dbf7b5f89b53c2ee02f53c0442a81265786a6d52d08512f.json +0 -0
  63. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-be60554eca98a5899efc6b49785cecd6444a6d39afed9e4a884ce2dbf162012c.json +0 -0
  64. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-bf49702b506c9a1650ece1f8e8d9f14834a902f8caefafe30ded55e2790f2188.json +0 -0
  65. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-bfcd21ed704cd305db5c17fcdec7d92aa4ac501913c9c9514d8ff92928c0c7e7.json +0 -0
  66. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-c3ab694650f49a75b146fb877a92e48c4f20f0d99f70f8ec859fbb763b01a1e5.json +0 -0
  67. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-c55542bb9fae544d87fae6f30e0fe8a9088d12075f4442ab4fe2fcd05e472234.json +0 -0
  68. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-cb29a6556d35ac630ee0aa885dd7341cf9573bd3efd216ff8a887b87686b03db.json +0 -0
  69. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-d0a1f341a89f5f14696b10baa72db9d95551c2b7e5fc67308fd52dc03dd98a92.json +0 -0
  70. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-d2ad0a0ca2649c9e4873cfcc1fc66d2d07cc45d0f65c560b06d7b5f592f4fa8a.json +0 -0
  71. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-d6cfe78eb635ba0b89ca4021a4dc8182d18ab5b197f30149cd28488eba4c1df5.json +0 -0
  72. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-d729b56dea00e49dcdba8cf0001e2811da27351eabe98212db3b589f18fc6f32.json +0 -0
  73. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-d9658bfd4e7170b41d03f2ddf2446d0bf54171c0d39d53bf20af2b8437f2ec48.json +0 -0
  74. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-dbba7b3d3289425bae711aedbf73fbc3699f857f86f84d95c3b556d05c5658b0.json +0 -0
  75. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-dcb96b649d6d63a58efd5d445453a4f3d7869a56ff714b69bedf3d616a0473ca.json +0 -0
  76. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-ebd876fbf5362a5900e75bc05f2f11c73c406ef7da4e95097fc6a1c3d1b8bc54.json +0 -0
  77. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-eef5cc6bce1cc14eba8f3e68971724ef181e88cffcedd74673615f2026b89a62.json +0 -0
  78. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/.sqlx/query-ef56d5fefc5774040d1ee397beadb475f6af02768c22f0e583c74062e2e821ce.json +0 -0
  79. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/Cargo.lock +0 -0
  80. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/Cargo.toml +0 -0
  81. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/README.md +0 -0
  82. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/migrations/20241007163501_initial.sql +0 -0
  83. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/migrations/20241211120039_merge_job_priority.sql +0 -0
  84. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/migrations/20241211121159_basic_indexes.sql +0 -0
  85. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/migrations/20241212151105_check_segment_records.sql +0 -0
  86. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/migrations/20250110145554_in_flight_messages.sql +0 -0
  87. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_binding/Cargo.toml +0 -0
  88. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_binding/src/lib.rs +0 -0
  89. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/Cargo.toml +0 -0
  90. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/fuzzy_query.rs +0 -0
  91. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/lib.rs +0 -0
  92. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/query_io.rs +0 -0
  93. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/query_parser/fuzzy_parser.rs +0 -0
  94. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/query_parser/keyword_parser.rs +0 -0
  95. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/query_parser/stop_words.rs +0 -0
  96. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/query_parser/tokenizer.rs +0 -0
  97. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/query_parser.rs +0 -0
  98. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/reader.rs +0 -0
  99. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/request_types.rs +0 -0
  100. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/resource_indexer.rs +0 -0
  101. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/schema.rs +0 -0
  102. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/search_query.rs +0 -0
  103. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/search_response.rs +0 -0
  104. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/src/set_query.rs +0 -0
  105. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/README.md +0 -0
  106. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/ar.json +0 -0
  107. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/az.json +0 -0
  108. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/bn.json +0 -0
  109. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/ca.json +0 -0
  110. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/ch.json +0 -0
  111. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/da.json +0 -0
  112. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/de.json +0 -0
  113. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/el.json +0 -0
  114. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/en.json +0 -0
  115. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/es.json +0 -0
  116. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/eu.json +0 -0
  117. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/extract.py +0 -0
  118. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/fi.json +0 -0
  119. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/fr.json +0 -0
  120. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/he.json +0 -0
  121. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/hu.json +0 -0
  122. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/id.json +0 -0
  123. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/it.json +0 -0
  124. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/kk.json +0 -0
  125. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/ne.json +0 -0
  126. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/nl.json +0 -0
  127. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/no.json +0 -0
  128. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/pt.json +0 -0
  129. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/ro.json +0 -0
  130. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/ru.json +0 -0
  131. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/sl.json +0 -0
  132. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/sv.json +0 -0
  133. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/tg.json +0 -0
  134. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/stop_words/tr.json +0 -0
  135. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/tests/common/mod.rs +0 -0
  136. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_paragraph/tests/reader.rs +0 -0
  137. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_protos/Cargo.toml +0 -0
  138. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_protos/build.py +0 -0
  139. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_protos/build.rs +0 -0
  140. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_protos/nidx.proto +0 -0
  141. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_protos/nodereader.proto +0 -0
  142. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_protos/noderesources.proto +0 -0
  143. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_protos/nodewriter.proto +0 -0
  144. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_protos/src/lib.rs +0 -0
  145. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/Cargo.toml +0 -0
  146. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/src/graph_collector.rs +0 -0
  147. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/src/graph_query_parser.rs +0 -0
  148. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/src/io_maps.rs +0 -0
  149. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/src/lib.rs +0 -0
  150. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/src/reader.rs +0 -0
  151. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/src/resource_indexer.rs +0 -0
  152. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/src/schema.rs +0 -0
  153. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/src/top_unique_n.rs +0 -0
  154. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/tests/common/mod.rs +0 -0
  155. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/tests/test_graph_query_parser_search.rs +0 -0
  156. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/tests/test_graph_search.rs +0 -0
  157. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_relation/tests/test_writer.rs +0 -0
  158. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_tantivy/Cargo.toml +0 -0
  159. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_tantivy/src/index_reader.rs +0 -0
  160. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_tantivy/src/lib.rs +0 -0
  161. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_tantivy/src/utils.rs +0 -0
  162. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_tests/Cargo.toml +0 -0
  163. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_tests/src/graph.rs +0 -0
  164. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_tests/src/lib.rs +0 -0
  165. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/Cargo.toml +0 -0
  166. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/src/lib.rs +0 -0
  167. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/src/prefilter.rs +0 -0
  168. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/src/query_io.rs +0 -0
  169. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/src/reader.rs +0 -0
  170. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/src/request_types.rs +0 -0
  171. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/src/resource_indexer.rs +0 -0
  172. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/src/schema.rs +0 -0
  173. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/src/search_query.rs +0 -0
  174. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/tests/common/mod.rs +0 -0
  175. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/tests/test_deletions.rs +0 -0
  176. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/tests/test_flow.rs +0 -0
  177. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/tests/test_search.rs +0 -0
  178. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_text/tests/test_streaming.rs +0 -0
  179. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_types/Cargo.toml +0 -0
  180. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_types/src/lib.rs +0 -0
  181. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_types/src/prefilter.rs +0 -0
  182. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_types/src/query_language.rs +0 -0
  183. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/data_store/v1/node.rs +0 -0
  184. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/data_store/v1/store.rs +0 -0
  185. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/data_store/v1/trie.rs +0 -0
  186. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/data_store/v1/trie_ram.rs +0 -0
  187. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/data_store/v2/paragraph_store.rs +0 -0
  188. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/data_store/v2/vector_store.rs +0 -0
  189. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/data_types.rs +0 -0
  190. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/formula.rs +0 -0
  191. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/hnsw/disk_hnsw.rs +0 -0
  192. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/hnsw/params.rs +0 -0
  193. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/hnsw/ram_hnsw.rs +0 -0
  194. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/indexer.rs +0 -0
  195. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/inverted_index/fst_index.rs +0 -0
  196. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/inverted_index/map.rs +0 -0
  197. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/inverted_index.rs +0 -0
  198. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/multivector.rs +0 -0
  199. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/query_io.rs +0 -0
  200. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/request_types.rs +0 -0
  201. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/searcher.rs +0 -0
  202. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/src/utils.rs +0 -0
  203. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/tests/common/mod.rs +0 -0
  204. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/tests/test_hidden.rs +0 -0
  205. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/nidx_vector/tests/test_maxsim.rs +0 -0
  206. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/api/grpc.rs +0 -0
  207. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/api/shards.rs +0 -0
  208. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/api.rs +0 -0
  209. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/control.rs +0 -0
  210. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/errors.rs +0 -0
  211. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/grpc_server.rs +0 -0
  212. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/import_export.rs +0 -0
  213. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/indexer.rs +0 -0
  214. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/lib.rs +0 -0
  215. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/main.rs +0 -0
  216. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/metadata/deletion.rs +0 -0
  217. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/metadata/index.rs +0 -0
  218. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/metadata/index_request.rs +0 -0
  219. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/metadata/merge_job.rs +0 -0
  220. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/metadata/segment.rs +0 -0
  221. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/metadata/shard.rs +0 -0
  222. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/metadata.rs +0 -0
  223. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/metrics.rs +0 -0
  224. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/scheduler/audit_task.rs +0 -0
  225. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/scheduler/log_merge.rs +0 -0
  226. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/scheduler/merge_task.rs +0 -0
  227. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/scheduler/metrics_task.rs +0 -0
  228. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/scheduler/purge_tasks.rs +0 -0
  229. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/scheduler/vector_merge.rs +0 -0
  230. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/scheduler.rs +0 -0
  231. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/searcher/grpc.rs +0 -0
  232. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/searcher/index_cache.rs +0 -0
  233. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/searcher/query_language.rs +0 -0
  234. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/searcher/query_planner.rs +0 -0
  235. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/searcher/shard_search.rs +0 -0
  236. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/searcher/shard_selector.rs +0 -0
  237. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/searcher/shard_suggest.rs +0 -0
  238. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/searcher/streams.rs +0 -0
  239. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/searcher/sync.rs +0 -0
  240. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/searcher.rs +0 -0
  241. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/segment_store.rs +0 -0
  242. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/settings.rs +0 -0
  243. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/telemetry/duration_layer.rs +0 -0
  244. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/telemetry/log_format.rs +0 -0
  245. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/telemetry/middleware.rs +0 -0
  246. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/telemetry.rs +0 -0
  247. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/tool.rs +0 -0
  248. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/utilization_tracker.rs +0 -0
  249. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/src/worker.rs +0 -0
  250. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/common/mod.rs +0 -0
  251. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/common/services.rs +0 -0
  252. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_date_range_search.rs +0 -0
  253. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_search_filtering.rs +0 -0
  254. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_search_relations.rs +0 -0
  255. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_search_sorting.rs +0 -0
  256. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_searcher_cluster.rs +0 -0
  257. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_security_search.rs +0 -0
  258. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_shards.rs +0 -0
  259. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_shards_api.rs +0 -0
  260. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_suggest.rs +0 -0
  261. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_synced_searcher.rs +0 -0
  262. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_vector_normalization.rs +0 -0
  263. {nidx_binding-6.9.0.post534 → nidx_binding-6.9.0.post567}/tests/test_vectorsets.rs +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nidx_binding
3
- Version: 6.9.0.post534
3
+ Version: 6.9.0.post567
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
@@ -10,7 +10,7 @@ build-backend = "pdm.backend"
10
10
 
11
11
  [project]
12
12
  name = "nidx_protos"
13
- version = "6.9.0.post534"
13
+ version = "6.9.0.post567"
14
14
  license = "AGPL-3.0-or-later"
15
15
  description = "Protobuf definitions for nucliadb/nidx"
16
16
  authors = [{ name = "Nuclia", email = "nucliadb@nuclia.com" }]
@@ -24,8 +24,6 @@ fst = "0.4.7"
24
24
  bit-vec = "0.8.0"
25
25
  bincode = "2.0.1"
26
26
  rayon = "1.10.0"
27
-
28
- [target.'cfg(not(all(target_os="linux",target_arch="aarch64")))'.dependencies]
29
27
  simsimd = "4.3.1"
30
28
 
31
29
  [dev-dependencies]
@@ -29,6 +29,8 @@ use crate::vector_types::*;
29
29
  pub mod flags {
30
30
  // pub const DATA_STORE_V2: &str = "data_store_v2";
31
31
  pub const FORCE_DATA_STORE_V1: &str = "force_data_store_v1"; // For testing of v1+v2 merges
32
+
33
+ pub const DISABLE_RABITQ_SEARCH: &str = "disable_rabitq_search"; // Do not use RaBitQ vectors in search
32
34
  }
33
35
 
34
36
  #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)]
@@ -60,6 +62,12 @@ impl VectorType {
60
62
  }
61
63
  }
62
64
 
65
+ pub fn decode<'a>(&self, data: &'a [u8]) -> &'a [f32] {
66
+ match self {
67
+ VectorType::DenseF32 { .. } => dense_f32::decode_vector(data),
68
+ }
69
+ }
70
+
63
71
  pub fn vector_alignment(&self) -> usize {
64
72
  match self {
65
73
  VectorType::DenseF32 { .. } => size_of::<f32>(),
@@ -107,6 +115,11 @@ impl VectorConfig {
107
115
  (Similarity::Cosine, VectorType::DenseF32 { .. }) => dense_f32::cosine_similarity,
108
116
  }
109
117
  }
118
+
119
+ pub fn quantizable_vectors(&self) -> bool {
120
+ matches!(self.similarity, Similarity::Dot)
121
+ && matches!(&self.vector_type, VectorType::DenseF32 { dimension } if dimension.is_multiple_of(64))
122
+ }
110
123
  }
111
124
 
112
125
  impl TryFrom<VectorIndexConfig> for VectorConfig {
@@ -21,6 +21,7 @@
21
21
  use crate::{
22
22
  config::{VectorConfig, VectorType},
23
23
  segment::Elem,
24
+ vector_types::rabitq,
24
25
  };
25
26
  use memmap2::Mmap;
26
27
  use node::Node;
@@ -42,6 +43,10 @@ pub struct DataStoreV1 {
42
43
  }
43
44
 
44
45
  impl DataStore for DataStoreV1 {
46
+ fn has_quantized(&self) -> bool {
47
+ false
48
+ }
49
+
45
50
  fn size_bytes(&self) -> usize {
46
51
  self.nodes.len()
47
52
  }
@@ -61,9 +66,16 @@ impl DataStore for DataStoreV1 {
61
66
  }
62
67
  }
63
68
 
69
+ fn get_quantized_vector(&self, _id: VectorAddr) -> rabitq::EncodedVector<'_> {
70
+ panic!("Store does not have quantized vectors")
71
+ }
72
+
64
73
  fn will_need(&self, id: VectorAddr) {
65
74
  store::will_need(&self.nodes, id.0 as usize, self.vector_len_bytes);
66
75
  }
76
+
77
+ fn will_need_quantized(&self, _id: VectorAddr) {}
78
+
67
79
  fn as_any(&self) -> &dyn Any {
68
80
  self
69
81
  }
@@ -0,0 +1,106 @@
1
+ // Copyright (C) 2021 Bosutech XXI S.L.
2
+ //
3
+ // nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ // For commercial licensing, contact us at info@nuclia.com.
5
+ //
6
+ // AGPL:
7
+ // This program is free software: you can redistribute it and/or modify
8
+ // it under the terms of the GNU Affero General Public License as
9
+ // published by the Free Software Foundation, either version 3 of the
10
+ // License, or (at your option) any later version.
11
+ //
12
+ // This program is distributed in the hope that it will be useful,
13
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ // GNU Affero General Public License for more details.
16
+ //
17
+ // You should have received a copy of the GNU Affero General Public License
18
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ //
20
+
21
+ use lazy_static::lazy_static;
22
+ use memmap2::Mmap;
23
+ use std::{fs::File, io::Write as _, path::Path};
24
+
25
+ use crate::{
26
+ data_store::{OpenReason, VectorAddr},
27
+ vector_types::rabitq,
28
+ };
29
+
30
+ const FILENAME: &str = "vectors.quant";
31
+
32
+ /// Storage for fixed-size quantized vectors. It holds only the vectors; their metadata is stored with the raw vectors.
33
+ pub struct QuantVectorStore {
34
+ data: Mmap,
35
+ vector_len_bytes: usize,
36
+ }
37
+
38
+ impl QuantVectorStore {
39
+ pub fn open(path: &Path, vector_len_bytes: usize, reason: &OpenReason) -> std::io::Result<Self> {
40
+ let data = unsafe { Mmap::map(&File::open(path.join(FILENAME))?)? };
41
+
42
+ #[cfg(not(target_os = "windows"))]
43
+ {
44
+ let advice = match reason {
45
+ OpenReason::Create => memmap2::Advice::Sequential,
46
+ OpenReason::Search => memmap2::Advice::Random,
47
+ };
48
+ data.advise(advice)?;
49
+ }
50
+
51
+ Ok(Self { data, vector_len_bytes })
52
+ }
53
+
54
+ pub fn get_vector(&self, addr: VectorAddr) -> rabitq::EncodedVector<'_> {
55
+ let start = self.record_start(addr);
56
+ rabitq::EncodedVector::from_bytes(&self.data[start..start + self.vector_len_bytes])
57
+ }
58
+
59
+ fn record_start(&self, VectorAddr(addr): VectorAddr) -> usize {
60
+ addr as usize * self.vector_len_bytes
61
+ }
62
+
63
+ #[cfg(not(target_os = "windows"))]
64
+ pub fn will_need(&self, addr: VectorAddr) {
65
+ lazy_static! {
66
+ static ref PAGE_SIZE: usize = unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as usize;
67
+ };
68
+
69
+ // Align the record pointer down to the start of its page, as required by madvise
70
+ let start = self.data.as_ptr().wrapping_add(self.record_start(addr));
71
+ let offset = start.align_offset(*PAGE_SIZE);
72
+ let (start_page, advise_size) = if offset > 0 {
73
+ (
74
+ start.wrapping_add(offset).wrapping_sub(*PAGE_SIZE),
75
+ self.vector_len_bytes + *PAGE_SIZE - offset,
76
+ )
77
+ } else {
78
+ (start, self.vector_len_bytes)
79
+ };
80
+
81
+ unsafe { libc::madvise(start_page as *mut libc::c_void, advise_size, libc::MADV_WILLNEED) };
82
+ }
83
+
84
+ #[cfg(target_os = "windows")]
85
+ pub fn will_need(src: &[u8], id: usize, vector_len: usize) {}
86
+ }
87
+
88
+ pub struct QuantVectorStoreWriter {
89
+ output: File,
90
+ }
91
+
92
+ impl QuantVectorStoreWriter {
93
+ pub fn new(path: &Path) -> std::io::Result<Self> {
94
+ Ok(Self {
95
+ output: File::create(path.join(FILENAME))?,
96
+ })
97
+ }
98
+
99
+ pub fn write(&mut self, vector: &[u8]) -> std::io::Result<()> {
100
+ self.output.write_all(vector)
101
+ }
102
+
103
+ pub fn close(self) -> std::io::Result<()> {
104
+ self.output.sync_all()
105
+ }
106
+ }
@@ -18,7 +18,13 @@
18
18
  // along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  //
20
20
 
21
- use crate::{VectorR, config::VectorType, segment::Elem};
21
+ use crate::{
22
+ VectorR,
23
+ config::{VectorConfig, VectorType},
24
+ data_store::v2::quant_vector_store::{QuantVectorStore, QuantVectorStoreWriter},
25
+ segment::Elem,
26
+ vector_types::rabitq,
27
+ };
22
28
 
23
29
  use super::{DataStore, OpenReason, ParagraphAddr, VectorAddr};
24
30
  pub use paragraph_store::StoredParagraph;
@@ -27,11 +33,13 @@ use std::path::Path;
27
33
  use vector_store::{VectorStore, VectorStoreWriter};
28
34
 
29
35
  mod paragraph_store;
36
+ mod quant_vector_store;
30
37
  mod vector_store;
31
38
 
32
39
  pub struct DataStoreV2 {
33
40
  paragraphs: ParagraphStore,
34
41
  vectors: VectorStore,
42
+ quantized: Option<QuantVectorStore>,
35
43
  }
36
44
 
37
45
  impl DataStoreV2 {
@@ -39,20 +47,39 @@ impl DataStoreV2 {
39
47
  Ok(Self {
40
48
  vectors: VectorStore::open(path, vector_type, &reason)?,
41
49
  paragraphs: ParagraphStore::open(path, &reason)?,
50
+ quantized: QuantVectorStore::open(
51
+ path,
52
+ rabitq::EncodedVector::encoded_len(vector_type.dimension()),
53
+ &reason,
54
+ )
55
+ .ok(),
42
56
  })
43
57
  }
44
58
 
45
- pub fn create(path: &Path, entries: Vec<Elem>, vector_type: &VectorType) -> VectorR<()> {
59
+ pub fn create(path: &Path, entries: Vec<Elem>, config: &VectorConfig) -> VectorR<()> {
46
60
  let mut paragraphs = ParagraphStoreWriter::new(path)?;
47
- let mut vectors = VectorStoreWriter::new(path, vector_type)?;
61
+ let mut vectors = VectorStoreWriter::new(path, &config.vector_type)?;
62
+ let mut quantized = if config.quantizable_vectors() {
63
+ Some(QuantVectorStoreWriter::new(path)?)
64
+ } else {
65
+ None
66
+ };
48
67
 
49
68
  for (idx, elem) in (0..).zip(entries.into_iter()) {
50
- let (first_vector, _) = vectors.write(idx, elem.vectors.iter().map(|v| vector_type.encode(v)))?;
69
+ let (first_vector, _) = vectors.write(idx, elem.vectors.iter().map(|v| config.vector_type.encode(v)))?;
70
+ if let Some(quantized) = &mut quantized {
71
+ for v in &elem.vectors {
72
+ quantized.write(&rabitq::EncodedVector::encode(v))?;
73
+ }
74
+ }
51
75
  paragraphs.write(StoredParagraph::from_elem(&elem, first_vector))?;
52
76
  }
53
77
 
54
78
  paragraphs.close()?;
55
79
  vectors.close()?;
80
+ if let Some(quantized) = quantized {
81
+ quantized.close()?;
82
+ }
56
83
 
57
84
  Ok(())
58
85
  }
@@ -60,10 +87,15 @@ impl DataStoreV2 {
60
87
  pub fn merge(
61
88
  path: &Path,
62
89
  producers: Vec<(impl Iterator<Item = ParagraphAddr>, &dyn DataStore)>,
63
- vector_type: &VectorType,
90
+ config: &VectorConfig,
64
91
  ) -> VectorR<()> {
65
92
  let mut paragraphs = ParagraphStoreWriter::new(path)?;
66
- let mut vectors = VectorStoreWriter::new(path, vector_type)?;
93
+ let mut vectors = VectorStoreWriter::new(path, &config.vector_type)?;
94
+ let mut quantized = if config.quantizable_vectors() {
95
+ Some(QuantVectorStoreWriter::new(path)?)
96
+ } else {
97
+ None
98
+ };
67
99
 
68
100
  let mut p_idx = 0;
69
101
  for (alive, store) in producers {
@@ -74,6 +106,20 @@ impl DataStoreV2 {
74
106
 
75
107
  // Write to new store
76
108
  let (first_vector, last_vector) = vectors.write(p_idx, p_vectors)?;
109
+ if let Some(quantized) = &mut quantized {
110
+ // Copy quantized vectors if they exist, calculate them if not
111
+ if store.has_quantized() {
112
+ for vec_addr in paragraph.vectors(&paragraph_addr) {
113
+ quantized.write(store.get_quantized_vector(vec_addr).bytes())?;
114
+ }
115
+ } else {
116
+ let p_vectors = paragraph.vectors(&paragraph_addr).map(|v| store.get_vector(v).vector());
117
+ for v in p_vectors {
118
+ quantized.write(&rabitq::EncodedVector::encode(config.vector_type.decode(v)))?;
119
+ }
120
+ }
121
+ }
122
+
77
123
  paragraphs.write_paragraph_ref(paragraph, first_vector, last_vector - first_vector + 1)?;
78
124
  p_idx += 1;
79
125
  }
@@ -87,6 +133,10 @@ impl DataStoreV2 {
87
133
  }
88
134
 
89
135
  impl DataStore for DataStoreV2 {
136
+ fn has_quantized(&self) -> bool {
137
+ self.quantized.is_some()
138
+ }
139
+
90
140
  fn size_bytes(&self) -> usize {
91
141
  self.vectors.size_bytes() + self.paragraphs.size_bytes()
92
142
  }
@@ -107,10 +157,21 @@ impl DataStore for DataStoreV2 {
107
157
  self.vectors.get_vector(id)
108
158
  }
109
159
 
160
+ fn get_quantized_vector(&self, id: VectorAddr) -> rabitq::EncodedVector<'_> {
161
+ let Some(quantized) = &self.quantized else {
162
+ panic!("Store does not have quantized vectors")
163
+ };
164
+ quantized.get_vector(id)
165
+ }
166
+
110
167
  fn will_need(&self, id: VectorAddr) {
111
168
  self.vectors.will_need(id);
112
169
  }
113
170
 
171
+ fn will_need_quantized(&self, id: VectorAddr) {
172
+ self.quantized.as_ref().unwrap().will_need(id);
173
+ }
174
+
114
175
  fn as_any(&self) -> &dyn std::any::Any {
115
176
  self
116
177
  }
@@ -28,7 +28,7 @@ pub use v1::node::Node;
28
28
  pub use v2::DataStoreV2;
29
29
  use v2::StoredParagraph;
30
30
 
31
- use crate::{ParagraphAddr, VectorAddr};
31
+ use crate::{ParagraphAddr, VectorAddr, vector_types::rabitq};
32
32
 
33
33
  pub enum OpenReason {
34
34
  Search,
@@ -94,8 +94,11 @@ pub trait DataStore: Sync + Send {
94
94
  fn stored_vector_count(&self) -> u32;
95
95
  fn get_paragraph(&self, id: ParagraphAddr) -> ParagraphRef<'_>;
96
96
  fn get_vector(&self, id: VectorAddr) -> VectorRef<'_>;
97
+ fn get_quantized_vector(&self, id: VectorAddr) -> rabitq::EncodedVector<'_>;
97
98
  fn will_need(&self, id: VectorAddr);
99
+ fn will_need_quantized(&self, id: VectorAddr);
98
100
  fn as_any(&self) -> &dyn Any;
101
+ fn has_quantized(&self) -> bool;
99
102
  }
100
103
 
101
104
  pub fn iter_paragraphs(data_store: &impl DataStore) -> impl Iterator<Item = ParagraphAddr> {
@@ -28,6 +28,7 @@ use std::cmp::{Ordering, Reverse};
28
28
  use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
29
29
 
30
30
  use crate::inverted_index::FilterBitSet;
31
+ use crate::vector_types::rabitq;
31
32
  use crate::{ParagraphAddr, VectorAddr};
32
33
 
33
34
  use super::params;
@@ -35,12 +36,22 @@ use super::*;
35
36
 
36
37
  /// Implementors of this trait can guide the hnsw search
37
38
  pub trait DataRetriever: std::marker::Sync {
38
- fn similarity(&self, x: VectorAddr, y: VectorAddr) -> f32;
39
+ fn similarity(&self, x: VectorAddr, y: &SearchVector) -> f32;
40
+ fn similarity_upper_bound(&self, x: VectorAddr, y: &SearchVector) -> f32;
39
41
  fn paragraph(&self, x: VectorAddr) -> ParagraphAddr;
40
42
  fn get_vector(&self, x: VectorAddr) -> &[u8];
41
43
  /// Embeddings with smaller similarity should not be considered.
42
44
  fn min_score(&self) -> f32;
45
+ /// Preload all data for a vector + paragraph (needed for similarity + filtering)
43
46
  fn will_need(&self, x: VectorAddr);
47
+ /// Preload a vector (only the vector, for similarity comparison)
48
+ fn will_need_vector(&self, x: VectorAddr);
49
+ }
50
+
51
+ pub enum SearchVector {
52
+ Stored(VectorAddr),
53
+ Query(Vec<u8>),
54
+ RabitQ(rabitq::QueryVector),
44
55
  }
45
56
 
46
57
  /// Implementors of this trait are layers of an HNSW where a nearest neighbour search can be run.
@@ -142,7 +153,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
142
153
  if let Some((_, edge)) = layer.get_out_edges(x).find(|&(z, _)| z == y) {
143
154
  edge
144
155
  } else {
145
- self.similarity(x, y)
156
+ self.retriever.similarity(x, &SearchVector::Stored(y))
146
157
  }
147
158
  })
148
159
  .all(|inter_sim| sim > inter_sim);
@@ -163,9 +174,6 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
163
174
 
164
175
  results
165
176
  }
166
- fn similarity(&self, x: VectorAddr, y: VectorAddr) -> f32 {
167
- self.retriever.similarity(x, y)
168
- }
169
177
  fn get_random_layer(&mut self) -> usize {
170
178
  let sample: f64 = self.layer_rng.sample(self.distribution);
171
179
  let picked_level = -sample.ln() * params::level_factor();
@@ -174,7 +182,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
174
182
  fn closest_up_nodes<L: Layer>(
175
183
  &'a self,
176
184
  entry_points: Vec<VectorAddr>,
177
- query: VectorAddr,
185
+ query: &SearchVector,
178
186
  layer: L,
179
187
  number_of_results: usize,
180
188
  mut filter: NodeFilter<'a, DR>,
@@ -196,7 +204,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
196
204
  break;
197
205
  };
198
206
 
199
- let candidate_similarity = self.similarity(query, candidate);
207
+ let candidate_similarity = self.retriever.similarity(candidate, query);
200
208
 
201
209
  if candidate_similarity < self.retriever.min_score() {
202
210
  break;
@@ -231,34 +239,43 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
231
239
  }
232
240
  fn layer_search<L: Layer>(
233
241
  &self,
234
- x: VectorAddr,
242
+ query: &SearchVector,
235
243
  layer: L,
236
244
  k_neighbours: usize,
237
245
  entry_points: &[VectorAddr],
238
246
  ) -> Neighbours {
247
+ // Nodes already visited
239
248
  let mut visited = FxHashSet::default();
249
+ // Nodes to visit
240
250
  let mut candidates = BinaryHeap::new();
251
+ // Best results so far
241
252
  let mut ms_neighbours = BinaryHeap::new();
253
+
254
+ // The initial candidates are the entry points
242
255
  for ep in entry_points.iter().copied() {
243
256
  visited.insert(ep);
244
- let similarity = self.similarity(x, ep);
257
+ let similarity = self.retriever.similarity(ep, query);
245
258
  candidates.push(Cnx(ep, similarity));
246
259
  ms_neighbours.push(Reverse(Cnx(ep, similarity)));
247
260
  }
261
+
248
262
  loop {
249
263
  match (candidates.pop(), ms_neighbours.peek().cloned()) {
264
+ // No more candidates, done
250
265
  (None, _) => break,
266
+ // Candidate is worse than the worst result, done
251
267
  (Some(Cnx(_, cs)), Some(Reverse(Cnx(_, ws)))) if cs < ws => break,
268
+ // Candidate is at least as good as the worst result
252
269
  (Some(Cnx(cn, _)), Some(Reverse(Cnx(_, mut ws)))) => {
253
270
  for (y, _) in layer.get_out_edges(cn) {
254
271
  if self.preload_nodes && !visited.contains(&y) {
255
- self.retriever.will_need(y);
272
+ self.retriever.will_need_vector(y);
256
273
  }
257
274
  }
258
275
  for (y, _) in layer.get_out_edges(cn) {
259
276
  if !visited.contains(&y) {
260
277
  visited.insert(y);
261
- let similarity = self.similarity(x, y);
278
+ let similarity = self.retriever.similarity(y, query);
262
279
  if similarity > ws || ms_neighbours.len() < k_neighbours {
263
280
  candidates.push(Cnx(y, similarity));
264
281
  ms_neighbours.push(Reverse(Cnx(y, similarity)));
@@ -288,7 +305,8 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
288
305
  mmax: usize,
289
306
  ) -> Vec<VectorAddr> {
290
307
  use params::*;
291
- let neighbours = self.layer_search::<&RAMLayer>(x, layer, ef_construction(), entry_points);
308
+ let neighbours =
309
+ self.layer_search::<&RAMLayer>(&SearchVector::Stored(x), layer, ef_construction(), entry_points);
292
310
  let neighbours = self.select_neighbours_heuristic(m(), neighbours, layer);
293
311
  let mut needs_repair = HashSet::new();
294
312
  let mut result = Vec::with_capacity(neighbours.len());
@@ -325,7 +343,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
325
343
  for l in (0..=top_layer).rev() {
326
344
  if l > level {
327
345
  // Above insertion point, just search
328
- eps[0] = self.layer_search(x, &hnsw.layers[l], 1, &eps)[0].0;
346
+ eps[0] = self.layer_search(&SearchVector::Stored(x), &hnsw.layers[l], 1, &eps)[0].0;
329
347
  } else {
330
348
  eps = self.layer_insert(x, &mut hnsw.layers[l], &eps, params::m_max_for_layer(l));
331
349
  }
@@ -337,7 +355,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
337
355
 
338
356
  pub fn search<H: Hnsw>(
339
357
  &self,
340
- query: VectorAddr,
358
+ query: &SearchVector,
341
359
  hnsw: H,
342
360
  k_neighbours: usize,
343
361
  with_filter: &FilterBitSet,
@@ -362,9 +380,30 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
362
380
  crnt_layer -= 1;
363
381
  }
364
382
 
383
+ // If using RabitQ, request more vectors to rerank later
384
+ let original_query = if let SearchVector::RabitQ(rq) = query {
385
+ Some(&SearchVector::Query(rq.original().to_vec()))
386
+ } else {
387
+ None
388
+ };
389
+ let last_layer_k = if original_query.is_some() {
390
+ std::cmp::min(k_neighbours * 100, 2000)
391
+ } else {
392
+ k_neighbours
393
+ };
394
+
365
395
  let entry_points: Vec<_> = neighbours.into_iter().map(|(node, _)| node).collect();
366
396
  let layer = hnsw.get_layer(crnt_layer);
367
- let neighbors = self.layer_search(query, layer, k_neighbours, &entry_points);
397
+ let mut neighbors = self.layer_search(query, layer, last_layer_k, &entry_points);
398
+
399
+ // If using RabitQ, rerank using the original vectors
400
+ if let Some(query) = original_query {
401
+ neighbors = neighbors
402
+ .into_iter()
403
+ .map(|(addr, _)| (addr, self.retriever.similarity(addr, query)))
404
+ .collect();
405
+ neighbors.sort_unstable_by(|x, y| y.1.partial_cmp(&x.1).unwrap_or(Ordering::Equal));
406
+ }
368
407
 
369
408
  let filter = NodeFilter {
370
409
  filter: with_filter,
@@ -373,8 +412,15 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
373
412
  vec_counter: RepCounter::new(!with_duplicates),
374
413
  };
375
414
  let layer_zero = hnsw.get_layer(0);
376
- let entry_points: Vec<_> = neighbors.into_iter().map(|(node, _)| node).collect();
377
- let mut filtered_result = self.closest_up_nodes(entry_points, query, layer_zero, k_neighbours, filter);
415
+ let entry_points: Vec<_> = neighbors.into_iter().take(k_neighbours).map(|(node, _)| node).collect();
416
+
417
+ let mut filtered_result = self.closest_up_nodes(
418
+ entry_points,
419
+ original_query.unwrap_or(query),
420
+ layer_zero,
421
+ k_neighbours,
422
+ filter,
423
+ );
378
424
 
379
425
  // order may be lost
380
426
  filtered_result.sort_by(|a, b| b.1.total_cmp(&a.1));
@@ -27,4 +27,5 @@ pub use disk_hnsw::DiskHnsw;
27
27
  pub use ops_hnsw::Cnx;
28
28
  pub use ops_hnsw::DataRetriever;
29
29
  pub use ops_hnsw::HnswOps;
30
+ pub use ops_hnsw::SearchVector;
30
31
  pub use ram_hnsw::RAMHnsw;
@@ -21,7 +21,7 @@
21
21
  pub mod config;
22
22
  mod data_store;
23
23
  mod data_types;
24
- mod formula;
24
+ pub mod formula;
25
25
  mod hnsw;
26
26
  mod indexer;
27
27
  mod inverted_index;
@@ -29,9 +29,9 @@ mod multivector;
29
29
  mod query_io;
30
30
  mod request_types;
31
31
  mod searcher;
32
- mod segment;
32
+ pub mod segment;
33
33
  mod utils;
34
- mod vector_types;
34
+ pub mod vector_types;
35
35
 
36
36
  use config::VectorConfig;
37
37
  use indexer::{ResourceWrapper, index_resource};
@@ -52,7 +52,7 @@ pub use request_types::VectorSearchRequest;
52
52
  #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash)]
53
53
  pub struct ParagraphAddr(u32);
54
54
  #[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq, Hash)]
55
- pub struct VectorAddr(u32);
55
+ pub struct VectorAddr(pub u32);
56
56
 
57
57
  #[derive(Clone, Serialize, Deserialize)]
58
58
  pub struct VectorSegmentMeta {
@@ -32,13 +32,16 @@ use crate::segment::{self, Elem};
32
32
  const CONFIG: VectorConfig = VectorConfig {
33
33
  similarity: Similarity::Cosine,
34
34
  normalize_vectors: false,
35
- vector_type: crate::config::VectorType::DenseF32 { dimension: 178 },
35
+ vector_type: crate::config::VectorType::DenseF32 { dimension: 128 },
36
36
  flags: vec![],
37
37
  vector_cardinality: VectorCardinality::Single,
38
38
  };
39
39
 
40
40
  fn create_query() -> Vec<f32> {
41
- let v: Vec<_> = vec![rand::random::<f32>; 178].into_iter().map(|f| f()).collect();
41
+ let v: Vec<_> = vec![rand::random::<f32>; 128]
42
+ .into_iter()
43
+ .map(|f| f() * 2.0 - 1.0)
44
+ .collect();
42
45
  let mut modulus = 0.0;
43
46
  for w in &v {
44
47
  modulus += w * w;
@@ -61,12 +64,12 @@ fn simple_flow() {
61
64
  let mut expected_keys = vec![];
62
65
  for i in 0..50 {
63
66
  let key = format!("9cb39c75f8d9498d8f82d92b173011f5/f/field/0-{i}");
64
- let vector = vec![rand::random::<f32>(); 178];
67
+ let vector = vec![rand::random::<f32>(); 128];
65
68
  elems.push(Elem::new(key.clone(), vector, labels.clone(), None));
66
69
  expected_keys.push(key);
67
70
  }
68
71
  let segment = segment::create(temp_dir.path(), elems, &CONFIG, HashSet::new()).unwrap();
69
- let query = vec![rand::random::<f32>(); 178];
72
+ let query = vec![rand::random::<f32>(); 128];
70
73
  let no_results = 10;
71
74
  let formula = queries[..20].iter().fold(Formula::new(), |mut acc, i| {
72
75
  acc.extend(i.clone());