nidx-binding 6.3.4.post159__tar.gz → 6.3.4.post167__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/PKG-INFO +1 -1
  2. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_protos/pyproject.toml +1 -1
  3. nidx_binding-6.3.4.post167/nidx_relation/src/graph_collector.rs +97 -0
  4. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/src/graph_query_parser.rs +85 -43
  5. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/src/lib.rs +1 -0
  6. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/src/reader.rs +57 -67
  7. nidx_binding-6.3.4.post167/nidx_relation/src/top_unique_n.rs +164 -0
  8. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/tests/test_graph_search.rs +329 -17
  9. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/pyproject.toml +1 -1
  10. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_search_relations.rs +6 -3
  11. nidx_binding-6.3.4.post159/nidx_relation/src/graph_collector.rs +0 -177
  12. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-0cfce9b29547f8f5bafa6e440f86103be7b8c4ad2fd92db9ac223f4efbe23d10.json +0 -0
  13. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-1a561eed00f3dbe868bf5030059793300209179dc8fb73e4b57a54b5e81262fe.json +0 -0
  14. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-1d3fca2682e25a01143da92285297f134a6a105a96f64d87e0db3abb219855e4.json +0 -0
  15. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-249b3b57c27a71baa823f1fe0f0bba9c9af36f61c28f731e58beea60ec48e687.json +0 -0
  16. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-24cb6b683daa42d7125f862e25943ab4be7bf275cd8739f8da4859d701795e1a.json +0 -0
  17. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-263c8fce6db5b03bbd012fafdba6943cbee6ed7eb8976cdef4f5b01dde7ca6fd.json +0 -0
  18. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-2a5d92fb1638df830a4477a7cdf24e6db6b43034b7bbe74fdfb63e8afe2c4071.json +0 -0
  19. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-2b065a363f58caed60e3706603c1260dbf5a4c795604a5b68edda22eb07fec1b.json +0 -0
  20. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-3fc3cb39934683de8cd475ce1368c8373453eb1e01f81587d66b9d14b109ce6e.json +0 -0
  21. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-48f33b77b7c1633467b0b2efcaa1d3c207e7757e4f1d83b40d15e6ca365f7771.json +0 -0
  22. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-4ae09f2c08e2f324bee01bb8487a8f37678a1c5e9d327339235c50d4921a8949.json +0 -0
  23. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-4d7a76fa413c9ef0ce2a47ac7bb7e01d3e6a2aabded9487d21010a53efee8852.json +0 -0
  24. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-4fcbdd6657c7dc9b60b3a563dd41711b3dbcf72ce063427b7a01f8cddf34c244.json +0 -0
  25. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-577109ac00ccfbd38ecaccab94116f2f46a4caf5612afa372cded197123c1e08.json +0 -0
  26. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-5db25f97d8578d6d78f2f6bd4b72cc82a9b1b82805c6422d967ac63b20d99db4.json +0 -0
  27. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-5ec3233a3a23e926055056d46bdde17836a633066dbb5f349502648cd3ea9a60.json +0 -0
  28. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-66edb6ea424d8681927dcddb6bac5f1239175f4775d1f40417ba15054b0c6f19.json +0 -0
  29. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-6f9c6d201c1b5712efb68c363bffd3e0169c11f2a8f925e8cd4e8808599ff7b4.json +0 -0
  30. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-733c3ebacc86f444bf5e2dd79ade660c291e88a00fc09b722f6e2e191545874c.json +0 -0
  31. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-77a245aed9f6f8b9fc643efbd4b1e2e8e99bbba085e3e75a4f29321cd7b2a25c.json +0 -0
  32. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-7a3bf27c330c468a596e8a297cf7d8b192e31e67ecc5177c1267f579e8e247c7.json +0 -0
  33. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-7a7e59e47b30b12237511fd3d7da2d17b0471ad2b006af48d6a6f587c779692b.json +0 -0
  34. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-7dcbb33312cc9f11ae3a6d73b1ace017a9f19a8bf8f10304fc57977c8efeadff.json +0 -0
  35. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-7efa7c0d747afc4b6aed0586ff846c27839c3213ff7ee9f30c89b0d0f17e60e3.json +0 -0
  36. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-8493140d788604d498a4e48da4158708572ccc9d60185290a00d549cc84533db.json +0 -0
  37. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-8493bb0059b013eaca42fd10cd7d04f0d06a8acaed379eff0d23f3229edde9ee.json +0 -0
  38. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-87996b3d6c7a2195438d7038015b06949102bce8c7b8cd8db1f83aaf23cbe489.json +0 -0
  39. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-8f096d8171b89f9615d18f95d696dc9e4fb3674e103161a713cdc806f7a68506.json +0 -0
  40. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-917732a56ee04bf3a6e127319dda8225210869c82f9828d878162394dba4e078.json +0 -0
  41. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-95fe4ef93ee90733db1b67ed7987f80b5aac792f1590b979c68b418d1599eb98.json +0 -0
  42. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-9b67658569b343d8b4b61ae0a7dc721f367f2ba33c7b69b9e68bfd5c9bff5206.json +0 -0
  43. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-9c8062ea55d070afef68309e58fa987eb37fda44e1efbf68c8ba2af7846cc968.json +0 -0
  44. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-a06e1d9f6f95e4c4c2b98310ebddcc9d963cc033582bf2e945e8bf3a301b4247.json +0 -0
  45. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-a55265c9b07bd1399961a6f1e757201fd0eebe868ddaf96437111113d80fce92.json +0 -0
  46. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-a891a37be5c2d7cce775c2dd33726b0318fd3839beab222a1b22bc6174604207.json +0 -0
  47. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-a945191bb4b3e37d6823ed3ad499339d007d69983105de8567777d9daf517b28.json +0 -0
  48. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-abe9f7832f2bd799ac44008da031e8d8ab52d4f5fbfc2a7e3974e8873bae55b2.json +0 -0
  49. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-aca588cca57a85e4d7fcc40c23cd87e57d53d11ca550d78e7e3d5e39e524fcd3.json +0 -0
  50. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-b02f8aafc00a7724510772ac41269e368c5bccf03ef7b4590e0ef6fd1a1bf64f.json +0 -0
  51. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-b742e17cabe2d64617e9aa64bafc782172f7a4f8023d1b54f952a0fb39f6b2b8.json +0 -0
  52. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-b94e349dbc0daec57f8f8f6e9e2dffb06100b1bb2b41d297c9f3b191da37a83d.json +0 -0
  53. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-bd9afa22994aba671dbf7b5f89b53c2ee02f53c0442a81265786a6d52d08512f.json +0 -0
  54. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-be60554eca98a5899efc6b49785cecd6444a6d39afed9e4a884ce2dbf162012c.json +0 -0
  55. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-bf49702b506c9a1650ece1f8e8d9f14834a902f8caefafe30ded55e2790f2188.json +0 -0
  56. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-bfb8ae2e860e451b0868a7b1b50a451d998b2105f9fdfd307b26a1775d145e9f.json +0 -0
  57. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-bfcd21ed704cd305db5c17fcdec7d92aa4ac501913c9c9514d8ff92928c0c7e7.json +0 -0
  58. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-c3ab694650f49a75b146fb877a92e48c4f20f0d99f70f8ec859fbb763b01a1e5.json +0 -0
  59. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-c55542bb9fae544d87fae6f30e0fe8a9088d12075f4442ab4fe2fcd05e472234.json +0 -0
  60. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-cb29a6556d35ac630ee0aa885dd7341cf9573bd3efd216ff8a887b87686b03db.json +0 -0
  61. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-d0a1f341a89f5f14696b10baa72db9d95551c2b7e5fc67308fd52dc03dd98a92.json +0 -0
  62. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-d2ad0a0ca2649c9e4873cfcc1fc66d2d07cc45d0f65c560b06d7b5f592f4fa8a.json +0 -0
  63. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-d6cfe78eb635ba0b89ca4021a4dc8182d18ab5b197f30149cd28488eba4c1df5.json +0 -0
  64. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-d729b56dea00e49dcdba8cf0001e2811da27351eabe98212db3b589f18fc6f32.json +0 -0
  65. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-d9658bfd4e7170b41d03f2ddf2446d0bf54171c0d39d53bf20af2b8437f2ec48.json +0 -0
  66. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-dbba7b3d3289425bae711aedbf73fbc3699f857f86f84d95c3b556d05c5658b0.json +0 -0
  67. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-dcb96b649d6d63a58efd5d445453a4f3d7869a56ff714b69bedf3d616a0473ca.json +0 -0
  68. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-ebd876fbf5362a5900e75bc05f2f11c73c406ef7da4e95097fc6a1c3d1b8bc54.json +0 -0
  69. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-eef5cc6bce1cc14eba8f3e68971724ef181e88cffcedd74673615f2026b89a62.json +0 -0
  70. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/.sqlx/query-ef56d5fefc5774040d1ee397beadb475f6af02768c22f0e583c74062e2e821ce.json +0 -0
  71. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/Cargo.lock +0 -0
  72. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/Cargo.toml +0 -0
  73. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/README.md +0 -0
  74. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/migrations/20241007163501_initial.sql +0 -0
  75. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/migrations/20241211120039_merge_job_priority.sql +0 -0
  76. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/migrations/20241211121159_basic_indexes.sql +0 -0
  77. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/migrations/20241212151105_check_segment_records.sql +0 -0
  78. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/migrations/20250110145554_in_flight_messages.sql +0 -0
  79. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_binding/Cargo.toml +0 -0
  80. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_binding/src/lib.rs +0 -0
  81. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/Cargo.toml +0 -0
  82. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/fuzzy_query.rs +0 -0
  83. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/lib.rs +0 -0
  84. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/query_io.rs +0 -0
  85. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/reader.rs +0 -0
  86. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/request_types.rs +0 -0
  87. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/resource_indexer.rs +0 -0
  88. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/schema.rs +0 -0
  89. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/search_query.rs +0 -0
  90. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/search_response.rs +0 -0
  91. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/set_query.rs +0 -0
  92. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/src/stop_words.rs +0 -0
  93. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/README.md +0 -0
  94. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/ar.json +0 -0
  95. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/az.json +0 -0
  96. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/bn.json +0 -0
  97. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/ca.json +0 -0
  98. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/ch.json +0 -0
  99. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/da.json +0 -0
  100. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/de.json +0 -0
  101. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/el.json +0 -0
  102. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/en.json +0 -0
  103. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/es.json +0 -0
  104. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/eu.json +0 -0
  105. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/extract.py +0 -0
  106. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/fi.json +0 -0
  107. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/fr.json +0 -0
  108. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/he.json +0 -0
  109. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/hu.json +0 -0
  110. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/id.json +0 -0
  111. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/it.json +0 -0
  112. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/kk.json +0 -0
  113. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/ne.json +0 -0
  114. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/nl.json +0 -0
  115. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/no.json +0 -0
  116. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/pt.json +0 -0
  117. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/ro.json +0 -0
  118. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/ru.json +0 -0
  119. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/sl.json +0 -0
  120. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/sv.json +0 -0
  121. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/tg.json +0 -0
  122. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/stop_words/tr.json +0 -0
  123. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/tests/common/mod.rs +0 -0
  124. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_paragraph/tests/reader.rs +0 -0
  125. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_protos/Cargo.toml +0 -0
  126. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_protos/build.py +0 -0
  127. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_protos/build.rs +0 -0
  128. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_protos/src/lib.rs +0 -0
  129. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_protos/src/nidx.proto +0 -0
  130. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/Cargo.toml +0 -0
  131. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/src/io_maps.rs +0 -0
  132. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/src/resource_indexer.rs +0 -0
  133. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/src/schema.rs +0 -0
  134. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/tests/common/mod.rs +0 -0
  135. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/tests/test_graph_query_parser_search.rs +0 -0
  136. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/tests/test_reader.rs +0 -0
  137. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_relation/tests/test_writer.rs +0 -0
  138. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_tantivy/Cargo.toml +0 -0
  139. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_tantivy/src/index_reader.rs +0 -0
  140. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_tantivy/src/lib.rs +0 -0
  141. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_tests/Cargo.toml +0 -0
  142. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_tests/src/graph.rs +0 -0
  143. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_tests/src/lib.rs +0 -0
  144. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/Cargo.toml +0 -0
  145. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/src/lib.rs +0 -0
  146. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/src/prefilter.rs +0 -0
  147. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/src/query_io.rs +0 -0
  148. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/src/reader.rs +0 -0
  149. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/src/request_types.rs +0 -0
  150. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/src/resource_indexer.rs +0 -0
  151. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/src/schema.rs +0 -0
  152. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/src/search_query.rs +0 -0
  153. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/tests/common/mod.rs +0 -0
  154. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/tests/test_flow.rs +0 -0
  155. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/tests/test_search.rs +0 -0
  156. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_text/tests/test_streaming.rs +0 -0
  157. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_types/Cargo.toml +0 -0
  158. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_types/src/lib.rs +0 -0
  159. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_types/src/prefilter.rs +0 -0
  160. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_types/src/query_language.rs +0 -0
  161. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/Cargo.toml +0 -0
  162. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/config.rs +0 -0
  163. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_point/disk_hnsw.rs +0 -0
  164. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_point/mod.rs +0 -0
  165. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_point/node.rs +0 -0
  166. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_point/ops_hnsw.rs +0 -0
  167. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_point/params.rs +0 -0
  168. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_point/ram_hnsw.rs +0 -0
  169. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_point/tests.rs +0 -0
  170. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_point_provider/mod.rs +0 -0
  171. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_point_provider/reader.rs +0 -0
  172. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_types/data_store.rs +0 -0
  173. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_types/mod.rs +0 -0
  174. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_types/trie.rs +0 -0
  175. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/data_types/trie_ram.rs +0 -0
  176. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/formula/mod.rs +0 -0
  177. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/indexer.rs +0 -0
  178. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/inverted_index/fst_index.rs +0 -0
  179. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/inverted_index/map.rs +0 -0
  180. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/inverted_index.rs +0 -0
  181. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/lib.rs +0 -0
  182. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/query_io.rs +0 -0
  183. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/request_types.rs +0 -0
  184. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/utils.rs +0 -0
  185. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/vector_types/dense_f32.rs +0 -0
  186. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/src/vector_types/mod.rs +0 -0
  187. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/tests/common/mod.rs +0 -0
  188. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/tests/test_basic_search.rs +0 -0
  189. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/nidx_vector/tests/test_hidden.rs +0 -0
  190. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/api/grpc.rs +0 -0
  191. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/api/shards.rs +0 -0
  192. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/api.rs +0 -0
  193. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/control.rs +0 -0
  194. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/errors.rs +0 -0
  195. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/grpc_server.rs +0 -0
  196. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/import_export.rs +0 -0
  197. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/indexer.rs +0 -0
  198. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/lib.rs +0 -0
  199. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/main.rs +0 -0
  200. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/metadata/deletion.rs +0 -0
  201. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/metadata/index.rs +0 -0
  202. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/metadata/index_request.rs +0 -0
  203. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/metadata/merge_job.rs +0 -0
  204. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/metadata/segment.rs +0 -0
  205. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/metadata/shard.rs +0 -0
  206. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/metadata.rs +0 -0
  207. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/metrics.rs +0 -0
  208. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/scheduler/audit_task.rs +0 -0
  209. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/scheduler/log_merge.rs +0 -0
  210. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/scheduler/merge_task.rs +0 -0
  211. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/scheduler/metrics_task.rs +0 -0
  212. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/scheduler/purge_tasks.rs +0 -0
  213. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/scheduler/vector_merge.rs +0 -0
  214. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/scheduler.rs +0 -0
  215. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/searcher/grpc.rs +0 -0
  216. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/searcher/index_cache.rs +0 -0
  217. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/searcher/query_language.rs +0 -0
  218. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/searcher/query_planner.rs +0 -0
  219. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/searcher/shard_search.rs +0 -0
  220. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/searcher/shard_selector.rs +0 -0
  221. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/searcher/shard_suggest.rs +0 -0
  222. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/searcher/streams.rs +0 -0
  223. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/searcher/sync.rs +0 -0
  224. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/searcher.rs +0 -0
  225. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/segment_store.rs +0 -0
  226. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/settings.rs +0 -0
  227. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/telemetry/duration_layer.rs +0 -0
  228. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/telemetry/log_format.rs +0 -0
  229. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/telemetry/middleware.rs +0 -0
  230. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/telemetry.rs +0 -0
  231. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/tool.rs +0 -0
  232. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/src/worker.rs +0 -0
  233. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/common/mod.rs +0 -0
  234. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/common/services.rs +0 -0
  235. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_date_range_search.rs +0 -0
  236. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_search_filtering.rs +0 -0
  237. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_search_sorting.rs +0 -0
  238. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_searcher_cluster.rs +0 -0
  239. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_security_search.rs +0 -0
  240. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_shards.rs +0 -0
  241. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_shards_api.rs +0 -0
  242. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_suggest.rs +0 -0
  243. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_synced_searcher.rs +0 -0
  244. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_vector_normalization.rs +0 -0
  245. {nidx_binding-6.3.4.post159 → nidx_binding-6.3.4.post167}/tests/test_vectorsets.rs +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nidx_binding
3
- Version: 6.3.4.post159
3
+ Version: 6.3.4.post167
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
@@ -4,7 +4,7 @@ build-backend = "pdm.backend"
4
4
 
5
5
  [project]
6
6
  name = "nidx_protos"
7
- version = "6.3.4.post159"
7
+ version = "6.3.4.post167"
8
8
  license = { text = "AGPL" }
9
9
  description = "Protobuf definitions for nucliadb/nidx"
10
10
  authors = [ { name = "Nuclia", email = "nucliadb@nuclia.com" }]
@@ -0,0 +1,97 @@
1
+ // Copyright (C) 2021 Bosutech XXI S.L.
2
+ //
3
+ // nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ // For commercial licensing, contact us at info@nuclia.com.
5
+ //
6
+ // AGPL:
7
+ // This program is free software: you can redistribute it and/or modify
8
+ // it under the terms of the GNU Affero General Public License as
9
+ // published by the Free Software Foundation, either version 3 of the
10
+ // License, or (at your option) any later version.
11
+ //
12
+ // This program is distributed in the hope that it will be useful,
13
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ // GNU Affero General Public License for more details.
16
+ //
17
+ // You should have received a copy of the GNU Affero General Public License
18
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ //
20
+
21
+ use tantivy::{
22
+ DocId, Score, SegmentOrdinal, SegmentReader,
23
+ collector::{Collector, SegmentCollector},
24
+ columnar::Column,
25
+ };
26
+
27
+ use crate::top_unique_n::TopUniqueN;
28
+
29
+ #[derive(Clone, Copy)]
30
+ pub enum Selector {
31
+ SourceNodes,
32
+ DestinationNodes,
33
+ Relations,
34
+ }
35
+
36
+ pub struct TopUniqueCollector {
37
+ limit: usize,
38
+ selector: Selector,
39
+ }
40
+
41
+ pub struct TopUniqueSegmentCollector {
42
+ unique: TopUniqueN<Vec<u64>>,
43
+ encoded_field: Column<u64>,
44
+ }
45
+
46
+ impl TopUniqueCollector {
47
+ pub fn new(selector: Selector, top_k: usize) -> Self {
48
+ Self { limit: top_k, selector }
49
+ }
50
+ }
51
+
52
+ impl Collector for TopUniqueCollector {
53
+ type Fruit = TopUniqueN<Vec<u64>>;
54
+ type Child = TopUniqueSegmentCollector;
55
+
56
+ fn requires_scoring(&self) -> bool {
57
+ true
58
+ }
59
+
60
+ fn for_segment(&self, _segment_local_id: SegmentOrdinal, segment: &SegmentReader) -> tantivy::Result<Self::Child> {
61
+ let fast_field = match self.selector {
62
+ Selector::SourceNodes => segment.fast_fields().u64("encoded_source_id")?,
63
+ Selector::DestinationNodes => segment.fast_fields().u64("encoded_target_id")?,
64
+ Selector::Relations => segment.fast_fields().u64("encoded_relation_id")?,
65
+ };
66
+ let segment_collector = TopUniqueSegmentCollector {
67
+ unique: TopUniqueN::new(self.limit),
68
+ encoded_field: fast_field,
69
+ };
70
+ Ok(segment_collector)
71
+ }
72
+
73
+ fn merge_fruits(
74
+ &self,
75
+ segment_fruits: Vec<<Self::Child as SegmentCollector>::Fruit>,
76
+ ) -> tantivy::Result<Self::Fruit> {
77
+ let fruits = segment_fruits.into_iter().flat_map(|top| top.into_sorted_vec());
78
+ let mut unique = TopUniqueN::new(self.limit);
79
+ for (key, score) in fruits {
80
+ unique.insert(key, score);
81
+ }
82
+ Ok(unique)
83
+ }
84
+ }
85
+
86
+ impl SegmentCollector for TopUniqueSegmentCollector {
87
+ type Fruit = TopUniqueN<Vec<u64>>;
88
+
89
+ fn collect(&mut self, doc_id: DocId, score: Score) {
90
+ let value = self.encoded_field.values_for_doc(doc_id).collect::<Vec<u64>>();
91
+ self.unique.insert(value, score);
92
+ }
93
+
94
+ fn harvest(self) -> Self::Fruit {
95
+ self.unique
96
+ }
97
+ }
@@ -19,10 +19,11 @@
19
19
  //
20
20
  use anyhow::anyhow;
21
21
  use nidx_protos::graph_query::FacetFilter;
22
+ use nidx_protos::graph_query::node::MatchLocation;
22
23
  use nidx_protos::relation::RelationType;
23
24
  use nidx_protos::relation_node::NodeType;
24
25
  use nidx_types::query_language::{BooleanExpression, BooleanOperation, Operator};
25
- use tantivy::query::{AllQuery, BooleanQuery, FuzzyTermQuery, Occur, Query, TermQuery};
26
+ use tantivy::query::{AllQuery, BooleanQuery, FuzzyTermQuery, Occur, Query, TermQuery, TermSetQuery};
26
27
  use tantivy::schema::{Facet, Field, IndexRecordOption};
27
28
  use tantivy::tokenizer::TokenizerManager;
28
29
 
@@ -41,7 +42,9 @@ pub struct FuzzyTerm {
41
42
  #[derive(Clone)]
42
43
  pub enum Term {
43
44
  Exact(String),
45
+ ExactWord(String),
44
46
  Fuzzy(FuzzyTerm),
47
+ FuzzyWord(FuzzyTerm),
45
48
  }
46
49
 
47
50
  #[derive(Default, Clone)]
@@ -467,8 +470,8 @@ impl<'a> GraphQueryParser<'a> {
467
470
 
468
471
  fn has_node_value(&self, value: &Term, exact_field: Field, tokenized_field: Field) -> Option<Box<dyn Query>> {
469
472
  let text_value = match value {
470
- Term::Exact(value) => value,
471
- Term::Fuzzy(fuzzy) => &fuzzy.value,
473
+ Term::Exact(value) | Term::ExactWord(value) => value,
474
+ Term::Fuzzy(fuzzy) | Term::FuzzyWord(fuzzy) => &fuzzy.value,
472
475
  };
473
476
  if text_value.is_empty() {
474
477
  return None;
@@ -482,42 +485,38 @@ impl<'a> GraphQueryParser<'a> {
482
485
  tokenized_terms.push(tantivy::Term::from_field_text(tokenized_field, &token.text));
483
486
  }
484
487
 
485
- // TODO: Rethink this
486
- // Current logic:
487
- // - Exact match always match the search term against the full field
488
- // - Fuzzy + prefix search works does a prefix fuzzy match of the whole entity name
489
- // - Fuzzy search looks for entities containing all words in the term with a fuzzy match (tokenized)
490
- //
491
- // Questions:
492
- // - Do we want exact match of a word in the entity? (kind of supported by setting fuzzy distance = 0)
493
488
  let query: Box<dyn Query> = match value {
494
489
  Term::Exact(_) => Box::new(TermQuery::new(exact_term, IndexRecordOption::Basic)),
495
- Term::Fuzzy(fuzzy) => match fuzzy {
496
- FuzzyTerm {
497
- fuzzy_distance,
498
- is_prefix: true,
499
- ..
500
- } => Box::new(FuzzyTermQuery::new_prefix(exact_term, *fuzzy_distance, true)),
501
-
502
- FuzzyTerm { fuzzy_distance, .. } => {
503
- if tokenized_terms.len() > 1 {
504
- Box::new(BooleanQuery::intersection(
505
- tokenized_terms
506
- .into_iter()
507
- .map(|term| -> Box<dyn Query> {
508
- Box::new(FuzzyTermQuery::new(term, *fuzzy_distance, true))
509
- })
510
- .collect(),
511
- ))
512
- } else {
513
- Box::new(FuzzyTermQuery::new(
514
- tokenized_terms.into_iter().next().unwrap(),
515
- *fuzzy_distance,
516
- true,
517
- ))
518
- }
490
+
491
+ Term::ExactWord(_) => Box::new(TermSetQuery::new(tokenized_terms)),
492
+
493
+ Term::Fuzzy(fuzzy) => {
494
+ if fuzzy.is_prefix {
495
+ Box::new(FuzzyTermQuery::new_prefix(exact_term, fuzzy.fuzzy_distance, true))
496
+ } else {
497
+ Box::new(FuzzyTermQuery::new(exact_term, fuzzy.fuzzy_distance, true))
519
498
  }
520
- },
499
+ }
500
+
501
+ Term::FuzzyWord(fuzzy) => {
502
+ let query_builder = if fuzzy.is_prefix {
503
+ FuzzyTermQuery::new_prefix
504
+ } else {
505
+ FuzzyTermQuery::new
506
+ };
507
+
508
+ if tokenized_terms.len() == 1 {
509
+ let tokenized_term = tokenized_terms.into_iter().next().unwrap();
510
+ Box::new(query_builder(tokenized_term, fuzzy.fuzzy_distance, true))
511
+ } else {
512
+ Box::new(BooleanQuery::intersection(
513
+ tokenized_terms
514
+ .into_iter()
515
+ .map(|term| -> Box<dyn Query> { Box::new(query_builder(term, fuzzy.fuzzy_distance, true)) })
516
+ .collect(),
517
+ ))
518
+ }
519
+ }
521
520
  };
522
521
 
523
522
  Some(query)
@@ -704,13 +703,56 @@ impl TryFrom<&nidx_protos::graph_query::Node> for Node {
704
703
  type Error = anyhow::Error;
705
704
 
706
705
  fn try_from(node_pb: &nidx_protos::graph_query::Node) -> Result<Self, Self::Error> {
707
- let value = node_pb.value.clone().map(|value| match node_pb.match_kind() {
708
- nidx_protos::graph_query::node::MatchKind::Exact => Term::Exact(value),
709
- nidx_protos::graph_query::node::MatchKind::Fuzzy => Term::Fuzzy(FuzzyTerm {
710
- value,
711
- fuzzy_distance: DEFAULT_NODE_VALUE_FUZZY_DISTANCE,
712
- is_prefix: true,
713
- }),
706
+ let value = node_pb.value.clone().map(|value| {
707
+ if let Some(match_kind) = node_pb.new_match_kind {
708
+ match match_kind {
709
+ nidx_protos::graph_query::node::NewMatchKind::Exact(exact) => match exact.kind() {
710
+ MatchLocation::Full => Term::Exact(value),
711
+ MatchLocation::Prefix => Term::Fuzzy(FuzzyTerm {
712
+ value,
713
+ fuzzy_distance: 0,
714
+ is_prefix: true,
715
+ }),
716
+ MatchLocation::Words => Term::ExactWord(value),
717
+ MatchLocation::PrefixWords => Term::FuzzyWord(FuzzyTerm {
718
+ value,
719
+ fuzzy_distance: 0,
720
+ is_prefix: true,
721
+ }),
722
+ },
723
+ nidx_protos::graph_query::node::NewMatchKind::Fuzzy(fuzzy) => match fuzzy.kind() {
724
+ MatchLocation::Full => Term::Fuzzy(FuzzyTerm {
725
+ value,
726
+ fuzzy_distance: fuzzy.distance as u8,
727
+ is_prefix: false,
728
+ }),
729
+ MatchLocation::Prefix => Term::Fuzzy(FuzzyTerm {
730
+ value,
731
+ fuzzy_distance: fuzzy.distance as u8,
732
+ is_prefix: true,
733
+ }),
734
+ MatchLocation::Words => Term::FuzzyWord(FuzzyTerm {
735
+ value,
736
+ fuzzy_distance: fuzzy.distance as u8,
737
+ is_prefix: false,
738
+ }),
739
+ MatchLocation::PrefixWords => Term::FuzzyWord(FuzzyTerm {
740
+ value,
741
+ fuzzy_distance: fuzzy.distance as u8,
742
+ is_prefix: true,
743
+ }),
744
+ },
745
+ }
746
+ } else {
747
+ match node_pb.match_kind() {
748
+ nidx_protos::graph_query::node::MatchKind::DeprecatedExact => Term::Exact(value),
749
+ nidx_protos::graph_query::node::MatchKind::DeprecatedFuzzy => Term::Fuzzy(FuzzyTerm {
750
+ value,
751
+ fuzzy_distance: DEFAULT_NODE_VALUE_FUZZY_DISTANCE,
752
+ is_prefix: true,
753
+ }),
754
+ }
755
+ }
714
756
  });
715
757
  let node_type = node_pb.node_type.map(NodeType::try_from).transpose()?;
716
758
  let node_subtype = node_pb.node_subtype.clone();
@@ -24,6 +24,7 @@ mod io_maps;
24
24
  mod reader;
25
25
  mod resource_indexer;
26
26
  mod schema;
27
+ mod top_unique_n;
27
28
 
28
29
  use anyhow::anyhow;
29
30
  use nidx_protos::{
@@ -31,14 +31,15 @@ use nidx_types::prefilter::{FieldId, PrefilterResult};
31
31
  use tantivy::collector::TopDocs;
32
32
  use tantivy::query::{BooleanQuery, EmptyQuery, Occur, Query, TermSetQuery};
33
33
  use tantivy::schema::Field;
34
- use tantivy::{DocAddress, Index, IndexReader, Searcher};
34
+ use tantivy::{Index, IndexReader};
35
35
  use uuid::Uuid;
36
36
 
37
- use crate::graph_collector::{NodeSelector, TopUniqueNodeCollector2, TopUniqueRelationCollector2};
37
+ use crate::graph_collector::{Selector, TopUniqueCollector};
38
38
  use crate::graph_query_parser::{
39
39
  BoolGraphQuery, BoolNodeQuery, Expression, FuzzyTerm, GraphQuery, GraphQueryParser, Node, NodeQuery, Term,
40
40
  };
41
41
  use crate::schema::{Schema, decode_node, decode_relation, encode_field_id};
42
+ use crate::top_unique_n::TopUniqueN;
42
43
  use crate::{RelationConfig, io_maps};
43
44
 
44
45
  const FUZZY_DISTANCE: u8 = 1;
@@ -146,10 +147,38 @@ impl RelationsReaderService {
146
147
  let collector = TopDocs::with_limit(top_k);
147
148
  let searcher = self.reader.searcher();
148
149
  let matching_docs = searcher.search(&index_query, &collector)?;
149
- self.build_graph_response(
150
- &searcher,
151
- matching_docs.into_iter().map(|(_score, doc_address)| doc_address),
152
- )
150
+
151
+ let mut nodes = Vec::new();
152
+ let mut relations = Vec::new();
153
+ let mut graph = Vec::new();
154
+
155
+ for (_score, doc_address) in matching_docs {
156
+ let doc = searcher.doc(doc_address)?;
157
+
158
+ let source = io_maps::source_to_relation_node(&self.schema, &doc);
159
+ let relation = io_maps::doc_to_graph_relation(&self.schema, &doc);
160
+ let destination = io_maps::target_to_relation_node(&self.schema, &doc);
161
+
162
+ let source_idx = nodes.len();
163
+ nodes.push(source);
164
+ let relation_idx = relations.len();
165
+ relations.push(relation);
166
+ let destination_idx = nodes.len();
167
+ nodes.push(destination);
168
+
169
+ graph.push(nidx_protos::graph_search_response::Path {
170
+ source: source_idx as u32,
171
+ relation: relation_idx as u32,
172
+ destination: destination_idx as u32,
173
+ })
174
+ }
175
+
176
+ let response = nidx_protos::GraphSearchResponse {
177
+ nodes,
178
+ relations,
179
+ graph,
180
+ };
181
+ Ok(response)
153
182
  }
154
183
 
155
184
  fn nodes_graph_search(
@@ -166,25 +195,27 @@ impl RelationsReaderService {
166
195
 
167
196
  let searcher = self.reader.searcher();
168
197
 
169
- let mut unique_nodes = HashSet::new();
198
+ let mut unique_nodes = TopUniqueN::new(top_k);
170
199
 
171
- let collector = TopUniqueNodeCollector2::new(NodeSelector::SourceNodes, top_k);
172
- let mut source_nodes = searcher.search(&source_query, &collector)?;
173
- unique_nodes.extend(source_nodes.drain());
200
+ let collector = TopUniqueCollector::new(Selector::SourceNodes, top_k);
201
+ let source_nodes = searcher.search(&source_query, &collector)?;
202
+ unique_nodes.merge(source_nodes);
174
203
 
175
- let collector = TopUniqueNodeCollector2::new(NodeSelector::DestinationNodes, top_k);
176
- let mut destination_nodes = searcher.search(&destination_query, &collector)?;
177
- unique_nodes.extend(destination_nodes.drain());
204
+ let collector = TopUniqueCollector::new(Selector::DestinationNodes, top_k);
205
+ let destination_nodes = searcher.search(&destination_query, &collector)?;
206
+ unique_nodes.merge(destination_nodes);
178
207
 
179
208
  let nodes = unique_nodes
209
+ .into_sorted_vec()
180
210
  .into_iter()
181
- .map(|encoded_node| decode_node(&encoded_node))
182
- .map(|(value, node_type, node_subtype)| RelationNode {
183
- value,
184
- ntype: io_maps::u64_to_node_type(node_type),
185
- subtype: node_subtype,
211
+ .map(|(encoded_node, _score)| {
212
+ let (value, node_type, node_subtype) = decode_node(&encoded_node);
213
+ RelationNode {
214
+ value,
215
+ ntype: io_maps::u64_to_node_type(node_type),
216
+ subtype: node_subtype,
217
+ }
186
218
  })
187
- .take(top_k)
188
219
  .collect();
189
220
 
190
221
  let response = nidx_protos::GraphSearchResponse {
@@ -207,12 +238,13 @@ impl RelationsReaderService {
207
238
 
208
239
  let searcher = self.reader.searcher();
209
240
 
210
- let collector = TopUniqueRelationCollector2::new(top_k);
211
- let matching_docs = searcher.search(&index_query, &collector)?;
241
+ let collector = TopUniqueCollector::new(Selector::Relations, top_k);
242
+ let top_relations = searcher.search(&index_query, &collector)?;
212
243
 
213
- let relations = matching_docs
244
+ let relations = top_relations
245
+ .into_sorted_vec()
214
246
  .into_iter()
215
- .map(|encoded_relation| {
247
+ .map(|(encoded_relation, _score)| {
216
248
  let (relation_type, relation_label) = decode_relation(&encoded_relation);
217
249
  nidx_protos::graph_search_response::Relation {
218
250
  relation_type: io_maps::u64_to_relation_type::<i32>(relation_type),
@@ -240,48 +272,6 @@ impl RelationsReaderService {
240
272
  }
241
273
  }
242
274
  }
243
-
244
- fn build_graph_response(
245
- &self,
246
- searcher: &Searcher,
247
- docs: impl Iterator<Item = DocAddress>,
248
- ) -> anyhow::Result<nidx_protos::GraphSearchResponse> {
249
- // We are being very naive and writing everything to the proto response. We could be smarter
250
- // and deduplicates nodes and relations. As paths are pointers, this would improve proto
251
- // size and ser/de time at expenses of deduplication effort.
252
-
253
- let mut nodes = Vec::new();
254
- let mut relations = Vec::new();
255
- let mut graph = Vec::new();
256
-
257
- for doc_address in docs {
258
- let doc = searcher.doc(doc_address)?;
259
-
260
- let source = io_maps::source_to_relation_node(&self.schema, &doc);
261
- let relation = io_maps::doc_to_graph_relation(&self.schema, &doc);
262
- let destination = io_maps::target_to_relation_node(&self.schema, &doc);
263
-
264
- let source_idx = nodes.len();
265
- nodes.push(source);
266
- let relation_idx = relations.len();
267
- relations.push(relation);
268
- let destination_idx = nodes.len();
269
- nodes.push(destination);
270
-
271
- graph.push(nidx_protos::graph_search_response::Path {
272
- source: source_idx as u32,
273
- relation: relation_idx as u32,
274
- destination: destination_idx as u32,
275
- })
276
- }
277
-
278
- let response = nidx_protos::GraphSearchResponse {
279
- nodes,
280
- relations,
281
- graph,
282
- };
283
- Ok(response)
284
- }
285
275
  }
286
276
 
287
277
  impl RelationsReaderService {
@@ -449,13 +439,13 @@ impl RelationsReaderService {
449
439
  let words: Vec<_> = query.split_whitespace().collect();
450
440
  for word in words {
451
441
  prefix_nodes_q.push(Node {
452
- value: Some(Term::Fuzzy(FuzzyTerm {
442
+ value: Some(Term::FuzzyWord(FuzzyTerm {
453
443
  value: word.to_string(),
454
444
  fuzzy_distance: FUZZY_DISTANCE,
455
445
  is_prefix: false,
456
446
  })),
457
447
  ..Default::default()
458
- })
448
+ });
459
449
  }
460
450
 
461
451
  // add fuzzy query for all prefixes
@@ -0,0 +1,164 @@
1
+ // Copyright (C) 2021 Bosutech XXI S.L.
2
+ //
3
+ // nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ // For commercial licensing, contact us at info@nuclia.com.
5
+ //
6
+ // AGPL:
7
+ // This program is free software: you can redistribute it and/or modify
8
+ // it under the terms of the GNU Affero General Public License as
9
+ // published by the Free Software Foundation, either version 3 of the
10
+ // License, or (at your option) any later version.
11
+ //
12
+ // This program is distributed in the hope that it will be useful,
13
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ // GNU Affero General Public License for more details.
16
+ //
17
+ // You should have received a copy of the GNU Affero General Public License
18
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ //
20
+
21
+ use std::collections::HashMap;
22
+
23
+ /// Top unique N
24
+ ///
25
+ /// Maintain the top set of unique keys with greatest scores.
26
+ pub struct TopUniqueN<K> {
27
+ elements: HashMap<K, f32>,
28
+ top_n: usize,
29
+ threshold: f32,
30
+ }
31
+
32
+ impl<K> TopUniqueN<K>
33
+ where
34
+ K: Eq + std::hash::Hash + std::fmt::Debug,
35
+ {
36
+ pub fn new(top_n: usize) -> Self {
37
+ Self {
38
+ top_n,
39
+ elements: HashMap::with_capacity(2 * top_n),
40
+ threshold: f32::NEG_INFINITY,
41
+ }
42
+ }
43
+
44
+ pub fn insert(&mut self, key: K, score: f32) {
45
+ if score < self.threshold {
46
+ return;
47
+ }
48
+
49
+ if self.elements.len() == self.elements.capacity() {
50
+ let lowest_score = self.truncate_top_n();
51
+ self.threshold = lowest_score;
52
+ }
53
+
54
+ self.elements
55
+ .entry(key)
56
+ .and_modify(|s| {
57
+ if score > *s {
58
+ *s = score
59
+ }
60
+ })
61
+ .or_insert(score);
62
+ }
63
+
64
+ // Truncate the current set of elements to N, leaving only the top-scoring
65
+ // elements. Return the smallest score across the top.
66
+ fn truncate_top_n(&mut self) -> f32 {
67
+ let mut vec = Vec::from_iter(self.elements.drain());
68
+ vec.sort_unstable_by(|a, b| a.1.total_cmp(&b.1).reverse());
69
+ vec.truncate(self.top_n);
70
+ let lowest_score = vec.last().map(|(_, score)| *score).unwrap_or(f32::NEG_INFINITY);
71
+
72
+ self.elements.extend(vec);
73
+
74
+ lowest_score
75
+ }
76
+
77
+ pub fn into_sorted_vec(self) -> Vec<(K, f32)> {
78
+ let mut vec = Vec::from_iter(self.elements);
79
+ vec.sort_by(|a, b| a.1.total_cmp(&b.1).reverse());
80
+ vec.truncate(self.top_n);
81
+ vec
82
+ }
83
+
84
+ pub fn merge(&mut self, other: Self) {
85
+ for (key, score) in other.elements.into_iter() {
86
+ self.insert(key, score);
87
+ }
88
+ }
89
+ }
90
+
91
+ #[cfg(test)]
92
+ mod tests {
93
+ use super::*;
94
+
95
+ #[test]
96
+ fn test_top_n() {
97
+ let mut top = TopUniqueN::new(2);
98
+ top.insert("A", 1.0);
99
+ top.insert("B", 3.0);
100
+ top.insert("C", 2.0);
101
+ top.insert("D", 4.0);
102
+ top.insert("E", -1.0);
103
+
104
+ let r = top.into_sorted_vec();
105
+ assert_eq!(r.len(), 2);
106
+ let r: HashMap<_, _> = HashMap::from_iter(r);
107
+ let expected = HashMap::from_iter([("B", 3.0), ("D", 4.0)]);
108
+ assert_eq!(r, expected);
109
+ }
110
+
111
+ /// Validate that, when inserting more than its capacity, values are truncated to N.
112
+ #[test]
113
+ fn test_internal_truncate() {
114
+ const N: usize = 2;
115
+ let mut top = TopUniqueN::new(N);
116
+
117
+ // capacity is at least 2 * N, but in reality, it's usually more
118
+ let actual_capacity = top.elements.capacity();
119
+ assert!(actual_capacity >= 2 * N);
120
+
121
+ let mut key_id = 0;
122
+ let mut key_generator = std::iter::repeat_with(|| {
123
+ let key = key_id.to_string();
124
+ key_id += 1;
125
+ key
126
+ });
127
+
128
+ while top.elements.len() < top.elements.capacity() {
129
+ let key = key_generator.next().unwrap();
130
+ top.insert(key, 1.0);
131
+ }
132
+ assert_eq!(top.elements.len(), top.elements.capacity());
133
+ assert!(top.threshold < 0.0);
134
+
135
+ // this insert would overflow the capacity, but it truncates the internal values and doesn't
136
+ // increase it
137
+ top.insert("A".to_string(), 1.0);
138
+ assert_eq!(top.elements.capacity(), actual_capacity);
139
+ assert_eq!(top.elements.len(), N + 1);
140
+ assert_eq!(top.threshold, 1.0);
141
+ }
142
+
143
+ #[test]
144
+ fn test_merge() {
145
+ let mut top_a = TopUniqueN::new(4);
146
+ top_a.insert("A1", 1.0);
147
+ top_a.insert("A2", 3.0);
148
+
149
+ let mut top_b = TopUniqueN::new(3);
150
+ top_b.insert("B1", 1.0);
151
+ top_b.insert("B2", 3.0);
152
+ top_b.insert("B3", 4.0);
153
+ top_b.insert("B4", 2.0);
154
+
155
+ top_a.merge(top_b);
156
+
157
+ let r = top_a.into_sorted_vec();
158
+ assert_eq!(r.len(), 4);
159
+
160
+ let r: HashMap<_, _> = HashMap::from_iter(r);
161
+ let expected = HashMap::from_iter([("A2", 3.0), ("B2", 3.0), ("B3", 4.0), ("B4", 2.0)]);
162
+ assert_eq!(r, expected);
163
+ }
164
+ }