nidx-binding 6.7.0.post467__tar.gz → 6.7.0.post483__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (263)
  1. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/Cargo.lock +1 -0
  2. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/PKG-INFO +2 -2
  3. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/Cargo.toml +1 -0
  4. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/src/fuzzy_query.rs +11 -1
  5. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/src/lib.rs +1 -1
  6. nidx_binding-6.7.0.post483/nidx_paragraph/src/query_parser/fuzzy_parser.rs +192 -0
  7. nidx_binding-6.7.0.post483/nidx_paragraph/src/query_parser/keyword_parser.rs +212 -0
  8. {nidx_binding-6.7.0.post467/nidx_paragraph/src → nidx_binding-6.7.0.post483/nidx_paragraph/src/query_parser}/stop_words.rs +107 -0
  9. nidx_binding-6.7.0.post483/nidx_paragraph/src/query_parser/tokenizer.rs +454 -0
  10. nidx_binding-6.7.0.post483/nidx_paragraph/src/query_parser.rs +156 -0
  11. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/src/reader.rs +80 -88
  12. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/src/request_types.rs +13 -0
  13. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/src/schema.rs +1 -0
  14. nidx_binding-6.7.0.post483/nidx_paragraph/src/search_query.rs +236 -0
  15. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/src/search_response.rs +20 -9
  16. nidx_binding-6.7.0.post483/nidx_paragraph/tests/reader.rs +507 -0
  17. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_protos/pyproject.toml +1 -1
  18. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/pyproject.toml +1 -1
  19. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_suggest.rs +5 -3
  20. nidx_binding-6.7.0.post467/nidx_paragraph/src/search_query.rs +0 -489
  21. nidx_binding-6.7.0.post467/nidx_paragraph/tests/reader.rs +0 -495
  22. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.config/nextest.toml +0 -0
  23. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-0cfce9b29547f8f5bafa6e440f86103be7b8c4ad2fd92db9ac223f4efbe23d10.json +0 -0
  24. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-1a561eed00f3dbe868bf5030059793300209179dc8fb73e4b57a54b5e81262fe.json +0 -0
  25. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-1d3fca2682e25a01143da92285297f134a6a105a96f64d87e0db3abb219855e4.json +0 -0
  26. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-249b3b57c27a71baa823f1fe0f0bba9c9af36f61c28f731e58beea60ec48e687.json +0 -0
  27. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-24cb6b683daa42d7125f862e25943ab4be7bf275cd8739f8da4859d701795e1a.json +0 -0
  28. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-263c8fce6db5b03bbd012fafdba6943cbee6ed7eb8976cdef4f5b01dde7ca6fd.json +0 -0
  29. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-2a5d92fb1638df830a4477a7cdf24e6db6b43034b7bbe74fdfb63e8afe2c4071.json +0 -0
  30. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-2b065a363f58caed60e3706603c1260dbf5a4c795604a5b68edda22eb07fec1b.json +0 -0
  31. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-3fc3cb39934683de8cd475ce1368c8373453eb1e01f81587d66b9d14b109ce6e.json +0 -0
  32. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-48f33b77b7c1633467b0b2efcaa1d3c207e7757e4f1d83b40d15e6ca365f7771.json +0 -0
  33. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-4ae09f2c08e2f324bee01bb8487a8f37678a1c5e9d327339235c50d4921a8949.json +0 -0
  34. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-4d7a76fa413c9ef0ce2a47ac7bb7e01d3e6a2aabded9487d21010a53efee8852.json +0 -0
  35. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-4fcbdd6657c7dc9b60b3a563dd41711b3dbcf72ce063427b7a01f8cddf34c244.json +0 -0
  36. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-577109ac00ccfbd38ecaccab94116f2f46a4caf5612afa372cded197123c1e08.json +0 -0
  37. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-5db25f97d8578d6d78f2f6bd4b72cc82a9b1b82805c6422d967ac63b20d99db4.json +0 -0
  38. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-5ec3233a3a23e926055056d46bdde17836a633066dbb5f349502648cd3ea9a60.json +0 -0
  39. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-66edb6ea424d8681927dcddb6bac5f1239175f4775d1f40417ba15054b0c6f19.json +0 -0
  40. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-6f9c6d201c1b5712efb68c363bffd3e0169c11f2a8f925e8cd4e8808599ff7b4.json +0 -0
  41. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-733c3ebacc86f444bf5e2dd79ade660c291e88a00fc09b722f6e2e191545874c.json +0 -0
  42. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-7a3bf27c330c468a596e8a297cf7d8b192e31e67ecc5177c1267f579e8e247c7.json +0 -0
  43. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-7a7e59e47b30b12237511fd3d7da2d17b0471ad2b006af48d6a6f587c779692b.json +0 -0
  44. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-7dcbb33312cc9f11ae3a6d73b1ace017a9f19a8bf8f10304fc57977c8efeadff.json +0 -0
  45. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-7efa7c0d747afc4b6aed0586ff846c27839c3213ff7ee9f30c89b0d0f17e60e3.json +0 -0
  46. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-8493140d788604d498a4e48da4158708572ccc9d60185290a00d549cc84533db.json +0 -0
  47. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-8493bb0059b013eaca42fd10cd7d04f0d06a8acaed379eff0d23f3229edde9ee.json +0 -0
  48. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-87996b3d6c7a2195438d7038015b06949102bce8c7b8cd8db1f83aaf23cbe489.json +0 -0
  49. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-8d33717587c6ee8f5fc339a80b1212a73d6c03e45856b1d55457fc8074709dd0.json +0 -0
  50. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-8f096d8171b89f9615d18f95d696dc9e4fb3674e103161a713cdc806f7a68506.json +0 -0
  51. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-917732a56ee04bf3a6e127319dda8225210869c82f9828d878162394dba4e078.json +0 -0
  52. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-95fe4ef93ee90733db1b67ed7987f80b5aac792f1590b979c68b418d1599eb98.json +0 -0
  53. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-9b67658569b343d8b4b61ae0a7dc721f367f2ba33c7b69b9e68bfd5c9bff5206.json +0 -0
  54. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-9c8062ea55d070afef68309e58fa987eb37fda44e1efbf68c8ba2af7846cc968.json +0 -0
  55. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-a06e1d9f6f95e4c4c2b98310ebddcc9d963cc033582bf2e945e8bf3a301b4247.json +0 -0
  56. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-a55265c9b07bd1399961a6f1e757201fd0eebe868ddaf96437111113d80fce92.json +0 -0
  57. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-a60ec2f66f1e7b84189e5b089f2087a29ff6a64326a3743dea935bbc58ee77fa.json +0 -0
  58. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-a891a37be5c2d7cce775c2dd33726b0318fd3839beab222a1b22bc6174604207.json +0 -0
  59. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-a945191bb4b3e37d6823ed3ad499339d007d69983105de8567777d9daf517b28.json +0 -0
  60. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-abe9f7832f2bd799ac44008da031e8d8ab52d4f5fbfc2a7e3974e8873bae55b2.json +0 -0
  61. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-aca588cca57a85e4d7fcc40c23cd87e57d53d11ca550d78e7e3d5e39e524fcd3.json +0 -0
  62. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-b02f8aafc00a7724510772ac41269e368c5bccf03ef7b4590e0ef6fd1a1bf64f.json +0 -0
  63. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-b742e17cabe2d64617e9aa64bafc782172f7a4f8023d1b54f952a0fb39f6b2b8.json +0 -0
  64. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-b94e349dbc0daec57f8f8f6e9e2dffb06100b1bb2b41d297c9f3b191da37a83d.json +0 -0
  65. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-bd9afa22994aba671dbf7b5f89b53c2ee02f53c0442a81265786a6d52d08512f.json +0 -0
  66. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-be60554eca98a5899efc6b49785cecd6444a6d39afed9e4a884ce2dbf162012c.json +0 -0
  67. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-bf49702b506c9a1650ece1f8e8d9f14834a902f8caefafe30ded55e2790f2188.json +0 -0
  68. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-bfcd21ed704cd305db5c17fcdec7d92aa4ac501913c9c9514d8ff92928c0c7e7.json +0 -0
  69. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-c3ab694650f49a75b146fb877a92e48c4f20f0d99f70f8ec859fbb763b01a1e5.json +0 -0
  70. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-c55542bb9fae544d87fae6f30e0fe8a9088d12075f4442ab4fe2fcd05e472234.json +0 -0
  71. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-cb29a6556d35ac630ee0aa885dd7341cf9573bd3efd216ff8a887b87686b03db.json +0 -0
  72. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-d0a1f341a89f5f14696b10baa72db9d95551c2b7e5fc67308fd52dc03dd98a92.json +0 -0
  73. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-d2ad0a0ca2649c9e4873cfcc1fc66d2d07cc45d0f65c560b06d7b5f592f4fa8a.json +0 -0
  74. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-d6cfe78eb635ba0b89ca4021a4dc8182d18ab5b197f30149cd28488eba4c1df5.json +0 -0
  75. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-d729b56dea00e49dcdba8cf0001e2811da27351eabe98212db3b589f18fc6f32.json +0 -0
  76. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-d9658bfd4e7170b41d03f2ddf2446d0bf54171c0d39d53bf20af2b8437f2ec48.json +0 -0
  77. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-dbba7b3d3289425bae711aedbf73fbc3699f857f86f84d95c3b556d05c5658b0.json +0 -0
  78. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-dcb96b649d6d63a58efd5d445453a4f3d7869a56ff714b69bedf3d616a0473ca.json +0 -0
  79. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-ebd876fbf5362a5900e75bc05f2f11c73c406ef7da4e95097fc6a1c3d1b8bc54.json +0 -0
  80. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-eef5cc6bce1cc14eba8f3e68971724ef181e88cffcedd74673615f2026b89a62.json +0 -0
  81. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/.sqlx/query-ef56d5fefc5774040d1ee397beadb475f6af02768c22f0e583c74062e2e821ce.json +0 -0
  82. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/Cargo.toml +0 -0
  83. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/README.md +0 -0
  84. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/migrations/20241007163501_initial.sql +0 -0
  85. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/migrations/20241211120039_merge_job_priority.sql +0 -0
  86. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/migrations/20241211121159_basic_indexes.sql +0 -0
  87. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/migrations/20241212151105_check_segment_records.sql +0 -0
  88. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/migrations/20250110145554_in_flight_messages.sql +0 -0
  89. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_binding/Cargo.toml +0 -0
  90. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_binding/src/lib.rs +0 -0
  91. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/src/query_io.rs +0 -0
  92. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/src/resource_indexer.rs +0 -0
  93. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/src/set_query.rs +0 -0
  94. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/README.md +0 -0
  95. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/ar.json +0 -0
  96. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/az.json +0 -0
  97. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/bn.json +0 -0
  98. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/ca.json +0 -0
  99. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/ch.json +0 -0
  100. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/da.json +0 -0
  101. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/de.json +0 -0
  102. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/el.json +0 -0
  103. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/en.json +0 -0
  104. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/es.json +0 -0
  105. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/eu.json +0 -0
  106. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/extract.py +0 -0
  107. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/fi.json +0 -0
  108. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/fr.json +0 -0
  109. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/he.json +0 -0
  110. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/hu.json +0 -0
  111. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/id.json +0 -0
  112. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/it.json +0 -0
  113. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/kk.json +0 -0
  114. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/ne.json +0 -0
  115. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/nl.json +0 -0
  116. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/no.json +0 -0
  117. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/pt.json +0 -0
  118. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/ro.json +0 -0
  119. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/ru.json +0 -0
  120. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/sl.json +0 -0
  121. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/sv.json +0 -0
  122. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/tg.json +0 -0
  123. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/stop_words/tr.json +0 -0
  124. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_paragraph/tests/common/mod.rs +0 -0
  125. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_protos/Cargo.toml +0 -0
  126. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_protos/build.py +0 -0
  127. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_protos/build.rs +0 -0
  128. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_protos/nidx.proto +0 -0
  129. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_protos/nodereader.proto +0 -0
  130. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_protos/noderesources.proto +0 -0
  131. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_protos/nodewriter.proto +0 -0
  132. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_protos/src/lib.rs +0 -0
  133. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/Cargo.toml +0 -0
  134. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/src/graph_collector.rs +0 -0
  135. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/src/graph_query_parser.rs +0 -0
  136. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/src/io_maps.rs +0 -0
  137. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/src/lib.rs +0 -0
  138. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/src/reader.rs +0 -0
  139. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/src/resource_indexer.rs +0 -0
  140. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/src/schema.rs +0 -0
  141. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/src/top_unique_n.rs +0 -0
  142. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/tests/common/mod.rs +0 -0
  143. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/tests/test_graph_query_parser_search.rs +0 -0
  144. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/tests/test_graph_search.rs +0 -0
  145. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_relation/tests/test_writer.rs +0 -0
  146. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_tantivy/Cargo.toml +0 -0
  147. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_tantivy/src/index_reader.rs +0 -0
  148. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_tantivy/src/lib.rs +0 -0
  149. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_tantivy/src/utils.rs +0 -0
  150. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_tests/Cargo.toml +0 -0
  151. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_tests/src/graph.rs +0 -0
  152. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_tests/src/lib.rs +0 -0
  153. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/Cargo.toml +0 -0
  154. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/src/lib.rs +0 -0
  155. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/src/prefilter.rs +0 -0
  156. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/src/query_io.rs +0 -0
  157. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/src/reader.rs +0 -0
  158. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/src/request_types.rs +0 -0
  159. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/src/resource_indexer.rs +0 -0
  160. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/src/schema.rs +0 -0
  161. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/src/search_query.rs +0 -0
  162. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/tests/common/mod.rs +0 -0
  163. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/tests/test_deletions.rs +0 -0
  164. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/tests/test_flow.rs +0 -0
  165. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/tests/test_search.rs +0 -0
  166. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_text/tests/test_streaming.rs +0 -0
  167. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_types/Cargo.toml +0 -0
  168. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_types/src/lib.rs +0 -0
  169. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_types/src/prefilter.rs +0 -0
  170. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_types/src/query_language.rs +0 -0
  171. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/Cargo.toml +0 -0
  172. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/config.rs +0 -0
  173. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/data_store/v1/node.rs +0 -0
  174. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/data_store/v1/store.rs +0 -0
  175. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/data_store/v1/trie.rs +0 -0
  176. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/data_store/v1/trie_ram.rs +0 -0
  177. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/data_store/v1.rs +0 -0
  178. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/data_store/v2/paragraph_store.rs +0 -0
  179. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/data_store/v2/vector_store.rs +0 -0
  180. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/data_store/v2.rs +0 -0
  181. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/data_store.rs +0 -0
  182. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/data_types.rs +0 -0
  183. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/formula.rs +0 -0
  184. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/hnsw/disk_hnsw.rs +0 -0
  185. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/hnsw/ops_hnsw.rs +0 -0
  186. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/hnsw/params.rs +0 -0
  187. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/hnsw/ram_hnsw.rs +0 -0
  188. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/hnsw.rs +0 -0
  189. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/indexer.rs +0 -0
  190. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/inverted_index/fst_index.rs +0 -0
  191. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/inverted_index/map.rs +0 -0
  192. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/inverted_index.rs +0 -0
  193. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/lib.rs +0 -0
  194. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/multivector.rs +0 -0
  195. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/query_io.rs +0 -0
  196. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/request_types.rs +0 -0
  197. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/searcher.rs +0 -0
  198. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/segment/tests.rs +0 -0
  199. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/segment.rs +0 -0
  200. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/utils.rs +0 -0
  201. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/vector_types/dense_f32.rs +0 -0
  202. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/src/vector_types/mod.rs +0 -0
  203. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/tests/common/mod.rs +0 -0
  204. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/tests/test_basic_search.rs +0 -0
  205. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/tests/test_hidden.rs +0 -0
  206. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/nidx_vector/tests/test_maxsim.rs +0 -0
  207. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/api/grpc.rs +0 -0
  208. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/api/shards.rs +0 -0
  209. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/api.rs +0 -0
  210. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/control.rs +0 -0
  211. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/errors.rs +0 -0
  212. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/grpc_server.rs +0 -0
  213. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/import_export.rs +0 -0
  214. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/indexer.rs +0 -0
  215. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/lib.rs +0 -0
  216. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/main.rs +0 -0
  217. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/metadata/deletion.rs +0 -0
  218. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/metadata/index.rs +0 -0
  219. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/metadata/index_request.rs +0 -0
  220. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/metadata/merge_job.rs +0 -0
  221. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/metadata/segment.rs +0 -0
  222. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/metadata/shard.rs +0 -0
  223. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/metadata.rs +0 -0
  224. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/metrics.rs +0 -0
  225. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/scheduler/audit_task.rs +0 -0
  226. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/scheduler/log_merge.rs +0 -0
  227. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/scheduler/merge_task.rs +0 -0
  228. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/scheduler/metrics_task.rs +0 -0
  229. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/scheduler/purge_tasks.rs +0 -0
  230. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/scheduler/vector_merge.rs +0 -0
  231. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/scheduler.rs +0 -0
  232. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/searcher/grpc.rs +0 -0
  233. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/searcher/index_cache.rs +0 -0
  234. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/searcher/query_language.rs +0 -0
  235. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/searcher/query_planner.rs +0 -0
  236. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/searcher/shard_search.rs +0 -0
  237. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/searcher/shard_selector.rs +0 -0
  238. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/searcher/shard_suggest.rs +0 -0
  239. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/searcher/streams.rs +0 -0
  240. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/searcher/sync.rs +0 -0
  241. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/searcher.rs +0 -0
  242. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/segment_store.rs +0 -0
  243. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/settings.rs +0 -0
  244. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/telemetry/duration_layer.rs +0 -0
  245. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/telemetry/log_format.rs +0 -0
  246. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/telemetry/middleware.rs +0 -0
  247. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/telemetry.rs +0 -0
  248. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/tool.rs +0 -0
  249. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/utilization_tracker.rs +0 -0
  250. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/src/worker.rs +0 -0
  251. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/common/mod.rs +0 -0
  252. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/common/services.rs +0 -0
  253. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_date_range_search.rs +0 -0
  254. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_search_filtering.rs +0 -0
  255. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_search_relations.rs +0 -0
  256. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_search_sorting.rs +0 -0
  257. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_searcher_cluster.rs +0 -0
  258. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_security_search.rs +0 -0
  259. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_shards.rs +0 -0
  260. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_shards_api.rs +0 -0
  261. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_synced_searcher.rs +0 -0
  262. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_vector_normalization.rs +0 -0
  263. {nidx_binding-6.7.0.post467 → nidx_binding-6.7.0.post483}/tests/test_vectorsets.rs +0 -0
@@ -2193,6 +2193,7 @@ dependencies = [
2193
2193
  "nidx_protos",
2194
2194
  "nidx_tantivy",
2195
2195
  "nidx_types",
2196
+ "nom",
2196
2197
  "once_cell",
2197
2198
  "regex",
2198
2199
  "serde_json",
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nidx_binding
3
- Version: 6.7.0.post467
3
+ Version: 6.7.0.post483
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
7
7
  Summary: Bindings for nidx (part of nucliadb)
8
8
  Author-email: Nuclia <nucliadb@nuclia.com>
9
- License: AGPL-3.0-or-later
9
+ License-Expression: AGPL-3.0-or-later
10
10
  Requires-Python: >=3.9, <4
11
11
  Project-URL: Homepage, https://nuclia.com
12
12
  Project-URL: Repository, https://github.com/nuclia/nucliadb
@@ -12,6 +12,7 @@ levenshtein_automata = "0.2.1"
12
12
  nidx_protos = { version = "0.1.0", path = "../nidx_protos" }
13
13
  nidx_tantivy = { version = "0.1.0", path = "../nidx_tantivy" }
14
14
  nidx_types = { version = "0.1.0", path = "../nidx_types" }
15
+ nom = "7" # same version as used by tantivy
15
16
  once_cell = "1.20.2"
16
17
  regex = "1.11.1"
17
18
  serde_json = "1.0.132"
@@ -184,7 +184,12 @@ pub struct FuzzyTermQuery {
184
184
 
185
185
  impl std::fmt::Debug for FuzzyTermQuery {
186
186
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
187
- f.write_str("Fuzzy")
187
+ f.write_fmt(format_args!(
188
+ "Fuzzy {{ term: {}, distance: {}, prefix: {} }}",
189
+ self.term.value().as_str().unwrap_or("<unknown>"),
190
+ self.distance,
191
+ self.prefix
192
+ ))
188
193
  }
189
194
  }
190
195
  impl FuzzyTermQuery {
@@ -210,6 +215,11 @@ impl FuzzyTermQuery {
210
215
  }
211
216
  }
212
217
 
218
+ #[cfg(test)]
219
+ pub fn is_prefix(&self) -> bool {
220
+ self.prefix
221
+ }
222
+
213
223
  fn specialized_weight(&self) -> tantivy::Result<AutomatonWeight<DfaWrapper>> {
214
224
  // LEV_BUILDER is a HashMap, whose `get` method returns an Option
215
225
  match LEV_BUILDER.get(&(self.distance, self.transposition_cost_one)) {
@@ -20,6 +20,7 @@
20
20
 
21
21
  mod fuzzy_query;
22
22
  mod query_io;
23
+ mod query_parser;
23
24
  mod reader;
24
25
  mod request_types;
25
26
  mod resource_indexer;
@@ -27,7 +28,6 @@ mod schema;
27
28
  mod search_query;
28
29
  mod search_response;
29
30
  mod set_query;
30
- mod stop_words;
31
31
 
32
32
  use nidx_protos::{ParagraphItem, ParagraphSearchResponse, StreamRequest};
33
33
  use nidx_tantivy::{
@@ -0,0 +1,192 @@
1
+ // Copyright (C) 2021 Bosutech XXI S.L.
2
+ //
3
+ // nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ // For commercial licensing, contact us at info@nuclia.com.
5
+ //
6
+ // AGPL:
7
+ // This program is free software: you can redistribute it and/or modify
8
+ // it under the terms of the GNU Affero General Public License as
9
+ // published by the Free Software Foundation, either version 3 of the
10
+ // License, or (at your option) any later version.
11
+ //
12
+ // This program is distributed in the hope that it will be useful,
13
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ // GNU Affero General Public License for more details.
16
+ //
17
+ // You should have received a copy of the GNU Affero General Public License
18
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ //
20
+
21
+ use tantivy::Term;
22
+ use tantivy::query::AllQuery;
23
+ use tantivy::query::BooleanQuery;
24
+ use tantivy::query::Occur;
25
+ use tantivy::query::Query;
26
+ use tantivy::query::TermQuery;
27
+ use tantivy::schema::IndexRecordOption;
28
+
29
+ use crate::fuzzy_query::FuzzyTermQuery;
30
+ use crate::schema::ParagraphSchema;
31
+ use crate::search_query::SharedTermC;
32
+
33
+ use super::keyword_parser;
34
+ use super::tokenizer::Token;
35
+
36
+ /// Minimum length required to be considered a fuzzy word. Words with smaller
37
+ /// length than this are considered too short to be fuzzy. This is done to avoid
38
+ /// too much noise from short terms.
39
+ const MIN_FUZZY_LEN: usize = 3;
40
+
41
+ /// Minimum length required to be considered a fuzzy prefix. This is again done
42
+ /// to avoid too much noise from short terms.
43
+ const MIN_FUZZY_PREFIX_LEN: usize = 4;
44
+
45
+ /// Levenshtein distance used by all fuzzy terms.
46
+ pub const FUZZY_DISTANCE: u8 = 1;
47
+
48
+ /// Convert a tokenized query into a tantivy fuzzy query
49
+ ///
50
+ /// A fuzzy query will match similarly to a keyword query but some terms will be
51
+ /// searched with a certain Levenshtein distance. Quoted and excluded terms will
52
+ /// remain the same, but literals will be eligible to be fuzzy. All long enough
53
+ /// literals will be searched as fuzzy. The last literal can be searched as a
54
+ /// fuzzy prefix if a suggest-like feature is desired.
55
+ ///
56
+ pub fn parse_fuzzy_query(query: &[Token], term_collector: SharedTermC, last_literal_as_prefix: bool) -> Box<dyn Query> {
57
+ let last_literal_index = if last_literal_as_prefix {
58
+ query.iter().rposition(|token| matches!(token, Token::Literal(_)))
59
+ } else {
60
+ None
61
+ };
62
+
63
+ let mut subqueries = vec![];
64
+ let schema = ParagraphSchema::new();
65
+
66
+ for (i, item) in query.iter().enumerate() {
67
+ match item {
68
+ Token::Literal(literal) => {
69
+ let term = Term::from_field_text(schema.text, literal);
70
+ let distance = FUZZY_DISTANCE;
71
+ let transposition_cost_one = true;
72
+
73
+ let q: Box<dyn Query>;
74
+ if literal.len() < MIN_FUZZY_LEN {
75
+ // to avoid noise, we don't want to match too short terms as fuzzy
76
+ q = Box::new(TermQuery::new(term, IndexRecordOption::Basic));
77
+ } else if matches!(last_literal_index, Some(idx) if idx == i) && literal.len() >= MIN_FUZZY_PREFIX_LEN {
78
+ q = Box::new(FuzzyTermQuery::new_prefix(
79
+ term,
80
+ distance,
81
+ transposition_cost_one,
82
+ term_collector.clone(),
83
+ ));
84
+ } else {
85
+ q = Box::new(FuzzyTermQuery::new(
86
+ term,
87
+ distance,
88
+ transposition_cost_one,
89
+ term_collector.clone(),
90
+ ));
91
+ }
92
+ subqueries.push((Occur::Should, q));
93
+ }
94
+ Token::Quoted(quoted) => {
95
+ let q = keyword_parser::parse_quoted(&schema, quoted);
96
+ subqueries.push((Occur::Should, q));
97
+ }
98
+ Token::Excluded(excluded) => {
99
+ let q = keyword_parser::parse_excluded(&schema, excluded);
100
+ subqueries.push((Occur::Should, q));
101
+ }
102
+ }
103
+ }
104
+
105
+ if subqueries.is_empty() {
106
+ Box::new(AllQuery)
107
+ } else if subqueries.len() == 1 {
108
+ subqueries.pop().unwrap().1
109
+ } else {
110
+ Box::new(BooleanQuery::new(subqueries))
111
+ }
112
+ }
113
+
114
+ #[cfg(test)]
115
+ mod tests {
116
+ use crate::search_query::TermCollector;
117
+
118
+ use super::*;
119
+
120
+ #[test]
121
+ fn test_short_literals_do_not_fuzzy() {
122
+ let term_collector = SharedTermC::from(TermCollector::new());
123
+
124
+ // literal shorter than MIN_FUZZY_LEN will become a TermQuery
125
+ let literal = "ab";
126
+ assert!(literal.len() < MIN_FUZZY_LEN);
127
+ let query = [Token::Literal(literal.into())];
128
+ let fuzzy = parse_fuzzy_query(&query, term_collector.clone(), false);
129
+ assert!(fuzzy.is::<TermQuery>());
130
+ let q = fuzzy.downcast::<TermQuery>().unwrap();
131
+ assert_eq!(q.term().value().as_str(), Some(literal));
132
+ }
133
+
134
+ #[test]
135
+ fn test_fuzzy_literals() {
136
+ let term_collector = SharedTermC::from(TermCollector::new());
137
+
138
+ let literal = "abcd";
139
+ assert!(literal.len() >= MIN_FUZZY_LEN);
140
+ let query = [Token::Literal(literal.into())];
141
+ let fuzzy = parse_fuzzy_query(&query, term_collector.clone(), false);
142
+ assert!(fuzzy.is::<FuzzyTermQuery>());
143
+ assert!(!fuzzy.downcast::<FuzzyTermQuery>().unwrap().is_prefix());
144
+ }
145
+
146
+ #[test]
147
+ fn test_fuzzy_prefix() {
148
+ let term_collector = SharedTermC::from(TermCollector::new());
149
+
150
+ // literals longer than the min fuzzy prefix become prefix if they are
151
+ // last and the flag is enabled
152
+
153
+ let literal = "abcd";
154
+ assert!(literal.len() >= MIN_FUZZY_PREFIX_LEN);
155
+ let query = [Token::Literal(literal.into())];
156
+
157
+ let fuzzy = parse_fuzzy_query(&query, term_collector.clone(), false);
158
+ assert!(fuzzy.is::<FuzzyTermQuery>());
159
+ assert!(!fuzzy.downcast::<FuzzyTermQuery>().unwrap().is_prefix());
160
+
161
+ let fuzzy = parse_fuzzy_query(&query, term_collector.clone(), true);
162
+ assert!(fuzzy.is::<FuzzyTermQuery>());
163
+ assert!(fuzzy.downcast::<FuzzyTermQuery>().unwrap().is_prefix());
164
+
165
+ // only the last term is fuzzy prefix
166
+
167
+ let query = [Token::Literal(literal.into()), Token::Literal(literal.into())];
168
+ let fuzzy = parse_fuzzy_query(&query, term_collector.clone(), true);
169
+ assert!(fuzzy.is::<BooleanQuery>());
170
+ let q = fuzzy.downcast::<BooleanQuery>().unwrap();
171
+ let clauses = q.clauses();
172
+ assert_eq!(clauses.len(), 2);
173
+ assert!(clauses[0].1.is::<FuzzyTermQuery>());
174
+ assert!(!clauses[0].1.downcast_ref::<FuzzyTermQuery>().unwrap().is_prefix());
175
+ assert!(clauses[1].1.is::<FuzzyTermQuery>());
176
+ assert!(clauses[1].1.downcast_ref::<FuzzyTermQuery>().unwrap().is_prefix());
177
+
178
+ // however, shorter terms won't become prefix
179
+
180
+ let literal = "abc";
181
+ assert!(literal.len() < MIN_FUZZY_PREFIX_LEN);
182
+ let query = [Token::Literal(literal.into())];
183
+
184
+ let fuzzy = parse_fuzzy_query(&query, term_collector.clone(), false);
185
+ assert!(fuzzy.is::<FuzzyTermQuery>());
186
+ assert!(!fuzzy.downcast::<FuzzyTermQuery>().unwrap().is_prefix());
187
+
188
+ let fuzzy = parse_fuzzy_query(&query, term_collector.clone(), true);
189
+ assert!(fuzzy.is::<FuzzyTermQuery>());
190
+ assert!(!fuzzy.downcast::<FuzzyTermQuery>().unwrap().is_prefix());
191
+ }
192
+ }
@@ -0,0 +1,212 @@
1
+ // Copyright (C) 2021 Bosutech XXI S.L.
2
+ //
3
+ // nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ // For commercial licensing, contact us at info@nuclia.com.
5
+ //
6
+ // AGPL:
7
+ // This program is free software: you can redistribute it and/or modify
8
+ // it under the terms of the GNU Affero General Public License as
9
+ // published by the Free Software Foundation, either version 3 of the
10
+ // License, or (at your option) any later version.
11
+ //
12
+ // This program is distributed in the hope that it will be useful,
13
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ // GNU Affero General Public License for more details.
16
+ //
17
+ // You should have received a copy of the GNU Affero General Public License
18
+ // along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ //
20
+ use tantivy::Term;
21
+ use tantivy::query::{AllQuery, BooleanQuery, EmptyQuery, Occur, PhraseQuery, Query, TermQuery};
22
+ use tantivy::schema::IndexRecordOption;
23
+ use tracing::error;
24
+
25
+ use crate::ParagraphSchema;
26
+
27
+ use super::tokenizer::Token;
28
+
29
+ /// Convert a tokenized query into a tantivy keyword query
30
+ ///
31
+ /// Empty queries will match everything.
32
+ pub fn parse_keyword_query<'a>(query: &'a [Token<'a>], schema: &ParagraphSchema) -> Box<dyn Query> {
33
+ let mut subqueries = vec![];
34
+ for item in query {
35
+ match item {
36
+ Token::Literal(literal) => {
37
+ let q = parse_literal(schema, literal);
38
+ subqueries.push((Occur::Should, q));
39
+ }
40
+ Token::Quoted(quoted) => {
41
+ let q = parse_quoted(schema, quoted);
42
+ subqueries.push((Occur::Should, q));
43
+ }
44
+ Token::Excluded(excluded) => {
45
+ let q = parse_excluded(schema, excluded);
46
+ subqueries.push((Occur::Should, q));
47
+ }
48
+ }
49
+ }
50
+
51
+ if subqueries.is_empty() {
52
+ Box::new(AllQuery)
53
+ } else if subqueries.len() == 1 {
54
+ subqueries.pop().unwrap().1
55
+ } else {
56
+ Box::new(BooleanQuery::new(subqueries))
57
+ }
58
+ }
59
+
60
+ #[inline]
61
+ pub fn parse_literal(schema: &ParagraphSchema, literal: &str) -> Box<dyn Query> {
62
+ Box::new(TermQuery::new(
63
+ Term::from_field_text(schema.text, literal),
64
+ IndexRecordOption::Basic,
65
+ ))
66
+ }
67
+
68
+ pub fn parse_quoted(schema: &ParagraphSchema, quoted: &str) -> Box<dyn Query> {
69
+ let mut terms: Vec<Term> = quoted
70
+ .split_whitespace()
71
+ .map(|word| Term::from_field_text(schema.text, word))
72
+ .collect();
73
+
74
+ #[allow(clippy::comparison_chain)]
75
+ if terms.len() == 1 {
76
+ // phrase queries must have more than one term, so we use a term query
77
+ let term = terms.remove(0); // safe because terms.len() == 1
78
+ Box::new(TermQuery::new(term, IndexRecordOption::Basic))
79
+ } else if terms.len() > 1 {
80
+ Box::new(PhraseQuery::new(terms))
81
+ } else {
82
+ debug_assert!(
83
+ false,
84
+ "Quoted content should have been validated to not only contain whitespaces"
85
+ );
86
+ // we return a fallback to protect us from tokenizer errors, but this branch should never
87
+ // happen
88
+ error!("Keyword tokenizer build a query with a only whitespaces Quoted token!");
89
+ Box::new(EmptyQuery)
90
+ }
91
+ }
92
+
93
+ #[inline]
94
+ pub fn parse_excluded(schema: &ParagraphSchema, excluded: &str) -> Box<dyn Query> {
95
+ Box::new(BooleanQuery::new(vec![
96
+ (Occur::Must, Box::new(AllQuery)),
97
+ (
98
+ Occur::MustNot,
99
+ Box::new(TermQuery::new(
100
+ Term::from_field_text(schema.text, excluded),
101
+ IndexRecordOption::Basic,
102
+ )),
103
+ ),
104
+ ]))
105
+ }
106
+
107
+ #[cfg(test)]
108
+ mod tests {
109
+ use super::*;
110
+
111
+ #[test]
112
+ fn test_empty_query_is_all_query() {
113
+ let schema = ParagraphSchema::new();
114
+ let query = parse_keyword_query(&[], &schema);
115
+ assert!(query.is::<AllQuery>());
116
+ }
117
+
118
+ #[test]
119
+ fn test_one_clause_simplification() {
120
+ let schema = ParagraphSchema::new();
121
+
122
+ let query = parse_keyword_query(&[Token::Literal("nucliadb".into())], &schema);
123
+ let term = extract_term_from(&query);
124
+ assert_eq!(*term, Term::from_field_text(schema.text, "nucliadb"));
125
+ }
126
+
127
+ #[test]
128
+ fn test_tantivy_query_conversion() {
129
+ let schema = ParagraphSchema::new();
130
+
131
+ // nucliadb -is a "RAG database" with "superpowers"
132
+ let query = vec![
133
+ Token::Literal("nucliadb".into()),
134
+ Token::Excluded("is".into()),
135
+ Token::Literal("a".into()),
136
+ Token::Quoted("RAG database".into()),
137
+ Token::Literal("with".into()),
138
+ Token::Quoted("superpowers".into()),
139
+ ];
140
+ let r = downcast_boolean_query(parse_keyword_query(&query, &schema));
141
+ let clauses = r.clauses();
142
+ assert_eq!(clauses.len(), 6);
143
+
144
+ // term: nucliadb
145
+ assert_eq!(clauses[0].0, Occur::Should);
146
+ let term = extract_term_from(&clauses[0].1);
147
+ assert_eq!(*term, Term::from_field_text(schema.text, "nucliadb"));
148
+
149
+ // excluded term: is
150
+ assert_eq!(clauses[1].0, Occur::Should);
151
+ let subquery = clauses[1]
152
+ .1
153
+ .downcast_ref::<BooleanQuery>()
154
+ .expect("BooleanQuery expected");
155
+ let subclauses = subquery.clauses();
156
+ assert_eq!(subclauses.len(), 2);
157
+ assert_eq!(subclauses[0].0, Occur::Must);
158
+ assert!(subclauses[0].1.downcast_ref::<AllQuery>().is_some());
159
+ assert_eq!(subclauses[1].0, Occur::MustNot);
160
+ let term = extract_term_from(&subclauses[1].1);
161
+ assert_eq!(*term, Term::from_field_text(schema.text, "is"));
162
+
163
+ // term: a
164
+ assert_eq!(clauses[2].0, Occur::Should);
165
+ let term = extract_term_from(&clauses[2].1);
166
+ assert_eq!(*term, Term::from_field_text(schema.text, "a"));
167
+
168
+ // exact: RAG database
169
+ assert_eq!(clauses[3].0, Occur::Should);
170
+ let terms = extract_phrase_terms_from(&clauses[3].1);
171
+ assert_eq!(
172
+ *terms,
173
+ vec![
174
+ Term::from_field_text(schema.text, "RAG"),
175
+ Term::from_field_text(schema.text, "database"),
176
+ ]
177
+ );
178
+
179
+ // term: with
180
+ assert_eq!(clauses[4].0, Occur::Should);
181
+ let term = extract_term_from(&clauses[4].1);
182
+ assert_eq!(*term, Term::from_field_text(schema.text, "with"));
183
+
184
+ // exact: superpowers.
185
+ // An exact match of 1 word is converted into a term query
186
+ assert_eq!(clauses[5].0, Occur::Should);
187
+ let term = extract_term_from(&clauses[5].1);
188
+ assert_eq!(*term, Term::from_field_text(schema.text, "superpowers"));
189
+ }
190
+
191
+ fn downcast_boolean_query(q: Box<dyn Query>) -> Box<BooleanQuery> {
192
+ let q = q.downcast::<BooleanQuery>();
193
+ assert!(q.is_ok(), "BooleanQuery expected");
194
+ q.unwrap()
195
+ }
196
+
197
+ #[allow(clippy::borrowed_box)]
198
+ fn extract_term_from(query: &Box<dyn Query>) -> &Term {
199
+ let q = query.downcast_ref::<TermQuery>();
200
+ assert!(q.is_some(), "TermQuery expected");
201
+ let q = q.unwrap();
202
+ q.term()
203
+ }
204
+
205
+ #[allow(clippy::borrowed_box)]
206
+ fn extract_phrase_terms_from(query: &Box<dyn Query>) -> Vec<Term> {
207
+ let q = query.downcast_ref::<PhraseQuery>();
208
+ assert!(q.is_some(), "PhraseQuery expected");
209
+ let q = q.unwrap();
210
+ q.phrase_terms()
211
+ }
212
+ }
@@ -22,6 +22,8 @@ use std::env;
22
22
 
23
23
  use lazy_static::lazy_static;
24
24
 
25
+ use super::Token;
26
+
25
27
  lazy_static! {
26
28
  static ref STOP_WORDS: StopWords = build_stop_words();
27
29
  }
@@ -90,6 +92,37 @@ pub fn is_stop_word(word: &str) -> bool {
90
92
  STOP_WORDS.is_stop_word(word)
91
93
  }
92
94
 
95
+ /// Consume a tokenized query and filter out any stop words from it.
96
+ ///
97
+ /// A stop word is any literal matching the following criteria:
98
+ /// - It is present in the given list of stop words
99
+ /// - Is **not** the last term in the query
100
+ ///
101
+ /// The last term of the query can be used as fuzzy prefix and must be preserved.
102
+ pub fn remove_stop_words(mut query: Vec<Token>) -> Vec<Token> {
103
+ if query.is_empty() {
104
+ return query;
105
+ }
106
+ // as we don't want to remove the last literal, we pop the last element and
107
+ // always put it in the filtered query. We don't really care whether the last
108
+ // element is a literal or not.
109
+ let last: Token<'_> = query.pop().unwrap(); // safe as query is not empty
110
+
111
+ query
112
+ .into_iter()
113
+ .filter(|token| !is_stop_word_token(token))
114
+ .chain([last])
115
+ .collect()
116
+ }
117
+
118
+ fn is_stop_word_token(token: &Token) -> bool {
119
+ if let Token::Literal(lit) = token {
120
+ is_stop_word(&lit.to_lowercase())
121
+ } else {
122
+ false
123
+ }
124
+ }
125
+
93
126
  #[cfg(test)]
94
127
  mod tests {
95
128
 
@@ -122,4 +155,78 @@ mod tests {
122
155
  assert!(elapsed < 1000.0, "{}", elapsed);
123
156
  }
124
157
  }
158
+
159
+ #[test]
160
+ fn test_stop_word_tokens() {
161
+ assert!(is_stop_word_token(&Token::Literal("is".into())));
162
+ assert!(!is_stop_word_token(&Token::Excluded("is".into())));
163
+ assert!(!is_stop_word_token(&Token::Quoted("is".into())));
164
+
165
+ assert!(!is_stop_word_token(&Token::Literal("music".into())));
166
+ assert!(!is_stop_word_token(&Token::Excluded("music".into())));
167
+ assert!(!is_stop_word_token(&Token::Quoted("music".into())));
168
+ }
169
+
170
+ #[test]
171
+ fn test_stop_words_detection_is_case_insensitive() {
172
+ assert!(is_stop_word_token(&Token::Literal("is".into())));
173
+ assert!(is_stop_word_token(&Token::Literal("IS".into())));
174
+ }
175
+
176
+ #[test]
177
+ fn test_token_with_stop_word_detection() {
178
+ // Stop word in the middle is removed
179
+ let tokens = vec![
180
+ Token::Literal("music".into()),
181
+ Token::Literal("is".into()),
182
+ Token::Literal("classical".into()),
183
+ ];
184
+ let filtered = remove_stop_words(tokens);
185
+ assert_eq!(
186
+ filtered,
187
+ vec![Token::Literal("music".into()), Token::Literal("classical".into()),]
188
+ );
189
+
190
+ // Only literals are stop words
191
+ let tokens = vec![
192
+ Token::Literal("music".into()),
193
+ Token::Quoted("is".into()),
194
+ Token::Literal("classical".into()),
195
+ ];
196
+ let filtered = remove_stop_words(tokens);
197
+ assert_eq!(
198
+ filtered,
199
+ vec![
200
+ Token::Literal("music".into()),
201
+ Token::Quoted("is".into()),
202
+ Token::Literal("classical".into()),
203
+ ]
204
+ );
205
+
206
+ // Stop words at the end are not removed
207
+ let tokens = vec![
208
+ Token::Literal("classical".into()),
209
+ Token::Literal("music".into()),
210
+ Token::Literal("is".into()),
211
+ ];
212
+ let filtered = remove_stop_words(tokens);
213
+ assert_eq!(
214
+ filtered,
215
+ vec![
216
+ Token::Literal("classical".into()),
217
+ Token::Literal("music".into()),
218
+ Token::Literal("is".into()),
219
+ ]
220
+ );
221
+
222
+ // Everything is a stop word, but last is not removed
223
+ let tokens = vec![
224
+ Token::Literal("we".into()),
225
+ Token::Literal("shouldn't".into()),
226
+ Token::Literal("be".into()),
227
+ Token::Literal("here".into()),
228
+ ];
229
+ let filtered = remove_stop_words(tokens);
230
+ assert_eq!(filtered, vec![Token::Literal("here".into()),]);
231
+ }
125
232
  }