nidx-binding 6.7.1.post486__tar.gz → 6.7.1.post489__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/Cargo.lock +1 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/PKG-INFO +1 -1
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/Cargo.toml +1 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/src/fuzzy_query.rs +11 -1
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/src/lib.rs +1 -1
- nidx_binding-6.7.1.post489/nidx_paragraph/src/query_parser/fuzzy_parser.rs +192 -0
- nidx_binding-6.7.1.post489/nidx_paragraph/src/query_parser/keyword_parser.rs +212 -0
- {nidx_binding-6.7.1.post486/nidx_paragraph/src → nidx_binding-6.7.1.post489/nidx_paragraph/src/query_parser}/stop_words.rs +107 -0
- nidx_binding-6.7.1.post489/nidx_paragraph/src/query_parser/tokenizer.rs +454 -0
- nidx_binding-6.7.1.post489/nidx_paragraph/src/query_parser.rs +156 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/src/reader.rs +80 -88
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/src/request_types.rs +13 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/src/schema.rs +1 -0
- nidx_binding-6.7.1.post489/nidx_paragraph/src/search_query.rs +236 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/src/search_response.rs +20 -9
- nidx_binding-6.7.1.post489/nidx_paragraph/tests/reader.rs +507 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_protos/pyproject.toml +1 -1
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/pyproject.toml +1 -1
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_suggest.rs +5 -3
- nidx_binding-6.7.1.post486/nidx_paragraph/src/search_query.rs +0 -489
- nidx_binding-6.7.1.post486/nidx_paragraph/tests/reader.rs +0 -495
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.config/nextest.toml +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-0cfce9b29547f8f5bafa6e440f86103be7b8c4ad2fd92db9ac223f4efbe23d10.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-1a561eed00f3dbe868bf5030059793300209179dc8fb73e4b57a54b5e81262fe.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-1d3fca2682e25a01143da92285297f134a6a105a96f64d87e0db3abb219855e4.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-249b3b57c27a71baa823f1fe0f0bba9c9af36f61c28f731e58beea60ec48e687.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-24cb6b683daa42d7125f862e25943ab4be7bf275cd8739f8da4859d701795e1a.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-263c8fce6db5b03bbd012fafdba6943cbee6ed7eb8976cdef4f5b01dde7ca6fd.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-2a5d92fb1638df830a4477a7cdf24e6db6b43034b7bbe74fdfb63e8afe2c4071.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-2b065a363f58caed60e3706603c1260dbf5a4c795604a5b68edda22eb07fec1b.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-3fc3cb39934683de8cd475ce1368c8373453eb1e01f81587d66b9d14b109ce6e.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-48f33b77b7c1633467b0b2efcaa1d3c207e7757e4f1d83b40d15e6ca365f7771.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-4ae09f2c08e2f324bee01bb8487a8f37678a1c5e9d327339235c50d4921a8949.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-4d7a76fa413c9ef0ce2a47ac7bb7e01d3e6a2aabded9487d21010a53efee8852.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-4fcbdd6657c7dc9b60b3a563dd41711b3dbcf72ce063427b7a01f8cddf34c244.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-577109ac00ccfbd38ecaccab94116f2f46a4caf5612afa372cded197123c1e08.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-5db25f97d8578d6d78f2f6bd4b72cc82a9b1b82805c6422d967ac63b20d99db4.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-5ec3233a3a23e926055056d46bdde17836a633066dbb5f349502648cd3ea9a60.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-66edb6ea424d8681927dcddb6bac5f1239175f4775d1f40417ba15054b0c6f19.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-6f9c6d201c1b5712efb68c363bffd3e0169c11f2a8f925e8cd4e8808599ff7b4.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-733c3ebacc86f444bf5e2dd79ade660c291e88a00fc09b722f6e2e191545874c.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-7a3bf27c330c468a596e8a297cf7d8b192e31e67ecc5177c1267f579e8e247c7.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-7a7e59e47b30b12237511fd3d7da2d17b0471ad2b006af48d6a6f587c779692b.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-7dcbb33312cc9f11ae3a6d73b1ace017a9f19a8bf8f10304fc57977c8efeadff.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-7efa7c0d747afc4b6aed0586ff846c27839c3213ff7ee9f30c89b0d0f17e60e3.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-8493140d788604d498a4e48da4158708572ccc9d60185290a00d549cc84533db.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-8493bb0059b013eaca42fd10cd7d04f0d06a8acaed379eff0d23f3229edde9ee.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-87996b3d6c7a2195438d7038015b06949102bce8c7b8cd8db1f83aaf23cbe489.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-8d33717587c6ee8f5fc339a80b1212a73d6c03e45856b1d55457fc8074709dd0.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-8f096d8171b89f9615d18f95d696dc9e4fb3674e103161a713cdc806f7a68506.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-917732a56ee04bf3a6e127319dda8225210869c82f9828d878162394dba4e078.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-95fe4ef93ee90733db1b67ed7987f80b5aac792f1590b979c68b418d1599eb98.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-9b67658569b343d8b4b61ae0a7dc721f367f2ba33c7b69b9e68bfd5c9bff5206.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-9c8062ea55d070afef68309e58fa987eb37fda44e1efbf68c8ba2af7846cc968.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-a06e1d9f6f95e4c4c2b98310ebddcc9d963cc033582bf2e945e8bf3a301b4247.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-a55265c9b07bd1399961a6f1e757201fd0eebe868ddaf96437111113d80fce92.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-a60ec2f66f1e7b84189e5b089f2087a29ff6a64326a3743dea935bbc58ee77fa.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-a891a37be5c2d7cce775c2dd33726b0318fd3839beab222a1b22bc6174604207.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-a945191bb4b3e37d6823ed3ad499339d007d69983105de8567777d9daf517b28.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-abe9f7832f2bd799ac44008da031e8d8ab52d4f5fbfc2a7e3974e8873bae55b2.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-aca588cca57a85e4d7fcc40c23cd87e57d53d11ca550d78e7e3d5e39e524fcd3.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-b02f8aafc00a7724510772ac41269e368c5bccf03ef7b4590e0ef6fd1a1bf64f.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-b742e17cabe2d64617e9aa64bafc782172f7a4f8023d1b54f952a0fb39f6b2b8.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-b94e349dbc0daec57f8f8f6e9e2dffb06100b1bb2b41d297c9f3b191da37a83d.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-bd9afa22994aba671dbf7b5f89b53c2ee02f53c0442a81265786a6d52d08512f.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-be60554eca98a5899efc6b49785cecd6444a6d39afed9e4a884ce2dbf162012c.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-bf49702b506c9a1650ece1f8e8d9f14834a902f8caefafe30ded55e2790f2188.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-bfcd21ed704cd305db5c17fcdec7d92aa4ac501913c9c9514d8ff92928c0c7e7.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-c3ab694650f49a75b146fb877a92e48c4f20f0d99f70f8ec859fbb763b01a1e5.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-c55542bb9fae544d87fae6f30e0fe8a9088d12075f4442ab4fe2fcd05e472234.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-cb29a6556d35ac630ee0aa885dd7341cf9573bd3efd216ff8a887b87686b03db.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-d0a1f341a89f5f14696b10baa72db9d95551c2b7e5fc67308fd52dc03dd98a92.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-d2ad0a0ca2649c9e4873cfcc1fc66d2d07cc45d0f65c560b06d7b5f592f4fa8a.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-d6cfe78eb635ba0b89ca4021a4dc8182d18ab5b197f30149cd28488eba4c1df5.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-d729b56dea00e49dcdba8cf0001e2811da27351eabe98212db3b589f18fc6f32.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-d9658bfd4e7170b41d03f2ddf2446d0bf54171c0d39d53bf20af2b8437f2ec48.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-dbba7b3d3289425bae711aedbf73fbc3699f857f86f84d95c3b556d05c5658b0.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-dcb96b649d6d63a58efd5d445453a4f3d7869a56ff714b69bedf3d616a0473ca.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-ebd876fbf5362a5900e75bc05f2f11c73c406ef7da4e95097fc6a1c3d1b8bc54.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-eef5cc6bce1cc14eba8f3e68971724ef181e88cffcedd74673615f2026b89a62.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/.sqlx/query-ef56d5fefc5774040d1ee397beadb475f6af02768c22f0e583c74062e2e821ce.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/Cargo.toml +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/README.md +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/migrations/20241007163501_initial.sql +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/migrations/20241211120039_merge_job_priority.sql +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/migrations/20241211121159_basic_indexes.sql +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/migrations/20241212151105_check_segment_records.sql +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/migrations/20250110145554_in_flight_messages.sql +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_binding/Cargo.toml +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_binding/src/lib.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/src/query_io.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/src/resource_indexer.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/src/set_query.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/README.md +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/ar.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/az.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/bn.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/ca.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/ch.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/da.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/de.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/el.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/en.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/es.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/eu.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/extract.py +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/fi.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/fr.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/he.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/hu.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/id.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/it.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/kk.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/ne.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/nl.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/no.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/pt.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/ro.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/ru.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/sl.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/sv.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/tg.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/stop_words/tr.json +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_paragraph/tests/common/mod.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_protos/Cargo.toml +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_protos/build.py +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_protos/build.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_protos/nidx.proto +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_protos/nodereader.proto +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_protos/noderesources.proto +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_protos/nodewriter.proto +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_protos/src/lib.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/Cargo.toml +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/src/graph_collector.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/src/graph_query_parser.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/src/io_maps.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/src/lib.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/src/reader.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/src/resource_indexer.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/src/schema.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/src/top_unique_n.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/tests/common/mod.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/tests/test_graph_query_parser_search.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/tests/test_graph_search.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_relation/tests/test_writer.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_tantivy/Cargo.toml +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_tantivy/src/index_reader.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_tantivy/src/lib.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_tantivy/src/utils.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_tests/Cargo.toml +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_tests/src/graph.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_tests/src/lib.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/Cargo.toml +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/src/lib.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/src/prefilter.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/src/query_io.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/src/reader.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/src/request_types.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/src/resource_indexer.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/src/schema.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/src/search_query.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/tests/common/mod.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/tests/test_deletions.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/tests/test_flow.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/tests/test_search.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_text/tests/test_streaming.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_types/Cargo.toml +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_types/src/lib.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_types/src/prefilter.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_types/src/query_language.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/Cargo.toml +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/config.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/data_store/v1/node.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/data_store/v1/store.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/data_store/v1/trie.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/data_store/v1/trie_ram.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/data_store/v1.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/data_store/v2/paragraph_store.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/data_store/v2/vector_store.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/data_store/v2.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/data_store.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/data_types.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/formula.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/hnsw/disk_hnsw.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/hnsw/ops_hnsw.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/hnsw/params.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/hnsw/ram_hnsw.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/hnsw.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/indexer.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/inverted_index/fst_index.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/inverted_index/map.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/inverted_index.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/lib.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/multivector.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/query_io.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/request_types.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/searcher.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/segment/tests.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/segment.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/utils.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/vector_types/dense_f32.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/src/vector_types/mod.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/tests/common/mod.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/tests/test_basic_search.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/tests/test_hidden.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/nidx_vector/tests/test_maxsim.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/api/grpc.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/api/shards.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/api.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/control.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/errors.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/grpc_server.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/import_export.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/indexer.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/lib.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/main.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/metadata/deletion.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/metadata/index.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/metadata/index_request.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/metadata/merge_job.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/metadata/segment.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/metadata/shard.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/metadata.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/metrics.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/scheduler/audit_task.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/scheduler/log_merge.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/scheduler/merge_task.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/scheduler/metrics_task.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/scheduler/purge_tasks.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/scheduler/vector_merge.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/scheduler.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/searcher/grpc.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/searcher/index_cache.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/searcher/query_language.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/searcher/query_planner.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/searcher/shard_search.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/searcher/shard_selector.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/searcher/shard_suggest.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/searcher/streams.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/searcher/sync.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/searcher.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/segment_store.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/settings.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/telemetry/duration_layer.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/telemetry/log_format.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/telemetry/middleware.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/telemetry.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/tool.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/utilization_tracker.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/src/worker.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/common/mod.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/common/services.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_date_range_search.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_search_filtering.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_search_relations.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_search_sorting.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_searcher_cluster.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_security_search.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_shards.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_shards_api.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_synced_searcher.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_vector_normalization.rs +0 -0
- {nidx_binding-6.7.1.post486 → nidx_binding-6.7.1.post489}/tests/test_vectorsets.rs +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nidx_binding
|
3
|
-
Version: 6.7.1.post486
|
3
|
+
Version: 6.7.1.post489
|
4
4
|
Classifier: Programming Language :: Rust
|
5
5
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
6
6
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
@@ -12,6 +12,7 @@ levenshtein_automata = "0.2.1"
|
|
12
12
|
nidx_protos = { version = "0.1.0", path = "../nidx_protos" }
|
13
13
|
nidx_tantivy = { version = "0.1.0", path = "../nidx_tantivy" }
|
14
14
|
nidx_types = { version = "0.1.0", path = "../nidx_types" }
|
15
|
+
nom = "7" # same version as used by tantivy
|
15
16
|
once_cell = "1.20.2"
|
16
17
|
regex = "1.11.1"
|
17
18
|
serde_json = "1.0.132"
|
@@ -184,7 +184,12 @@ pub struct FuzzyTermQuery {
|
|
184
184
|
|
185
185
|
impl std::fmt::Debug for FuzzyTermQuery {
|
186
186
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
187
|
-
f.
|
187
|
+
f.write_fmt(format_args!(
|
188
|
+
"Fuzzy {{ term: {}, distance: {}, prefix: {} }}",
|
189
|
+
self.term.value().as_str().unwrap_or("<unknown>"),
|
190
|
+
self.distance,
|
191
|
+
self.prefix
|
192
|
+
))
|
188
193
|
}
|
189
194
|
}
|
190
195
|
impl FuzzyTermQuery {
|
@@ -210,6 +215,11 @@ impl FuzzyTermQuery {
|
|
210
215
|
}
|
211
216
|
}
|
212
217
|
|
218
|
+
#[cfg(test)]
|
219
|
+
pub fn is_prefix(&self) -> bool {
|
220
|
+
self.prefix
|
221
|
+
}
|
222
|
+
|
213
223
|
fn specialized_weight(&self) -> tantivy::Result<AutomatonWeight<DfaWrapper>> {
|
214
224
|
// LEV_BUILDER is a HashMap, whose `get` method returns an Option
|
215
225
|
match LEV_BUILDER.get(&(self.distance, self.transposition_cost_one)) {
|
@@ -20,6 +20,7 @@
|
|
20
20
|
|
21
21
|
mod fuzzy_query;
|
22
22
|
mod query_io;
|
23
|
+
mod query_parser;
|
23
24
|
mod reader;
|
24
25
|
mod request_types;
|
25
26
|
mod resource_indexer;
|
@@ -27,7 +28,6 @@ mod schema;
|
|
27
28
|
mod search_query;
|
28
29
|
mod search_response;
|
29
30
|
mod set_query;
|
30
|
-
mod stop_words;
|
31
31
|
|
32
32
|
use nidx_protos::{ParagraphItem, ParagraphSearchResponse, StreamRequest};
|
33
33
|
use nidx_tantivy::{
|
@@ -0,0 +1,192 @@
|
|
1
|
+
// Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
//
|
3
|
+
// nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
// For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
//
|
6
|
+
// AGPL:
|
7
|
+
// This program is free software: you can redistribute it and/or modify
|
8
|
+
// it under the terms of the GNU Affero General Public License as
|
9
|
+
// published by the Free Software Foundation, either version 3 of the
|
10
|
+
// License, or (at your option) any later version.
|
11
|
+
//
|
12
|
+
// This program is distributed in the hope that it will be useful,
|
13
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
// GNU Affero General Public License for more details.
|
16
|
+
//
|
17
|
+
// You should have received a copy of the GNU Affero General Public License
|
18
|
+
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
//
|
20
|
+
|
21
|
+
use tantivy::Term;
|
22
|
+
use tantivy::query::AllQuery;
|
23
|
+
use tantivy::query::BooleanQuery;
|
24
|
+
use tantivy::query::Occur;
|
25
|
+
use tantivy::query::Query;
|
26
|
+
use tantivy::query::TermQuery;
|
27
|
+
use tantivy::schema::IndexRecordOption;
|
28
|
+
|
29
|
+
use crate::fuzzy_query::FuzzyTermQuery;
|
30
|
+
use crate::schema::ParagraphSchema;
|
31
|
+
use crate::search_query::SharedTermC;
|
32
|
+
|
33
|
+
use super::keyword_parser;
|
34
|
+
use super::tokenizer::Token;
|
35
|
+
|
36
|
+
/// Minimum length required to be considered a fuzzy word. Words with smaller
|
37
|
+
/// length than this are considered too short to be fuzzy. This is done to avoid
|
38
|
+
/// too much noise from short terms.
|
39
|
+
const MIN_FUZZY_LEN: usize = 3;
|
40
|
+
|
41
|
+
/// Minimum length required to be considered a fuzzy prefix. This is again done
|
42
|
+
/// to avoid too much noise from short terms.
|
43
|
+
const MIN_FUZZY_PREFIX_LEN: usize = 4;
|
44
|
+
|
45
|
+
/// Levenshtein distance used by all fuzzy terms.
|
46
|
+
pub const FUZZY_DISTANCE: u8 = 1;
|
47
|
+
|
48
|
+
/// Convert a tokenized query into a tantivy fuzzy query
|
49
|
+
///
|
50
|
+
/// A fuzzy query will match similarly to a keyword query but some terms will be
|
51
|
+
/// searched with certain Levenshtein distance. Quoted and excluded terms will
|
52
|
+
/// remain the same, but literals will be elegible to be fuzzy. All long enough
|
53
|
+
/// literals will be searched as fuzzy. The last literal can be searched as a
|
54
|
+
/// fuzzy prefix if a suggest-like feature is desired.
|
55
|
+
///
|
56
|
+
pub fn parse_fuzzy_query(query: &[Token], term_collector: SharedTermC, last_literal_as_prefix: bool) -> Box<dyn Query> {
|
57
|
+
let last_literal_index = if last_literal_as_prefix {
|
58
|
+
query.iter().rposition(|token| matches!(token, Token::Literal(_)))
|
59
|
+
} else {
|
60
|
+
None
|
61
|
+
};
|
62
|
+
|
63
|
+
let mut subqueries = vec![];
|
64
|
+
let schema = ParagraphSchema::new();
|
65
|
+
|
66
|
+
for (i, item) in query.iter().enumerate() {
|
67
|
+
match item {
|
68
|
+
Token::Literal(literal) => {
|
69
|
+
let term = Term::from_field_text(schema.text, literal);
|
70
|
+
let distance = FUZZY_DISTANCE;
|
71
|
+
let transposition_cost_one = true;
|
72
|
+
|
73
|
+
let q: Box<dyn Query>;
|
74
|
+
if literal.len() < MIN_FUZZY_LEN {
|
75
|
+
// to avoid noise, we don't want to match too short terms as fuzzy
|
76
|
+
q = Box::new(TermQuery::new(term, IndexRecordOption::Basic));
|
77
|
+
} else if matches!(last_literal_index, Some(idx) if idx == i) && literal.len() >= MIN_FUZZY_PREFIX_LEN {
|
78
|
+
q = Box::new(FuzzyTermQuery::new_prefix(
|
79
|
+
term,
|
80
|
+
distance,
|
81
|
+
transposition_cost_one,
|
82
|
+
term_collector.clone(),
|
83
|
+
));
|
84
|
+
} else {
|
85
|
+
q = Box::new(FuzzyTermQuery::new(
|
86
|
+
term,
|
87
|
+
distance,
|
88
|
+
transposition_cost_one,
|
89
|
+
term_collector.clone(),
|
90
|
+
));
|
91
|
+
}
|
92
|
+
subqueries.push((Occur::Should, q));
|
93
|
+
}
|
94
|
+
Token::Quoted(quoted) => {
|
95
|
+
let q = keyword_parser::parse_quoted(&schema, quoted);
|
96
|
+
subqueries.push((Occur::Should, q));
|
97
|
+
}
|
98
|
+
Token::Excluded(excluded) => {
|
99
|
+
let q = keyword_parser::parse_excluded(&schema, excluded);
|
100
|
+
subqueries.push((Occur::Should, q));
|
101
|
+
}
|
102
|
+
}
|
103
|
+
}
|
104
|
+
|
105
|
+
if subqueries.is_empty() {
|
106
|
+
Box::new(AllQuery)
|
107
|
+
} else if subqueries.len() == 1 {
|
108
|
+
subqueries.pop().unwrap().1
|
109
|
+
} else {
|
110
|
+
Box::new(BooleanQuery::new(subqueries))
|
111
|
+
}
|
112
|
+
}
|
113
|
+
|
114
|
+
#[cfg(test)]
mod tests {
    use crate::search_query::TermCollector;

    use super::*;

    /// Assert that `query` is a `FuzzyTermQuery` whose prefix flag equals
    /// `expect_prefix`.
    fn assert_fuzzy(query: Box<dyn Query>, expect_prefix: bool) {
        assert!(query.is::<FuzzyTermQuery>());
        let fuzzy = query.downcast::<FuzzyTermQuery>().unwrap();
        assert_eq!(fuzzy.is_prefix(), expect_prefix);
    }

    #[test]
    fn test_short_literals_do_not_fuzzy() {
        let collector = SharedTermC::from(TermCollector::new());

        // a literal below MIN_FUZZY_LEN is searched as an exact term
        let word = "ab";
        assert!(word.len() < MIN_FUZZY_LEN);
        let tokens = [Token::Literal(word.into())];

        let parsed = parse_fuzzy_query(&tokens, collector.clone(), false);
        assert!(parsed.is::<TermQuery>());
        let exact = parsed.downcast::<TermQuery>().unwrap();
        assert_eq!(exact.term().value().as_str(), Some(word));
    }

    #[test]
    fn test_fuzzy_literals() {
        let collector = SharedTermC::from(TermCollector::new());

        let word = "abcd";
        assert!(word.len() >= MIN_FUZZY_LEN);
        let tokens = [Token::Literal(word.into())];

        assert_fuzzy(parse_fuzzy_query(&tokens, collector.clone(), false), false);
    }

    #[test]
    fn test_fuzzy_prefix() {
        let collector = SharedTermC::from(TermCollector::new());

        // A literal at least MIN_FUZZY_PREFIX_LEN long turns into a prefix only
        // when it is the last one and the flag is enabled.
        let word = "abcd";
        assert!(word.len() >= MIN_FUZZY_PREFIX_LEN);
        let tokens = [Token::Literal(word.into())];

        assert_fuzzy(parse_fuzzy_query(&tokens, collector.clone(), false), false);
        assert_fuzzy(parse_fuzzy_query(&tokens, collector.clone(), true), true);

        // With several literals, only the last one is a fuzzy prefix.
        let tokens = [Token::Literal(word.into()), Token::Literal(word.into())];
        let parsed = parse_fuzzy_query(&tokens, collector.clone(), true);
        assert!(parsed.is::<BooleanQuery>());
        let boolean = parsed.downcast::<BooleanQuery>().unwrap();
        let clauses = boolean.clauses();
        assert_eq!(clauses.len(), 2);
        for (clause, expect_prefix) in clauses.iter().zip([false, true]) {
            assert!(clause.1.is::<FuzzyTermQuery>());
            let fuzzy = clause.1.downcast_ref::<FuzzyTermQuery>().unwrap();
            assert_eq!(fuzzy.is_prefix(), expect_prefix);
        }

        // A literal shorter than MIN_FUZZY_PREFIX_LEN never becomes a prefix.
        let word = "abc";
        assert!(word.len() < MIN_FUZZY_PREFIX_LEN);
        let tokens = [Token::Literal(word.into())];

        assert_fuzzy(parse_fuzzy_query(&tokens, collector.clone(), false), false);
        assert_fuzzy(parse_fuzzy_query(&tokens, collector.clone(), true), false);
    }
}
|
@@ -0,0 +1,212 @@
|
|
1
|
+
// Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
//
|
3
|
+
// nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
// For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
//
|
6
|
+
// AGPL:
|
7
|
+
// This program is free software: you can redistribute it and/or modify
|
8
|
+
// it under the terms of the GNU Affero General Public License as
|
9
|
+
// published by the Free Software Foundation, either version 3 of the
|
10
|
+
// License, or (at your option) any later version.
|
11
|
+
//
|
12
|
+
// This program is distributed in the hope that it will be useful,
|
13
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
// GNU Affero General Public License for more details.
|
16
|
+
//
|
17
|
+
// You should have received a copy of the GNU Affero General Public License
|
18
|
+
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
//
|
20
|
+
use tantivy::Term;
|
21
|
+
use tantivy::query::{AllQuery, BooleanQuery, EmptyQuery, Occur, PhraseQuery, Query, TermQuery};
|
22
|
+
use tantivy::schema::IndexRecordOption;
|
23
|
+
use tracing::error;
|
24
|
+
|
25
|
+
use crate::ParagraphSchema;
|
26
|
+
|
27
|
+
use super::tokenizer::Token;
|
28
|
+
|
29
|
+
/// Convert a tokenized query into a tantivy keyword query
|
30
|
+
///
|
31
|
+
/// Empty queries will match everything.
|
32
|
+
pub fn parse_keyword_query<'a>(query: &'a [Token<'a>], schema: &ParagraphSchema) -> Box<dyn Query> {
|
33
|
+
let mut subqueries = vec![];
|
34
|
+
for item in query {
|
35
|
+
match item {
|
36
|
+
Token::Literal(literal) => {
|
37
|
+
let q = parse_literal(schema, literal);
|
38
|
+
subqueries.push((Occur::Should, q));
|
39
|
+
}
|
40
|
+
Token::Quoted(quoted) => {
|
41
|
+
let q = parse_quoted(schema, quoted);
|
42
|
+
subqueries.push((Occur::Should, q));
|
43
|
+
}
|
44
|
+
Token::Excluded(excluded) => {
|
45
|
+
let q = parse_excluded(schema, excluded);
|
46
|
+
subqueries.push((Occur::Should, q));
|
47
|
+
}
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
if subqueries.is_empty() {
|
52
|
+
Box::new(AllQuery)
|
53
|
+
} else if subqueries.len() == 1 {
|
54
|
+
subqueries.pop().unwrap().1
|
55
|
+
} else {
|
56
|
+
Box::new(BooleanQuery::new(subqueries))
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
#[inline]
|
61
|
+
pub fn parse_literal(schema: &ParagraphSchema, literal: &str) -> Box<dyn Query> {
|
62
|
+
Box::new(TermQuery::new(
|
63
|
+
Term::from_field_text(schema.text, literal),
|
64
|
+
IndexRecordOption::Basic,
|
65
|
+
))
|
66
|
+
}
|
67
|
+
|
68
|
+
pub fn parse_quoted(schema: &ParagraphSchema, quoted: &str) -> Box<dyn Query> {
|
69
|
+
let mut terms: Vec<Term> = quoted
|
70
|
+
.split_whitespace()
|
71
|
+
.map(|word| Term::from_field_text(schema.text, word))
|
72
|
+
.collect();
|
73
|
+
|
74
|
+
#[allow(clippy::comparison_chain)]
|
75
|
+
if terms.len() == 1 {
|
76
|
+
// phrase queries must have more than one term, so we use a term query
|
77
|
+
let term = terms.remove(0); // safe because terms.len() == 1
|
78
|
+
Box::new(TermQuery::new(term, IndexRecordOption::Basic))
|
79
|
+
} else if terms.len() > 1 {
|
80
|
+
Box::new(PhraseQuery::new(terms))
|
81
|
+
} else {
|
82
|
+
debug_assert!(
|
83
|
+
false,
|
84
|
+
"Quoted content should have been validated to not only contain whitespaces"
|
85
|
+
);
|
86
|
+
// we return a fallback to protect us from tokenizer errors, but this branch should never
|
87
|
+
// happen
|
88
|
+
error!("Keyword tokenizer build a query with a only whitespaces Quoted token!");
|
89
|
+
Box::new(EmptyQuery)
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
#[inline]
|
94
|
+
pub fn parse_excluded(schema: &ParagraphSchema, excluded: &str) -> Box<dyn Query> {
|
95
|
+
Box::new(BooleanQuery::new(vec![
|
96
|
+
(Occur::Must, Box::new(AllQuery)),
|
97
|
+
(
|
98
|
+
Occur::MustNot,
|
99
|
+
Box::new(TermQuery::new(
|
100
|
+
Term::from_field_text(schema.text, excluded),
|
101
|
+
IndexRecordOption::Basic,
|
102
|
+
)),
|
103
|
+
),
|
104
|
+
]))
|
105
|
+
}
|
106
|
+
|
107
|
+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_empty_query_is_all_query() {
        let schema = ParagraphSchema::new();
        let parsed = parse_keyword_query(&[], &schema);
        assert!(parsed.is::<AllQuery>());
    }

    #[test]
    fn test_one_clause_simplification() {
        let schema = ParagraphSchema::new();

        // a single token must come back as a bare term query
        let parsed = parse_keyword_query(&[Token::Literal("nucliadb".into())], &schema);
        assert_eq!(
            extract_term_from(&parsed),
            &Term::from_field_text(schema.text, "nucliadb")
        );
    }

    #[test]
    fn test_tantivy_query_conversion() {
        let schema = ParagraphSchema::new();
        let text_term = |word: &str| Term::from_field_text(schema.text, word);

        // nucliadb -is a "RAG database" with "superpowers"
        let tokens = vec![
            Token::Literal("nucliadb".into()),
            Token::Excluded("is".into()),
            Token::Literal("a".into()),
            Token::Quoted("RAG database".into()),
            Token::Literal("with".into()),
            Token::Quoted("superpowers".into()),
        ];
        let boolean = downcast_boolean_query(parse_keyword_query(&tokens, &schema));
        let clauses = boolean.clauses();
        assert_eq!(clauses.len(), 6);

        // term: nucliadb
        let (occur, subquery) = &clauses[0];
        assert_eq!(*occur, Occur::Should);
        assert_eq!(extract_term_from(subquery), &text_term("nucliadb"));

        // excluded term: is
        let (occur, subquery) = &clauses[1];
        assert_eq!(*occur, Occur::Should);
        let exclusion = subquery
            .downcast_ref::<BooleanQuery>()
            .expect("BooleanQuery expected");
        let inner = exclusion.clauses();
        assert_eq!(inner.len(), 2);
        assert_eq!(inner[0].0, Occur::Must);
        assert!(inner[0].1.downcast_ref::<AllQuery>().is_some());
        assert_eq!(inner[1].0, Occur::MustNot);
        assert_eq!(extract_term_from(&inner[1].1), &text_term("is"));

        // term: a
        let (occur, subquery) = &clauses[2];
        assert_eq!(*occur, Occur::Should);
        assert_eq!(extract_term_from(subquery), &text_term("a"));

        // exact: RAG database
        let (occur, subquery) = &clauses[3];
        assert_eq!(*occur, Occur::Should);
        assert_eq!(
            extract_phrase_terms_from(subquery),
            vec![text_term("RAG"), text_term("database")]
        );

        // term: with
        let (occur, subquery) = &clauses[4];
        assert_eq!(*occur, Occur::Should);
        assert_eq!(extract_term_from(subquery), &text_term("with"));

        // exact: superpowers.
        // A one-word exact match is converted into a term query.
        let (occur, subquery) = &clauses[5];
        assert_eq!(*occur, Occur::Should);
        assert_eq!(extract_term_from(subquery), &text_term("superpowers"));
    }

    /// Downcast a boxed query into a `BooleanQuery`, failing the test otherwise.
    fn downcast_boolean_query(q: Box<dyn Query>) -> Box<BooleanQuery> {
        match q.downcast::<BooleanQuery>() {
            Ok(boolean) => boolean,
            Err(_) => panic!("BooleanQuery expected"),
        }
    }

    /// Return the term of a query expected to be a `TermQuery`.
    #[allow(clippy::borrowed_box)]
    fn extract_term_from(query: &Box<dyn Query>) -> &Term {
        match query.downcast_ref::<TermQuery>() {
            Some(term_query) => term_query.term(),
            None => panic!("TermQuery expected"),
        }
    }

    /// Return the terms of a query expected to be a `PhraseQuery`.
    #[allow(clippy::borrowed_box)]
    fn extract_phrase_terms_from(query: &Box<dyn Query>) -> Vec<Term> {
        match query.downcast_ref::<PhraseQuery>() {
            Some(phrase_query) => phrase_query.phrase_terms(),
            None => panic!("PhraseQuery expected"),
        }
    }
}
|
@@ -22,6 +22,8 @@ use std::env;
|
|
22
22
|
|
23
23
|
use lazy_static::lazy_static;
|
24
24
|
|
25
|
+
use super::Token;
|
26
|
+
|
25
27
|
lazy_static! {
|
26
28
|
static ref STOP_WORDS: StopWords = build_stop_words();
|
27
29
|
}
|
@@ -90,6 +92,37 @@ pub fn is_stop_word(word: &str) -> bool {
|
|
90
92
|
STOP_WORDS.is_stop_word(word)
|
91
93
|
}
|
92
94
|
|
95
|
+
/// Consume a tokenized query and filter out any stop words from it.
|
96
|
+
///
|
97
|
+
/// A stop words is any literal matching the following criteria:
|
98
|
+
/// - It is present in the given list of stop words
|
99
|
+
/// - Is **not** the last term in the query
|
100
|
+
///
|
101
|
+
/// The last term of the query can be used as fuzzy prefix and must be preserved.
|
102
|
+
pub fn remove_stop_words(mut query: Vec<Token>) -> Vec<Token> {
|
103
|
+
if query.is_empty() {
|
104
|
+
return query;
|
105
|
+
}
|
106
|
+
// as we don't want to remove the last literal, we pop the last element and
|
107
|
+
// always put it in the filtered query. We don't really care if the last
|
108
|
+
// it's a literal or not.
|
109
|
+
let last: Token<'_> = query.pop().unwrap(); // safe as query is not empty
|
110
|
+
|
111
|
+
query
|
112
|
+
.into_iter()
|
113
|
+
.filter(|token| !is_stop_word_token(token))
|
114
|
+
.chain([last])
|
115
|
+
.collect()
|
116
|
+
}
|
117
|
+
|
118
|
+
fn is_stop_word_token(token: &Token) -> bool {
|
119
|
+
if let Token::Literal(lit) = token {
|
120
|
+
is_stop_word(&lit.to_lowercase())
|
121
|
+
} else {
|
122
|
+
false
|
123
|
+
}
|
124
|
+
}
|
125
|
+
|
93
126
|
#[cfg(test)]
|
94
127
|
mod tests {
|
95
128
|
|
@@ -122,4 +155,78 @@ mod tests {
|
|
122
155
|
assert!(elapsed < 1000.0, "{}", elapsed);
|
123
156
|
}
|
124
157
|
}
|
158
|
+
|
159
|
+
#[test]
fn test_stop_word_tokens() {
    // only a literal holding a stop word counts as a stop word token
    let stop_word_literal = Token::Literal("is".into());
    assert!(is_stop_word_token(&stop_word_literal));

    let never_stop_words = [
        Token::Excluded("is".into()),
        Token::Quoted("is".into()),
        Token::Literal("music".into()),
        Token::Excluded("music".into()),
        Token::Quoted("music".into()),
    ];
    for token in &never_stop_words {
        assert!(!is_stop_word_token(token));
    }
}
|
169
|
+
|
170
|
+
#[test]
fn test_stop_words_detection_is_case_insensitive() {
    // the same stop word must be detected regardless of its casing
    for spelling in ["is", "IS"] {
        assert!(is_stop_word_token(&Token::Literal(spelling.into())));
    }
}
|
175
|
+
|
176
|
+
#[test]
fn test_token_with_stop_word_detection() {
    // A stop word in the middle of the query is removed.
    let filtered = remove_stop_words(vec![
        Token::Literal("music".into()),
        Token::Literal("is".into()),
        Token::Literal("classical".into()),
    ]);
    let expected = vec![Token::Literal("music".into()), Token::Literal("classical".into())];
    assert_eq!(filtered, expected);

    // Only literals can be stop words: a quoted "is" survives.
    let filtered = remove_stop_words(vec![
        Token::Literal("music".into()),
        Token::Quoted("is".into()),
        Token::Literal("classical".into()),
    ]);
    let expected = vec![
        Token::Literal("music".into()),
        Token::Quoted("is".into()),
        Token::Literal("classical".into()),
    ];
    assert_eq!(filtered, expected);

    // A stop word at the end of the query is kept.
    let filtered = remove_stop_words(vec![
        Token::Literal("classical".into()),
        Token::Literal("music".into()),
        Token::Literal("is".into()),
    ]);
    let expected = vec![
        Token::Literal("classical".into()),
        Token::Literal("music".into()),
        Token::Literal("is".into()),
    ];
    assert_eq!(filtered, expected);

    // Every token is a stop word, yet the last one is still kept.
    let filtered = remove_stop_words(vec![
        Token::Literal("we".into()),
        Token::Literal("shouldn't".into()),
        Token::Literal("be".into()),
        Token::Literal("here".into()),
    ]);
    let expected = vec![Token::Literal("here".into())];
    assert_eq!(filtered, expected);
}
|
125
232
|
}
|