nucliadb 6.9.0.post5100__py3-none-any.whl → 6.9.0.post5110__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb might be problematic. Click here for more details.
- nucliadb/search/search/query_parser/parsers/common.py +16 -7
- nucliadb/search/search/query_parser/parsers/find.py +0 -3
- nucliadb/search/search/query_parser/parsers/search.py +0 -3
- {nucliadb-6.9.0.post5100.dist-info → nucliadb-6.9.0.post5110.dist-info}/METADATA +6 -6
- {nucliadb-6.9.0.post5100.dist-info → nucliadb-6.9.0.post5110.dist-info}/RECORD +8 -8
- {nucliadb-6.9.0.post5100.dist-info → nucliadb-6.9.0.post5110.dist-info}/WHEEL +0 -0
- {nucliadb-6.9.0.post5100.dist-info → nucliadb-6.9.0.post5110.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.9.0.post5100.dist-info → nucliadb-6.9.0.post5110.dist-info}/top_level.txt +0 -0
|
@@ -21,7 +21,6 @@ import re
|
|
|
21
21
|
import string
|
|
22
22
|
from typing import Optional, Union
|
|
23
23
|
|
|
24
|
-
from nucliadb.common.exceptions import InvalidQueryError
|
|
25
24
|
from nucliadb.search import logger
|
|
26
25
|
from nucliadb.search.search.query_parser.fetcher import Fetcher
|
|
27
26
|
from nucliadb.search.search.query_parser.models import (
|
|
@@ -32,15 +31,20 @@ from nucliadb_models import search as search_models
|
|
|
32
31
|
|
|
33
32
|
DEFAULT_GENERIC_SEMANTIC_THRESHOLD = 0.7
|
|
34
33
|
|
|
35
|
-
# -* is an invalid query in tantivy and it won't return results but if you add some whitespaces
|
|
36
|
-
# between - and *, it will actually trigger a tantivy bug and panic
|
|
37
|
-
INVALID_QUERY = re.compile(r"- +\*")
|
|
38
34
|
|
|
35
|
+
def validate_query_syntax(query: str) -> str:
|
|
36
|
+
"""Filter some queries that panic tantivy, better than returning the 500"""
|
|
39
37
|
|
|
40
|
-
|
|
41
|
-
#
|
|
38
|
+
# -* is an invalid query in tantivy and it won't return results but if you add some whitespaces
|
|
39
|
+
# between - and *, it will actually trigger a tantivy bug and panic
|
|
40
|
+
INVALID_QUERY = re.compile(r"- *\*")
|
|
42
41
|
if INVALID_QUERY.search(query):
|
|
43
|
-
|
|
42
|
+
# remove the * and extra spaces, as it's probably what doesn't have
|
|
43
|
+
# meaning in both cases: -* and - *
|
|
44
|
+
fixed = re.sub(INVALID_QUERY, "- ", query)
|
|
45
|
+
query = fixed
|
|
46
|
+
|
|
47
|
+
return query
|
|
44
48
|
|
|
45
49
|
|
|
46
50
|
def is_empty_query(request: search_models.BaseSearchRequest) -> bool:
|
|
@@ -85,6 +89,7 @@ async def parse_keyword_query(
|
|
|
85
89
|
fetcher: Fetcher,
|
|
86
90
|
) -> KeywordQuery:
|
|
87
91
|
query = item.query
|
|
92
|
+
|
|
88
93
|
# If there was a rephrase with image, we should use the rephrased query for keyword search
|
|
89
94
|
rephrased_query = await fetcher.get_rephrased_query()
|
|
90
95
|
if item.query_image is not None and rephrased_query is not None:
|
|
@@ -98,6 +103,10 @@ async def parse_keyword_query(
|
|
|
98
103
|
query = synonyms_query
|
|
99
104
|
is_synonyms_query = True
|
|
100
105
|
|
|
106
|
+
# after all query transformations, pass a validator that can fix some
|
|
107
|
+
# queries that trigger a panic on the index
|
|
108
|
+
query = validate_query_syntax(query)
|
|
109
|
+
|
|
101
110
|
min_score = parse_keyword_min_score(item.min_score)
|
|
102
111
|
|
|
103
112
|
return KeywordQuery(
|
|
@@ -57,7 +57,6 @@ from .common import (
|
|
|
57
57
|
parse_semantic_query,
|
|
58
58
|
parse_top_k,
|
|
59
59
|
should_disable_vector_search,
|
|
60
|
-
validate_query_syntax,
|
|
61
60
|
)
|
|
62
61
|
|
|
63
62
|
|
|
@@ -146,8 +145,6 @@ class _FindParser:
|
|
|
146
145
|
return retrieval
|
|
147
146
|
|
|
148
147
|
def _validate_request(self):
|
|
149
|
-
validate_query_syntax(self.item.query)
|
|
150
|
-
|
|
151
148
|
# synonyms are not compatible with vector/graph search
|
|
152
149
|
if (
|
|
153
150
|
self.item.with_synonyms
|
|
@@ -51,7 +51,6 @@ from .common import (
|
|
|
51
51
|
parse_semantic_query,
|
|
52
52
|
parse_top_k,
|
|
53
53
|
should_disable_vector_search,
|
|
54
|
-
validate_query_syntax,
|
|
55
54
|
)
|
|
56
55
|
|
|
57
56
|
INDEX_SORTABLE_FIELDS = [
|
|
@@ -128,8 +127,6 @@ class _SearchParser:
|
|
|
128
127
|
return retrieval
|
|
129
128
|
|
|
130
129
|
def _validate_request(self):
|
|
131
|
-
validate_query_syntax(self.item.query)
|
|
132
|
-
|
|
133
130
|
# synonyms are not compatible with vector/graph search
|
|
134
131
|
if (
|
|
135
132
|
self.item.with_synonyms
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nucliadb
|
|
3
|
-
Version: 6.9.0.post5100
|
|
3
|
+
Version: 6.9.0.post5110
|
|
4
4
|
Summary: NucliaDB
|
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
|
6
6
|
License-Expression: AGPL-3.0-or-later
|
|
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
20
|
Requires-Python: <4,>=3.9
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5100
|
|
23
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5100
|
|
24
|
-
Requires-Dist: nucliadb-protos>=6.9.0.post5100
|
|
25
|
-
Requires-Dist: nucliadb-models>=6.9.0.post5100
|
|
26
|
-
Requires-Dist: nidx-protos>=6.9.0.post5100
|
|
22
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5110
|
|
23
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5110
|
|
24
|
+
Requires-Dist: nucliadb-protos>=6.9.0.post5110
|
|
25
|
+
Requires-Dist: nucliadb-models>=6.9.0.post5110
|
|
26
|
+
Requires-Dist: nidx-protos>=6.9.0.post5110
|
|
27
27
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
|
28
28
|
Requires-Dist: nuclia-models>=0.50.0
|
|
29
29
|
Requires-Dist: uvicorn[standard]
|
|
@@ -285,10 +285,10 @@ nucliadb/search/search/query_parser/old_filters.py,sha256=GsU3T3-WiSPvjucP7evHks
|
|
|
285
285
|
nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
|
|
286
286
|
nucliadb/search/search/query_parser/parsers/ask.py,sha256=ySa3lBhUuammIchJvj7xodeGIYGkR0uyLnHuOtLfWE8,2810
|
|
287
287
|
nucliadb/search/search/query_parser/parsers/catalog.py,sha256=DErVfWJ9a_F5a8Qbht1WY4Lm8r3ui9Y3C7oJMOOZOng,7474
|
|
288
|
-
nucliadb/search/search/query_parser/parsers/common.py,sha256=
|
|
289
|
-
nucliadb/search/search/query_parser/parsers/find.py,sha256=
|
|
288
|
+
nucliadb/search/search/query_parser/parsers/common.py,sha256=GimacpFLtvvD6hIODwuQBKwW_VD7MrfBzjy9U4ja13k,6871
|
|
289
|
+
nucliadb/search/search/query_parser/parsers/find.py,sha256=7z6GDscMGybvlXENAKOs7XM3p1IJmdC-AZsb4GX_O3g,12698
|
|
290
290
|
nucliadb/search/search/query_parser/parsers/graph.py,sha256=s7nCB7ly_4BZWds-8zce1R-r2fHSiEhAK8P-eL14wTk,9390
|
|
291
|
-
nucliadb/search/search/query_parser/parsers/search.py,sha256=
|
|
291
|
+
nucliadb/search/search/query_parser/parsers/search.py,sha256=k9JaE9qWGi_eUxEKe_pOKrMIpx7k2wr2j3XrNqZJx9A,10428
|
|
292
292
|
nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=xiOQ7_X6MkcZs3W_0DjdVfyk-G1AY6RBx3oG5hsq7ig,11455
|
|
293
293
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
|
294
294
|
nucliadb/standalone/api_router.py,sha256=zRSMlaRVHUDGTYA3zC03UV_aLLn-ch-kaeWn1tEjTXw,4338
|
|
@@ -384,8 +384,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
|
384
384
|
nucliadb/writer/tus/s3.py,sha256=vu1BGg4VqJ_x2P1u2BxqPKlSfw5orT_a3R-Ln5oPUpU,8483
|
|
385
385
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
|
386
386
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
|
387
|
-
nucliadb-6.9.0.
|
|
388
|
-
nucliadb-6.9.0.
|
|
389
|
-
nucliadb-6.9.0.
|
|
390
|
-
nucliadb-6.9.0.
|
|
391
|
-
nucliadb-6.9.0.
|
|
387
|
+
nucliadb-6.9.0.post5110.dist-info/METADATA,sha256=DuN8dmjX6Byrm7_V5TMuUeXvPSJjof6pl6V3CU2TXCU,4158
|
|
388
|
+
nucliadb-6.9.0.post5110.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
389
|
+
nucliadb-6.9.0.post5110.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
|
390
|
+
nucliadb-6.9.0.post5110.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
|
391
|
+
nucliadb-6.9.0.post5110.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|