nucliadb 6.9.0.post5100__py3-none-any.whl → 6.9.0.post5110__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb might be problematic. Click here for more details.

@@ -21,7 +21,6 @@ import re
21
21
  import string
22
22
  from typing import Optional, Union
23
23
 
24
- from nucliadb.common.exceptions import InvalidQueryError
25
24
  from nucliadb.search import logger
26
25
  from nucliadb.search.search.query_parser.fetcher import Fetcher
27
26
  from nucliadb.search.search.query_parser.models import (
@@ -32,15 +31,20 @@ from nucliadb_models import search as search_models
32
31
 
33
32
  DEFAULT_GENERIC_SEMANTIC_THRESHOLD = 0.7
34
33
 
35
- # -* is an invalid query in tantivy and it won't return results but if you add some whitespaces
36
- # between - and *, it will actually trigger a tantivy bug and panic
37
- INVALID_QUERY = re.compile(r"- +\*")
38
34
 
35
+ def validate_query_syntax(query: str) -> str:
36
+ """Filter some queries that panic tantivy, better than returning the 500"""
39
37
 
40
- def validate_query_syntax(query: str):
41
- # Filter some queries that panic tantivy, better than returning the 500
38
+ # -* is an invalid query in tantivy and it won't return results but if you add some whitespaces
39
+ # between - and *, it will actually trigger a tantivy bug and panic
40
+ INVALID_QUERY = re.compile(r"- *\*")
42
41
  if INVALID_QUERY.search(query):
43
- raise InvalidQueryError("query", "Invalid query syntax")
42
+ # remove the * and extra spaces, as it's probably what doesn't have
43
+ # meaning in both cases: -* and - *
44
+ fixed = re.sub(INVALID_QUERY, "- ", query)
45
+ query = fixed
46
+
47
+ return query
44
48
 
45
49
 
46
50
  def is_empty_query(request: search_models.BaseSearchRequest) -> bool:
@@ -85,6 +89,7 @@ async def parse_keyword_query(
85
89
  fetcher: Fetcher,
86
90
  ) -> KeywordQuery:
87
91
  query = item.query
92
+
88
93
  # If there was a rephrase with image, we should use the rephrased query for keyword search
89
94
  rephrased_query = await fetcher.get_rephrased_query()
90
95
  if item.query_image is not None and rephrased_query is not None:
@@ -98,6 +103,10 @@ async def parse_keyword_query(
98
103
  query = synonyms_query
99
104
  is_synonyms_query = True
100
105
 
106
+ # after all query transformations, pass a validator that can fix some
107
+ # queries that trigger a panic on the index
108
+ query = validate_query_syntax(query)
109
+
101
110
  min_score = parse_keyword_min_score(item.min_score)
102
111
 
103
112
  return KeywordQuery(
@@ -57,7 +57,6 @@ from .common import (
57
57
  parse_semantic_query,
58
58
  parse_top_k,
59
59
  should_disable_vector_search,
60
- validate_query_syntax,
61
60
  )
62
61
 
63
62
 
@@ -146,8 +145,6 @@ class _FindParser:
146
145
  return retrieval
147
146
 
148
147
  def _validate_request(self):
149
- validate_query_syntax(self.item.query)
150
-
151
148
  # synonyms are not compatible with vector/graph search
152
149
  if (
153
150
  self.item.with_synonyms
@@ -51,7 +51,6 @@ from .common import (
51
51
  parse_semantic_query,
52
52
  parse_top_k,
53
53
  should_disable_vector_search,
54
- validate_query_syntax,
55
54
  )
56
55
 
57
56
  INDEX_SORTABLE_FIELDS = [
@@ -128,8 +127,6 @@ class _SearchParser:
128
127
  return retrieval
129
128
 
130
129
  def _validate_request(self):
131
- validate_query_syntax(self.item.query)
132
-
133
130
  # synonyms are not compatible with vector/graph search
134
131
  if (
135
132
  self.item.with_synonyms
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.9.0.post5100
3
+ Version: 6.9.0.post5110
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5100
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5100
24
- Requires-Dist: nucliadb-protos>=6.9.0.post5100
25
- Requires-Dist: nucliadb-models>=6.9.0.post5100
26
- Requires-Dist: nidx-protos>=6.9.0.post5100
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.9.0.post5110
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.9.0.post5110
24
+ Requires-Dist: nucliadb-protos>=6.9.0.post5110
25
+ Requires-Dist: nucliadb-models>=6.9.0.post5110
26
+ Requires-Dist: nidx-protos>=6.9.0.post5110
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.50.0
29
29
  Requires-Dist: uvicorn[standard]
@@ -285,10 +285,10 @@ nucliadb/search/search/query_parser/old_filters.py,sha256=GsU3T3-WiSPvjucP7evHks
285
285
  nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
286
286
  nucliadb/search/search/query_parser/parsers/ask.py,sha256=ySa3lBhUuammIchJvj7xodeGIYGkR0uyLnHuOtLfWE8,2810
287
287
  nucliadb/search/search/query_parser/parsers/catalog.py,sha256=DErVfWJ9a_F5a8Qbht1WY4Lm8r3ui9Y3C7oJMOOZOng,7474
288
- nucliadb/search/search/query_parser/parsers/common.py,sha256=jbQweWVufngbobr99qpHh1iiaGICOC6-e9AV33x0-Gk,6594
289
- nucliadb/search/search/query_parser/parsers/find.py,sha256=4xQwa0BxNucenUrW_iZ2jCGd15Dm4AKS_B91BE8sDi4,12773
288
+ nucliadb/search/search/query_parser/parsers/common.py,sha256=GimacpFLtvvD6hIODwuQBKwW_VD7MrfBzjy9U4ja13k,6871
289
+ nucliadb/search/search/query_parser/parsers/find.py,sha256=7z6GDscMGybvlXENAKOs7XM3p1IJmdC-AZsb4GX_O3g,12698
290
290
  nucliadb/search/search/query_parser/parsers/graph.py,sha256=s7nCB7ly_4BZWds-8zce1R-r2fHSiEhAK8P-eL14wTk,9390
291
- nucliadb/search/search/query_parser/parsers/search.py,sha256=78KSJ9t3I7nFVY2Qk2fMw2P1RHUdGRsWzBf59FdAeTA,10503
291
+ nucliadb/search/search/query_parser/parsers/search.py,sha256=k9JaE9qWGi_eUxEKe_pOKrMIpx7k2wr2j3XrNqZJx9A,10428
292
292
  nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=xiOQ7_X6MkcZs3W_0DjdVfyk-G1AY6RBx3oG5hsq7ig,11455
293
293
  nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
294
294
  nucliadb/standalone/api_router.py,sha256=zRSMlaRVHUDGTYA3zC03UV_aLLn-ch-kaeWn1tEjTXw,4338
@@ -384,8 +384,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
384
384
  nucliadb/writer/tus/s3.py,sha256=vu1BGg4VqJ_x2P1u2BxqPKlSfw5orT_a3R-Ln5oPUpU,8483
385
385
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
386
386
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
387
- nucliadb-6.9.0.post5100.dist-info/METADATA,sha256=0sGsM87JOiyKio3xqNtZEcVvuiXIuhMFywyEPrXoIeQ,4158
388
- nucliadb-6.9.0.post5100.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
389
- nucliadb-6.9.0.post5100.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
390
- nucliadb-6.9.0.post5100.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
391
- nucliadb-6.9.0.post5100.dist-info/RECORD,,
387
+ nucliadb-6.9.0.post5110.dist-info/METADATA,sha256=DuN8dmjX6Byrm7_V5TMuUeXvPSJjof6pl6V3CU2TXCU,4158
388
+ nucliadb-6.9.0.post5110.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
389
+ nucliadb-6.9.0.post5110.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
390
+ nucliadb-6.9.0.post5110.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
391
+ nucliadb-6.9.0.post5110.dist-info/RECORD,,