nucliadb 6.5.1.post4520__py3-none-any.whl → 6.5.1.post4525__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@
19
19
  #
20
20
 
21
21
  import logging
22
+ import re
22
23
  from collections import defaultdict
23
24
  from typing import Any, Literal, Union, cast
24
25
 
@@ -38,6 +39,8 @@ from .filters import translate_label
38
39
  observer = metrics.Observer("pg_catalog_search", labels={"op": ""})
39
40
  logger = logging.getLogger(__name__)
40
41
 
42
+ SPLIT_REGEX = re.compile(r"\W")
43
+
41
44
 
42
45
  def _filter_operands(operands: list[CatalogExpression]) -> tuple[list[str], list[CatalogExpression]]:
43
46
  facets = []
@@ -163,10 +166,10 @@ def _prepare_query_search(query: search_models.CatalogQuery, params: dict[str, A
163
166
  # This is doing tokenization inside the SQL server (to keep the index updated). We could move it to
164
167
  # the python code at update/query time if it ever becomes a problem but for now, a single regex
165
168
  # executed per query is not a problem.
166
- params["query"] = query.query
167
- return sql.SQL(
168
- "regexp_split_to_array(lower(title), '\\W') @> regexp_split_to_array(lower(%(query)s), '\\W')"
169
- )
169
+
170
+ # Remove zero-length words from the split
171
+ params["query"] = [word for word in SPLIT_REGEX.split(query.query) if word]
172
+ return sql.SQL("regexp_split_to_array(lower(title), '\\W') @> %(query)s")
170
173
  elif query.match == search_models.CatalogQueryMatch.Fuzzy:
171
174
  params["query"] = query.query
172
175
  # Note: the operator is %>, We use %%> for psycopg escaping
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.5.1.post4520
3
+ Version: 6.5.1.post4525
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.5.1.post4520
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.1.post4520
24
- Requires-Dist: nucliadb-protos>=6.5.1.post4520
25
- Requires-Dist: nucliadb-models>=6.5.1.post4520
26
- Requires-Dist: nidx-protos>=6.5.1.post4520
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.5.1.post4525
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.1.post4525
24
+ Requires-Dist: nucliadb-protos>=6.5.1.post4525
25
+ Requires-Dist: nucliadb-models>=6.5.1.post4525
26
+ Requires-Dist: nidx-protos>=6.5.1.post4525
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.24.2
29
29
  Requires-Dist: uvicorn[standard]
@@ -254,7 +254,7 @@ nucliadb/search/search/ingestion_agents.py,sha256=IK6yOPEF9rST_uoqspdVdPk0pldjDh
254
254
  nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
255
255
  nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
256
256
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
257
- nucliadb/search/search/pgcatalog.py,sha256=O_nRjSJf1Qc-XorVwcNlsDOftzy_zQLLfagkjU4YmSA,16718
257
+ nucliadb/search/search/pgcatalog.py,sha256=_AiyW6it66UX6BsZbM3-230IQhiEG4utoKYboviyOFI,16799
258
258
  nucliadb/search/search/predict_proxy.py,sha256=T4T6c1ZAbBCgPLy-6JoSlixy-0bAh6z0rNH1bMMy3bg,8631
259
259
  nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
260
260
  nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
@@ -375,8 +375,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
375
375
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
376
376
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
377
377
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
378
- nucliadb-6.5.1.post4520.dist-info/METADATA,sha256=4zLZ2kYpWRVlz8HgcOR7TZ9WIIgBCyd-w6wDuMJU_ww,4158
379
- nucliadb-6.5.1.post4520.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
- nucliadb-6.5.1.post4520.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
- nucliadb-6.5.1.post4520.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
- nucliadb-6.5.1.post4520.dist-info/RECORD,,
378
+ nucliadb-6.5.1.post4525.dist-info/METADATA,sha256=w8Hgr9H0qA7QLKmypMcYBiX1_unYDquS3n3WWodM-gM,4158
379
+ nucliadb-6.5.1.post4525.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
+ nucliadb-6.5.1.post4525.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
+ nucliadb-6.5.1.post4525.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
+ nucliadb-6.5.1.post4525.dist-info/RECORD,,