nucliadb 6.5.1.post4510__py3-none-any.whl → 6.5.1.post4525__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@
19
19
  #
20
20
 
21
21
  import logging
22
+ import re
22
23
  from collections import defaultdict
23
24
  from typing import Any, Literal, Union, cast
24
25
 
@@ -38,6 +39,8 @@ from .filters import translate_label
38
39
  observer = metrics.Observer("pg_catalog_search", labels={"op": ""})
39
40
  logger = logging.getLogger(__name__)
40
41
 
42
+ SPLIT_REGEX = re.compile(r"\W")
43
+
41
44
 
42
45
  def _filter_operands(operands: list[CatalogExpression]) -> tuple[list[str], list[CatalogExpression]]:
43
46
  facets = []
@@ -163,10 +166,10 @@ def _prepare_query_search(query: search_models.CatalogQuery, params: dict[str, A
163
166
  # This is doing tokenization inside the SQL server (to keep the index updated). We could move it to
164
167
  # the python code at update/query time if it ever becomes a problem but for now, a single regex
165
168
  # executed per query is not a problem.
166
- params["query"] = query.query
167
- return sql.SQL(
168
- "regexp_split_to_array(lower(title), '\\W') @> regexp_split_to_array(lower(%(query)s), '\\W')"
169
- )
169
+
170
+ # Remove zero-length words from the split
171
+ params["query"] = [word for word in SPLIT_REGEX.split(query.query) if word]
172
+ return sql.SQL("regexp_split_to_array(lower(title), '\\W') @> %(query)s")
170
173
  elif query.match == search_models.CatalogQueryMatch.Fuzzy:
171
174
  params["query"] = query.query
172
175
  # Note: the operator is %>; we use %%> for psycopg escaping
@@ -203,7 +203,7 @@ async def chat_streaming_generator(
203
203
 
204
204
  if is_json is False and chunk: # Ensure chunk is not empty before decoding
205
205
  # If response is text the status_code comes at the last chunk of data
206
- status_code = chunk.decode()
206
+ status_code = chunk.decode().split(".")[-1]
207
207
 
208
208
  audit_predict_proxy_endpoint(
209
209
  headers=predict_response.headers,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.5.1.post4510
3
+ Version: 6.5.1.post4525
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License-Expression: AGPL-3.0-or-later
@@ -19,11 +19,11 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: <4,>=3.9
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: nucliadb-telemetry[all]>=6.5.1.post4510
23
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.1.post4510
24
- Requires-Dist: nucliadb-protos>=6.5.1.post4510
25
- Requires-Dist: nucliadb-models>=6.5.1.post4510
26
- Requires-Dist: nidx-protos>=6.5.1.post4510
22
+ Requires-Dist: nucliadb-telemetry[all]>=6.5.1.post4525
23
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.1.post4525
24
+ Requires-Dist: nucliadb-protos>=6.5.1.post4525
25
+ Requires-Dist: nucliadb-models>=6.5.1.post4525
26
+ Requires-Dist: nidx-protos>=6.5.1.post4525
27
27
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
28
28
  Requires-Dist: nuclia-models>=0.24.2
29
29
  Requires-Dist: uvicorn[standard]
@@ -254,8 +254,8 @@ nucliadb/search/search/ingestion_agents.py,sha256=IK6yOPEF9rST_uoqspdVdPk0pldjDh
254
254
  nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
255
255
  nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
256
256
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
257
- nucliadb/search/search/pgcatalog.py,sha256=O_nRjSJf1Qc-XorVwcNlsDOftzy_zQLLfagkjU4YmSA,16718
258
- nucliadb/search/search/predict_proxy.py,sha256=rbclJeMBW7Yd6PpSAhp9fiH6YpPrVnUb-qgIGQTUB90,8616
257
+ nucliadb/search/search/pgcatalog.py,sha256=_AiyW6it66UX6BsZbM3-230IQhiEG4utoKYboviyOFI,16799
258
+ nucliadb/search/search/predict_proxy.py,sha256=T4T6c1ZAbBCgPLy-6JoSlixy-0bAh6z0rNH1bMMy3bg,8631
259
259
  nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
260
260
  nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
261
261
  nucliadb/search/search/rerankers.py,sha256=E2J1QdKAojqbhHM3KAyaOXKf6tJyETUxKs4tf_BEyqk,7472
@@ -375,8 +375,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
375
375
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
376
376
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
377
377
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
378
- nucliadb-6.5.1.post4510.dist-info/METADATA,sha256=lLAN-xpgGXv6qlz-HyosNkSlKRX5qV6Be8-Xj59N8F0,4158
379
- nucliadb-6.5.1.post4510.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
- nucliadb-6.5.1.post4510.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
- nucliadb-6.5.1.post4510.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
- nucliadb-6.5.1.post4510.dist-info/RECORD,,
378
+ nucliadb-6.5.1.post4525.dist-info/METADATA,sha256=w8Hgr9H0qA7QLKmypMcYBiX1_unYDquS3n3WWodM-gM,4158
379
+ nucliadb-6.5.1.post4525.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
380
+ nucliadb-6.5.1.post4525.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
381
+ nucliadb-6.5.1.post4525.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
382
+ nucliadb-6.5.1.post4525.dist-info/RECORD,,