nucliadb 6.2.1.post3251__py3-none-any.whl → 6.2.1.post3254__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,9 +62,6 @@ class Reranker(BaseModel): ...
62
62
  class NoopReranker(Reranker): ...
63
63
 
64
64
 
65
- class MultiMatchBoosterReranker(Reranker): ...
66
-
67
-
68
65
  class PredictReranker(Reranker):
69
66
  window: int = Field(le=200)
70
67
 
@@ -31,7 +31,6 @@ from nucliadb.search.search.query_parser.models import (
31
31
  CatalogFilters,
32
32
  CatalogQuery,
33
33
  DateTimeFilter,
34
- MultiMatchBoosterReranker,
35
34
  NoopReranker,
36
35
  PredictReranker,
37
36
  RankFusion,
@@ -123,9 +122,6 @@ class _FindParser:
123
122
  if self.item.reranker == search_models.RerankerName.NOOP:
124
123
  reranking = NoopReranker()
125
124
 
126
- elif self.item.reranker == search_models.RerankerName.MULTI_MATCH_BOOSTER:
127
- reranking = MultiMatchBoosterReranker()
128
-
129
125
  elif self.item.reranker == search_models.RerankerName.PREDICT_RERANKER:
130
126
  # for predict reranker, by default, we want a x2 factor with a
131
127
  # top of 200 results
@@ -169,58 +169,17 @@ class PredictReranker(Reranker):
169
169
  return best
170
170
 
171
171
 
172
- class MultiMatchBoosterReranker(Reranker):
173
- """This reranker gives more value to items that come from different indices"""
174
-
175
- @property
176
- def window(self) -> Optional[int]:
177
- return None
178
-
179
- @reranker_observer.wrap({"type": "multi_match_booster"})
180
- async def _rerank(self, items: list[RerankableItem], options: RerankingOptions) -> list[RankedItem]:
181
- """Given a list of rerankable items, boost matches that appear multiple
182
- times. The returned list can be smaller than the initial, as repeated
183
- matches are deduplicated.
184
- """
185
- reranked_by_id = {}
186
- for item in items:
187
- if item.id not in reranked_by_id:
188
- reranked_by_id[item.id] = RankedItem(
189
- id=item.id,
190
- score=item.score,
191
- score_type=item.score_type,
192
- )
193
- else:
194
- # it's a multiple match, boost the score
195
- if reranked_by_id[item.id].score < item.score:
196
- # previous implementation noted that we are using vector
197
- # score x2 when we find a multiple match. However, this may
198
- # not be true, as the same paragraph could come in any
199
- # position in the rank fusioned result list
200
- reranked_by_id[item.id].score = item.score * 2
201
-
202
- reranked_by_id[item.id].score_type = SCORE_TYPE.BOTH
203
-
204
- reranked = list(reranked_by_id.values())
205
- sort_by_score(reranked)
206
- return reranked
207
-
208
-
209
172
  def get_reranker(reranker: parser_models.Reranker) -> Reranker:
210
173
  algorithm: Reranker
211
174
 
212
175
  if isinstance(reranker, parser_models.NoopReranker):
213
176
  algorithm = NoopReranker()
214
177
 
215
- elif isinstance(reranker, parser_models.MultiMatchBoosterReranker):
216
- algorithm = MultiMatchBoosterReranker()
217
-
218
178
  elif isinstance(reranker, parser_models.PredictReranker):
219
179
  algorithm = PredictReranker(reranker.window)
220
180
 
221
181
  else:
222
- logger.warning(f"Unknown reranker requested: {reranker}. Using default instead")
223
- algorithm = MultiMatchBoosterReranker()
182
+ raise ValueError(f"Unknown reranker requested: {reranker}")
224
183
 
225
184
  return algorithm
226
185
 
@@ -17,14 +17,13 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- import datetime
21
20
  import logging
22
21
  import time
23
22
 
24
23
  import orjson
25
24
  import pydantic
26
25
  from fastapi import Request
27
- from fastapi.responses import JSONResponse, StreamingResponse
26
+ from fastapi.responses import JSONResponse
28
27
  from fastapi.routing import APIRouter
29
28
  from fastapi_versioning import version
30
29
  from jwcrypto import jwe, jwk # type: ignore
@@ -33,7 +32,7 @@ from nucliadb.common import datamanagers
33
32
  from nucliadb.common.cluster import manager
34
33
  from nucliadb.common.http_clients import processing
35
34
  from nucliadb.common.http_clients.auth import NucliaAuthHTTPClient
36
- from nucliadb.standalone import introspect, versions
35
+ from nucliadb.standalone import versions
37
36
  from nucliadb_models.resource import NucliaDBRoles
38
37
  from nucliadb_utils.authentication import requires
39
38
  from nucliadb_utils.settings import nuclia_settings
@@ -146,17 +145,6 @@ async def versions_endpoint(request: Request) -> JSONResponse:
146
145
  )
147
146
 
148
147
 
149
- @standalone_api_router.get("/introspect")
150
- def introspect_endpoint(request: Request) -> StreamingResponse:
151
- introspect_id = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
152
- return StreamingResponse(
153
- content=introspect.stream_tar(request.app),
154
- status_code=200,
155
- headers={"Content-Disposition": f"attachment; filename=introspect_{introspect_id}.tar.gz"},
156
- media_type="application/octet-stream",
157
- )
158
-
159
-
160
148
  @standalone_api_router.get("/pull/position")
161
149
  async def pull_status(request: Request) -> JSONResponse:
162
150
  async with datamanagers.with_ro_transaction() as txn:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.2.1.post3251
3
+ Version: 6.2.1.post3254
4
4
  Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
5
5
  Author: NucliaDB Community
6
6
  Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
23
  Requires-Python: >=3.9, <4
24
24
  Description-Content-Type: text/markdown
25
- Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3251
26
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3251
27
- Requires-Dist: nucliadb-protos>=6.2.1.post3251
28
- Requires-Dist: nucliadb-models>=6.2.1.post3251
25
+ Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3254
26
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3254
27
+ Requires-Dist: nucliadb-protos>=6.2.1.post3254
28
+ Requires-Dist: nucliadb-models>=6.2.1.post3254
29
29
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
30
30
  Requires-Dist: nuclia-models>=0.24.2
31
31
  Requires-Dist: uvicorn
@@ -221,7 +221,7 @@ nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3Le
221
221
  nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
222
222
  nucliadb/search/search/query.py,sha256=AlhRw4Mick4Oab5HsKHaQpBXsVc_UUY5IpkUIwsFfU8,30577
223
223
  nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
224
- nucliadb/search/search/rerankers.py,sha256=0kAHES9X_FKkP7KSN9NRETFmRPKzwrFAo_54MbyvM7Q,9051
224
+ nucliadb/search/search/rerankers.py,sha256=3vep4EOVNeDJGsMdx-1g6Ar4ZGJG3IHym3HkxnbwtAQ,7321
225
225
  nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
226
226
  nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
227
227
  nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
@@ -234,14 +234,13 @@ nucliadb/search/search/chat/query.py,sha256=rBssR6MPSx8h2DASRMTLODaz9oGE5tNVVVeD
234
234
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
235
235
  nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
236
236
  nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
237
- nucliadb/search/search/query_parser/models.py,sha256=-VlCDXUCgOroAZw1Leqhj2VMgRv_CD2w40PXXOBLaUM,2332
238
- nucliadb/search/search/query_parser/parser.py,sha256=JC6koS9Np1PzCfEk1Xy6mpP1HmovS_vIxxA9u-kwzos,6498
237
+ nucliadb/search/search/query_parser/models.py,sha256=2iWuTcH24RDF8xokgXr0j5qbMoURQ1TFyqJIYs16LqU,2283
238
+ nucliadb/search/search/query_parser/parser.py,sha256=m6meq5QQO_ofdtbrvEORsZLjxURWfRR0dINrgDXmYRg,6323
239
239
  nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
240
- nucliadb/standalone/api_router.py,sha256=zR03TQ-Pd2kXx1jeV83Puw19112Z8Jhln7p1cAn69kg,6699
240
+ nucliadb/standalone/api_router.py,sha256=4-g-eEq27nL6vKCLRCoV0Pxf-L273N-eHeEX2vI9qgg,6215
241
241
  nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
242
242
  nucliadb/standalone/auth.py,sha256=UwMv-TywhMZabvVg3anQLeCRdoHDnWf2o3luvnoNBjs,7670
243
243
  nucliadb/standalone/config.py,sha256=g9JBJQfyw87TYZ3yuy0O9WFVLd_MmCJxSRSI0E8FwZE,5396
244
- nucliadb/standalone/introspect.py,sha256=xHdHV-CB0Vy5cp1MQAodu0Pc8izpzl_lX2ARJJwL3RI,6083
245
244
  nucliadb/standalone/lifecycle.py,sha256=rdKLG-oOLN4rfd2VGG_2vlDUWYneWSCiuEhoeiFKfnM,2343
246
245
  nucliadb/standalone/migrations.py,sha256=s9-3RSZ-O3bjEw2TnBe_YWLUEKbub0bARUxi1gA3yuY,1950
247
246
  nucliadb/standalone/purge.py,sha256=ZY-cebb214FFiPG7OFmXZGg0G3CK5Amw0FLLm9WJhKE,1343
@@ -332,9 +331,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
332
331
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
333
332
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
334
333
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
335
- nucliadb-6.2.1.post3251.dist-info/METADATA,sha256=ykfBfM1MB4gZsChTKAkP6hzOw0DAlxBmAFMqMwQfh3Y,4603
336
- nucliadb-6.2.1.post3251.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
337
- nucliadb-6.2.1.post3251.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
338
- nucliadb-6.2.1.post3251.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
339
- nucliadb-6.2.1.post3251.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
340
- nucliadb-6.2.1.post3251.dist-info/RECORD,,
334
+ nucliadb-6.2.1.post3254.dist-info/METADATA,sha256=KxrWjVFc1AQo6-OO61isHxc49B03BWRNesKyXN_TugY,4603
335
+ nucliadb-6.2.1.post3254.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
336
+ nucliadb-6.2.1.post3254.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
337
+ nucliadb-6.2.1.post3254.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
338
+ nucliadb-6.2.1.post3254.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
339
+ nucliadb-6.2.1.post3254.dist-info/RECORD,,
@@ -1,183 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- import asyncio
22
- import os
23
- import platform
24
- import sys
25
- import tarfile
26
- import tempfile
27
- from collections.abc import AsyncGenerator
28
- from typing import Optional
29
-
30
- import pkg_resources
31
- import psutil
32
- from fastapi import FastAPI
33
- from pydantic import BaseModel
34
-
35
- from nucliadb.standalone.settings import Settings
36
- from nucliadb_telemetry.settings import LogOutputType, LogSettings
37
-
38
- MB = 1024 * 1024
39
- CHUNK_SIZE = 2 * MB
40
- SYSTEM_INFO_TEMPLATE = """System info
41
- ===========
42
-
43
- Python
44
- ------
45
- - Version: {python_version}
46
-
47
- Operative system
48
- ----------------
49
- - Name: {os_name}
50
- - Release: {os_release}
51
- - Version: {os_version}
52
- - Machine: {os_machine}
53
- - File System Encoding: {os_file_system_encoding}
54
-
55
- CPU information
56
- ---------------
57
- - Number of CPUs: {cpu_count}
58
-
59
- Memory information
60
- ------------------
61
- - Total: {memory_total:.2f} MB
62
- - Available: {memory_available:.2f} MB
63
- - Used: {memory_used:.2f} MB
64
- - Used %: {memory_used_percent:.2f}%
65
- """
66
-
67
-
68
- class NodeInfo(BaseModel):
69
- id: str
70
- address: str
71
- shard_count: int
72
- primary_id: Optional[str] = None
73
-
74
-
75
- class ClusterInfo(BaseModel):
76
- nodes: list[NodeInfo]
77
-
78
-
79
- async def stream_tar(app: FastAPI) -> AsyncGenerator[bytes, None]:
80
- with tempfile.TemporaryDirectory() as temp_dir:
81
- tar_file = os.path.join(temp_dir, "introspect.tar.gz")
82
- with tarfile.open(tar_file, mode="w:gz") as tar:
83
- await add_system_info(temp_dir, tar)
84
- await add_dependencies(temp_dir, tar)
85
- settings: Settings = app.settings.copy() # type: ignore
86
- await add_settings(temp_dir, tar, settings)
87
- if settings.log_output_type == LogOutputType.FILE:
88
- await add_logs(tar)
89
-
90
- async for chunk in stream_out_tar(tar_file):
91
- yield chunk
92
-
93
-
94
- async def stream_out_tar(tar_file: str) -> AsyncGenerator[bytes, None]:
95
- loop = asyncio.get_event_loop()
96
- with open(tar_file, "rb") as f:
97
- chunk = await loop.run_in_executor(None, f.read, CHUNK_SIZE)
98
- while chunk:
99
- yield chunk
100
- chunk = await loop.run_in_executor(None, f.read, CHUNK_SIZE)
101
-
102
-
103
- async def add_system_info(temp_dir: str, tar: tarfile.TarFile):
104
- loop = asyncio.get_event_loop()
105
- await loop.run_in_executor(None, _add_system_info_to_tar, temp_dir, tar)
106
-
107
-
108
- def _add_system_info_to_tar(temp_dir: str, tar: tarfile.TarFile):
109
- system_info_file = os.path.join(temp_dir, "system_info.txt")
110
- with open(system_info_file, "w") as f:
111
- memory = psutil.virtual_memory()
112
- f.write(
113
- SYSTEM_INFO_TEMPLATE.format(
114
- python_version=sys.version,
115
- os_name=os.uname().sysname,
116
- os_release=platform.release(),
117
- os_version=platform.version(),
118
- os_machine=platform.machine(),
119
- os_file_system_encoding=os.sys.getfilesystemencoding(), # type: ignore
120
- cpu_count=psutil.cpu_count(),
121
- memory_total=memory.total / MB,
122
- memory_available=memory.available / MB,
123
- memory_used=memory.used / MB,
124
- memory_used_percent=memory.percent,
125
- )
126
- )
127
- tar.add(system_info_file, arcname="system_info.txt")
128
-
129
-
130
- async def add_dependencies(temp_dir: str, tar: tarfile.TarFile):
131
- loop = asyncio.get_event_loop()
132
- await loop.run_in_executor(None, _add_dependencies_to_tar, temp_dir, tar)
133
-
134
-
135
- def _add_dependencies_to_tar(temp_dir: str, tar: tarfile.TarFile):
136
- dependendies_file = os.path.join(temp_dir, "dependencies.txt")
137
- with open(dependendies_file, "w") as f:
138
- installed_packages = [pkg for pkg in pkg_resources.working_set]
139
- lines = []
140
- for pkg in sorted(installed_packages, key=lambda p: p.key):
141
- lines.append(f"{pkg.key}=={pkg.version}\n")
142
- f.writelines(lines)
143
- tar.add(dependendies_file, arcname="dependencies.txt")
144
-
145
-
146
- async def add_settings(temp_dir: str, tar: tarfile.TarFile, settings: Settings):
147
- loop = asyncio.get_event_loop()
148
- await loop.run_in_executor(None, _add_settings_to_tar, temp_dir, tar, settings)
149
-
150
-
151
- def _add_settings_to_tar(temp_dir: str, tar: tarfile.TarFile, settings: Settings):
152
- remove_sensitive_settings(settings)
153
- settings_file = os.path.join(temp_dir, "settings.json")
154
- with open(settings_file, "w") as f:
155
- f.write(settings.model_dump_json(indent=4))
156
- tar.add(settings_file, arcname="settings.json")
157
-
158
-
159
- def remove_sensitive_settings(settings: Settings):
160
- for sensitive_setting in [
161
- "nua_api_key",
162
- "jwk_key",
163
- "gcs_base64_creds",
164
- "s3_client_secret",
165
- "driver_pg_url",
166
- ]:
167
- if hasattr(settings, sensitive_setting):
168
- setattr(settings, sensitive_setting, "********")
169
-
170
-
171
- async def add_logs(tar):
172
- loop = asyncio.get_event_loop()
173
- await loop.run_in_executor(None, _add_logs_to_tar, tar)
174
-
175
-
176
- def _add_logs_to_tar(tar: tarfile.TarFile):
177
- log_settings = LogSettings()
178
- access_log = os.path.realpath(log_settings.access_log)
179
- tar.add(access_log, arcname="logs/access.log")
180
- error_log = os.path.realpath(log_settings.error_log)
181
- tar.add(error_log, arcname="logs/error.log")
182
- info_log = os.path.realpath(log_settings.info_log)
183
- tar.add(info_log, arcname="logs/info.log")