nucliadb 6.2.0.post2679__py3-none-any.whl → 6.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0028_extracted_vectors_reference.py +61 -0
- migrations/0029_backfill_field_status.py +149 -0
- migrations/0030_label_deduplication.py +60 -0
- nucliadb/common/cluster/manager.py +41 -331
- nucliadb/common/cluster/rebalance.py +2 -2
- nucliadb/common/cluster/rollover.py +12 -71
- nucliadb/common/cluster/settings.py +3 -0
- nucliadb/common/cluster/standalone/utils.py +0 -43
- nucliadb/common/cluster/utils.py +0 -16
- nucliadb/common/counters.py +1 -0
- nucliadb/common/datamanagers/fields.py +48 -7
- nucliadb/common/datamanagers/vectorsets.py +11 -2
- nucliadb/common/external_index_providers/base.py +2 -1
- nucliadb/common/external_index_providers/pinecone.py +3 -5
- nucliadb/common/ids.py +18 -4
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +76 -37
- nucliadb/export_import/models.py +3 -3
- nucliadb/health.py +0 -7
- nucliadb/ingest/app.py +0 -8
- nucliadb/ingest/consumer/auditing.py +1 -1
- nucliadb/ingest/consumer/shard_creator.py +1 -1
- nucliadb/ingest/fields/base.py +83 -21
- nucliadb/ingest/orm/brain.py +55 -56
- nucliadb/ingest/orm/broker_message.py +12 -2
- nucliadb/ingest/orm/entities.py +6 -17
- nucliadb/ingest/orm/knowledgebox.py +44 -22
- nucliadb/ingest/orm/processor/data_augmentation.py +7 -29
- nucliadb/ingest/orm/processor/processor.py +5 -2
- nucliadb/ingest/orm/resource.py +222 -413
- nucliadb/ingest/processing.py +8 -2
- nucliadb/ingest/serialize.py +77 -46
- nucliadb/ingest/service/writer.py +2 -56
- nucliadb/ingest/settings.py +1 -4
- nucliadb/learning_proxy.py +6 -4
- nucliadb/purge/__init__.py +102 -12
- nucliadb/purge/orphan_shards.py +6 -4
- nucliadb/reader/api/models.py +3 -3
- nucliadb/reader/api/v1/__init__.py +1 -0
- nucliadb/reader/api/v1/download.py +2 -2
- nucliadb/reader/api/v1/knowledgebox.py +3 -3
- nucliadb/reader/api/v1/resource.py +23 -12
- nucliadb/reader/api/v1/services.py +4 -4
- nucliadb/reader/api/v1/vectorsets.py +48 -0
- nucliadb/search/api/v1/ask.py +11 -1
- nucliadb/search/api/v1/feedback.py +3 -3
- nucliadb/search/api/v1/knowledgebox.py +8 -13
- nucliadb/search/api/v1/search.py +3 -2
- nucliadb/search/api/v1/suggest.py +0 -2
- nucliadb/search/predict.py +6 -4
- nucliadb/search/requesters/utils.py +1 -2
- nucliadb/search/search/chat/ask.py +77 -13
- nucliadb/search/search/chat/prompt.py +16 -5
- nucliadb/search/search/chat/query.py +74 -34
- nucliadb/search/search/exceptions.py +2 -7
- nucliadb/search/search/find.py +9 -5
- nucliadb/search/search/find_merge.py +10 -4
- nucliadb/search/search/graph_strategy.py +884 -0
- nucliadb/search/search/hydrator.py +6 -0
- nucliadb/search/search/merge.py +79 -24
- nucliadb/search/search/query.py +74 -245
- nucliadb/search/search/query_parser/exceptions.py +11 -1
- nucliadb/search/search/query_parser/fetcher.py +405 -0
- nucliadb/search/search/query_parser/models.py +0 -3
- nucliadb/search/search/query_parser/parser.py +22 -21
- nucliadb/search/search/rerankers.py +1 -42
- nucliadb/search/search/shards.py +19 -0
- nucliadb/standalone/api_router.py +2 -14
- nucliadb/standalone/settings.py +4 -0
- nucliadb/train/generators/field_streaming.py +7 -3
- nucliadb/train/lifecycle.py +3 -6
- nucliadb/train/nodes.py +14 -12
- nucliadb/train/resource.py +380 -0
- nucliadb/writer/api/constants.py +20 -16
- nucliadb/writer/api/v1/__init__.py +1 -0
- nucliadb/writer/api/v1/export_import.py +1 -1
- nucliadb/writer/api/v1/field.py +13 -7
- nucliadb/writer/api/v1/knowledgebox.py +3 -46
- nucliadb/writer/api/v1/resource.py +20 -13
- nucliadb/writer/api/v1/services.py +10 -1
- nucliadb/writer/api/v1/upload.py +61 -34
- nucliadb/writer/{vectorsets.py → api/v1/vectorsets.py} +99 -47
- nucliadb/writer/back_pressure.py +17 -46
- nucliadb/writer/resource/basic.py +9 -7
- nucliadb/writer/resource/field.py +42 -9
- nucliadb/writer/settings.py +2 -2
- nucliadb/writer/tus/gcs.py +11 -10
- {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/METADATA +11 -14
- {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/RECORD +94 -96
- {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/WHEEL +1 -1
- nucliadb/common/cluster/discovery/base.py +0 -178
- nucliadb/common/cluster/discovery/k8s.py +0 -301
- nucliadb/common/cluster/discovery/manual.py +0 -57
- nucliadb/common/cluster/discovery/single.py +0 -51
- nucliadb/common/cluster/discovery/types.py +0 -32
- nucliadb/common/cluster/discovery/utils.py +0 -67
- nucliadb/common/cluster/standalone/grpc_node_binding.py +0 -349
- nucliadb/common/cluster/standalone/index_node.py +0 -123
- nucliadb/common/cluster/standalone/service.py +0 -84
- nucliadb/standalone/introspect.py +0 -208
- nucliadb-6.2.0.post2679.dist-info/zip-safe +0 -1
- /nucliadb/common/{cluster/discovery → models_utils}/__init__.py +0 -0
- {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/top_level.txt +0 -0
@@ -1,208 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
|
21
|
-
import asyncio
|
22
|
-
import os
|
23
|
-
import platform
|
24
|
-
import sys
|
25
|
-
import tarfile
|
26
|
-
import tempfile
|
27
|
-
from collections.abc import AsyncGenerator
|
28
|
-
from typing import Optional
|
29
|
-
|
30
|
-
import pkg_resources
|
31
|
-
import psutil
|
32
|
-
from fastapi import FastAPI
|
33
|
-
from pydantic import BaseModel
|
34
|
-
|
35
|
-
from nucliadb.common.cluster import manager as cluster_manager
|
36
|
-
from nucliadb.standalone.settings import Settings
|
37
|
-
from nucliadb_telemetry.settings import LogOutputType, LogSettings
|
38
|
-
|
39
|
-
# Number of bytes in one mebibyte; used below to express psutil byte counts in MB.
MB = 1024 * 1024
# Size of each read issued by stream_out_tar() when streaming the tarball.
CHUNK_SIZE = 2 * MB
# Plain-text report rendered with str.format() by _add_system_info_to_tar().
# Every {placeholder} must be supplied as a keyword argument there; the memory
# figures are formatted with two decimals.
SYSTEM_INFO_TEMPLATE = """System info
===========

Python
------
- Version: {python_version}

Operative system
----------------
- Name: {os_name}
- Release: {os_release}
- Version: {os_version}
- Machine: {os_machine}
- File System Encoding: {os_file_system_encoding}

CPU information
---------------
- Number of CPUs: {cpu_count}

Memory information
------------------
- Total: {memory_total:.2f} MB
- Available: {memory_available:.2f} MB
- Used: {memory_used:.2f} MB
- Used %: {memory_used_percent:.2f}%
"""
|
67
|
-
|
68
|
-
|
69
|
-
class NodeInfo(BaseModel):
    """Serializable summary of one index node for the introspection report.

    Instances are built in _add_cluster_info_to_tar() from the objects returned
    by cluster_manager.get_index_nodes(), copying the same-named attributes.
    """

    # Value of the node's `id` attribute.
    id: str
    # Value of the node's `address` attribute.
    address: str
    # Value of the node's `shard_count` attribute.
    shard_count: int
    # Value of the node's `primary_id` attribute; may be absent.
    # NOTE(review): presumably set only for replica nodes -- confirm.
    primary_id: Optional[str] = None
|
74
|
-
|
75
|
-
|
76
|
-
class ClusterInfo(BaseModel):
    """Top-level document dumped as JSON into cluster_info.txt."""

    # One entry per node returned by cluster_manager.get_index_nodes().
    nodes: list[NodeInfo]
|
78
|
-
|
79
|
-
|
80
|
-
async def stream_tar(app: FastAPI) -> AsyncGenerator[bytes, None]:
    """Build the introspection tarball and stream it back chunk by chunk.

    The archive is assembled in a temporary directory and contains the system
    information, the installed dependencies, the cluster information, the
    (masked) settings and, when logging to files is configured, the log files.

    Args:
        app: Application object; its ``settings`` attribute is read.

    Yields:
        Consecutive byte chunks of the gzip-compressed tar archive.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        tar_file = os.path.join(temp_dir, "introspect.tar.gz")
        with tarfile.open(tar_file, mode="w:gz") as tar:
            await add_system_info(temp_dir, tar)
            await add_dependencies(temp_dir, tar)
            await add_cluster_info(temp_dir, tar)
            # add_settings() masks secrets in place, so hand it a copy rather
            # than the live application settings. model_copy() is the
            # pydantic v2 spelling of the deprecated .copy().
            settings: Settings = app.settings.model_copy()  # type: ignore
            await add_settings(temp_dir, tar, settings)
            if settings.log_output_type == LogOutputType.FILE:
                await add_logs(tar)

        # The archive must be closed (gzip trailer written) before it is read
        # back, but the temporary directory has to outlive the streaming.
        async for chunk in stream_out_tar(tar_file):
            yield chunk
|
94
|
-
|
95
|
-
|
96
|
-
async def stream_out_tar(tar_file: str) -> AsyncGenerator[bytes, None]:
    """Yield the contents of ``tar_file`` in pieces of at most CHUNK_SIZE bytes.

    Each read is dispatched to the loop's default executor so the blocking
    file I/O does not stall the event loop.

    Args:
        tar_file: Path of the file to stream.

    Yields:
        Non-empty byte chunks, in file order, until end of file.
    """
    # get_running_loop() is the recommended call inside a coroutine.
    loop = asyncio.get_running_loop()
    with open(tar_file, "rb") as f:
        # An empty bytes object signals end of file and ends the loop.
        while chunk := await loop.run_in_executor(None, f.read, CHUNK_SIZE):
            yield chunk
|
103
|
-
|
104
|
-
|
105
|
-
async def add_system_info(temp_dir: str, tar: tarfile.TarFile):
    """Add the system information report to ``tar`` without blocking the loop.

    Args:
        temp_dir: Directory where the intermediate report file is written.
        tar: Open tar archive that receives the report.
    """
    # get_running_loop() is the recommended call inside a coroutine.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _add_system_info_to_tar, temp_dir, tar)
|
108
|
-
|
109
|
-
|
110
|
-
def _add_system_info_to_tar(temp_dir: str, tar: tarfile.TarFile):
    """Render SYSTEM_INFO_TEMPLATE and add it to ``tar`` as ``system_info.txt``.

    Args:
        temp_dir: Directory where the intermediate report file is written.
        tar: Open tar archive that receives the report.
    """
    system_info_file = os.path.join(temp_dir, "system_info.txt")
    # Explicit encoding keeps the report independent of the host locale.
    with open(system_info_file, "w", encoding="utf-8") as f:
        memory = psutil.virtual_memory()
        f.write(
            SYSTEM_INFO_TEMPLATE.format(
                python_version=sys.version,
                # Same value as os.uname().sysname on POSIX, but also
                # available on platforms where os.uname() does not exist.
                os_name=platform.system(),
                os_release=platform.release(),
                os_version=platform.version(),
                os_machine=platform.machine(),
                # Use the imported sys module directly instead of reaching
                # it through os.sys, which is not a public os attribute.
                os_file_system_encoding=sys.getfilesystemencoding(),
                cpu_count=psutil.cpu_count(),
                # psutil reports bytes; the template expects MB.
                memory_total=memory.total / MB,
                memory_available=memory.available / MB,
                memory_used=memory.used / MB,
                memory_used_percent=memory.percent,
            )
        )
    # Added only after the file is closed so the buffered text is on disk.
    tar.add(system_info_file, arcname="system_info.txt")
|
130
|
-
|
131
|
-
|
132
|
-
async def add_dependencies(temp_dir: str, tar: tarfile.TarFile):
    """Add the installed-packages listing to ``tar`` without blocking the loop.

    Args:
        temp_dir: Directory where the intermediate listing file is written.
        tar: Open tar archive that receives the listing.
    """
    # get_running_loop() is the recommended call inside a coroutine.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _add_dependencies_to_tar, temp_dir, tar)
|
135
|
-
|
136
|
-
|
137
|
-
def _add_dependencies_to_tar(temp_dir: str, tar: tarfile.TarFile):
    """Write one ``name==version`` line per installed package and archive it.

    The listing is sorted by package key and stored in ``tar`` as
    ``dependencies.txt``.

    Args:
        temp_dir: Directory where the intermediate listing file is written.
        tar: Open tar archive that receives the listing.
    """
    dependencies_file = os.path.join(temp_dir, "dependencies.txt")
    # Explicit encoding keeps the listing independent of the host locale.
    with open(dependencies_file, "w", encoding="utf-8") as f:
        # sorted() accepts the working set directly; no intermediate copy.
        # NOTE(review): pkg_resources is deprecated upstream in favour of
        # importlib.metadata -- consider migrating.
        packages = sorted(pkg_resources.working_set, key=lambda p: p.key)
        f.writelines(f"{pkg.key}=={pkg.version}\n" for pkg in packages)
    # Added only after the file is closed so the buffered text is on disk.
    tar.add(dependencies_file, arcname="dependencies.txt")
|
146
|
-
|
147
|
-
|
148
|
-
async def add_cluster_info(temp_dir: str, tar: tarfile.TarFile):
    """Add the cluster information dump to ``tar`` without blocking the loop.

    Args:
        temp_dir: Directory where the intermediate dump file is written.
        tar: Open tar archive that receives the dump.
    """
    # get_running_loop() is the recommended call inside a coroutine.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _add_cluster_info_to_tar, temp_dir, tar)
|
151
|
-
|
152
|
-
|
153
|
-
def _add_cluster_info_to_tar(temp_dir: str, tar: tarfile.TarFile):
    """Dump the known index nodes as JSON and add it to ``tar``.

    The nodes come from ``cluster_manager.get_index_nodes()``; the result is
    stored in the archive as ``cluster_info.txt``.

    Args:
        temp_dir: Directory where the intermediate dump file is written.
        tar: Open tar archive that receives the dump.
    """
    cluster_info = ClusterInfo(
        nodes=[
            NodeInfo(
                id=node.id,
                address=node.address,
                shard_count=node.shard_count,
                primary_id=node.primary_id,
            )
            for node in cluster_manager.get_index_nodes()
        ]
    )
    cluster_info_file = os.path.join(temp_dir, "cluster_info.txt")
    # JSON text is written as UTF-8 explicitly so the output does not depend
    # on (or fail under) the host's locale encoding.
    with open(cluster_info_file, "w", encoding="utf-8") as f:
        f.write(cluster_info.model_dump_json(indent=4))
    # Added only after the file is closed so the buffered text is on disk.
    tar.add(cluster_info_file, arcname="cluster_info.txt")
|
169
|
-
|
170
|
-
|
171
|
-
async def add_settings(temp_dir: str, tar: tarfile.TarFile, settings: Settings):
    """Add the masked settings dump to ``tar`` without blocking the loop.

    Args:
        temp_dir: Directory where the intermediate dump file is written.
        tar: Open tar archive that receives the dump.
        settings: Settings object to dump; it is modified in place by the
            masking step, so callers should pass a copy.
    """
    # get_running_loop() is the recommended call inside a coroutine.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _add_settings_to_tar, temp_dir, tar, settings)
|
174
|
-
|
175
|
-
|
176
|
-
def _add_settings_to_tar(temp_dir: str, tar: tarfile.TarFile, settings: Settings):
    """Mask secrets in ``settings``, dump it as JSON and add it to ``tar``.

    The dump is stored in the archive as ``settings.json``.

    Args:
        temp_dir: Directory where the intermediate dump file is written.
        tar: Open tar archive that receives the dump.
        settings: Settings object to dump; mutated in place by
            remove_sensitive_settings().
    """
    # Mask before serializing so no secret ever reaches the disk.
    remove_sensitive_settings(settings)
    settings_file = os.path.join(temp_dir, "settings.json")
    # Settings may hold arbitrary user-supplied text; write the JSON as UTF-8
    # explicitly so it does not depend on (or fail under) the host locale.
    with open(settings_file, "w", encoding="utf-8") as f:
        f.write(settings.model_dump_json(indent=4))
    # Added only after the file is closed so the buffered text is on disk.
    tar.add(settings_file, arcname="settings.json")
|
182
|
-
|
183
|
-
|
184
|
-
def remove_sensitive_settings(settings: Settings):
    """Replace secret-bearing attributes of ``settings`` with a fixed mask.

    The object is modified in place. Attributes it does not have are left
    alone (they are not created).

    Args:
        settings: Object whose sensitive attributes are overwritten.
    """
    masked_names = (
        "nua_api_key",
        "jwk_key",
        "gcs_base64_creds",
        "s3_client_secret",
        "driver_pg_url",
    )
    for name in masked_names:
        if not hasattr(settings, name):
            # Never introduce an attribute the object did not already carry.
            continue
        setattr(settings, name, "********")
|
194
|
-
|
195
|
-
|
196
|
-
async def add_logs(tar: tarfile.TarFile):
    """Add the log files to ``tar`` without blocking the loop.

    Args:
        tar: Open tar archive that receives the log files.
    """
    # get_running_loop() is the recommended call inside a coroutine.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _add_logs_to_tar, tar)
|
199
|
-
|
200
|
-
|
201
|
-
def _add_logs_to_tar(tar: tarfile.TarFile):
    """Add the access, error and info log files to ``tar`` under ``logs/``.

    The file locations are read from a freshly constructed LogSettings and
    resolved with os.path.realpath() before being archived.

    Args:
        tar: Open tar archive that receives the log files.
    """
    log_settings = LogSettings()
    # (settings attribute, name inside the archive), in archive order.
    log_entries = (
        ("access_log", "logs/access.log"),
        ("error_log", "logs/error.log"),
        ("info_log", "logs/info.log"),
    )
    for attribute, archive_name in log_entries:
        resolved = os.path.realpath(getattr(log_settings, attribute))
        tar.add(resolved, arcname=archive_name)
|
@@ -1 +0,0 @@
|
|
1
|
-
|
File without changes
|
File without changes
|
File without changes
|