nucliadb 6.3.3.post3608__py3-none-any.whl → 6.3.3.post3613__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/backups/create.py +8 -2
- nucliadb/purge/__init__.py +3 -0
- nucliadb/tasks/retries.py +94 -9
- {nucliadb-6.3.3.post3608.dist-info → nucliadb-6.3.3.post3613.dist-info}/METADATA +6 -6
- {nucliadb-6.3.3.post3608.dist-info → nucliadb-6.3.3.post3613.dist-info}/RECORD +8 -8
- {nucliadb-6.3.3.post3608.dist-info → nucliadb-6.3.3.post3613.dist-info}/WHEEL +0 -0
- {nucliadb-6.3.3.post3608.dist-info → nucliadb-6.3.3.post3613.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.3.3.post3608.dist-info → nucliadb-6.3.3.post3613.dist-info}/top_level.txt +0 -0
nucliadb/backups/create.py
CHANGED
@@ -174,7 +174,12 @@ async def backup_resource_with_binaries(
|
|
174
174
|
if not await exists_cf(context, cloud_file):
|
175
175
|
logger.warning(
|
176
176
|
"Cloud file not found in storage, skipping",
|
177
|
-
extra={
|
177
|
+
extra={
|
178
|
+
"kbid": kbid,
|
179
|
+
"rid": rid,
|
180
|
+
"cf_uri": cloud_file.uri,
|
181
|
+
"cf_bucket": cloud_file.bucket_name,
|
182
|
+
},
|
178
183
|
)
|
179
184
|
continue
|
180
185
|
|
@@ -260,7 +265,8 @@ async def delete_metadata(context: ApplicationContext, kbid: str, backup_id: str
|
|
260
265
|
|
261
266
|
|
262
267
|
async def exists_cf(context: ApplicationContext, cf: resources_pb2.CloudFile) -> bool:
|
263
|
-
|
268
|
+
bucket_name = context.blob_storage.get_bucket_name_from_cf(cf)
|
269
|
+
return await context.blob_storage.exists_object(bucket=bucket_name, key=cf.uri)
|
264
270
|
|
265
271
|
|
266
272
|
async def upload_to_bucket(context: ApplicationContext, bytes_iterator: AsyncIterator[bytes], key: str):
|
nucliadb/purge/__init__.py
CHANGED
@@ -37,6 +37,7 @@ from nucliadb.ingest.orm.knowledgebox import (
|
|
37
37
|
RESOURCE_TO_DELETE_STORAGE_BASE,
|
38
38
|
KnowledgeBox,
|
39
39
|
)
|
40
|
+
from nucliadb.tasks.retries import purge_metadata as purge_task_metadata
|
40
41
|
from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig, VectorSetPurge
|
41
42
|
from nucliadb_telemetry import errors
|
42
43
|
from nucliadb_telemetry.logs import setup_logging
|
@@ -270,6 +271,7 @@ async def main():
|
|
270
271
|
service_name=SERVICE_NAME,
|
271
272
|
)
|
272
273
|
try:
|
274
|
+
purge_task_metadata_task = asyncio.create_task(purge_task_metadata(driver))
|
273
275
|
purge_resources_storage_task = asyncio.create_task(
|
274
276
|
purge_deleted_resource_storage(driver, storage)
|
275
277
|
)
|
@@ -277,6 +279,7 @@ async def main():
|
|
277
279
|
await purge_kb_storage(driver, storage)
|
278
280
|
await purge_kb_vectorsets(driver, storage)
|
279
281
|
await purge_resources_storage_task
|
282
|
+
await purge_task_metadata_task
|
280
283
|
except Exception as ex: # pragma: no cover
|
281
284
|
logger.exception("Unhandled exception on purge command")
|
282
285
|
errors.capture_exception(ex)
|
nucliadb/tasks/retries.py
CHANGED
@@ -19,12 +19,15 @@
|
|
19
19
|
#
|
20
20
|
import functools
|
21
21
|
import logging
|
22
|
+
from datetime import datetime, timezone
|
22
23
|
from enum import Enum
|
23
|
-
from typing import Callable, Optional
|
24
|
+
from typing import Callable, Optional, cast
|
24
25
|
|
25
26
|
from pydantic import BaseModel
|
26
27
|
|
27
28
|
from nucliadb.common.context import ApplicationContext
|
29
|
+
from nucliadb.common.maindb.driver import Driver
|
30
|
+
from nucliadb.common.maindb.pg import PGDriver, PGTransaction
|
28
31
|
|
29
32
|
logger = logging.getLogger(__name__)
|
30
33
|
|
@@ -41,6 +44,7 @@ class TaskMetadata(BaseModel):
|
|
41
44
|
status: Status
|
42
45
|
retries: int = 0
|
43
46
|
error_messages: list[str] = []
|
47
|
+
last_modified: Optional[datetime] = None
|
44
48
|
|
45
49
|
|
46
50
|
class TaskRetryHandler:
|
@@ -84,16 +88,10 @@ class TaskRetryHandler:
|
|
84
88
|
)
|
85
89
|
|
86
90
|
async def get_metadata(self) -> Optional[TaskMetadata]:
|
87
|
-
|
88
|
-
metadata = await txn.get(self.metadata_key)
|
89
|
-
if metadata is None:
|
90
|
-
return None
|
91
|
-
return TaskMetadata.model_validate_json(metadata)
|
91
|
+
return await _get_metadata(self.context.kv_driver, self.metadata_key)
|
92
92
|
|
93
93
|
async def set_metadata(self, metadata: TaskMetadata) -> None:
|
94
|
-
|
95
|
-
await txn.set(self.metadata_key, metadata.model_dump_json().encode())
|
96
|
-
await txn.commit()
|
94
|
+
await _set_metadata(self.context.kv_driver, self.metadata_key, metadata)
|
97
95
|
|
98
96
|
def wrap(self, func: Callable) -> Callable:
|
99
97
|
@functools.wraps(func)
|
@@ -106,6 +104,7 @@ class TaskRetryHandler:
|
|
106
104
|
task_id=self.task_id,
|
107
105
|
status=TaskMetadata.Status.RUNNING,
|
108
106
|
retries=0,
|
107
|
+
last_modified=datetime.now(timezone.utc),
|
109
108
|
)
|
110
109
|
await self.set_metadata(metadata)
|
111
110
|
|
@@ -123,6 +122,7 @@ class TaskRetryHandler:
|
|
123
122
|
f"Task reached max retries. Setting to FAILED state",
|
124
123
|
extra={"kbid": self.kbid, "task_type": self.task_type, "task_id": self.task_id},
|
125
124
|
)
|
125
|
+
metadata.last_modified = datetime.now(timezone.utc)
|
126
126
|
await self.set_metadata(metadata)
|
127
127
|
return
|
128
128
|
try:
|
@@ -144,6 +144,91 @@ class TaskRetryHandler:
|
|
144
144
|
metadata.status = TaskMetadata.Status.COMPLETED
|
145
145
|
return func_result
|
146
146
|
finally:
|
147
|
+
metadata.last_modified = datetime.now(timezone.utc)
|
147
148
|
await self.set_metadata(metadata)
|
148
149
|
|
149
150
|
return wrapper
|
151
|
+
|
152
|
+
|
153
|
+
async def _get_metadata(kv_driver: Driver, metadata_key: str) -> Optional[TaskMetadata]:
|
154
|
+
async with kv_driver.transaction(read_only=True) as txn:
|
155
|
+
metadata = await txn.get(metadata_key)
|
156
|
+
if metadata is None:
|
157
|
+
return None
|
158
|
+
return TaskMetadata.model_validate_json(metadata)
|
159
|
+
|
160
|
+
|
161
|
+
async def _set_metadata(kv_driver: Driver, metadata_key: str, metadata: TaskMetadata) -> None:
|
162
|
+
async with kv_driver.transaction() as txn:
|
163
|
+
await txn.set(metadata_key, metadata.model_dump_json().encode())
|
164
|
+
await txn.commit()
|
165
|
+
|
166
|
+
|
167
|
+
async def purge_metadata(kv_driver: Driver) -> int:
|
168
|
+
"""
|
169
|
+
Purges old task metadata records that are in a final state and older than 15 days.
|
170
|
+
Returns the total number of records purged.
|
171
|
+
"""
|
172
|
+
if not isinstance(kv_driver, PGDriver):
|
173
|
+
return 0
|
174
|
+
|
175
|
+
total_purged = 0
|
176
|
+
start: Optional[str] = ""
|
177
|
+
while True:
|
178
|
+
start, purged = await purge_batch(kv_driver, start)
|
179
|
+
total_purged += purged
|
180
|
+
if start is None:
|
181
|
+
break
|
182
|
+
return total_purged
|
183
|
+
|
184
|
+
|
185
|
+
async def purge_batch(
|
186
|
+
kv_driver: PGDriver, start: Optional[str] = None, batch_size: int = 200
|
187
|
+
) -> tuple[Optional[str], int]:
|
188
|
+
"""
|
189
|
+
Returns the next start key and the number of purged records. If start is None, it means there are no more records to purge.
|
190
|
+
"""
|
191
|
+
async with kv_driver.transaction() as txn:
|
192
|
+
txn = cast(PGTransaction, txn)
|
193
|
+
async with txn.connection.cursor() as cur:
|
194
|
+
await cur.execute(
|
195
|
+
"""
|
196
|
+
SELECT key from resources
|
197
|
+
WHERE key ~ '^/kbs/[^/]*/tasks/[^/]*/[^/]*$'
|
198
|
+
AND key > %s
|
199
|
+
ORDER BY key
|
200
|
+
LIMIT %s
|
201
|
+
""",
|
202
|
+
(start, batch_size),
|
203
|
+
)
|
204
|
+
records = await cur.fetchall()
|
205
|
+
keys = [r[0] for r in records]
|
206
|
+
|
207
|
+
if not keys:
|
208
|
+
# No more records to purge
|
209
|
+
return None, 0
|
210
|
+
|
211
|
+
to_delete = []
|
212
|
+
for key in keys:
|
213
|
+
metadata = await _get_metadata(kv_driver, key)
|
214
|
+
if metadata is None: # pragma: no cover
|
215
|
+
continue
|
216
|
+
task_finished = metadata.status in (TaskMetadata.Status.COMPLETED, TaskMetadata.Status.FAILED)
|
217
|
+
old_task = (
|
218
|
+
metadata.last_modified is None
|
219
|
+
or (datetime.now(timezone.utc) - metadata.last_modified).days >= 15
|
220
|
+
)
|
221
|
+
if task_finished and old_task:
|
222
|
+
to_delete.append(key)
|
223
|
+
|
224
|
+
n_to_delete = len(to_delete)
|
225
|
+
delete_batch_size = 50
|
226
|
+
while len(to_delete) > 0:
|
227
|
+
batch = to_delete[:delete_batch_size]
|
228
|
+
to_delete = to_delete[delete_batch_size:]
|
229
|
+
async with kv_driver.transaction() as txn:
|
230
|
+
for key in batch:
|
231
|
+
logger.info("Purging task metadata", extra={"key": key})
|
232
|
+
await txn.delete(key)
|
233
|
+
await txn.commit()
|
234
|
+
return keys[-1], n_to_delete
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.3.3.post3608
|
3
|
+
Version: 6.3.3.post3613
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.3.3.post3608
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.3.post3608
|
25
|
-
Requires-Dist: nucliadb-protos>=6.3.3.post3608
|
26
|
-
Requires-Dist: nucliadb-models>=6.3.3.post3608
|
27
|
-
Requires-Dist: nidx-protos>=6.3.3.post3608
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.3.3.post3613
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.3.post3613
|
25
|
+
Requires-Dist: nucliadb-protos>=6.3.3.post3613
|
26
|
+
Requires-Dist: nucliadb-models>=6.3.3.post3613
|
27
|
+
Requires-Dist: nidx-protos>=6.3.3.post3613
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn
|
@@ -41,7 +41,7 @@ nucliadb/openapi.py,sha256=wDiw0dVEvTpJvbatkJ0JZLkKm9RItZT5PWRHjqRfqTA,2272
|
|
41
41
|
nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
42
42
|
nucliadb/backups/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
43
43
|
nucliadb/backups/const.py,sha256=9vPAhLxQO_gNAjSdPxWuv3V66s9WcdpjOQ89CZlfmuk,1894
|
44
|
-
nucliadb/backups/create.py,sha256=
|
44
|
+
nucliadb/backups/create.py,sha256=TJtYewhD0jkYV_h3rNUhKzhqB2QHAhLWYOgRVlGysGs,11450
|
45
45
|
nucliadb/backups/delete.py,sha256=1rnBhVUGYYZJXSZUrrgYMDZ5NyswEWkIA-G-crRCyHk,2404
|
46
46
|
nucliadb/backups/models.py,sha256=-hITU4Mv6AxePu12toBu_fjpEv6vVGcwNVxV22O9jQA,1273
|
47
47
|
nucliadb/backups/restore.py,sha256=YD3Bbo9ry4YLMM6imB-DXbOAMXfGxVzJtTAAUFDvB0I,10153
|
@@ -165,7 +165,7 @@ nucliadb/migrator/settings.py,sha256=jOUX0ZMunCXN8HpF9xXN0aunJYRhu4Vdr_ffjRIqwtw
|
|
165
165
|
nucliadb/migrator/utils.py,sha256=NgUreUvON8_nWEzTxELBMWlfV7E6-6qi-g0DMEbVEz4,2885
|
166
166
|
nucliadb/models/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
167
167
|
nucliadb/models/responses.py,sha256=qnuOoc7TrVSUnpikfTwHLKez47_DE4mSFzpxrwtqijA,1599
|
168
|
-
nucliadb/purge/__init__.py,sha256=
|
168
|
+
nucliadb/purge/__init__.py,sha256=p-DxJ3vGtEcCuiSsWY22bQn99vTMHk32BQdQJVy5b0k,11983
|
169
169
|
nucliadb/purge/orphan_shards.py,sha256=AU1Jfc4qtQFasB6dkuGz0a_Zjs-i7liUTf7Xpl8R8ng,7939
|
170
170
|
nucliadb/reader/__init__.py,sha256=C5Efic7WlGm2U2C5WOyquMFbIj2Pojwe_8mwzVYnOzE,1304
|
171
171
|
nucliadb/reader/app.py,sha256=Se-BFTE6d1v1msLzQn4q5XIhjnSxa2ckDSHdvm7NRf8,3096
|
@@ -276,7 +276,7 @@ nucliadb/tasks/consumer.py,sha256=4CWfBdXVr2a25n7seldbQ0PaK0FcxJZuWgosU6aODS8,69
|
|
276
276
|
nucliadb/tasks/logger.py,sha256=C7keOEO_mjLVp5VbqAZ2QXfqVB2Hot7NgBlUP_SDSMw,924
|
277
277
|
nucliadb/tasks/models.py,sha256=qrZKi5DNDQ07waMsp5L4_Fi7WRs57YiO-kmXlrBzEAA,1168
|
278
278
|
nucliadb/tasks/producer.py,sha256=UnpJAzhj_GElsCoO5G6T4m6MshsgOaqR2tVzJmEta64,2625
|
279
|
-
nucliadb/tasks/retries.py,sha256=
|
279
|
+
nucliadb/tasks/retries.py,sha256=L8jbnkyNe6LVoXAmpdtl4QLoMNI8NUXOna7u0sSM0_Y,8157
|
280
280
|
nucliadb/tasks/utils.py,sha256=tV1AbWdFc3qfIULX44Veqj41FCD1B6XYjG6brULBeiw,1459
|
281
281
|
nucliadb/tests/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
282
282
|
nucliadb/tests/config.py,sha256=JN_Jhgj-fwM9_8IeO9pwxr6C1PiwRDrXxm67Y38rU30,2080
|
@@ -351,8 +351,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
351
351
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
352
352
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
353
353
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
354
|
-
nucliadb-6.3.3.
|
355
|
-
nucliadb-6.3.3.
|
356
|
-
nucliadb-6.3.3.
|
357
|
-
nucliadb-6.3.3.
|
358
|
-
nucliadb-6.3.3.
|
354
|
+
nucliadb-6.3.3.post3613.dist-info/METADATA,sha256=NjyQDQSB_zbOUyfe9Zcekfh2SMCNZHggFv3GkrVI6gE,4291
|
355
|
+
nucliadb-6.3.3.post3613.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
356
|
+
nucliadb-6.3.3.post3613.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
357
|
+
nucliadb-6.3.3.post3613.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
358
|
+
nucliadb-6.3.3.post3613.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|