nucliadb 6.3.3.post3608__py3-none-any.whl → 6.3.3.post3611__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,6 +37,7 @@ from nucliadb.ingest.orm.knowledgebox import (
37
37
  RESOURCE_TO_DELETE_STORAGE_BASE,
38
38
  KnowledgeBox,
39
39
  )
40
+ from nucliadb.tasks.retries import purge_metadata as purge_task_metadata
40
41
  from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig, VectorSetPurge
41
42
  from nucliadb_telemetry import errors
42
43
  from nucliadb_telemetry.logs import setup_logging
@@ -270,6 +271,7 @@ async def main():
270
271
  service_name=SERVICE_NAME,
271
272
  )
272
273
  try:
274
+ purge_task_metadata_task = asyncio.create_task(purge_task_metadata(driver))
273
275
  purge_resources_storage_task = asyncio.create_task(
274
276
  purge_deleted_resource_storage(driver, storage)
275
277
  )
@@ -277,6 +279,7 @@ async def main():
277
279
  await purge_kb_storage(driver, storage)
278
280
  await purge_kb_vectorsets(driver, storage)
279
281
  await purge_resources_storage_task
282
+ await purge_task_metadata_task
280
283
  except Exception as ex: # pragma: no cover
281
284
  logger.exception("Unhandled exception on purge command")
282
285
  errors.capture_exception(ex)
nucliadb/tasks/retries.py CHANGED
@@ -19,12 +19,15 @@
19
19
  #
20
20
  import functools
21
21
  import logging
22
+ from datetime import datetime, timezone
22
23
  from enum import Enum
23
- from typing import Callable, Optional
24
+ from typing import Callable, Optional, cast
24
25
 
25
26
  from pydantic import BaseModel
26
27
 
27
28
  from nucliadb.common.context import ApplicationContext
29
+ from nucliadb.common.maindb.driver import Driver
30
+ from nucliadb.common.maindb.pg import PGDriver, PGTransaction
28
31
 
29
32
  logger = logging.getLogger(__name__)
30
33
 
@@ -41,6 +44,7 @@ class TaskMetadata(BaseModel):
41
44
  status: Status
42
45
  retries: int = 0
43
46
  error_messages: list[str] = []
47
+ last_modified: Optional[datetime] = None
44
48
 
45
49
 
46
50
  class TaskRetryHandler:
@@ -84,16 +88,10 @@ class TaskRetryHandler:
84
88
  )
85
89
 
86
90
  async def get_metadata(self) -> Optional[TaskMetadata]:
87
- async with self.context.kv_driver.transaction(read_only=True) as txn:
88
- metadata = await txn.get(self.metadata_key)
89
- if metadata is None:
90
- return None
91
- return TaskMetadata.model_validate_json(metadata)
91
+ return await _get_metadata(self.context.kv_driver, self.metadata_key)
92
92
 
93
93
  async def set_metadata(self, metadata: TaskMetadata) -> None:
94
- async with self.context.kv_driver.transaction() as txn:
95
- await txn.set(self.metadata_key, metadata.model_dump_json().encode())
96
- await txn.commit()
94
+ await _set_metadata(self.context.kv_driver, self.metadata_key, metadata)
97
95
 
98
96
  def wrap(self, func: Callable) -> Callable:
99
97
  @functools.wraps(func)
@@ -106,6 +104,7 @@ class TaskRetryHandler:
106
104
  task_id=self.task_id,
107
105
  status=TaskMetadata.Status.RUNNING,
108
106
  retries=0,
107
+ last_modified=datetime.now(timezone.utc),
109
108
  )
110
109
  await self.set_metadata(metadata)
111
110
 
@@ -123,6 +122,7 @@ class TaskRetryHandler:
123
122
  f"Task reached max retries. Setting to FAILED state",
124
123
  extra={"kbid": self.kbid, "task_type": self.task_type, "task_id": self.task_id},
125
124
  )
125
+ metadata.last_modified = datetime.now(timezone.utc)
126
126
  await self.set_metadata(metadata)
127
127
  return
128
128
  try:
@@ -144,6 +144,91 @@ class TaskRetryHandler:
144
144
  metadata.status = TaskMetadata.Status.COMPLETED
145
145
  return func_result
146
146
  finally:
147
+ metadata.last_modified = datetime.now(timezone.utc)
147
148
  await self.set_metadata(metadata)
148
149
 
149
150
  return wrapper
151
+
152
+
153
async def _get_metadata(kv_driver: Driver, metadata_key: str) -> Optional[TaskMetadata]:
    """
    Read the task metadata stored under `metadata_key`.

    The value is fetched inside a read-only transaction on `kv_driver` and parsed
    from JSON into a TaskMetadata. Returns None when the key holds no value.
    """
    async with kv_driver.transaction(read_only=True) as txn:
        raw = await txn.get(metadata_key)
        return None if raw is None else TaskMetadata.model_validate_json(raw)
159
+
160
+
161
async def _set_metadata(kv_driver: Driver, metadata_key: str, metadata: TaskMetadata) -> None:
    """
    Store `metadata` under `metadata_key`.

    The model is serialized to JSON, encoded to bytes, written in a transaction on
    `kv_driver`, and the transaction is committed before returning.
    """
    async with kv_driver.transaction() as txn:
        payload = metadata.model_dump_json().encode()
        await txn.set(metadata_key, payload)
        await txn.commit()
165
+
166
+
167
async def purge_metadata(kv_driver: Driver) -> int:
    """
    Purge old task metadata records, page by page, and return how many were removed.

    Repeatedly calls `purge_batch`, feeding each returned key back in as the next
    starting point, until it reports that there is nothing left to scan (a None
    next key). Drivers that are not a PGDriver are left untouched and 0 is returned.
    """
    if not isinstance(kv_driver, PGDriver):
        return 0

    purged_count = 0
    # The empty string sorts before every key, so the first page starts at the beginning.
    next_start: Optional[str] = ""
    while next_start is not None:
        next_start, batch_purged = await purge_batch(kv_driver, next_start)
        purged_count += batch_purged
    return purged_count
183
+
184
+
185
async def purge_batch(
    kv_driver: PGDriver, start: Optional[str] = None, batch_size: int = 200
) -> tuple[Optional[str], int]:
    """
    Scan one page of task metadata keys and delete the records that can be purged.

    Up to `batch_size` keys of the form `/kbs/*/tasks/*/*` that are strictly greater
    than `start` are read in key order. A record is purged when its status is
    COMPLETED or FAILED and its `last_modified` is either missing or at least
    15 days old.

    `start` is the exclusive lower bound of the scan. None (the default) and ""
    both mean "scan from the first key".

    Returns a `(next_start, purged)` tuple. `next_start` is the last key scanned,
    to be passed as `start` for the following page, or None when no keys were
    found (nothing left to scan). `purged` is the number of records deleted by
    this call.
    """
    # In SQL `key > NULL` never matches, so binding None would silently select no
    # rows and report the purge as finished. Treat None as "from the beginning".
    lower_bound = "" if start is None else start

    async with kv_driver.transaction() as txn:
        pg_txn = cast(PGTransaction, txn)
        async with pg_txn.connection.cursor() as cur:
            await cur.execute(
                """
                SELECT key from resources
                WHERE key ~ '^/kbs/[^/]*/tasks/[^/]*/[^/]*$'
                AND key > %s
                ORDER BY key
                LIMIT %s
                """,
                (lower_bound, batch_size),
            )
            records = await cur.fetchall()
            keys = [r[0] for r in records]

    if not keys:
        # No more records to purge
        return None, 0

    final_states = (TaskMetadata.Status.COMPLETED, TaskMetadata.Status.FAILED)
    # One reference time for the whole page keeps the age check consistent.
    now = datetime.now(timezone.utc)

    to_delete = []
    for key in keys:
        metadata = await _get_metadata(kv_driver, key)
        if metadata is None:  # pragma: no cover
            # The record disappeared between the key scan and this read.
            continue
        if metadata.status not in final_states:
            continue
        last_modified = metadata.last_modified
        if last_modified is not None:
            if last_modified.tzinfo is None:
                # NOTE(review): naive timestamps are assumed to be UTC, which is what
                # this module writes; subtracting naive from aware would raise TypeError.
                last_modified = last_modified.replace(tzinfo=timezone.utc)
            if (now - last_modified).days < 15:
                continue
        # Records without a last_modified timestamp are considered old.
        to_delete.append(key)

    # Delete in small chunks so each transaction stays short.
    delete_batch_size = 50
    for offset in range(0, len(to_delete), delete_batch_size):
        async with kv_driver.transaction() as txn:
            for key in to_delete[offset : offset + delete_batch_size]:
                logger.info("Purging task metadata", extra={"key": key})
                await txn.delete(key)
            await txn.commit()
    return keys[-1], len(to_delete)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.3.3.post3608
3
+ Version: 6.3.3.post3611
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.3.post3608
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.3.post3608
25
- Requires-Dist: nucliadb-protos>=6.3.3.post3608
26
- Requires-Dist: nucliadb-models>=6.3.3.post3608
27
- Requires-Dist: nidx-protos>=6.3.3.post3608
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.3.post3611
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.3.post3611
25
+ Requires-Dist: nucliadb-protos>=6.3.3.post3611
26
+ Requires-Dist: nucliadb-models>=6.3.3.post3611
27
+ Requires-Dist: nidx-protos>=6.3.3.post3611
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn
@@ -165,7 +165,7 @@ nucliadb/migrator/settings.py,sha256=jOUX0ZMunCXN8HpF9xXN0aunJYRhu4Vdr_ffjRIqwtw
165
165
  nucliadb/migrator/utils.py,sha256=NgUreUvON8_nWEzTxELBMWlfV7E6-6qi-g0DMEbVEz4,2885
166
166
  nucliadb/models/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
167
167
  nucliadb/models/responses.py,sha256=qnuOoc7TrVSUnpikfTwHLKez47_DE4mSFzpxrwtqijA,1599
168
- nucliadb/purge/__init__.py,sha256=BphuNvsJ1aSwuVXUcSOaK4nj9pDcpuKRBf_QAcoRj-A,11787
168
+ nucliadb/purge/__init__.py,sha256=p-DxJ3vGtEcCuiSsWY22bQn99vTMHk32BQdQJVy5b0k,11983
169
169
  nucliadb/purge/orphan_shards.py,sha256=AU1Jfc4qtQFasB6dkuGz0a_Zjs-i7liUTf7Xpl8R8ng,7939
170
170
  nucliadb/reader/__init__.py,sha256=C5Efic7WlGm2U2C5WOyquMFbIj2Pojwe_8mwzVYnOzE,1304
171
171
  nucliadb/reader/app.py,sha256=Se-BFTE6d1v1msLzQn4q5XIhjnSxa2ckDSHdvm7NRf8,3096
@@ -276,7 +276,7 @@ nucliadb/tasks/consumer.py,sha256=4CWfBdXVr2a25n7seldbQ0PaK0FcxJZuWgosU6aODS8,69
276
276
  nucliadb/tasks/logger.py,sha256=C7keOEO_mjLVp5VbqAZ2QXfqVB2Hot7NgBlUP_SDSMw,924
277
277
  nucliadb/tasks/models.py,sha256=qrZKi5DNDQ07waMsp5L4_Fi7WRs57YiO-kmXlrBzEAA,1168
278
278
  nucliadb/tasks/producer.py,sha256=UnpJAzhj_GElsCoO5G6T4m6MshsgOaqR2tVzJmEta64,2625
279
- nucliadb/tasks/retries.py,sha256=tLNtwAutGEfFV3Adr4a9ew-Wg4vuxBd72GGDE_Mma8s,5160
279
+ nucliadb/tasks/retries.py,sha256=L8jbnkyNe6LVoXAmpdtl4QLoMNI8NUXOna7u0sSM0_Y,8157
280
280
  nucliadb/tasks/utils.py,sha256=tV1AbWdFc3qfIULX44Veqj41FCD1B6XYjG6brULBeiw,1459
281
281
  nucliadb/tests/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
282
282
  nucliadb/tests/config.py,sha256=JN_Jhgj-fwM9_8IeO9pwxr6C1PiwRDrXxm67Y38rU30,2080
@@ -351,8 +351,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
351
351
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
352
352
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
353
353
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
354
- nucliadb-6.3.3.post3608.dist-info/METADATA,sha256=D39vPx8WWL01vqSIJ7XLuZ5j4QTwhJWsYECLccPs6lk,4291
355
- nucliadb-6.3.3.post3608.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
356
- nucliadb-6.3.3.post3608.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
357
- nucliadb-6.3.3.post3608.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
358
- nucliadb-6.3.3.post3608.dist-info/RECORD,,
354
+ nucliadb-6.3.3.post3611.dist-info/METADATA,sha256=k4CK1ALRNK5NsSVw0Lo-VG8RNfvYELnUJbd2pxziQzE,4291
355
+ nucliadb-6.3.3.post3611.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
356
+ nucliadb-6.3.3.post3611.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
357
+ nucliadb-6.3.3.post3611.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
358
+ nucliadb-6.3.3.post3611.dist-info/RECORD,,