nucliadb 6.3.1.post3574__py3-none-any.whl → 6.3.1.post3584__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import asyncio
21
+ import logging
21
22
  import tarfile
22
23
  from datetime import datetime, timezone
23
24
  from typing import AsyncIterator, Optional
@@ -29,6 +30,7 @@ from nucliadb.backups.const import (
29
30
  )
30
31
  from nucliadb.backups.models import BackupMetadata, CreateBackupRequest
31
32
  from nucliadb.backups.settings import settings
33
+ from nucliadb.backups.utils import exists_in_storge
32
34
  from nucliadb.common import datamanagers
33
35
  from nucliadb.common.context import ApplicationContext
34
36
  from nucliadb.export_import.utils import (
@@ -44,6 +46,8 @@ from nucliadb_utils.audit.stream import StreamAuditStorage
44
46
  from nucliadb_utils.storages.storage import StorageField
45
47
  from nucliadb_utils.utilities import get_audit
46
48
 
49
+ logger = logging.getLogger(__name__)
50
+
47
51
 
48
52
  async def backup_kb_task(context: ApplicationContext, msg: CreateBackupRequest):
49
53
  kbid = msg.kb_id
@@ -101,6 +105,10 @@ async def backup_resources(context: ApplicationContext, kbid: str, backup_id: st
101
105
  await set_metadata(context, kbid, backup_id, metadata)
102
106
  tasks = []
103
107
  backing_up = []
108
+ logger.info(
109
+ f"Backup resources: {len(metadata.missing_resources)} remaining",
110
+ extra={"kbid": kbid, "backup_id": backup_id},
111
+ )
104
112
  if len(tasks) > 0:
105
113
  resources_bytes = await asyncio.gather(*tasks)
106
114
  metadata.total_size += sum(resources_bytes)
@@ -108,6 +116,7 @@ async def backup_resources(context: ApplicationContext, kbid: str, backup_id: st
108
116
  await set_metadata(context, kbid, backup_id, metadata)
109
117
  tasks = []
110
118
  backing_up = []
119
+ logger.info(f"Backup resources: completed", extra={"kbid": kbid, "backup_id": backup_id})
111
120
 
112
121
 
113
122
  async def backup_resource(context: ApplicationContext, backup_id: str, kbid: str, rid: str) -> int:
@@ -163,6 +172,13 @@ async def backup_resource_with_binaries(
163
172
  nonlocal total_size
164
173
 
165
174
  for cloud_file in get_cloud_files(bm):
175
+ if not await exists_cf(context, cloud_file):
176
+ logger.warning(
177
+ "Cloud file not found in storage, skipping",
178
+ extra={"kbid": kbid, "rid": rid, "cf_uri": cloud_file.uri},
179
+ )
180
+ continue
181
+
166
182
  serialized_cf = cloud_file.SerializeToString()
167
183
 
168
184
  async def cf_iterator():
@@ -244,6 +260,10 @@ async def delete_metadata(context: ApplicationContext, kbid: str, backup_id: str
244
260
  await txn.commit()
245
261
 
246
262
 
263
+ async def exists_cf(context: ApplicationContext, cf: resources_pb2.CloudFile) -> bool:
264
+ return await exists_in_storge(context.blob_storage, cf.bucket_name, cf.uri)
265
+
266
+
247
267
  async def upload_to_bucket(context: ApplicationContext, bytes_iterator: AsyncIterator[bytes], key: str):
248
268
  storage = context.blob_storage
249
269
  bucket = settings.backups_bucket
@@ -21,6 +21,7 @@
21
21
 
22
22
  import asyncio
23
23
  import functools
24
+ import logging
24
25
  import tarfile
25
26
  from typing import AsyncIterator, Callable, Optional, Union
26
27
 
@@ -39,6 +40,8 @@ from nucliadb_protos import knowledgebox_pb2 as kb_pb2
39
40
  from nucliadb_protos.resources_pb2 import CloudFile
40
41
  from nucliadb_protos.writer_pb2 import BrokerMessage
41
42
 
43
+ logger = logging.getLogger(__name__)
44
+
42
45
 
43
46
  async def restore_kb_task(context: ApplicationContext, msg: RestoreBackupRequest):
44
47
  kbid = msg.kb_id
@@ -193,7 +196,7 @@ class ResourceBackupReader:
193
196
  elif tarinfo.name.startswith("cloud-files"):
194
197
  raw_cf = await self.read_data(tarinfo)
195
198
  cf = CloudFile()
196
- cf.FromString(raw_cf)
199
+ cf.ParseFromString(raw_cf)
197
200
  return cf
198
201
  elif tarinfo.name.startswith("binaries"):
199
202
  uri = tarinfo.name.lstrip("binaries/")
@@ -219,14 +222,19 @@ async def restore_resource(context: ApplicationContext, kbid: str, backup_id: st
219
222
  bm = item
220
223
  bm.kbid = kbid
221
224
  break
222
-
223
- # Read the cloud file and its binary
224
- cf = await reader.read_item()
225
- assert isinstance(cf, CloudFile)
226
- cf_binary = await reader.read_item()
227
- assert isinstance(cf_binary, CloudFileBinary)
228
- assert cf.uri == cf_binary.uri
229
- await import_binary(context, kbid, cf, cf_binary.read)
225
+ elif isinstance(item, CloudFile):
226
+ # Read its binary and import it
227
+ cf = item
228
+ cf_binary = await reader.read_item()
229
+ assert isinstance(cf_binary, CloudFileBinary)
230
+ assert cf.uri == cf_binary.uri
231
+ await import_binary(context, kbid, cf, cf_binary.read)
232
+ else:
233
+ logger.error(
234
+ "Unexpected item in resource backup. Backup may be corrupted",
235
+ extra={"item_type": type(item), kbid: kbid, resource_id: resource_id},
236
+ )
237
+ continue
230
238
 
231
239
  await import_broker_message(context, kbid, bm)
232
240
 
nucliadb/backups/utils.py CHANGED
@@ -24,9 +24,12 @@ from nucliadb_utils.storages.storage import Storage
24
24
 
25
25
 
26
26
  async def exists_backup(storage: Storage, backup_id: str) -> bool:
27
- async for _ in storage.iterate_objects(
28
- bucket=settings.backups_bucket,
29
- prefix=StorageKeys.BACKUP_PREFIX.format(backup_id=backup_id),
30
- ):
27
+ return await exists_in_storge(
28
+ storage, settings.backups_bucket, StorageKeys.BACKUP_PREFIX.format(backup_id=backup_id)
29
+ )
30
+
31
+
32
+ async def exists_in_storge(storage: Storage, bucket: str, key: str) -> bool:
33
+ async for _ in storage.iterate_objects(bucket=bucket, prefix=key):
31
34
  return True
32
35
  return False
nucliadb/tasks/retries.py CHANGED
@@ -20,7 +20,7 @@
20
20
  import functools
21
21
  import logging
22
22
  from enum import Enum
23
- from typing import Optional
23
+ from typing import Callable, Optional
24
24
 
25
25
  from pydantic import BaseModel
26
26
 
@@ -73,7 +73,8 @@ class TaskRetryHandler:
73
73
  self.kbid = kbid
74
74
  self.task_type = task_type
75
75
  self.task_id = task_id
76
- self.max_retries = max_retries
76
+ # Limit max retries to 50
77
+ self.max_retries = min(max_retries, 50)
77
78
  self.context = context
78
79
 
79
80
  @property
@@ -94,7 +95,7 @@ class TaskRetryHandler:
94
95
  await txn.set(self.metadata_key, metadata.model_dump_json().encode())
95
96
  await txn.commit()
96
97
 
97
- def wrap(self, func):
98
+ def wrap(self, func: Callable) -> Callable:
98
99
  @functools.wraps(func)
99
100
  async def wrapper(*args, **kwargs):
100
101
  func_result = None
@@ -110,7 +111,7 @@ class TaskRetryHandler:
110
111
 
111
112
  if metadata.status in (TaskMetadata.Status.COMPLETED, TaskMetadata.Status.FAILED):
112
113
  logger.info(
113
- f"{self.type} task is {metadata.status.value}. Skipping",
114
+ f"{self.task_type} task is {metadata.status.value}. Skipping",
114
115
  extra={"kbid": self.kbid, "task_type": self.task_type, "task_id": self.task_id},
115
116
  )
116
117
  return
@@ -130,7 +131,7 @@ class TaskRetryHandler:
130
131
  except Exception as ex:
131
132
  metadata.retries += 1
132
133
  metadata.error_messages.append(str(ex))
133
- logger.info(
134
+ logger.exception(
134
135
  f"Task failed. Will be retried",
135
136
  extra={"kbid": self.kbid, "task_type": self.task_type, "task_id": self.task_id},
136
137
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.3.1.post3574
3
+ Version: 6.3.1.post3584
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.1.post3574
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.1.post3574
25
- Requires-Dist: nucliadb-protos>=6.3.1.post3574
26
- Requires-Dist: nucliadb-models>=6.3.1.post3574
27
- Requires-Dist: nidx-protos>=6.3.1.post3574
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.1.post3584
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.1.post3584
25
+ Requires-Dist: nucliadb-protos>=6.3.1.post3584
26
+ Requires-Dist: nucliadb-models>=6.3.1.post3584
27
+ Requires-Dist: nidx-protos>=6.3.1.post3584
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn
@@ -41,13 +41,13 @@ nucliadb/openapi.py,sha256=wDiw0dVEvTpJvbatkJ0JZLkKm9RItZT5PWRHjqRfqTA,2272
41
41
  nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
42
  nucliadb/backups/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
43
43
  nucliadb/backups/const.py,sha256=9vPAhLxQO_gNAjSdPxWuv3V66s9WcdpjOQ89CZlfmuk,1894
44
- nucliadb/backups/create.py,sha256=AM_nC7TgHOX0EFGaTXClS28jBSK28fHrKNZi14z2wek,10442
44
+ nucliadb/backups/create.py,sha256=mvirguMbtxNgSDGG81l0kkgHWJSZPk4GFyra9nAkBZM,11275
45
45
  nucliadb/backups/delete.py,sha256=1rnBhVUGYYZJXSZUrrgYMDZ5NyswEWkIA-G-crRCyHk,2404
46
46
  nucliadb/backups/models.py,sha256=-hITU4Mv6AxePu12toBu_fjpEv6vVGcwNVxV22O9jQA,1273
47
- nucliadb/backups/restore.py,sha256=xhslVvTf4H8VmDucZpjrEFpKj6csPIWBadCPMVJYKQ8,9703
47
+ nucliadb/backups/restore.py,sha256=wepEgv4vBN5yeiZU-f17PbuFV4xT4_SVKplNr8xSJrE,10001
48
48
  nucliadb/backups/settings.py,sha256=SyzsInj1BRbBI0atg5IXWbMbOZ_eVg4eSQ3IcnUhCxQ,1357
49
49
  nucliadb/backups/tasks.py,sha256=4_kOVJ2yCwMvDEpzJgTuTt75TNlpq5woyw9sTAcaSkw,4194
50
- nucliadb/backups/utils.py,sha256=ayDaxfWP5cPnAkQH-tF4M6cnowsPQgU2ljYz_iL1CbE,1249
50
+ nucliadb/backups/utils.py,sha256=b1hi0gEp90tNrWHejNVoUgRpa4D6uKGhbACq0yeLkJY,1375
51
51
  nucliadb/common/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
52
52
  nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
53
53
  nucliadb/common/counters.py,sha256=8lOi3A2HeLDDlcNaS2QT1SfD3350VPBjiY3FkmHH1V8,977
@@ -272,7 +272,7 @@ nucliadb/tasks/consumer.py,sha256=xc0Ql3N1Iq52dJ3t4YYGJFj1NCQAly0J5W_brfLa_F8,68
272
272
  nucliadb/tasks/logger.py,sha256=C7keOEO_mjLVp5VbqAZ2QXfqVB2Hot7NgBlUP_SDSMw,924
273
273
  nucliadb/tasks/models.py,sha256=qrZKi5DNDQ07waMsp5L4_Fi7WRs57YiO-kmXlrBzEAA,1168
274
274
  nucliadb/tasks/producer.py,sha256=UnpJAzhj_GElsCoO5G6T4m6MshsgOaqR2tVzJmEta64,2625
275
- nucliadb/tasks/retries.py,sha256=Zv-3Hys-SKayG9VQ7_7EIflkegE5j-xPGrf-nwaxsfY,5075
275
+ nucliadb/tasks/retries.py,sha256=tLNtwAutGEfFV3Adr4a9ew-Wg4vuxBd72GGDE_Mma8s,5160
276
276
  nucliadb/tasks/utils.py,sha256=tV1AbWdFc3qfIULX44Veqj41FCD1B6XYjG6brULBeiw,1459
277
277
  nucliadb/tests/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
278
278
  nucliadb/tests/config.py,sha256=JN_Jhgj-fwM9_8IeO9pwxr6C1PiwRDrXxm67Y38rU30,2080
@@ -347,8 +347,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
347
347
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
348
348
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
349
349
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
350
- nucliadb-6.3.1.post3574.dist-info/METADATA,sha256=JiB_eKqj0pPsq4nf9VvqU9gm6CEEQkYvElBlLSISd7c,4291
351
- nucliadb-6.3.1.post3574.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
352
- nucliadb-6.3.1.post3574.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
353
- nucliadb-6.3.1.post3574.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
354
- nucliadb-6.3.1.post3574.dist-info/RECORD,,
350
+ nucliadb-6.3.1.post3584.dist-info/METADATA,sha256=1LFsE32EtmIRlKK9EAN1-kK6P5pfLq3pIRewpO0k2Ug,4291
351
+ nucliadb-6.3.1.post3584.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
352
+ nucliadb-6.3.1.post3584.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
353
+ nucliadb-6.3.1.post3584.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
354
+ nucliadb-6.3.1.post3584.dist-info/RECORD,,