nucliadb 6.3.1.post3574__py3-none-any.whl → 6.3.1.post3577__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import asyncio
21
+ import logging
21
22
  import tarfile
22
23
  from datetime import datetime, timezone
23
24
  from typing import AsyncIterator, Optional
@@ -29,6 +30,7 @@ from nucliadb.backups.const import (
29
30
  )
30
31
  from nucliadb.backups.models import BackupMetadata, CreateBackupRequest
31
32
  from nucliadb.backups.settings import settings
33
+ from nucliadb.backups.utils import exists_in_storge
32
34
  from nucliadb.common import datamanagers
33
35
  from nucliadb.common.context import ApplicationContext
34
36
  from nucliadb.export_import.utils import (
@@ -44,6 +46,8 @@ from nucliadb_utils.audit.stream import StreamAuditStorage
44
46
  from nucliadb_utils.storages.storage import StorageField
45
47
  from nucliadb_utils.utilities import get_audit
46
48
 
49
+ logger = logging.getLogger(__name__)
50
+
47
51
 
48
52
  async def backup_kb_task(context: ApplicationContext, msg: CreateBackupRequest):
49
53
  kbid = msg.kb_id
@@ -101,6 +105,10 @@ async def backup_resources(context: ApplicationContext, kbid: str, backup_id: st
101
105
  await set_metadata(context, kbid, backup_id, metadata)
102
106
  tasks = []
103
107
  backing_up = []
108
+ logger.info(
109
+ f"Backup resources: {len(metadata.missing_resources)} remaining",
110
+ extra={"kbid": kbid, "backup_id": backup_id},
111
+ )
104
112
  if len(tasks) > 0:
105
113
  resources_bytes = await asyncio.gather(*tasks)
106
114
  metadata.total_size += sum(resources_bytes)
@@ -108,6 +116,7 @@ async def backup_resources(context: ApplicationContext, kbid: str, backup_id: st
108
116
  await set_metadata(context, kbid, backup_id, metadata)
109
117
  tasks = []
110
118
  backing_up = []
119
+ logger.info(f"Backup resources: completed", extra={"kbid": kbid, "backup_id": backup_id})
111
120
 
112
121
 
113
122
  async def backup_resource(context: ApplicationContext, backup_id: str, kbid: str, rid: str) -> int:
@@ -163,6 +172,13 @@ async def backup_resource_with_binaries(
163
172
  nonlocal total_size
164
173
 
165
174
  for cloud_file in get_cloud_files(bm):
175
+ if not await exists_cf(context, cloud_file):
176
+ logger.warning(
177
+ "Cloud file not found in storage, skipping",
178
+ extra={"kbid": kbid, "rid": rid, "cf_uri": cloud_file.uri},
179
+ )
180
+ continue
181
+
166
182
  serialized_cf = cloud_file.SerializeToString()
167
183
 
168
184
  async def cf_iterator():
@@ -244,6 +260,10 @@ async def delete_metadata(context: ApplicationContext, kbid: str, backup_id: str
244
260
  await txn.commit()
245
261
 
246
262
 
263
+ async def exists_cf(context: ApplicationContext, cf: resources_pb2.CloudFile) -> bool:
264
+ return await exists_in_storge(context.blob_storage, cf.bucket_name, cf.uri)
265
+
266
+
247
267
  async def upload_to_bucket(context: ApplicationContext, bytes_iterator: AsyncIterator[bytes], key: str):
248
268
  storage = context.blob_storage
249
269
  bucket = settings.backups_bucket
@@ -21,6 +21,7 @@
21
21
 
22
22
  import asyncio
23
23
  import functools
24
+ import logging
24
25
  import tarfile
25
26
  from typing import AsyncIterator, Callable, Optional, Union
26
27
 
@@ -39,6 +40,8 @@ from nucliadb_protos import knowledgebox_pb2 as kb_pb2
39
40
  from nucliadb_protos.resources_pb2 import CloudFile
40
41
  from nucliadb_protos.writer_pb2 import BrokerMessage
41
42
 
43
+ logger = logging.getLogger(__name__)
44
+
42
45
 
43
46
  async def restore_kb_task(context: ApplicationContext, msg: RestoreBackupRequest):
44
47
  kbid = msg.kb_id
@@ -193,7 +196,7 @@ class ResourceBackupReader:
193
196
  elif tarinfo.name.startswith("cloud-files"):
194
197
  raw_cf = await self.read_data(tarinfo)
195
198
  cf = CloudFile()
196
- cf.FromString(raw_cf)
199
+ cf.ParseFromString(raw_cf)
197
200
  return cf
198
201
  elif tarinfo.name.startswith("binaries"):
199
202
  uri = tarinfo.name.lstrip("binaries/")
@@ -219,14 +222,19 @@ async def restore_resource(context: ApplicationContext, kbid: str, backup_id: st
219
222
  bm = item
220
223
  bm.kbid = kbid
221
224
  break
222
-
223
- # Read the cloud file and its binary
224
- cf = await reader.read_item()
225
- assert isinstance(cf, CloudFile)
226
- cf_binary = await reader.read_item()
227
- assert isinstance(cf_binary, CloudFileBinary)
228
- assert cf.uri == cf_binary.uri
229
- await import_binary(context, kbid, cf, cf_binary.read)
225
+ elif isinstance(item, CloudFile):
226
+ # Read its binary and import it
227
+ cf = item
228
+ cf_binary = await reader.read_item()
229
+ assert isinstance(cf_binary, CloudFileBinary)
230
+ assert cf.uri == cf_binary.uri
231
+ await import_binary(context, kbid, cf, cf_binary.read)
232
+ else:
233
+ logger.error(
234
+ "Unexpected item in resource backup. Backup may be corrupted",
235
+ extra={"item_type": type(item), kbid: kbid, resource_id: resource_id},
236
+ )
237
+ continue
230
238
 
231
239
  await import_broker_message(context, kbid, bm)
232
240
 
nucliadb/backups/utils.py CHANGED
@@ -24,9 +24,12 @@ from nucliadb_utils.storages.storage import Storage
24
24
 
25
25
 
26
26
  async def exists_backup(storage: Storage, backup_id: str) -> bool:
27
- async for _ in storage.iterate_objects(
28
- bucket=settings.backups_bucket,
29
- prefix=StorageKeys.BACKUP_PREFIX.format(backup_id=backup_id),
30
- ):
27
+ return await exists_in_storge(
28
+ storage, settings.backups_bucket, StorageKeys.BACKUP_PREFIX.format(backup_id=backup_id)
29
+ )
30
+
31
+
32
+ async def exists_in_storge(storage: Storage, bucket: str, key: str) -> bool:
33
+ async for _ in storage.iterate_objects(bucket=bucket, prefix=key):
31
34
  return True
32
35
  return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.3.1.post3574
3
+ Version: 6.3.1.post3577
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.1.post3574
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.1.post3574
25
- Requires-Dist: nucliadb-protos>=6.3.1.post3574
26
- Requires-Dist: nucliadb-models>=6.3.1.post3574
27
- Requires-Dist: nidx-protos>=6.3.1.post3574
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.1.post3577
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.1.post3577
25
+ Requires-Dist: nucliadb-protos>=6.3.1.post3577
26
+ Requires-Dist: nucliadb-models>=6.3.1.post3577
27
+ Requires-Dist: nidx-protos>=6.3.1.post3577
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn
@@ -41,13 +41,13 @@ nucliadb/openapi.py,sha256=wDiw0dVEvTpJvbatkJ0JZLkKm9RItZT5PWRHjqRfqTA,2272
41
41
  nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
42
  nucliadb/backups/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
43
43
  nucliadb/backups/const.py,sha256=9vPAhLxQO_gNAjSdPxWuv3V66s9WcdpjOQ89CZlfmuk,1894
44
- nucliadb/backups/create.py,sha256=AM_nC7TgHOX0EFGaTXClS28jBSK28fHrKNZi14z2wek,10442
44
+ nucliadb/backups/create.py,sha256=mvirguMbtxNgSDGG81l0kkgHWJSZPk4GFyra9nAkBZM,11275
45
45
  nucliadb/backups/delete.py,sha256=1rnBhVUGYYZJXSZUrrgYMDZ5NyswEWkIA-G-crRCyHk,2404
46
46
  nucliadb/backups/models.py,sha256=-hITU4Mv6AxePu12toBu_fjpEv6vVGcwNVxV22O9jQA,1273
47
- nucliadb/backups/restore.py,sha256=xhslVvTf4H8VmDucZpjrEFpKj6csPIWBadCPMVJYKQ8,9703
47
+ nucliadb/backups/restore.py,sha256=wepEgv4vBN5yeiZU-f17PbuFV4xT4_SVKplNr8xSJrE,10001
48
48
  nucliadb/backups/settings.py,sha256=SyzsInj1BRbBI0atg5IXWbMbOZ_eVg4eSQ3IcnUhCxQ,1357
49
49
  nucliadb/backups/tasks.py,sha256=4_kOVJ2yCwMvDEpzJgTuTt75TNlpq5woyw9sTAcaSkw,4194
50
- nucliadb/backups/utils.py,sha256=ayDaxfWP5cPnAkQH-tF4M6cnowsPQgU2ljYz_iL1CbE,1249
50
+ nucliadb/backups/utils.py,sha256=b1hi0gEp90tNrWHejNVoUgRpa4D6uKGhbACq0yeLkJY,1375
51
51
  nucliadb/common/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
52
52
  nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
53
53
  nucliadb/common/counters.py,sha256=8lOi3A2HeLDDlcNaS2QT1SfD3350VPBjiY3FkmHH1V8,977
@@ -347,8 +347,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
347
347
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
348
348
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
349
349
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
350
- nucliadb-6.3.1.post3574.dist-info/METADATA,sha256=JiB_eKqj0pPsq4nf9VvqU9gm6CEEQkYvElBlLSISd7c,4291
351
- nucliadb-6.3.1.post3574.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
352
- nucliadb-6.3.1.post3574.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
353
- nucliadb-6.3.1.post3574.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
354
- nucliadb-6.3.1.post3574.dist-info/RECORD,,
350
+ nucliadb-6.3.1.post3577.dist-info/METADATA,sha256=7cMll6LH15F3_kAg1yrlOSDK8XFk77amc4TEa0kApug,4291
351
+ nucliadb-6.3.1.post3577.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
352
+ nucliadb-6.3.1.post3577.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
353
+ nucliadb-6.3.1.post3577.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
354
+ nucliadb-6.3.1.post3577.dist-info/RECORD,,