nucliadb 6.3.1.post3574__py3-none-any.whl → 6.3.1.post3584__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/backups/create.py +20 -0
- nucliadb/backups/restore.py +17 -9
- nucliadb/backups/utils.py +7 -4
- nucliadb/tasks/retries.py +6 -5
- {nucliadb-6.3.1.post3574.dist-info → nucliadb-6.3.1.post3584.dist-info}/METADATA +6 -6
- {nucliadb-6.3.1.post3574.dist-info → nucliadb-6.3.1.post3584.dist-info}/RECORD +9 -9
- {nucliadb-6.3.1.post3574.dist-info → nucliadb-6.3.1.post3584.dist-info}/WHEEL +0 -0
- {nucliadb-6.3.1.post3574.dist-info → nucliadb-6.3.1.post3584.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.3.1.post3574.dist-info → nucliadb-6.3.1.post3584.dist-info}/top_level.txt +0 -0
nucliadb/backups/create.py
CHANGED
@@ -18,6 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import asyncio
|
21
|
+
import logging
|
21
22
|
import tarfile
|
22
23
|
from datetime import datetime, timezone
|
23
24
|
from typing import AsyncIterator, Optional
|
@@ -29,6 +30,7 @@ from nucliadb.backups.const import (
|
|
29
30
|
)
|
30
31
|
from nucliadb.backups.models import BackupMetadata, CreateBackupRequest
|
31
32
|
from nucliadb.backups.settings import settings
|
33
|
+
from nucliadb.backups.utils import exists_in_storge
|
32
34
|
from nucliadb.common import datamanagers
|
33
35
|
from nucliadb.common.context import ApplicationContext
|
34
36
|
from nucliadb.export_import.utils import (
|
@@ -44,6 +46,8 @@ from nucliadb_utils.audit.stream import StreamAuditStorage
|
|
44
46
|
from nucliadb_utils.storages.storage import StorageField
|
45
47
|
from nucliadb_utils.utilities import get_audit
|
46
48
|
|
49
|
+
logger = logging.getLogger(__name__)
|
50
|
+
|
47
51
|
|
48
52
|
async def backup_kb_task(context: ApplicationContext, msg: CreateBackupRequest):
|
49
53
|
kbid = msg.kb_id
|
@@ -101,6 +105,10 @@ async def backup_resources(context: ApplicationContext, kbid: str, backup_id: st
|
|
101
105
|
await set_metadata(context, kbid, backup_id, metadata)
|
102
106
|
tasks = []
|
103
107
|
backing_up = []
|
108
|
+
logger.info(
|
109
|
+
f"Backup resources: {len(metadata.missing_resources)} remaining",
|
110
|
+
extra={"kbid": kbid, "backup_id": backup_id},
|
111
|
+
)
|
104
112
|
if len(tasks) > 0:
|
105
113
|
resources_bytes = await asyncio.gather(*tasks)
|
106
114
|
metadata.total_size += sum(resources_bytes)
|
@@ -108,6 +116,7 @@ async def backup_resources(context: ApplicationContext, kbid: str, backup_id: st
|
|
108
116
|
await set_metadata(context, kbid, backup_id, metadata)
|
109
117
|
tasks = []
|
110
118
|
backing_up = []
|
119
|
+
logger.info(f"Backup resources: completed", extra={"kbid": kbid, "backup_id": backup_id})
|
111
120
|
|
112
121
|
|
113
122
|
async def backup_resource(context: ApplicationContext, backup_id: str, kbid: str, rid: str) -> int:
|
@@ -163,6 +172,13 @@ async def backup_resource_with_binaries(
|
|
163
172
|
nonlocal total_size
|
164
173
|
|
165
174
|
for cloud_file in get_cloud_files(bm):
|
175
|
+
if not await exists_cf(context, cloud_file):
|
176
|
+
logger.warning(
|
177
|
+
"Cloud file not found in storage, skipping",
|
178
|
+
extra={"kbid": kbid, "rid": rid, "cf_uri": cloud_file.uri},
|
179
|
+
)
|
180
|
+
continue
|
181
|
+
|
166
182
|
serialized_cf = cloud_file.SerializeToString()
|
167
183
|
|
168
184
|
async def cf_iterator():
|
@@ -244,6 +260,10 @@ async def delete_metadata(context: ApplicationContext, kbid: str, backup_id: str
|
|
244
260
|
await txn.commit()
|
245
261
|
|
246
262
|
|
263
|
+
async def exists_cf(context: ApplicationContext, cf: resources_pb2.CloudFile) -> bool:
|
264
|
+
return await exists_in_storge(context.blob_storage, cf.bucket_name, cf.uri)
|
265
|
+
|
266
|
+
|
247
267
|
async def upload_to_bucket(context: ApplicationContext, bytes_iterator: AsyncIterator[bytes], key: str):
|
248
268
|
storage = context.blob_storage
|
249
269
|
bucket = settings.backups_bucket
|
nucliadb/backups/restore.py
CHANGED
@@ -21,6 +21,7 @@
|
|
21
21
|
|
22
22
|
import asyncio
|
23
23
|
import functools
|
24
|
+
import logging
|
24
25
|
import tarfile
|
25
26
|
from typing import AsyncIterator, Callable, Optional, Union
|
26
27
|
|
@@ -39,6 +40,8 @@ from nucliadb_protos import knowledgebox_pb2 as kb_pb2
|
|
39
40
|
from nucliadb_protos.resources_pb2 import CloudFile
|
40
41
|
from nucliadb_protos.writer_pb2 import BrokerMessage
|
41
42
|
|
43
|
+
logger = logging.getLogger(__name__)
|
44
|
+
|
42
45
|
|
43
46
|
async def restore_kb_task(context: ApplicationContext, msg: RestoreBackupRequest):
|
44
47
|
kbid = msg.kb_id
|
@@ -193,7 +196,7 @@ class ResourceBackupReader:
|
|
193
196
|
elif tarinfo.name.startswith("cloud-files"):
|
194
197
|
raw_cf = await self.read_data(tarinfo)
|
195
198
|
cf = CloudFile()
|
196
|
-
cf.
|
199
|
+
cf.ParseFromString(raw_cf)
|
197
200
|
return cf
|
198
201
|
elif tarinfo.name.startswith("binaries"):
|
199
202
|
uri = tarinfo.name.lstrip("binaries/")
|
@@ -219,14 +222,19 @@ async def restore_resource(context: ApplicationContext, kbid: str, backup_id: st
|
|
219
222
|
bm = item
|
220
223
|
bm.kbid = kbid
|
221
224
|
break
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
225
|
+
elif isinstance(item, CloudFile):
|
226
|
+
# Read its binary and import it
|
227
|
+
cf = item
|
228
|
+
cf_binary = await reader.read_item()
|
229
|
+
assert isinstance(cf_binary, CloudFileBinary)
|
230
|
+
assert cf.uri == cf_binary.uri
|
231
|
+
await import_binary(context, kbid, cf, cf_binary.read)
|
232
|
+
else:
|
233
|
+
logger.error(
|
234
|
+
"Unexpected item in resource backup. Backup may be corrupted",
|
235
|
+
extra={"item_type": type(item), kbid: kbid, resource_id: resource_id},
|
236
|
+
)
|
237
|
+
continue
|
230
238
|
|
231
239
|
await import_broker_message(context, kbid, bm)
|
232
240
|
|
nucliadb/backups/utils.py
CHANGED
@@ -24,9 +24,12 @@ from nucliadb_utils.storages.storage import Storage
|
|
24
24
|
|
25
25
|
|
26
26
|
async def exists_backup(storage: Storage, backup_id: str) -> bool:
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
return await exists_in_storge(
|
28
|
+
storage, settings.backups_bucket, StorageKeys.BACKUP_PREFIX.format(backup_id=backup_id)
|
29
|
+
)
|
30
|
+
|
31
|
+
|
32
|
+
async def exists_in_storge(storage: Storage, bucket: str, key: str) -> bool:
|
33
|
+
async for _ in storage.iterate_objects(bucket=bucket, prefix=key):
|
31
34
|
return True
|
32
35
|
return False
|
nucliadb/tasks/retries.py
CHANGED
@@ -20,7 +20,7 @@
|
|
20
20
|
import functools
|
21
21
|
import logging
|
22
22
|
from enum import Enum
|
23
|
-
from typing import Optional
|
23
|
+
from typing import Callable, Optional
|
24
24
|
|
25
25
|
from pydantic import BaseModel
|
26
26
|
|
@@ -73,7 +73,8 @@ class TaskRetryHandler:
|
|
73
73
|
self.kbid = kbid
|
74
74
|
self.task_type = task_type
|
75
75
|
self.task_id = task_id
|
76
|
-
|
76
|
+
# Limit max retries to 50
|
77
|
+
self.max_retries = min(max_retries, 50)
|
77
78
|
self.context = context
|
78
79
|
|
79
80
|
@property
|
@@ -94,7 +95,7 @@ class TaskRetryHandler:
|
|
94
95
|
await txn.set(self.metadata_key, metadata.model_dump_json().encode())
|
95
96
|
await txn.commit()
|
96
97
|
|
97
|
-
def wrap(self, func):
|
98
|
+
def wrap(self, func: Callable) -> Callable:
|
98
99
|
@functools.wraps(func)
|
99
100
|
async def wrapper(*args, **kwargs):
|
100
101
|
func_result = None
|
@@ -110,7 +111,7 @@ class TaskRetryHandler:
|
|
110
111
|
|
111
112
|
if metadata.status in (TaskMetadata.Status.COMPLETED, TaskMetadata.Status.FAILED):
|
112
113
|
logger.info(
|
113
|
-
f"{self.
|
114
|
+
f"{self.task_type} task is {metadata.status.value}. Skipping",
|
114
115
|
extra={"kbid": self.kbid, "task_type": self.task_type, "task_id": self.task_id},
|
115
116
|
)
|
116
117
|
return
|
@@ -130,7 +131,7 @@ class TaskRetryHandler:
|
|
130
131
|
except Exception as ex:
|
131
132
|
metadata.retries += 1
|
132
133
|
metadata.error_messages.append(str(ex))
|
133
|
-
logger.
|
134
|
+
logger.exception(
|
134
135
|
f"Task failed. Will be retried",
|
135
136
|
extra={"kbid": self.kbid, "task_type": self.task_type, "task_id": self.task_id},
|
136
137
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.3.1.post3574
|
3
|
+
Version: 6.3.1.post3584
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.3.1.post3574
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.1.post3574
|
25
|
-
Requires-Dist: nucliadb-protos>=6.3.1.post3574
|
26
|
-
Requires-Dist: nucliadb-models>=6.3.1.post3574
|
27
|
-
Requires-Dist: nidx-protos>=6.3.1.post3574
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.3.1.post3584
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.1.post3584
|
25
|
+
Requires-Dist: nucliadb-protos>=6.3.1.post3584
|
26
|
+
Requires-Dist: nucliadb-models>=6.3.1.post3584
|
27
|
+
Requires-Dist: nidx-protos>=6.3.1.post3584
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn
|
@@ -41,13 +41,13 @@ nucliadb/openapi.py,sha256=wDiw0dVEvTpJvbatkJ0JZLkKm9RItZT5PWRHjqRfqTA,2272
|
|
41
41
|
nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
42
42
|
nucliadb/backups/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
43
43
|
nucliadb/backups/const.py,sha256=9vPAhLxQO_gNAjSdPxWuv3V66s9WcdpjOQ89CZlfmuk,1894
|
44
|
-
nucliadb/backups/create.py,sha256=
|
44
|
+
nucliadb/backups/create.py,sha256=mvirguMbtxNgSDGG81l0kkgHWJSZPk4GFyra9nAkBZM,11275
|
45
45
|
nucliadb/backups/delete.py,sha256=1rnBhVUGYYZJXSZUrrgYMDZ5NyswEWkIA-G-crRCyHk,2404
|
46
46
|
nucliadb/backups/models.py,sha256=-hITU4Mv6AxePu12toBu_fjpEv6vVGcwNVxV22O9jQA,1273
|
47
|
-
nucliadb/backups/restore.py,sha256=
|
47
|
+
nucliadb/backups/restore.py,sha256=wepEgv4vBN5yeiZU-f17PbuFV4xT4_SVKplNr8xSJrE,10001
|
48
48
|
nucliadb/backups/settings.py,sha256=SyzsInj1BRbBI0atg5IXWbMbOZ_eVg4eSQ3IcnUhCxQ,1357
|
49
49
|
nucliadb/backups/tasks.py,sha256=4_kOVJ2yCwMvDEpzJgTuTt75TNlpq5woyw9sTAcaSkw,4194
|
50
|
-
nucliadb/backups/utils.py,sha256=
|
50
|
+
nucliadb/backups/utils.py,sha256=b1hi0gEp90tNrWHejNVoUgRpa4D6uKGhbACq0yeLkJY,1375
|
51
51
|
nucliadb/common/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
52
52
|
nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
|
53
53
|
nucliadb/common/counters.py,sha256=8lOi3A2HeLDDlcNaS2QT1SfD3350VPBjiY3FkmHH1V8,977
|
@@ -272,7 +272,7 @@ nucliadb/tasks/consumer.py,sha256=xc0Ql3N1Iq52dJ3t4YYGJFj1NCQAly0J5W_brfLa_F8,68
|
|
272
272
|
nucliadb/tasks/logger.py,sha256=C7keOEO_mjLVp5VbqAZ2QXfqVB2Hot7NgBlUP_SDSMw,924
|
273
273
|
nucliadb/tasks/models.py,sha256=qrZKi5DNDQ07waMsp5L4_Fi7WRs57YiO-kmXlrBzEAA,1168
|
274
274
|
nucliadb/tasks/producer.py,sha256=UnpJAzhj_GElsCoO5G6T4m6MshsgOaqR2tVzJmEta64,2625
|
275
|
-
nucliadb/tasks/retries.py,sha256=
|
275
|
+
nucliadb/tasks/retries.py,sha256=tLNtwAutGEfFV3Adr4a9ew-Wg4vuxBd72GGDE_Mma8s,5160
|
276
276
|
nucliadb/tasks/utils.py,sha256=tV1AbWdFc3qfIULX44Veqj41FCD1B6XYjG6brULBeiw,1459
|
277
277
|
nucliadb/tests/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
278
278
|
nucliadb/tests/config.py,sha256=JN_Jhgj-fwM9_8IeO9pwxr6C1PiwRDrXxm67Y38rU30,2080
|
@@ -347,8 +347,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
347
347
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
348
348
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
349
349
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
350
|
-
nucliadb-6.3.1.
|
351
|
-
nucliadb-6.3.1.post3574.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
352
|
-
nucliadb-6.3.1.post3574.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
353
|
-
nucliadb-6.3.1.post3574.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
354
|
-
nucliadb-6.3.1.post3574.dist-info/RECORD,,
|
350
|
+
nucliadb-6.3.1.post3584.dist-info/METADATA,sha256=1LFsE32EtmIRlKK9EAN1-kK6P5pfLq3pIRewpO0k2Ug,4291
|
351
|
+
nucliadb-6.3.1.post3584.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
352
|
+
nucliadb-6.3.1.post3584.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
353
|
+
nucliadb-6.3.1.post3584.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
354
|
+
nucliadb-6.3.1.post3584.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|