nucliadb 6.3.1.post3524__py3-none-any.whl → 6.3.1.post3531__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- nucliadb/backups/__init__.py +19 -0
- nucliadb/backups/const.py +41 -0
- nucliadb/backups/create.py +277 -0
- nucliadb/backups/delete.py +69 -0
- nucliadb/backups/models.py +44 -0
- nucliadb/backups/restore.py +249 -0
- nucliadb/backups/settings.py +37 -0
- nucliadb/backups/tasks.py +126 -0
- nucliadb/backups/utils.py +32 -0
- nucliadb/common/cluster/grpc_node_dummy.py +2 -2
- nucliadb/common/cluster/manager.py +1 -1
- nucliadb/common/datamanagers/atomic.py +12 -18
- nucliadb/common/datamanagers/entities.py +1 -1
- nucliadb/export_import/utils.py +6 -1
- nucliadb/ingest/app.py +6 -0
- nucliadb/ingest/fields/exceptions.py +1 -1
- nucliadb/ingest/orm/resource.py +3 -3
- nucliadb/ingest/processing.py +2 -2
- nucliadb/ingest/serialize.py +1 -1
- nucliadb/ingest/service/writer.py +28 -3
- nucliadb/search/search/metrics.py +1 -1
- nucliadb/search/search/query_parser/catalog.py +4 -1
- nucliadb/search/search/query_parser/filter_expression.py +4 -1
- nucliadb/tasks/__init__.py +2 -2
- nucliadb/tasks/retries.py +148 -0
- nucliadb/writer/tus/gcs.py +1 -1
- {nucliadb-6.3.1.post3524.dist-info → nucliadb-6.3.1.post3531.dist-info}/METADATA +6 -6
- {nucliadb-6.3.1.post3524.dist-info → nucliadb-6.3.1.post3531.dist-info}/RECORD +31 -21
- {nucliadb-6.3.1.post3524.dist-info → nucliadb-6.3.1.post3531.dist-info}/WHEEL +0 -0
- {nucliadb-6.3.1.post3524.dist-info → nucliadb-6.3.1.post3531.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.3.1.post3524.dist-info → nucliadb-6.3.1.post3531.dist-info}/top_level.txt +0 -0
nucliadb/backups/restore.py
ADDED
@@ -0,0 +1,249 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+
+import asyncio
+import functools
+import tarfile
+from typing import AsyncIterator, Callable, Optional, Union
+
+from nucliadb.backups.const import MaindbKeys, StorageKeys
+from nucliadb.backups.models import RestoreBackupRequest
+from nucliadb.backups.settings import settings
+from nucliadb.common.context import ApplicationContext
+from nucliadb.export_import.utils import (
+    import_binary,
+    import_broker_message,
+    set_entities_groups,
+    set_labels,
+)
+from nucliadb.tasks.retries import TaskRetryHandler
+from nucliadb_protos import knowledgebox_pb2 as kb_pb2
+from nucliadb_protos.resources_pb2 import CloudFile
+from nucliadb_protos.writer_pb2 import BrokerMessage
+
+
+async def restore_kb_retried(context: ApplicationContext, msg: RestoreBackupRequest):
+    kbid = msg.kbid
+    backup_id = msg.backup_id
+
+    retry_handler = TaskRetryHandler(
+        kbid=kbid,
+        task_type="restore",
+        task_id=backup_id,
+        context=context,
+        max_retries=3,
+    )
+
+    @retry_handler.wrap
+    async def _restore_kb(context: ApplicationContext, kbid: str, backup_id: str):
+        await restore_kb(context, kbid, backup_id)
+
+    await _restore_kb(context, kbid, backup_id)
+
+
+async def restore_kb(context: ApplicationContext, kbid: str, backup_id: str):
+    """
+    Downloads the backup files from the cloud storage and imports them into the KB.
+    """
+    await restore_resources(context, kbid, backup_id)
+    await restore_labels(context, kbid, backup_id)
+    await restore_entities(context, kbid, backup_id)
+    await delete_last_restored_resource_key(context, kbid, backup_id)
+
+
+async def restore_resources(context: ApplicationContext, kbid: str, backup_id: str):
+    last_restored = await get_last_restored_resource_key(context, kbid, backup_id)
+    tasks = []
+    async for object_info in context.blob_storage.iterate_objects(
+        bucket=settings.backups_bucket,
+        prefix=StorageKeys.RESOURCES_PREFIX.format(kbid=kbid, backup_id=backup_id),
+        start=last_restored,
+    ):
+        key = object_info.name
+        resource_id = key.split("/")[-1].rstrip(".tar")
+        tasks.append(asyncio.create_task(restore_resource(context, kbid, backup_id, resource_id)))
+        if len(tasks) > settings.restore_resources_concurrency:
+            await asyncio.gather(*tasks)
+            tasks = []
+            await set_last_restored_resource_key(context, kbid, backup_id, key)
+    if len(tasks) > 0:
+        await asyncio.gather(*tasks)
+        tasks = []
+        await set_last_restored_resource_key(context, kbid, backup_id, key)
+
+
+async def get_last_restored_resource_key(
+    context: ApplicationContext, kbid: str, backup_id: str
+) -> Optional[str]:
+    key = MaindbKeys.LAST_RESTORED.format(kbid=kbid, backup_id=backup_id)
+    async with context.kv_driver.transaction(read_only=True) as txn:
+        raw = await txn.get(key)
+        if raw is None:
+            return None
+        return raw.decode()
+
+
+async def set_last_restored_resource_key(
+    context: ApplicationContext, kbid: str, backup_id: str, resource_id: str
+):
+    key = MaindbKeys.LAST_RESTORED.format(kbid=kbid, backup_id=backup_id)
+    async with context.kv_driver.transaction() as txn:
+        await txn.set(key, resource_id.encode())
+        await txn.commit()
+
+
+async def delete_last_restored_resource_key(context: ApplicationContext, kbid: str, backup_id: str):
+    key = MaindbKeys.LAST_RESTORED.format(kbid=kbid, backup_id=backup_id)
+    async with context.kv_driver.transaction() as txn:
+        await txn.delete(key)
+        await txn.commit()
+
+
+class CloudFileBinary:
+    def __init__(self, uri: str, download_stream: Callable[[int], AsyncIterator[bytes]]):
+        self.uri = uri
+        self.download_stream = download_stream
+
+    async def read(self, chunk_size: int) -> AsyncIterator[bytes]:
+        async for chunk in self.download_stream(chunk_size):
+            yield chunk
+
+
+class ResourceBackupReader:
+    def __init__(self, download_stream: AsyncIterator[bytes]):
+        self.download_stream = download_stream
+        self.buffer = b""
+
+    async def read(self, size: int) -> bytes:
+        while len(self.buffer) < size:
+            chunk = await self.download_stream.__anext__()
+            self.buffer += chunk
+        result = self.buffer[:size]
+        self.buffer = self.buffer[size:]
+        return result
+
+    async def iter_data(self, total_bytes: int, chunk_size: int = 1024 * 1024) -> AsyncIterator[bytes]:
+        padding_bytes = 0
+        if total_bytes % 512 != 0:
+            # We need to read the padding bytes and then discard them
+            padding_bytes = 512 - (total_bytes % 512)
+        read_bytes = 0
+        padding_reached = False
+        async for chunk in self._iter(total_bytes + padding_bytes, chunk_size):
+            if padding_reached:
+                # Skip padding bytes. We can't break here because we need
+                # to read the padding bytes from the stream
+                continue
+            padding_reached = read_bytes + len(chunk) >= total_bytes
+            if padding_reached:
+                chunk = chunk[: total_bytes - read_bytes]
+            else:
+                read_bytes += len(chunk)
+            yield chunk
+
+    async def _iter(self, total_bytes: int, chunk_size: int = 1024 * 1024) -> AsyncIterator[bytes]:
+        remaining_bytes = total_bytes
+        while remaining_bytes > 0:
+            to_read = min(chunk_size, remaining_bytes)
+            chunk = await self.read(to_read)
+            yield chunk
+            remaining_bytes -= len(chunk)
+        assert remaining_bytes == 0
+
+    async def read_tarinfo(self):
+        raw_tar_header = await self.read(512)
+        return tarfile.TarInfo.frombuf(raw_tar_header, encoding="utf-8", errors="strict")
+
+    async def read_data(self, tarinfo: tarfile.TarInfo) -> bytes:
+        tarinfo_size = tarinfo.size
+        padding_bytes = 0
+        if tarinfo_size % 512 != 0:
+            # We need to read the padding bytes and then discard them
+            padding_bytes = 512 - (tarinfo_size % 512)
+        data = await self.read(tarinfo_size + padding_bytes)
+        return data[:tarinfo_size]
+
+    async def read_item(self) -> Union[BrokerMessage, CloudFile, CloudFileBinary]:
+        tarinfo = await self.read_tarinfo()
+        if tarinfo.name.startswith("broker-message"):
+            raw_bm = await self.read_data(tarinfo)
+            bm = BrokerMessage()
+            bm.ParseFromString(raw_bm)
+            return bm
+        elif tarinfo.name.startswith("cloud-files"):
+            raw_cf = await self.read_data(tarinfo)
+            cf = CloudFile()
+            cf.FromString(raw_cf)
+            return cf
+        elif tarinfo.name.startswith("binaries"):
+            uri = tarinfo.name.lstrip("binaries/")
+            size = tarinfo.size
+            download_stream = functools.partial(self.iter_data, size)
+            return CloudFileBinary(uri, download_stream)
+        else:  # pragma: no cover
+            raise ValueError(f"Unknown tar entry: {tarinfo.name}")
+
+
+async def restore_resource(context: ApplicationContext, kbid: str, backup_id: str, resource_id: str):
+    download_stream = context.blob_storage.download(
+        bucket=settings.backups_bucket,
+        key=StorageKeys.RESOURCE.format(kbid=kbid, backup_id=backup_id, resource_id=resource_id),
+    )
+    reader = ResourceBackupReader(download_stream)
+    bm = None
+    while True:
+        item = await reader.read_item()
+        if isinstance(item, BrokerMessage):
+            # When the broker message is read, this means all cloud files
+            # and binaries of that resource have been read and imported
+            bm = item
+            bm.kbid = kbid
+            break
+
+        # Read the cloud file and its binary
+        cf = await reader.read_item()
+        assert isinstance(cf, CloudFile)
+        cf_binary = await reader.read_item()
+        assert isinstance(cf_binary, CloudFileBinary)
+        assert cf.uri == cf_binary.uri
+        await import_binary(context, kbid, cf, cf_binary.read)
+
+    await import_broker_message(context, kbid, bm)
+
+
+async def restore_labels(context: ApplicationContext, kbid: str, backup_id: str):
+    raw = await context.blob_storage.downloadbytes(
+        bucket=settings.backups_bucket,
+        key=StorageKeys.LABELS.format(kbid=kbid, backup_id=backup_id),
+    )
+    labels = kb_pb2.Labels()
+    labels.ParseFromString(raw.getvalue())
+    await set_labels(context, kbid, labels)
+
+
+async def restore_entities(context: ApplicationContext, kbid: str, backup_id: str):
+    raw = await context.blob_storage.downloadbytes(
+        bucket=settings.backups_bucket,
+        key=StorageKeys.ENTITIES.format(kbid=kbid, backup_id=backup_id),
+    )
+    entities = kb_pb2.EntitiesGroups()
+    entities.ParseFromString(raw.getvalue())
+    await set_entities_groups(context, kbid, entities)
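
ResourceBackupReader parses the backup tar stream by hand: every tar entry starts with a 512-byte header block, and entry data is zero-padded up to the next 512-byte boundary, which is what the "512 - (size % 512)" arithmetic accounts for. A minimal, self-contained sketch of the same header-plus-padding handling against an in-memory tarball built with the standard library (illustrative only, not the nucliadb API):

import io
import tarfile

# Build a tarball in memory with one entry whose size is not a
# multiple of 512, so the padding path is exercised.
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w") as tar:
    payload = b"x" * 700  # 700 % 512 != 0 -> 324 padding bytes follow
    info = tarfile.TarInfo(name="broker-message/demo")
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))

stream = io.BytesIO(buf.getvalue())

# Read one entry the same way read_tarinfo()/read_data() do.
header = stream.read(512)
tarinfo = tarfile.TarInfo.frombuf(header, encoding="utf-8", errors="strict")
padding = 0
if tarinfo.size % 512 != 0:
    padding = 512 - (tarinfo.size % 512)
data = stream.read(tarinfo.size + padding)[: tarinfo.size]

assert tarinfo.name == "broker-message/demo"
assert data == payload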
nucliadb/backups/settings.py
ADDED
@@ -0,0 +1,37 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class BackupSettings(BaseSettings):
+    backups_bucket: str = Field(
+        default="backups", description="The bucket where the backups are stored."
+    )
+    restore_resources_concurrency: int = Field(
+        default=10, description="The number of concurrent resource restores."
+    )
+    backup_resources_concurrency: int = Field(
+        default=10, description="The number of concurrent resource backups."
+    )
+
+
+settings = BackupSettings()
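
Since BackupSettings extends pydantic's BaseSettings, each field can also be set through the environment: pydantic-settings matches variable names to field names case-insensitively when no env prefix is configured, and coerces string values to the declared types. A hedged sketch of overriding the defaults above (assuming no custom env prefix is added elsewhere):

import os

os.environ["BACKUPS_BUCKET"] = "my-backups"
os.environ["RESTORE_RESOURCES_CONCURRENCY"] = "20"

from nucliadb.backups.settings import BackupSettings

settings = BackupSettings()
assert settings.backups_bucket == "my-backups"
assert settings.restore_resources_concurrency == 20  # "20" coerced to int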
nucliadb/backups/tasks.py
ADDED
@@ -0,0 +1,126 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+from typing import Awaitable, Callable
+
+from nucliadb.backups.create import backup_kb_retried
+from nucliadb.backups.delete import delete_backup
+from nucliadb.backups.models import CreateBackupRequest, DeleteBackupRequest, RestoreBackupRequest
+from nucliadb.backups.restore import restore_kb_retried
+from nucliadb.common.context import ApplicationContext
+from nucliadb.tasks import create_consumer, create_producer
+from nucliadb.tasks.consumer import NatsTaskConsumer
+from nucliadb.tasks.producer import NatsTaskProducer
+
+
+def creator_consumer() -> NatsTaskConsumer[CreateBackupRequest]:
+    consumer: NatsTaskConsumer = create_consumer(
+        name="backup_creator",
+        stream="backups",
+        stream_subjects=["backups.>"],
+        consumer_subject="backups.create",
+        callback=backup_kb_retried,
+        msg_type=CreateBackupRequest,
+        max_concurrent_messages=10,
+    )
+    return consumer
+
+
+async def create(kbid: str, backup_id: str) -> None:
+    producer: NatsTaskProducer[CreateBackupRequest] = create_producer(
+        name="backup_creator",
+        stream="backups",
+        stream_subjects=["backups.>"],
+        producer_subject="backups.create",
+        msg_type=CreateBackupRequest,
+    )
+    msg = CreateBackupRequest(
+        kbid=kbid,
+        backup_id=backup_id,
+    )
+    await producer.send(msg)
+
+
+def restorer_consumer() -> NatsTaskConsumer[RestoreBackupRequest]:
+    consumer: NatsTaskConsumer = create_consumer(
+        name="backup_restorer",
+        stream="backups",
+        stream_subjects=["backups.>"],
+        consumer_subject="backups.restore",
+        callback=restore_kb_retried,
+        msg_type=RestoreBackupRequest,
+        max_concurrent_messages=10,
+    )
+    return consumer
+
+
+async def restore(kbid: str, backup_id: str) -> None:
+    producer: NatsTaskProducer[RestoreBackupRequest] = create_producer(
+        name="backup_restorer",
+        stream="backups",
+        stream_subjects=["backups.>"],
+        producer_subject="backups.restore",
+        msg_type=RestoreBackupRequest,
+    )
+    msg = RestoreBackupRequest(
+        kbid=kbid,
+        backup_id=backup_id,
+    )
+    await producer.send(msg)
+
+
+def deleter_consumer() -> NatsTaskConsumer[DeleteBackupRequest]:
+    consumer: NatsTaskConsumer = create_consumer(
+        name="backup_deleter",
+        stream="backups",
+        stream_subjects=["backups.>"],
+        consumer_subject="backups.delete",
+        callback=delete_backup,
+        msg_type=DeleteBackupRequest,
+        max_concurrent_messages=2,
+    )
+    return consumer
+
+
+async def delete(backup_id: str) -> None:
+    producer: NatsTaskProducer[DeleteBackupRequest] = create_producer(
+        name="backup_deleter",
+        stream="backups",
+        stream_subjects=["backups.>"],
+        producer_subject="backups.delete",
+        msg_type=DeleteBackupRequest,
+    )
+    msg = DeleteBackupRequest(
+        backup_id=backup_id,
+    )
+    await producer.send(msg)
+
+
+async def initialize_consumers(context: ApplicationContext) -> list[Callable[[], Awaitable[None]]]:
+    creator = creator_consumer()
+    restorer = restorer_consumer()
+    deleter = deleter_consumer()
+    await creator.initialize(context)
+    await restorer.initialize(context)
+    await deleter.initialize(context)
+    return [
+        creator.finalize,
+        restorer.finalize,
+        deleter.finalize,
+    ]
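
Every operation in this module follows the same pattern: an async producer publishes a typed request message on a subject under the "backups" NATS stream, and the matching consumer invokes the callback out of band. A sketch of how a caller might enqueue work with the helpers above (assuming the NATS layer and the consumers returned by initialize_consumers are already running):

from nucliadb.backups import tasks

async def backup_then_restore(kbid: str, backup_id: str) -> None:
    # Publishes a CreateBackupRequest on "backups.create"; the
    # "backup_creator" consumer picks it up and runs backup_kb_retried.
    await tasks.create(kbid, backup_id)

    # Once the backup exists, a RestoreBackupRequest on "backups.restore"
    # triggers restore_kb_retried on the "backup_restorer" consumer.
    await tasks.restore(kbid, backup_id)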
nucliadb/backups/utils.py
ADDED
@@ -0,0 +1,32 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+from nucliadb.backups.const import StorageKeys
+from nucliadb.backups.settings import settings
+from nucliadb_utils.storages.storage import Storage
+
+
+async def exists_backup(storage: Storage, backup_id: str) -> bool:
+    async for _ in storage.iterate_objects(
+        bucket=settings.backups_bucket,
+        prefix=StorageKeys.BACKUP_PREFIX.format(backup_id=backup_id),
+    ):
+        return True
+    return False
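
exists_backup relies on the storage iterator being lazy: listing under the backup's key prefix and returning on the first hit avoids enumerating every object. A small usage sketch (the guard function is hypothetical):

from nucliadb.backups.utils import exists_backup
from nucliadb_utils.storages.storage import Storage

async def assert_backup_id_is_free(storage: Storage, backup_id: str) -> None:
    # Hypothetical guard: refuse to reuse an id that already has objects.
    if await exists_backup(storage, backup_id):
        raise ValueError(f"Backup {backup_id} already exists")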
nucliadb/common/cluster/grpc_node_dummy.py
CHANGED
@@ -36,7 +36,7 @@ from nucliadb_protos.utils_pb2 import Relation
 
 
 class DummyWriterStub:  # pragma: no cover
-    def __init__(self):
+    def __init__(self: "DummyWriterStub"):
         self.calls: dict[str, list[Any]] = {}
 
     async def NewShard(self, data):  # pragma: no cover
@@ -82,7 +82,7 @@ class DummyWriterStub:  # pragma: no cover
 
 
 class DummyReaderStub:  # pragma: no cover
-    def __init__(self):
+    def __init__(self: "DummyReaderStub"):
         self.calls: dict[str, list[Any]] = {}
 
     async def GetShard(self, data):  # pragma: no cover
nucliadb/common/cluster/manager.py
CHANGED
@@ -281,7 +281,7 @@ class KBShardManager:
 class StandaloneKBShardManager(KBShardManager):
     max_ops_before_checks = 200
 
-    def __init__(self):
+    def __init__(self: "StandaloneKBShardManager"):
         super().__init__()
         self._lock = asyncio.Lock()
         self._change_count: dict[tuple[str, str], int] = {}
nucliadb/common/datamanagers/atomic.py
CHANGED
@@ -35,8 +35,12 @@ it's transaction
 
 """
 
-import sys
 from functools import wraps
+from typing import Awaitable, Callable, TypeVar
+
+from typing_extensions import Concatenate, ParamSpec
+
+from nucliadb.common.maindb.driver import Transaction
 
 from . import kb as kb_dm
 from . import labels as labels_dm
@@ -44,34 +48,24 @@ from . import resources as resources_dm
 from . import synonyms as synonyms_dm
 from .utils import with_ro_transaction, with_transaction
 
-# XXX: we are using the not exported _ParamSpec to support 3.9. Whenever we
-# upgrade to >= 3.10 we'll be able to use ParamSpecKwargs and improve the
-# typing. We are abusing of ParamSpec anywat to better support text editors, so
-# we also need to ignore some mypy complains
-
-__python_version = (sys.version_info.major, sys.version_info.minor)
-if __python_version == (3, 9):
-    from typing_extensions import ParamSpec
-else:
-    from typing import ParamSpec  # type: ignore
-
 P = ParamSpec("P")
+T = TypeVar("T")
 
 
-def ro_txn_wrap(fun: P) -> P
+def ro_txn_wrap(fun: Callable[Concatenate[Transaction, P], Awaitable[T]]) -> Callable[P, Awaitable[T]]:
     @wraps(fun)
-    async def wrapper(**kwargs: P.kwargs):
+    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
         async with with_ro_transaction() as txn:
-            return await fun(txn, **kwargs)
+            return await fun(txn, *args, **kwargs)
 
     return wrapper
 
 
-def rw_txn_wrap(fun: P) -> P
+def rw_txn_wrap(fun: Callable[Concatenate[Transaction, P], Awaitable[T]]) -> Callable[P, Awaitable[T]]:
     @wraps(fun)
-    async def wrapper(**kwargs: P.kwargs):
+    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
         async with with_transaction() as txn:
-            result = await fun(txn, **kwargs)
+            result = await fun(txn, *args, **kwargs)
             await txn.commit()
         return result
 
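
The new signatures use Concatenate to express that the wrapped function takes a leading Transaction argument supplied by the decorator, while ParamSpec preserves the remaining parameters for type checkers and the TypeVar T carries the return type through. A self-contained sketch of the same typed-decorator pattern with a stand-in transaction class (names are illustrative, not the nucliadb API):

from functools import wraps
from typing import Awaitable, Callable, TypeVar

from typing_extensions import Concatenate, ParamSpec

P = ParamSpec("P")
T = TypeVar("T")


class FakeTransaction:
    async def commit(self) -> None:
        pass


def txn_wrap(
    fun: Callable[Concatenate[FakeTransaction, P], Awaitable[T]],
) -> Callable[P, Awaitable[T]]:
    @wraps(fun)
    async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
        txn = FakeTransaction()  # nucliadb gets this from with_transaction()
        result = await fun(txn, *args, **kwargs)
        await txn.commit()
        return result

    return wrapper


@txn_wrap
async def get_value(txn: FakeTransaction, key: str) -> str:
    return f"value-for-{key}"

# Callers never pass the transaction themselves:
#     value = await get_value("some-key")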
nucliadb/common/datamanagers/entities.py
CHANGED
@@ -41,7 +41,7 @@ class EntitiesMetaCache:
     change the structure of this class or we'll break the index.
     """
 
-    def __init__(self):
+    def __init__(self: "EntitiesMetaCache") -> None:
         self.deleted_entities: dict[str, list[str]] = {}
         self.duplicate_entities: dict[str, dict[str, list[str]]] = {}
         # materialize by value for faster lookups
nucliadb/export_import/utils.py
CHANGED
@@ -40,7 +40,7 @@ from nucliadb_protos import resources_pb2, writer_pb2
 from nucliadb_utils.const import Streams
 from nucliadb_utils.transaction import MaxTransactionSizeExceededError
 
-BinaryStream =
+BinaryStream = AsyncIterator[bytes]
 BinaryStreamGenerator = Callable[[int], BinaryStream]
 
 
@@ -237,8 +237,11 @@ async def download_binary(
     context: ApplicationContext, cf: resources_pb2.CloudFile
 ) -> AsyncGenerator[bytes, None]:
     bucket_name = context.blob_storage.get_bucket_name_from_cf(cf)
+    downloaded_bytes = 0
     async for data in context.blob_storage.download(bucket_name, cf.uri):
         yield data
+        downloaded_bytes += len(data)
+    assert downloaded_bytes == cf.size, "Downloaded bytes do not match the expected size"
 
 
 async def get_entities(context: ApplicationContext, kbid: str) -> kb_pb2.EntitiesGroups:
@@ -416,6 +419,8 @@ class ExportStreamReader:
 class TaskRetryHandler:
     """
     Class that wraps an import/export task and adds retry logic to it.
+
+    TODO: This should be refactored to use generic task retry logic at tasks/retries.py::TaskRetryHandler
     """
 
     def __init__(
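
The download_binary change adds an integrity check: bytes are counted as they are yielded and compared against the size recorded in the CloudFile once the stream is exhausted. The same guard can be written as a reusable wrapper around any byte stream; a minimal sketch:

from typing import AsyncGenerator, AsyncIterator

async def checked_stream(
    stream: AsyncIterator[bytes], expected_size: int
) -> AsyncGenerator[bytes, None]:
    # Re-yield chunks while counting them, then verify the total.
    downloaded = 0
    async for chunk in stream:
        downloaded += len(chunk)
        yield chunk
    assert downloaded == expected_size, "Downloaded bytes do not match the expected size"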
nucliadb/ingest/app.py
CHANGED
@@ -22,6 +22,7 @@ import importlib.metadata
 from typing import Awaitable, Callable
 
 from nucliadb import health
+from nucliadb.backups.tasks import initialize_consumers as initialize_backup_consumers
 from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
 from nucliadb.common.context import ApplicationContext
 from nucliadb.common.nidx import start_nidx_utility
@@ -154,6 +155,7 @@ async def main_subscriber_workers():  # pragma: no cover
     await exports_consumer.initialize(context)
     imports_consumer = get_imports_consumer()
     await imports_consumer.initialize(context)
+    backup_consumers_finalizers = await initialize_backup_consumers(context)
 
     await run_until_exit(
         [
@@ -165,7 +167,10 @@ async def main_subscriber_workers():  # pragma: no cover
             metrics_server.shutdown,
             grpc_health_finalizer,
             context.finalize,
+            exports_consumer.finalize,
+            imports_consumer.finalize,
         ]
+        + backup_consumers_finalizers
         + finalizers
     )
 
@@ -216,6 +221,7 @@ def run_subscriber_workers() -> None:  # pragma: no cover
     - audit fields subscriber
     - export/import subscriber
     - materializer subscriber
+    - backups subscribers
     """
     setup_configuration()
     asyncio.run(main_subscriber_workers())
nucliadb/ingest/fields/exceptions.py
CHANGED
@@ -28,4 +28,4 @@ class InvalidPBClass(Exception):
     def __init__(self, source: Type, destination: Type):
         self.source = source
         self.destination = destination
-        super().__init__("Source and destination does not match
+        super().__init__(f"Source and destination does not match {self.source} - {self.destination}")
nucliadb/ingest/orm/resource.py
CHANGED
@@ -858,9 +858,9 @@ class Resource:
         for field_vectors in fields_vectors:
             # Bw/c with extracted vectors without vectorsets
             if not field_vectors.vectorset_id:
-                assert (
-
-                )
+                assert len(vectorsets) == 1, (
+                    "Invalid broker message, can't ingest vectors from unknown vectorset to KB with multiple vectorsets"
+                )
                 vectorset = list(vectorsets.values())[0]
 
             else:
nucliadb/ingest/processing.py
CHANGED
@@ -477,9 +477,9 @@ class ProcessingEngine:
 
 
 class DummyProcessingEngine(ProcessingEngine):
-    def __init__(self):
+    def __init__(self: "DummyProcessingEngine"):
         self.calls: list[list[Any]] = []
-        self.values = defaultdict(list)
+        self.values: dict[str, Any] = defaultdict(list)
         self.onprem = True
 
     async def initialize(self):
nucliadb/ingest/serialize.py
CHANGED
@@ -189,7 +189,7 @@ async def managed_serialize(
 
     include_values = ResourceProperties.VALUES in show
 
-    include_extracted_data = ResourceProperties.EXTRACTED in show and extracted
+    include_extracted_data = ResourceProperties.EXTRACTED in show and extracted != []
 
     if ResourceProperties.BASIC in show:
         await orm_resource.get_basic()
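
The serialize change makes include_extracted_data an actual boolean: with "X in show and extracted", the expression evaluates to the extracted list itself whenever the membership test passes, so the variable could hold a (possibly empty) list instead of True/False, whereas comparing against [] always yields a bool. Plain values standing in for the enum members illustrate the difference:

show = {"extracted"}
extracted = ["text", "metadata"]

flag = "extracted" in show and extracted
print(flag)  # ['text', 'metadata'] -- the list itself, not a bool

flag = "extracted" in show and extracted != []
print(flag)  # True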
|