PyPI - nucliadb - Versions diffs - 6.3.1.post3571__py3-none-any.whl → 6.3.1.post3577__py3-none-any.whl - Mend

nucliadb 6.3.1.post3571__py3-none-any.whl → 6.3.1.post3577__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

nucliadb/backups/create.py CHANGED Viewed

@@ -18,6 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 import asyncio
+import logging
 import tarfile
 from datetime import datetime, timezone
 from typing import AsyncIterator, Optional
@@ -29,6 +30,7 @@ from nucliadb.backups.const import (
 )
 from nucliadb.backups.models import BackupMetadata, CreateBackupRequest
 from nucliadb.backups.settings import settings
+from nucliadb.backups.utils import exists_in_storge
 from nucliadb.common import datamanagers
 from nucliadb.common.context import ApplicationContext
 from nucliadb.export_import.utils import (
@@ -44,6 +46,8 @@ from nucliadb_utils.audit.stream import StreamAuditStorage
 from nucliadb_utils.storages.storage import StorageField
 from nucliadb_utils.utilities import get_audit
+logger = logging.getLogger(__name__)
 async def backup_kb_task(context: ApplicationContext, msg: CreateBackupRequest):
     kbid = msg.kb_id
@@ -101,6 +105,10 @@ async def backup_resources(context: ApplicationContext, kbid: str, backup_id: st
             await set_metadata(context, kbid, backup_id, metadata)
             tasks = []
             backing_up = []
+            logger.info(
+                f"Backup resources: {len(metadata.missing_resources)} remaining",
+                extra={"kbid": kbid, "backup_id": backup_id},
+            )
     if len(tasks) > 0:
         resources_bytes = await asyncio.gather(*tasks)
         metadata.total_size += sum(resources_bytes)
@@ -108,6 +116,7 @@ async def backup_resources(context: ApplicationContext, kbid: str, backup_id: st
         await set_metadata(context, kbid, backup_id, metadata)
         tasks = []
         backing_up = []
+        logger.info(f"Backup resources: completed", extra={"kbid": kbid, "backup_id": backup_id})
 async def backup_resource(context: ApplicationContext, backup_id: str, kbid: str, rid: str) -> int:
@@ -163,6 +172,13 @@ async def backup_resource_with_binaries(
         nonlocal total_size
         for cloud_file in get_cloud_files(bm):
+            if not await exists_cf(context, cloud_file):
+                logger.warning(
+                    "Cloud file not found in storage, skipping",
+                    extra={"kbid": kbid, "rid": rid, "cf_uri": cloud_file.uri},
+                )
+                continue
             serialized_cf = cloud_file.SerializeToString()
             async def cf_iterator():
@@ -244,6 +260,10 @@ async def delete_metadata(context: ApplicationContext, kbid: str, backup_id: str
         await txn.commit()
+async def exists_cf(context: ApplicationContext, cf: resources_pb2.CloudFile) -> bool:
+    return await exists_in_storge(context.blob_storage, cf.bucket_name, cf.uri)
 async def upload_to_bucket(context: ApplicationContext, bytes_iterator: AsyncIterator[bytes], key: str):
     storage = context.blob_storage
     bucket = settings.backups_bucket

nucliadb/backups/restore.py CHANGED Viewed

@@ -21,6 +21,7 @@
 import asyncio
 import functools
+import logging
 import tarfile
 from typing import AsyncIterator, Callable, Optional, Union
@@ -39,6 +40,8 @@ from nucliadb_protos import knowledgebox_pb2 as kb_pb2
 from nucliadb_protos.resources_pb2 import CloudFile
 from nucliadb_protos.writer_pb2 import BrokerMessage
+logger = logging.getLogger(__name__)
 async def restore_kb_task(context: ApplicationContext, msg: RestoreBackupRequest):
     kbid = msg.kb_id
@@ -193,7 +196,7 @@ class ResourceBackupReader:
         elif tarinfo.name.startswith("cloud-files"):
             raw_cf = await self.read_data(tarinfo)
             cf = CloudFile()
-            cf.FromString(raw_cf)
+            cf.ParseFromString(raw_cf)
             return cf
         elif tarinfo.name.startswith("binaries"):
             uri = tarinfo.name.lstrip("binaries/")
@@ -219,14 +222,19 @@ async def restore_resource(context: ApplicationContext, kbid: str, backup_id: st
             bm = item
             bm.kbid = kbid
             break
-        # Read the cloud file and its binary
-        cf = await reader.read_item()
-        assert isinstance(cf, CloudFile)
-        cf_binary = await reader.read_item()
-        assert isinstance(cf_binary, CloudFileBinary)
-        assert cf.uri == cf_binary.uri
-        await import_binary(context, kbid, cf, cf_binary.read)
+        elif isinstance(item, CloudFile):
+            # Read its binary and import it
+            cf = item
+            cf_binary = await reader.read_item()
+            assert isinstance(cf_binary, CloudFileBinary)
+            assert cf.uri == cf_binary.uri
+            await import_binary(context, kbid, cf, cf_binary.read)
+        else:
+            logger.error(
+                "Unexpected item in resource backup. Backup may be corrupted",
+                extra={"item_type": type(item), kbid: kbid, resource_id: resource_id},
+            )
+            continue
     await import_broker_message(context, kbid, bm)

nucliadb/backups/utils.py CHANGED Viewed

@@ -24,9 +24,12 @@ from nucliadb_utils.storages.storage import Storage
 async def exists_backup(storage: Storage, backup_id: str) -> bool:
-    async for _ in storage.iterate_objects(
-        bucket=settings.backups_bucket,
-        prefix=StorageKeys.BACKUP_PREFIX.format(backup_id=backup_id),
-    ):
+    return await exists_in_storge(
+        storage, settings.backups_bucket, StorageKeys.BACKUP_PREFIX.format(backup_id=backup_id)
+    )
+async def exists_in_storge(storage: Storage, bucket: str, key: str) -> bool:
+    async for _ in storage.iterate_objects(bucket=bucket, prefix=key):
         return True
     return False

nucliadb/search/search/chat/query.py CHANGED Viewed

@@ -272,6 +272,7 @@ async def get_relations_results_from_entities(
     timeout: Optional[float] = None,
     only_with_metadata: bool = False,
     only_agentic_relations: bool = False,
+    only_entity_to_entity: bool = False,
     deleted_entities: set[str] = set(),
 ) -> Relations:
     request = SearchRequest()
@@ -295,7 +296,11 @@ async def get_relations_results_from_entities(
     )
     relations_results: list[RelationSearchResponse] = [result.relation for result in results]
     return await merge_relations_results(
-        relations_results, request.relation_subgraph, only_with_metadata, only_agentic_relations
+        relations_results,
+        request.relation_subgraph,
+        only_with_metadata,
+        only_agentic_relations,
+        only_entity_to_entity,
     )

nucliadb/search/search/graph_strategy.py CHANGED Viewed

@@ -369,8 +369,10 @@ async def get_graph_results(
                     kbid=kbid,
                     entities=entities_to_explore,
                     timeout=5.0,
-                    only_with_metadata=True,
+                    only_with_metadata=not graph_strategy.relation_text_as_paragraphs,
                     only_agentic_relations=graph_strategy.agentic_graph_only,
+                    # We only want entity to entity relations (skip resource/labels/collaborators/etc.)
+                    only_entity_to_entity=True,
                     deleted_entities=explored_entities,
                 )
             except Exception as e:
@@ -683,6 +685,7 @@ def build_text_blocks_from_relations(
     triplets: dict[tuple[str, str, str], tuple[float, Relations, Optional[ParagraphId]]] = defaultdict(
         lambda: (0.0, Relations(entities={}), None)
     )
+    paragraph_count = 0
     for ent, subgraph in relations.entities.items():
         for rel, score in zip(subgraph.related_to, scores[ent]):
             key = (
@@ -702,6 +705,14 @@ def build_text_blocks_from_relations(
             # we keep the first one, but we lose the other ones
             if p_id is None and rel.metadata and rel.metadata.paragraph_id:
                 p_id = ParagraphId.from_string(rel.metadata.paragraph_id)
+            else:
+                # No paragraph ID set, fake it so we can hydrate the resource
+                p_id = ParagraphId(
+                    field_id=FieldId(rel.resource_id, "a", "usermetadata"),
+                    paragraph_start=paragraph_count,
+                    paragraph_end=paragraph_count + 1,
+                )
+                paragraph_count += 1
             existing_relations.entities[ent].related_to.append(rel)
             # XXX: Here we use the max even though all relations with same triplet should have same score
             triplets[key] = (max(existing_score, score), existing_relations, p_id)

nucliadb/search/search/merge.py CHANGED Viewed

@@ -35,6 +35,7 @@ from nucliadb.search.search.fetch import (
 )
 from nucliadb_models.common import FieldTypeName
 from nucliadb_models.labels import translate_system_to_alias_label
+from nucliadb_models.metadata import RelationType
 from nucliadb_models.resource import ExtractedDataTypeName
 from nucliadb_models.search import (
     DirectionalRelation,
@@ -445,6 +446,7 @@ async def merge_relations_results(
     query: EntitiesSubgraphRequest,
     only_with_metadata: bool = False,
     only_agentic: bool = False,
+    only_entity_to_entity: bool = False,
 ) -> Relations:
     loop = asyncio.get_event_loop()
     return await loop.run_in_executor(
@@ -454,6 +456,7 @@ async def merge_relations_results(
         query,
         only_with_metadata,
         only_agentic,
+        only_entity_to_entity,
     )
@@ -462,6 +465,7 @@ def _merge_relations_results(
     query: EntitiesSubgraphRequest,
     only_with_metadata: bool,
     only_agentic: bool,
+    only_entity_to_entity: bool,
 ) -> Relations:
     """
     Merge relation search responses into a single Relations object while applying filters.
@@ -490,33 +494,41 @@ def _merge_relations_results(
             # If only_with_metadata is True, we check that metadata for the relation is not None
             # If only_agentic is True, we check that metadata for the relation is not None and that it has a data_augmentation_task_id
             # TODO: This is suboptimal, we should be able to filter this in the query to the index,
-            if (not only_with_metadata or metadata) and (
-                not only_agentic or (metadata and metadata.data_augmentation_task_id)
-            ):
-                if origin.value in relations.entities:
-                    relations.entities[origin.value].related_to.append(
-                        DirectionalRelation(
-                            entity=destination.value,
-                            entity_type=relation_node_type_to_entity_type(destination.ntype),
-                            entity_subtype=destination.subtype,
-                            relation=relation_type,
-                            relation_label=relation_label,
-                            direction=RelationDirection.OUT,
-                            metadata=from_proto.relation_metadata(metadata) if metadata else None,
-                        )
+            if only_with_metadata and not metadata:
+                continue
+            if only_agentic and (not metadata or not metadata.data_augmentation_task_id):
+                continue
+            if only_entity_to_entity and relation_type != RelationType.ENTITY:
+                continue
+            if origin.value in relations.entities:
+                relations.entities[origin.value].related_to.append(
+                    DirectionalRelation(
+                        entity=destination.value,
+                        entity_type=relation_node_type_to_entity_type(destination.ntype),
+                        entity_subtype=destination.subtype,
+                        relation=relation_type,
+                        relation_label=relation_label,
+                        direction=RelationDirection.OUT,
+                        metadata=from_proto.relation_metadata(metadata) if metadata else None,
+                        resource_id=relation.resource_id,
                     )
-                elif destination.value in relations.entities:
-                    relations.entities[destination.value].related_to.append(
-                        DirectionalRelation(
-                            entity=origin.value,
-                            entity_type=relation_node_type_to_entity_type(origin.ntype),
-                            entity_subtype=origin.subtype,
-                            relation=relation_type,
-                            relation_label=relation_label,
-                            direction=RelationDirection.IN,
-                            metadata=from_proto.relation_metadata(metadata) if metadata else None,
-                        )
+                )
+            elif destination.value in relations.entities:
+                relations.entities[destination.value].related_to.append(
+                    DirectionalRelation(
+                        entity=origin.value,
+                        entity_type=relation_node_type_to_entity_type(origin.ntype),
+                        entity_subtype=origin.subtype,
+                        relation=relation_type,
+                        relation_label=relation_label,
+                        direction=RelationDirection.IN,
+                        metadata=from_proto.relation_metadata(metadata) if metadata else None,
+                        resource_id=relation.resource_id,
                     )
+                )
     return relations

{nucliadb-6.3.1.post3571.dist-info → nucliadb-6.3.1.post3577.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nucliadb
-Version: 6.3.1.post3571
+Version: 6.3.1.post3577
 Summary: NucliaDB
 Author-email: Nuclia <nucliadb@nuclia.com>
 License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: <4,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.3.1.post3571
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.1.post3571
-Requires-Dist: nucliadb-protos>=6.3.1.post3571
-Requires-Dist: nucliadb-models>=6.3.1.post3571
-Requires-Dist: nidx-protos>=6.3.1.post3571
+Requires-Dist: nucliadb-telemetry[all]>=6.3.1.post3577
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.1.post3577
+Requires-Dist: nucliadb-protos>=6.3.1.post3577
+Requires-Dist: nucliadb-models>=6.3.1.post3577
+Requires-Dist: nidx-protos>=6.3.1.post3577
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn

{nucliadb-6.3.1.post3571.dist-info → nucliadb-6.3.1.post3577.dist-info}/RECORD RENAMED Viewed

@@ -41,13 +41,13 @@ nucliadb/openapi.py,sha256=wDiw0dVEvTpJvbatkJ0JZLkKm9RItZT5PWRHjqRfqTA,2272
 nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb/backups/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/backups/const.py,sha256=9vPAhLxQO_gNAjSdPxWuv3V66s9WcdpjOQ89CZlfmuk,1894
-nucliadb/backups/create.py,sha256=AM_nC7TgHOX0EFGaTXClS28jBSK28fHrKNZi14z2wek,10442
+nucliadb/backups/create.py,sha256=mvirguMbtxNgSDGG81l0kkgHWJSZPk4GFyra9nAkBZM,11275
 nucliadb/backups/delete.py,sha256=1rnBhVUGYYZJXSZUrrgYMDZ5NyswEWkIA-G-crRCyHk,2404
 nucliadb/backups/models.py,sha256=-hITU4Mv6AxePu12toBu_fjpEv6vVGcwNVxV22O9jQA,1273
-nucliadb/backups/restore.py,sha256=xhslVvTf4H8VmDucZpjrEFpKj6csPIWBadCPMVJYKQ8,9703
+nucliadb/backups/restore.py,sha256=wepEgv4vBN5yeiZU-f17PbuFV4xT4_SVKplNr8xSJrE,10001
 nucliadb/backups/settings.py,sha256=SyzsInj1BRbBI0atg5IXWbMbOZ_eVg4eSQ3IcnUhCxQ,1357
 nucliadb/backups/tasks.py,sha256=4_kOVJ2yCwMvDEpzJgTuTt75TNlpq5woyw9sTAcaSkw,4194
-nucliadb/backups/utils.py,sha256=ayDaxfWP5cPnAkQH-tF4M6cnowsPQgU2ljYz_iL1CbE,1249
+nucliadb/backups/utils.py,sha256=b1hi0gEp90tNrWHejNVoUgRpa4D6uKGhbACq0yeLkJY,1375
 nucliadb/common/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
 nucliadb/common/counters.py,sha256=8lOi3A2HeLDDlcNaS2QT1SfD3350VPBjiY3FkmHH1V8,977
@@ -224,10 +224,10 @@ nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_Qzi
 nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
 nucliadb/search/search/find.py,sha256=jQZOqu8VeX8k3ELV8bLK4TwUUjGrvmubouxvO1IvJV0,10236
 nucliadb/search/search/find_merge.py,sha256=3FnzKFEnVemg6FO_6zveulbAU7klvsiPEBvLrpBBMg8,17450
-nucliadb/search/search/graph_strategy.py,sha256=ahwcUTQZ0Ll-rnS285DO9PmRyiM-1p4BM3UvmOYVwhM,31750
+nucliadb/search/search/graph_strategy.py,sha256=gisL2GpbSIa_SucyOwEt7TWdqURyAQqxvD_-PkXQct8,32339
 nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
 nucliadb/search/search/ingestion_agents.py,sha256=NeJr4EEX-bvFFMGvXOOwLv8uU7NuQ-ntJnnrhnKfMzY,3174
-nucliadb/search/search/merge.py,sha256=i_PTBFRqC5iTTziOMEltxLIlmokIou5hjjgR4BnoLBE,22635
+nucliadb/search/search/merge.py,sha256=aUn6f5XnwWzUFhVC6uBqHE8NKdlfgw_xcTo57rS23U8,22950
 nucliadb/search/search/metrics.py,sha256=GGGtXHLhK79_ESV277xkBVjcaMURXHCxYG0EdGamUd8,2886
 nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
 nucliadb/search/search/pgcatalog.py,sha256=V1NYLEUSXHpWmgcPIo1HS2riK_HDXSi-uykJjSoOOrE,9033
@@ -243,7 +243,7 @@ nucliadb/search/search/chat/ask.py,sha256=olZT08JVo3ZGDsDXkjvI2JTlqQln_o91HJzv0T
 nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
 nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
 nucliadb/search/search/chat/prompt.py,sha256=Jnja-Ss7skgnnDY8BymVfdeYsFPnIQFL8tEvcRXTKUE,47356
-nucliadb/search/search/chat/query.py,sha256=2QhVzvX12zLHOpVZ5MlBflqAauyCBl6dojhRGdm_6qU,16388
+nucliadb/search/search/chat/query.py,sha256=0IoeW-JNaRBe2d9C3bXNfkYpzmsN_IIg3U4Vqb8eOEk,16485
 nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/query_parser/catalog.py,sha256=PtH5nb6UTzH8l7Lmdd1RgLVFsn9CN5M5-JkVq9YeR4k,7116
 nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
@@ -347,8 +347,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.3.1.post3571.dist-info/METADATA,sha256=s2P-Covs_cwHf5pNszgsGypGofi5r1zgIOV7ccxAh6M,4291
-nucliadb-6.3.1.post3571.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
-nucliadb-6.3.1.post3571.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.3.1.post3571.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.3.1.post3571.dist-info/RECORD,,
+nucliadb-6.3.1.post3577.dist-info/METADATA,sha256=7cMll6LH15F3_kAg1yrlOSDK8XFk77amc4TEa0kApug,4291
+nucliadb-6.3.1.post3577.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+nucliadb-6.3.1.post3577.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.3.1.post3577.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.3.1.post3577.dist-info/RECORD,,

{nucliadb-6.3.1.post3571.dist-info → nucliadb-6.3.1.post3577.dist-info}/WHEEL RENAMED Viewed

File without changes

{nucliadb-6.3.1.post3571.dist-info → nucliadb-6.3.1.post3577.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nucliadb-6.3.1.post3571.dist-info → nucliadb-6.3.1.post3577.dist-info}/top_level.txt RENAMED Viewed

File without changes

nucliadb 6.3.1.post3571__py3-none-any.whl → 6.3.1.post3577__py3-none-any.whl

nucliadb 6.3.1.post3571__py3-none-any.whl → 6.3.1.post3577__py3-none-any.whl