PyPI - nucliadb - Versions diffs - 6.4.0.post4293__py3-none-any.whl → 6.4.0.post4313__py3-none-any.whl - Mend

nucliadb 6.4.0.post4293-py3-none-any.whl → 6.4.0.post4313-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

nucliadb/purge/__init__.py CHANGED Viewed

@@ -232,26 +232,39 @@ async def purge_kb_vectorsets(driver: Driver, storage: Storage):
                 async for resource in kb.iterate_resources():
                     fields.extend((await resource.get_fields(force=True)).values())
-            # we don't need the maindb transaction anymore to remove vectors from storage
-            for field in fields:
-                if purge_payload.storage_key_kind == VectorSetConfig.StorageKeyKind.UNSET:
-                    # Bw/c for purge before adding purge payload. We assume
-                    # there's only 2 kinds of KBs: with one or with more than
-                    # one vectorset. KBs with one vectorset are not allowed to
-                    # delete their vectorset, so we wouldn't be here. It has to
-                    # be a KB with multiple, so the storage key kind has to be
-                    # this:
-                    await field.delete_vectors(
-                        vectorset, VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX
-                    )
-                else:
-                    await field.delete_vectors(vectorset, purge_payload.storage_key_kind)
+            logger.info(f"Purging {len(fields)} fields for vectorset {vectorset}", extra={"kbid": kbid})
+            for fields_batch in batchify(fields, 20):
+                tasks = []
+                for field in fields_batch:
+                    if purge_payload.storage_key_kind == VectorSetConfig.StorageKeyKind.UNSET:
+                        # Bw/c for purge before adding purge payload. We assume
+                        # there's only 2 kinds of KBs: with one or with more than
+                        # one vectorset. KBs with one vectorset are not allowed to
+                        # delete their vectorset, so we wouldn't be here. It has to
+                        # be a KB with multiple, so the storage key kind has to be
+                        # this:
+                        tasks.append(
+                            asyncio.create_task(
+                                field.delete_vectors(
+                                    vectorset, VectorSetConfig.StorageKeyKind.VECTORSET_PREFIX
+                                )
+                            )
+                        )
+                    else:
+                        tasks.append(
+                            asyncio.create_task(
+                                field.delete_vectors(vectorset, purge_payload.storage_key_kind)
+                            )
+                        )
+                await asyncio.gather(*tasks)
             # Finally, delete the key
             async with driver.transaction() as txn:
                 await txn.delete(key)
                 await txn.commit()
+            logger.info(f"Finished purging vectorset {vectorset} for KB", extra={"kbid": kbid})
         except Exception as exc:
             errors.capture_exception(exc)
             logger.error(
@@ -304,3 +317,9 @@ def run() -> int:  # pragma: no cover
     setup_logging()
     errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
     return asyncio.run(main())
+def batchify(iterable, n=1):
+    """Yield successive n-sized chunks from iterable."""
+    for i in range(0, len(iterable), n):
+        yield iterable[i : i + n]

nucliadb/search/api/v1/graph.py CHANGED Viewed

@@ -21,7 +21,7 @@ from fastapi import Header, Request, Response
 from fastapi_versioning import version
 from nucliadb.search.api.v1.router import KB_PREFIX, api
-from nucliadb.search.requesters.utils import Method, node_query
+from nucliadb.search.requesters.utils import Method, nidx_query
 from nucliadb.search.search.graph_merge import (
     build_graph_nodes_response,
     build_graph_relations_response,
@@ -71,7 +71,7 @@ async def graph_search_knowledgebox(
 ) -> GraphSearchResponse:
     pb_query = await parse_graph_search(kbid, item)
-    results, _, _ = await node_query(kbid, Method.GRAPH, pb_query)
+    results, _ = await nidx_query(kbid, Method.GRAPH, pb_query)
     return build_graph_response(results)
@@ -98,7 +98,7 @@ async def graph_nodes_search_knowledgebox(
 ) -> GraphNodesSearchResponse:
     pb_query = await parse_graph_node_search(kbid, item)
-    results, _, _ = await node_query(kbid, Method.GRAPH, pb_query)
+    results, _ = await nidx_query(kbid, Method.GRAPH, pb_query)
     return build_graph_nodes_response(results)
@@ -125,6 +125,6 @@ async def graph_relations_search_knowledgebox(
 ) -> GraphRelationsSearchResponse:
     pb_query = await parse_graph_relation_search(kbid, item)
-    results, _, _ = await node_query(kbid, Method.GRAPH, pb_query)
+    results, _ = await nidx_query(kbid, Method.GRAPH, pb_query)
     return build_graph_relations_response(results)

nucliadb/search/api/v1/resource/search.py CHANGED Viewed

@@ -27,7 +27,7 @@ from pydantic import ValidationError
 from nucliadb.models.responses import HTTPClientError
 from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
 from nucliadb.search.api.v1.utils import fastapi_query
-from nucliadb.search.requesters.utils import Method, node_query
+from nucliadb.search.requesters.utils import Method, nidx_query
 from nucliadb.search.search import cache
 from nucliadb.search.search.exceptions import InvalidQueryError
 from nucliadb.search.search.merge import merge_paragraphs_results
@@ -110,7 +110,7 @@ async def resource_search(
             detail = json.loads(exc.json())
             return HTTPClientError(status_code=422, detail=detail)
-        results, incomplete_results, queried_shards = await node_query(kbid, Method.SEARCH, pb_query)
+        results, queried_shards = await nidx_query(kbid, Method.SEARCH, pb_query)
         # We need to merge
         search_results = await merge_paragraphs_results(
@@ -120,8 +120,5 @@ async def resource_search(
             highlight_split=highlight,
             min_score=0.0,
         )
-        response.status_code = 206 if incomplete_results else 200
         search_results.shards = queried_shards
         return search_results

nucliadb/search/api/v1/search.py CHANGED Viewed

@@ -32,7 +32,7 @@ from nucliadb.models.responses import HTTPClientError
 from nucliadb.search import predict
 from nucliadb.search.api.v1.router import KB_PREFIX, api
 from nucliadb.search.api.v1.utils import fastapi_query
-from nucliadb.search.requesters.utils import Method, node_query
+from nucliadb.search.requesters.utils import Method, nidx_query
 from nucliadb.search.search import cache
 from nucliadb.search.search.exceptions import InvalidQueryError
 from nucliadb.search.search.merge import merge_results
@@ -265,8 +265,7 @@ async def search(
     pb_query, incomplete_results, autofilters, _ = await legacy_convert_retrieval_to_proto(parsed)
     # We need to query all nodes
-    results, query_incomplete_results, queried_shards = await node_query(kbid, Method.SEARCH, pb_query)
-    incomplete_results = incomplete_results or query_incomplete_results
+    results, queried_shards = await nidx_query(kbid, Method.SEARCH, pb_query)
     # We need to merge
     search_results = await merge_results(

nucliadb/search/api/v1/suggest.py CHANGED Viewed

@@ -28,7 +28,7 @@ from pydantic import ValidationError
 from nucliadb.models.responses import HTTPClientError
 from nucliadb.search.api.v1.router import KB_PREFIX, api
 from nucliadb.search.api.v1.utils import fastapi_query
-from nucliadb.search.requesters.utils import Method, node_query
+from nucliadb.search.requesters.utils import Method, nidx_query
 from nucliadb.search.search import cache
 from nucliadb.search.search.exceptions import InvalidQueryError
 from nucliadb.search.search.merge import merge_suggest_results
@@ -160,7 +160,7 @@ async def suggest(
             range_modification_end,
             hidden,
         )
-        results, incomplete_results, queried_shards = await node_query(kbid, Method.SUGGEST, pb_query)
+        results, queried_shards = await nidx_query(kbid, Method.SUGGEST, pb_query)
         # We need to merge
         search_results = await merge_suggest_results(
@@ -168,9 +168,6 @@ async def suggest(
             kbid=kbid,
             highlight=highlight,
         )
-        response.status_code = 206 if incomplete_results else 200
         if debug and queried_shards:
             search_results.shards = queried_shards

nucliadb/search/requesters/utils.py CHANGED Viewed

@@ -71,38 +71,38 @@ T = TypeVar(
 @overload
-async def node_query(
+async def nidx_query(
     kbid: str,
     method: Method,
     pb_query: SuggestRequest,
     timeout: Optional[float] = None,
-) -> tuple[list[SuggestResponse], bool, list[str]]: ...
+) -> tuple[list[SuggestResponse], list[str]]: ...
 @overload
-async def node_query(
+async def nidx_query(
     kbid: str,
     method: Method,
     pb_query: SearchRequest,
     timeout: Optional[float] = None,
-) -> tuple[list[SearchResponse], bool, list[str]]: ...
+) -> tuple[list[SearchResponse], list[str]]: ...
 @overload
-async def node_query(
+async def nidx_query(
     kbid: str,
     method: Method,
     pb_query: GraphSearchRequest,
     timeout: Optional[float] = None,
-) -> tuple[list[GraphSearchResponse], bool, list[str]]: ...
+) -> tuple[list[GraphSearchResponse], list[str]]: ...
-async def node_query(
+async def nidx_query(
     kbid: str,
     method: Method,
     pb_query: REQUEST_TYPE,
     timeout: Optional[float] = None,
-) -> tuple[Sequence[Union[T, BaseException]], bool, list[str]]:
+) -> tuple[Sequence[Union[T, BaseException]], list[str]]:
     timeout = timeout or settings.search_timeout
     shard_manager = get_shard_manager()
     try:
@@ -115,7 +115,6 @@ async def node_query(
     ops = []
     queried_shards = []
-    incomplete_results = False
     for shard_obj in shard_groups:
         shard_id = shard_obj.nidx_shard_id
@@ -127,10 +126,10 @@ async def node_query(
             queried_shards.append(shard_id)
     if not ops:
-        logger.warning(f"No node found for any of this resources shards {kbid}")
+        logger.warning(f"No shards found for kb", extra={"kbid": kbid})
         raise HTTPException(
             status_code=512,
-            detail=f"No node found for any of this resources shards {kbid}",
+            detail=f"No shards found for kb",
         )
     try:
@@ -144,12 +143,12 @@ async def node_query(
         )
         results = [exc]
-    error = validate_node_query_results(results or [])
+    error = validate_nidx_query_results(results or [])
     if error is not None:
         query_dict = MessageToDict(pb_query)
         query_dict.pop("vector", None)
         logger.error(
-            "Error while querying nodes",
+            "Error while querying nidx",
             extra={
                 "kbid": kbid,
                 "query": json.dumps(query_dict),
@@ -157,12 +156,12 @@ async def node_query(
         )
         raise error
-    return results, incomplete_results, queried_shards
+    return results, queried_shards
-def validate_node_query_results(results: list[Any]) -> Optional[HTTPException]:
+def validate_nidx_query_results(results: list[Any]) -> Optional[HTTPException]:
     """
-    Validate the results of a node query and return an exception if any error is found
+    Validate the results of a nidx query and return an exception if any error is found
     Handling of exception is responsibility of caller.
     """
@@ -175,14 +174,14 @@ def validate_node_query_results(results: list[Any]) -> Optional[HTTPException]:
             reason = "Error while querying shard data."
             if isinstance(result, AioRpcError):
                 if result.code() is GrpcStatusCode.INTERNAL:
-                    # handle node response errors
+                    # handle nidx response errors
                     details = result.details() or "gRPC error without details"
                     if "AllButQueryForbidden" in details:
                         status_code = 412
                         reason = details.split(":")[-1].strip().strip("'")
                     else:
                         reason = details
-                        logger.exception(f"Unhandled node error", exc_info=result)
+                        logger.exception(f"Unhandled nidx error", exc_info=result)
                 else:
                     logger.error(
                         f"Unhandled GRPC error while querying shard data: {result.debug_error_string()}"

nucliadb/search/search/chat/query.py CHANGED Viewed

@@ -28,7 +28,7 @@ from nidx_protos.nodereader_pb2 import (
 from nucliadb.common.models_utils import to_proto
 from nucliadb.search import logger
 from nucliadb.search.predict import AnswerStatusCode, RephraseResponse
-from nucliadb.search.requesters.utils import Method, node_query
+from nucliadb.search.requesters.utils import Method, nidx_query
 from nucliadb.search.search.chat.exceptions import NoRetrievalResultsError
 from nucliadb.search.search.exceptions import IncompleteFindResultsError
 from nucliadb.search.search.find import find
@@ -246,8 +246,6 @@ async def get_relations_results(
     kbid: str,
     text_answer: str,
     timeout: Optional[float] = None,
-    only_with_metadata: bool = False,
-    only_agentic_relations: bool = False,
 ) -> Relations:
     try:
         predict = get_predict()
@@ -257,8 +255,6 @@ async def get_relations_results(
             kbid=kbid,
             entities=detected_entities,
             timeout=timeout,
-            only_with_metadata=only_with_metadata,
-            only_agentic_relations=only_agentic_relations,
         )
     except Exception as exc:
         capture_exception(exc)
@@ -271,9 +267,6 @@ async def get_relations_results_from_entities(
     kbid: str,
     entities: Iterable[RelationNode],
     timeout: Optional[float] = None,
-    only_with_metadata: bool = False,
-    only_agentic_relations: bool = False,
-    only_entity_to_entity: bool = False,
     deleted_entities: set[str] = set(),
 ) -> Relations:
     entry_points = list(entities)
@@ -293,8 +286,7 @@ async def get_relations_results_from_entities(
     (
         results,
         _,
-        _,
-    ) = await node_query(
+    ) = await nidx_query(
         kbid,
         Method.SEARCH,
         request,
@@ -304,9 +296,6 @@ async def get_relations_results_from_entities(
     return await merge_relations_results(
         relations_results,
         entry_points,
-        only_with_metadata,
-        only_agentic_relations,
-        only_entity_to_entity,
     )

nucliadb/search/search/find.py CHANGED Viewed

@@ -23,7 +23,7 @@ from time import time
 from nucliadb.common.external_index_providers.base import ExternalIndexManager
 from nucliadb.common.external_index_providers.manager import get_external_index_manager
 from nucliadb.common.models_utils import to_proto
-from nucliadb.search.requesters.utils import Method, node_query
+from nucliadb.search.requesters.utils import Method, nidx_query
 from nucliadb.search.search.find_merge import (
     build_find_response,
     compose_find_resources,
@@ -105,10 +105,7 @@ async def _index_node_retrieval(
         ) = await legacy_convert_retrieval_to_proto(parsed)
     with metrics.time("index_search"):
-        results, query_incomplete_results, queried_shards = await node_query(
-            kbid, Method.SEARCH, pb_query
-        )
-    incomplete_results = incomplete_results or query_incomplete_results
+        results, queried_shards = await nidx_query(kbid, Method.SEARCH, pb_query)
     # Rank fusion merge, cut, hydrate and rerank
     with metrics.time("results_merge"):

nucliadb/search/search/graph_strategy.py CHANGED Viewed

@@ -19,6 +19,7 @@
 import heapq
 import json
 from collections import defaultdict
+from dataclasses import dataclass
 from typing import Any, Collection, Iterable, Optional, Union
 from nidx_protos import nodereader_pb2
@@ -33,16 +34,16 @@ from sentry_sdk import capture_exception
 from nucliadb.common.external_index_providers.base import TextBlockMatch
 from nucliadb.common.ids import FieldId, ParagraphId
 from nucliadb.search import logger
-from nucliadb.search.requesters.utils import Method, node_query
+from nucliadb.search.requesters.utils import Method, nidx_query
 from nucliadb.search.search.chat.query import (
     find_request_from_ask_request,
-    get_relations_results_from_entities,
 )
 from nucliadb.search.search.find_merge import (
     compose_find_resources,
     hydrate_and_rerank,
 )
 from nucliadb.search.search.hydrator import ResourceHydrationOptions, TextBlockHydrationOptions
+from nucliadb.search.search.merge import entity_type_to_relation_node_type, merge_relations_results
 from nucliadb.search.search.metrics import Metrics
 from nucliadb.search.search.rerankers import (
     Reranker,
@@ -74,7 +75,7 @@ from nucliadb_models.search import (
     TextPosition,
     UserPrompt,
 )
-from nucliadb_protos.utils_pb2 import RelationNode
+from nucliadb_protos.utils_pb2 import Relation, RelationNode
 SCHEMA = {
     "title": "score_triplets",
@@ -289,6 +290,17 @@ Now, let's get started! Here are the triplets you need to score:
 """
+@dataclass(frozen=True)
+class FrozenRelationNode:
+    ntype: RelationNode.NodeType.ValueType
+    subtype: str
+    value: str
+def freeze_node(r: RelationNode):
+    return FrozenRelationNode(ntype=r.ntype, subtype=r.subtype, value=r.value)
 class RelationsParagraphMatch(BaseModel):
     paragraph_id: ParagraphId
     score: float
@@ -310,13 +322,12 @@ async def get_graph_results(
     shards: Optional[list[str]] = None,
 ) -> tuple[KnowledgeboxFindResults, FindRequest]:
     relations = Relations(entities={})
-    explored_entities: set[str] = set()
+    explored_entities: set[FrozenRelationNode] = set()
     scores: dict[str, list[float]] = {}
     predict = get_predict()
+    entities_to_explore: list[RelationNode] = []
     for hop in range(graph_strategy.hops):
-        entities_to_explore: Iterable[RelationNode] = []
         if hop == 0:
             # Get the entities from the query
             with metrics.time("graph_strat_query_entities"):
@@ -326,14 +337,14 @@ async def get_graph_results(
                         query=query,
                     )
                     if relation_result is not None:
-                        entities_to_explore = (
+                        entities_to_explore = [
                             RelationNode(
                                 ntype=RelationNode.NodeType.ENTITY,
                                 value=result.value,
                                 subtype=result.family,
                             )
                             for result in relation_result.entities
-                        )
+                        ]
                 elif (
                     not entities_to_explore
                     or graph_strategy.query_entity_detection == QueryEntityDetection.PREDICT
@@ -353,7 +364,7 @@ async def get_graph_results(
                         entities_to_explore = []
         else:
             # Find neighbors of the current relations and remove the ones already explored
-            entities_to_explore = (
+            entities_to_explore = [
                 RelationNode(
                     ntype=RelationNode.NodeType.ENTITY,
                     value=relation.entity,
@@ -361,35 +372,50 @@ async def get_graph_results(
                 )
                 for subgraph in relations.entities.values()
                 for relation in subgraph.related_to
-                if relation.entity not in explored_entities
-            )
+                if FrozenRelationNode(
+                    ntype=entity_type_to_relation_node_type(relation.entity_type),
+                    subtype=relation.entity_subtype,
+                    value=relation.entity,
+                )
+                not in explored_entities
+            ]
+        if not entities_to_explore:
+            break
         # Get the relations for the new entities
+        relations_results = []
         with metrics.time("graph_strat_neighbor_relations"):
             try:
-                new_relations = await get_relations_results_from_entities(
-                    kbid=kbid,
-                    entities=entities_to_explore,
-                    timeout=5.0,
+                relations_results = await find_graph_neighbours(
+                    kbid,
+                    entities_to_explore,
+                    explored_entities,
+                    exclude_processor_relations=graph_strategy.exclude_processor_relations,
+                )
+                new_relations = await merge_relations_results(
+                    relations_results,
+                    entities_to_explore,
                     only_with_metadata=not graph_strategy.relation_text_as_paragraphs,
-                    only_agentic_relations=graph_strategy.agentic_graph_only,
-                    # We only want entity to entity relations (skip resource/labels/collaborators/etc.)
-                    only_entity_to_entity=True,
-                    deleted_entities=explored_entities,
                 )
             except Exception as e:
                 capture_exception(e)
                 logger.exception("Error in getting query relations for graph strategy")
                 new_relations = Relations(entities={})
-            new_subgraphs = new_relations.entities
+            relations.entities.update(new_relations.entities)
+            discovered_entities = []
-            explored_entities.update(new_subgraphs.keys())
+            for shard in relations_results:
+                for node in shard.nodes:
+                    if node not in entities_to_explore and freeze_node(node) not in explored_entities:
+                        discovered_entities.append(node)
-            if not new_subgraphs or all(not subgraph.related_to for subgraph in new_subgraphs.values()):
+            if not discovered_entities:
                 break
-            relations.entities.update(new_subgraphs)
+            explored_entities.update([freeze_node(n) for n in entities_to_explore])
+            entities_to_explore = discovered_entities
         # Rank the relevance of the relations
         with metrics.time("graph_strat_rank_relations"):
@@ -458,7 +484,7 @@ async def fuzzy_search_entities(
         request.query.path.bool_or.operands.append(subquery)
     try:
-        results, _, _ = await node_query(kbid, Method.GRAPH, request)
+        results, _ = await nidx_query(kbid, Method.GRAPH, request)
     except Exception as exc:
         capture_exception(exc)
         logger.exception("Error in finding entities in query for graph strategy")
@@ -898,3 +924,51 @@ def relations_matches_to_text_block_matches(
     paragraph_matches: Collection[RelationsParagraphMatch],
 ) -> list[TextBlockMatch]:
     return [relations_match_to_text_block_match(match) for match in paragraph_matches]
+async def find_graph_neighbours(
+    kbid: str,
+    entities_to_explore: list[RelationNode],
+    explored_entities: set[FrozenRelationNode],
+    exclude_processor_relations: bool,
+) -> list[nodereader_pb2.GraphSearchResponse]:
+    graph_query = nodereader_pb2.GraphSearchRequest(
+        kind=nodereader_pb2.GraphSearchRequest.QueryKind.PATH, top_k=100
+    )
+    # Explore starting from some entities
+    query_to_explore = nodereader_pb2.GraphQuery.PathQuery()
+    for entity in entities_to_explore:
+        entity_query = nodereader_pb2.GraphQuery.PathQuery()
+        entity_query.path.source.node_type = entity.ntype
+        entity_query.path.source.node_subtype = entity.subtype
+        entity_query.path.source.value = entity.value
+        entity_query.path.undirected = True
+        query_to_explore.bool_or.operands.append(entity_query)
+    graph_query.query.path.bool_and.operands.append(query_to_explore)
+    # Do not return already known entities
+    if explored_entities:
+        query_exclude_explored = nodereader_pb2.GraphQuery.PathQuery()
+        for explored in explored_entities:
+            entity_query = nodereader_pb2.GraphQuery.PathQuery()
+            entity_query.path.source.node_type = explored.ntype
+            entity_query.path.source.node_subtype = explored.subtype
+            entity_query.path.source.value = explored.value
+            entity_query.path.undirected = True
+            query_exclude_explored.bool_not.bool_or.operands.append(entity_query)
+        graph_query.query.path.bool_and.operands.append(query_exclude_explored)
+    # Only include relations between entities
+    only_entities = nodereader_pb2.GraphQuery.PathQuery()
+    only_entities.path.relation.relation_type = Relation.RelationType.ENTITY
+    graph_query.query.path.bool_and.operands.append(only_entities)
+    # Exclude processor entities
+    if exclude_processor_relations:
+        exclude_processor = nodereader_pb2.GraphQuery.PathQuery()
+        exclude_processor.facet.facet = "/g"
+        graph_query.query.path.bool_and.operands.append(exclude_processor)
+    (relations_results, _) = await nidx_query(kbid, Method.GRAPH, graph_query, timeout=5.0)
+    return relations_results

nucliadb/search/search/merge.py CHANGED Viewed

@@ -48,7 +48,6 @@ from nucliadb.search.search.fetch import (
 from nucliadb.search.search.query_parser.models import FulltextQuery, UnitRetrieval
 from nucliadb_models.common import FieldTypeName
 from nucliadb_models.labels import translate_system_to_alias_label
-from nucliadb_models.metadata import RelationType
 from nucliadb_models.resource import ExtractedDataTypeName
 from nucliadb_models.search import (
     DirectionalRelation,
@@ -93,6 +92,15 @@ def relation_node_type_to_entity_type(node_type: RelationNode.NodeType.ValueType
     }[node_type]
+def entity_type_to_relation_node_type(node_type: EntityType) -> RelationNode.NodeType.ValueType:
+    return {
+        EntityType.ENTITY: RelationNode.NodeType.ENTITY,
+        EntityType.LABEL: RelationNode.NodeType.LABEL,
+        EntityType.RESOURCE: RelationNode.NodeType.RESOURCE,
+        EntityType.USER: RelationNode.NodeType.USER,
+    }[node_type]
 def sort_results_by_score(results: Union[list[ParagraphResult], list[DocumentResult]]):
     results.sort(key=lambda x: (x.score.bm25, x.score.booster), reverse=True)
@@ -442,18 +450,10 @@ async def merge_relations_results(
     graph_responses: list[GraphSearchResponse],
     query_entry_points: Iterable[RelationNode],
     only_with_metadata: bool = False,
-    only_agentic: bool = False,
-    only_entity_to_entity: bool = False,
 ) -> Relations:
     loop = asyncio.get_event_loop()
     return await loop.run_in_executor(
-        None,
-        _merge_relations_results,
-        graph_responses,
-        query_entry_points,
-        only_with_metadata,
-        only_agentic,
-        only_entity_to_entity,
+        None, _merge_relations_results, graph_responses, query_entry_points, only_with_metadata
     )
@@ -461,21 +461,8 @@ def _merge_relations_results(
     graph_responses: list[GraphSearchResponse],
     query_entry_points: Iterable[RelationNode],
     only_with_metadata: bool,
-    only_agentic: bool,
-    only_entity_to_entity: bool,
 ) -> Relations:
-    """Merge relation search responses into a single Relations object while applying filters.
-    - When `only_with_metadata` is enabled, only include paths with metadata
-      (this can include paragraph_id and entity positions among other things)
-    - When `only_agentic` is enabled, ony include relations extracted by a Graph
-      Extraction Agent
-    - When `only_entity_to_entity` is enabled, only include relations between
-    nodes with type ENTITY
-    """
+    """Merge relation search responses into a single Relations object while applying filters."""
     relations = Relations(entities={})
     for entry_point in query_entry_points:
@@ -492,18 +479,9 @@ def _merge_relations_results(
             if path.resource_field_id is not None:
                 resource_id = path.resource_field_id.split("/")[0]
-            # If only_with_metadata is True, we check that metadata for the relation is not None
-            # If only_agentic is True, we check that metadata for the relation is not None and that it has a data_augmentation_task_id
-            # TODO: This is suboptimal, we should be able to filter this in the query to the index,
             if only_with_metadata and not metadata:
                 continue
-            if only_agentic and (not metadata or not metadata.data_augmentation_task_id):
-                continue
-            if only_entity_to_entity and relation_type != RelationType.ENTITY:
-                continue
             if origin.value in relations.entities:
                 relations.entities[origin.value].related_to.append(
                     DirectionalRelation(

nucliadb/search/search/query_parser/parsers/unit_retrieval.py CHANGED Viewed

@@ -273,6 +273,10 @@ class _Converter:
 def is_incomplete(retrieval: UnitRetrieval) -> bool:
+    """
+    Return true if the retrieval had the semantic feature on but the query endpoint
+    did not return the vector in the response.
+    """
     if retrieval.query.semantic is None:
         return False
     incomplete = retrieval.query.semantic.query is None or len(retrieval.query.semantic.query) == 0

{nucliadb-6.4.0.post4293.dist-info → nucliadb-6.4.0.post4313.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb
-Version: 6.4.0.post4293
+Version: 6.4.0.post4313
 Summary: NucliaDB
 Author-email: Nuclia <nucliadb@nuclia.com>
 License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: <4,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4293
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4293
-Requires-Dist: nucliadb-protos>=6.4.0.post4293
-Requires-Dist: nucliadb-models>=6.4.0.post4293
-Requires-Dist: nidx-protos>=6.4.0.post4293
+Requires-Dist: nucliadb-telemetry[all]>=6.4.0.post4313
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.4.0.post4313
+Requires-Dist: nucliadb-protos>=6.4.0.post4313
+Requires-Dist: nucliadb-models>=6.4.0.post4313
+Requires-Dist: nidx-protos>=6.4.0.post4313
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
 Requires-Dist: nuclia-models>=0.24.2
 Requires-Dist: uvicorn[standard]

{nucliadb-6.4.0.post4293.dist-info → nucliadb-6.4.0.post4313.dist-info}/RECORD RENAMED Viewed

@@ -179,7 +179,7 @@ nucliadb/models/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,8
 nucliadb/models/responses.py,sha256=qnuOoc7TrVSUnpikfTwHLKez47_DE4mSFzpxrwtqijA,1599
 nucliadb/models/internal/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/models/internal/processing.py,sha256=bzPr-hXliY81zMUgG-PDyDiFKP7Xbs71s2d0SIAu4Do,4090
-nucliadb/purge/__init__.py,sha256=UXbto56EWYLwZj6uEc-flQVe3gDDNFtM6EV-aIkryPU,12353
+nucliadb/purge/__init__.py,sha256=lZE7_FQMVz2rWiwRYrtKpAjVoO6tbnzTYofQbsGUqos,13118
 nucliadb/purge/orphan_shards.py,sha256=fcP37QoFNjS6q2XozLQImY1swC_EmHeNhAJwLvEkOww,7769
 nucliadb/reader/__init__.py,sha256=C5Efic7WlGm2U2C5WOyquMFbIj2Pojwe_8mwzVYnOzE,1304
 nucliadb/reader/app.py,sha256=Se-BFTE6d1v1msLzQn4q5XIhjnSxa2ckDSHdvm7NRf8,3096
@@ -216,34 +216,34 @@ nucliadb/search/api/v1/ask.py,sha256=b4tz33HNsfT5DXv_2DMc_jirnFsHuobreWkbAKkzj5o
 nucliadb/search/api/v1/catalog.py,sha256=W0cPWuC27Y4bO7Ifl1VQp8OPYfF5gv5yeWZBsuJMxUU,7721
 nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
 nucliadb/search/api/v1/find.py,sha256=C4sTGFRS9tQFF8v1zhnHQvnExJoGDYi78bZTRfwhGrc,10831
-nucliadb/search/api/v1/graph.py,sha256=ItVpzJbqfDLjoIo2fTb2mKGCM1Z34sx7CBb3gNmj6IQ,4274
+nucliadb/search/api/v1/graph.py,sha256=Km_ysePnhaEahdYp0gaF-234FHliB8LdUpfGOnqZ0rc,4265
 nucliadb/search/api/v1/knowledgebox.py,sha256=e9xeLPUqnQTx33i4A8xuV93ENvtJGrpjPlLRbGJtAI8,8415
 nucliadb/search/api/v1/predict_proxy.py,sha256=Q03ZTvWp7Sq0x71t5Br4LHxTiYsRd6-GCb4YuKqhynM,3131
 nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
-nucliadb/search/api/v1/search.py,sha256=Or-mUvmBAyh0Y55NqTYNXe_BWR0lLLaTSL2ChjJaE2M,12402
-nucliadb/search/api/v1/suggest.py,sha256=Em7ApddZNHMHjL_ZfXmUIVUk504f58J96JlxJXnIxaM,6438
+nucliadb/search/api/v1/search.py,sha256=bp2JfBO_wiPl7vG3-MXJfqdFfIGwJM3L25UqqGWj4V4,12304
+nucliadb/search/api/v1/suggest.py,sha256=GJ7DveD6c9_h0m6NbI7IAvfO2j82TtrGuLg6UF3GBh4,6350
 nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
 nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
 nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/api/v1/resource/ask.py,sha256=nsVzBSanSSlf0Ody6LSTjdEy75Vg283_YhbkAtWEjh8,3637
 nucliadb/search/api/v1/resource/ingestion_agents.py,sha256=fqqRCd8Wc9GciS5P98lcnihvTKStsZYYtOU-T1bc-6E,4771
-nucliadb/search/api/v1/resource/search.py,sha256=sz8-aAg_ucc6dfJddWH_C0Om4PrKJhvvDmgKHBczyBI,5018
+nucliadb/search/api/v1/resource/search.py,sha256=Gnn4CY5NO4AK5ZWwrSIRJqBDm16u8k0XtpUwDXEBeYY,4930
 nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG0BHxGSS9QPw,1141
 nucliadb/search/requesters/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
-nucliadb/search/requesters/utils.py,sha256=o5JtXX5KrqMtUJo3u6rw9EOOKXPiw-GaF0oGuZu7PPc,6225
+nucliadb/search/requesters/utils.py,sha256=Ne5fweSWk9hettQKyUZAMZrw_MTjPE5W_EVqj4p5XiI,6109
 nucliadb/search/search/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/cache.py,sha256=-6l3i2Qi8ig2SM_FCgOLIaQ48XVj7L5ctd5PdQRY5mY,4458
 nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298,1153
 nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
 nucliadb/search/search/fetch.py,sha256=eiljOKim-4OOEZn-3fyVZSYxztCH156BXYdqlIwVdN4,6181
 nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
-nucliadb/search/search/find.py,sha256=i1auc8visRakBwbbZGhyQgXNAmsaAVheisYi2xGjdKY,7925
+nucliadb/search/search/find.py,sha256=ZocoQNN28OHOmMaroGVFCnce3YHPZbFb1-9jxLNHSFM,7805
 nucliadb/search/search/find_merge.py,sha256=c-7IlfjfdmWAvQOyM7IO3bKS1EQpnR4oi6pN6mwrQKw,19815
 nucliadb/search/search/graph_merge.py,sha256=y5V7X-BhjHsKDXE69tzQLIIKGm4XuaFrZXw0odcHVNM,3402
-nucliadb/search/search/graph_strategy.py,sha256=zYfi1df982ZYOFtYSksnHEJvQn-ZZsCIFSruVZP_934,32891
+nucliadb/search/search/graph_strategy.py,sha256=LtPWGVL0RzxUgDLjrYgoQdZFmPBbln1fUsmXM1z5krs,35941
 nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
 nucliadb/search/search/ingestion_agents.py,sha256=NeJr4EEX-bvFFMGvXOOwLv8uU7NuQ-ntJnnrhnKfMzY,3174
-nucliadb/search/search/merge.py,sha256=Abg9YblQJvH2jDvXVT45MNxaIpNa7TTpsiUSJqb3NDc,23307
+nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
 nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
 nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
 nucliadb/search/search/pgcatalog.py,sha256=s_J98fsX_RuFXwpejpkGqG-tD9ELuzz4YQ6U3ew5h2g,9313
@@ -259,7 +259,7 @@ nucliadb/search/search/chat/ask.py,sha256=aaNj0MeAbx9dyeKpQJdm3VsHMq9OmcCESxahbg
 nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
 nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
 nucliadb/search/search/chat/prompt.py,sha256=e8C7_MPr6Cn3nJHA4hWpeW3629KVI1ZUQA_wZf9Kiu4,48503
-nucliadb/search/search/chat/query.py,sha256=6v6twBUTWfUUzklVV6xqJSYPkAshnIrBH9wbTcjQvkI,17063
+nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
 nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
 nucliadb/search/search/query_parser/fetcher.py,sha256=SkvBRDfSKmuz-QygNKLAU4AhZhhDo1dnOZmt1zA28RA,16851
@@ -273,7 +273,7 @@ nucliadb/search/search/query_parser/parsers/common.py,sha256=o3028wUnK78lOmFK0jt
 nucliadb/search/search/query_parser/parsers/find.py,sha256=Fo4lXOnCbP0AKEc1mKLNINJBv63B4DPlix0vlhyesck,12717
 nucliadb/search/search/query_parser/parsers/graph.py,sha256=lDRJO_JvOe7yytNgXZyMogyPMgB5xc8obNY2kqz3yGU,9405
 nucliadb/search/search/query_parser/parsers/search.py,sha256=yEebeMOXJza7HMK3TdIPO6UGQbe79maSDg-GgohQIMk,10517
-nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=bcSvF2mW6IHFAs7_yA6TePw0zVtk9CmEA3j6xkhkDO8,11328
+nucliadb/search/search/query_parser/parsers/unit_retrieval.py,sha256=rW3YHDWLkI2Hhznl_1oOMhC01bwZMAjv-Wu3iHPIaiU,11475
 nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/standalone/api_router.py,sha256=hgq9FXpihzgjHkwcVGfGCSwyXy67fqXTfLFHuINzIi0,5567
 nucliadb/standalone/app.py,sha256=mAApNK_iVsQgJyd-mtwCeZq5csSimwnXmlQGH9a70pE,5586
@@ -368,8 +368,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.4.0.post4293.dist-info/METADATA,sha256=-Dsk2JpP3WFTtfqDx92QjQqjbRhNV-wdkpcn14t1ttg,4223
-nucliadb-6.4.0.post4293.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
-nucliadb-6.4.0.post4293.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.4.0.post4293.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.4.0.post4293.dist-info/RECORD,,
+nucliadb-6.4.0.post4313.dist-info/METADATA,sha256=-Mp65qW_udL5EUZsp6CwEysmjbvuTY3u6Qbn0Bc3epI,4223
+nucliadb-6.4.0.post4313.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+nucliadb-6.4.0.post4313.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.4.0.post4313.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.4.0.post4313.dist-info/RECORD,,

{nucliadb-6.4.0.post4293.dist-info → nucliadb-6.4.0.post4313.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.4.0)
+Generator: setuptools (80.7.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

{nucliadb-6.4.0.post4293.dist-info → nucliadb-6.4.0.post4313.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nucliadb-6.4.0.post4293.dist-info → nucliadb-6.4.0.post4313.dist-info}/top_level.txt RENAMED Viewed

File without changes

nucliadb 6.4.0.post4293__py3-none-any.whl → 6.4.0.post4313__py3-none-any.whl

nucliadb 6.4.0.post4293py3-none-any.whl → 6.4.0.post4313py3-none-any.whl