PyPI - nucliadb - Versions diffs - 6.2.1.post2838__py3-none-any.whl → 6.2.1.post2842__py3-none-any.whl - Mend

nucliadb 6.2.1.post2838__py3-none-any.whl → 6.2.1.post2842__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

nucliadb/common/external_index_providers/base.py CHANGED Viewed

@@ -28,7 +28,7 @@ from nucliadb.common.counters import IndexCounts
 from nucliadb.common.external_index_providers.exceptions import ExternalIndexingError
 from nucliadb.common.ids import ParagraphId
 from nucliadb_models.external_index_providers import ExternalIndexProviderType
-from nucliadb_models.search import SCORE_TYPE, TextPosition
+from nucliadb_models.search import SCORE_TYPE, Relations, TextPosition
 from nucliadb_protos.knowledgebox_pb2 import (
     CreateExternalIndexProviderMetadata,
     StoredExternalIndexProviderMetadata,
@@ -73,6 +73,7 @@ class TextBlockMatch(BaseModel):
     paragraph_labels: list[str] = []
     field_labels: list[str] = []
     text: Optional[str] = None
+    relevant_relations: Optional[Relations] = None
 class QueryResults(BaseModel):

nucliadb/common/ids.py CHANGED Viewed

@@ -111,13 +111,11 @@ class FieldId:
         parts = value.split("/")
         if len(parts) == 3:
             rid, _type, key = parts
-            if _type not in FIELD_TYPE_STR_TO_PB:
-                raise ValueError(f"Invalid FieldId: {value}")
+            _type = cls.parse_field_type(_type)
             return cls(rid=rid, type=_type, key=key)
         elif len(parts) == 4:
             rid, _type, key, subfield_id = parts
-            if _type not in FIELD_TYPE_STR_TO_PB:
-                raise ValueError(f"Invalid FieldId: {value}")
+            _type = cls.parse_field_type(_type)
             return cls(
                 rid=rid,
                 type=_type,
@@ -127,6 +125,22 @@ class FieldId:
         else:
             raise ValueError(f"Invalid FieldId: {value}")
+    @classmethod
+    def parse_field_type(cls, _type: str) -> str:
+        if _type not in FIELD_TYPE_STR_TO_PB:
+            # Try to parse the enum value
+            # XXX: This is to support field types that are integer values of FieldType
+            # Which is how legacy processor relations reported the paragraph_id
+            try:
+                type_pb = FieldType.ValueType(int(_type))
+            except ValueError:
+                raise ValueError(f"Invalid FieldId: {_type}")
+            if type_pb in FIELD_TYPE_PB_TO_STR:
+                return FIELD_TYPE_PB_TO_STR[type_pb]
+            else:
+                raise ValueError(f"Invalid FieldId: {_type}")
+        return _type
 @dataclass
 class ParagraphId:

nucliadb/search/api/v1/suggest.py CHANGED Viewed

@@ -151,8 +151,6 @@ async def suggest(
         search_results = await merge_suggest_results(
             results,
             kbid=kbid,
-            show=show,
-            field_type_filter=field_type_filter,
             highlight=highlight,
         )

nucliadb/search/search/chat/ask.py CHANGED Viewed

@@ -57,6 +57,7 @@ from nucliadb.search.search.exceptions import (
     IncompleteFindResultsError,
     InvalidQueryError,
 )
+from nucliadb.search.search.graph_strategy import get_graph_results
 from nucliadb.search.search.metrics import RAGMetrics
 from nucliadb.search.search.query import QueryParser
 from nucliadb.search.utilities import get_predict
@@ -75,6 +76,7 @@ from nucliadb_models.search import (
     ErrorAskResponseItem,
     FindParagraph,
     FindRequest,
+    GraphStrategy,
     JSONAskResponseItem,
     KnowledgeboxFindResults,
     MetadataAskResponseItem,
@@ -629,6 +631,13 @@ def parse_prequeries(ask_request: AskRequest) -> Optional[PreQueriesStrategy]:
     return None
+def parse_graph_strategy(ask_request: AskRequest) -> Optional[GraphStrategy]:
+    for rag_strategy in ask_request.rag_strategies:
+        if rag_strategy.name == RagStrategyName.GRAPH:
+            return cast(GraphStrategy, rag_strategy)
+    return None
 async def retrieval_step(
     kbid: str,
     main_query: str,
@@ -675,17 +684,33 @@ async def retrieval_in_kb(
     metrics: RAGMetrics,
 ) -> RetrievalResults:
     prequeries = parse_prequeries(ask_request)
+    graph_strategy = parse_graph_strategy(ask_request)
     with metrics.time("retrieval"):
-        main_results, prequeries_results, query_parser = await get_find_results(
-            kbid=kbid,
-            query=main_query,
-            item=ask_request,
-            ndb_client=client_type,
-            user=user_id,
-            origin=origin,
-            metrics=metrics,
-            prequeries_strategy=prequeries,
-        )
+        prequeries_results = None
+        if graph_strategy is not None:
+            main_results, query_parser = await get_graph_results(
+                kbid=kbid,
+                query=main_query,
+                item=ask_request,
+                ndb_client=client_type,
+                user=user_id,
+                origin=origin,
+                graph_strategy=graph_strategy,
+                metrics=metrics,
+                shards=ask_request.shards,
+            )
+        # TODO (oni): Fallback to normal retrieval if no graph results are found
+        else:
+            main_results, prequeries_results, query_parser = await get_find_results(
+                kbid=kbid,
+                query=main_query,
+                item=ask_request,
+                ndb_client=client_type,
+                user=user_id,
+                origin=origin,
+                metrics=metrics,
+                prequeries_strategy=prequeries,
+            )
         if len(main_results.resources) == 0 and all(
             len(prequery_result.resources) == 0 for (_, prequery_result) in prequeries_results or []
         ):

nucliadb/search/search/chat/prompt.py CHANGED Viewed

@@ -1013,8 +1013,10 @@ class PromptContextBuilder:
                 neighbouring_paragraphs = cast(NeighbouringParagraphsStrategy, strategy)
             elif strategy.name == RagStrategyName.METADATA_EXTENSION:
                 metadata_extension = cast(MetadataExtensionStrategy, strategy)
-            elif strategy.name != RagStrategyName.PREQUERIES:  # pragma: no cover
-                # Prequeries are not handled here
+            elif (
+                strategy.name != RagStrategyName.PREQUERIES and strategy.name != RagStrategyName.GRAPH
+            ):  # pragma: no cover
+                # Prequeries and graph are not handled here
                 logger.warning(
                     "Unknown rag strategy",
                     extra={"strategy": strategy.name, "kbid": self.kbid},

nucliadb/search/search/chat/query.py CHANGED Viewed

@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 import asyncio
-from typing import Optional
+from typing import Iterable, Optional
 from nucliadb.common.models_utils import to_proto
 from nucliadb.search import logger
@@ -51,6 +51,7 @@ from nucliadb_models.search import (
 )
 from nucliadb_protos import audit_pb2
 from nucliadb_protos.nodereader_pb2 import RelationSearchResponse, SearchRequest, SearchResponse
+from nucliadb_protos.utils_pb2 import RelationNode
 from nucliadb_telemetry.errors import capture_exception
 from nucliadb_utils.utilities import get_audit
@@ -145,15 +146,7 @@ async def get_find_results(
     return main_results, prequeries_results, query_parser
-async def run_main_query(
-    kbid: str,
-    query: str,
-    item: AskRequest,
-    ndb_client: NucliaDBClientType,
-    user: str,
-    origin: str,
-    metrics: RAGMetrics = RAGMetrics(),
-) -> tuple[KnowledgeboxFindResults, QueryParser]:
+def find_request_from_ask_request(item: AskRequest, query: str) -> FindRequest:
     find_request = FindRequest()
     find_request.resource_filters = item.resource_filters
     find_request.features = []
@@ -189,7 +182,19 @@ async def run_main_query(
     find_request.show_hidden = item.show_hidden
     # this executes the model validators, that can tweak some fields
-    FindRequest.model_validate(find_request)
+    return FindRequest.model_validate(find_request)
+async def run_main_query(
+    kbid: str,
+    query: str,
+    item: AskRequest,
+    ndb_client: NucliaDBClientType,
+    user: str,
+    origin: str,
+    metrics: RAGMetrics = RAGMetrics(),
+) -> tuple[KnowledgeboxFindResults, QueryParser]:
+    find_request = find_request_from_ask_request(item, query)
     find_results, incomplete, query_parser = await find(
         kbid,
@@ -211,36 +216,59 @@ async def get_relations_results(
     text_answer: str,
     target_shard_replicas: Optional[list[str]],
     timeout: Optional[float] = None,
+    only_with_metadata: bool = False,
+    only_agentic_relations: bool = False,
 ) -> Relations:
     try:
         predict = get_predict()
         detected_entities = await predict.detect_entities(kbid, text_answer)
-        request = SearchRequest()
-        request.relation_subgraph.entry_points.extend(detected_entities)
-        request.relation_subgraph.depth = 1
-        results: list[SearchResponse]
-        (
-            results,
-            _,
-            _,
-        ) = await node_query(
-            kbid,
-            Method.SEARCH,
-            request,
+        return await get_relations_results_from_entities(
+            kbid=kbid,
+            entities=detected_entities,
             target_shard_replicas=target_shard_replicas,
             timeout=timeout,
-            use_read_replica_nodes=True,
-            retry_on_primary=False,
+            only_with_metadata=only_with_metadata,
+            only_agentic_relations=only_agentic_relations,
         )
-        relations_results: list[RelationSearchResponse] = [result.relation for result in results]
-        return await merge_relations_results(relations_results, request.relation_subgraph)
     except Exception as exc:
         capture_exception(exc)
         logger.exception("Error getting relations results")
         return Relations(entities={})
+async def get_relations_results_from_entities(
+    *,
+    kbid: str,
+    entities: Iterable[RelationNode],
+    target_shard_replicas: Optional[list[str]],
+    timeout: Optional[float] = None,
+    only_with_metadata: bool = False,
+    only_agentic_relations: bool = False,
+) -> Relations:
+    request = SearchRequest()
+    request.relation_subgraph.entry_points.extend(entities)
+    request.relation_subgraph.depth = 1
+    results: list[SearchResponse]
+    (
+        results,
+        _,
+        _,
+    ) = await node_query(
+        kbid,
+        Method.SEARCH,
+        request,
+        target_shard_replicas=target_shard_replicas,
+        timeout=timeout,
+        use_read_replica_nodes=True,
+        retry_on_primary=False,
+    )
+    relations_results: list[RelationSearchResponse] = [result.relation for result in results]
+    return await merge_relations_results(
+        relations_results, request.relation_subgraph, only_with_metadata, only_agentic_relations
+    )
 def maybe_audit_chat(
     *,
     kbid: str,

nucliadb 6.2.1.post2838__py3-none-any.whl → 6.2.1.post2842__py3-none-any.whl

nucliadb 6.2.1.post2838__py3-none-any.whl → 6.2.1.post2842__py3-none-any.whl