PyPI - nucliadb - Versions diffs - 6.5.1.post4539__py3-none-any.whl → 6.6.1.post649__py3-none-any.whl - Mend

nucliadb 6.5.1.post4539-py3-none-any.whl → 6.6.1.post649-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

migrations/0038_backfill_catalog_field_labels.py +90 -0
nucliadb/ingest/orm/processor/pgcatalog.py +17 -2
nucliadb/ingest/orm/resource.py +16 -4
nucliadb/ingest/processing.py +10 -3
nucliadb/search/api/v1/ask.py +10 -1
nucliadb/search/api/v1/catalog.py +2 -0
nucliadb/search/api/v1/find.py +2 -1
nucliadb/search/api/v1/resource/ask.py +16 -12
nucliadb/search/api/v1/summarize.py +7 -2
nucliadb/search/predict.py +10 -6
nucliadb/search/search/chat/ask.py +26 -1
nucliadb/search/search/chat/prompt.py +63 -21
nucliadb/search/search/pgcatalog.py +1 -1
nucliadb/search/search/predict_proxy.py +8 -11
nucliadb/search/search/query_parser/old_filters.py +1 -1
nucliadb/search/search/summarize.py +4 -2
{nucliadb-6.5.1.post4539.dist-info → nucliadb-6.6.1.post649.dist-info}/METADATA +7 -7
{nucliadb-6.5.1.post4539.dist-info → nucliadb-6.6.1.post649.dist-info}/RECORD +21 -20
{nucliadb-6.5.1.post4539.dist-info → nucliadb-6.6.1.post649.dist-info}/WHEEL +0 -0
{nucliadb-6.5.1.post4539.dist-info → nucliadb-6.6.1.post649.dist-info}/entry_points.txt +0 -0
{nucliadb-6.5.1.post4539.dist-info → nucliadb-6.6.1.post649.dist-info}/top_level.txt +0 -0

migrations/0038_backfill_catalog_field_labels.py ADDED Viewed

@@ -0,0 +1,90 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+"""Migration #38
+Backfill the catalog with labels from fields metadata
+"""
+import logging
+from typing import cast
+from nucliadb.common import datamanagers
+from nucliadb.common.maindb.pg import PGDriver, PGTransaction
+from nucliadb.ingest.orm.index_message import get_resource_index_message
+from nucliadb.ingest.orm.processor.pgcatalog import pgcatalog_update
+from nucliadb.migrator.context import ExecutionContext
+from nucliadb_protos import resources_pb2
+logger = logging.getLogger(__name__)
+async def migrate(context: ExecutionContext) -> None: ...
+async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
+    if not isinstance(context.kv_driver, PGDriver):
+        return
+    BATCH_SIZE = 100
+    async with context.kv_driver.transaction() as txn:
+        txn = cast(PGTransaction, txn)
+        start = ""
+        while True:
+            async with txn.connection.cursor() as cur:
+                # Get list of resources except those already in the catalog
+                await cur.execute(
+                    """
+                    SELECT key, value FROM resources
+                    WHERE key ~ ('^/kbs/' || %s || '/r/[^/]*$')
+                    AND key > %s
+                    ORDER BY key
+                    LIMIT %s""",
+                    (kbid, start, BATCH_SIZE),
+                )
+                to_index = []
+                rows = await cur.fetchall()
+                if len(rows) == 0:
+                    return
+                for key, basic_pb in rows:
+                    start = key
+                    # Only reindex resources with labels in field computed metadata
+                    basic = resources_pb2.Basic()
+                    basic.ParseFromString(basic_pb)
+                    if basic.computedmetadata.field_classifications:
+                        to_index.append(key)
+                logger.info(f"Reindexing {len(to_index)} catalog entries from {start}")
+                # Index each resource
+                for key in to_index:
+                    rid = key.split("/")[4]
+                    resource = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=rid)
+                    if resource is None:
+                        logger.warning(f"Could not load resource {rid} for kbid {kbid}")
+                        continue
+                    index_message = await get_resource_index_message(resource, reindex=False)
+                    await pgcatalog_update(txn, kbid, resource, index_message)
+                if to_index:
+                    await txn.commit()

nucliadb/ingest/orm/processor/pgcatalog.py CHANGED Viewed

@@ -65,6 +65,21 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
         modified_at = created_at
     async with _pg_transaction(txn).connection.cursor() as cur:
+        # Do not index canceled labels
+        cancelled_labels = {
+            f"/l/{clf.labelset}/{clf.label}"
+            for clf in resource.basic.usermetadata.classifications
+            if clf.cancelled_by_user
+        }
+        # Labels from the resource and classification labels from each field
+        labels = [label for label in index_message.labels]
+        for classification in resource.basic.computedmetadata.field_classifications:
+            for clf in classification.classifications:
+                label = f"/l/{clf.labelset}/{clf.label}"
+                if label not in cancelled_labels:
+                    labels.append(label)
         await cur.execute(
             """
             INSERT INTO catalog
@@ -83,7 +98,7 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
                 "title": resource.basic.title,
                 "created_at": created_at,
                 "modified_at": modified_at,
-                "labels": list(index_message.labels),
+                "labels": labels,
                 "slug": resource.basic.slug,
             },
         )
@@ -99,7 +114,7 @@ async def pgcatalog_update(txn: Transaction, kbid: str, resource: Resource, inde
             {
                 "kbid": resource.kb.kbid,
                 "rid": resource.uuid,
-                "facets": list(extract_facets(index_message.labels)),
+                "facets": list(extract_facets(labels)),
             },
         )

nucliadb/ingest/orm/resource.py CHANGED Viewed

@@ -602,7 +602,7 @@ class Resource:
             FieldType.LINK,
             load=False,
         )
-        maybe_update_basic_thumbnail(self.basic, link_extracted_data.link_thumbnail)
+        maybe_update_basic_thumbnail(self.basic, link_extracted_data.link_thumbnail, self.kb.kbid)
         await field_link.set_link_extracted_data(link_extracted_data)
@@ -661,7 +661,7 @@ class Resource:
         # uri can change after extraction
         await field_file.set_file_extracted_data(file_extracted_data)
         maybe_update_basic_icon(self.basic, file_extracted_data.icon)
-        maybe_update_basic_thumbnail(self.basic, file_extracted_data.file_thumbnail)
+        maybe_update_basic_thumbnail(self.basic, file_extracted_data.file_thumbnail, self.kb.kbid)
         self.modified = True
     async def _should_update_resource_title_from_file_metadata(self) -> bool:
@@ -722,7 +722,9 @@ class Resource:
         )
         await field_obj.set_field_metadata(field_metadata)
-        maybe_update_basic_thumbnail(self.basic, field_metadata.metadata.metadata.thumbnail)
+        maybe_update_basic_thumbnail(
+            self.basic, field_metadata.metadata.metadata.thumbnail, self.kb.kbid
+        )
         update_basic_computedmetadata_classifications(self.basic, field_metadata)
         self.modified = True
@@ -879,13 +881,23 @@ def maybe_update_basic_icon(basic: PBBasic, mimetype: Optional[str]) -> bool:
     return True
-def maybe_update_basic_thumbnail(basic: PBBasic, thumbnail: Optional[CloudFile]) -> bool:
+def maybe_update_basic_thumbnail(basic: PBBasic, thumbnail: Optional[CloudFile], kbid: str) -> bool:
     if basic.thumbnail or thumbnail is None:
         return False
     basic.thumbnail = CloudLink.format_reader_download_uri(thumbnail.uri)
+    fix_kbid_in_thumbnail(basic, kbid)
     return True
+def fix_kbid_in_thumbnail(basic: PBBasic, kbid: str):
+    if basic.thumbnail.startswith("/kb/") and not basic.thumbnail.startswith(f"/kb/{kbid}/"):
+        # Replace the kbid in the thumbnail if it doesn't match the current kbid. This is necessary for
+        # resources that have been backed up and we are restoring them to a different kbid.
+        parts = basic.thumbnail.split("/", 3)
+        parts[2] = kbid
+        basic.thumbnail = "/".join(parts)
 def update_basic_languages(basic: Basic, languages: list[str]) -> bool:
     if len(languages) == 0:
         return False

nucliadb/ingest/processing.py CHANGED Viewed

@@ -25,7 +25,7 @@ import uuid
 from collections import defaultdict
 from contextlib import AsyncExitStack
 from enum import Enum
-from typing import Any, Optional, TypeVar
+from typing import Any, Optional
 import aiohttp
 import backoff
@@ -49,10 +49,14 @@ from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_ut
 logger = logging.getLogger(__name__)
-_T = TypeVar("_T")
+class ProcessingAPIUnavailableError(SendToProcessError): ...
-RETRIABLE_EXCEPTIONS = (aiohttp.client_exceptions.ClientConnectorError,)
+RETRIABLE_EXCEPTIONS = (
+    aiohttp.client_exceptions.ClientConnectorError,
+    ProcessingAPIUnavailableError,
+)
 MAX_TRIES = 4
@@ -409,6 +413,9 @@ class ProcessingEngine:
                 raise LimitsExceededError(resp.status, data["detail"])
             elif resp.status == 429:
                 raise LimitsExceededError(resp.status, "Rate limited")
+            elif resp.status in (502, 503):
+                logger.warning(f"Processing engine is not available, retrying. Status: {resp.status}")
+                raise ProcessingAPIUnavailableError()
             else:
                 error_text = await resp.text()
                 logger.warning(f"Error sending to process: {resp.status} {error_text}")

nucliadb/search/api/v1/ask.py CHANGED Viewed

@@ -59,6 +59,7 @@ async def ask_knowledgebox_endpoint(
     kbid: str,
     item: AskRequest,
     x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
+    x_show_consumption: bool = Header(default=False),
     x_nucliadb_user: str = Header(""),
     x_forwarded_for: str = Header(""),
     x_synchronous: bool = Header(
@@ -97,7 +98,13 @@ async def ask_knowledgebox_endpoint(
             return HTTPClientError(status_code=422, detail=detail)
     return await create_ask_response(
-        kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for, x_synchronous
+        kbid=kbid,
+        ask_request=item,
+        user_id=x_nucliadb_user,
+        client_type=x_ndb_client,
+        origin=x_forwarded_for,
+        x_synchronous=x_synchronous,
+        extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
     )
@@ -110,6 +117,7 @@ async def create_ask_response(
     origin: str,
     x_synchronous: bool,
     resource: Optional[str] = None,
+    extra_predict_headers: Optional[dict[str, str]] = None,
 ) -> Response:
     maybe_log_request_payload(kbid, "/ask", ask_request)
     ask_request.max_tokens = parse_max_tokens(ask_request.max_tokens)
@@ -122,6 +130,7 @@ async def create_ask_response(
                 client_type=client_type,
                 origin=origin,
                 resource=resource,
+                extra_predict_headers=extra_predict_headers,
             )
         except AnswerJsonSchemaTooLong as err:
             return HTTPClientError(status_code=400, detail=str(err))

nucliadb/search/api/v1/catalog.py CHANGED Viewed

@@ -199,6 +199,8 @@ async def catalog(
     response_model=CatalogFacetsResponse,
     response_model_exclude_unset=True,
     tags=["Search"],
+    summary="Show facets in Knowledge Box",
+    description="List all facets in a Knowledge Box and how many resources they apply to",
     include_in_schema=False,
 )
 @requires(NucliaDBRoles.READER)

nucliadb/search/api/v1/find.py CHANGED Viewed

@@ -133,7 +133,8 @@ async def find_knowledgebox(
     rank_fusion: RankFusionName = fastapi_query(SearchParamDefaults.rank_fusion),
     reranker: Union[RerankerName, Reranker] = fastapi_query(SearchParamDefaults.reranker),
     search_configuration: Optional[str] = Query(
-        default=None, description="Load find parameters from this configuration"
+        default=None,
+        description="Load find parameters from this configuration. Parameters in the request override parameters from the configuration.",
     ),
     x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
     x_nucliadb_user: str = Header(""),

nucliadb/search/api/v1/resource/ask.py CHANGED Viewed

@@ -48,6 +48,7 @@ async def resource_ask_endpoint_by_uuid(
     kbid: str,
     rid: str,
     item: AskRequest,
+    x_show_consumption: bool = Header(default=False),
     x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
     x_nucliadb_user: str = Header(""),
     x_forwarded_for: str = Header(""),
@@ -58,13 +59,14 @@ async def resource_ask_endpoint_by_uuid(
     ),
 ) -> Union[StreamingResponse, HTTPClientError, Response]:
     return await create_ask_response(
-        kbid,
-        item,
-        x_nucliadb_user,
-        x_ndb_client,
-        x_forwarded_for,
-        x_synchronous,
+        kbid=kbid,
+        ask_request=item,
+        user_id=x_nucliadb_user,
+        client_type=x_ndb_client,
+        origin=x_forwarded_for,
+        x_synchronous=x_synchronous,
         resource=rid,
+        extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
     )
@@ -83,6 +85,7 @@ async def resource_ask_endpoint_by_slug(
     kbid: str,
     slug: str,
     item: AskRequest,
+    x_show_consumption: bool = Header(default=False),
     x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
     x_nucliadb_user: str = Header(""),
     x_forwarded_for: str = Header(""),
@@ -96,11 +99,12 @@ async def resource_ask_endpoint_by_slug(
     if resource_id is None:
         return HTTPClientError(status_code=404, detail="Resource not found")
     return await create_ask_response(
-        kbid,
-        item,
-        x_nucliadb_user,
-        x_ndb_client,
-        x_forwarded_for,
-        x_synchronous,
+        kbid=kbid,
+        ask_request=item,
+        user_id=x_nucliadb_user,
+        client_type=x_ndb_client,
+        origin=x_forwarded_for,
+        x_synchronous=x_synchronous,
         resource=resource_id,
+        extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
     )

nucliadb/search/api/v1/summarize.py CHANGED Viewed

@@ -19,7 +19,7 @@
 #
 from typing import Union
-from fastapi import Request
+from fastapi import Header, Request
 from fastapi_versioning import version
 from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
@@ -47,9 +47,14 @@ async def summarize_endpoint(
     request: Request,
     kbid: str,
     item: SummarizeRequest,
+    x_show_consumption: bool = Header(default=False),
 ) -> Union[SummarizedResponse, HTTPClientError]:
     try:
-        return await summarize(kbid, item)
+        return await summarize(
+            kbid=kbid,
+            request=item,
+            extra_predict_headers={"X-Show-Consumption": str(x_show_consumption).lower()},
+        )
     except KnowledgeBoxNotFound:
         return HTTPClientError(status_code=404, detail="Knowledge box not found")
     except NoResourcesToSummarize:

nucliadb/search/predict.py CHANGED Viewed

@@ -293,7 +293,7 @@ class PredictEngine:
     @predict_observer.wrap({"type": "chat_ndjson"})
     async def chat_query_ndjson(
-        self, kbid: str, item: ChatModel
+        self, kbid: str, item: ChatModel, extra_headers: Optional[dict[str, str]] = None
     ) -> tuple[str, str, AsyncGenerator[GenerativeChunk, None]]:
         """
         Chat query using the new stream format
@@ -314,7 +314,7 @@ class PredictEngine:
             "POST",
             url=self.get_predict_url(CHAT, kbid),
             json=item.model_dump(),
-            headers=headers,
+            headers={**headers, **(extra_headers or {})},
             timeout=None,
         )
         await self.check_response(kbid, resp, expected_status=200)
@@ -396,7 +396,9 @@ class PredictEngine:
         return convert_relations(data)
     @predict_observer.wrap({"type": "summarize"})
-    async def summarize(self, kbid: str, item: SummarizeModel) -> SummarizedResponse:
+    async def summarize(
+        self, kbid: str, item: SummarizeModel, extra_headers: Optional[dict[str, str]] = None
+    ) -> SummarizedResponse:
         try:
             self.check_nua_key_is_configured_for_onprem()
         except NUAKeyMissingError:
@@ -407,7 +409,7 @@ class PredictEngine:
             "POST",
             url=self.get_predict_url(SUMMARIZE, kbid),
             json=item.model_dump(),
-            headers=self.get_predict_headers(kbid),
+            headers={**self.get_predict_headers(kbid), **(extra_headers or {})},
             timeout=None,
         )
         await self.check_response(kbid, resp, expected_status=200)
@@ -489,7 +491,7 @@ class DummyPredictEngine(PredictEngine):
         return RephraseResponse(rephrased_query=DUMMY_REPHRASE_QUERY, use_chat_history=None)
     async def chat_query_ndjson(
-        self, kbid: str, item: ChatModel
+        self, kbid: str, item: ChatModel, extra_headers: Optional[dict[str, str]] = None
     ) -> tuple[str, str, AsyncGenerator[GenerativeChunk, None]]:
         self.calls.append(("chat_query_ndjson", item))
@@ -559,7 +561,9 @@ class DummyPredictEngine(PredictEngine):
         else:
             return DUMMY_RELATION_NODE
-    async def summarize(self, kbid: str, item: SummarizeModel) -> SummarizedResponse:
+    async def summarize(
+        self, kbid: str, item: SummarizeModel, extra_headers: Optional[dict[str, str]] = None
+    ) -> SummarizedResponse:
         self.calls.append(("summarize", (kbid, item)))
         response = SummarizedResponse(
             summary="global summary",

nucliadb/search/search/chat/ask.py CHANGED Viewed

@@ -22,6 +22,7 @@ import functools
 import json
 from typing import AsyncGenerator, Optional, cast
+from nuclia_models.common.consumption import Consumption
 from nuclia_models.predict.generative_responses import (
     CitationsGenerativeResponse,
     GenerativeChunk,
@@ -83,6 +84,7 @@ from nucliadb_models.search import (
     ChatModel,
     ChatOptions,
     CitationsAskResponseItem,
+    ConsumptionResponseItem,
     DebugAskResponseItem,
     ErrorAskResponseItem,
     FindOptions,
@@ -106,6 +108,7 @@ from nucliadb_models.search import (
     StatusAskResponseItem,
     SyncAskMetadata,
     SyncAskResponse,
+    TokensDetail,
     UserPrompt,
     parse_custom_prompt,
     parse_rephrase_prompt,
@@ -169,6 +172,7 @@ class AskResult:
         self._citations: Optional[CitationsGenerativeResponse] = None
         self._metadata: Optional[MetaGenerativeResponse] = None
         self._relations: Optional[Relations] = None
+        self._consumption: Optional[Consumption] = None
     @property
     def status_code(self) -> AnswerStatusCode:
@@ -299,6 +303,20 @@ class AskResult:
                 ),
             )
+        if self._consumption is not None:
+            yield ConsumptionResponseItem(
+                normalized_tokens=TokensDetail(
+                    input=self._consumption.normalized_tokens.input,
+                    output=self._consumption.normalized_tokens.output,
+                    image=self._consumption.normalized_tokens.image,
+                ),
+                customer_key_tokens=TokensDetail(
+                    input=self._consumption.customer_key_tokens.input,
+                    output=self._consumption.customer_key_tokens.output,
+                    image=self._consumption.customer_key_tokens.image,
+                ),
+            )
         # Stream out the relations results
         should_query_relations = (
             self.ask_request_with_relations and self.status_code == AnswerStatusCode.SUCCESS
@@ -341,6 +359,7 @@ class AskResult:
                     generative_total=self._metadata.timings.get("generative"),
                 ),
             )
         citations = {}
         if self._citations is not None:
             citations = self._citations.citations
@@ -373,6 +392,7 @@ class AskResult:
             prequeries=prequeries_results,
             citations=citations,
             metadata=metadata,
+            consumption=self._consumption,
             learning_id=self.nuclia_learning_id or "",
             augmented_context=self.augmented_context,
         )
@@ -424,6 +444,8 @@ class AskResult:
                 self._citations = item
             elif isinstance(item, MetaGenerativeResponse):
                 self._metadata = item
+            elif isinstance(item, Consumption):
+                self._consumption = item
             else:
                 logger.warning(
                     f"Unexpected item in predict answer stream: {item}",
@@ -486,6 +508,7 @@ async def ask(
     client_type: NucliaDBClientType,
     origin: str,
     resource: Optional[str] = None,
+    extra_predict_headers: Optional[dict[str, str]] = None,
 ) -> AskResult:
     metrics = AskMetrics()
     chat_history = ask_request.chat_history or []
@@ -613,7 +636,9 @@ async def ask(
                 nuclia_learning_id,
                 nuclia_learning_model,
                 predict_answer_stream,
-            ) = await predict.chat_query_ndjson(kbid, chat_model)
+            ) = await predict.chat_query_ndjson(
+                kbid=kbid, item=chat_model, extra_headers=extra_predict_headers
+            )
     auditor = ChatAuditor(
         kbid=kbid,

nucliadb/search/search/chat/prompt.py CHANGED Viewed

@@ -69,6 +69,7 @@ from nucliadb_models.search import (
     RagStrategyName,
     TableImageStrategy,
     TextBlockAugmentationType,
+    TextPosition,
 )
 from nucliadb_protos import resources_pb2
 from nucliadb_protos.resources_pb2 import ExtractedText, FieldComputedMetadata
@@ -107,6 +108,9 @@ class CappedPromptContext:
     def __getitem__(self, key: str) -> str:
         return self.output.__getitem__(key)
+    def __contains__(self, key: str) -> bool:
+        return key in self.output
     def __delitem__(self, key: str) -> None:
         try:
             self.output.__delitem__(key)
@@ -395,7 +399,10 @@ def parse_text_block_id(text_block_id: str) -> TextBlockId:
 async def extend_prompt_context_with_origin_metadata(
-    context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
+    context: CappedPromptContext,
+    kbid,
+    text_block_ids: list[TextBlockId],
+    augmented_context: AugmentedContext,
 ):
     async def _get_origin(kbid: str, rid: str) -> tuple[str, Optional[Origin]]:
         origin = None
@@ -411,7 +418,7 @@ async def extend_prompt_context_with_origin_metadata(
     rid_to_origin = {rid: origin for rid, origin in origins if origin is not None}
     for tb_id in text_block_ids:
         origin = rid_to_origin.get(tb_id.rid)
-        if origin is not None and tb_id.full() in context.output:
+        if origin is not None and tb_id.full() in context:
             text = context.output.pop(tb_id.full())
             extended_text = text + f"\n\nDOCUMENT METADATA AT ORIGIN:\n{to_yaml(origin)}"
             context[tb_id.full()] = extended_text
@@ -424,7 +431,10 @@ async def extend_prompt_context_with_origin_metadata(
 async def extend_prompt_context_with_classification_labels(
-    context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
+    context: CappedPromptContext,
+    kbid: str,
+    text_block_ids: list[TextBlockId],
+    augmented_context: AugmentedContext,
 ):
     async def _get_labels(kbid: str, _id: TextBlockId) -> tuple[TextBlockId, list[tuple[str, str]]]:
         fid = _id if isinstance(_id, FieldId) else _id.field_id
@@ -449,7 +459,7 @@ async def extend_prompt_context_with_classification_labels(
     tb_id_to_labels = {tb_id: labels for tb_id, labels in classif_labels if len(labels) > 0}
     for tb_id in text_block_ids:
         labels = tb_id_to_labels.get(tb_id)
-        if labels is not None and tb_id.full() in context.output:
+        if labels is not None and tb_id.full() in context:
             text = context.output.pop(tb_id.full())
             labels_text = "DOCUMENT CLASSIFICATION LABELS:"
@@ -467,7 +477,10 @@ async def extend_prompt_context_with_classification_labels(
 async def extend_prompt_context_with_ner(
-    context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
+    context: CappedPromptContext,
+    kbid: str,
+    text_block_ids: list[TextBlockId],
+    augmented_context: AugmentedContext,
 ):
     async def _get_ners(kbid: str, _id: TextBlockId) -> tuple[TextBlockId, dict[str, set[str]]]:
         fid = _id if isinstance(_id, FieldId) else _id.field_id
@@ -494,7 +507,7 @@ async def extend_prompt_context_with_ner(
     tb_id_to_ners = {tb_id: ners for tb_id, ners in nerss if len(ners) > 0}
     for tb_id in text_block_ids:
         ners = tb_id_to_ners.get(tb_id)
-        if ners is not None and tb_id.full() in context.output:
+        if ners is not None and tb_id.full() in context:
             text = context.output.pop(tb_id.full())
             ners_text = "DOCUMENT NAMED ENTITIES (NERs):"
@@ -515,7 +528,10 @@ async def extend_prompt_context_with_ner(
 async def extend_prompt_context_with_extra_metadata(
-    context, kbid, text_block_ids: list[TextBlockId], augmented_context: AugmentedContext
+    context: CappedPromptContext,
+    kbid: str,
+    text_block_ids: list[TextBlockId],
+    augmented_context: AugmentedContext,
 ):
     async def _get_extra(kbid: str, rid: str) -> tuple[str, Optional[Extra]]:
         extra = None
@@ -531,7 +547,7 @@ async def extend_prompt_context_with_extra_metadata(
     rid_to_extra = {rid: extra for rid, extra in extras if extra is not None}
     for tb_id in text_block_ids:
         extra = rid_to_extra.get(tb_id.rid)
-        if extra is not None and tb_id.full() in context.output:
+        if extra is not None and tb_id.full() in context:
             text = context.output.pop(tb_id.full())
             extended_text = text + f"\n\nDOCUMENT EXTRA METADATA:\n{to_yaml(extra)}"
             context[tb_id.full()] = extended_text
@@ -600,7 +616,7 @@ async def field_extension_prompt_context(
             if tb_id.startswith(field.full()):
                 del context[tb_id]
         # Add the extracted text of each field to the beginning of the context.
-        if field.full() not in context.output:
+        if field.full() not in context:
             context[field.full()] = extracted_text
             augmented_context.fields[field.full()] = AugmentedTextBlock(
                 id=field.full(),
@@ -610,7 +626,7 @@ async def field_extension_prompt_context(
     # Add the extracted text of each paragraph to the end of the context.
     for paragraph in ordered_paragraphs:
-        if paragraph.id not in context.output:
+        if paragraph.id not in context:
             context[paragraph.id] = _clean_paragraph_text(paragraph)
@@ -668,7 +684,7 @@ async def neighbouring_paragraphs_prompt_context(
         if field_extracted_text is None:
             continue
         ptext = _get_paragraph_text(field_extracted_text, pid)
-        if ptext:
+        if ptext and pid.full() not in context:
             context[pid.full()] = ptext
         # Now add the neighbouring paragraphs
@@ -702,8 +718,8 @@ async def neighbouring_paragraphs_prompt_context(
                 npid = field_pids[neighbour_index]
             except IndexError:
                 continue
-            if npid in retrieved_paragraphs_ids or npid.full() in context.output:
-                # Already added above
+            if npid in retrieved_paragraphs_ids or npid.full() in context:
+                # Already added
                 continue
             ptext = _get_paragraph_text(field_extracted_text, npid)
             if not ptext:
@@ -712,6 +728,7 @@ async def neighbouring_paragraphs_prompt_context(
             augmented_context.paragraphs[npid.full()] = AugmentedTextBlock(
                 id=npid.full(),
                 text=ptext,
+                position=get_text_position(npid, neighbour_index, field_extracted_metadata),
                 parent=pid.full(),
                 augmentation_type=TextBlockAugmentationType.NEIGHBOURING_PARAGRAPHS,
             )
@@ -719,6 +736,30 @@ async def neighbouring_paragraphs_prompt_context(
     metrics.set("neighbouring_paragraphs_ops", len(augmented_context.paragraphs))
+def get_text_position(
+    paragraph_id: ParagraphId, index: int, field_metadata: FieldComputedMetadata
+) -> Optional[TextPosition]:
+    if paragraph_id.field_id.subfield_id:
+        metadata = field_metadata.split_metadata[paragraph_id.field_id.subfield_id]
+    else:
+        metadata = field_metadata.metadata
+    try:
+        pmetadata = metadata.paragraphs[index]
+    except IndexError:
+        return None
+    page_number = None
+    if pmetadata.HasField("page"):
+        page_number = pmetadata.page.page
+    return TextPosition(
+        page_number=page_number,
+        index=index,
+        start=pmetadata.start,
+        end=pmetadata.end,
+        start_seconds=list(pmetadata.start_seconds),
+        end_seconds=list(pmetadata.end_seconds),
+    )
 def get_neighbouring_indices(
     index: int, before: int, after: int, field_pids: list[ParagraphId]
 ) -> list[int]:
@@ -742,7 +783,8 @@ async def conversation_prompt_context(
         storage = await get_storage()
         kb = KnowledgeBoxORM(txn, storage, kbid)
         for paragraph in ordered_paragraphs:
-            context[paragraph.id] = _clean_paragraph_text(paragraph)
+            if paragraph.id not in context:
+                context[paragraph.id] = _clean_paragraph_text(paragraph)
             # If the paragraph is a conversation and it matches semantically, we assume we
             # have matched with the question, therefore try to include the answer to the
@@ -780,7 +822,7 @@ async def conversation_prompt_context(
                                 text = message.content.text.strip()
                             pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
                             attachments.extend(message.content.attachments_fields)
-                            if pid in context.output:
+                            if pid in context:
                                 continue
                             context[pid] = text
                             augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -802,7 +844,7 @@ async def conversation_prompt_context(
                             text = message.content.text.strip()
                         attachments.extend(message.content.attachments_fields)
                         pid = f"{rid}/{field_type}/{field_id}/{ident}/0-{len(text) + 1}"
-                        if pid in context.output:
+                        if pid in context:
                             continue
                         context[pid] = text
                         augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -834,7 +876,7 @@ async def conversation_prompt_context(
                         text = message.content.text.strip()
                         attachments.extend(message.content.attachments_fields)
                         pid = f"{rid}/{field_type}/{field_id}/{message.ident}/0-{len(message.content.text) + 1}"
-                        if pid in context.output:
+                        if pid in context:
                             continue
                         context[pid] = text
                         augmented_context.paragraphs[pid] = AugmentedTextBlock(
@@ -854,7 +896,7 @@ async def conversation_prompt_context(
                         extracted_text = await field.get_extracted_text()
                         if extracted_text is not None:
                             pid = f"{rid}/{field_type}/{attachment.field_id}/0-{len(extracted_text.text) + 1}"
-                            if pid in context.output:
+                            if pid in context:
                                 continue
                             text = f"Attachment {attachment.field_id}: {extracted_text.text}\n\n"
                             context[pid] = text
@@ -977,9 +1019,9 @@ async def hierarchy_prompt_context(
         paragraph_text = _clean_paragraph_text(paragraph)
         context[paragraph.id] = paragraph_text
         if paragraph.id in augmented_paragraphs:
-            field_id = ParagraphId.from_string(paragraph.id).field_id.full()
-            augmented_context.fields[field_id] = AugmentedTextBlock(
-                id=field_id, text=paragraph_text, augmentation_type=TextBlockAugmentationType.HIERARCHY
+            pid = ParagraphId.from_string(paragraph.id)
+            augmented_context.paragraphs[pid.full()] = AugmentedTextBlock(
+                id=pid.full(), text=paragraph_text, augmentation_type=TextBlockAugmentationType.HIERARCHY
             )
     return

nucliadb/search/search/pgcatalog.py CHANGED Viewed

@@ -168,7 +168,7 @@ def _prepare_query_search(query: search_models.CatalogQuery, params: dict[str, A
         # executed per query is not a problem.
         # Remove zero-length words from the split
-        params["query"] = [word for word in SPLIT_REGEX.split(query.query) if word]
+        params["query"] = [word.lower() for word in SPLIT_REGEX.split(query.query) if word]
         return sql.SQL("regexp_split_to_array(lower(title), '\\W') @> %(query)s")
     elif query.match == search_models.CatalogQueryMatch.Fuzzy:
         params["query"] = query.query

nucliadb/search/search/predict_proxy.py CHANGED Viewed

@@ -19,7 +19,7 @@
 #
 import json
 from enum import Enum
-from typing import Any, AsyncIterable, Optional, Union
+from typing import Any, Optional, Union
 import aiohttp
 from fastapi.datastructures import QueryParams
@@ -63,6 +63,7 @@ class PredictProxiedEndpoints(str, Enum):
 ALLOWED_HEADERS = [
     "Accept",  # To allow 'application/x-ndjson' on the /chat endpoint
+    "X-show-consumption",  # To show token consumption in the response
 ]
 PREDICT_ANSWER_METRIC = "predict_answer_proxy_metric"
@@ -171,21 +172,13 @@ async def chat_streaming_generator(
     user_query: str,
     is_json: bool,
 ):
-    stream: AsyncIterable[bytes]
-    if is_json:
-        # ndjson: stream lines
-        stream = predict_response.content
-    else:
-        # plain text: stream chunks (last chunk is status)
-        stream = predict_response.content.iter_any()
     first = True
     status_code = AnswerStatusCode.ERROR.value
     text_answer = ""
     json_object = None
     metrics = AskMetrics()
     with metrics.time(PREDICT_ANSWER_METRIC):
-        async for chunk in stream:
+        async for chunk in predict_response.content:
             if first:
                 metrics.record_first_chunk_yielded()
                 first = False
@@ -211,7 +204,11 @@ async def chat_streaming_generator(
     if is_json is False and chunk:  # Ensure chunk is not empty before decoding
         # If response is text the status_code comes at the last chunk of data
-        status_code = chunk.decode()
+        last_chunk = chunk.decode()
+        if last_chunk[-1] == "0":
+            status_code = "0"
+        else:
+            status_code = last_chunk[-2:]
     audit_predict_proxy_endpoint(
         headers=predict_response.headers,

nucliadb/search/search/query_parser/old_filters.py CHANGED Viewed

@@ -212,7 +212,7 @@ def split_labels(
     else:
         paragraph_expr = FilterExpression()
         filter_list = getattr(paragraph_expr, combinator)
-        filter_list.extend(paragraph)
+        filter_list.operands.extend(paragraph)
     return field_expr, paragraph_expr

nucliadb/search/search/summarize.py CHANGED Viewed

@@ -45,7 +45,9 @@ class NoResourcesToSummarize(Exception):
     pass
-async def summarize(kbid: str, request: SummarizeRequest) -> SummarizedResponse:
+async def summarize(
+    kbid: str, request: SummarizeRequest, extra_predict_headers: Optional[dict[str, str]]
+) -> SummarizedResponse:
     predict_request = SummarizeModel()
     predict_request.generative_model = request.generative_model
     predict_request.user_prompt = request.user_prompt
@@ -62,7 +64,7 @@ async def summarize(kbid: str, request: SummarizeRequest) -> SummarizedResponse:
         raise NoResourcesToSummarize()
     predict = get_predict()
-    return await predict.summarize(kbid, predict_request)
+    return await predict.summarize(kbid=kbid, item=predict_request, extra_headers=extra_predict_headers)
 async def get_extracted_texts(kbid: str, resource_uuids_or_slugs: list[str]) -> ExtractedTexts:

{nucliadb-6.5.1.post4539.dist-info → nucliadb-6.6.1.post649.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb
-Version: 6.5.1.post4539
+Version: 6.6.1.post649
 Summary: NucliaDB
 Author-email: Nuclia <nucliadb@nuclia.com>
 License-Expression: AGPL-3.0-or-later
@@ -19,13 +19,13 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: <4,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: nucliadb-telemetry[all]>=6.5.1.post4539
-Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.5.1.post4539
-Requires-Dist: nucliadb-protos>=6.5.1.post4539
-Requires-Dist: nucliadb-models>=6.5.1.post4539
-Requires-Dist: nidx-protos>=6.5.1.post4539
+Requires-Dist: nucliadb-telemetry[all]>=6.6.1.post649
+Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.6.1.post649
+Requires-Dist: nucliadb-protos>=6.6.1.post649
+Requires-Dist: nucliadb-models>=6.6.1.post649
+Requires-Dist: nidx-protos>=6.6.1.post649
 Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
-Requires-Dist: nuclia-models>=0.24.2
+Requires-Dist: nuclia-models>=0.43.0
 Requires-Dist: uvicorn[standard]
 Requires-Dist: argdantic
 Requires-Dist: aiohttp>=3.11.11

{nucliadb-6.5.1.post4539.dist-info → nucliadb-6.6.1.post649.dist-info}/RECORD RENAMED Viewed

@@ -33,6 +33,7 @@ migrations/0034_rollover_nidx_texts_3.py,sha256=t19QtWUgHxmTaBPoR1DooAby2IYmkLTQ
 migrations/0035_rollover_nidx_texts_4.py,sha256=W0_AUd01pjMpYMDC3yqF6HzDLgcnnPprL80kfyb1WZI,1187
 migrations/0036_backfill_catalog_slug.py,sha256=mizRM-HfPswKq4iEmqofu4kIT6Gd97ruT3qhb257vZk,2954
 migrations/0037_backfill_catalog_facets.py,sha256=KAf3VKbKePw7ykDnJi47LyJ7pK1JwYkwMxrsXUnbt9g,2788
+migrations/0038_backfill_catalog_field_labels.py,sha256=EKJwJfU0p1nDq7s71CpGhaX4t1iD2d1ZCzTmLcUAhDs,3382
 migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 migrations/pg/0001_bootstrap.py,sha256=3O_P17l0d0h48nebN6VQLXzM_B7S7zvDpaLR0koVgWE,1274
 migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
@@ -132,7 +133,7 @@ nucliadb/export_import/utils.py,sha256=XV3tJJdhgnVJRSj8AxZjgeipONtB107M185HVJmHp
 nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
 nucliadb/ingest/app.py,sha256=qiPad2eWgudRdLq0tB0MQZOxOezXO7QBK_ZpPNKQZO0,7378
 nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
-nucliadb/ingest/processing.py,sha256=QmkHq-BU4vub7JRWe9VHvQ2DcAmT6-CzgFXuZxXhcBU,20953
+nucliadb/ingest/processing.py,sha256=gAm591llkscMq0abhxQmpChDZIzto-76Dni4f7Flhfw,21229
 nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb/ingest/serialize.py,sha256=-TIjibJTbMqAowzRvyrG3R209vKqBZqXpdrQL9Dq4lo,16135
 nucliadb/ingest/settings.py,sha256=5qJICxwYb028a2iAhVbxOJB5X-hWtDLtiya-YhWostw,3179
@@ -162,12 +163,12 @@ nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmB
 nucliadb/ingest/orm/index_message.py,sha256=DWMTHJoVamUbK8opKl5csDvxfgz7c2j7phG1Ut4yIxk,15724
 nucliadb/ingest/orm/knowledgebox.py,sha256=_rkeTMIXMhR64gbYtZpFHoUHghV2DTJ2lUBqZsoqC_4,23898
 nucliadb/ingest/orm/metrics.py,sha256=OiuggTh-n3kZHA2G73NEUdIlh8c3yFrbusI88DK-Mko,1273
-nucliadb/ingest/orm/resource.py,sha256=OZEdoaaP56VaybuAbUHexGRMmM9C8-S0340jIHqamcQ,37177
+nucliadb/ingest/orm/resource.py,sha256=yB0HWC3jc_1b-zXu-3FJCKOdAPPSb1aRBHpbZhsvyQk,37749
 nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,2693
 nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
 nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
 nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
-nucliadb/ingest/orm/processor/pgcatalog.py,sha256=GpzQv0_iWTHbM90J0rAz_QIh_TMv1XbghyDgs8tk_8M,4014
+nucliadb/ingest/orm/processor/pgcatalog.py,sha256=VPQ_Evme7xmmGoQ45zt0Am0yPkaD4hxN1r5rEaVt6s8,4633
 nucliadb/ingest/orm/processor/processor.py,sha256=jaEBwbv--WyoC8zcdxWAyF0dAzVA5crVDJl56Bqv1eI,31444
 nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
 nucliadb/ingest/service/__init__.py,sha256=LHQFUkdmNBOWqBG0Md9sMMI7g5TQZ-hLAnhw6ZblrJg,2002
@@ -212,7 +213,7 @@ nucliadb/search/__init__.py,sha256=tnypbqcH4nBHbGpkINudhKgdLKpwXQCvDtPchUlsyY4,1
 nucliadb/search/app.py,sha256=-WEX1AZRA8R_9aeOo9ovOTwjXW_7VfwWN7N2ccSoqXg,3387
 nucliadb/search/lifecycle.py,sha256=hiylV-lxsAWkqTCulXBg0EIfMQdejSr8Zar0L_GLFT8,2218
 nucliadb/search/openapi.py,sha256=t3Wo_4baTrfPftg2BHsyLWNZ1MYn7ZRdW7ht-wFOgRs,1016
-nucliadb/search/predict.py,sha256=__0qwIU2CIRYRTYsbG9zZEjXXrxNe8puZWYJIyOT6dg,23492
+nucliadb/search/predict.py,sha256=xZtZaydg1pzXOSEDg0xyWNbbgA4zMQ59gbHi0wNuAxk,23770
 nucliadb/search/predict_models.py,sha256=pm4ykuWH9bTXxj5RlI2F6pmXSXOVt64WL_sRlc2u6Tk,6144
 nucliadb/search/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb/search/run.py,sha256=aFb-CXRi_C8YMpP_ivNj8KW1BYhADj88y8K9Lr_nUPI,1402
@@ -220,20 +221,20 @@ nucliadb/search/settings.py,sha256=vem3EcyYlTPSim0kEK-xe-erF4BZg0CT_LAb8ZRQAE8,1
 nucliadb/search/utilities.py,sha256=9SsRDw0rJVXVoLBfF7rBb6q080h-thZc7u8uRcTiBeY,1037
 nucliadb/search/api/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/api/v1/__init__.py,sha256=DH16OYnw9jQ38OpKlmdXeoq2j40ZPXZRtGvClKOkMhw,1239
-nucliadb/search/api/v1/ask.py,sha256=b4tz33HNsfT5DXv_2DMc_jirnFsHuobreWkbAKkzj5o,5337
-nucliadb/search/api/v1/catalog.py,sha256=aBNhgg-8Dj4kiB9IYshe46ph1FaeaPCjyxnNPgUu3AQ,7942
+nucliadb/search/api/v1/ask.py,sha256=hZUnk1opZuXp1IwTiingSatlUefg2CZ9r_Z9sUwZMaU,5698
+nucliadb/search/api/v1/catalog.py,sha256=5ZY3d8sVia1traUxVS0Q4aQJmgcOuXzbxis_uY4ulE4,8077
 nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
-nucliadb/search/api/v1/find.py,sha256=iMjyq4y0JOMC_x1B8kUfVdkCoc9G9Ark58kPLLY4HDw,10824
+nucliadb/search/api/v1/find.py,sha256=j6mxEyxjlLnZSqCT_N2LmOJlytsm1vkY4KFFmJRrtP8,10904
 nucliadb/search/api/v1/graph.py,sha256=gthqxCOn9biE6D6s93jRGLglk0ono8U7OyS390kWiI8,4178
 nucliadb/search/api/v1/knowledgebox.py,sha256=e9xeLPUqnQTx33i4A8xuV93ENvtJGrpjPlLRbGJtAI8,8415
 nucliadb/search/api/v1/predict_proxy.py,sha256=TnXKAqf_Go-9QVi6L5z4cXjnuNRe7XLJjF5QH_uwA1I,3504
 nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
 nucliadb/search/api/v1/search.py,sha256=eqlrvRE7IlMpunNwD1RJwt6RgMV01sIDJLgxxE7CFcE,12297
 nucliadb/search/api/v1/suggest.py,sha256=gaJE60r8-z6TVO05mQRKBITwXn2_ofM3B4-OtpOgZEk,6343
-nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
+nucliadb/search/api/v1/summarize.py,sha256=eJzgFJWUO80STx3lHc_0h9RZVaBCWF196nZUecfmqbE,2700
 nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
 nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/search/api/v1/resource/ask.py,sha256=nsVzBSanSSlf0Ody6LSTjdEy75Vg283_YhbkAtWEjh8,3637
+nucliadb/search/api/v1/resource/ask.py,sha256=PlOXa17lnmj3KA9bARNfDqvnx7Pe9OTnwz-OwgGTUjU,4035
 nucliadb/search/api/v1/resource/ingestion_agents.py,sha256=AZ5_cH1jbf7d5wh_gz6EHLEKAzEOMrQZwEZAu1Q_3FE,4846
 nucliadb/search/api/v1/resource/search.py,sha256=PZR7fs5oYD0RKqKoD38NZMAnOJzBv35NB2YOr2xy1ck,4923
 nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG0BHxGSS9QPw,1141
@@ -254,25 +255,25 @@ nucliadb/search/search/ingestion_agents.py,sha256=IK6yOPEF9rST_uoqspdVdPk0pldjDh
 nucliadb/search/search/merge.py,sha256=XiRBsxhYPshPV7lZXD-9E259KZOPIf4I2tKosY0lPo4,22470
 nucliadb/search/search/metrics.py,sha256=3I6IN0qDSmqIvUaWJmT3rt-Jyjs6LcvnKI8ZqCiuJPY,3501
 nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
-nucliadb/search/search/pgcatalog.py,sha256=_AiyW6it66UX6BsZbM3-230IQhiEG4utoKYboviyOFI,16799
-nucliadb/search/search/predict_proxy.py,sha256=Q12I3VIAQqFgzBe9UeVEiAjUAdVT8NBfNDXWiP-pn1M,8858
+nucliadb/search/search/pgcatalog.py,sha256=0n_gDihZZhqrDLRHvHzS3IESvMRTcU6YShqizQMyE_Y,16807
+nucliadb/search/search/predict_proxy.py,sha256=Df8F5K-oS4TIXJc_y8UDViJTo7st5L0kMgxYPFZ39Vk,8806
 nucliadb/search/search/query.py,sha256=0qIQdt548L3jtKOyKo06aGJ73SLBxAW3N38_Hc1M3Uw,11528
 nucliadb/search/search/rank_fusion.py,sha256=xZtXhbmKb_56gs73u6KkFm2efvTATOSMmpOV2wrAIqE,9613
 nucliadb/search/search/rerankers.py,sha256=E2J1QdKAojqbhHM3KAyaOXKf6tJyETUxKs4tf_BEyqk,7472
 nucliadb/search/search/shards.py,sha256=mc5DK-MoCv9AFhlXlOFHbPvetcyNDzTFOJ5rimK8PC8,2636
-nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
+nucliadb/search/search/summarize.py,sha256=S4-mUS8d-rvHFcsr8Pa8N5NTxU6ZTxLFZTMKTTOOpr4,5098
 nucliadb/search/search/utils.py,sha256=ajRIXfdTF67dBVahQCXW-rSv6gJpUMPt3QhJrWqArTQ,2175
 nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
-nucliadb/search/search/chat/ask.py,sha256=0sgfiCbNaCZrTvYaRGtf5xL6VnzRgzofINiEP4IvhWs,38278
+nucliadb/search/search/chat/ask.py,sha256=vJ3TSdr-cT_xh43UnoYugqxnHv_-LFSCYoU7o0NnI1M,39368
 nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
 nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
-nucliadb/search/search/chat/prompt.py,sha256=SNsCtB9mZTODjnUMAH8YfPxn05Kjl2d5xTIteNxyVcI,52783
+nucliadb/search/search/chat/prompt.py,sha256=gmYRC3aK03vrDoBElJP5H5Z7OEeu79k5yTxv3FEkN0I,53866
 nucliadb/search/search/chat/query.py,sha256=3jMPNbiFEOoS0ydMOPYkSx1qVlvAv51npzadWXDwkMs,16650
 nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb/search/search/query_parser/exceptions.py,sha256=sVl9gRNzhE-s480LBBVkiXzNRbKhYRQN5F3it5tNNp8,939
 nucliadb/search/search/query_parser/fetcher.py,sha256=nP4EySj2BvH10QgCvgzvp13Nf22wwfHsdLbDoPlH2cQ,16831
 nucliadb/search/search/query_parser/models.py,sha256=kAslqX_-zaIdUpcpdNU2a5uQPQh7LC605qWLZ4aZ5T4,5064
-nucliadb/search/search/query_parser/old_filters.py,sha256=HircRqYEac_90bNCtFIJZ2RKA90kjbpNOQcp_ArBqR0,9083
+nucliadb/search/search/query_parser/old_filters.py,sha256=GsU3T3-WiSPvjucP7evHkshzAWZOli8qsuXChvWRCY0,9092
 nucliadb/search/search/query_parser/parsers/__init__.py,sha256=ySCNSdbesLXGZyR88919njulA6UE10_3PhqMG_Yj1o4,1034
 nucliadb/search/search/query_parser/parsers/ask.py,sha256=eTz8wS-EJHuAagR384h6TT64itymFZRpfZJGX8r6aZM,2771
 nucliadb/search/search/query_parser/parsers/catalog.py,sha256=JuDiBL2wdjAuEFEPo0e2nQ4VqWjF3FXakT0ziZk3Oes,7495
@@ -375,8 +376,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
 nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
 nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
 nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
-nucliadb-6.5.1.post4539.dist-info/METADATA,sha256=fB-dfmL9TqpjyMPOiHEu-EIwQLqH-a_j3bl3eTsjbFc,4158
-nucliadb-6.5.1.post4539.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nucliadb-6.5.1.post4539.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
-nucliadb-6.5.1.post4539.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
-nucliadb-6.5.1.post4539.dist-info/RECORD,,
+nucliadb-6.6.1.post649.dist-info/METADATA,sha256=_peNGuFRZE9h5r-n-Aglwzr-hlIijYTLzMDBF5BWsss,4152
+nucliadb-6.6.1.post649.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nucliadb-6.6.1.post649.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+nucliadb-6.6.1.post649.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+nucliadb-6.6.1.post649.dist-info/RECORD,,

{nucliadb-6.5.1.post4539.dist-info → nucliadb-6.6.1.post649.dist-info}/WHEEL RENAMED Viewed

File without changes

{nucliadb-6.5.1.post4539.dist-info → nucliadb-6.6.1.post649.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nucliadb-6.5.1.post4539.dist-info → nucliadb-6.6.1.post649.dist-info}/top_level.txt RENAMED Viewed

File without changes

nucliadb 6.5.1.post4539__py3-none-any.whl → 6.6.1.post649__py3-none-any.whl

nucliadb 6.5.1.post4539py3-none-any.whl → 6.6.1.post649py3-none-any.whl