PyPI - nucliadb - Versions diffs - 6.2.0.post2675__py3-none-any.whl → 6.2.1__py3-none-any.whl - Mend

nucliadb 6.2.0.post2675__py3-none-any.whl → 6.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

migrations/0028_extracted_vectors_reference.py +61 -0
migrations/0029_backfill_field_status.py +149 -0
migrations/0030_label_deduplication.py +60 -0
nucliadb/common/cluster/manager.py +41 -331
nucliadb/common/cluster/rebalance.py +2 -2
nucliadb/common/cluster/rollover.py +12 -71
nucliadb/common/cluster/settings.py +3 -0
nucliadb/common/cluster/standalone/utils.py +0 -43
nucliadb/common/cluster/utils.py +0 -16
nucliadb/common/counters.py +1 -0
nucliadb/common/datamanagers/fields.py +48 -7
nucliadb/common/datamanagers/vectorsets.py +11 -2
nucliadb/common/external_index_providers/base.py +2 -1
nucliadb/common/external_index_providers/pinecone.py +3 -5
nucliadb/common/ids.py +18 -4
nucliadb/common/models_utils/from_proto.py +479 -0
nucliadb/common/models_utils/to_proto.py +60 -0
nucliadb/common/nidx.py +76 -37
nucliadb/export_import/models.py +3 -3
nucliadb/health.py +0 -7
nucliadb/ingest/app.py +0 -8
nucliadb/ingest/consumer/auditing.py +1 -1
nucliadb/ingest/consumer/shard_creator.py +1 -1
nucliadb/ingest/fields/base.py +83 -21
nucliadb/ingest/orm/brain.py +55 -56
nucliadb/ingest/orm/broker_message.py +12 -2
nucliadb/ingest/orm/entities.py +6 -17
nucliadb/ingest/orm/knowledgebox.py +44 -22
nucliadb/ingest/orm/processor/data_augmentation.py +7 -29
nucliadb/ingest/orm/processor/processor.py +5 -2
nucliadb/ingest/orm/resource.py +222 -413
nucliadb/ingest/processing.py +8 -2
nucliadb/ingest/serialize.py +77 -46
nucliadb/ingest/service/writer.py +2 -56
nucliadb/ingest/settings.py +1 -4
nucliadb/learning_proxy.py +6 -4
nucliadb/purge/__init__.py +102 -12
nucliadb/purge/orphan_shards.py +6 -4
nucliadb/reader/api/models.py +3 -3
nucliadb/reader/api/v1/__init__.py +1 -0
nucliadb/reader/api/v1/download.py +2 -2
nucliadb/reader/api/v1/knowledgebox.py +3 -3
nucliadb/reader/api/v1/resource.py +23 -12
nucliadb/reader/api/v1/services.py +4 -4
nucliadb/reader/api/v1/vectorsets.py +48 -0
nucliadb/search/api/v1/ask.py +11 -1
nucliadb/search/api/v1/feedback.py +3 -3
nucliadb/search/api/v1/knowledgebox.py +8 -13
nucliadb/search/api/v1/search.py +3 -2
nucliadb/search/api/v1/suggest.py +0 -2
nucliadb/search/predict.py +6 -4
nucliadb/search/requesters/utils.py +1 -2
nucliadb/search/search/chat/ask.py +77 -13
nucliadb/search/search/chat/prompt.py +16 -5
nucliadb/search/search/chat/query.py +74 -34
nucliadb/search/search/exceptions.py +2 -7
nucliadb/search/search/find.py +9 -5
nucliadb/search/search/find_merge.py +10 -4
nucliadb/search/search/graph_strategy.py +884 -0
nucliadb/search/search/hydrator.py +6 -0
nucliadb/search/search/merge.py +79 -24
nucliadb/search/search/query.py +74 -245
nucliadb/search/search/query_parser/exceptions.py +11 -1
nucliadb/search/search/query_parser/fetcher.py +405 -0
nucliadb/search/search/query_parser/models.py +0 -3
nucliadb/search/search/query_parser/parser.py +22 -21
nucliadb/search/search/rerankers.py +1 -42
nucliadb/search/search/shards.py +19 -0
nucliadb/standalone/api_router.py +2 -14
nucliadb/standalone/settings.py +4 -0
nucliadb/train/generators/field_streaming.py +7 -3
nucliadb/train/lifecycle.py +3 -6
nucliadb/train/nodes.py +14 -12
nucliadb/train/resource.py +380 -0
nucliadb/writer/api/constants.py +20 -16
nucliadb/writer/api/v1/__init__.py +1 -0
nucliadb/writer/api/v1/export_import.py +1 -1
nucliadb/writer/api/v1/field.py +13 -7
nucliadb/writer/api/v1/knowledgebox.py +3 -46
nucliadb/writer/api/v1/resource.py +20 -13
nucliadb/writer/api/v1/services.py +10 -1
nucliadb/writer/api/v1/upload.py +61 -34
nucliadb/writer/{vectorsets.py → api/v1/vectorsets.py} +99 -47
nucliadb/writer/back_pressure.py +17 -46
nucliadb/writer/resource/basic.py +9 -7
nucliadb/writer/resource/field.py +42 -9
nucliadb/writer/settings.py +2 -2
nucliadb/writer/tus/gcs.py +11 -10
{nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/METADATA +11 -14
{nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/RECORD +94 -96
{nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/WHEEL +1 -1
nucliadb/common/cluster/discovery/base.py +0 -178
nucliadb/common/cluster/discovery/k8s.py +0 -301
nucliadb/common/cluster/discovery/manual.py +0 -57
nucliadb/common/cluster/discovery/single.py +0 -51
nucliadb/common/cluster/discovery/types.py +0 -32
nucliadb/common/cluster/discovery/utils.py +0 -67
nucliadb/common/cluster/standalone/grpc_node_binding.py +0 -349
nucliadb/common/cluster/standalone/index_node.py +0 -123
nucliadb/common/cluster/standalone/service.py +0 -84
nucliadb/standalone/introspect.py +0 -208
nucliadb-6.2.0.post2675.dist-info/zip-safe +0 -1
/nucliadb/common/{cluster/discovery → models_utils}/__init__.py +0 -0
{nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/entry_points.txt +0 -0
{nucliadb-6.2.0.post2675.dist-info → nucliadb-6.2.1.dist-info}/top_level.txt +0 -0

nucliadb/reader/api/models.py CHANGED Viewed

@@ -22,7 +22,7 @@ from typing import TYPE_CHECKING, Any, Optional, Union
 from pydantic import BaseModel
 import nucliadb_models as models
-from nucliadb_models.common import FIELD_TYPES_MAP, FieldTypeName
+from nucliadb_models.common import FieldTypeName
 from nucliadb_models.resource import (
     ConversationFieldExtractedData,
     Error,
@@ -52,10 +52,10 @@ class ResourceField(BaseModel):
     value: ValueType = None
     extracted: Optional[ExtractedDataType] = None
     error: Optional[Error] = None
+    status: Optional[str] = None
+    errors: Optional[list[Error]] = None
-FIELD_NAMES_TO_PB_TYPE_MAP = {v: k for k, v in FIELD_TYPES_MAP.items()}
 FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP: dict[FieldTypeName, Any] = {
     FieldTypeName.TEXT: TextFieldExtractedData,
     FieldTypeName.FILE: FileFieldExtractedData,

nucliadb/reader/api/v1/__init__.py CHANGED Viewed

@@ -23,4 +23,5 @@ from . import knowledgebox  # noqa
 from . import learning_config  # noqa
 from . import resource  # noqa
 from . import services  # noqa
+from . import vectorsets  # noqa
 from .router import api  # noqa

nucliadb/reader/api/v1/download.py CHANGED Viewed

@@ -29,9 +29,9 @@ from starlette.datastructures import Headers
 from starlette.responses import StreamingResponse
 from nucliadb.common.ids import FIELD_TYPE_PB_TO_STR
+from nucliadb.common.models_utils import to_proto
 from nucliadb.ingest.serialize import get_resource_uuid_by_slug
 from nucliadb.reader import SERVICE_NAME, logger
-from nucliadb.reader.api.models import FIELD_NAMES_TO_PB_TYPE_MAP
 from nucliadb_models.common import FieldTypeName
 from nucliadb_models.resource import NucliaDBRoles
 from nucliadb_utils.authentication import requires_one
@@ -97,7 +97,7 @@ async def _download_extract_file(
     storage = await get_storage(service_name=SERVICE_NAME)
-    pb_field_type = FIELD_NAMES_TO_PB_TYPE_MAP[field_type]
+    pb_field_type = to_proto.field_type_name(field_type)
     field_type_letter = FIELD_TYPE_PB_TO_STR[pb_field_type]
     sf = storage.file_extracted(kbid, rid, field_type_letter, field_id, download_field)

nucliadb/reader/api/v1/knowledgebox.py CHANGED Viewed

@@ -23,9 +23,9 @@ from starlette.requests import Request
 from nucliadb.common import datamanagers
 from nucliadb.common.maindb.utils import get_driver
+from nucliadb.common.models_utils import from_proto
 from nucliadb.reader.api.v1.router import KB_PREFIX, KBS_PREFIX, api
 from nucliadb_models.resource import (
-    KnowledgeBoxConfig,
     KnowledgeBoxList,
     KnowledgeBoxObj,
     KnowledgeBoxObjSummary,
@@ -72,7 +72,7 @@ async def get_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
         return KnowledgeBoxObj(
             uuid=kbid,
             slug=kb_config.slug,
-            config=KnowledgeBoxConfig.from_message(kb_config),
+            config=from_proto.knowledgebox_config(kb_config),
         )
@@ -99,5 +99,5 @@ async def get_kb_by_slug(request: Request, slug: str) -> KnowledgeBoxObj:
         return KnowledgeBoxObj(
             uuid=kbid,
             slug=kb_config.slug,
-            config=KnowledgeBoxConfig.from_message(kb_config),
+            config=from_proto.knowledgebox_config(kb_config),
         )

nucliadb/reader/api/v1/resource.py CHANGED Viewed

@@ -22,9 +22,9 @@ from typing import Optional, Union
 from fastapi import Header, HTTPException, Query, Request, Response
 from fastapi_versioning import version
-import nucliadb_models as models
 from nucliadb.common.datamanagers.resources import KB_RESOURCE_SLUG_BASE
 from nucliadb.common.maindb.utils import get_driver
+from nucliadb.common.models_utils import from_proto, to_proto
 from nucliadb.ingest.fields.conversation import Conversation
 from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as ORMKnowledgeBox
 from nucliadb.ingest.orm.resource import Resource as ORMResource
@@ -37,7 +37,6 @@ from nucliadb.reader import SERVICE_NAME
 from nucliadb.reader.api import DEFAULT_RESOURCE_LIST_PAGE_SIZE
 from nucliadb.reader.api.models import (
     FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP,
-    FIELD_NAMES_TO_PB_TYPE_MAP,
     ResourceField,
 )
 from nucliadb.reader.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
@@ -53,6 +52,7 @@ from nucliadb_models.resource import (
 )
 from nucliadb_models.search import ResourceProperties
 from nucliadb_protos import resources_pb2
+from nucliadb_protos.writer_pb2 import FieldStatus
 from nucliadb_telemetry import errors
 from nucliadb_utils.authentication import requires, requires_one
 from nucliadb_utils.utilities import get_audit, get_storage
@@ -334,9 +334,7 @@ async def _get_resource_field(
 ) -> Response:
     storage = await get_storage(service_name=SERVICE_NAME)
     driver = get_driver()
-    pb_field_id = FIELD_NAMES_TO_PB_TYPE_MAP[field_type]
+    pb_field_id = to_proto.field_type_name(field_type)
     async with driver.transaction() as txn:
         kb = ORMKnowledgeBox(txn, storage, kbid)
@@ -358,15 +356,15 @@ async def _get_resource_field(
             if isinstance(value, resources_pb2.FieldText):
                 value = await field.get_value()
-                resource_field.value = models.FieldText.from_message(value)
+                resource_field.value = from_proto.field_text(value)
             if isinstance(value, resources_pb2.FieldFile):
                 value = await field.get_value()
-                resource_field.value = models.FieldFile.from_message(value)
+                resource_field.value = from_proto.field_file(value)
             if isinstance(value, resources_pb2.FieldLink):
                 value = await field.get_value()
-                resource_field.value = models.FieldLink.from_message(value)
+                resource_field.value = from_proto.field_link(value)
             if isinstance(field, Conversation):
                 if page == "first":
@@ -379,7 +377,7 @@ async def _get_resource_field(
                 value = await field.get_value(page=page_to_fetch)
                 if value is not None:
-                    resource_field.value = models.Conversation.from_message(value)
+                    resource_field.value = from_proto.conversation(value)
         if ResourceFieldProperties.EXTRACTED in show and extracted:
             resource_field.extracted = FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP[field_type]()
@@ -391,9 +389,22 @@ async def _get_resource_field(
             )
         if ResourceFieldProperties.ERROR in show:
-            error = await field.get_error()
-            if error is not None:
-                resource_field.error = Error(body=error.error, code=error.code)
+            status = await field.get_status()
+            if status is None:
+                status = FieldStatus()
+            resource_field.status = status.Status.Name(status.status)
+            if status.errors:
+                resource_field.errors = []
+                for error in status.errors:
+                    resource_field.errors.append(
+                        Error(
+                            body=error.source_error.error,
+                            code=error.source_error.code,
+                            code_str=error.source_error.ErrorCode.Name(error.source_error.code),
+                            created=error.created.ToDatetime(),
+                        )
+                    )
+                resource_field.error = resource_field.errors[-1]
     return Response(
         content=resource_field.model_dump_json(exclude_unset=True, by_alias=True),

nucliadb/reader/api/v1/services.py CHANGED Viewed

@@ -32,6 +32,7 @@ from nucliadb.common.context.fastapi import get_app_context
 from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
 from nucliadb.common.http_clients import processing
 from nucliadb.common.maindb.utils import get_driver
+from nucliadb.common.models_utils import from_proto
 from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
 from nucliadb.models.responses import HTTPClientError
 from nucliadb.reader import SERVICE_NAME
@@ -39,7 +40,6 @@ from nucliadb.reader.api.v1.router import KB_PREFIX, api
 from nucliadb.reader.reader.notifications import kb_notifications_stream
 from nucliadb_models.entities import (
     EntitiesGroup,
-    EntitiesGroupSummary,
     KnowledgeBoxEntities,
 )
 from nucliadb_models.labels import KnowledgeBoxLabels, LabelSet
@@ -86,7 +86,7 @@ async def list_entities_groups(kbid: str):
     if entities_groups.status == ListEntitiesGroupsResponse.Status.OK:
         response = KnowledgeBoxEntities(uuid=kbid)
         for key, eg_summary in entities_groups.groups.items():
-            entities_group = EntitiesGroupSummary.from_message(eg_summary)
+            entities_group = from_proto.entities_group_summary(eg_summary)
             response.groups[key] = entities_group
         return response
     elif entities_groups.status == ListEntitiesGroupsResponse.Status.NOTFOUND:
@@ -114,7 +114,7 @@ async def get_entity(request: Request, kbid: str, group: str) -> EntitiesGroup:
     kbobj: GetEntitiesGroupResponse = await ingest.GetEntitiesGroup(l_request)  # type: ignore
     if kbobj.status == GetEntitiesGroupResponse.Status.OK:
-        response = EntitiesGroup.from_message(kbobj.group)
+        response = from_proto.entities_group(kbobj.group)
         return response
     elif kbobj.status == GetEntitiesGroupResponse.Status.KB_NOT_FOUND:
         raise HTTPException(status_code=404, detail=f"Knowledge Box '{kbid}' does not exist")
@@ -208,7 +208,7 @@ async def get_custom_synonyms(request: Request, kbid: str):
     if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
         raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
     synonyms = await datamanagers.atomic.synonyms.get(kbid=kbid) or Synonyms()
-    return KnowledgeBoxSynonyms.from_message(synonyms)
+    return from_proto.kb_synonyms(synonyms)
 @api.get(

nucliadb/reader/api/v1/vectorsets.py ADDED Viewed

@@ -0,0 +1,48 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+from fastapi_versioning import version
+from starlette.requests import Request
+from nucliadb.common import datamanagers
+from nucliadb.reader.api.v1.router import KB_PREFIX, api
+from nucliadb_models.resource import (
+    NucliaDBRoles,
+)
+from nucliadb_models.vectorsets import VectorSetList, VectorSetListItem
+from nucliadb_utils.authentication import requires_one
+@api.get(
+    f"/{KB_PREFIX}/{{kbid}}/vectorsets",
+    status_code=200,
+    summary="List vector sets",
+    response_model=VectorSetList,
+    tags=["Vector Sets"],
+    # TODO: remove when the feature is mature
+    include_in_schema=False,
+)
+@requires_one([NucliaDBRoles.READER])
+@version(1)
+async def list_vectorsets(request: Request, kbid: str) -> VectorSetList:
+    vectorsets = []
+    async with datamanagers.with_ro_transaction() as txn:
+        async for vid, _ in datamanagers.vectorsets.iter(txn, kbid=kbid):
+            vectorsets.append(VectorSetListItem(id=vid))
+    return VectorSetList(vectorsets=vectorsets)

nucliadb/search/api/v1/ask.py CHANGED Viewed

@@ -36,7 +36,8 @@ from nucliadb_models.search import (
     SyncAskResponse,
     parse_max_tokens,
 )
-from nucliadb_utils.authentication import requires
+from nucliadb_models.security import RequestSecurity
+from nucliadb_utils.authentication import NucliaUser, requires
 @api.post(
@@ -62,6 +63,15 @@ async def ask_knowledgebox_endpoint(
         "This is slower and requires waiting for entire answer to be ready.",
     ),
 ) -> Union[StreamingResponse, HTTPClientError, Response]:
+    current_user: NucliaUser = request.user
+    # If present, security groups from AuthorizationBackend overrides any
+    # security group of the payload
+    if current_user.security_groups:
+        if item.security is None:
+            item.security = RequestSecurity(groups=current_user.security_groups)
+        else:
+            item.security.groups = current_user.security_groups
     return await create_ask_response(
         kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for, x_synchronous
     )

nucliadb/search/api/v1/feedback.py CHANGED Viewed

@@ -18,10 +18,10 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 from fastapi import Header, Request, Response
 from fastapi_versioning import version
+from nucliadb.common.models_utils import to_proto
 from nucliadb.models.responses import HTTPClientError
 from nucliadb.search import logger
 from nucliadb.search.api.v1.router import KB_PREFIX, api
@@ -56,11 +56,11 @@ async def send_feedback_endpoint(
             audit.feedback(
                 kbid=kbid,
                 user=x_nucliadb_user,
-                client_type=x_ndb_client.to_proto(),
+                client_type=to_proto.client_type(x_ndb_client),
                 origin=x_forwarded_for,
                 learning_id=item.ident,
                 good=item.good,
-                task=item.task.to_proto(),
+                task=to_proto.feedback_task(item.task),
                 feedback=item.feedback,
                 text_block_id=item.text_block_id,
             )

nucliadb/search/api/v1/knowledgebox.py CHANGED Viewed

@@ -32,6 +32,7 @@ from nucliadb.common.cluster.utils import get_shard_manager
 from nucliadb.common.constants import AVG_PARAGRAPH_SIZE_BYTES
 from nucliadb.common.counters import IndexCounts
 from nucliadb.common.external_index_providers.manager import get_external_index_manager
+from nucliadb.common.models_utils import from_proto
 from nucliadb.search import logger
 from nucliadb.search.api.v1.router import KB_PREFIX, api
 from nucliadb.search.api.v1.utils import fastapi_query
@@ -47,9 +48,7 @@ from nucliadb_protos.noderesources_pb2 import Shard
 from nucliadb_protos.writer_pb2 import ShardObject as PBShardObject
 from nucliadb_protos.writer_pb2 import Shards
 from nucliadb_telemetry import errors
-from nucliadb_utils import const
 from nucliadb_utils.authentication import requires, requires_one
-from nucliadb_utils.utilities import has_feature
 MAX_PARAGRAPHS_FOR_SMALL_KB = 250_000
@@ -73,7 +72,7 @@ async def knowledgebox_shards(request: Request, kbid: str) -> KnowledgeboxShards
             status_code=404,
             detail="The knowledgebox or its shards configuration is missing",
         )
-    return KnowledgeboxShards.from_message(shards)
+    return from_proto.kb_shards(shards)
 @api.get(
@@ -124,8 +123,9 @@ async def _kb_counters(
         counters.sentences = index_counts.sentences
         is_small_kb = index_counts.paragraphs < MAX_PARAGRAPHS_FOR_SMALL_KB
         resource_count = await get_resources_count(kbid, force_calculate=is_small_kb)
-        # TODO: Find a way to query the fields count from the external index provider or use the catalog
+        # TODO: Find a way to query the fields count and size from the external index provider or use the catalog
         counters.resources = counters.fields = resource_count
+        counters.index_size = counters.paragraphs * AVG_PARAGRAPH_SIZE_BYTES
     else:
         node_index_counts, queried_shards = await get_node_index_counts(kbid)
         counters.fields = node_index_counts.fields
@@ -134,7 +134,7 @@ async def _kb_counters(
         is_small_kb = node_index_counts.paragraphs < MAX_PARAGRAPHS_FOR_SMALL_KB
         resource_count = await get_resources_count(kbid, force_calculate=is_small_kb)
         counters.resources = resource_count
-    counters.index_size = counters.paragraphs * AVG_PARAGRAPH_SIZE_BYTES
+        counters.index_size = node_index_counts.size_bytes
     if debug and queried_shards is not None:
         counters.shards = queried_shards
     return counters
@@ -165,9 +165,7 @@ async def get_node_index_counts(kbid: str) -> tuple[IndexCounts, list[str]]:
     queried_shards = []
     for shard_object in shard_groups:
         try:
-            node, shard_id = choose_node(
-                shard_object, use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": kbid})
-            )
+            node, shard_id = choose_node(shard_object)
         except KeyError:
             raise HTTPException(
                 status_code=500,
@@ -205,11 +203,7 @@ async def get_node_index_counts(kbid: str) -> tuple[IndexCounts, list[str]]:
     if results is None:
         raise HTTPException(status_code=503, detail=f"No shards found")
-    counts = IndexCounts(
-        fields=0,
-        paragraphs=0,
-        sentences=0,
-    )
+    counts = IndexCounts(fields=0, paragraphs=0, sentences=0, size_bytes=0)
     for shard in results:
         if isinstance(shard, Exception):
             logger.error("Error getting shard info", exc_info=shard)
@@ -218,4 +212,5 @@ async def get_node_index_counts(kbid: str) -> tuple[IndexCounts, list[str]]:
         counts.fields += shard.fields
         counts.paragraphs += shard.paragraphs
         counts.sentences += shard.sentences
+        counts.size_bytes += shard.size_bytes
     return counts, queried_shards

nucliadb/search/api/v1/search.py CHANGED Viewed

@@ -27,6 +27,7 @@ from fastapi_versioning import version
 from pydantic import ValidationError
 from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
+from nucliadb.common.models_utils import to_proto
 from nucliadb.models.responses import HTTPClientError
 from nucliadb.search import predict
 from nucliadb.search.api.v1.router import KB_PREFIX, api
@@ -292,7 +293,7 @@ async def search(
         hidden=await filter_hidden_resources(kbid, item.show_hidden),
         rephrase_prompt=item.rephrase_prompt,
     )
-    pb_query, incomplete_results, autofilters = await query_parser.parse()
+    pb_query, incomplete_results, autofilters, _ = await query_parser.parse()
     results, query_incomplete_results, queried_nodes = await node_query(
         kbid, Method.SEARCH, pb_query, target_shard_replicas=item.shards
@@ -318,7 +319,7 @@ async def search(
         audit.search(
             kbid,
             x_nucliadb_user,
-            x_ndb_client.to_proto(),
+            to_proto.client_type(x_ndb_client),
             x_forwarded_for,
             pb_query,
             time() - start_time,

nucliadb/search/api/v1/suggest.py CHANGED Viewed

@@ -151,8 +151,6 @@ async def suggest(
         search_results = await merge_suggest_results(
             results,
             kbid=kbid,
-            show=show,
-            field_type_filter=field_type_filter,
             highlight=highlight,
         )

nucliadb/search/predict.py CHANGED Viewed

@@ -21,7 +21,7 @@ import json
 import os
 import random
 from enum import Enum
-from typing import Any, AsyncIterator, Optional
+from typing import Any, AsyncGenerator, Optional
 from unittest.mock import AsyncMock, Mock
 import aiohttp
@@ -121,12 +121,14 @@ class AnswerStatusCode(str, Enum):
     SUCCESS = "0"
     ERROR = "-1"
     NO_CONTEXT = "-2"
+    NO_RETRIEVAL_DATA = "-3"
     def prettify(self) -> str:
         return {
             AnswerStatusCode.SUCCESS: "success",
             AnswerStatusCode.ERROR: "error",
             AnswerStatusCode.NO_CONTEXT: "no_context",
+            AnswerStatusCode.NO_RETRIEVAL_DATA: "no_retrieval_data",
         }[self]
@@ -266,7 +268,7 @@ class PredictEngine:
     @predict_observer.wrap({"type": "chat_ndjson"})
     async def chat_query_ndjson(
         self, kbid: str, item: ChatModel
-    ) -> tuple[str, str, AsyncIterator[GenerativeChunk]]:
+    ) -> tuple[str, str, AsyncGenerator[GenerativeChunk, None]]:
         """
         Chat query using the new stream format
         Format specs: https://github.com/ndjson/ndjson-spec
@@ -442,7 +444,7 @@ class DummyPredictEngine(PredictEngine):
     async def chat_query_ndjson(
         self, kbid: str, item: ChatModel
-    ) -> tuple[str, str, AsyncIterator[GenerativeChunk]]:
+    ) -> tuple[str, str, AsyncGenerator[GenerativeChunk, None]]:
         self.calls.append(("chat_query_ndjson", item))
         async def generate():
@@ -553,7 +555,7 @@ def get_answer_generator(response: aiohttp.ClientResponse):
 def get_chat_ndjson_generator(
     response: aiohttp.ClientResponse,
-) -> AsyncIterator[GenerativeChunk]:
+) -> AsyncGenerator[GenerativeChunk, None]:
     async def _parse_generative_chunks(gen):
         async for chunk in gen:
             try:

nucliadb/search/requesters/utils.py CHANGED Viewed

@@ -123,7 +123,6 @@ async def node_query(
         try:
             node, shard_id = cluster_manager.choose_node(
                 shard_obj,
-                use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": kbid}),
                 use_read_replica_nodes=use_read_replica_nodes,
                 target_shard_replicas=target_shard_replicas,
             )
@@ -224,7 +223,7 @@ def validate_node_query_results(results: list[Any]) -> Optional[HTTPException]:
                     )
             else:
                 errors.capture_exception(result)
-                logger.exception("Error while querying shard data", exc_info=result)
+                logger.exception(f"Error while querying shard data {result}", exc_info=result)
             return HTTPException(status_code=status_code, detail=reason)

nucliadb 6.2.0.post2675__py3-none-any.whl → 6.2.1__py3-none-any.whl

nucliadb 6.2.0.post2675__py3-none-any.whl → 6.2.1__py3-none-any.whl