nucliadb 6.2.0.post2679__py3-none-any.whl → 6.2.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- migrations/0028_extracted_vectors_reference.py +61 -0
- migrations/0029_backfill_field_status.py +149 -0
- migrations/0030_label_deduplication.py +60 -0
- nucliadb/common/cluster/manager.py +41 -331
- nucliadb/common/cluster/rebalance.py +2 -2
- nucliadb/common/cluster/rollover.py +12 -71
- nucliadb/common/cluster/settings.py +3 -0
- nucliadb/common/cluster/standalone/utils.py +0 -43
- nucliadb/common/cluster/utils.py +0 -16
- nucliadb/common/counters.py +1 -0
- nucliadb/common/datamanagers/fields.py +48 -7
- nucliadb/common/datamanagers/vectorsets.py +11 -2
- nucliadb/common/external_index_providers/base.py +2 -1
- nucliadb/common/external_index_providers/pinecone.py +3 -5
- nucliadb/common/ids.py +18 -4
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +76 -37
- nucliadb/export_import/models.py +3 -3
- nucliadb/health.py +0 -7
- nucliadb/ingest/app.py +0 -8
- nucliadb/ingest/consumer/auditing.py +1 -1
- nucliadb/ingest/consumer/shard_creator.py +1 -1
- nucliadb/ingest/fields/base.py +83 -21
- nucliadb/ingest/orm/brain.py +55 -56
- nucliadb/ingest/orm/broker_message.py +12 -2
- nucliadb/ingest/orm/entities.py +6 -17
- nucliadb/ingest/orm/knowledgebox.py +44 -22
- nucliadb/ingest/orm/processor/data_augmentation.py +7 -29
- nucliadb/ingest/orm/processor/processor.py +5 -2
- nucliadb/ingest/orm/resource.py +222 -413
- nucliadb/ingest/processing.py +8 -2
- nucliadb/ingest/serialize.py +77 -46
- nucliadb/ingest/service/writer.py +2 -56
- nucliadb/ingest/settings.py +1 -4
- nucliadb/learning_proxy.py +6 -4
- nucliadb/purge/__init__.py +102 -12
- nucliadb/purge/orphan_shards.py +6 -4
- nucliadb/reader/api/models.py +3 -3
- nucliadb/reader/api/v1/__init__.py +1 -0
- nucliadb/reader/api/v1/download.py +2 -2
- nucliadb/reader/api/v1/knowledgebox.py +3 -3
- nucliadb/reader/api/v1/resource.py +23 -12
- nucliadb/reader/api/v1/services.py +4 -4
- nucliadb/reader/api/v1/vectorsets.py +48 -0
- nucliadb/search/api/v1/ask.py +11 -1
- nucliadb/search/api/v1/feedback.py +3 -3
- nucliadb/search/api/v1/knowledgebox.py +8 -13
- nucliadb/search/api/v1/search.py +3 -2
- nucliadb/search/api/v1/suggest.py +0 -2
- nucliadb/search/predict.py +6 -4
- nucliadb/search/requesters/utils.py +1 -2
- nucliadb/search/search/chat/ask.py +77 -13
- nucliadb/search/search/chat/prompt.py +16 -5
- nucliadb/search/search/chat/query.py +74 -34
- nucliadb/search/search/exceptions.py +2 -7
- nucliadb/search/search/find.py +9 -5
- nucliadb/search/search/find_merge.py +10 -4
- nucliadb/search/search/graph_strategy.py +884 -0
- nucliadb/search/search/hydrator.py +6 -0
- nucliadb/search/search/merge.py +79 -24
- nucliadb/search/search/query.py +74 -245
- nucliadb/search/search/query_parser/exceptions.py +11 -1
- nucliadb/search/search/query_parser/fetcher.py +405 -0
- nucliadb/search/search/query_parser/models.py +0 -3
- nucliadb/search/search/query_parser/parser.py +22 -21
- nucliadb/search/search/rerankers.py +1 -42
- nucliadb/search/search/shards.py +19 -0
- nucliadb/standalone/api_router.py +2 -14
- nucliadb/standalone/settings.py +4 -0
- nucliadb/train/generators/field_streaming.py +7 -3
- nucliadb/train/lifecycle.py +3 -6
- nucliadb/train/nodes.py +14 -12
- nucliadb/train/resource.py +380 -0
- nucliadb/writer/api/constants.py +20 -16
- nucliadb/writer/api/v1/__init__.py +1 -0
- nucliadb/writer/api/v1/export_import.py +1 -1
- nucliadb/writer/api/v1/field.py +13 -7
- nucliadb/writer/api/v1/knowledgebox.py +3 -46
- nucliadb/writer/api/v1/resource.py +20 -13
- nucliadb/writer/api/v1/services.py +10 -1
- nucliadb/writer/api/v1/upload.py +61 -34
- nucliadb/writer/{vectorsets.py → api/v1/vectorsets.py} +99 -47
- nucliadb/writer/back_pressure.py +17 -46
- nucliadb/writer/resource/basic.py +9 -7
- nucliadb/writer/resource/field.py +42 -9
- nucliadb/writer/settings.py +2 -2
- nucliadb/writer/tus/gcs.py +11 -10
- {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/METADATA +11 -14
- {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/RECORD +94 -96
- {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/WHEEL +1 -1
- nucliadb/common/cluster/discovery/base.py +0 -178
- nucliadb/common/cluster/discovery/k8s.py +0 -301
- nucliadb/common/cluster/discovery/manual.py +0 -57
- nucliadb/common/cluster/discovery/single.py +0 -51
- nucliadb/common/cluster/discovery/types.py +0 -32
- nucliadb/common/cluster/discovery/utils.py +0 -67
- nucliadb/common/cluster/standalone/grpc_node_binding.py +0 -349
- nucliadb/common/cluster/standalone/index_node.py +0 -123
- nucliadb/common/cluster/standalone/service.py +0 -84
- nucliadb/standalone/introspect.py +0 -208
- nucliadb-6.2.0.post2679.dist-info/zip-safe +0 -1
- /nucliadb/common/{cluster/discovery → models_utils}/__init__.py +0 -0
- {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.0.post2679.dist-info → nucliadb-6.2.1.dist-info}/top_level.txt +0 -0
nucliadb/search/search/query.py
CHANGED
@@ -23,12 +23,10 @@ import string
 from datetime import datetime
 from typing import Any, Awaitable, Optional, Union

-from async_lru import alru_cache
-
 from nucliadb.common import datamanagers
-from nucliadb.common.
+from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap
 from nucliadb.search import logger
-from nucliadb.search.predict import SendToPredictError
+from nucliadb.search.predict import SendToPredictError
 from nucliadb.search.search.filters import (
     convert_to_node_filters,
     flatten_filter_literals,
@@ -39,32 +37,31 @@ from nucliadb.search.search.filters import (
 )
 from nucliadb.search.search.metrics import (
     node_features,
-    query_parse_dependency_observer,
 )
+from nucliadb.search.search.query_parser.fetcher import Fetcher, get_classification_labels
 from nucliadb.search.search.rank_fusion import (
     RankFusionAlgorithm,
 )
 from nucliadb.search.search.rerankers import (
     Reranker,
 )
-from nucliadb.search.utilities import get_predict
 from nucliadb_models.internal.predict import QueryInfo
 from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
 from nucliadb_models.metadata import ResourceProcessingStatus
 from nucliadb_models.search import (
     Filter,
+    KnowledgeGraphEntity,
     MaxTokens,
     MinScore,
     SearchOptions,
     SortField,
-    SortFieldMap,
     SortOptions,
     SortOrder,
     SortOrderMap,
     SuggestOptions,
 )
 from nucliadb_models.security import RequestSecurity
-from nucliadb_protos import
+from nucliadb_protos import nodereader_pb2, utils_pb2
 from nucliadb_protos.noderesources_pb2 import Resource

 from .exceptions import InvalidQueryError
@@ -88,13 +85,6 @@ class QueryParser:
     """

     _query_information_task: Optional[asyncio.Task] = None
-    _get_vectorset_task: Optional[asyncio.Task] = None
-    _detected_entities_task: Optional[asyncio.Task] = None
-    _entities_meta_cache_task: Optional[asyncio.Task] = None
-    _deleted_entities_groups_task: Optional[asyncio.Task] = None
-    _synonyms_task: Optional[asyncio.Task] = None
-    _get_classification_labels_task: Optional[asyncio.Task] = None
-    _get_matryoshka_dimension_task: Optional[asyncio.Task] = None

     def __init__(
         self,
@@ -106,6 +96,7 @@ class QueryParser:
         keyword_filters: Union[list[str], list[Filter]],
         top_k: int,
         min_score: MinScore,
+        query_entities: Optional[list[KnowledgeGraphEntity]] = None,
         faceted: Optional[list[str]] = None,
         sort: Optional[SortOptions] = None,
         range_creation_start: Optional[datetime] = None,
@@ -132,6 +123,7 @@ class QueryParser:
         self.kbid = kbid
         self.features = features
         self.query = query
+        self.query_entities = query_entities
         self.hidden = hidden
         if self.hidden is not None:
             if self.hidden:
@@ -169,6 +161,15 @@ class QueryParser:
         self.max_tokens = max_tokens
         self.rank_fusion = rank_fusion
         self.reranker = reranker
+        self.fetcher = Fetcher(
+            kbid=kbid,
+            query=query,
+            user_vector=user_vector,
+            vectorset=vectorset,
+            rephrase=rephrase,
+            rephrase_prompt=rephrase_prompt,
+            generative_model=generative_model,
+        )

     @property
     def has_vector_search(self) -> bool:
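Note: the Fetcher constructed here centralizes the lazily-created, cached asyncio.Task lookups that QueryParser previously kept as one attribute per dependency (see the removals in the next hunk). A minimal sketch of that pattern, with all names hypothetical rather than taken from nucliadb/search/search/query_parser/fetcher.py:

    import asyncio
    from typing import Awaitable, Callable, Optional

    class LazyFetcher:
        """Caches each async dependency lookup as a single shared task."""

        def __init__(self, kbid: str, query: str):
            self.kbid = kbid
            self.query = query
            self._tasks: dict[str, asyncio.Task] = {}

        def _cached(self, key: str, factory: Callable[[], Awaitable]) -> asyncio.Task:
            # Create each dependency task at most once so concurrent callers
            # await the same in-flight lookup instead of re-fetching.
            if key not in self._tasks:
                self._tasks[key] = asyncio.create_task(factory())
            return self._tasks[key]

        async def get_synonyms(self) -> Optional[dict]:
            return await self._cached("synonyms", self._fetch_synonyms)

        async def _fetch_synonyms(self) -> Optional[dict]:
            # Stand-in for the real maindb / predict API lookup.
            return None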
@@ -184,78 +185,12 @@ class QueryParser:
         return self._query_information_task

     async def _query_information(self) -> QueryInfo:
-
-
-
-
-
-
-        if self._get_vectorset_task is None:
-            self._get_vectorset_task = asyncio.create_task(self._select_vectorset())
-        return self._get_vectorset_task
-
-    async def _select_vectorset(self) -> Optional[str]:
-        if self.vectorset:
-            return self.vectorset
-
-        # When vectorset is not provided we get the default from Predict API
-
-        try:
-            query_information = await self._get_query_information()
-        except SendToPredictError:
-            return None
-
-        if query_information.sentence is None:
-            logger.error(
-                "Asking for a vectorset but /query didn't return one", extra={"kbid": self.kbid}
-            )
-            return None
-
-        for vectorset in query_information.sentence.vectors.keys():
-            self.vectorset = vectorset
-            break
-
-        return self.vectorset
-
-    def _get_matryoshka_dimension(self) -> Awaitable[Optional[int]]:
-        if self._get_matryoshka_dimension_task is None:
-            self._get_matryoshka_dimension_task = asyncio.create_task(self._matryoshka_dimension())
-        return self._get_matryoshka_dimension_task
-
-    async def _matryoshka_dimension(self) -> Optional[int]:
-        vectorset = await self._select_vectorset()
-        return await get_matryoshka_dimension_cached(self.kbid, vectorset)
-
-    def _get_detected_entities(self) -> Awaitable[list[utils_pb2.RelationNode]]:
-        if self._detected_entities_task is None:  # pragma: no cover
-            self._detected_entities_task = asyncio.create_task(detect_entities(self.kbid, self.query))
-        return self._detected_entities_task
-
-    def _get_entities_meta_cache(
-        self,
-    ) -> Awaitable[datamanagers.entities.EntitiesMetaCache]:
-        if self._entities_meta_cache_task is None:
-            self._entities_meta_cache_task = asyncio.create_task(get_entities_meta_cache(self.kbid))
-        return self._entities_meta_cache_task
-
-    def _get_deleted_entity_groups(self) -> Awaitable[list[str]]:
-        if self._deleted_entities_groups_task is None:
-            self._deleted_entities_groups_task = asyncio.create_task(
-                get_deleted_entity_groups(self.kbid)
-            )
-        return self._deleted_entities_groups_task
-
-    def _get_synomyns(self) -> Awaitable[Optional[knowledgebox_pb2.Synonyms]]:
-        if self._synonyms_task is None:
-            self._synonyms_task = asyncio.create_task(get_kb_synonyms(self.kbid))
-        return self._synonyms_task
-
-    def _get_classification_labels(self) -> Awaitable[knowledgebox_pb2.Labels]:
-        if self._get_classification_labels_task is None:
-            self._get_classification_labels_task = asyncio.create_task(
-                get_classification_labels(self.kbid)
-            )
-        return self._get_classification_labels_task
+        # HACK: while transitioning to the new query parser, use fetcher under
+        # the hood for a smoother migration
+        query_info = await self.fetcher._predict_query_endpoint()
+        if query_info is None:
+            raise SendToPredictError("Error while using predict's query endpoint")
+        return query_info

     async def _schedule_dependency_tasks(self) -> None:
         """
@@ -263,23 +198,24 @@ class QueryParser:
         for the sake of the query being performed
         """
         if len(self.label_filters) > 0 and has_classification_label_filters(self.flat_label_filters):
-            asyncio.ensure_future(self.
+            asyncio.ensure_future(self.fetcher.get_classification_labels())

         if self.has_vector_search and self.user_vector is None:
             self.query_endpoint_used = True
             asyncio.ensure_future(self._get_query_information())
-
+            # XXX: should we also ensure get_vectorset and get_query_vector?
+            asyncio.ensure_future(self.fetcher.get_matryoshka_dimension())

         if (self.has_relations_search or self.autofilter) and len(self.query) > 0:
             if not self.query_endpoint_used:
                 # If we only need to detect entities, we don't need the query endpoint
-                asyncio.ensure_future(self.
-            asyncio.ensure_future(self.
-            asyncio.ensure_future(self.
+                asyncio.ensure_future(self.fetcher.get_detected_entities())
+            asyncio.ensure_future(self.fetcher.get_entities_meta_cache())
+            asyncio.ensure_future(self.fetcher.get_deleted_entity_groups())
         if self.with_synonyms and self.query:
-            asyncio.ensure_future(self.
+            asyncio.ensure_future(self.fetcher.get_synonyms())

-    async def parse(self) -> tuple[nodereader_pb2.SearchRequest, bool, list[str]]:
+    async def parse(self) -> tuple[nodereader_pb2.SearchRequest, bool, list[str], Optional[str]]:
         """
         :return: (request, incomplete, autofilters)
         where:
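Note: parse() now returns a 4-tuple, while the docstring above still documents the old 3-tuple. A hypothetical call site (names assumed, not package code) showing the extra element, the rephrased query, which is None when predict performed no rephrasing:

    async def build_search_request(query_parser: QueryParser):
        pb_query, incomplete, autofilters, rephrased_query = await query_parser.parse()
        # rephrased_query can be surfaced to the client next to the results
        return pb_query, incomplete, autofilters, rephrased_query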
@@ -298,19 +234,20 @@ class QueryParser:
         await self.parse_filters(request)
         self.parse_document_search(request)
         self.parse_paragraph_search(request)
-        incomplete = await self.parse_vector_search(request)
+        incomplete, rephrased_query = await self.parse_vector_search(request)
+        # BUG: autofilters are not used to filter, but we say we do
         autofilters = await self.parse_relation_search(request)
         await self.parse_synonyms(request)
         await self.parse_min_score(request, incomplete)
         await self.adjust_page_size(request, self.rank_fusion, self.reranker)
-        return request, incomplete, autofilters
+        return request, incomplete, autofilters, rephrased_query

     async def parse_filters(self, request: nodereader_pb2.SearchRequest) -> None:
         if len(self.label_filters) > 0:
             field_labels = self.flat_label_filters
             paragraph_labels: list[str] = []
             if has_classification_label_filters(self.flat_label_filters):
-                classification_labels = await self.
+                classification_labels = await self.fetcher.get_classification_labels()
                 field_labels, paragraph_labels = split_labels_by_type(
                     self.flat_label_filters, classification_labels
                 )
@@ -388,7 +325,7 @@ class QueryParser:
         else:
             request.result_per_page = self.top_k

-        sort_field =
+        sort_field = get_sort_field_proto(self.sort.field) if self.sort else None
         if sort_field is not None:
             request.order.sort_by = sort_field
             request.order.type = SortOrderMap[self.sort.order]  # type: ignore
@@ -399,19 +336,13 @@ class QueryParser:
             semantic_min_score = self.min_score.semantic
         elif self.has_vector_search and not incomplete:
             query_information = await self._get_query_information()
-            vectorset = await self.
-
-
-
-                semantic_min_score = semantic_threshold
-            else:
-                logger.warning(
-                    "Semantic threshold not found in query information, using default",
-                    extra={"kbid": self.kbid},
-                )
+            vectorset = await self.fetcher.get_vectorset()
+            semantic_threshold = query_information.semantic_thresholds.get(vectorset, None)
+            if semantic_threshold is not None:
+                semantic_min_score = semantic_threshold
         else:
             logger.warning(
-                "
+                "Semantic threshold not found in query information, using default",
                 extra={"kbid": self.kbid},
             )
         self.min_score.semantic = semantic_min_score
@@ -428,91 +359,49 @@ class QueryParser:
         request.paragraph = True
         node_features.inc({"type": "paragraphs"})

-    async def
-
-
-
-        """
-        if not self.vectorset:
-            return None
-
-        # validate vectorset
-        async with datamanagers.with_ro_transaction() as txn:
-            if not await datamanagers.vectorsets.exists(
-                txn, kbid=self.kbid, vectorset_id=self.vectorset
-            ):
-                raise InvalidQueryError(
-                    "vectorset",
-                    f"Vectorset {self.vectorset} doesn't exist in you Knowledge Box",
-                )
-        return self.vectorset
-
-    async def parse_vector_search(self, request: nodereader_pb2.SearchRequest) -> bool:
+    async def parse_vector_search(
+        self, request: nodereader_pb2.SearchRequest
+    ) -> tuple[bool, Optional[str]]:
         if not self.has_vector_search:
-            return False
+            return False, None

         node_features.inc({"type": "vectors"})

-
-
-
-
-            request.vectorset = vectorset
-
-        query_vector = None
-        if self.user_vector is None:
-            try:
-                query_info = await self._get_query_information()
-            except SendToPredictError as err:
-                logger.warning(f"Errors on predict api trying to embedd query: {err}")
-                incomplete = True
-            else:
-                if query_info and query_info.sentence:
-                    if vectorset:
-                        if vectorset in query_info.sentence.vectors:
-                            query_vector = query_info.sentence.vectors[vectorset]
-                        else:
-                            incomplete = True
-                    else:
-                        for vectorset_id, vector in query_info.sentence.vectors.items():
-                            if vector:
-                                query_vector = vector
-                                break
-                        else:
-                            incomplete = True
-
-                else:
-                    incomplete = True
-        else:
-            query_vector = self.user_vector
+        vectorset = await self.fetcher.get_vectorset()
+        query_vector = await self.fetcher.get_query_vector()
+        rephrased_query = await self.fetcher.get_rephrased_query()
+        incomplete = query_vector is None

+        request.vectorset = vectorset
         if query_vector is not None:
-            matryoshka_dimension = await self._get_matryoshka_dimension()
-            if matryoshka_dimension is not None:
-                # KB using a matryoshka embeddings model, cut the query vector
-                # accordingly
-                query_vector = query_vector[:matryoshka_dimension]
             request.vector.extend(query_vector)

-        return incomplete
+        return incomplete, rephrased_query

     async def parse_relation_search(self, request: nodereader_pb2.SearchRequest) -> list[str]:
         autofilters = []
+        # BUG: autofiler should autofilter, not enable relation search
         if self.has_relations_search or self.autofilter:
-            if
-                detected_entities =
+            if self.query_entities:
+                detected_entities = []
+                for entity in self.query_entities:
+                    relation_node = utils_pb2.RelationNode()
+                    relation_node.value = entity.name
+                    if entity.type is not None:
+                        relation_node.ntype = RelationNodeTypeMap[entity.type]
+                    if entity.subtype is not None:
+                        relation_node.subtype = entity.subtype
+                    detected_entities.append(relation_node)
             else:
-
-
-                detected_entities = convert_relations(query_info_result.entities.model_dump())
-            else:
-                detected_entities = []
-            meta_cache = await self._get_entities_meta_cache()
+                detected_entities = await self.fetcher.get_detected_entities()
+            meta_cache = await self.fetcher.get_entities_meta_cache()
             detected_entities = expand_entities(meta_cache, detected_entities)
             if self.has_relations_search:
                 request.relation_subgraph.entry_points.extend(detected_entities)
                 request.relation_subgraph.depth = 1
-                request.relation_subgraph.deleted_groups.extend(
+                request.relation_subgraph.deleted_groups.extend(
+                    await self.fetcher.get_deleted_entity_groups()
+                )
                 for group_id, deleted_entities in meta_cache.deleted_entities.items():
                     request.relation_subgraph.deleted_entities.append(
                         nodereader_pb2.EntitiesSubgraphRequest.DeletedEntities(
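Note: with this change a caller can supply pre-identified query_entities and parse_relation_search() builds the RelationNode entry points directly, skipping predict's entity detection. A hypothetical construction, assuming KnowledgeGraphEntity accepts these fields as keyword arguments (only the name/type/subtype attribute accesses above are confirmed by the diff):

    from nucliadb_models.search import KnowledgeGraphEntity

    # Assumed constructor shape; "Marie Curie" and "scientist" are examples.
    entities = [KnowledgeGraphEntity(name="Marie Curie", subtype="scientist")]
    # QueryParser(..., query_entities=entities) then maps each entity onto a
    # utils_pb2.RelationNode without awaiting fetcher.get_detected_entities().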
@@ -545,7 +434,7 @@ class QueryParser:
                 "Search with custom synonyms is only supported on paragraph and document search",
             )

-        synonyms = await self.
+        synonyms = await self.fetcher.get_synonyms()
         if synonyms is None:
             # No synonyms found
             return
@@ -681,29 +570,6 @@ async def paragraph_query_to_pb(
     return request


-@query_parse_dependency_observer.wrap({"type": "query_information"})
-async def query_information(
-    kbid: str,
-    query: str,
-    semantic_model: Optional[str],
-    generative_model: Optional[str] = None,
-    rephrase: bool = False,
-    rephrase_prompt: Optional[str] = None,
-) -> QueryInfo:
-    predict = get_predict()
-    return await predict.query(kbid, query, semantic_model, generative_model, rephrase, rephrase_prompt)
-
-
-@query_parse_dependency_observer.wrap({"type": "detect_entities"})
-async def detect_entities(kbid: str, query: str) -> list[utils_pb2.RelationNode]:
-    predict = get_predict()
-    try:
-        return await predict.detect_entities(kbid, query)
-    except SendToPredictError as ex:
-        logger.warning(f"Errors on predict api detecting entities: {ex}")
-        return []
-
-
 def expand_entities(
     meta_cache: datamanagers.entities.EntitiesMetaCache,
     detected_entities: list[utils_pb2.RelationNode],
@@ -834,30 +700,6 @@ PROCESSING_STATUS_TO_PB_MAP = {
 }


-@query_parse_dependency_observer.wrap({"type": "synonyms"})
-async def get_kb_synonyms(kbid: str) -> Optional[knowledgebox_pb2.Synonyms]:
-    async with get_driver().transaction(read_only=True) as txn:
-        return await datamanagers.synonyms.get(txn, kbid=kbid)
-
-
-@query_parse_dependency_observer.wrap({"type": "entities_meta_cache"})
-async def get_entities_meta_cache(kbid: str) -> datamanagers.entities.EntitiesMetaCache:
-    async with get_driver().transaction(read_only=True) as txn:
-        return await datamanagers.entities.get_entities_meta_cache(txn, kbid=kbid)
-
-
-@query_parse_dependency_observer.wrap({"type": "deleted_entities_groups"})
-async def get_deleted_entity_groups(kbid: str) -> list[str]:
-    async with get_driver().transaction(read_only=True) as txn:
-        return list((await datamanagers.entities.get_deleted_groups(txn, kbid=kbid)).entities_groups)
-
-
-@query_parse_dependency_observer.wrap({"type": "classification_labels"})
-async def get_classification_labels(kbid: str) -> knowledgebox_pb2.Labels:
-    async with get_driver().transaction(read_only=True) as txn:
-        return await datamanagers.labels.get_labels(txn, kbid=kbid)
-
-
 def check_supported_filters(filters: dict[str, Any], paragraph_labels: list[str]):
     """
     Check if the provided filters are supported:
@@ -890,23 +732,10 @@ def check_supported_filters(filters: dict[str, Any], paragraph_labels: list[str]
     )


-
-
-
-
-
-
-
-async def get_matryoshka_dimension(kbid: str, vectorset: Optional[str]) -> Optional[int]:
-    async with get_driver().transaction(read_only=True) as txn:
-        matryoshka_dimension = None
-        if not vectorset:
-            # XXX this should be migrated once we remove the "default" vectorset
-            # concept
-            matryoshka_dimension = await datamanagers.kb.get_matryoshka_vector_dimension(txn, kbid=kbid)
-        else:
-            vectorset_config = await datamanagers.vectorsets.get(txn, kbid=kbid, vectorset_id=vectorset)
-            if vectorset_config is not None and vectorset_config.vectorset_index_config.vector_dimension:
-                matryoshka_dimension = vectorset_config.vectorset_index_config.vector_dimension
-
-        return matryoshka_dimension
+def get_sort_field_proto(obj: SortField) -> Optional[nodereader_pb2.OrderBy.OrderField.ValueType]:
+    return {
+        SortField.SCORE: None,
+        SortField.CREATED: nodereader_pb2.OrderBy.OrderField.CREATED,
+        SortField.MODIFIED: nodereader_pb2.OrderBy.OrderField.MODIFIED,
+        SortField.TITLE: None,
+    }[obj]
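Note: get_sort_field_proto apparently replaces the SortFieldMap lookup whose import was removed at the top of the file. Score and title sorts map to None, presumably because they have no index-level order field. An illustrative check (not package code):

    from nucliadb_models.search import SortField
    from nucliadb_protos import nodereader_pb2

    assert get_sort_field_proto(SortField.CREATED) == nodereader_pb2.OrderBy.OrderField.CREATED
    assert get_sort_field_proto(SortField.SCORE) is None  # score ordering is handled outside the index sort field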
nucliadb/search/search/query_parser/exceptions.py
CHANGED
@@ -19,4 +19,14 @@
 #


-class
+class InternalParserError(ValueError):
+    """Raised when parsing fails due to some internal error"""
+
+
+class InvalidQueryError(Exception):
+    """Raised when parsing a query containing an invalid parameter"""
+
+    def __init__(self, param: str, reason: str):
+        self.param = param
+        self.reason = reason
+        super().__init__(f"Invalid query. Error in {param}: {reason}")
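Note: the reworked InvalidQueryError keeps the offending parameter and the reason as attributes, so callers can report both. A small usage sketch (illustration only; the error message is an example):

    from nucliadb.search.search.query_parser.exceptions import InvalidQueryError

    try:
        raise InvalidQueryError("vectorset", "Vectorset multilingual doesn't exist in your Knowledge Box")
    except InvalidQueryError as exc:
        assert exc.param == "vectorset"
        print(exc)  # Invalid query. Error in vectorset: Vectorset multilingual doesn't exist...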