nucliadb 6.9.1.post5192__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff reflects the changes between two package versions as published to a supported public registry. It is provided for informational purposes only.
- migrations/0023_backfill_pg_catalog.py +2 -2
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +2 -2
- migrations/0039_backfill_converation_splits_metadata.py +2 -2
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/interface.py +12 -12
- nucliadb/common/catalog/pg.py +41 -29
- nucliadb/common/catalog/utils.py +3 -3
- nucliadb/common/cluster/manager.py +5 -4
- nucliadb/common/cluster/rebalance.py +483 -114
- nucliadb/common/cluster/rollover.py +25 -9
- nucliadb/common/cluster/settings.py +3 -8
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +4 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +4 -5
- nucliadb/common/filter_expression.py +128 -40
- nucliadb/common/http_clients/processing.py +12 -23
- nucliadb/common/ids.py +6 -4
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +3 -4
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +3 -8
- nucliadb/ingest/consumer/service.py +3 -3
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +28 -49
- nucliadb/ingest/fields/conversation.py +12 -12
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +78 -64
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +4 -4
- nucliadb/ingest/orm/knowledgebox.py +18 -27
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +27 -27
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +72 -70
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +3 -109
- nucliadb/ingest/settings.py +3 -4
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +11 -11
- nucliadb/metrics_exporter.py +5 -4
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +3 -4
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/learning_config.py +24 -4
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +2 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +11 -15
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +25 -25
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +7 -7
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +24 -17
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -23
- nucliadb/search/search/chat/ask.py +88 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +449 -36
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +3 -152
- nucliadb/search/search/hydrator/fields.py +92 -50
- nucliadb/search/search/hydrator/images.py +7 -7
- nucliadb/search/search/hydrator/paragraphs.py +42 -26
- nucliadb/search/search/hydrator/resources.py +20 -16
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +10 -9
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +13 -9
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -20
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +4 -5
- nucliadb/search/search/query_parser/parsers/catalog.py +5 -6
- nucliadb/search/search/query_parser/parsers/common.py +5 -6
- nucliadb/search/search/query_parser/parsers/find.py +6 -26
- nucliadb/search/search/query_parser/parsers/graph.py +13 -23
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -53
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +5 -6
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +2 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +2 -2
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +7 -11
- nucliadb/writer/api/v1/knowledgebox.py +3 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +7 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +1 -3
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +5 -6
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +9 -10
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb-6.9.1.post5192.dist-info/RECORD +0 -392
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/common/cache.py
CHANGED
@@ -24,7 +24,7 @@ from abc import ABC, abstractmethod
 from contextvars import ContextVar
 from dataclasses import dataclass
 from functools import cached_property
-from typing import Generic,
+from typing import Generic, TypeVar
 
 import backoff
 from async_lru import _LRUCacheWrapper, alru_cache
@@ -66,9 +66,9 @@ class Cache(Generic[K, T], ABC):
 
     """
 
-    cache: _LRUCacheWrapper[
+    cache: _LRUCacheWrapper[T | None]
 
-    async def get(self, *args: K.args, **kwargs: K.kwargs) ->
+    async def get(self, *args: K.args, **kwargs: K.kwargs) -> T | None:
         result = await self.cache(*args)
         # Do not cache None
         if result is None:
@@ -88,7 +88,7 @@ class Cache(Generic[K, T], ABC):
 class ResourceCache(Cache[[str, str], ResourceORM]):
     def __init__(self, cache_size: int) -> None:
         @alru_cache(maxsize=cache_size)
-        async def _get_resource(kbid: str, rid: str) ->
+        async def _get_resource(kbid: str, rid: str) -> ResourceORM | None:
            storage = await get_storage()
            async with get_driver().ro_transaction() as txn:
                kb = KnowledgeBoxORM(txn, storage, kbid)
@@ -115,7 +115,7 @@ class ExtractedTextCache(Cache[[str, FieldId], ExtractedText]):
     def __init__(self, cache_size: int) -> None:
         @alru_cache(maxsize=cache_size)
         @backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
-        async def _get_extracted_text(kbid: str, field_id: FieldId) ->
+        async def _get_extracted_text(kbid: str, field_id: FieldId) -> ExtractedText | None:
            storage = await get_storage()
            try:
                sf = storage.file_extracted(
@@ -144,18 +144,18 @@ class ExtractedTextCache(Cache[[str, FieldId], ExtractedText]):
 
 # Global caches (per asyncio task)
 
-rcache: ContextVar[
-etcache: ContextVar[
+rcache: ContextVar[ResourceCache | None] = ContextVar("rcache", default=None)
+etcache: ContextVar[ExtractedTextCache | None] = ContextVar("etcache", default=None)
 
 
 # Cache management
 
 
-def get_resource_cache() ->
+def get_resource_cache() -> ResourceCache | None:
    return rcache.get()
 
 
-def get_extracted_text_cache() ->
+def get_extracted_text_cache() -> ExtractedTextCache | None:
    return etcache.get()
 
 
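The hunks above also show cache.py's per-task caches: a ContextVar holds an optional cache object so each asyncio task (request) can install its own instance, and None results are never stored. A minimal, stdlib-only sketch of that pattern follows; DemoCache and handle_request are illustrative names, not nucliadb APIs.

# Sketch of the per-task cache pattern, assuming nothing about nucliadb internals.
from __future__ import annotations

import asyncio
from contextvars import ContextVar


class DemoCache:
    def __init__(self) -> None:
        self._data: dict[str, str | None] = {}

    async def get(self, key: str) -> str | None:
        if key in self._data:
            return self._data[key]
        value = await self._load(key)
        if value is not None:  # mirror Cache.get: do not cache None
            self._data[key] = value
        return value

    async def _load(self, key: str) -> str | None:
        # Stand-in for the storage/maindb lookup done by the real caches
        return key.upper() or None


# Task-local cache handle, defaulting to "no cache installed"
demo_cache: ContextVar[DemoCache | None] = ContextVar("demo_cache", default=None)


async def handle_request(key: str) -> str | None:
    demo_cache.set(DemoCache())  # each request/task gets its own cache
    cache = demo_cache.get()
    assert cache is not None
    return await cache.get(key)


print(asyncio.run(handle_request("resource-id")))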
nucliadb/common/catalog/interface.py
CHANGED
@@ -22,7 +22,7 @@ from __future__ import annotations
 import abc
 import datetime
 from dataclasses import dataclass
-from typing import Literal
+from typing import Literal
 
 from pydantic import BaseModel, Field
 
@@ -49,22 +49,22 @@ class CatalogResourceData(BaseModel):
 class CatalogExpression:
     @dataclass
     class Date:
-        field:
-        since:
-        until:
+        field: Literal["created_at"] | Literal["modified_at"]
+        since: datetime.datetime | None
+        until: datetime.datetime | None
 
-    bool_and:
-    bool_or:
-    bool_not:
-    date:
-    facet:
-    resource_id:
+    bool_and: list[CatalogExpression] | None = None
+    bool_or: list[CatalogExpression] | None = None
+    bool_not: CatalogExpression | None = None
+    date: Date | None = None
+    facet: str | None = None
+    resource_id: str | None = None
 
 
 class CatalogQuery(BaseModel):
     kbid: str
-    query:
-    filters:
+    query: search_models.CatalogQuery | None = Field(description="Full-text search query")
+    filters: CatalogExpression | None = Field(description="Filters to apply to the search")
     sort: search_models.SortOptions = Field(description="Sorting option")
     faceted: list[str] = Field(description="List of facets to compute during the search")
     page_size: int = Field(description="Used for pagination. Maximum page size is 100")
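With the new annotations, CatalogExpression is a filter tree whose nodes carry optional fields that all default to None (boolean combinators, a date range, a facet, or a resource id). A self-contained sketch of how such a tree composes; this is a simplified re-creation with made-up facet values, not an import of the real class.

# Simplified re-creation of the CatalogExpression shape for illustration only.
from __future__ import annotations

import datetime
from dataclasses import dataclass
from typing import Literal


@dataclass
class Date:
    field: Literal["created_at"] | Literal["modified_at"]
    since: datetime.datetime | None
    until: datetime.datetime | None


@dataclass
class Expr:
    bool_and: list[Expr] | None = None
    bool_or: list[Expr] | None = None
    bool_not: Expr | None = None
    date: Date | None = None
    facet: str | None = None
    resource_id: str | None = None


# (label ai OR label ml) AND NOT status ERROR AND created during 2024
expr = Expr(
    bool_and=[
        Expr(bool_or=[Expr(facet="/l/topic/ai"), Expr(facet="/l/topic/ml")]),
        Expr(bool_not=Expr(facet="/n/s/ERROR")),
        Expr(date=Date("created_at", datetime.datetime(2024, 1, 1), datetime.datetime(2025, 1, 1))),
    ]
)
print(expr)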
nucliadb/common/catalog/pg.py
CHANGED
@@ -21,10 +21,11 @@
 import logging
 import re
 from collections import defaultdict
-from typing import Any, Literal,
+from typing import Any, Literal, cast
 
 from psycopg import AsyncCursor, sql
 from psycopg.rows import DictRow, dict_row
+from typing_extensions import assert_never
 
 from nucliadb.common.catalog.interface import (
     Catalog,
@@ -267,32 +268,46 @@ async def _faceted_search_unfiltered(
 ):
     facet_params: dict[str, Any] = {}
     facet_sql: sql.Composable
-    if
-        #
-
-
-
-
-
+    if list(tmp_facets.keys()) == ["/n/s"]:
+        # Special case when querying only for status. We know the list of possible facets and optimize
+        # by asking for each facet separately which makes better use of the index
+        sqls = []
+        for status in ["PENDING", "PROCESSED", "ERROR", "EMPTY"]:
+            sqls.append(
+                sql.SQL(
+                    "SELECT facet, COUNT(*) FROM catalog_facets WHERE kbid = %(kbid)s AND facet = '/n/s/{}' GROUP BY facet".format(
+                        status
+                    )
                 )
            )
-
-            facet_params[f"facet_len_{cnt}"] = -(len(prefix) + 1)
-        facet_sql = sql.SQL("AND {}").format(sql.SQL(" OR ").join(prefixes_sql))
-    elif all((facet.startswith("/l") or facet.startswith("/n/i") for facet in tmp_facets.keys())):
-        # Special case for the catalog query, which can have many facets asked for
-        # Filter for the categories (icon and labels) in the query, filter the rest in the code below
-        facet_sql = sql.SQL("AND (facet LIKE '/l/%%' OR facet like '/n/i/%%')")
+        await cur.execute(sql.SQL(" UNION ").join(sqls), {"kbid": catalog_query.kbid})
     else:
-
-
-
-
-
-
-
-
-
+        if len(tmp_facets) <= 5:
+            # Asking for few facets, strictly filter to what we need in the query
+            prefixes_sql = []
+            for cnt, prefix in enumerate(tmp_facets.keys()):
+                prefixes_sql.append(
+                    sql.SQL("(facet LIKE {} AND POSITION('/' IN RIGHT(facet, {})) = 0)").format(
+                        sql.Placeholder(f"facet_{cnt}"), sql.Placeholder(f"facet_len_{cnt}")
+                    )
+                )
+                facet_params[f"facet_{cnt}"] = f"{prefix}/%"
+                facet_params[f"facet_len_{cnt}"] = -(len(prefix) + 1)
+            facet_sql = sql.SQL("AND {}").format(sql.SQL(" OR ").join(prefixes_sql))
+        elif all(facet.startswith("/l") or facet.startswith("/n/i") for facet in tmp_facets.keys()):
+            # Special case for the catalog query, which can have many facets asked for
+            # Filter for the categories (icon and labels) in the query, filter the rest in the code below
+            facet_sql = sql.SQL("AND (facet LIKE '/l/%%' OR facet like '/n/i/%%')")
+        else:
+            # Worst case: ask for all facets and filter here. This is faster than applying lots of filters
+            facet_sql = sql.SQL("")
+
+        await cur.execute(
+            sql.SQL(
+                "SELECT facet, COUNT(*) FROM catalog_facets WHERE kbid = %(kbid)s {} GROUP BY facet"
+            ).format(facet_sql),
+            {"kbid": catalog_query.kbid, **facet_params},
+        )
 
     # Only keep the facets we asked for
     for row in await cur.fetchall():
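The hunk above picks one of several query shapes depending on which facets are requested. A simplified sketch of that decision logic follows, using plain SQL strings for readability; the real code composes psycopg sql.SQL objects with placeholders, and the few-facets branch additionally restricts matches to direct children of each prefix. The function name is illustrative.

def facet_query_strategy(requested: list[str]) -> str:
    base = "SELECT facet, COUNT(*) FROM catalog_facets WHERE kbid = %(kbid)s"
    if requested == ["/n/s"]:
        # Status only: one small indexed query per known status, UNIONed together
        return " UNION ".join(
            f"{base} AND facet = '/n/s/{status}' GROUP BY facet"
            for status in ["PENDING", "PROCESSED", "ERROR", "EMPTY"]
        )
    if len(requested) <= 5:
        # Few facets: restrict to the requested prefixes directly in SQL
        clause = " OR ".join(f"facet LIKE '{prefix}/%'" for prefix in requested)
        return f"{base} AND ({clause}) GROUP BY facet"
    if all(f.startswith("/l") or f.startswith("/n/i") for f in requested):
        # Many label/icon facets: coarse filter in SQL, refine in Python afterwards
        return f"{base} AND (facet LIKE '/l/%' OR facet LIKE '/n/i/%') GROUP BY facet"
    # Worst case: fetch every facet for the KB and filter in Python
    return f"{base} GROUP BY facet"


print(facet_query_strategy(["/n/s"]))
print(facet_query_strategy(["/l/topic", "/n/i"]))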
@@ -374,10 +389,7 @@ def _prepare_query_search(query: search_models.CatalogQuery, params: dict[str, A
         params["query"] = "%" + query.query + "%"
         return sql.SQL("title ILIKE %(query)s")
     else:  # pragma: no cover
-
-        # that is, if we are missing some ifs
-        _a: int = "a"
-        return sql.SQL("")
+        assert_never(query.match)
 
 
 def _convert_filter(expr: CatalogExpression, filter_params: dict[str, Any]) -> sql.Composable:
@@ -409,7 +421,7 @@ def _convert_filter(expr: CatalogExpression, filter_params: dict[str, Any]) -> s
 
 def _convert_boolean_op(
     operands: list[CatalogExpression],
-    op:
+    op: Literal["and"] | Literal["or"],
     filter_params: dict[str, Any],
 ) -> sql.Composable:
     array_op = sql.SQL("@>" if op == "and" else "&&")
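The _prepare_query_search change swaps the old `_a: int = "a"` trick for typing_extensions.assert_never, which gives a static exhaustiveness check over a Literal-typed value plus a runtime error if an unhandled value slips through. A small sketch of the pattern; MatchKind and build_match are illustrative names, only the assert_never usage comes from the diff.

from typing import Literal

from typing_extensions import assert_never

MatchKind = Literal["words", "fuzzy", "title"]


def build_match(match: MatchKind) -> str:
    if match == "words":
        return "match on extracted words"
    elif match == "fuzzy":
        return "trigram similarity on title"
    elif match == "title":
        return "ILIKE on title"
    else:  # pragma: no cover
        # mypy flags this call if a MatchKind member is not handled above;
        # at runtime an unexpected value raises instead of silently doing nothing.
        assert_never(match)


print(build_match("title"))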
nucliadb/common/catalog/utils.py
CHANGED
@@ -40,17 +40,17 @@ def build_catalog_resource_data(resource: Resource, index_message: IndexMessage)
     }
 
     # Labels from the resource and classification labels from each field
-    labels =
+    labels = {label for label in index_message.labels}
     for classification in resource.basic.computedmetadata.field_classifications:
         for clf in classification.classifications:
             label = f"/l/{clf.labelset}/{clf.label}"
             if label not in cancelled_labels:
-                labels.
+                labels.add(label)
 
     return CatalogResourceData(
         title=resource.basic.title,
         created_at=created_at,
         modified_at=modified_at,
-        labels=labels,
+        labels=list(labels),
         slug=resource.basic.slug,
     )
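build_catalog_resource_data now collects labels into a set before converting to a list, so duplicates coming from the index message and from field classifications collapse while cancelled labels stay out. A tiny sketch with made-up label values:

index_labels = ["/l/topic/ai", "/l/topic/ai", "/l/lang/en"]    # e.g. index_message.labels
field_labels = ["/l/topic/ai", "/l/source/web", "/l/lang/ca"]  # e.g. field classifications
cancelled_labels = {"/l/lang/ca"}

labels = {label for label in index_labels}
for label in field_labels:
    if label not in cancelled_labels:
        labels.add(label)

print(sorted(labels))  # ['/l/lang/en', '/l/source/web', '/l/topic/ai']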
nucliadb/common/cluster/manager.py
CHANGED
@@ -20,7 +20,8 @@
 import asyncio
 import logging
 import uuid
-from
+from collections.abc import Awaitable, Callable
+from typing import Any
 
 from nidx_protos import noderesources_pb2, nodewriter_pb2
 from nidx_protos.nodewriter_pb2 import (
@@ -96,7 +97,7 @@ class KBShardManager:
     # TODO: move to data manager
     async def get_current_active_shard(
         self, txn: Transaction, kbid: str
-    ) ->
+    ) -> writer_pb2.ShardObject | None:
         kb_shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=False)
         if kb_shards is None:
             return None
@@ -196,7 +197,7 @@
         txid: int,
         partition: str,
         kb: str,
-        reindex_id:
+        reindex_id: str | None = None,
         source: IndexMessageSource.ValueType = IndexMessageSource.PROCESSOR,
     ) -> None:
         """
@@ -306,7 +307,7 @@ class StandaloneKBShardManager(KBShardManager):
         txid: int,
         partition: str,
         kb: str,
-        reindex_id:
+        reindex_id: str | None = None,
         source: IndexMessageSource.ValueType = IndexMessageSource.PROCESSOR,
     ) -> None:
         """
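The added import lines follow current typing practice: Callable and Awaitable come from collections.abc (the typing aliases have been deprecated since Python 3.9) and optional values are written as `X | None`. A small sketch of that style; the callback type is hypothetical and not a nucliadb signature.

from collections.abc import Awaitable, Callable
from typing import Any

# Hypothetical async callback type built from the collections.abc generics
IndexCallback = Callable[[str, dict[str, Any]], Awaitable[None]]


async def log_indexed(shard_id: str, payload: dict[str, Any]) -> None:
    print(f"indexed shard {shard_id} with {len(payload)} entries")


def register(callback: IndexCallback) -> IndexCallback:
    # In real code this would be stored and awaited later
    return callback


register(log_indexed)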