nucliadb 6.9.1.post5192__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. migrations/0023_backfill_pg_catalog.py +2 -2
  2. migrations/0029_backfill_field_status.py +3 -4
  3. migrations/0032_remove_old_relations.py +2 -3
  4. migrations/0038_backfill_catalog_field_labels.py +2 -2
  5. migrations/0039_backfill_converation_splits_metadata.py +2 -2
  6. migrations/0041_reindex_conversations.py +137 -0
  7. migrations/pg/0010_shards_index.py +34 -0
  8. nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
  9. migrations/pg/0012_catalog_statistics_undo.py +26 -0
  10. nucliadb/backups/create.py +2 -15
  11. nucliadb/backups/restore.py +4 -15
  12. nucliadb/backups/tasks.py +4 -1
  13. nucliadb/common/back_pressure/cache.py +2 -3
  14. nucliadb/common/back_pressure/materializer.py +7 -13
  15. nucliadb/common/back_pressure/settings.py +6 -6
  16. nucliadb/common/back_pressure/utils.py +1 -0
  17. nucliadb/common/cache.py +9 -9
  18. nucliadb/common/catalog/interface.py +12 -12
  19. nucliadb/common/catalog/pg.py +41 -29
  20. nucliadb/common/catalog/utils.py +3 -3
  21. nucliadb/common/cluster/manager.py +5 -4
  22. nucliadb/common/cluster/rebalance.py +483 -114
  23. nucliadb/common/cluster/rollover.py +25 -9
  24. nucliadb/common/cluster/settings.py +3 -8
  25. nucliadb/common/cluster/utils.py +34 -8
  26. nucliadb/common/context/__init__.py +7 -8
  27. nucliadb/common/context/fastapi.py +1 -2
  28. nucliadb/common/datamanagers/__init__.py +2 -4
  29. nucliadb/common/datamanagers/atomic.py +4 -2
  30. nucliadb/common/datamanagers/cluster.py +1 -2
  31. nucliadb/common/datamanagers/fields.py +3 -4
  32. nucliadb/common/datamanagers/kb.py +6 -6
  33. nucliadb/common/datamanagers/labels.py +2 -3
  34. nucliadb/common/datamanagers/resources.py +10 -33
  35. nucliadb/common/datamanagers/rollover.py +5 -7
  36. nucliadb/common/datamanagers/search_configurations.py +1 -2
  37. nucliadb/common/datamanagers/synonyms.py +1 -2
  38. nucliadb/common/datamanagers/utils.py +4 -4
  39. nucliadb/common/datamanagers/vectorsets.py +4 -4
  40. nucliadb/common/external_index_providers/base.py +32 -5
  41. nucliadb/common/external_index_providers/manager.py +4 -5
  42. nucliadb/common/filter_expression.py +128 -40
  43. nucliadb/common/http_clients/processing.py +12 -23
  44. nucliadb/common/ids.py +6 -4
  45. nucliadb/common/locking.py +1 -2
  46. nucliadb/common/maindb/driver.py +9 -8
  47. nucliadb/common/maindb/local.py +5 -5
  48. nucliadb/common/maindb/pg.py +9 -8
  49. nucliadb/common/nidx.py +3 -4
  50. nucliadb/export_import/datamanager.py +4 -3
  51. nucliadb/export_import/exporter.py +11 -19
  52. nucliadb/export_import/importer.py +13 -6
  53. nucliadb/export_import/tasks.py +2 -0
  54. nucliadb/export_import/utils.py +6 -18
  55. nucliadb/health.py +2 -2
  56. nucliadb/ingest/app.py +8 -8
  57. nucliadb/ingest/consumer/consumer.py +8 -10
  58. nucliadb/ingest/consumer/pull.py +3 -8
  59. nucliadb/ingest/consumer/service.py +3 -3
  60. nucliadb/ingest/consumer/utils.py +1 -1
  61. nucliadb/ingest/fields/base.py +28 -49
  62. nucliadb/ingest/fields/conversation.py +12 -12
  63. nucliadb/ingest/fields/exceptions.py +1 -2
  64. nucliadb/ingest/fields/file.py +22 -8
  65. nucliadb/ingest/fields/link.py +7 -7
  66. nucliadb/ingest/fields/text.py +2 -3
  67. nucliadb/ingest/orm/brain_v2.py +78 -64
  68. nucliadb/ingest/orm/broker_message.py +2 -4
  69. nucliadb/ingest/orm/entities.py +10 -209
  70. nucliadb/ingest/orm/index_message.py +4 -4
  71. nucliadb/ingest/orm/knowledgebox.py +18 -27
  72. nucliadb/ingest/orm/processor/auditing.py +1 -3
  73. nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
  74. nucliadb/ingest/orm/processor/processor.py +27 -27
  75. nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
  76. nucliadb/ingest/orm/resource.py +72 -70
  77. nucliadb/ingest/orm/utils.py +1 -1
  78. nucliadb/ingest/processing.py +17 -17
  79. nucliadb/ingest/serialize.py +202 -145
  80. nucliadb/ingest/service/writer.py +3 -109
  81. nucliadb/ingest/settings.py +3 -4
  82. nucliadb/ingest/utils.py +1 -2
  83. nucliadb/learning_proxy.py +11 -11
  84. nucliadb/metrics_exporter.py +5 -4
  85. nucliadb/middleware/__init__.py +82 -1
  86. nucliadb/migrator/datamanager.py +3 -4
  87. nucliadb/migrator/migrator.py +1 -2
  88. nucliadb/migrator/models.py +1 -2
  89. nucliadb/migrator/settings.py +1 -2
  90. nucliadb/models/internal/augment.py +614 -0
  91. nucliadb/models/internal/processing.py +19 -19
  92. nucliadb/openapi.py +2 -2
  93. nucliadb/purge/__init__.py +3 -8
  94. nucliadb/purge/orphan_shards.py +1 -2
  95. nucliadb/reader/__init__.py +5 -0
  96. nucliadb/reader/api/models.py +6 -13
  97. nucliadb/reader/api/v1/download.py +59 -38
  98. nucliadb/reader/api/v1/export_import.py +4 -4
  99. nucliadb/reader/api/v1/learning_config.py +24 -4
  100. nucliadb/reader/api/v1/resource.py +61 -9
  101. nucliadb/reader/api/v1/services.py +18 -14
  102. nucliadb/reader/app.py +3 -1
  103. nucliadb/reader/reader/notifications.py +1 -2
  104. nucliadb/search/api/v1/__init__.py +2 -0
  105. nucliadb/search/api/v1/ask.py +3 -4
  106. nucliadb/search/api/v1/augment.py +585 -0
  107. nucliadb/search/api/v1/catalog.py +11 -15
  108. nucliadb/search/api/v1/find.py +16 -22
  109. nucliadb/search/api/v1/hydrate.py +25 -25
  110. nucliadb/search/api/v1/knowledgebox.py +1 -2
  111. nucliadb/search/api/v1/predict_proxy.py +1 -2
  112. nucliadb/search/api/v1/resource/ask.py +7 -7
  113. nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
  114. nucliadb/search/api/v1/resource/search.py +9 -11
  115. nucliadb/search/api/v1/retrieve.py +130 -0
  116. nucliadb/search/api/v1/search.py +28 -32
  117. nucliadb/search/api/v1/suggest.py +11 -14
  118. nucliadb/search/api/v1/summarize.py +1 -2
  119. nucliadb/search/api/v1/utils.py +2 -2
  120. nucliadb/search/app.py +3 -2
  121. nucliadb/search/augmentor/__init__.py +21 -0
  122. nucliadb/search/augmentor/augmentor.py +232 -0
  123. nucliadb/search/augmentor/fields.py +704 -0
  124. nucliadb/search/augmentor/metrics.py +24 -0
  125. nucliadb/search/augmentor/paragraphs.py +334 -0
  126. nucliadb/search/augmentor/resources.py +238 -0
  127. nucliadb/search/augmentor/utils.py +33 -0
  128. nucliadb/search/lifecycle.py +3 -1
  129. nucliadb/search/predict.py +24 -17
  130. nucliadb/search/predict_models.py +8 -9
  131. nucliadb/search/requesters/utils.py +11 -10
  132. nucliadb/search/search/cache.py +19 -23
  133. nucliadb/search/search/chat/ask.py +88 -59
  134. nucliadb/search/search/chat/exceptions.py +3 -5
  135. nucliadb/search/search/chat/fetcher.py +201 -0
  136. nucliadb/search/search/chat/images.py +6 -4
  137. nucliadb/search/search/chat/old_prompt.py +1375 -0
  138. nucliadb/search/search/chat/parser.py +510 -0
  139. nucliadb/search/search/chat/prompt.py +563 -615
  140. nucliadb/search/search/chat/query.py +449 -36
  141. nucliadb/search/search/chat/rpc.py +85 -0
  142. nucliadb/search/search/fetch.py +3 -4
  143. nucliadb/search/search/filters.py +8 -11
  144. nucliadb/search/search/find.py +33 -31
  145. nucliadb/search/search/find_merge.py +124 -331
  146. nucliadb/search/search/graph_strategy.py +14 -12
  147. nucliadb/search/search/hydrator/__init__.py +3 -152
  148. nucliadb/search/search/hydrator/fields.py +92 -50
  149. nucliadb/search/search/hydrator/images.py +7 -7
  150. nucliadb/search/search/hydrator/paragraphs.py +42 -26
  151. nucliadb/search/search/hydrator/resources.py +20 -16
  152. nucliadb/search/search/ingestion_agents.py +5 -5
  153. nucliadb/search/search/merge.py +90 -94
  154. nucliadb/search/search/metrics.py +10 -9
  155. nucliadb/search/search/paragraphs.py +7 -9
  156. nucliadb/search/search/predict_proxy.py +13 -9
  157. nucliadb/search/search/query.py +14 -86
  158. nucliadb/search/search/query_parser/fetcher.py +51 -82
  159. nucliadb/search/search/query_parser/models.py +19 -20
  160. nucliadb/search/search/query_parser/old_filters.py +20 -19
  161. nucliadb/search/search/query_parser/parsers/ask.py +4 -5
  162. nucliadb/search/search/query_parser/parsers/catalog.py +5 -6
  163. nucliadb/search/search/query_parser/parsers/common.py +5 -6
  164. nucliadb/search/search/query_parser/parsers/find.py +6 -26
  165. nucliadb/search/search/query_parser/parsers/graph.py +13 -23
  166. nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
  167. nucliadb/search/search/query_parser/parsers/search.py +15 -53
  168. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
  169. nucliadb/search/search/rank_fusion.py +18 -13
  170. nucliadb/search/search/rerankers.py +5 -6
  171. nucliadb/search/search/retrieval.py +300 -0
  172. nucliadb/search/search/summarize.py +5 -6
  173. nucliadb/search/search/utils.py +3 -4
  174. nucliadb/search/settings.py +1 -2
  175. nucliadb/standalone/api_router.py +1 -1
  176. nucliadb/standalone/app.py +4 -3
  177. nucliadb/standalone/auth.py +5 -6
  178. nucliadb/standalone/lifecycle.py +2 -2
  179. nucliadb/standalone/run.py +2 -4
  180. nucliadb/standalone/settings.py +5 -6
  181. nucliadb/standalone/versions.py +3 -4
  182. nucliadb/tasks/consumer.py +13 -8
  183. nucliadb/tasks/models.py +2 -1
  184. nucliadb/tasks/producer.py +3 -3
  185. nucliadb/tasks/retries.py +8 -7
  186. nucliadb/train/api/utils.py +1 -3
  187. nucliadb/train/api/v1/shards.py +1 -2
  188. nucliadb/train/api/v1/trainset.py +1 -2
  189. nucliadb/train/app.py +1 -1
  190. nucliadb/train/generator.py +4 -4
  191. nucliadb/train/generators/field_classifier.py +2 -2
  192. nucliadb/train/generators/field_streaming.py +6 -6
  193. nucliadb/train/generators/image_classifier.py +2 -2
  194. nucliadb/train/generators/paragraph_classifier.py +2 -2
  195. nucliadb/train/generators/paragraph_streaming.py +2 -2
  196. nucliadb/train/generators/question_answer_streaming.py +2 -2
  197. nucliadb/train/generators/sentence_classifier.py +2 -2
  198. nucliadb/train/generators/token_classifier.py +3 -2
  199. nucliadb/train/generators/utils.py +6 -5
  200. nucliadb/train/nodes.py +3 -3
  201. nucliadb/train/resource.py +6 -8
  202. nucliadb/train/settings.py +3 -4
  203. nucliadb/train/types.py +11 -11
  204. nucliadb/train/upload.py +3 -2
  205. nucliadb/train/uploader.py +1 -2
  206. nucliadb/train/utils.py +1 -2
  207. nucliadb/writer/api/v1/export_import.py +4 -1
  208. nucliadb/writer/api/v1/field.py +7 -11
  209. nucliadb/writer/api/v1/knowledgebox.py +3 -4
  210. nucliadb/writer/api/v1/resource.py +9 -20
  211. nucliadb/writer/api/v1/services.py +10 -132
  212. nucliadb/writer/api/v1/upload.py +73 -72
  213. nucliadb/writer/app.py +8 -2
  214. nucliadb/writer/resource/basic.py +12 -15
  215. nucliadb/writer/resource/field.py +7 -5
  216. nucliadb/writer/resource/origin.py +7 -0
  217. nucliadb/writer/settings.py +2 -3
  218. nucliadb/writer/tus/__init__.py +2 -3
  219. nucliadb/writer/tus/azure.py +1 -3
  220. nucliadb/writer/tus/dm.py +3 -3
  221. nucliadb/writer/tus/exceptions.py +3 -4
  222. nucliadb/writer/tus/gcs.py +5 -6
  223. nucliadb/writer/tus/s3.py +2 -3
  224. nucliadb/writer/tus/storage.py +3 -3
  225. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +9 -10
  226. nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
  227. nucliadb/common/datamanagers/entities.py +0 -139
  228. nucliadb-6.9.1.post5192.dist-info/RECORD +0 -392
  229. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
  230. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
  231. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/common/cache.py CHANGED
@@ -24,7 +24,7 @@ from abc import ABC, abstractmethod
24
24
  from contextvars import ContextVar
25
25
  from dataclasses import dataclass
26
26
  from functools import cached_property
27
- from typing import Generic, Optional, TypeVar
27
+ from typing import Generic, TypeVar
28
28
 
29
29
  import backoff
30
30
  from async_lru import _LRUCacheWrapper, alru_cache
@@ -66,9 +66,9 @@ class Cache(Generic[K, T], ABC):
66
66
 
67
67
  """
68
68
 
69
- cache: _LRUCacheWrapper[Optional[T]]
69
+ cache: _LRUCacheWrapper[T | None]
70
70
 
71
- async def get(self, *args: K.args, **kwargs: K.kwargs) -> Optional[T]:
71
+ async def get(self, *args: K.args, **kwargs: K.kwargs) -> T | None:
72
72
  result = await self.cache(*args)
73
73
  # Do not cache None
74
74
  if result is None:
@@ -88,7 +88,7 @@ class Cache(Generic[K, T], ABC):
88
88
  class ResourceCache(Cache[[str, str], ResourceORM]):
89
89
  def __init__(self, cache_size: int) -> None:
90
90
  @alru_cache(maxsize=cache_size)
91
- async def _get_resource(kbid: str, rid: str) -> Optional[ResourceORM]:
91
+ async def _get_resource(kbid: str, rid: str) -> ResourceORM | None:
92
92
  storage = await get_storage()
93
93
  async with get_driver().ro_transaction() as txn:
94
94
  kb = KnowledgeBoxORM(txn, storage, kbid)
@@ -115,7 +115,7 @@ class ExtractedTextCache(Cache[[str, FieldId], ExtractedText]):
115
115
  def __init__(self, cache_size: int) -> None:
116
116
  @alru_cache(maxsize=cache_size)
117
117
  @backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
118
- async def _get_extracted_text(kbid: str, field_id: FieldId) -> Optional[ExtractedText]:
118
+ async def _get_extracted_text(kbid: str, field_id: FieldId) -> ExtractedText | None:
119
119
  storage = await get_storage()
120
120
  try:
121
121
  sf = storage.file_extracted(
@@ -144,18 +144,18 @@ class ExtractedTextCache(Cache[[str, FieldId], ExtractedText]):
144
144
 
145
145
  # Global caches (per asyncio task)
146
146
 
147
- rcache: ContextVar[Optional[ResourceCache]] = ContextVar("rcache", default=None)
148
- etcache: ContextVar[Optional[ExtractedTextCache]] = ContextVar("etcache", default=None)
147
+ rcache: ContextVar[ResourceCache | None] = ContextVar("rcache", default=None)
148
+ etcache: ContextVar[ExtractedTextCache | None] = ContextVar("etcache", default=None)
149
149
 
150
150
 
151
151
  # Cache management
152
152
 
153
153
 
154
- def get_resource_cache() -> Optional[ResourceCache]:
154
+ def get_resource_cache() -> ResourceCache | None:
155
155
  return rcache.get()
156
156
 
157
157
 
158
- def get_extracted_text_cache() -> Optional[ExtractedTextCache]:
158
+ def get_extracted_text_cache() -> ExtractedTextCache | None:
159
159
  return etcache.get()
160
160
 
161
161
 
nucliadb/common/catalog/interface.py CHANGED
@@ -22,7 +22,7 @@ from __future__ import annotations
22
22
  import abc
23
23
  import datetime
24
24
  from dataclasses import dataclass
25
- from typing import Literal, Optional, Union
25
+ from typing import Literal
26
26
 
27
27
  from pydantic import BaseModel, Field
28
28
 
@@ -49,22 +49,22 @@ class CatalogResourceData(BaseModel):
49
49
  class CatalogExpression:
50
50
  @dataclass
51
51
  class Date:
52
- field: Union[Literal["created_at"], Literal["modified_at"]]
53
- since: Optional[datetime.datetime]
54
- until: Optional[datetime.datetime]
52
+ field: Literal["created_at"] | Literal["modified_at"]
53
+ since: datetime.datetime | None
54
+ until: datetime.datetime | None
55
55
 
56
- bool_and: Optional[list["CatalogExpression"]] = None
57
- bool_or: Optional[list["CatalogExpression"]] = None
58
- bool_not: Optional["CatalogExpression"] = None
59
- date: Optional[Date] = None
60
- facet: Optional[str] = None
61
- resource_id: Optional[str] = None
56
+ bool_and: list[CatalogExpression] | None = None
57
+ bool_or: list[CatalogExpression] | None = None
58
+ bool_not: CatalogExpression | None = None
59
+ date: Date | None = None
60
+ facet: str | None = None
61
+ resource_id: str | None = None
62
62
 
63
63
 
64
64
  class CatalogQuery(BaseModel):
65
65
  kbid: str
66
- query: Optional[search_models.CatalogQuery] = Field(description="Full-text search query")
67
- filters: Optional[CatalogExpression] = Field(description="Filters to apply to the search")
66
+ query: search_models.CatalogQuery | None = Field(description="Full-text search query")
67
+ filters: CatalogExpression | None = Field(description="Filters to apply to the search")
68
68
  sort: search_models.SortOptions = Field(description="Sorting option")
69
69
  faceted: list[str] = Field(description="List of facets to compute during the search")
70
70
  page_size: int = Field(description="Used for pagination. Maximum page size is 100")
nucliadb/common/catalog/pg.py CHANGED
@@ -21,10 +21,11 @@
21
21
  import logging
22
22
  import re
23
23
  from collections import defaultdict
24
- from typing import Any, Literal, Union, cast
24
+ from typing import Any, Literal, cast
25
25
 
26
26
  from psycopg import AsyncCursor, sql
27
27
  from psycopg.rows import DictRow, dict_row
28
+ from typing_extensions import assert_never
28
29
 
29
30
  from nucliadb.common.catalog.interface import (
30
31
  Catalog,
@@ -267,32 +268,46 @@ async def _faceted_search_unfiltered(
267
268
  ):
268
269
  facet_params: dict[str, Any] = {}
269
270
  facet_sql: sql.Composable
270
- if len(tmp_facets) <= 5:
271
- # Asking for few facets, strictly filter to what we need in the query
272
- prefixes_sql = []
273
- for cnt, prefix in enumerate(tmp_facets.keys()):
274
- prefixes_sql.append(
275
- sql.SQL("(facet LIKE {} AND POSITION('/' IN RIGHT(facet, {})) = 0)").format(
276
- sql.Placeholder(f"facet_{cnt}"), sql.Placeholder(f"facet_len_{cnt}")
271
+ if list(tmp_facets.keys()) == ["/n/s"]:
272
+ # Special case when querying only for status. We know the list of possible facets and optimize
273
+ # by asking for each facet separately which makes better use of the index
274
+ sqls = []
275
+ for status in ["PENDING", "PROCESSED", "ERROR", "EMPTY"]:
276
+ sqls.append(
277
+ sql.SQL(
278
+ "SELECT facet, COUNT(*) FROM catalog_facets WHERE kbid = %(kbid)s AND facet = '/n/s/{}' GROUP BY facet".format(
279
+ status
280
+ )
277
281
  )
278
282
  )
279
- facet_params[f"facet_{cnt}"] = f"{prefix}/%"
280
- facet_params[f"facet_len_{cnt}"] = -(len(prefix) + 1)
281
- facet_sql = sql.SQL("AND {}").format(sql.SQL(" OR ").join(prefixes_sql))
282
- elif all((facet.startswith("/l") or facet.startswith("/n/i") for facet in tmp_facets.keys())):
283
- # Special case for the catalog query, which can have many facets asked for
284
- # Filter for the categories (icon and labels) in the query, filter the rest in the code below
285
- facet_sql = sql.SQL("AND (facet LIKE '/l/%%' OR facet like '/n/i/%%')")
283
+ await cur.execute(sql.SQL(" UNION ").join(sqls), {"kbid": catalog_query.kbid})
286
284
  else:
287
- # Worst case: ask for all facets and filter here. This is faster than applying lots of filters
288
- facet_sql = sql.SQL("")
289
-
290
- await cur.execute(
291
- sql.SQL(
292
- "SELECT facet, COUNT(*) FROM catalog_facets WHERE kbid = %(kbid)s {} GROUP BY facet"
293
- ).format(facet_sql),
294
- {"kbid": catalog_query.kbid, **facet_params},
295
- )
285
+ if len(tmp_facets) <= 5:
286
+ # Asking for few facets, strictly filter to what we need in the query
287
+ prefixes_sql = []
288
+ for cnt, prefix in enumerate(tmp_facets.keys()):
289
+ prefixes_sql.append(
290
+ sql.SQL("(facet LIKE {} AND POSITION('/' IN RIGHT(facet, {})) = 0)").format(
291
+ sql.Placeholder(f"facet_{cnt}"), sql.Placeholder(f"facet_len_{cnt}")
292
+ )
293
+ )
294
+ facet_params[f"facet_{cnt}"] = f"{prefix}/%"
295
+ facet_params[f"facet_len_{cnt}"] = -(len(prefix) + 1)
296
+ facet_sql = sql.SQL("AND {}").format(sql.SQL(" OR ").join(prefixes_sql))
297
+ elif all(facet.startswith("/l") or facet.startswith("/n/i") for facet in tmp_facets.keys()):
298
+ # Special case for the catalog query, which can have many facets asked for
299
+ # Filter for the categories (icon and labels) in the query, filter the rest in the code below
300
+ facet_sql = sql.SQL("AND (facet LIKE '/l/%%' OR facet like '/n/i/%%')")
301
+ else:
302
+ # Worst case: ask for all facets and filter here. This is faster than applying lots of filters
303
+ facet_sql = sql.SQL("")
304
+
305
+ await cur.execute(
306
+ sql.SQL(
307
+ "SELECT facet, COUNT(*) FROM catalog_facets WHERE kbid = %(kbid)s {} GROUP BY facet"
308
+ ).format(facet_sql),
309
+ {"kbid": catalog_query.kbid, **facet_params},
310
+ )
296
311
 
297
312
  # Only keep the facets we asked for
298
313
  for row in await cur.fetchall():
@@ -374,10 +389,7 @@ def _prepare_query_search(query: search_models.CatalogQuery, params: dict[str, A
374
389
  params["query"] = "%" + query.query + "%"
375
390
  return sql.SQL("title ILIKE %(query)s")
376
391
  else: # pragma: no cover
377
- # This is a trick so mypy generates an error if this branch can be reached,
378
- # that is, if we are missing some ifs
379
- _a: int = "a"
380
- return sql.SQL("")
392
+ assert_never(query.match)
381
393
 
382
394
 
383
395
  def _convert_filter(expr: CatalogExpression, filter_params: dict[str, Any]) -> sql.Composable:
@@ -409,7 +421,7 @@ def _convert_filter(expr: CatalogExpression, filter_params: dict[str, Any]) -> s
409
421
 
410
422
  def _convert_boolean_op(
411
423
  operands: list[CatalogExpression],
412
- op: Union[Literal["and"], Literal["or"]],
424
+ op: Literal["and"] | Literal["or"],
413
425
  filter_params: dict[str, Any],
414
426
  ) -> sql.Composable:
415
427
  array_op = sql.SQL("@>" if op == "and" else "&&")
nucliadb/common/catalog/utils.py CHANGED
@@ -40,17 +40,17 @@ def build_catalog_resource_data(resource: Resource, index_message: IndexMessage)
40
40
  }
41
41
 
42
42
  # Labels from the resource and classification labels from each field
43
- labels = [label for label in index_message.labels]
43
+ labels = {label for label in index_message.labels}
44
44
  for classification in resource.basic.computedmetadata.field_classifications:
45
45
  for clf in classification.classifications:
46
46
  label = f"/l/{clf.labelset}/{clf.label}"
47
47
  if label not in cancelled_labels:
48
- labels.append(label)
48
+ labels.add(label)
49
49
 
50
50
  return CatalogResourceData(
51
51
  title=resource.basic.title,
52
52
  created_at=created_at,
53
53
  modified_at=modified_at,
54
- labels=labels,
54
+ labels=list(labels),
55
55
  slug=resource.basic.slug,
56
56
  )
nucliadb/common/cluster/manager.py CHANGED
@@ -20,7 +20,8 @@
20
20
  import asyncio
21
21
  import logging
22
22
  import uuid
23
- from typing import Any, Awaitable, Callable, Optional
23
+ from collections.abc import Awaitable, Callable
24
+ from typing import Any
24
25
 
25
26
  from nidx_protos import noderesources_pb2, nodewriter_pb2
26
27
  from nidx_protos.nodewriter_pb2 import (
@@ -96,7 +97,7 @@ class KBShardManager:
96
97
  # TODO: move to data manager
97
98
  async def get_current_active_shard(
98
99
  self, txn: Transaction, kbid: str
99
- ) -> Optional[writer_pb2.ShardObject]:
100
+ ) -> writer_pb2.ShardObject | None:
100
101
  kb_shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=False)
101
102
  if kb_shards is None:
102
103
  return None
@@ -196,7 +197,7 @@ class KBShardManager:
196
197
  txid: int,
197
198
  partition: str,
198
199
  kb: str,
199
- reindex_id: Optional[str] = None,
200
+ reindex_id: str | None = None,
200
201
  source: IndexMessageSource.ValueType = IndexMessageSource.PROCESSOR,
201
202
  ) -> None:
202
203
  """
@@ -306,7 +307,7 @@ class StandaloneKBShardManager(KBShardManager):
306
307
  txid: int,
307
308
  partition: str,
308
309
  kb: str,
309
- reindex_id: Optional[str] = None,
310
+ reindex_id: str | None = None,
310
311
  source: IndexMessageSource.ValueType = IndexMessageSource.PROCESSOR,
311
312
  ) -> None:
312
313
  """