nucliadb 6.9.1.post5192__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. migrations/0023_backfill_pg_catalog.py +2 -2
  2. migrations/0029_backfill_field_status.py +3 -4
  3. migrations/0032_remove_old_relations.py +2 -3
  4. migrations/0038_backfill_catalog_field_labels.py +2 -2
  5. migrations/0039_backfill_converation_splits_metadata.py +2 -2
  6. migrations/0041_reindex_conversations.py +137 -0
  7. migrations/pg/0010_shards_index.py +34 -0
  8. nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
  9. migrations/pg/0012_catalog_statistics_undo.py +26 -0
  10. nucliadb/backups/create.py +2 -15
  11. nucliadb/backups/restore.py +4 -15
  12. nucliadb/backups/tasks.py +4 -1
  13. nucliadb/common/back_pressure/cache.py +2 -3
  14. nucliadb/common/back_pressure/materializer.py +7 -13
  15. nucliadb/common/back_pressure/settings.py +6 -6
  16. nucliadb/common/back_pressure/utils.py +1 -0
  17. nucliadb/common/cache.py +9 -9
  18. nucliadb/common/catalog/interface.py +12 -12
  19. nucliadb/common/catalog/pg.py +41 -29
  20. nucliadb/common/catalog/utils.py +3 -3
  21. nucliadb/common/cluster/manager.py +5 -4
  22. nucliadb/common/cluster/rebalance.py +483 -114
  23. nucliadb/common/cluster/rollover.py +25 -9
  24. nucliadb/common/cluster/settings.py +3 -8
  25. nucliadb/common/cluster/utils.py +34 -8
  26. nucliadb/common/context/__init__.py +7 -8
  27. nucliadb/common/context/fastapi.py +1 -2
  28. nucliadb/common/datamanagers/__init__.py +2 -4
  29. nucliadb/common/datamanagers/atomic.py +4 -2
  30. nucliadb/common/datamanagers/cluster.py +1 -2
  31. nucliadb/common/datamanagers/fields.py +3 -4
  32. nucliadb/common/datamanagers/kb.py +6 -6
  33. nucliadb/common/datamanagers/labels.py +2 -3
  34. nucliadb/common/datamanagers/resources.py +10 -33
  35. nucliadb/common/datamanagers/rollover.py +5 -7
  36. nucliadb/common/datamanagers/search_configurations.py +1 -2
  37. nucliadb/common/datamanagers/synonyms.py +1 -2
  38. nucliadb/common/datamanagers/utils.py +4 -4
  39. nucliadb/common/datamanagers/vectorsets.py +4 -4
  40. nucliadb/common/external_index_providers/base.py +32 -5
  41. nucliadb/common/external_index_providers/manager.py +4 -5
  42. nucliadb/common/filter_expression.py +128 -40
  43. nucliadb/common/http_clients/processing.py +12 -23
  44. nucliadb/common/ids.py +6 -4
  45. nucliadb/common/locking.py +1 -2
  46. nucliadb/common/maindb/driver.py +9 -8
  47. nucliadb/common/maindb/local.py +5 -5
  48. nucliadb/common/maindb/pg.py +9 -8
  49. nucliadb/common/nidx.py +3 -4
  50. nucliadb/export_import/datamanager.py +4 -3
  51. nucliadb/export_import/exporter.py +11 -19
  52. nucliadb/export_import/importer.py +13 -6
  53. nucliadb/export_import/tasks.py +2 -0
  54. nucliadb/export_import/utils.py +6 -18
  55. nucliadb/health.py +2 -2
  56. nucliadb/ingest/app.py +8 -8
  57. nucliadb/ingest/consumer/consumer.py +8 -10
  58. nucliadb/ingest/consumer/pull.py +3 -8
  59. nucliadb/ingest/consumer/service.py +3 -3
  60. nucliadb/ingest/consumer/utils.py +1 -1
  61. nucliadb/ingest/fields/base.py +28 -49
  62. nucliadb/ingest/fields/conversation.py +12 -12
  63. nucliadb/ingest/fields/exceptions.py +1 -2
  64. nucliadb/ingest/fields/file.py +22 -8
  65. nucliadb/ingest/fields/link.py +7 -7
  66. nucliadb/ingest/fields/text.py +2 -3
  67. nucliadb/ingest/orm/brain_v2.py +78 -64
  68. nucliadb/ingest/orm/broker_message.py +2 -4
  69. nucliadb/ingest/orm/entities.py +10 -209
  70. nucliadb/ingest/orm/index_message.py +4 -4
  71. nucliadb/ingest/orm/knowledgebox.py +18 -27
  72. nucliadb/ingest/orm/processor/auditing.py +1 -3
  73. nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
  74. nucliadb/ingest/orm/processor/processor.py +27 -27
  75. nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
  76. nucliadb/ingest/orm/resource.py +72 -70
  77. nucliadb/ingest/orm/utils.py +1 -1
  78. nucliadb/ingest/processing.py +17 -17
  79. nucliadb/ingest/serialize.py +202 -145
  80. nucliadb/ingest/service/writer.py +3 -109
  81. nucliadb/ingest/settings.py +3 -4
  82. nucliadb/ingest/utils.py +1 -2
  83. nucliadb/learning_proxy.py +11 -11
  84. nucliadb/metrics_exporter.py +5 -4
  85. nucliadb/middleware/__init__.py +82 -1
  86. nucliadb/migrator/datamanager.py +3 -4
  87. nucliadb/migrator/migrator.py +1 -2
  88. nucliadb/migrator/models.py +1 -2
  89. nucliadb/migrator/settings.py +1 -2
  90. nucliadb/models/internal/augment.py +614 -0
  91. nucliadb/models/internal/processing.py +19 -19
  92. nucliadb/openapi.py +2 -2
  93. nucliadb/purge/__init__.py +3 -8
  94. nucliadb/purge/orphan_shards.py +1 -2
  95. nucliadb/reader/__init__.py +5 -0
  96. nucliadb/reader/api/models.py +6 -13
  97. nucliadb/reader/api/v1/download.py +59 -38
  98. nucliadb/reader/api/v1/export_import.py +4 -4
  99. nucliadb/reader/api/v1/learning_config.py +24 -4
  100. nucliadb/reader/api/v1/resource.py +61 -9
  101. nucliadb/reader/api/v1/services.py +18 -14
  102. nucliadb/reader/app.py +3 -1
  103. nucliadb/reader/reader/notifications.py +1 -2
  104. nucliadb/search/api/v1/__init__.py +2 -0
  105. nucliadb/search/api/v1/ask.py +3 -4
  106. nucliadb/search/api/v1/augment.py +585 -0
  107. nucliadb/search/api/v1/catalog.py +11 -15
  108. nucliadb/search/api/v1/find.py +16 -22
  109. nucliadb/search/api/v1/hydrate.py +25 -25
  110. nucliadb/search/api/v1/knowledgebox.py +1 -2
  111. nucliadb/search/api/v1/predict_proxy.py +1 -2
  112. nucliadb/search/api/v1/resource/ask.py +7 -7
  113. nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
  114. nucliadb/search/api/v1/resource/search.py +9 -11
  115. nucliadb/search/api/v1/retrieve.py +130 -0
  116. nucliadb/search/api/v1/search.py +28 -32
  117. nucliadb/search/api/v1/suggest.py +11 -14
  118. nucliadb/search/api/v1/summarize.py +1 -2
  119. nucliadb/search/api/v1/utils.py +2 -2
  120. nucliadb/search/app.py +3 -2
  121. nucliadb/search/augmentor/__init__.py +21 -0
  122. nucliadb/search/augmentor/augmentor.py +232 -0
  123. nucliadb/search/augmentor/fields.py +704 -0
  124. nucliadb/search/augmentor/metrics.py +24 -0
  125. nucliadb/search/augmentor/paragraphs.py +334 -0
  126. nucliadb/search/augmentor/resources.py +238 -0
  127. nucliadb/search/augmentor/utils.py +33 -0
  128. nucliadb/search/lifecycle.py +3 -1
  129. nucliadb/search/predict.py +24 -17
  130. nucliadb/search/predict_models.py +8 -9
  131. nucliadb/search/requesters/utils.py +11 -10
  132. nucliadb/search/search/cache.py +19 -23
  133. nucliadb/search/search/chat/ask.py +88 -59
  134. nucliadb/search/search/chat/exceptions.py +3 -5
  135. nucliadb/search/search/chat/fetcher.py +201 -0
  136. nucliadb/search/search/chat/images.py +6 -4
  137. nucliadb/search/search/chat/old_prompt.py +1375 -0
  138. nucliadb/search/search/chat/parser.py +510 -0
  139. nucliadb/search/search/chat/prompt.py +563 -615
  140. nucliadb/search/search/chat/query.py +449 -36
  141. nucliadb/search/search/chat/rpc.py +85 -0
  142. nucliadb/search/search/fetch.py +3 -4
  143. nucliadb/search/search/filters.py +8 -11
  144. nucliadb/search/search/find.py +33 -31
  145. nucliadb/search/search/find_merge.py +124 -331
  146. nucliadb/search/search/graph_strategy.py +14 -12
  147. nucliadb/search/search/hydrator/__init__.py +3 -152
  148. nucliadb/search/search/hydrator/fields.py +92 -50
  149. nucliadb/search/search/hydrator/images.py +7 -7
  150. nucliadb/search/search/hydrator/paragraphs.py +42 -26
  151. nucliadb/search/search/hydrator/resources.py +20 -16
  152. nucliadb/search/search/ingestion_agents.py +5 -5
  153. nucliadb/search/search/merge.py +90 -94
  154. nucliadb/search/search/metrics.py +10 -9
  155. nucliadb/search/search/paragraphs.py +7 -9
  156. nucliadb/search/search/predict_proxy.py +13 -9
  157. nucliadb/search/search/query.py +14 -86
  158. nucliadb/search/search/query_parser/fetcher.py +51 -82
  159. nucliadb/search/search/query_parser/models.py +19 -20
  160. nucliadb/search/search/query_parser/old_filters.py +20 -19
  161. nucliadb/search/search/query_parser/parsers/ask.py +4 -5
  162. nucliadb/search/search/query_parser/parsers/catalog.py +5 -6
  163. nucliadb/search/search/query_parser/parsers/common.py +5 -6
  164. nucliadb/search/search/query_parser/parsers/find.py +6 -26
  165. nucliadb/search/search/query_parser/parsers/graph.py +13 -23
  166. nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
  167. nucliadb/search/search/query_parser/parsers/search.py +15 -53
  168. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
  169. nucliadb/search/search/rank_fusion.py +18 -13
  170. nucliadb/search/search/rerankers.py +5 -6
  171. nucliadb/search/search/retrieval.py +300 -0
  172. nucliadb/search/search/summarize.py +5 -6
  173. nucliadb/search/search/utils.py +3 -4
  174. nucliadb/search/settings.py +1 -2
  175. nucliadb/standalone/api_router.py +1 -1
  176. nucliadb/standalone/app.py +4 -3
  177. nucliadb/standalone/auth.py +5 -6
  178. nucliadb/standalone/lifecycle.py +2 -2
  179. nucliadb/standalone/run.py +2 -4
  180. nucliadb/standalone/settings.py +5 -6
  181. nucliadb/standalone/versions.py +3 -4
  182. nucliadb/tasks/consumer.py +13 -8
  183. nucliadb/tasks/models.py +2 -1
  184. nucliadb/tasks/producer.py +3 -3
  185. nucliadb/tasks/retries.py +8 -7
  186. nucliadb/train/api/utils.py +1 -3
  187. nucliadb/train/api/v1/shards.py +1 -2
  188. nucliadb/train/api/v1/trainset.py +1 -2
  189. nucliadb/train/app.py +1 -1
  190. nucliadb/train/generator.py +4 -4
  191. nucliadb/train/generators/field_classifier.py +2 -2
  192. nucliadb/train/generators/field_streaming.py +6 -6
  193. nucliadb/train/generators/image_classifier.py +2 -2
  194. nucliadb/train/generators/paragraph_classifier.py +2 -2
  195. nucliadb/train/generators/paragraph_streaming.py +2 -2
  196. nucliadb/train/generators/question_answer_streaming.py +2 -2
  197. nucliadb/train/generators/sentence_classifier.py +2 -2
  198. nucliadb/train/generators/token_classifier.py +3 -2
  199. nucliadb/train/generators/utils.py +6 -5
  200. nucliadb/train/nodes.py +3 -3
  201. nucliadb/train/resource.py +6 -8
  202. nucliadb/train/settings.py +3 -4
  203. nucliadb/train/types.py +11 -11
  204. nucliadb/train/upload.py +3 -2
  205. nucliadb/train/uploader.py +1 -2
  206. nucliadb/train/utils.py +1 -2
  207. nucliadb/writer/api/v1/export_import.py +4 -1
  208. nucliadb/writer/api/v1/field.py +7 -11
  209. nucliadb/writer/api/v1/knowledgebox.py +3 -4
  210. nucliadb/writer/api/v1/resource.py +9 -20
  211. nucliadb/writer/api/v1/services.py +10 -132
  212. nucliadb/writer/api/v1/upload.py +73 -72
  213. nucliadb/writer/app.py +8 -2
  214. nucliadb/writer/resource/basic.py +12 -15
  215. nucliadb/writer/resource/field.py +7 -5
  216. nucliadb/writer/resource/origin.py +7 -0
  217. nucliadb/writer/settings.py +2 -3
  218. nucliadb/writer/tus/__init__.py +2 -3
  219. nucliadb/writer/tus/azure.py +1 -3
  220. nucliadb/writer/tus/dm.py +3 -3
  221. nucliadb/writer/tus/exceptions.py +3 -4
  222. nucliadb/writer/tus/gcs.py +5 -6
  223. nucliadb/writer/tus/s3.py +2 -3
  224. nucliadb/writer/tus/storage.py +3 -3
  225. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +9 -10
  226. nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
  227. nucliadb/common/datamanagers/entities.py +0 -139
  228. nucliadb-6.9.1.post5192.dist-info/RECORD +0 -392
  229. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
  230. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
  231. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/train/nodes.py CHANGED
@@ -17,7 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import AsyncIterator, Optional
20
+ from collections.abc import AsyncIterator
21
21
 
22
22
  from nucliadb.common import datamanagers
23
23
  from nucliadb.common.cluster import manager
@@ -62,7 +62,7 @@ class TrainShardManager(manager.KBShardManager):
62
62
 
63
63
  return shard_object.nidx_shard_id
64
64
 
65
- async def get_kb_obj(self, txn: Transaction, kbid: str) -> Optional[KnowledgeBox]:
65
+ async def get_kb_obj(self, txn: Transaction, kbid: str) -> KnowledgeBox | None:
66
66
  if kbid is None:
67
67
  return None
68
68
 
@@ -72,7 +72,7 @@ class TrainShardManager(manager.KBShardManager):
72
72
  kbobj = KnowledgeBox(txn, self.storage, kbid)
73
73
  return kbobj
74
74
 
75
- async def get_kb_entities_manager(self, txn: Transaction, kbid: str) -> Optional[EntitiesManager]:
75
+ async def get_kb_entities_manager(self, txn: Transaction, kbid: str) -> EntitiesManager | None:
76
76
  kbobj = await self.get_kb_obj(txn, kbid)
77
77
  if kbobj is None:
78
78
  return None
nucliadb/train/resource.py CHANGED
@@ -19,7 +19,7 @@
19
19
  #
20
20
  from __future__ import annotations
21
21
 
22
- from typing import AsyncIterator, MutableMapping, Optional
22
+ from collections.abc import AsyncIterator, MutableMapping
23
23
 
24
24
  from nucliadb.common import datamanagers
25
25
  from nucliadb.ingest.orm.resource import Resource
@@ -69,9 +69,7 @@ async def iterate_sentences(
69
69
  # return any
70
70
  vectorset_id = None
71
71
  async with datamanagers.with_ro_transaction() as txn:
72
- async for vectorset_id, vs in datamanagers.vectorsets.iter(
73
- txn=txn, kbid=resource.kb.kbid
74
- ):
72
+ async for vectorset_id, vs in datamanagers.vectorsets.iter(txn=txn, kbid=resource.kbid):
75
73
  break
76
74
  assert vectorset_id is not None, "All KBs must have at least a vectorset"
77
75
  vo = await field.get_vectors(vectorset_id, vs.storage_key_kind)
@@ -81,7 +79,7 @@ async def iterate_sentences(
81
79
  if fm is None:
82
80
  continue
83
81
 
84
- field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [(None, fm.metadata)]
82
+ field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
85
83
  for subfield_metadata, splitted_metadata in fm.split_metadata.items():
86
84
  field_metadatas.append((subfield_metadata, splitted_metadata))
87
85
 
@@ -188,7 +186,7 @@ async def iterate_paragraphs(
188
186
  if fm is None:
189
187
  continue
190
188
 
191
- field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [(None, fm.metadata)]
189
+ field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
192
190
  for subfield_metadata, splitted_metadata in fm.split_metadata.items():
193
191
  field_metadatas.append((subfield_metadata, splitted_metadata))
194
192
 
@@ -264,7 +262,7 @@ async def iterate_fields(
264
262
  if fm is None:
265
263
  continue
266
264
 
267
- field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [(None, fm.metadata)]
265
+ field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
268
266
  for subfield_metadata, splitted_metadata in fm.split_metadata.items():
269
267
  field_metadatas.append((subfield_metadata, splitted_metadata))
270
268
 
@@ -319,7 +317,7 @@ async def generate_train_resource(
319
317
  if fm is None:
320
318
  continue
321
319
 
322
- field_metadatas: list[tuple[Optional[str], FieldMetadata]] = [(None, fm.metadata)]
320
+ field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
323
321
  for subfield_metadata, splitted_metadata in fm.split_metadata.items():
324
322
  field_metadatas.append((subfield_metadata, splitted_metadata))
325
323
 
nucliadb/train/settings.py CHANGED
@@ -17,17 +17,16 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import Optional
21
20
 
22
21
  from nucliadb.ingest.settings import DriverSettings
23
22
 
24
23
 
25
24
  class Settings(DriverSettings):
26
25
  grpc_port: int = 8031
27
- train_grpc_address: Optional[str] = None
26
+ train_grpc_address: str | None = None
28
27
 
29
- nuclia_learning_url: Optional[str] = "https://nuclia.cloud/api/v1/learning/"
30
- nuclia_learning_apikey: Optional[str] = None
28
+ nuclia_learning_url: str | None = "https://nuclia.cloud/api/v1/learning/"
29
+ nuclia_learning_apikey: str | None = None
31
30
 
32
31
  internal_counter_api: str = "http://search.nuclia.svc.cluster.local:8030/api/v1/kb/{kbid}/counters"
33
32
 
nucliadb/train/types.py CHANGED
@@ -17,20 +17,20 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import TypeVar, Union
20
+ from typing import TypeVar
21
21
 
22
22
  from nucliadb_protos import dataset_pb2 as dpb
23
23
 
24
- TrainBatch = Union[
25
- dpb.FieldClassificationBatch,
26
- dpb.ImageClassificationBatch,
27
- dpb.ParagraphClassificationBatch,
28
- dpb.ParagraphStreamingBatch,
29
- dpb.QuestionAnswerStreamingBatch,
30
- dpb.SentenceClassificationBatch,
31
- dpb.TokenClassificationBatch,
32
- dpb.FieldStreamingBatch,
33
- ]
24
+ TrainBatch = (
25
+ dpb.FieldClassificationBatch
26
+ | dpb.ImageClassificationBatch
27
+ | dpb.ParagraphClassificationBatch
28
+ | dpb.ParagraphStreamingBatch
29
+ | dpb.QuestionAnswerStreamingBatch
30
+ | dpb.SentenceClassificationBatch
31
+ | dpb.TokenClassificationBatch
32
+ | dpb.FieldStreamingBatch
33
+ )
34
34
 
35
35
  T = TypeVar(
36
36
  "T",
nucliadb/train/upload.py CHANGED
@@ -20,8 +20,9 @@
20
20
  import argparse
21
21
  import asyncio
22
22
  import importlib.metadata
23
+ import inspect
23
24
  from asyncio import tasks
24
- from typing import Callable
25
+ from collections.abc import Callable
25
26
 
26
27
  from nucliadb.train.uploader import start_upload
27
28
  from nucliadb_telemetry import errors
@@ -89,7 +90,7 @@ def run() -> None:
89
90
  finally:
90
91
  try:
91
92
  for finalizer in finalizers:
92
- if asyncio.iscoroutinefunction(finalizer):
93
+ if inspect.iscoroutinefunction(finalizer):
93
94
  loop.run_until_complete(finalizer())
94
95
  else:
95
96
  finalizer()
nucliadb/train/uploader.py CHANGED
@@ -17,7 +17,6 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import Optional
21
20
 
22
21
  import aiohttp
23
22
 
@@ -97,7 +96,7 @@ class UploadServicer:
97
96
  response.status = GetLabelsResponse.Status.NOTFOUND
98
97
  return response
99
98
  response.kb.uuid = kbid
100
- labels: Optional[Labels] = await datamanagers.atomic.labelset.get_all(kbid=kbid)
99
+ labels: Labels | None = await datamanagers.atomic.labelset.get_all(kbid=kbid)
101
100
  if labels is not None:
102
101
  response.labels.CopyFrom(labels)
103
102
  return response
nucliadb/train/utils.py CHANGED
@@ -17,7 +17,6 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import Optional
21
20
 
22
21
  from grpc import aio
23
22
  from grpc_health.v1 import health, health_pb2_grpc
@@ -37,7 +36,7 @@ from nucliadb_utils.utilities import (
37
36
  )
38
37
 
39
38
 
40
- async def start_train_grpc(service_name: Optional[str] = None):
39
+ async def start_train_grpc(service_name: str | None = None):
41
40
  actual_service = get_utility(Utility.TRAIN)
42
41
  if actual_service is not None:
43
42
  return
nucliadb/writer/api/v1/export_import.py CHANGED
@@ -17,8 +17,8 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ from collections.abc import AsyncGenerator
20
21
  from datetime import datetime
21
- from typing import AsyncGenerator
22
22
  from uuid import uuid4
23
23
 
24
24
  from fastapi_versioning import version
@@ -66,6 +66,7 @@ from nucliadb_utils.authentication import requires_one
66
66
  summary="Start an export of a Knowledge Box",
67
67
  tags=["Knowledge Boxes"],
68
68
  response_model=CreateExportResponse,
69
+ include_in_schema=False,
69
70
  )
70
71
  @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
71
72
  @version(1)
@@ -91,6 +92,7 @@ async def start_kb_export_endpoint(request: Request, kbid: str):
91
92
  tags=["Knowledge Boxes"],
92
93
  response_model=NewImportedKbResponse,
93
94
  openapi_extra={"x-hidden-operation": True},
95
+ include_in_schema=False,
94
96
  )
95
97
  @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
96
98
  @version(1)
@@ -140,6 +142,7 @@ async def kb_create_and_import_endpoint(request: Request):
140
142
  summary="Start an import to a Knowledge Box",
141
143
  tags=["Knowledge Boxes"],
142
144
  response_model=CreateImportResponse,
145
+ include_in_schema=False,
143
146
  )
144
147
  @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
145
148
  @version(1)
nucliadb/writer/api/v1/field.py CHANGED
@@ -17,8 +17,9 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ from collections.abc import Callable
20
21
  from inspect import iscoroutinefunction
21
- from typing import TYPE_CHECKING, Annotated, Callable, List, Optional, Type, Union
22
+ from typing import TYPE_CHECKING, Annotated
22
23
 
23
24
  import pydantic
24
25
  from fastapi import HTTPException, Query, Response
@@ -72,12 +73,7 @@ if TYPE_CHECKING: # pragma: no cover
72
73
  else:
73
74
  FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP: dict[models.FieldTypeName, int]
74
75
 
75
- FieldModelType = Union[
76
- models.TextField,
77
- models.LinkField,
78
- models.InputConversationField,
79
- models.FileField,
80
- ]
76
+ FieldModelType = models.TextField | models.LinkField | models.InputConversationField | models.FileField
81
77
 
82
78
  FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP = {
83
79
  models.FieldTypeName.FILE: resources_pb2.FieldType.FILE,
@@ -278,7 +274,7 @@ async def parse_file_field_adapter(
278
274
  )
279
275
 
280
276
 
281
- FIELD_PARSERS_MAP: dict[Type, Callable] = {
277
+ FIELD_PARSERS_MAP: dict[type, Callable] = {
282
278
  models.TextField: parse_text_field_adapter,
283
279
  models.LinkField: parse_link_field_adapter,
284
280
  models.InputConversationField: parse_conversation_field_adapter,
@@ -463,7 +459,7 @@ async def append_messages_to_conversation_field_rslug_prefix(
463
459
  kbid: str,
464
460
  rslug: str,
465
461
  field_id: FieldIdString,
466
- messages: List[models.InputMessage],
462
+ messages: list[models.InputMessage],
467
463
  ) -> ResourceFieldAdded:
468
464
  try:
469
465
  field = models.InputConversationField(messages=messages)
@@ -488,7 +484,7 @@ async def append_messages_to_conversation_field_rid_prefix(
488
484
  kbid: str,
489
485
  rid: str,
490
486
  field_id: FieldIdString,
491
- messages: List[models.InputMessage],
487
+ messages: list[models.InputMessage],
492
488
  ) -> ResourceFieldAdded:
493
489
  try:
494
490
  field = models.InputConversationField(messages=messages)
@@ -550,7 +546,7 @@ async def reprocess_file_field(
550
546
  rid: str,
551
547
  field_id: FieldIdString,
552
548
  x_nucliadb_user: Annotated[str, X_NUCLIADB_USER] = "",
553
- x_file_password: Annotated[Optional[str], X_FILE_PASSWORD] = None,
549
+ x_file_password: Annotated[str | None, X_FILE_PASSWORD] = None,
554
550
  reset_title: bool = Query(
555
551
  default=False,
556
552
  description="Reset the title of the resource so that the file or link computed titles are set after processing.",
nucliadb/writer/api/v1/knowledgebox.py CHANGED
@@ -17,10 +17,9 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- import asyncio
21
20
  from functools import partial
22
21
 
23
- from fastapi import HTTPException
22
+ from fastapi import BackgroundTasks, HTTPException
24
23
  from fastapi_versioning import version
25
24
  from starlette.requests import Request
26
25
 
@@ -184,7 +183,7 @@ async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig) -> Kn
184
183
  )
185
184
  @requires(NucliaDBRoles.MANAGER)
186
185
  @version(1)
187
- async def delete_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
186
+ async def delete_kb(request: Request, kbid: str, background: BackgroundTasks) -> KnowledgeBoxObj:
188
187
  driver = get_driver()
189
188
  try:
190
189
  await KnowledgeBox.delete(driver, kbid=kbid)
@@ -208,6 +207,6 @@ async def delete_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
208
207
  # be nice and notify processing this KB is being deleted so we waste
209
208
  # resources
210
209
  processing = get_processing()
211
- asyncio.create_task(processing.delete_from_processing(kbid=kbid))
210
+ background.add_task(processing.delete_from_processing, kbid=kbid)
212
211
 
213
212
  return KnowledgeBoxObj(uuid=kbid)
nucliadb/writer/api/v1/resource.py CHANGED
@@ -17,13 +17,12 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- import asyncio
21
20
  import contextlib
22
21
  from time import time
23
- from typing import Annotated, Optional
22
+ from typing import Annotated
24
23
  from uuid import uuid4
25
24
 
26
- from fastapi import HTTPException, Query, Response
25
+ from fastapi import BackgroundTasks, HTTPException, Query, Response
27
26
  from fastapi_versioning import version
28
27
  from starlette.requests import Request
29
28
 
@@ -498,12 +497,10 @@ async def _reprocess_resource(
498
497
  @requires(NucliaDBRoles.WRITER)
499
498
  @version(1)
500
499
  async def delete_resource_rslug_prefix(
501
- request: Request,
502
- kbid: str,
503
- rslug: str,
500
+ request: Request, kbid: str, rslug: str, background: BackgroundTasks
504
501
  ):
505
502
  rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
506
- return await _delete_resource(request, kbid, rid)
503
+ return await _delete_resource(request, kbid, rid, background)
507
504
 
508
505
 
509
506
  @api.delete(
@@ -514,19 +511,11 @@ async def delete_resource_rslug_prefix(
514
511
  )
515
512
  @requires(NucliaDBRoles.WRITER)
516
513
  @version(1)
517
- async def delete_resource_rid_prefix(
518
- request: Request,
519
- kbid: str,
520
- rid: str,
521
- ):
522
- return await _delete_resource(request, kbid, rid)
514
+ async def delete_resource_rid_prefix(request: Request, kbid: str, rid: str, background: BackgroundTasks):
515
+ return await _delete_resource(request, kbid, rid, background)
523
516
 
524
517
 
525
- async def _delete_resource(
526
- request: Request,
527
- kbid: str,
528
- rid: str,
529
- ):
518
+ async def _delete_resource(request: Request, kbid: str, rid: str, background: BackgroundTasks):
530
519
  await validate_rid_exists_or_raise_error(kbid, rid)
531
520
 
532
521
  partitioning = get_partitioning()
@@ -541,7 +530,7 @@ async def _delete_resource(
541
530
  parse_audit(writer.audit, request)
542
531
  await transaction.commit(writer, partition)
543
532
  processing = get_processing()
544
- asyncio.create_task(processing.delete_from_processing(kbid=kbid, resource_id=rid))
533
+ background.add_task(processing.delete_from_processing, kbid=kbid, resource_id=rid)
545
534
 
546
535
  return Response(status_code=204)
547
536
 
@@ -637,7 +626,7 @@ def needs_resource_reindex(item: UpdateResourcePayload) -> bool:
637
626
  )
638
627
 
639
628
 
640
- async def maybe_send_to_process(toprocess: PushPayload, partition) -> Optional[int]:
629
+ async def maybe_send_to_process(toprocess: PushPayload, partition) -> int | None:
641
630
  if not needs_reprocess(toprocess):
642
631
  return None
643
632
 
nucliadb/writer/api/v1/services.py CHANGED
@@ -17,152 +17,22 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from fastapi import HTTPException, Response
20
+ from fastapi import Body, HTTPException, Path, Response
21
21
  from fastapi_versioning import version
22
22
  from starlette.requests import Request
23
23
 
24
24
  from nucliadb.common import datamanagers
25
25
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
26
26
  from nucliadb.common.models_utils import to_proto
27
- from nucliadb.models.responses import (
28
- HTTPConflict,
29
- HTTPInternalServerError,
30
- HTTPNotFound,
31
- )
32
27
  from nucliadb.writer.api.v1.router import KB_PREFIX, api
33
28
  from nucliadb_models.configuration import SearchConfiguration
34
- from nucliadb_models.entities import (
35
- CreateEntitiesGroupPayload,
36
- UpdateEntitiesGroupPayload,
37
- )
38
29
  from nucliadb_models.labels import LabelSet
39
30
  from nucliadb_models.resource import NucliaDBRoles
40
31
  from nucliadb_models.synonyms import KnowledgeBoxSynonyms
41
32
  from nucliadb_protos import writer_pb2
42
33
  from nucliadb_protos.knowledgebox_pb2 import Label as LabelPB
43
34
  from nucliadb_protos.knowledgebox_pb2 import LabelSet as LabelSetPB
44
- from nucliadb_protos.writer_pb2 import (
45
- DelEntitiesRequest,
46
- NewEntitiesGroupRequest,
47
- NewEntitiesGroupResponse,
48
- OpStatusWriter,
49
- UpdateEntitiesGroupRequest,
50
- UpdateEntitiesGroupResponse,
51
- )
52
35
  from nucliadb_utils.authentication import requires
53
- from nucliadb_utils.utilities import get_ingest
54
-
55
-
56
- @api.post(
57
- f"/{KB_PREFIX}/{{kbid}}/entitiesgroups",
58
- status_code=200,
59
- summary="Create Knowledge Box Entities Group",
60
- tags=["Knowledge Box Services"],
61
- openapi_extra={"x-operation_order": 1},
62
- )
63
- @requires(NucliaDBRoles.WRITER)
64
- @version(1)
65
- async def create_entities_group(request: Request, kbid: str, item: CreateEntitiesGroupPayload):
66
- ingest = get_ingest()
67
-
68
- pbrequest: NewEntitiesGroupRequest = NewEntitiesGroupRequest()
69
- pbrequest.kb.uuid = kbid
70
- pbrequest.group = item.group
71
- pbrequest.entities.custom = True
72
- if item.title:
73
- pbrequest.entities.title = item.title
74
- if item.color:
75
- pbrequest.entities.color = item.color
76
-
77
- for key, entity in item.entities.items():
78
- entitypb = pbrequest.entities.entities[key]
79
- entitypb.value = entity.value
80
- entitypb.merged = entity.merged
81
- entitypb.deleted = False
82
- entitypb.represents.extend(entity.represents)
83
-
84
- status: NewEntitiesGroupResponse = await ingest.NewEntitiesGroup(pbrequest) # type: ignore
85
- if status.status == NewEntitiesGroupResponse.Status.OK:
86
- return
87
- elif status.status == NewEntitiesGroupResponse.Status.KB_NOT_FOUND:
88
- return HTTPNotFound(detail="Knowledge Box does not exist")
89
- elif status.status == NewEntitiesGroupResponse.Status.ALREADY_EXISTS:
90
- return HTTPConflict(
91
- detail=f"Entities group {item.group} already exists in this Knowledge box",
92
- )
93
- elif status.status == NewEntitiesGroupResponse.Status.ERROR:
94
- return HTTPInternalServerError(detail="Error on settings entities on a Knowledge box")
95
-
96
-
97
- @api.patch(
98
- f"/{KB_PREFIX}/{{kbid}}/entitiesgroup/{{group}}",
99
- status_code=200,
100
- summary="Update Knowledge Box Entities Group",
101
- tags=["Knowledge Box Services"],
102
- openapi_extra={"x-operation_order": 2},
103
- )
104
- @requires(NucliaDBRoles.WRITER)
105
- @version(1)
106
- async def update_entities_group(
107
- request: Request, kbid: str, group: str, item: UpdateEntitiesGroupPayload
108
- ):
109
- ingest = get_ingest()
110
-
111
- pbrequest: UpdateEntitiesGroupRequest = UpdateEntitiesGroupRequest()
112
- pbrequest.kb.uuid = kbid
113
- pbrequest.group = group
114
- pbrequest.title = item.title or ""
115
- pbrequest.color = item.color or ""
116
-
117
- for name, entity in item.add.items():
118
- entitypb = pbrequest.add[name]
119
- entitypb.value = entity.value
120
- entitypb.merged = entity.merged
121
- entitypb.represents.extend(entity.represents)
122
-
123
- for name, entity in item.update.items():
124
- entitypb = pbrequest.update[name]
125
- entitypb.value = entity.value
126
- entitypb.merged = entity.merged
127
- entitypb.represents.extend(entity.represents)
128
-
129
- pbrequest.delete.extend(item.delete)
130
-
131
- status: UpdateEntitiesGroupResponse = await ingest.UpdateEntitiesGroup(pbrequest) # type: ignore
132
- if status.status == UpdateEntitiesGroupResponse.Status.OK:
133
- return
134
- elif status.status == UpdateEntitiesGroupResponse.Status.KB_NOT_FOUND:
135
- return HTTPNotFound(detail="Knowledge Box does not exist")
136
- elif status.status == UpdateEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND:
137
- return HTTPNotFound(detail="Entities group does not exist")
138
- elif status.status == UpdateEntitiesGroupResponse.Status.ERROR:
139
- return HTTPInternalServerError(detail="Error on settings entities on a Knowledge box")
140
-
141
-
142
- @api.delete(
143
- f"/{KB_PREFIX}/{{kbid}}/entitiesgroup/{{group}}",
144
- status_code=200,
145
- summary="Delete Knowledge Box Entities",
146
- tags=["Knowledge Box Services"],
147
- openapi_extra={"x-operation_order": 3},
148
- )
149
- @requires(NucliaDBRoles.WRITER)
150
- @version(1)
151
- async def delete_entities(request: Request, kbid: str, group: str):
152
- ingest = get_ingest()
153
- pbrequest: DelEntitiesRequest = DelEntitiesRequest()
154
- pbrequest.kb.uuid = kbid
155
- pbrequest.group = group
156
-
157
- status: OpStatusWriter = await ingest.DelEntities(pbrequest) # type: ignore
158
- if status.status == OpStatusWriter.Status.OK:
159
- return None
160
- elif status.status == OpStatusWriter.Status.NOTFOUND:
161
- raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
162
- elif status.status == OpStatusWriter.Status.ERROR:
163
- raise HTTPException(status_code=500, detail="Error on deleting entities from a Knowledge box")
164
-
165
- return Response(status_code=204)
166
36
 
167
37
 
168
38
  @api.post(
@@ -174,7 +44,15 @@ async def delete_entities(request: Request, kbid: str, group: str):
174
44
  )
175
45
  @requires(NucliaDBRoles.WRITER)
176
46
  @version(1)
177
- async def set_labelset_endpoint(request: Request, kbid: str, labelset: str, item: LabelSet):
47
+ async def set_labelset_endpoint(
48
+ request: Request,
49
+ kbid: str,
50
+ labelset: str = Path(
51
+ title="The ID of the labelset to create or update. This is a unique identifier that should be used at search time.",
52
+ examples=["categories", "movie-genres", "document-types"],
53
+ ),
54
+ item: LabelSet = Body(...),
55
+ ):
178
56
  if item.title is None:
179
57
  item.title = labelset
180
58