nucliadb 6.9.1.post5192__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. migrations/0023_backfill_pg_catalog.py +2 -2
  2. migrations/0029_backfill_field_status.py +3 -4
  3. migrations/0032_remove_old_relations.py +2 -3
  4. migrations/0038_backfill_catalog_field_labels.py +2 -2
  5. migrations/0039_backfill_converation_splits_metadata.py +2 -2
  6. migrations/0041_reindex_conversations.py +137 -0
  7. migrations/pg/0010_shards_index.py +34 -0
  8. nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
  9. migrations/pg/0012_catalog_statistics_undo.py +26 -0
  10. nucliadb/backups/create.py +2 -15
  11. nucliadb/backups/restore.py +4 -15
  12. nucliadb/backups/tasks.py +4 -1
  13. nucliadb/common/back_pressure/cache.py +2 -3
  14. nucliadb/common/back_pressure/materializer.py +7 -13
  15. nucliadb/common/back_pressure/settings.py +6 -6
  16. nucliadb/common/back_pressure/utils.py +1 -0
  17. nucliadb/common/cache.py +9 -9
  18. nucliadb/common/catalog/interface.py +12 -12
  19. nucliadb/common/catalog/pg.py +41 -29
  20. nucliadb/common/catalog/utils.py +3 -3
  21. nucliadb/common/cluster/manager.py +5 -4
  22. nucliadb/common/cluster/rebalance.py +483 -114
  23. nucliadb/common/cluster/rollover.py +25 -9
  24. nucliadb/common/cluster/settings.py +3 -8
  25. nucliadb/common/cluster/utils.py +34 -8
  26. nucliadb/common/context/__init__.py +7 -8
  27. nucliadb/common/context/fastapi.py +1 -2
  28. nucliadb/common/datamanagers/__init__.py +2 -4
  29. nucliadb/common/datamanagers/atomic.py +4 -2
  30. nucliadb/common/datamanagers/cluster.py +1 -2
  31. nucliadb/common/datamanagers/fields.py +3 -4
  32. nucliadb/common/datamanagers/kb.py +6 -6
  33. nucliadb/common/datamanagers/labels.py +2 -3
  34. nucliadb/common/datamanagers/resources.py +10 -33
  35. nucliadb/common/datamanagers/rollover.py +5 -7
  36. nucliadb/common/datamanagers/search_configurations.py +1 -2
  37. nucliadb/common/datamanagers/synonyms.py +1 -2
  38. nucliadb/common/datamanagers/utils.py +4 -4
  39. nucliadb/common/datamanagers/vectorsets.py +4 -4
  40. nucliadb/common/external_index_providers/base.py +32 -5
  41. nucliadb/common/external_index_providers/manager.py +4 -5
  42. nucliadb/common/filter_expression.py +128 -40
  43. nucliadb/common/http_clients/processing.py +12 -23
  44. nucliadb/common/ids.py +6 -4
  45. nucliadb/common/locking.py +1 -2
  46. nucliadb/common/maindb/driver.py +9 -8
  47. nucliadb/common/maindb/local.py +5 -5
  48. nucliadb/common/maindb/pg.py +9 -8
  49. nucliadb/common/nidx.py +3 -4
  50. nucliadb/export_import/datamanager.py +4 -3
  51. nucliadb/export_import/exporter.py +11 -19
  52. nucliadb/export_import/importer.py +13 -6
  53. nucliadb/export_import/tasks.py +2 -0
  54. nucliadb/export_import/utils.py +6 -18
  55. nucliadb/health.py +2 -2
  56. nucliadb/ingest/app.py +8 -8
  57. nucliadb/ingest/consumer/consumer.py +8 -10
  58. nucliadb/ingest/consumer/pull.py +3 -8
  59. nucliadb/ingest/consumer/service.py +3 -3
  60. nucliadb/ingest/consumer/utils.py +1 -1
  61. nucliadb/ingest/fields/base.py +28 -49
  62. nucliadb/ingest/fields/conversation.py +12 -12
  63. nucliadb/ingest/fields/exceptions.py +1 -2
  64. nucliadb/ingest/fields/file.py +22 -8
  65. nucliadb/ingest/fields/link.py +7 -7
  66. nucliadb/ingest/fields/text.py +2 -3
  67. nucliadb/ingest/orm/brain_v2.py +78 -64
  68. nucliadb/ingest/orm/broker_message.py +2 -4
  69. nucliadb/ingest/orm/entities.py +10 -209
  70. nucliadb/ingest/orm/index_message.py +4 -4
  71. nucliadb/ingest/orm/knowledgebox.py +18 -27
  72. nucliadb/ingest/orm/processor/auditing.py +1 -3
  73. nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
  74. nucliadb/ingest/orm/processor/processor.py +27 -27
  75. nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
  76. nucliadb/ingest/orm/resource.py +72 -70
  77. nucliadb/ingest/orm/utils.py +1 -1
  78. nucliadb/ingest/processing.py +17 -17
  79. nucliadb/ingest/serialize.py +202 -145
  80. nucliadb/ingest/service/writer.py +3 -109
  81. nucliadb/ingest/settings.py +3 -4
  82. nucliadb/ingest/utils.py +1 -2
  83. nucliadb/learning_proxy.py +11 -11
  84. nucliadb/metrics_exporter.py +5 -4
  85. nucliadb/middleware/__init__.py +82 -1
  86. nucliadb/migrator/datamanager.py +3 -4
  87. nucliadb/migrator/migrator.py +1 -2
  88. nucliadb/migrator/models.py +1 -2
  89. nucliadb/migrator/settings.py +1 -2
  90. nucliadb/models/internal/augment.py +614 -0
  91. nucliadb/models/internal/processing.py +19 -19
  92. nucliadb/openapi.py +2 -2
  93. nucliadb/purge/__init__.py +3 -8
  94. nucliadb/purge/orphan_shards.py +1 -2
  95. nucliadb/reader/__init__.py +5 -0
  96. nucliadb/reader/api/models.py +6 -13
  97. nucliadb/reader/api/v1/download.py +59 -38
  98. nucliadb/reader/api/v1/export_import.py +4 -4
  99. nucliadb/reader/api/v1/learning_config.py +24 -4
  100. nucliadb/reader/api/v1/resource.py +61 -9
  101. nucliadb/reader/api/v1/services.py +18 -14
  102. nucliadb/reader/app.py +3 -1
  103. nucliadb/reader/reader/notifications.py +1 -2
  104. nucliadb/search/api/v1/__init__.py +2 -0
  105. nucliadb/search/api/v1/ask.py +3 -4
  106. nucliadb/search/api/v1/augment.py +585 -0
  107. nucliadb/search/api/v1/catalog.py +11 -15
  108. nucliadb/search/api/v1/find.py +16 -22
  109. nucliadb/search/api/v1/hydrate.py +25 -25
  110. nucliadb/search/api/v1/knowledgebox.py +1 -2
  111. nucliadb/search/api/v1/predict_proxy.py +1 -2
  112. nucliadb/search/api/v1/resource/ask.py +7 -7
  113. nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
  114. nucliadb/search/api/v1/resource/search.py +9 -11
  115. nucliadb/search/api/v1/retrieve.py +130 -0
  116. nucliadb/search/api/v1/search.py +28 -32
  117. nucliadb/search/api/v1/suggest.py +11 -14
  118. nucliadb/search/api/v1/summarize.py +1 -2
  119. nucliadb/search/api/v1/utils.py +2 -2
  120. nucliadb/search/app.py +3 -2
  121. nucliadb/search/augmentor/__init__.py +21 -0
  122. nucliadb/search/augmentor/augmentor.py +232 -0
  123. nucliadb/search/augmentor/fields.py +704 -0
  124. nucliadb/search/augmentor/metrics.py +24 -0
  125. nucliadb/search/augmentor/paragraphs.py +334 -0
  126. nucliadb/search/augmentor/resources.py +238 -0
  127. nucliadb/search/augmentor/utils.py +33 -0
  128. nucliadb/search/lifecycle.py +3 -1
  129. nucliadb/search/predict.py +24 -17
  130. nucliadb/search/predict_models.py +8 -9
  131. nucliadb/search/requesters/utils.py +11 -10
  132. nucliadb/search/search/cache.py +19 -23
  133. nucliadb/search/search/chat/ask.py +88 -59
  134. nucliadb/search/search/chat/exceptions.py +3 -5
  135. nucliadb/search/search/chat/fetcher.py +201 -0
  136. nucliadb/search/search/chat/images.py +6 -4
  137. nucliadb/search/search/chat/old_prompt.py +1375 -0
  138. nucliadb/search/search/chat/parser.py +510 -0
  139. nucliadb/search/search/chat/prompt.py +563 -615
  140. nucliadb/search/search/chat/query.py +449 -36
  141. nucliadb/search/search/chat/rpc.py +85 -0
  142. nucliadb/search/search/fetch.py +3 -4
  143. nucliadb/search/search/filters.py +8 -11
  144. nucliadb/search/search/find.py +33 -31
  145. nucliadb/search/search/find_merge.py +124 -331
  146. nucliadb/search/search/graph_strategy.py +14 -12
  147. nucliadb/search/search/hydrator/__init__.py +3 -152
  148. nucliadb/search/search/hydrator/fields.py +92 -50
  149. nucliadb/search/search/hydrator/images.py +7 -7
  150. nucliadb/search/search/hydrator/paragraphs.py +42 -26
  151. nucliadb/search/search/hydrator/resources.py +20 -16
  152. nucliadb/search/search/ingestion_agents.py +5 -5
  153. nucliadb/search/search/merge.py +90 -94
  154. nucliadb/search/search/metrics.py +10 -9
  155. nucliadb/search/search/paragraphs.py +7 -9
  156. nucliadb/search/search/predict_proxy.py +13 -9
  157. nucliadb/search/search/query.py +14 -86
  158. nucliadb/search/search/query_parser/fetcher.py +51 -82
  159. nucliadb/search/search/query_parser/models.py +19 -20
  160. nucliadb/search/search/query_parser/old_filters.py +20 -19
  161. nucliadb/search/search/query_parser/parsers/ask.py +4 -5
  162. nucliadb/search/search/query_parser/parsers/catalog.py +5 -6
  163. nucliadb/search/search/query_parser/parsers/common.py +5 -6
  164. nucliadb/search/search/query_parser/parsers/find.py +6 -26
  165. nucliadb/search/search/query_parser/parsers/graph.py +13 -23
  166. nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
  167. nucliadb/search/search/query_parser/parsers/search.py +15 -53
  168. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
  169. nucliadb/search/search/rank_fusion.py +18 -13
  170. nucliadb/search/search/rerankers.py +5 -6
  171. nucliadb/search/search/retrieval.py +300 -0
  172. nucliadb/search/search/summarize.py +5 -6
  173. nucliadb/search/search/utils.py +3 -4
  174. nucliadb/search/settings.py +1 -2
  175. nucliadb/standalone/api_router.py +1 -1
  176. nucliadb/standalone/app.py +4 -3
  177. nucliadb/standalone/auth.py +5 -6
  178. nucliadb/standalone/lifecycle.py +2 -2
  179. nucliadb/standalone/run.py +2 -4
  180. nucliadb/standalone/settings.py +5 -6
  181. nucliadb/standalone/versions.py +3 -4
  182. nucliadb/tasks/consumer.py +13 -8
  183. nucliadb/tasks/models.py +2 -1
  184. nucliadb/tasks/producer.py +3 -3
  185. nucliadb/tasks/retries.py +8 -7
  186. nucliadb/train/api/utils.py +1 -3
  187. nucliadb/train/api/v1/shards.py +1 -2
  188. nucliadb/train/api/v1/trainset.py +1 -2
  189. nucliadb/train/app.py +1 -1
  190. nucliadb/train/generator.py +4 -4
  191. nucliadb/train/generators/field_classifier.py +2 -2
  192. nucliadb/train/generators/field_streaming.py +6 -6
  193. nucliadb/train/generators/image_classifier.py +2 -2
  194. nucliadb/train/generators/paragraph_classifier.py +2 -2
  195. nucliadb/train/generators/paragraph_streaming.py +2 -2
  196. nucliadb/train/generators/question_answer_streaming.py +2 -2
  197. nucliadb/train/generators/sentence_classifier.py +2 -2
  198. nucliadb/train/generators/token_classifier.py +3 -2
  199. nucliadb/train/generators/utils.py +6 -5
  200. nucliadb/train/nodes.py +3 -3
  201. nucliadb/train/resource.py +6 -8
  202. nucliadb/train/settings.py +3 -4
  203. nucliadb/train/types.py +11 -11
  204. nucliadb/train/upload.py +3 -2
  205. nucliadb/train/uploader.py +1 -2
  206. nucliadb/train/utils.py +1 -2
  207. nucliadb/writer/api/v1/export_import.py +4 -1
  208. nucliadb/writer/api/v1/field.py +7 -11
  209. nucliadb/writer/api/v1/knowledgebox.py +3 -4
  210. nucliadb/writer/api/v1/resource.py +9 -20
  211. nucliadb/writer/api/v1/services.py +10 -132
  212. nucliadb/writer/api/v1/upload.py +73 -72
  213. nucliadb/writer/app.py +8 -2
  214. nucliadb/writer/resource/basic.py +12 -15
  215. nucliadb/writer/resource/field.py +7 -5
  216. nucliadb/writer/resource/origin.py +7 -0
  217. nucliadb/writer/settings.py +2 -3
  218. nucliadb/writer/tus/__init__.py +2 -3
  219. nucliadb/writer/tus/azure.py +1 -3
  220. nucliadb/writer/tus/dm.py +3 -3
  221. nucliadb/writer/tus/exceptions.py +3 -4
  222. nucliadb/writer/tus/gcs.py +5 -6
  223. nucliadb/writer/tus/s3.py +2 -3
  224. nucliadb/writer/tus/storage.py +3 -3
  225. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +9 -10
  226. nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
  227. nucliadb/common/datamanagers/entities.py +0 -139
  228. nucliadb-6.9.1.post5192.dist-info/RECORD +0 -392
  229. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
  230. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
  231. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/openapi.py CHANGED
@@ -33,11 +33,11 @@ def is_versioned_route(route):
33
33
 
34
34
 
35
35
  def extract_openapi(application, version, commit_id, app_name):
36
- app = [
36
+ app = next(
37
37
  route.app
38
38
  for route in application.routes
39
39
  if is_versioned_route(route) and route.app.version == version
40
- ][0]
40
+ )
41
41
  document = get_openapi(
42
42
  title=app.title,
43
43
  version=app.version,
nucliadb/purge/__init__.py CHANGED
@@ -19,7 +19,8 @@
19
19
  #
20
20
  import asyncio
21
21
  import importlib.metadata
22
- from typing import AsyncGenerator
22
+ from collections.abc import AsyncGenerator
23
+ from itertools import batched # type: ignore
23
24
 
24
25
  from nucliadb.common import datamanagers
25
26
  from nucliadb.common.cluster.exceptions import NodeError, ShardNotFound
@@ -233,7 +234,7 @@ async def purge_kb_vectorsets(driver: Driver, storage: Storage):
233
234
  fields.extend((await resource.get_fields(force=True)).values())
234
235
 
235
236
  logger.info(f"Purging {len(fields)} fields for vectorset {vectorset}", extra={"kbid": kbid})
236
- for fields_batch in batchify(fields, 20):
237
+ for fields_batch in batched(fields, n=20):
237
238
  tasks = []
238
239
  for field in fields_batch:
239
240
  if purge_payload.storage_key_kind == VectorSetConfig.StorageKeyKind.UNSET:
@@ -317,9 +318,3 @@ def run() -> int: # pragma: no cover
317
318
  setup_logging()
318
319
  errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
319
320
  return asyncio.run(main())
320
-
321
-
322
- def batchify(iterable, n=1):
323
- """Yield successive n-sized chunks from iterable."""
324
- for i in range(0, len(iterable), n):
325
- yield iterable[i : i + n]
nucliadb/purge/orphan_shards.py CHANGED
@@ -20,7 +20,6 @@
20
20
  import argparse
21
21
  import asyncio
22
22
  import importlib.metadata
23
- from typing import Optional
24
23
 
25
24
  from grpc.aio import AioRpcError
26
25
  from nidx_protos import nodereader_pb2, noderesources_pb2
@@ -113,7 +112,7 @@ async def _get_stored_shards(driver: Driver) -> dict[str, ShardKb]:
113
112
  return stored_shards
114
113
 
115
114
 
116
- async def _get_kbid(shard_id: str) -> Optional[str]:
115
+ async def _get_kbid(shard_id: str) -> str | None:
117
116
  kbid = None
118
117
  try:
119
118
  req = nodereader_pb2.GetShardRequest()
nucliadb/reader/__init__.py CHANGED
@@ -19,6 +19,8 @@
19
19
  #
20
20
  import logging
21
21
 
22
+ from fastapi import Header
23
+
22
24
  SERVICE_NAME = "nucliadb.reader"
23
25
  logger = logging.getLogger(SERVICE_NAME)
24
26
 
@@ -35,3 +37,6 @@ class EndpointFilter(logging.Filter):
35
37
 
36
38
  # Add filter to the logger
37
39
  logging.getLogger("uvicorn.access").addFilter(EndpointFilter())
40
+
41
+
42
+ RANGE_HEADER = Header(description="Standard HTTP Range header that enable multipart requests")
nucliadb/reader/api/models.py CHANGED
@@ -17,7 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import TYPE_CHECKING, Any, Optional, Union
20
+ from typing import TYPE_CHECKING, Any
21
21
 
22
22
  from pydantic import BaseModel
23
23
 
@@ -33,14 +33,7 @@ from nucliadb_models.resource import (
33
33
  )
34
34
 
35
35
  if TYPE_CHECKING: # pragma: no cover
36
- ValueType = Optional[
37
- Union[
38
- models.FieldText,
39
- models.FieldFile,
40
- models.FieldLink,
41
- models.Conversation,
42
- ]
43
- ]
36
+ ValueType = models.FieldText | models.FieldFile | models.FieldLink | models.Conversation | None
44
37
  else:
45
38
  # without Any, pydantic fails to anything as validate() fails using the Union
46
39
  ValueType = Any
@@ -50,10 +43,10 @@ class ResourceField(BaseModel):
50
43
  field_type: FieldTypeName
51
44
  field_id: str
52
45
  value: ValueType = None
53
- extracted: Optional[ExtractedDataType] = None
54
- error: Optional[Error] = None
55
- status: Optional[str] = None
56
- errors: Optional[list[Error]] = None
46
+ extracted: ExtractedDataType | None = None
47
+ error: Error | None = None
48
+ status: str | None = None
49
+ errors: list[Error] | None = None
57
50
 
58
51
 
59
52
  FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP: dict[FieldTypeName, Any] = {
nucliadb/reader/api/v1/download.py CHANGED
@@ -18,20 +18,18 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import urllib.parse
21
- from enum import Enum
22
- from typing import Optional
21
+ from typing import Annotated
23
22
 
24
23
  from fastapi import HTTPException
25
24
  from fastapi.requests import Request
26
25
  from fastapi.responses import Response
27
26
  from fastapi_versioning import version
28
- from starlette.datastructures import Headers
29
27
  from starlette.responses import StreamingResponse
30
28
 
29
+ from nucliadb.common import datamanagers
31
30
  from nucliadb.common.ids import FIELD_TYPE_PB_TO_STR
32
31
  from nucliadb.common.models_utils import to_proto
33
- from nucliadb.ingest.serialize import get_resource_uuid_by_slug
34
- from nucliadb.reader import SERVICE_NAME, logger
32
+ from nucliadb.reader import RANGE_HEADER, SERVICE_NAME, logger
35
33
  from nucliadb_models.common import FieldTypeName
36
34
  from nucliadb_models.resource import NucliaDBRoles
37
35
  from nucliadb_utils.authentication import requires_one
@@ -41,13 +39,8 @@ from nucliadb_utils.utilities import get_storage
41
39
  from .router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
42
40
 
43
41
 
44
- class DownloadType(Enum):
45
- EXTRACTED = "extracted"
46
- FIELD = "field"
47
-
48
-
49
42
  @api.get(
50
- f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}", # noqa
43
+ f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}",
51
44
  tags=["Resource fields"],
52
45
  status_code=200,
53
46
  summary="Download extracted binary file (by slug)",
@@ -61,12 +54,20 @@ async def download_extract_file_rslug_prefix(
61
54
  field_type: FieldTypeName,
62
55
  field_id: str,
63
56
  download_field: str,
57
+ range: Annotated[str | None, RANGE_HEADER] = None,
64
58
  ) -> Response:
65
- return await _download_extract_file(request, kbid, field_type, field_id, download_field, rslug=rslug)
59
+ return await _download_extract_file(
60
+ kbid,
61
+ field_type,
62
+ field_id,
63
+ download_field,
64
+ rslug=rslug,
65
+ range_request=range,
66
+ )
66
67
 
67
68
 
68
69
  @api.get(
69
- f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}", # noqa
70
+ f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}",
70
71
  tags=["Resource fields"],
71
72
  status_code=200,
72
73
  summary="Download extracted binary file (by id)",
@@ -80,18 +81,21 @@ async def download_extract_file_rid_prefix(
80
81
  field_type: FieldTypeName,
81
82
  field_id: str,
82
83
  download_field: str,
84
+ range: Annotated[str | None, RANGE_HEADER] = None,
83
85
  ) -> Response:
84
- return await _download_extract_file(request, kbid, field_type, field_id, download_field, rid=rid)
86
+ return await _download_extract_file(
87
+ kbid, field_type, field_id, download_field, rid=rid, range_request=range
88
+ )
85
89
 
86
90
 
87
91
  async def _download_extract_file(
88
- request: Request,
89
92
  kbid: str,
90
93
  field_type: FieldTypeName,
91
94
  field_id: str,
92
95
  download_field: str,
93
- rid: Optional[str] = None,
94
- rslug: Optional[str] = None,
96
+ rid: str | None = None,
97
+ rslug: str | None = None,
98
+ range_request: str | None = None,
95
99
  ) -> Response:
96
100
  rid = await _get_resource_uuid_from_params(kbid, rid, rslug)
97
101
 
@@ -102,7 +106,7 @@ async def _download_extract_file(
102
106
 
103
107
  sf = storage.file_extracted(kbid, rid, field_type_letter, field_id, download_field)
104
108
 
105
- return await download_api(sf, request.headers)
109
+ return await download_api(sf, range_request)
106
110
 
107
111
 
108
112
  @api.get(
@@ -119,8 +123,9 @@ async def download_field_file_rslug_prefix(
119
123
  rslug: str,
120
124
  field_id: str,
121
125
  inline: bool = False,
126
+ range: Annotated[str | None, RANGE_HEADER] = None,
122
127
  ) -> Response:
123
- return await _download_field_file(request, kbid, field_id, rslug=rslug, inline=inline)
128
+ return await _download_field_file(kbid, field_id, rslug=rslug, range_request=range, inline=inline)
124
129
 
125
130
 
126
131
  @api.get(
@@ -137,16 +142,17 @@ async def download_field_file_rid_prefix(
137
142
  rid: str,
138
143
  field_id: str,
139
144
  inline: bool = False,
145
+ range: Annotated[str | None, RANGE_HEADER] = None,
140
146
  ) -> Response:
141
- return await _download_field_file(request, kbid, field_id, rid=rid, inline=inline)
147
+ return await _download_field_file(kbid, field_id, rid=rid, range_request=range, inline=inline)
142
148
 
143
149
 
144
150
  async def _download_field_file(
145
- request: Request,
146
151
  kbid: str,
147
152
  field_id: str,
148
- rid: Optional[str] = None,
149
- rslug: Optional[str] = None,
153
+ rid: str | None = None,
154
+ rslug: str | None = None,
155
+ range_request: str | None = None,
150
156
  inline: bool = False,
151
157
  ) -> Response:
152
158
  rid = await _get_resource_uuid_from_params(kbid, rid, rslug)
@@ -155,11 +161,11 @@ async def _download_field_file(
155
161
 
156
162
  sf = storage.file_field(kbid, rid, field_id)
157
163
 
158
- return await download_api(sf, request.headers, inline=inline)
164
+ return await download_api(sf, range_request=range_request, inline=inline)
159
165
 
160
166
 
161
167
  @api.get(
162
- f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}", # noqa
168
+ f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}",
163
169
  tags=["Resource fields"],
164
170
  status_code=200,
165
171
  summary="Download conversation binary field (by slug)",
@@ -173,14 +179,20 @@ async def download_field_conversation_rslug_prefix(
173
179
  field_id: str,
174
180
  message_id: str,
175
181
  file_num: int,
182
+ range: Annotated[str | None, RANGE_HEADER] = None,
176
183
  ) -> Response:
177
184
  return await _download_field_conversation_attachment(
178
- request, kbid, field_id, message_id, file_num, rslug=rslug
185
+ kbid,
186
+ field_id,
187
+ message_id,
188
+ file_num,
189
+ rslug=rslug,
190
+ range_request=range,
179
191
  )
180
192
 
181
193
 
182
194
  @api.get(
183
- f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}", # noqa
195
+ f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}",
184
196
  tags=["Resource fields"],
185
197
  status_code=200,
186
198
  summary="Download conversation binary field (by id)",
@@ -194,20 +206,26 @@ async def download_field_conversation_attachment_rid_prefix(
194
206
  field_id: str,
195
207
  message_id: str,
196
208
  file_num: int,
209
+ range: Annotated[str | None, RANGE_HEADER] = None,
197
210
  ) -> Response:
198
211
  return await _download_field_conversation_attachment(
199
- request, kbid, field_id, message_id, file_num, rid=rid
212
+ kbid,
213
+ field_id,
214
+ message_id,
215
+ file_num,
216
+ rid=rid,
217
+ range_request=range,
200
218
  )
201
219
 
202
220
 
203
221
  async def _download_field_conversation_attachment(
204
- request: Request,
205
222
  kbid: str,
206
223
  field_id: str,
207
224
  message_id: str,
208
225
  file_num: int,
209
- rid: Optional[str] = None,
210
- rslug: Optional[str] = None,
226
+ rid: str | None = None,
227
+ rslug: str | None = None,
228
+ range_request: str | None = None,
211
229
  ) -> Response:
212
230
  rid = await _get_resource_uuid_from_params(kbid, rid, rslug)
213
231
 
@@ -217,11 +235,11 @@ async def _download_field_conversation_attachment(
217
235
  kbid, rid, field_id, message_id, attachment_index=file_num
218
236
  )
219
237
 
220
- return await download_api(sf, request.headers)
238
+ return await download_api(sf, range_request)
221
239
 
222
240
 
223
- async def download_api(sf: StorageField, headers: Headers, inline: bool = False):
224
- metadata: Optional[ObjectMetadata] = await sf.exists()
241
+ async def download_api(sf: StorageField, range_request: str | None = None, inline: bool = False):
242
+ metadata: ObjectMetadata | None = await sf.exists()
225
243
  if metadata is None:
226
244
  raise HTTPException(status_code=404, detail="Specified file doesn't exist")
227
245
 
@@ -240,9 +258,8 @@ async def download_api(sf: StorageField, headers: Headers, inline: bool = False)
240
258
  }
241
259
 
242
260
  range = Range()
243
- if "range" in headers and file_size > -1:
261
+ if range_request and file_size > -1:
244
262
  status_code = 206
245
- range_request = headers["range"]
246
263
  try:
247
264
  start, end, range_size = parse_media_range(range_request, file_size)
248
265
  except NotImplementedError:
@@ -295,13 +312,17 @@ async def download_api(sf: StorageField, headers: Headers, inline: bool = False)
295
312
  )
296
313
 
297
314
 
298
- async def _get_resource_uuid_from_params(kbid, rid: Optional[str], rslug: Optional[str]) -> str:
315
+ async def _get_resource_uuid_from_params(kbid, rid: str | None, rslug: str | None) -> str:
299
316
  if not any([rid, rslug]):
300
317
  raise ValueError("Either rid or slug must be set")
301
318
 
302
319
  if not rid:
303
320
  # Attempt to get it from slug
304
- rid = await get_resource_uuid_by_slug(kbid, rslug, service_name=SERVICE_NAME) # type: ignore
321
+ rid = await datamanagers.atomic.resources.get_resource_uuid_from_slug(
322
+ kbid=kbid,
323
+ # mypy doesn't infer that we already checked for slug to be something
324
+ slug=rslug, # type: ignore[arg-type]
325
+ )
305
326
  if rid is None:
306
327
  raise HTTPException(status_code=404, detail="Resource does not exist")
307
328
 
nucliadb/reader/api/v1/export_import.py CHANGED
@@ -17,7 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import AsyncGenerator, AsyncIterable, Union
20
+ from collections.abc import AsyncGenerator, AsyncIterable
21
21
 
22
22
  from fastapi.responses import StreamingResponse
23
23
  from fastapi_versioning import version
@@ -108,7 +108,7 @@ async def download_export_and_delete(
108
108
  @version(1)
109
109
  async def get_export_status_endpoint(
110
110
  request: Request, kbid: str, export_id: str
111
- ) -> Union[StatusResponse, HTTPClientError]:
111
+ ) -> StatusResponse | HTTPClientError:
112
112
  context = get_app_context(request.app)
113
113
  if not await exists_kb(kbid):
114
114
  return HTTPClientError(status_code=404, detail="Knowledge Box not found")
@@ -127,7 +127,7 @@ async def get_export_status_endpoint(
127
127
  @version(1)
128
128
  async def get_import_status_endpoint(
129
129
  request: Request, kbid: str, import_id: str
130
- ) -> Union[StatusResponse, HTTPClientError]:
130
+ ) -> StatusResponse | HTTPClientError:
131
131
  context = get_app_context(request.app)
132
132
  if not await exists_kb(kbid):
133
133
  return HTTPClientError(status_code=404, detail="Knowledge Box not found")
@@ -137,7 +137,7 @@ async def get_import_status_endpoint(
137
137
 
138
138
  async def _get_status(
139
139
  context: ApplicationContext, type: str, kbid: str, id: str
140
- ) -> Union[StatusResponse, HTTPClientError]:
140
+ ) -> StatusResponse | HTTPClientError:
141
141
  if type not in ("export", "import"):
142
142
  raise ValueError(f"Incorrect type: {type}")
143
143
 
nucliadb/reader/api/v1/learning_config.py CHANGED
@@ -17,7 +17,6 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import Dict
21
20
 
22
21
  from fastapi import Header, Request
23
22
  from fastapi_versioning import version
@@ -35,7 +34,7 @@ from nucliadb_utils.settings import is_onprem_nucliadb
35
34
  path=f"/{KB_PREFIX}/{{kbid}}/models/{{model_id}}/{{filename:path}}",
36
35
  status_code=200,
37
36
  summary="Download the Knowledege Box model",
38
- description="Download the trained model or any other generated file as a result of a training task on a Knowledge Box.", # noqa
37
+ description="Download the trained model or any other generated file as a result of a training task on a Knowledge Box.",
39
38
  response_model=None,
40
39
  tags=["Models"],
41
40
  )
@@ -128,6 +127,27 @@ async def get_schema_for_configuration_updates(
128
127
  )
129
128
 
130
129
 
130
+ @api.get(
131
+ path=f"/{KB_PREFIX}/{{kbid}}/generative_providers",
132
+ status_code=200,
133
+ summary="Available models for a knowledge box",
134
+ description="Get all available models for a knowledge box grouped by provider",
135
+ response_model=None,
136
+ tags=["Models"],
137
+ )
138
+ @requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
139
+ @version(1)
140
+ async def get_models_group_by_providers(
141
+ request: Request, kbid: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
142
+ ):
143
+ return await learning_config_proxy(
144
+ request,
145
+ "GET",
146
+ f"/generative_providers/{kbid}",
147
+ headers={"account-id": x_nucliadb_account},
148
+ )
149
+
150
+
131
151
  @api.get(
132
152
  path=f"/nua/schema",
133
153
  status_code=200,
@@ -152,7 +172,7 @@ async def get_schema_for_configuration_creation(
152
172
  status_code=200,
153
173
  summary="Learning extract strategies",
154
174
  description="Get available extract strategies ",
155
- response_model=Dict[str, ExtractConfig],
175
+ response_model=dict[str, ExtractConfig],
156
176
  tags=["Extract Strategies"],
157
177
  )
158
178
  @requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
@@ -189,7 +209,7 @@ async def get_extract_strategy_from_id(
189
209
  status_code=200,
190
210
  summary="Learning split strategies",
191
211
  description="Get available split strategies ",
192
- response_model=Dict[str, SplitConfiguration],
212
+ response_model=dict[str, SplitConfiguration],
193
213
  tags=["Split Strategies"],
194
214
  )
195
215
  @requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
nucliadb/reader/api/v1/resource.py CHANGED
@@ -17,11 +17,12 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import Optional, Union
20
+ from typing import cast
21
21
 
22
22
  from fastapi import Header, HTTPException, Query, Request, Response
23
23
  from fastapi_versioning import version
24
24
 
25
+ from nucliadb.common import datamanagers
25
26
  from nucliadb.common.datamanagers.resources import KB_RESOURCE_SLUG_BASE
26
27
  from nucliadb.common.maindb.utils import get_driver
27
28
  from nucliadb.common.models_utils import from_proto, to_proto
@@ -58,6 +59,57 @@ from nucliadb_utils.authentication import requires, requires_one
58
59
  from nucliadb_utils.utilities import get_audit, get_storage
59
60
 
60
61
 
62
+ @api.head(
63
+ f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}",
64
+ status_code=200,
65
+ summary="Head Resource (by id)",
66
+ responses={404: {"description": "Resource does not exist"}},
67
+ tags=["Resources"],
68
+ )
69
+ @requires(NucliaDBRoles.READER)
70
+ @version(1)
71
+ async def head_resource_by_uuid(
72
+ request: Request,
73
+ kbid: str,
74
+ rid: str,
75
+ ):
76
+ return await head_resource(kbid=kbid, rid=rid)
77
+
78
+
79
+ @api.head(
80
+ f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}",
81
+ status_code=200,
82
+ summary="Head Resource (by slug)",
83
+ responses={404: {"description": "Resource does not exist"}},
84
+ tags=["Resources"],
85
+ )
86
+ @requires(NucliaDBRoles.READER)
87
+ @version(1)
88
+ async def head_resource_by_slug(
89
+ request: Request,
90
+ kbid: str,
91
+ rslug: str,
92
+ ):
93
+ return await head_resource(kbid=kbid, rslug=rslug)
94
+
95
+
96
+ async def head_resource(
97
+ *,
98
+ kbid: str,
99
+ rslug: str | None = None,
100
+ rid: str | None = None,
101
+ ) -> None:
102
+ if all([rslug, rid]) or not any([rslug, rid]):
103
+ raise ValueError("Either rid or rslug must be provided, but not both")
104
+ if rid is None:
105
+ rslug = cast(str, rslug)
106
+ rid = await datamanagers.atomic.resources.get_resource_uuid_from_slug(kbid=kbid, slug=rslug)
107
+ if rid is None:
108
+ raise HTTPException(status_code=404, detail="Resource does not exist")
109
+ if not await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=rid):
110
+ raise HTTPException(status_code=404, detail="Resource does not exist")
111
+
112
+
61
113
  @api.get(
62
114
  f"/{KB_PREFIX}/{{kbid}}/resources",
63
115
  status_code=200,
@@ -211,8 +263,8 @@ async def get_resource_by_slug(
211
263
 
212
264
  async def _get_resource(
213
265
  *,
214
- rslug: Optional[str] = None,
215
- rid: Optional[str] = None,
266
+ rslug: str | None = None,
267
+ rid: str | None = None,
216
268
  kbid: str,
217
269
  show: list[ResourceProperties],
218
270
  field_type_filter: list[FieldTypeName],
@@ -269,7 +321,7 @@ async def get_resource_field_rslug_prefix(
269
321
  ),
270
322
  # not working with latest pydantic/fastapi
271
323
  # page: Union[Literal["last", "first"], int] = Query("last"),
272
- page: Union[str, int] = Query("last"),
324
+ page: str | int = Query("last"),
273
325
  ) -> Response:
274
326
  return await _get_resource_field(
275
327
  kbid,
@@ -309,7 +361,7 @@ async def get_resource_field_rid_prefix(
309
361
  ),
310
362
  # not working with latest pydantic/fastapi
311
363
  # page: Union[Literal["last", "first"], int] = Query("last"),
312
- page: Union[str, int] = Query("last"),
364
+ page: str | int = Query("last"),
313
365
  ) -> Response:
314
366
  return await _get_resource_field(
315
367
  kbid,
@@ -328,9 +380,9 @@ async def _get_resource_field(
328
380
  field_id: str,
329
381
  show: list[ResourceFieldProperties],
330
382
  extracted: list[ExtractedDataTypeName],
331
- page: Union[str, int],
332
- rid: Optional[str] = None,
333
- rslug: Optional[str] = None,
383
+ page: str | int,
384
+ rid: str | None = None,
385
+ rslug: str | None = None,
334
386
  ) -> Response:
335
387
  storage = await get_storage(service_name=SERVICE_NAME)
336
388
  driver = get_driver()
@@ -344,7 +396,7 @@ async def _get_resource_field(
344
396
  if rid is None:
345
397
  raise HTTPException(status_code=404, detail="Resource does not exist")
346
398
 
347
- resource = ORMResource(txn, storage, kb, rid)
399
+ resource = ORMResource(txn, storage, kbid, rid)
348
400
  field = await resource.get_field(field_id, pb_field_id, load=True)
349
401
  if field is None:
350
402
  raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
@@ -18,9 +18,8 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import asyncio
21
- from typing import Optional, Union
22
21
 
23
- from fastapi import HTTPException
22
+ from fastapi import HTTPException, Path
24
23
  from fastapi.responses import StreamingResponse
25
24
  from fastapi_versioning import version
26
25
  from google.protobuf.json_format import MessageToDict
@@ -69,7 +68,7 @@ from nucliadb_utils.utilities import get_ingest, get_storage
69
68
  @version(1)
70
69
  async def get_entities(
71
70
  request: Request, kbid: str, show_entities: bool = False
72
- ) -> Union[KnowledgeBoxEntities, HTTPClientError]:
71
+ ) -> KnowledgeBoxEntities | HTTPClientError:
73
72
  if show_entities:
74
73
  return HTTPClientError(
75
74
  status_code=400,
@@ -169,7 +168,14 @@ async def get_labelsets(kbid: str) -> KnowledgeBoxLabels:
169
168
  )
170
169
  @requires(NucliaDBRoles.READER)
171
170
  @version(1)
172
- async def get_labelset_endpoint(request: Request, kbid: str, labelset: str) -> LabelSet:
171
+ async def get_labelset_endpoint(
172
+ request: Request,
173
+ kbid: str,
174
+ labelset: str = Path(
175
+ title="The ID of the labelset to get. This is a unique identifier that should be used at search time.",
176
+ examples=["categories", "movie-genres", "document-types"],
177
+ ),
178
+ ) -> LabelSet:
173
179
  try:
174
180
  return await get_labelset(kbid, labelset)
175
181
  except KnowledgeBoxNotFound:
@@ -186,7 +192,7 @@ async def get_labelset(kbid: str, labelset_id: str) -> LabelSet:
186
192
  kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
187
193
  if not kb_exists:
188
194
  raise KnowledgeBoxNotFound()
189
- labelset: Optional[writer_pb2.LabelSet] = await datamanagers.atomic.labelset.get(
195
+ labelset: writer_pb2.LabelSet | None = await datamanagers.atomic.labelset.get(
190
196
  kbid=kbid, labelset_id=labelset_id
191
197
  )
192
198
  if labelset is None:
@@ -222,17 +228,15 @@ async def get_custom_synonyms(request: Request, kbid: str):
222
228
  f"/{KB_PREFIX}/{{kbid}}/notifications",
223
229
  status_code=200,
224
230
  summary="Knowledge Box Notifications Stream",
225
- description="Provides a stream of activity notifications for the given Knowledge Box. The stream will be automatically closed after 2 minutes.", # noqa: E501
231
+ description="Provides a stream of activity notifications for the given Knowledge Box. The stream will be automatically closed after 2 minutes.",
226
232
  tags=["Knowledge Box Services"],
227
- response_description="Each line of the response is a Base64-encoded JSON object representing a notification. Refer to [the internal documentation](https://github.com/nuclia/nucliadb/blob/main/docs/tutorials/KB_NOTIFICATIONS.md) for a more detailed explanation of each notification type.", # noqa: E501
233
+ response_description="Each line of the response is a Base64-encoded JSON object representing a notification. Refer to [the internal documentation](https://github.com/nuclia/nucliadb/blob/main/docs/tutorials/KB_NOTIFICATIONS.md) for a more detailed explanation of each notification type.",
228
234
  response_model=None,
229
235
  responses={"404": {"description": "Knowledge Box not found"}},
230
236
  )
231
237
  @requires(NucliaDBRoles.READER)
232
238
  @version(1)
233
- async def notifications_endpoint(
234
- request: Request, kbid: str
235
- ) -> Union[StreamingResponse, HTTPClientError]:
239
+ async def notifications_endpoint(request: Request, kbid: str) -> StreamingResponse | HTTPClientError:
236
240
  if in_standalone_mode():
237
241
  return HTTPClientError(
238
242
  status_code=404,
@@ -274,10 +278,10 @@ async def exists_kb(kbid: str) -> bool:
274
278
  async def processing_status(
275
279
  request: Request,
276
280
  kbid: str,
277
- cursor: Optional[str] = None,
278
- scheduled: Optional[bool] = None,
281
+ cursor: str | None = None,
282
+ scheduled: bool | None = None,
279
283
  limit: int = 20,
280
- ) -> Union[processing.RequestsResults, HTTPClientError]:
284
+ ) -> processing.RequestsResults | HTTPClientError:
281
285
  if not await exists_kb(kbid=kbid):
282
286
  return HTTPClientError(status_code=404, detail="Knowledge Box not found")
283
287
 
@@ -294,7 +298,7 @@ async def processing_status(
294
298
 
295
299
  async def _composition(
296
300
  result: processing.RequestsResult,
297
- ) -> Optional[processing.RequestsResult]:
301
+ ) -> processing.RequestsResult | None:
298
302
  async with max_simultaneous:
299
303
  resource = await kb.get(result.resource_id)
300
304
  if resource is None: