nucliadb 6.7.2.post4874__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. migrations/0023_backfill_pg_catalog.py +8 -4
  2. migrations/0028_extracted_vectors_reference.py +1 -1
  3. migrations/0029_backfill_field_status.py +3 -4
  4. migrations/0032_remove_old_relations.py +2 -3
  5. migrations/0038_backfill_catalog_field_labels.py +8 -4
  6. migrations/0039_backfill_converation_splits_metadata.py +106 -0
  7. migrations/0040_migrate_search_configurations.py +79 -0
  8. migrations/0041_reindex_conversations.py +137 -0
  9. migrations/pg/0010_shards_index.py +34 -0
  10. nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
  11. migrations/pg/0012_catalog_statistics_undo.py +26 -0
  12. nucliadb/backups/create.py +2 -15
  13. nucliadb/backups/restore.py +4 -15
  14. nucliadb/backups/tasks.py +4 -1
  15. nucliadb/common/back_pressure/cache.py +2 -3
  16. nucliadb/common/back_pressure/materializer.py +7 -13
  17. nucliadb/common/back_pressure/settings.py +6 -6
  18. nucliadb/common/back_pressure/utils.py +1 -0
  19. nucliadb/common/cache.py +9 -9
  20. nucliadb/common/catalog/__init__.py +79 -0
  21. nucliadb/common/catalog/dummy.py +36 -0
  22. nucliadb/common/catalog/interface.py +85 -0
  23. nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
  24. nucliadb/common/catalog/utils.py +56 -0
  25. nucliadb/common/cluster/manager.py +8 -23
  26. nucliadb/common/cluster/rebalance.py +484 -112
  27. nucliadb/common/cluster/rollover.py +36 -9
  28. nucliadb/common/cluster/settings.py +4 -9
  29. nucliadb/common/cluster/utils.py +34 -8
  30. nucliadb/common/context/__init__.py +7 -8
  31. nucliadb/common/context/fastapi.py +1 -2
  32. nucliadb/common/datamanagers/__init__.py +2 -4
  33. nucliadb/common/datamanagers/atomic.py +9 -2
  34. nucliadb/common/datamanagers/cluster.py +1 -2
  35. nucliadb/common/datamanagers/fields.py +3 -4
  36. nucliadb/common/datamanagers/kb.py +6 -6
  37. nucliadb/common/datamanagers/labels.py +2 -3
  38. nucliadb/common/datamanagers/resources.py +10 -33
  39. nucliadb/common/datamanagers/rollover.py +5 -7
  40. nucliadb/common/datamanagers/search_configurations.py +1 -2
  41. nucliadb/common/datamanagers/synonyms.py +1 -2
  42. nucliadb/common/datamanagers/utils.py +4 -4
  43. nucliadb/common/datamanagers/vectorsets.py +4 -4
  44. nucliadb/common/external_index_providers/base.py +32 -5
  45. nucliadb/common/external_index_providers/manager.py +5 -34
  46. nucliadb/common/external_index_providers/settings.py +1 -27
  47. nucliadb/common/filter_expression.py +129 -41
  48. nucliadb/common/http_clients/exceptions.py +8 -0
  49. nucliadb/common/http_clients/processing.py +16 -23
  50. nucliadb/common/http_clients/utils.py +3 -0
  51. nucliadb/common/ids.py +82 -58
  52. nucliadb/common/locking.py +1 -2
  53. nucliadb/common/maindb/driver.py +9 -8
  54. nucliadb/common/maindb/local.py +5 -5
  55. nucliadb/common/maindb/pg.py +9 -8
  56. nucliadb/common/nidx.py +22 -5
  57. nucliadb/common/vector_index_config.py +1 -1
  58. nucliadb/export_import/datamanager.py +4 -3
  59. nucliadb/export_import/exporter.py +11 -19
  60. nucliadb/export_import/importer.py +13 -6
  61. nucliadb/export_import/tasks.py +2 -0
  62. nucliadb/export_import/utils.py +6 -18
  63. nucliadb/health.py +2 -2
  64. nucliadb/ingest/app.py +8 -8
  65. nucliadb/ingest/consumer/consumer.py +8 -10
  66. nucliadb/ingest/consumer/pull.py +10 -8
  67. nucliadb/ingest/consumer/service.py +5 -30
  68. nucliadb/ingest/consumer/shard_creator.py +16 -5
  69. nucliadb/ingest/consumer/utils.py +1 -1
  70. nucliadb/ingest/fields/base.py +37 -49
  71. nucliadb/ingest/fields/conversation.py +55 -9
  72. nucliadb/ingest/fields/exceptions.py +1 -2
  73. nucliadb/ingest/fields/file.py +22 -8
  74. nucliadb/ingest/fields/link.py +7 -7
  75. nucliadb/ingest/fields/text.py +2 -3
  76. nucliadb/ingest/orm/brain_v2.py +89 -57
  77. nucliadb/ingest/orm/broker_message.py +2 -4
  78. nucliadb/ingest/orm/entities.py +10 -209
  79. nucliadb/ingest/orm/index_message.py +128 -113
  80. nucliadb/ingest/orm/knowledgebox.py +91 -59
  81. nucliadb/ingest/orm/processor/auditing.py +1 -3
  82. nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
  83. nucliadb/ingest/orm/processor/processor.py +98 -153
  84. nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
  85. nucliadb/ingest/orm/resource.py +82 -71
  86. nucliadb/ingest/orm/utils.py +1 -1
  87. nucliadb/ingest/partitions.py +12 -1
  88. nucliadb/ingest/processing.py +17 -17
  89. nucliadb/ingest/serialize.py +202 -145
  90. nucliadb/ingest/service/writer.py +15 -114
  91. nucliadb/ingest/settings.py +36 -15
  92. nucliadb/ingest/utils.py +1 -2
  93. nucliadb/learning_proxy.py +23 -26
  94. nucliadb/metrics_exporter.py +20 -6
  95. nucliadb/middleware/__init__.py +82 -1
  96. nucliadb/migrator/datamanager.py +4 -11
  97. nucliadb/migrator/migrator.py +1 -2
  98. nucliadb/migrator/models.py +1 -2
  99. nucliadb/migrator/settings.py +1 -2
  100. nucliadb/models/internal/augment.py +614 -0
  101. nucliadb/models/internal/processing.py +19 -19
  102. nucliadb/openapi.py +2 -2
  103. nucliadb/purge/__init__.py +3 -8
  104. nucliadb/purge/orphan_shards.py +1 -2
  105. nucliadb/reader/__init__.py +5 -0
  106. nucliadb/reader/api/models.py +6 -13
  107. nucliadb/reader/api/v1/download.py +59 -38
  108. nucliadb/reader/api/v1/export_import.py +4 -4
  109. nucliadb/reader/api/v1/knowledgebox.py +37 -9
  110. nucliadb/reader/api/v1/learning_config.py +33 -14
  111. nucliadb/reader/api/v1/resource.py +61 -9
  112. nucliadb/reader/api/v1/services.py +18 -14
  113. nucliadb/reader/app.py +3 -1
  114. nucliadb/reader/reader/notifications.py +1 -2
  115. nucliadb/search/api/v1/__init__.py +3 -0
  116. nucliadb/search/api/v1/ask.py +3 -4
  117. nucliadb/search/api/v1/augment.py +585 -0
  118. nucliadb/search/api/v1/catalog.py +15 -19
  119. nucliadb/search/api/v1/find.py +16 -22
  120. nucliadb/search/api/v1/hydrate.py +328 -0
  121. nucliadb/search/api/v1/knowledgebox.py +1 -2
  122. nucliadb/search/api/v1/predict_proxy.py +1 -2
  123. nucliadb/search/api/v1/resource/ask.py +28 -8
  124. nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
  125. nucliadb/search/api/v1/resource/search.py +9 -11
  126. nucliadb/search/api/v1/retrieve.py +130 -0
  127. nucliadb/search/api/v1/search.py +28 -32
  128. nucliadb/search/api/v1/suggest.py +11 -14
  129. nucliadb/search/api/v1/summarize.py +1 -2
  130. nucliadb/search/api/v1/utils.py +2 -2
  131. nucliadb/search/app.py +3 -2
  132. nucliadb/search/augmentor/__init__.py +21 -0
  133. nucliadb/search/augmentor/augmentor.py +232 -0
  134. nucliadb/search/augmentor/fields.py +704 -0
  135. nucliadb/search/augmentor/metrics.py +24 -0
  136. nucliadb/search/augmentor/paragraphs.py +334 -0
  137. nucliadb/search/augmentor/resources.py +238 -0
  138. nucliadb/search/augmentor/utils.py +33 -0
  139. nucliadb/search/lifecycle.py +3 -1
  140. nucliadb/search/predict.py +33 -19
  141. nucliadb/search/predict_models.py +8 -9
  142. nucliadb/search/requesters/utils.py +11 -10
  143. nucliadb/search/search/cache.py +19 -42
  144. nucliadb/search/search/chat/ask.py +131 -59
  145. nucliadb/search/search/chat/exceptions.py +3 -5
  146. nucliadb/search/search/chat/fetcher.py +201 -0
  147. nucliadb/search/search/chat/images.py +6 -4
  148. nucliadb/search/search/chat/old_prompt.py +1375 -0
  149. nucliadb/search/search/chat/parser.py +510 -0
  150. nucliadb/search/search/chat/prompt.py +563 -615
  151. nucliadb/search/search/chat/query.py +453 -32
  152. nucliadb/search/search/chat/rpc.py +85 -0
  153. nucliadb/search/search/fetch.py +3 -4
  154. nucliadb/search/search/filters.py +8 -11
  155. nucliadb/search/search/find.py +33 -31
  156. nucliadb/search/search/find_merge.py +124 -331
  157. nucliadb/search/search/graph_strategy.py +14 -12
  158. nucliadb/search/search/hydrator/__init__.py +49 -0
  159. nucliadb/search/search/hydrator/fields.py +217 -0
  160. nucliadb/search/search/hydrator/images.py +130 -0
  161. nucliadb/search/search/hydrator/paragraphs.py +323 -0
  162. nucliadb/search/search/hydrator/resources.py +60 -0
  163. nucliadb/search/search/ingestion_agents.py +5 -5
  164. nucliadb/search/search/merge.py +90 -94
  165. nucliadb/search/search/metrics.py +24 -7
  166. nucliadb/search/search/paragraphs.py +7 -9
  167. nucliadb/search/search/predict_proxy.py +44 -18
  168. nucliadb/search/search/query.py +14 -86
  169. nucliadb/search/search/query_parser/fetcher.py +51 -82
  170. nucliadb/search/search/query_parser/models.py +19 -48
  171. nucliadb/search/search/query_parser/old_filters.py +20 -19
  172. nucliadb/search/search/query_parser/parsers/ask.py +5 -6
  173. nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
  174. nucliadb/search/search/query_parser/parsers/common.py +21 -13
  175. nucliadb/search/search/query_parser/parsers/find.py +6 -29
  176. nucliadb/search/search/query_parser/parsers/graph.py +18 -28
  177. nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
  178. nucliadb/search/search/query_parser/parsers/search.py +15 -56
  179. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
  180. nucliadb/search/search/rank_fusion.py +18 -13
  181. nucliadb/search/search/rerankers.py +6 -7
  182. nucliadb/search/search/retrieval.py +300 -0
  183. nucliadb/search/search/summarize.py +5 -6
  184. nucliadb/search/search/utils.py +3 -4
  185. nucliadb/search/settings.py +1 -2
  186. nucliadb/standalone/api_router.py +1 -1
  187. nucliadb/standalone/app.py +4 -3
  188. nucliadb/standalone/auth.py +5 -6
  189. nucliadb/standalone/lifecycle.py +2 -2
  190. nucliadb/standalone/run.py +5 -4
  191. nucliadb/standalone/settings.py +5 -6
  192. nucliadb/standalone/versions.py +3 -4
  193. nucliadb/tasks/consumer.py +13 -8
  194. nucliadb/tasks/models.py +2 -1
  195. nucliadb/tasks/producer.py +3 -3
  196. nucliadb/tasks/retries.py +8 -7
  197. nucliadb/train/api/utils.py +1 -3
  198. nucliadb/train/api/v1/shards.py +1 -2
  199. nucliadb/train/api/v1/trainset.py +1 -2
  200. nucliadb/train/app.py +1 -1
  201. nucliadb/train/generator.py +4 -4
  202. nucliadb/train/generators/field_classifier.py +2 -2
  203. nucliadb/train/generators/field_streaming.py +6 -6
  204. nucliadb/train/generators/image_classifier.py +2 -2
  205. nucliadb/train/generators/paragraph_classifier.py +2 -2
  206. nucliadb/train/generators/paragraph_streaming.py +2 -2
  207. nucliadb/train/generators/question_answer_streaming.py +2 -2
  208. nucliadb/train/generators/sentence_classifier.py +4 -10
  209. nucliadb/train/generators/token_classifier.py +3 -2
  210. nucliadb/train/generators/utils.py +6 -5
  211. nucliadb/train/nodes.py +3 -3
  212. nucliadb/train/resource.py +6 -8
  213. nucliadb/train/settings.py +3 -4
  214. nucliadb/train/types.py +11 -11
  215. nucliadb/train/upload.py +3 -2
  216. nucliadb/train/uploader.py +1 -2
  217. nucliadb/train/utils.py +1 -2
  218. nucliadb/writer/api/v1/export_import.py +4 -1
  219. nucliadb/writer/api/v1/field.py +15 -14
  220. nucliadb/writer/api/v1/knowledgebox.py +18 -56
  221. nucliadb/writer/api/v1/learning_config.py +5 -4
  222. nucliadb/writer/api/v1/resource.py +9 -20
  223. nucliadb/writer/api/v1/services.py +10 -132
  224. nucliadb/writer/api/v1/upload.py +73 -72
  225. nucliadb/writer/app.py +8 -2
  226. nucliadb/writer/resource/basic.py +12 -15
  227. nucliadb/writer/resource/field.py +43 -5
  228. nucliadb/writer/resource/origin.py +7 -0
  229. nucliadb/writer/settings.py +2 -3
  230. nucliadb/writer/tus/__init__.py +2 -3
  231. nucliadb/writer/tus/azure.py +5 -7
  232. nucliadb/writer/tus/dm.py +3 -3
  233. nucliadb/writer/tus/exceptions.py +3 -4
  234. nucliadb/writer/tus/gcs.py +15 -22
  235. nucliadb/writer/tus/s3.py +2 -3
  236. nucliadb/writer/tus/storage.py +3 -3
  237. {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
  238. nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
  239. nucliadb/common/datamanagers/entities.py +0 -139
  240. nucliadb/common/external_index_providers/pinecone.py +0 -894
  241. nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
  242. nucliadb/search/search/hydrator.py +0 -197
  243. nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
  244. {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
  245. {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
  246. {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/openapi.py CHANGED
@@ -33,11 +33,11 @@ def is_versioned_route(route):
33
33
 
34
34
 
35
35
  def extract_openapi(application, version, commit_id, app_name):
36
- app = [
36
+ app = next(
37
37
  route.app
38
38
  for route in application.routes
39
39
  if is_versioned_route(route) and route.app.version == version
40
- ][0]
40
+ )
41
41
  document = get_openapi(
42
42
  title=app.title,
43
43
  version=app.version,
@@ -19,7 +19,8 @@
19
19
  #
20
20
  import asyncio
21
21
  import importlib.metadata
22
- from typing import AsyncGenerator
22
+ from collections.abc import AsyncGenerator
23
+ from itertools import batched # type: ignore
23
24
 
24
25
  from nucliadb.common import datamanagers
25
26
  from nucliadb.common.cluster.exceptions import NodeError, ShardNotFound
@@ -233,7 +234,7 @@ async def purge_kb_vectorsets(driver: Driver, storage: Storage):
233
234
  fields.extend((await resource.get_fields(force=True)).values())
234
235
 
235
236
  logger.info(f"Purging {len(fields)} fields for vectorset {vectorset}", extra={"kbid": kbid})
236
- for fields_batch in batchify(fields, 20):
237
+ for fields_batch in batched(fields, n=20):
237
238
  tasks = []
238
239
  for field in fields_batch:
239
240
  if purge_payload.storage_key_kind == VectorSetConfig.StorageKeyKind.UNSET:
@@ -317,9 +318,3 @@ def run() -> int: # pragma: no cover
317
318
  setup_logging()
318
319
  errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
319
320
  return asyncio.run(main())
320
-
321
-
322
- def batchify(iterable, n=1):
323
- """Yield successive n-sized chunks from iterable."""
324
- for i in range(0, len(iterable), n):
325
- yield iterable[i : i + n]
@@ -20,7 +20,6 @@
20
20
  import argparse
21
21
  import asyncio
22
22
  import importlib.metadata
23
- from typing import Optional
24
23
 
25
24
  from grpc.aio import AioRpcError
26
25
  from nidx_protos import nodereader_pb2, noderesources_pb2
@@ -113,7 +112,7 @@ async def _get_stored_shards(driver: Driver) -> dict[str, ShardKb]:
113
112
  return stored_shards
114
113
 
115
114
 
116
- async def _get_kbid(shard_id: str) -> Optional[str]:
115
+ async def _get_kbid(shard_id: str) -> str | None:
117
116
  kbid = None
118
117
  try:
119
118
  req = nodereader_pb2.GetShardRequest()
@@ -19,6 +19,8 @@
19
19
  #
20
20
  import logging
21
21
 
22
+ from fastapi import Header
23
+
22
24
  SERVICE_NAME = "nucliadb.reader"
23
25
  logger = logging.getLogger(SERVICE_NAME)
24
26
 
@@ -35,3 +37,6 @@ class EndpointFilter(logging.Filter):
35
37
 
36
38
  # Add filter to the logger
37
39
  logging.getLogger("uvicorn.access").addFilter(EndpointFilter())
40
+
41
+
42
+ RANGE_HEADER = Header(description="Standard HTTP Range header that enable multipart requests")
@@ -17,7 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import TYPE_CHECKING, Any, Optional, Union
20
+ from typing import TYPE_CHECKING, Any
21
21
 
22
22
  from pydantic import BaseModel
23
23
 
@@ -33,14 +33,7 @@ from nucliadb_models.resource import (
33
33
  )
34
34
 
35
35
  if TYPE_CHECKING: # pragma: no cover
36
- ValueType = Optional[
37
- Union[
38
- models.FieldText,
39
- models.FieldFile,
40
- models.FieldLink,
41
- models.Conversation,
42
- ]
43
- ]
36
+ ValueType = models.FieldText | models.FieldFile | models.FieldLink | models.Conversation | None
44
37
  else:
45
38
  # without Any, pydantic fails to anything as validate() fails using the Union
46
39
  ValueType = Any
@@ -50,10 +43,10 @@ class ResourceField(BaseModel):
50
43
  field_type: FieldTypeName
51
44
  field_id: str
52
45
  value: ValueType = None
53
- extracted: Optional[ExtractedDataType] = None
54
- error: Optional[Error] = None
55
- status: Optional[str] = None
56
- errors: Optional[list[Error]] = None
46
+ extracted: ExtractedDataType | None = None
47
+ error: Error | None = None
48
+ status: str | None = None
49
+ errors: list[Error] | None = None
57
50
 
58
51
 
59
52
  FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP: dict[FieldTypeName, Any] = {
@@ -18,20 +18,18 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import urllib.parse
21
- from enum import Enum
22
- from typing import Optional
21
+ from typing import Annotated
23
22
 
24
23
  from fastapi import HTTPException
25
24
  from fastapi.requests import Request
26
25
  from fastapi.responses import Response
27
26
  from fastapi_versioning import version
28
- from starlette.datastructures import Headers
29
27
  from starlette.responses import StreamingResponse
30
28
 
29
+ from nucliadb.common import datamanagers
31
30
  from nucliadb.common.ids import FIELD_TYPE_PB_TO_STR
32
31
  from nucliadb.common.models_utils import to_proto
33
- from nucliadb.ingest.serialize import get_resource_uuid_by_slug
34
- from nucliadb.reader import SERVICE_NAME, logger
32
+ from nucliadb.reader import RANGE_HEADER, SERVICE_NAME, logger
35
33
  from nucliadb_models.common import FieldTypeName
36
34
  from nucliadb_models.resource import NucliaDBRoles
37
35
  from nucliadb_utils.authentication import requires_one
@@ -41,13 +39,8 @@ from nucliadb_utils.utilities import get_storage
41
39
  from .router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
42
40
 
43
41
 
44
- class DownloadType(Enum):
45
- EXTRACTED = "extracted"
46
- FIELD = "field"
47
-
48
-
49
42
  @api.get(
50
- f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}", # noqa
43
+ f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}",
51
44
  tags=["Resource fields"],
52
45
  status_code=200,
53
46
  summary="Download extracted binary file (by slug)",
@@ -61,12 +54,20 @@ async def download_extract_file_rslug_prefix(
61
54
  field_type: FieldTypeName,
62
55
  field_id: str,
63
56
  download_field: str,
57
+ range: Annotated[str | None, RANGE_HEADER] = None,
64
58
  ) -> Response:
65
- return await _download_extract_file(request, kbid, field_type, field_id, download_field, rslug=rslug)
59
+ return await _download_extract_file(
60
+ kbid,
61
+ field_type,
62
+ field_id,
63
+ download_field,
64
+ rslug=rslug,
65
+ range_request=range,
66
+ )
66
67
 
67
68
 
68
69
  @api.get(
69
- f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}", # noqa
70
+ f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/{{field_type}}/{{field_id}}/download/extracted/{{download_field:path}}",
70
71
  tags=["Resource fields"],
71
72
  status_code=200,
72
73
  summary="Download extracted binary file (by id)",
@@ -80,18 +81,21 @@ async def download_extract_file_rid_prefix(
80
81
  field_type: FieldTypeName,
81
82
  field_id: str,
82
83
  download_field: str,
84
+ range: Annotated[str | None, RANGE_HEADER] = None,
83
85
  ) -> Response:
84
- return await _download_extract_file(request, kbid, field_type, field_id, download_field, rid=rid)
86
+ return await _download_extract_file(
87
+ kbid, field_type, field_id, download_field, rid=rid, range_request=range
88
+ )
85
89
 
86
90
 
87
91
  async def _download_extract_file(
88
- request: Request,
89
92
  kbid: str,
90
93
  field_type: FieldTypeName,
91
94
  field_id: str,
92
95
  download_field: str,
93
- rid: Optional[str] = None,
94
- rslug: Optional[str] = None,
96
+ rid: str | None = None,
97
+ rslug: str | None = None,
98
+ range_request: str | None = None,
95
99
  ) -> Response:
96
100
  rid = await _get_resource_uuid_from_params(kbid, rid, rslug)
97
101
 
@@ -102,7 +106,7 @@ async def _download_extract_file(
102
106
 
103
107
  sf = storage.file_extracted(kbid, rid, field_type_letter, field_id, download_field)
104
108
 
105
- return await download_api(sf, request.headers)
109
+ return await download_api(sf, range_request)
106
110
 
107
111
 
108
112
  @api.get(
@@ -119,8 +123,9 @@ async def download_field_file_rslug_prefix(
119
123
  rslug: str,
120
124
  field_id: str,
121
125
  inline: bool = False,
126
+ range: Annotated[str | None, RANGE_HEADER] = None,
122
127
  ) -> Response:
123
- return await _download_field_file(request, kbid, field_id, rslug=rslug, inline=inline)
128
+ return await _download_field_file(kbid, field_id, rslug=rslug, range_request=range, inline=inline)
124
129
 
125
130
 
126
131
  @api.get(
@@ -137,16 +142,17 @@ async def download_field_file_rid_prefix(
137
142
  rid: str,
138
143
  field_id: str,
139
144
  inline: bool = False,
145
+ range: Annotated[str | None, RANGE_HEADER] = None,
140
146
  ) -> Response:
141
- return await _download_field_file(request, kbid, field_id, rid=rid, inline=inline)
147
+ return await _download_field_file(kbid, field_id, rid=rid, range_request=range, inline=inline)
142
148
 
143
149
 
144
150
  async def _download_field_file(
145
- request: Request,
146
151
  kbid: str,
147
152
  field_id: str,
148
- rid: Optional[str] = None,
149
- rslug: Optional[str] = None,
153
+ rid: str | None = None,
154
+ rslug: str | None = None,
155
+ range_request: str | None = None,
150
156
  inline: bool = False,
151
157
  ) -> Response:
152
158
  rid = await _get_resource_uuid_from_params(kbid, rid, rslug)
@@ -155,11 +161,11 @@ async def _download_field_file(
155
161
 
156
162
  sf = storage.file_field(kbid, rid, field_id)
157
163
 
158
- return await download_api(sf, request.headers, inline=inline)
164
+ return await download_api(sf, range_request=range_request, inline=inline)
159
165
 
160
166
 
161
167
  @api.get(
162
- f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}", # noqa
168
+ f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}",
163
169
  tags=["Resource fields"],
164
170
  status_code=200,
165
171
  summary="Download conversation binary field (by slug)",
@@ -173,14 +179,20 @@ async def download_field_conversation_rslug_prefix(
173
179
  field_id: str,
174
180
  message_id: str,
175
181
  file_num: int,
182
+ range: Annotated[str | None, RANGE_HEADER] = None,
176
183
  ) -> Response:
177
184
  return await _download_field_conversation_attachment(
178
- request, kbid, field_id, message_id, file_num, rslug=rslug
185
+ kbid,
186
+ field_id,
187
+ message_id,
188
+ file_num,
189
+ rslug=rslug,
190
+ range_request=range,
179
191
  )
180
192
 
181
193
 
182
194
  @api.get(
183
- f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}", # noqa
195
+ f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/conversation/{{field_id}}/download/field/{{message_id}}/{{file_num}}",
184
196
  tags=["Resource fields"],
185
197
  status_code=200,
186
198
  summary="Download conversation binary field (by id)",
@@ -194,20 +206,26 @@ async def download_field_conversation_attachment_rid_prefix(
194
206
  field_id: str,
195
207
  message_id: str,
196
208
  file_num: int,
209
+ range: Annotated[str | None, RANGE_HEADER] = None,
197
210
  ) -> Response:
198
211
  return await _download_field_conversation_attachment(
199
- request, kbid, field_id, message_id, file_num, rid=rid
212
+ kbid,
213
+ field_id,
214
+ message_id,
215
+ file_num,
216
+ rid=rid,
217
+ range_request=range,
200
218
  )
201
219
 
202
220
 
203
221
  async def _download_field_conversation_attachment(
204
- request: Request,
205
222
  kbid: str,
206
223
  field_id: str,
207
224
  message_id: str,
208
225
  file_num: int,
209
- rid: Optional[str] = None,
210
- rslug: Optional[str] = None,
226
+ rid: str | None = None,
227
+ rslug: str | None = None,
228
+ range_request: str | None = None,
211
229
  ) -> Response:
212
230
  rid = await _get_resource_uuid_from_params(kbid, rid, rslug)
213
231
 
@@ -217,11 +235,11 @@ async def _download_field_conversation_attachment(
217
235
  kbid, rid, field_id, message_id, attachment_index=file_num
218
236
  )
219
237
 
220
- return await download_api(sf, request.headers)
238
+ return await download_api(sf, range_request)
221
239
 
222
240
 
223
- async def download_api(sf: StorageField, headers: Headers, inline: bool = False):
224
- metadata: Optional[ObjectMetadata] = await sf.exists()
241
+ async def download_api(sf: StorageField, range_request: str | None = None, inline: bool = False):
242
+ metadata: ObjectMetadata | None = await sf.exists()
225
243
  if metadata is None:
226
244
  raise HTTPException(status_code=404, detail="Specified file doesn't exist")
227
245
 
@@ -240,9 +258,8 @@ async def download_api(sf: StorageField, headers: Headers, inline: bool = False)
240
258
  }
241
259
 
242
260
  range = Range()
243
- if "range" in headers and file_size > -1:
261
+ if range_request and file_size > -1:
244
262
  status_code = 206
245
- range_request = headers["range"]
246
263
  try:
247
264
  start, end, range_size = parse_media_range(range_request, file_size)
248
265
  except NotImplementedError:
@@ -295,13 +312,17 @@ async def download_api(sf: StorageField, headers: Headers, inline: bool = False)
295
312
  )
296
313
 
297
314
 
298
- async def _get_resource_uuid_from_params(kbid, rid: Optional[str], rslug: Optional[str]) -> str:
315
+ async def _get_resource_uuid_from_params(kbid, rid: str | None, rslug: str | None) -> str:
299
316
  if not any([rid, rslug]):
300
317
  raise ValueError("Either rid or slug must be set")
301
318
 
302
319
  if not rid:
303
320
  # Attempt to get it from slug
304
- rid = await get_resource_uuid_by_slug(kbid, rslug, service_name=SERVICE_NAME) # type: ignore
321
+ rid = await datamanagers.atomic.resources.get_resource_uuid_from_slug(
322
+ kbid=kbid,
323
+ # mypy doesn't infer that we already checked for slug to be something
324
+ slug=rslug, # type: ignore[arg-type]
325
+ )
305
326
  if rid is None:
306
327
  raise HTTPException(status_code=404, detail="Resource does not exist")
307
328
 
@@ -17,7 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import AsyncGenerator, AsyncIterable, Union
20
+ from collections.abc import AsyncGenerator, AsyncIterable
21
21
 
22
22
  from fastapi.responses import StreamingResponse
23
23
  from fastapi_versioning import version
@@ -108,7 +108,7 @@ async def download_export_and_delete(
108
108
  @version(1)
109
109
  async def get_export_status_endpoint(
110
110
  request: Request, kbid: str, export_id: str
111
- ) -> Union[StatusResponse, HTTPClientError]:
111
+ ) -> StatusResponse | HTTPClientError:
112
112
  context = get_app_context(request.app)
113
113
  if not await exists_kb(kbid):
114
114
  return HTTPClientError(status_code=404, detail="Knowledge Box not found")
@@ -127,7 +127,7 @@ async def get_export_status_endpoint(
127
127
  @version(1)
128
128
  async def get_import_status_endpoint(
129
129
  request: Request, kbid: str, import_id: str
130
- ) -> Union[StatusResponse, HTTPClientError]:
130
+ ) -> StatusResponse | HTTPClientError:
131
131
  context = get_app_context(request.app)
132
132
  if not await exists_kb(kbid):
133
133
  return HTTPClientError(status_code=404, detail="Knowledge Box not found")
@@ -137,7 +137,7 @@ async def get_import_status_endpoint(
137
137
 
138
138
  async def _get_status(
139
139
  context: ApplicationContext, type: str, kbid: str, id: str
140
- ) -> Union[StatusResponse, HTTPClientError]:
140
+ ) -> StatusResponse | HTTPClientError:
141
141
  if type not in ("export", "import"):
142
142
  raise ValueError(f"Incorrect type: {type}")
143
143
 
@@ -17,7 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from fastapi import HTTPException
20
+ from fastapi import Header, HTTPException
21
21
  from fastapi_versioning import version
22
22
  from starlette.requests import Request
23
23
 
@@ -44,12 +44,20 @@ from nucliadb_utils.authentication import requires, requires_one
44
44
  )
45
45
  @requires(NucliaDBRoles.MANAGER)
46
46
  @version(1)
47
- async def get_kbs(request: Request, prefix: str = "") -> KnowledgeBoxList:
47
+ async def get_kbs(
48
+ request: Request,
49
+ prefix: str = "",
50
+ x_nucliadb_account: str = Header(default="", include_in_schema=False),
51
+ ) -> KnowledgeBoxList:
48
52
  driver = get_driver()
49
53
  async with driver.ro_transaction() as txn:
50
54
  response = KnowledgeBoxList()
51
55
  async for kbid, slug in datamanagers.kb.get_kbs(txn, prefix=prefix):
52
- response.kbs.append(KnowledgeBoxObjSummary(slug=slug or None, uuid=kbid))
56
+ response.kbs.append(
57
+ KnowledgeBoxObjSummary(
58
+ slug=user_kb_slug(slug, account_id=x_nucliadb_account) or None, uuid=kbid
59
+ )
60
+ )
53
61
  return response
54
62
 
55
63
 
@@ -62,7 +70,9 @@ async def get_kbs(request: Request, prefix: str = "") -> KnowledgeBoxList:
62
70
  )
63
71
  @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.READER])
64
72
  @version(1)
65
- async def get_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
73
+ async def get_kb(
74
+ request: Request, kbid: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
75
+ ) -> KnowledgeBoxObj:
66
76
  driver = get_driver()
67
77
  async with driver.ro_transaction() as txn:
68
78
  kb_config = await datamanagers.kb.get_config(txn, kbid=kbid)
@@ -71,7 +81,7 @@ async def get_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
71
81
 
72
82
  return KnowledgeBoxObj(
73
83
  uuid=kbid,
74
- slug=kb_config.slug,
84
+ slug=user_kb_slug(kb_config.slug, account_id=x_nucliadb_account),
75
85
  config=from_proto.knowledgebox_config(kb_config),
76
86
  )
77
87
 
@@ -85,12 +95,18 @@ async def get_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
85
95
  )
86
96
  @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.READER])
87
97
  @version(1)
88
- async def get_kb_by_slug(request: Request, slug: str) -> KnowledgeBoxObj:
98
+ async def get_kb_by_slug(
99
+ request: Request, slug: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
100
+ ) -> KnowledgeBoxObj:
89
101
  driver = get_driver()
90
102
  async with driver.ro_transaction() as txn:
91
- kbid = await datamanagers.kb.get_kb_uuid(txn, slug=slug)
103
+ # For cloud, the account id is prepended in order to be able to reuse the same slug in different accounts.
104
+ kbid = await datamanagers.kb.get_kb_uuid(txn, slug=f"{x_nucliadb_account}:{slug}")
92
105
  if kbid is None:
93
- raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
106
+ # For onprem, the slug is fully controlled by the user
107
+ kbid = await datamanagers.kb.get_kb_uuid(txn, slug=slug)
108
+ if kbid is None:
109
+ raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
94
110
 
95
111
  kb_config = await datamanagers.kb.get_config(txn, kbid=kbid)
96
112
  if kb_config is None:
@@ -98,6 +114,18 @@ async def get_kb_by_slug(request: Request, slug: str) -> KnowledgeBoxObj:
98
114
 
99
115
  return KnowledgeBoxObj(
100
116
  uuid=kbid,
101
- slug=kb_config.slug,
117
+ slug=user_kb_slug(kb_config.slug, account_id=x_nucliadb_account),
102
118
  config=from_proto.knowledgebox_config(kb_config),
103
119
  )
120
+
121
+
122
+ def user_kb_slug(stored_slug: str, account_id: str) -> str:
123
+ if account_id != "":
124
+ # On cloud deployments, backend prepends the account id to the user-defined slug.
125
+ # This is required to make kb slugs reused across different accounts using the same nucliadb.
126
+ # We strip it so the user does not see it.
127
+ return stored_slug.split(f"{account_id}:")[-1]
128
+ else:
129
+ # On on-prem deployments, the account_id is set to "" by default and we don't need to strip
130
+ # anything as the backend is not invovled in the kb creation process.
131
+ return stored_slug
@@ -17,9 +17,8 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import Dict
21
20
 
22
- from fastapi import Request
21
+ from fastapi import Header, Request
23
22
  from fastapi_versioning import version
24
23
  from nuclia_models.config.proto import ExtractConfig, SplitConfiguration
25
24
 
@@ -35,7 +34,7 @@ from nucliadb_utils.settings import is_onprem_nucliadb
35
34
  path=f"/{KB_PREFIX}/{{kbid}}/models/{{model_id}}/{{filename:path}}",
36
35
  status_code=200,
37
36
  summary="Download the Knowledege Box model",
38
- description="Download the trained model or any other generated file as a result of a training task on a Knowledge Box.", # noqa
37
+ description="Download the trained model or any other generated file as a result of a training task on a Knowledge Box.",
39
38
  response_model=None,
40
39
  tags=["Models"],
41
40
  )
@@ -60,15 +59,11 @@ async def download_model(
60
59
  )
61
60
  @requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
62
61
  @version(1)
63
- async def get_configuration(
64
- request: Request,
65
- kbid: str,
66
- ):
62
+ async def get_configuration(request: Request, kbid: str):
67
63
  return await learning_config_proxy(
68
64
  request,
69
65
  "GET",
70
66
  f"/config/{kbid}",
71
- extra_headers={"X-STF-USER": request.headers.get("X-NUCLIADB-USER", "")},
72
67
  )
73
68
 
74
69
 
@@ -108,7 +103,6 @@ async def get_model(
108
103
  request,
109
104
  "GET",
110
105
  f"/models/{kbid}/model/{model_id}",
111
- extra_headers={"X-STF-USER": request.headers.get("X-NUCLIADB-USER", "")},
112
106
  )
113
107
 
114
108
 
@@ -123,10 +117,35 @@ async def get_model(
123
117
  @requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
124
118
  @version(1)
125
119
  async def get_schema_for_configuration_updates(
126
- request: Request,
127
- kbid: str,
120
+ request: Request, kbid: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
128
121
  ):
129
- return await learning_config_proxy(request, "GET", f"/schema/{kbid}")
122
+ return await learning_config_proxy(
123
+ request,
124
+ "GET",
125
+ f"/schema/{kbid}",
126
+ headers={"account-id": x_nucliadb_account},
127
+ )
128
+
129
+
130
+ @api.get(
131
+ path=f"/{KB_PREFIX}/{{kbid}}/generative_providers",
132
+ status_code=200,
133
+ summary="Available models for a knowledge box",
134
+ description="Get all available models for a knowledge box grouped by provider",
135
+ response_model=None,
136
+ tags=["Models"],
137
+ )
138
+ @requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
139
+ @version(1)
140
+ async def get_models_group_by_providers(
141
+ request: Request, kbid: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
142
+ ):
143
+ return await learning_config_proxy(
144
+ request,
145
+ "GET",
146
+ f"/generative_providers/{kbid}",
147
+ headers={"account-id": x_nucliadb_account},
148
+ )
130
149
 
131
150
 
132
151
  @api.get(
@@ -153,7 +172,7 @@ async def get_schema_for_configuration_creation(
153
172
  status_code=200,
154
173
  summary="Learning extract strategies",
155
174
  description="Get available extract strategies ",
156
- response_model=Dict[str, ExtractConfig],
175
+ response_model=dict[str, ExtractConfig],
157
176
  tags=["Extract Strategies"],
158
177
  )
159
178
  @requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])
@@ -190,7 +209,7 @@ async def get_extract_strategy_from_id(
190
209
  status_code=200,
191
210
  summary="Learning split strategies",
192
211
  description="Get available split strategies ",
193
- response_model=Dict[str, SplitConfiguration],
212
+ response_model=dict[str, SplitConfiguration],
194
213
  tags=["Split Strategies"],
195
214
  )
196
215
  @requires_one([NucliaDBRoles.READER, NucliaDBRoles.MANAGER])