nucliadb 6.9.1.post5192__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. migrations/0023_backfill_pg_catalog.py +2 -2
  2. migrations/0029_backfill_field_status.py +3 -4
  3. migrations/0032_remove_old_relations.py +2 -3
  4. migrations/0038_backfill_catalog_field_labels.py +2 -2
  5. migrations/0039_backfill_converation_splits_metadata.py +2 -2
  6. migrations/0041_reindex_conversations.py +137 -0
  7. migrations/pg/0010_shards_index.py +34 -0
  8. nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
  9. migrations/pg/0012_catalog_statistics_undo.py +26 -0
  10. nucliadb/backups/create.py +2 -15
  11. nucliadb/backups/restore.py +4 -15
  12. nucliadb/backups/tasks.py +4 -1
  13. nucliadb/common/back_pressure/cache.py +2 -3
  14. nucliadb/common/back_pressure/materializer.py +7 -13
  15. nucliadb/common/back_pressure/settings.py +6 -6
  16. nucliadb/common/back_pressure/utils.py +1 -0
  17. nucliadb/common/cache.py +9 -9
  18. nucliadb/common/catalog/interface.py +12 -12
  19. nucliadb/common/catalog/pg.py +41 -29
  20. nucliadb/common/catalog/utils.py +3 -3
  21. nucliadb/common/cluster/manager.py +5 -4
  22. nucliadb/common/cluster/rebalance.py +483 -114
  23. nucliadb/common/cluster/rollover.py +25 -9
  24. nucliadb/common/cluster/settings.py +3 -8
  25. nucliadb/common/cluster/utils.py +34 -8
  26. nucliadb/common/context/__init__.py +7 -8
  27. nucliadb/common/context/fastapi.py +1 -2
  28. nucliadb/common/datamanagers/__init__.py +2 -4
  29. nucliadb/common/datamanagers/atomic.py +4 -2
  30. nucliadb/common/datamanagers/cluster.py +1 -2
  31. nucliadb/common/datamanagers/fields.py +3 -4
  32. nucliadb/common/datamanagers/kb.py +6 -6
  33. nucliadb/common/datamanagers/labels.py +2 -3
  34. nucliadb/common/datamanagers/resources.py +10 -33
  35. nucliadb/common/datamanagers/rollover.py +5 -7
  36. nucliadb/common/datamanagers/search_configurations.py +1 -2
  37. nucliadb/common/datamanagers/synonyms.py +1 -2
  38. nucliadb/common/datamanagers/utils.py +4 -4
  39. nucliadb/common/datamanagers/vectorsets.py +4 -4
  40. nucliadb/common/external_index_providers/base.py +32 -5
  41. nucliadb/common/external_index_providers/manager.py +4 -5
  42. nucliadb/common/filter_expression.py +128 -40
  43. nucliadb/common/http_clients/processing.py +12 -23
  44. nucliadb/common/ids.py +6 -4
  45. nucliadb/common/locking.py +1 -2
  46. nucliadb/common/maindb/driver.py +9 -8
  47. nucliadb/common/maindb/local.py +5 -5
  48. nucliadb/common/maindb/pg.py +9 -8
  49. nucliadb/common/nidx.py +3 -4
  50. nucliadb/export_import/datamanager.py +4 -3
  51. nucliadb/export_import/exporter.py +11 -19
  52. nucliadb/export_import/importer.py +13 -6
  53. nucliadb/export_import/tasks.py +2 -0
  54. nucliadb/export_import/utils.py +6 -18
  55. nucliadb/health.py +2 -2
  56. nucliadb/ingest/app.py +8 -8
  57. nucliadb/ingest/consumer/consumer.py +8 -10
  58. nucliadb/ingest/consumer/pull.py +3 -8
  59. nucliadb/ingest/consumer/service.py +3 -3
  60. nucliadb/ingest/consumer/utils.py +1 -1
  61. nucliadb/ingest/fields/base.py +28 -49
  62. nucliadb/ingest/fields/conversation.py +12 -12
  63. nucliadb/ingest/fields/exceptions.py +1 -2
  64. nucliadb/ingest/fields/file.py +22 -8
  65. nucliadb/ingest/fields/link.py +7 -7
  66. nucliadb/ingest/fields/text.py +2 -3
  67. nucliadb/ingest/orm/brain_v2.py +78 -64
  68. nucliadb/ingest/orm/broker_message.py +2 -4
  69. nucliadb/ingest/orm/entities.py +10 -209
  70. nucliadb/ingest/orm/index_message.py +4 -4
  71. nucliadb/ingest/orm/knowledgebox.py +18 -27
  72. nucliadb/ingest/orm/processor/auditing.py +1 -3
  73. nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
  74. nucliadb/ingest/orm/processor/processor.py +27 -27
  75. nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
  76. nucliadb/ingest/orm/resource.py +72 -70
  77. nucliadb/ingest/orm/utils.py +1 -1
  78. nucliadb/ingest/processing.py +17 -17
  79. nucliadb/ingest/serialize.py +202 -145
  80. nucliadb/ingest/service/writer.py +3 -109
  81. nucliadb/ingest/settings.py +3 -4
  82. nucliadb/ingest/utils.py +1 -2
  83. nucliadb/learning_proxy.py +11 -11
  84. nucliadb/metrics_exporter.py +5 -4
  85. nucliadb/middleware/__init__.py +82 -1
  86. nucliadb/migrator/datamanager.py +3 -4
  87. nucliadb/migrator/migrator.py +1 -2
  88. nucliadb/migrator/models.py +1 -2
  89. nucliadb/migrator/settings.py +1 -2
  90. nucliadb/models/internal/augment.py +614 -0
  91. nucliadb/models/internal/processing.py +19 -19
  92. nucliadb/openapi.py +2 -2
  93. nucliadb/purge/__init__.py +3 -8
  94. nucliadb/purge/orphan_shards.py +1 -2
  95. nucliadb/reader/__init__.py +5 -0
  96. nucliadb/reader/api/models.py +6 -13
  97. nucliadb/reader/api/v1/download.py +59 -38
  98. nucliadb/reader/api/v1/export_import.py +4 -4
  99. nucliadb/reader/api/v1/learning_config.py +24 -4
  100. nucliadb/reader/api/v1/resource.py +61 -9
  101. nucliadb/reader/api/v1/services.py +18 -14
  102. nucliadb/reader/app.py +3 -1
  103. nucliadb/reader/reader/notifications.py +1 -2
  104. nucliadb/search/api/v1/__init__.py +2 -0
  105. nucliadb/search/api/v1/ask.py +3 -4
  106. nucliadb/search/api/v1/augment.py +585 -0
  107. nucliadb/search/api/v1/catalog.py +11 -15
  108. nucliadb/search/api/v1/find.py +16 -22
  109. nucliadb/search/api/v1/hydrate.py +25 -25
  110. nucliadb/search/api/v1/knowledgebox.py +1 -2
  111. nucliadb/search/api/v1/predict_proxy.py +1 -2
  112. nucliadb/search/api/v1/resource/ask.py +7 -7
  113. nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
  114. nucliadb/search/api/v1/resource/search.py +9 -11
  115. nucliadb/search/api/v1/retrieve.py +130 -0
  116. nucliadb/search/api/v1/search.py +28 -32
  117. nucliadb/search/api/v1/suggest.py +11 -14
  118. nucliadb/search/api/v1/summarize.py +1 -2
  119. nucliadb/search/api/v1/utils.py +2 -2
  120. nucliadb/search/app.py +3 -2
  121. nucliadb/search/augmentor/__init__.py +21 -0
  122. nucliadb/search/augmentor/augmentor.py +232 -0
  123. nucliadb/search/augmentor/fields.py +704 -0
  124. nucliadb/search/augmentor/metrics.py +24 -0
  125. nucliadb/search/augmentor/paragraphs.py +334 -0
  126. nucliadb/search/augmentor/resources.py +238 -0
  127. nucliadb/search/augmentor/utils.py +33 -0
  128. nucliadb/search/lifecycle.py +3 -1
  129. nucliadb/search/predict.py +24 -17
  130. nucliadb/search/predict_models.py +8 -9
  131. nucliadb/search/requesters/utils.py +11 -10
  132. nucliadb/search/search/cache.py +19 -23
  133. nucliadb/search/search/chat/ask.py +88 -59
  134. nucliadb/search/search/chat/exceptions.py +3 -5
  135. nucliadb/search/search/chat/fetcher.py +201 -0
  136. nucliadb/search/search/chat/images.py +6 -4
  137. nucliadb/search/search/chat/old_prompt.py +1375 -0
  138. nucliadb/search/search/chat/parser.py +510 -0
  139. nucliadb/search/search/chat/prompt.py +563 -615
  140. nucliadb/search/search/chat/query.py +449 -36
  141. nucliadb/search/search/chat/rpc.py +85 -0
  142. nucliadb/search/search/fetch.py +3 -4
  143. nucliadb/search/search/filters.py +8 -11
  144. nucliadb/search/search/find.py +33 -31
  145. nucliadb/search/search/find_merge.py +124 -331
  146. nucliadb/search/search/graph_strategy.py +14 -12
  147. nucliadb/search/search/hydrator/__init__.py +3 -152
  148. nucliadb/search/search/hydrator/fields.py +92 -50
  149. nucliadb/search/search/hydrator/images.py +7 -7
  150. nucliadb/search/search/hydrator/paragraphs.py +42 -26
  151. nucliadb/search/search/hydrator/resources.py +20 -16
  152. nucliadb/search/search/ingestion_agents.py +5 -5
  153. nucliadb/search/search/merge.py +90 -94
  154. nucliadb/search/search/metrics.py +10 -9
  155. nucliadb/search/search/paragraphs.py +7 -9
  156. nucliadb/search/search/predict_proxy.py +13 -9
  157. nucliadb/search/search/query.py +14 -86
  158. nucliadb/search/search/query_parser/fetcher.py +51 -82
  159. nucliadb/search/search/query_parser/models.py +19 -20
  160. nucliadb/search/search/query_parser/old_filters.py +20 -19
  161. nucliadb/search/search/query_parser/parsers/ask.py +4 -5
  162. nucliadb/search/search/query_parser/parsers/catalog.py +5 -6
  163. nucliadb/search/search/query_parser/parsers/common.py +5 -6
  164. nucliadb/search/search/query_parser/parsers/find.py +6 -26
  165. nucliadb/search/search/query_parser/parsers/graph.py +13 -23
  166. nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
  167. nucliadb/search/search/query_parser/parsers/search.py +15 -53
  168. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
  169. nucliadb/search/search/rank_fusion.py +18 -13
  170. nucliadb/search/search/rerankers.py +5 -6
  171. nucliadb/search/search/retrieval.py +300 -0
  172. nucliadb/search/search/summarize.py +5 -6
  173. nucliadb/search/search/utils.py +3 -4
  174. nucliadb/search/settings.py +1 -2
  175. nucliadb/standalone/api_router.py +1 -1
  176. nucliadb/standalone/app.py +4 -3
  177. nucliadb/standalone/auth.py +5 -6
  178. nucliadb/standalone/lifecycle.py +2 -2
  179. nucliadb/standalone/run.py +2 -4
  180. nucliadb/standalone/settings.py +5 -6
  181. nucliadb/standalone/versions.py +3 -4
  182. nucliadb/tasks/consumer.py +13 -8
  183. nucliadb/tasks/models.py +2 -1
  184. nucliadb/tasks/producer.py +3 -3
  185. nucliadb/tasks/retries.py +8 -7
  186. nucliadb/train/api/utils.py +1 -3
  187. nucliadb/train/api/v1/shards.py +1 -2
  188. nucliadb/train/api/v1/trainset.py +1 -2
  189. nucliadb/train/app.py +1 -1
  190. nucliadb/train/generator.py +4 -4
  191. nucliadb/train/generators/field_classifier.py +2 -2
  192. nucliadb/train/generators/field_streaming.py +6 -6
  193. nucliadb/train/generators/image_classifier.py +2 -2
  194. nucliadb/train/generators/paragraph_classifier.py +2 -2
  195. nucliadb/train/generators/paragraph_streaming.py +2 -2
  196. nucliadb/train/generators/question_answer_streaming.py +2 -2
  197. nucliadb/train/generators/sentence_classifier.py +2 -2
  198. nucliadb/train/generators/token_classifier.py +3 -2
  199. nucliadb/train/generators/utils.py +6 -5
  200. nucliadb/train/nodes.py +3 -3
  201. nucliadb/train/resource.py +6 -8
  202. nucliadb/train/settings.py +3 -4
  203. nucliadb/train/types.py +11 -11
  204. nucliadb/train/upload.py +3 -2
  205. nucliadb/train/uploader.py +1 -2
  206. nucliadb/train/utils.py +1 -2
  207. nucliadb/writer/api/v1/export_import.py +4 -1
  208. nucliadb/writer/api/v1/field.py +7 -11
  209. nucliadb/writer/api/v1/knowledgebox.py +3 -4
  210. nucliadb/writer/api/v1/resource.py +9 -20
  211. nucliadb/writer/api/v1/services.py +10 -132
  212. nucliadb/writer/api/v1/upload.py +73 -72
  213. nucliadb/writer/app.py +8 -2
  214. nucliadb/writer/resource/basic.py +12 -15
  215. nucliadb/writer/resource/field.py +7 -5
  216. nucliadb/writer/resource/origin.py +7 -0
  217. nucliadb/writer/settings.py +2 -3
  218. nucliadb/writer/tus/__init__.py +2 -3
  219. nucliadb/writer/tus/azure.py +1 -3
  220. nucliadb/writer/tus/dm.py +3 -3
  221. nucliadb/writer/tus/exceptions.py +3 -4
  222. nucliadb/writer/tus/gcs.py +5 -6
  223. nucliadb/writer/tus/s3.py +2 -3
  224. nucliadb/writer/tus/storage.py +3 -3
  225. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +9 -10
  226. nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
  227. nucliadb/common/datamanagers/entities.py +0 -139
  228. nucliadb-6.9.1.post5192.dist-info/RECORD +0 -392
  229. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
  230. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
  231. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
@@ -18,12 +18,11 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import base64
21
- import pickle
22
21
  import uuid
23
22
  from datetime import datetime
24
23
  from hashlib import md5
25
24
  from io import BytesIO
26
- from typing import Annotated, Optional
25
+ from typing import Annotated
27
26
 
28
27
  from fastapi import HTTPException
29
28
  from fastapi.requests import Request
@@ -123,17 +122,17 @@ TUS_HEADERS = {
123
122
  def tus_options(
124
123
  request: Request,
125
124
  kbid: str,
126
- rid: Optional[str] = None,
127
- rslug: Optional[str] = None,
128
- upload_id: Optional[str] = None,
129
- field: Optional[str] = None,
125
+ rid: str | None = None,
126
+ rslug: str | None = None,
127
+ upload_id: str | None = None,
128
+ field: str | None = None,
130
129
  ) -> Response:
131
130
  return _tus_options()
132
131
 
133
132
 
134
133
  def _tus_options() -> Response:
135
134
  """
136
- Gather information about the Servers current configuration such as enabled extensions, version...
135
+ Gather information about the Server's current configuration such as enabled extensions, version...
137
136
  """
138
137
  resp = Response(headers=TUS_HEADERS, status_code=204)
139
138
  return resp
@@ -152,9 +151,9 @@ async def tus_post_rslug_prefix(
152
151
  kbid: str,
153
152
  rslug: str,
154
153
  field: FieldIdString,
155
- item: Optional[CreateResourcePayload] = None,
156
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
157
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
154
+ item: CreateResourcePayload | None = None,
155
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
156
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
158
157
  ) -> Response:
159
158
  rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
160
159
  return await _tus_post(
@@ -181,9 +180,9 @@ async def tus_post_rid_prefix(
181
180
  kbid: str,
182
181
  path_rid: str,
183
182
  field: FieldIdString,
184
- item: Optional[CreateResourcePayload] = None,
185
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
186
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
183
+ item: CreateResourcePayload | None = None,
184
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
185
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
187
186
  ) -> Response:
188
187
  return await _tus_post(
189
188
  request,
@@ -207,9 +206,9 @@ async def tus_post_rid_prefix(
207
206
  async def tus_post(
208
207
  request: Request,
209
208
  kbid: str,
210
- item: Optional[CreateResourcePayload] = None,
211
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
212
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
209
+ item: CreateResourcePayload | None = None,
210
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
211
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
213
212
  ) -> Response:
214
213
  return await _tus_post(
215
214
  request, kbid, item, extract_strategy=x_extract_strategy, split_strategy=x_split_strategy
@@ -220,11 +219,11 @@ async def tus_post(
220
219
  async def _tus_post(
221
220
  request: Request,
222
221
  kbid: str,
223
- item: Optional[CreateResourcePayload] = None,
224
- path_rid: Optional[str] = None,
225
- field_id: Optional[str] = None,
226
- extract_strategy: Optional[str] = None,
227
- split_strategy: Optional[str] = None,
222
+ item: CreateResourcePayload | None = None,
223
+ path_rid: str | None = None,
224
+ field_id: str | None = None,
225
+ extract_strategy: str | None = None,
226
+ split_strategy: str | None = None,
228
227
  ) -> Response:
229
228
  """
230
229
  An empty POST request is used to create a new upload resource.
@@ -265,7 +264,7 @@ async def _tus_post(
265
264
  try:
266
265
  metadata = parse_tus_metadata(request.headers["upload-metadata"])
267
266
  except InvalidTUSMetadata as exc:
268
- raise HTTPBadRequest(detail=f"Upload-Metadata header contains errors: {str(exc)}")
267
+ raise HTTPBadRequest(detail=f"Upload-Metadata header contains errors: {exc!s}")
269
268
  else:
270
269
  metadata = {}
271
270
 
@@ -309,8 +308,8 @@ async def _tus_post(
309
308
  metadata["implies_resource_creation"] = implies_resource_creation
310
309
 
311
310
  creation_payload = None
312
- if implies_resource_creation:
313
- creation_payload = base64.b64encode(pickle.dumps(item)).decode()
311
+ if implies_resource_creation and item is not None:
312
+ creation_payload = item.model_dump()
314
313
 
315
314
  await dm.load(upload_id)
316
315
  await dm.start(request)
@@ -339,7 +338,7 @@ async def _tus_post(
339
338
  return Response(
340
339
  status_code=201,
341
340
  headers={
342
- "Location": location, # noqa
341
+ "Location": location,
343
342
  "Tus-Resumable": "1.0.0",
344
343
  "Access-Control-Expose-Headers": "Location,Tus-Resumable",
345
344
  },
@@ -485,8 +484,8 @@ async def tus_patch(
485
484
  request: Request,
486
485
  kbid: str,
487
486
  upload_id: str,
488
- rid: Optional[str] = None,
489
- field: Optional[str] = None,
487
+ rid: str | None = None,
488
+ field: str | None = None,
490
489
  ):
491
490
  try:
492
491
  return await _tus_patch(
@@ -508,8 +507,8 @@ async def _tus_patch(
508
507
  request: Request,
509
508
  kbid: str,
510
509
  upload_id: str,
511
- rid: Optional[str] = None,
512
- field: Optional[str] = None,
510
+ rid: str | None = None,
511
+ field: str | None = None,
513
512
  ) -> Response:
514
513
  """
515
514
  Upload all bytes in the requests and append them in the specified offset
@@ -545,6 +544,13 @@ async def _tus_patch(
545
544
  )
546
545
 
547
546
  storage_manager = get_storage_manager()
547
+
548
+ # We consider this to be the last chunk if we have the size stored and we've reached it
549
+ current_chunk_size = int(request.headers["content-length"])
550
+ upload_finished = dm.get("size") is not None and dm.offset + current_chunk_size >= dm.get("size")
551
+ if not upload_finished:
552
+ validate_intermediate_tus_chunk(current_chunk_size, storage_manager)
553
+
548
554
  read_bytes = await storage_manager.append(
549
555
  dm,
550
556
  storage_manager.iterate_body_chunks(request, storage_manager.chunk_size),
@@ -563,8 +569,6 @@ async def _tus_patch(
563
569
  ["Upload-Offset", "Tus-Resumable", "Tus-Upload-Finished"]
564
570
  ),
565
571
  }
566
-
567
- upload_finished = dm.get("size") is not None and dm.offset >= dm.get("size")
568
572
  if upload_finished:
569
573
  rid = dm.get("rid", rid)
570
574
  if rid is None:
@@ -580,9 +584,7 @@ async def _tus_patch(
580
584
  item_payload = dm.get("item")
581
585
  creation_payload = None
582
586
  if item_payload is not None:
583
- if isinstance(item_payload, str):
584
- item_payload = item_payload.encode()
585
- creation_payload = pickle.loads(base64.b64decode(item_payload))
587
+ creation_payload = CreateResourcePayload.model_validate(item_payload)
586
588
 
587
589
  content_type = dm.get("metadata", {}).get("content_type")
588
590
  if content_type is not None and not content_types.valid(content_type):
@@ -616,7 +618,6 @@ async def _tus_patch(
616
618
 
617
619
  headers["NDB-Seq"] = f"{seqid}"
618
620
  else:
619
- validate_intermediate_tus_chunk(read_bytes, storage_manager)
620
621
  await dm.save()
621
622
 
622
623
  return Response(headers=headers)
@@ -643,12 +644,12 @@ async def upload_rslug_prefix(
643
644
  kbid: str,
644
645
  rslug: str,
645
646
  field: FieldIdString,
646
- x_filename: Annotated[Optional[str], X_FILENAME] = None,
647
- x_password: Annotated[Optional[str], X_PASSWORD] = None,
648
- x_language: Annotated[Optional[str], X_LANGUAGE] = None,
649
- x_md5: Annotated[Optional[str], X_MD5] = None,
650
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
651
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
647
+ x_filename: Annotated[str | None, X_FILENAME] = None,
648
+ x_password: Annotated[str | None, X_PASSWORD] = None,
649
+ x_language: Annotated[str | None, X_LANGUAGE] = None,
650
+ x_md5: Annotated[str | None, X_MD5] = None,
651
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
652
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
652
653
  ) -> ResourceFileUploaded:
653
654
  rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
654
655
  return await _upload(
@@ -679,12 +680,12 @@ async def upload_rid_prefix(
679
680
  kbid: str,
680
681
  path_rid: str,
681
682
  field: FieldIdString,
682
- x_filename: Annotated[Optional[str], X_FILENAME] = None,
683
- x_password: Annotated[Optional[str], X_PASSWORD] = None,
684
- x_language: Annotated[Optional[str], X_LANGUAGE] = None,
685
- x_md5: Annotated[Optional[str], X_MD5] = None,
686
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
687
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
683
+ x_filename: Annotated[str | None, X_FILENAME] = None,
684
+ x_password: Annotated[str | None, X_PASSWORD] = None,
685
+ x_language: Annotated[str | None, X_LANGUAGE] = None,
686
+ x_md5: Annotated[str | None, X_MD5] = None,
687
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
688
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
688
689
  ) -> ResourceFileUploaded:
689
690
  return await _upload(
690
691
  request,
@@ -712,12 +713,12 @@ async def upload_rid_prefix(
712
713
  async def upload(
713
714
  request: StarletteRequest,
714
715
  kbid: str,
715
- x_filename: Annotated[Optional[str], X_FILENAME] = None,
716
- x_password: Annotated[Optional[str], X_PASSWORD] = None,
717
- x_language: Annotated[Optional[str], X_LANGUAGE] = None,
718
- x_md5: Annotated[Optional[str], X_MD5] = None,
719
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
720
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
716
+ x_filename: Annotated[str | None, X_FILENAME] = None,
717
+ x_password: Annotated[str | None, X_PASSWORD] = None,
718
+ x_language: Annotated[str | None, X_LANGUAGE] = None,
719
+ x_md5: Annotated[str | None, X_MD5] = None,
720
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
721
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
721
722
  ) -> ResourceFileUploaded:
722
723
  return await _upload(
723
724
  request,
@@ -735,14 +736,14 @@ async def upload(
735
736
  async def _upload(
736
737
  request: StarletteRequest,
737
738
  kbid: str,
738
- path_rid: Optional[str] = None,
739
- field: Optional[str] = None,
740
- x_filename: Optional[str] = None,
741
- x_password: Optional[str] = None,
742
- x_language: Optional[str] = None,
743
- x_md5: Optional[str] = None,
744
- x_extract_strategy: Optional[str] = None,
745
- x_split_strategy: Optional[str] = None,
739
+ path_rid: str | None = None,
740
+ field: str | None = None,
741
+ x_filename: str | None = None,
742
+ x_password: str | None = None,
743
+ x_language: str | None = None,
744
+ x_md5: str | None = None,
745
+ x_extract_strategy: str | None = None,
746
+ x_split_strategy: str | None = None,
746
747
  ) -> ResourceFileUploaded:
747
748
  if path_rid is not None:
748
749
  await validate_rid_exists_or_raise_error(kbid, path_rid)
@@ -847,9 +848,9 @@ async def _upload(
847
848
 
848
849
  async def validate_field_upload(
849
850
  kbid: str,
850
- rid: Optional[str] = None,
851
- field: Optional[str] = None,
852
- md5: Optional[str] = None,
851
+ rid: str | None = None,
852
+ field: str | None = None,
853
+ md5: str | None = None,
853
854
  ):
854
855
  """Validate field upload and return blob storage path, rid and field id.
855
856
 
@@ -892,14 +893,14 @@ async def store_file_on_nuclia_db(
892
893
  field: str,
893
894
  content_type: str = "application/octet-stream",
894
895
  override_resource_title: bool = False,
895
- filename: Optional[str] = None,
896
- password: Optional[str] = None,
897
- language: Optional[str] = None,
898
- md5: Optional[str] = None,
899
- item: Optional[CreateResourcePayload] = None,
900
- extract_strategy: Optional[str] = None,
901
- split_strategy: Optional[str] = None,
902
- ) -> Optional[int]:
896
+ filename: str | None = None,
897
+ password: str | None = None,
898
+ language: str | None = None,
899
+ md5: str | None = None,
900
+ item: CreateResourcePayload | None = None,
901
+ extract_strategy: str | None = None,
902
+ split_strategy: str | None = None,
903
+ ) -> int | None:
903
904
  # File is on NucliaDB Storage at path
904
905
  partitioning = get_partitioning()
905
906
  processing = get_processing()
nucliadb/writer/app.py CHANGED
@@ -26,6 +26,7 @@ from starlette.middleware.authentication import AuthenticationMiddleware
26
26
  from starlette.requests import ClientDisconnect
27
27
  from starlette.responses import HTMLResponse
28
28
 
29
+ from nucliadb.middleware import ClientErrorPayloadLoggerMiddleware
29
30
  from nucliadb.writer import API_PREFIX
30
31
  from nucliadb.writer.api.v1.router import api as api_v1
31
32
  from nucliadb.writer.lifecycle import lifespan
@@ -41,14 +42,18 @@ from nucliadb_utils.settings import running_settings
41
42
 
42
43
  middleware = []
43
44
 
44
- middleware.extend([Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())])
45
+ middleware.extend(
46
+ [
47
+ Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend()),
48
+ Middleware(ClientErrorPayloadLoggerMiddleware),
49
+ ]
50
+ )
45
51
 
46
52
 
47
53
  errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
48
54
 
49
55
  fastapi_settings = dict(
50
56
  debug=running_settings.debug,
51
- middleware=middleware,
52
57
  lifespan=lifespan,
53
58
  exception_handlers={
54
59
  Exception: global_exception_handler,
@@ -70,6 +75,7 @@ def create_application() -> FastAPI:
70
75
  prefix_format=f"/{API_PREFIX}/v{{major}}",
71
76
  default_version=(1, 0),
72
77
  enable_latest=False,
78
+ middleware=middleware,
73
79
  kwargs=fastapi_settings,
74
80
  )
75
81
 
@@ -18,7 +18,6 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  from datetime import datetime
21
- from typing import Optional, Union
22
21
 
23
22
  from fastapi import HTTPException
24
23
 
@@ -122,18 +121,16 @@ def parse_basic_modify(bm: BrokerMessage, item: ComingResourcePayload, toprocess
122
121
  bm.basic.fieldmetadata.append(userfieldmetadata)
123
122
 
124
123
  if item.usermetadata is not None:
125
- # protobufers repeated fields don't support assignment
126
- # will allways be a clean basic
127
- bm.basic.usermetadata.classifications.extend(
128
- [
129
- Classification(
130
- labelset=x.labelset,
131
- label=x.label,
132
- cancelled_by_user=x.cancelled_by_user,
133
- )
134
- for x in item.usermetadata.classifications
135
- ]
136
- )
124
+ classifs = []
125
+ for classif in item.usermetadata.classifications:
126
+ classif_pb = Classification(
127
+ labelset=classif.labelset,
128
+ label=classif.label,
129
+ cancelled_by_user=classif.cancelled_by_user,
130
+ )
131
+ if classif_pb not in classifs:
132
+ classifs.append(classif_pb)
133
+ bm.basic.usermetadata.classifications.extend(classifs)
137
134
 
138
135
  relation_node_resource = RelationNode(value=bm.uuid, ntype=RelationNode.NodeType.RESOURCE)
139
136
  relations = []
@@ -180,7 +177,7 @@ def parse_basic_creation(
180
177
  bm: BrokerMessage,
181
178
  item: CreateResourcePayload,
182
179
  toprocess: PushPayload,
183
- kb_config: Optional[KnowledgeBoxConfig],
180
+ kb_config: KnowledgeBoxConfig | None,
184
181
  ):
185
182
  bm.basic.created.FromDatetime(datetime.now())
186
183
 
@@ -263,7 +260,7 @@ def build_question_answer_annotation_pb(
263
260
 
264
261
 
265
262
  def parse_user_classifications(
266
- item: Union[CreateResourcePayload, UpdateResourcePayload],
263
+ item: CreateResourcePayload | UpdateResourcePayload,
267
264
  ) -> list[ClassificationLabel]:
268
265
  return (
269
266
  [
@@ -19,7 +19,6 @@
19
19
  #
20
20
  import dataclasses
21
21
  from datetime import datetime
22
- from typing import Optional, Union
23
22
 
24
23
  from fastapi import HTTPException
25
24
  from google.protobuf.json_format import MessageToDict
@@ -88,7 +87,7 @@ async def extract_file_field(
88
87
  resource: ORMResource,
89
88
  toprocess: PushPayload,
90
89
  resource_classifications: ResourceClassifications,
91
- password: Optional[str] = None,
90
+ password: str | None = None,
92
91
  ):
93
92
  field_type = resources_pb2.FieldType.FILE
94
93
  field = await resource.get_field(field_id, field_type)
@@ -183,7 +182,7 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
183
182
  async def parse_fields(
184
183
  writer: BrokerMessage,
185
184
  toprocess: PushPayload,
186
- item: Union[CreateResourcePayload, UpdateResourcePayload],
185
+ item: CreateResourcePayload | UpdateResourcePayload,
187
186
  kbid: str,
188
187
  uuid: str,
189
188
  x_skip_store: bool,
@@ -555,7 +554,7 @@ async def _conversation_append_checks(
555
554
  kbid: str, rid: str, field_id: str, input: models.InputConversationField
556
555
  ):
557
556
  async with datamanagers.with_ro_transaction() as txn:
558
- resource_obj = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=rid)
557
+ resource_obj = await ORMResource.get(txn, kbid=kbid, rid=rid)
559
558
  if resource_obj is None:
560
559
  return
561
560
  conv: Conversation = await resource_obj.get_field(
@@ -564,7 +563,10 @@ async def _conversation_append_checks(
564
563
 
565
564
  # Make sure that the max number of messages is not exceeded
566
565
  current_message_count = (await conv.get_metadata()).total
567
- if len(input.messages) + current_message_count > MAX_CONVERSATION_MESSAGES:
566
+ if (
567
+ MAX_CONVERSATION_MESSAGES is not None
568
+ and (len(input.messages) + current_message_count) > MAX_CONVERSATION_MESSAGES
569
+ ):
568
570
  raise HTTPException(
569
571
  status_code=422,
570
572
  detail=f"Conversation fields cannot have more than {MAX_CONVERSATION_MESSAGES} messages.",
@@ -42,6 +42,13 @@ def parse_origin(origin: Origin, origin_payload: InputOrigin):
42
42
  origin.metadata.update(origin_payload.metadata)
43
43
  if origin_payload.path:
44
44
  origin.path = origin_payload.path
45
+ if origin_payload.sync_metadata is not None:
46
+ origin.sync_metadata.CopyFrom(
47
+ resources_pb2.SyncMetadata(
48
+ file_id=origin_payload.sync_metadata.file_id,
49
+ auth_provider=origin_payload.sync_metadata.auth_provider,
50
+ )
51
+ )
45
52
  origin.source = Origin.Source.API
46
53
 
47
54
 
@@ -17,15 +17,14 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import Optional
21
20
 
22
21
  from pydantic_settings import BaseSettings
23
22
 
24
23
 
25
24
  class Settings(BaseSettings):
26
25
  dm_enabled: bool = True
27
- dm_redis_host: Optional[str] = None
28
- dm_redis_port: Optional[int] = None
26
+ dm_redis_host: str | None = None
27
+ dm_redis_port: int | None = None
29
28
 
30
29
 
31
30
  settings = Settings()
@@ -18,7 +18,6 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  from dataclasses import dataclass
21
- from typing import Optional
22
21
 
23
22
  from nucliadb.writer.settings import settings as writer_settings
24
23
  from nucliadb.writer.tus.dm import FileDataManager, RedisFileDataManagerFactory
@@ -37,8 +36,8 @@ class TusStorageDriver:
37
36
  manager: FileStorageManager
38
37
 
39
38
 
40
- DRIVER: Optional[TusStorageDriver] = None
41
- REDIS_FILE_DATA_MANAGER_FACTORY: Optional[RedisFileDataManagerFactory] = None
39
+ DRIVER: TusStorageDriver | None = None
40
+ REDIS_FILE_DATA_MANAGER_FACTORY: RedisFileDataManagerFactory | None = None
42
41
 
43
42
 
44
43
  async def initialize():
@@ -19,8 +19,6 @@
19
19
  #
20
20
  from __future__ import annotations
21
21
 
22
- from typing import Optional
23
-
24
22
  from nucliadb.writer import logger
25
23
  from nucliadb.writer.tus.dm import FileDataManager
26
24
  from nucliadb.writer.tus.storage import BlobStore, FileStorageManager
@@ -40,7 +38,7 @@ class AzureBlobStore(BlobStore):
40
38
  logger.exception("Error closing AzureBlobStore")
41
39
  self._object_store = None
42
40
 
43
- async def initialize(self, account_url: str, connection_string: Optional[str] = None):
41
+ async def initialize(self, account_url: str, connection_string: str | None = None):
44
42
  self.bucket = "nucliadb-{kbid}"
45
43
  self.source = CloudFile.Source.AZURE
46
44
  self._object_store = AzureObjectStore(account_url, connection_string=connection_string)
nucliadb/writer/tus/dm.py CHANGED
@@ -18,7 +18,7 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import time
21
- from typing import Any, Optional
21
+ from typing import Any
22
22
 
23
23
  import backoff
24
24
  import orjson
@@ -43,7 +43,7 @@ DATA: dict[str, Any] = {}
43
43
 
44
44
 
45
45
  class FileDataManager:
46
- _data: Optional[dict[str, Any]] = None
46
+ _data: dict[str, Any] | None = None
47
47
  _loaded = False
48
48
  key = None
49
49
  _ttl = 60 * 50 * 5 # 5 minutes should be plenty of time between activity
@@ -63,7 +63,7 @@ class FileDataManager:
63
63
  if self._data and "last_activity" in self._data:
64
64
  # check for another active upload, fail if we're screwing with
65
65
  # someone else
66
- last_activity: Optional[int] = self._data.get("last_activity")
66
+ last_activity: int | None = self._data.get("last_activity")
67
67
  if last_activity and (time.time() - last_activity) < self._ttl:
68
68
  if request.headers and request.headers.get("tus-override-upload", "0") != "1":
69
69
  raise HTTPPreconditionFailed(
@@ -17,7 +17,6 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import Optional
21
20
 
22
21
  from starlette.exceptions import HTTPException as StarletteHTTPException
23
22
 
@@ -27,11 +26,11 @@ class InvalidTUSMetadata(Exception):
27
26
 
28
27
 
29
28
  class HTTPException(StarletteHTTPException):
30
- _status_code: Optional[int] = None
29
+ _status_code: int | None = None
31
30
 
32
- def __init__(self, detail: Optional[str] = None):
31
+ def __init__(self, detail: str | None = None):
33
32
  if self._status_code:
34
- super(HTTPException, self).__init__(status_code=self._status_code, detail=detail)
33
+ super().__init__(status_code=self._status_code, detail=detail)
35
34
  else:
36
35
  raise AttributeError("Status code not defined")
37
36
 
@@ -28,7 +28,6 @@ import tempfile
28
28
  import uuid
29
29
  from concurrent.futures import ThreadPoolExecutor
30
30
  from copy import deepcopy
31
- from typing import Optional
32
31
  from urllib.parse import quote_plus
33
32
 
34
33
  import aiohttp
@@ -74,11 +73,11 @@ RETRIABLE_EXCEPTIONS = (
74
73
 
75
74
 
76
75
  class GCloudBlobStore(BlobStore):
77
- _session: Optional[aiohttp.ClientSession] = None
76
+ _session: aiohttp.ClientSession | None = None
78
77
  loop = None
79
78
  upload_url: str
80
79
  object_base_url: str
81
- json_credentials: Optional[str]
80
+ json_credentials: str | None
82
81
  bucket: str
83
82
  location: str
84
83
  project: str
@@ -123,7 +122,7 @@ class GCloudBlobStore(BlobStore):
123
122
  project: str,
124
123
  bucket_labels,
125
124
  object_base_url: str,
126
- json_credentials: Optional[str],
125
+ json_credentials: str | None,
127
126
  ):
128
127
  self.bucket = bucket
129
128
  self.source = CloudFile.Source.GCS
@@ -131,7 +130,7 @@ class GCloudBlobStore(BlobStore):
131
130
  self.project = project
132
131
  self.bucket_labels = bucket_labels
133
132
  self.object_base_url = object_base_url + "/storage/v1/b"
134
- self.upload_url = object_base_url + "/upload/storage/v1/b/{bucket}/o?uploadType=resumable" # noqa
133
+ self.upload_url = object_base_url + "/upload/storage/v1/b/{bucket}/o?uploadType=resumable"
135
134
  self.json_credentials = json_credentials
136
135
  self._credentials = None
137
136
 
@@ -311,7 +310,7 @@ class GCloudFileStorageManager(FileStorageManager):
311
310
  },
312
311
  data=data,
313
312
  ) as call:
314
- text = await call.text() # noqa
313
+ text = await call.text()
315
314
  if call.status not in [200, 201, 308]:
316
315
  raise GoogleCloudException(f"{call.status}: {text}")
317
316
  return call
nucliadb/writer/tus/s3.py CHANGED
@@ -22,7 +22,6 @@ from __future__ import annotations
22
22
  import base64
23
23
  import uuid
24
24
  from contextlib import AsyncExitStack
25
- from typing import Optional
26
25
 
27
26
  import aiobotocore # type: ignore
28
27
  import aiohttp
@@ -195,8 +194,8 @@ class S3BlobStore(BlobStore):
195
194
  endpoint_url,
196
195
  region_name,
197
196
  bucket,
198
- bucket_tags: Optional[dict[str, str]] = None,
199
- kms_key_id: Optional[str] = None,
197
+ bucket_tags: dict[str, str] | None = None,
198
+ kms_key_id: str | None = None,
200
199
  ):
201
200
  self.bucket = bucket
202
201
  self.bucket_tags = bucket_tags
@@ -19,7 +19,7 @@
19
19
  #
20
20
  from __future__ import annotations
21
21
 
22
- from typing import AsyncIterator, Optional
22
+ from collections.abc import AsyncIterator
23
23
 
24
24
  from nucliadb.writer.tus.dm import FileDataManager
25
25
  from nucliadb_protos.resources_pb2 import CloudFile
@@ -47,13 +47,13 @@ class BlobStore:
47
47
 
48
48
  class FileStorageManager:
49
49
  chunk_size: int
50
- min_upload_size: Optional[int] = None
50
+ min_upload_size: int | None = None
51
51
 
52
52
  def __init__(self, storage: BlobStore):
53
53
  self.storage = storage
54
54
 
55
55
  def iter_data(
56
- self, uri: str, kbid: str, headers: Optional[dict[str, str]] = None
56
+ self, uri: str, kbid: str, headers: dict[str, str] | None = None
57
57
  ) -> AsyncIterator[bytes]:
58
58
  raise NotImplementedError()
59
59