nucliadb 6.7.2.post4874__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. migrations/0023_backfill_pg_catalog.py +8 -4
  2. migrations/0028_extracted_vectors_reference.py +1 -1
  3. migrations/0029_backfill_field_status.py +3 -4
  4. migrations/0032_remove_old_relations.py +2 -3
  5. migrations/0038_backfill_catalog_field_labels.py +8 -4
  6. migrations/0039_backfill_converation_splits_metadata.py +106 -0
  7. migrations/0040_migrate_search_configurations.py +79 -0
  8. migrations/0041_reindex_conversations.py +137 -0
  9. migrations/pg/0010_shards_index.py +34 -0
  10. nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
  11. migrations/pg/0012_catalog_statistics_undo.py +26 -0
  12. nucliadb/backups/create.py +2 -15
  13. nucliadb/backups/restore.py +4 -15
  14. nucliadb/backups/tasks.py +4 -1
  15. nucliadb/common/back_pressure/cache.py +2 -3
  16. nucliadb/common/back_pressure/materializer.py +7 -13
  17. nucliadb/common/back_pressure/settings.py +6 -6
  18. nucliadb/common/back_pressure/utils.py +1 -0
  19. nucliadb/common/cache.py +9 -9
  20. nucliadb/common/catalog/__init__.py +79 -0
  21. nucliadb/common/catalog/dummy.py +36 -0
  22. nucliadb/common/catalog/interface.py +85 -0
  23. nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
  24. nucliadb/common/catalog/utils.py +56 -0
  25. nucliadb/common/cluster/manager.py +8 -23
  26. nucliadb/common/cluster/rebalance.py +484 -112
  27. nucliadb/common/cluster/rollover.py +36 -9
  28. nucliadb/common/cluster/settings.py +4 -9
  29. nucliadb/common/cluster/utils.py +34 -8
  30. nucliadb/common/context/__init__.py +7 -8
  31. nucliadb/common/context/fastapi.py +1 -2
  32. nucliadb/common/datamanagers/__init__.py +2 -4
  33. nucliadb/common/datamanagers/atomic.py +9 -2
  34. nucliadb/common/datamanagers/cluster.py +1 -2
  35. nucliadb/common/datamanagers/fields.py +3 -4
  36. nucliadb/common/datamanagers/kb.py +6 -6
  37. nucliadb/common/datamanagers/labels.py +2 -3
  38. nucliadb/common/datamanagers/resources.py +10 -33
  39. nucliadb/common/datamanagers/rollover.py +5 -7
  40. nucliadb/common/datamanagers/search_configurations.py +1 -2
  41. nucliadb/common/datamanagers/synonyms.py +1 -2
  42. nucliadb/common/datamanagers/utils.py +4 -4
  43. nucliadb/common/datamanagers/vectorsets.py +4 -4
  44. nucliadb/common/external_index_providers/base.py +32 -5
  45. nucliadb/common/external_index_providers/manager.py +5 -34
  46. nucliadb/common/external_index_providers/settings.py +1 -27
  47. nucliadb/common/filter_expression.py +129 -41
  48. nucliadb/common/http_clients/exceptions.py +8 -0
  49. nucliadb/common/http_clients/processing.py +16 -23
  50. nucliadb/common/http_clients/utils.py +3 -0
  51. nucliadb/common/ids.py +82 -58
  52. nucliadb/common/locking.py +1 -2
  53. nucliadb/common/maindb/driver.py +9 -8
  54. nucliadb/common/maindb/local.py +5 -5
  55. nucliadb/common/maindb/pg.py +9 -8
  56. nucliadb/common/nidx.py +22 -5
  57. nucliadb/common/vector_index_config.py +1 -1
  58. nucliadb/export_import/datamanager.py +4 -3
  59. nucliadb/export_import/exporter.py +11 -19
  60. nucliadb/export_import/importer.py +13 -6
  61. nucliadb/export_import/tasks.py +2 -0
  62. nucliadb/export_import/utils.py +6 -18
  63. nucliadb/health.py +2 -2
  64. nucliadb/ingest/app.py +8 -8
  65. nucliadb/ingest/consumer/consumer.py +8 -10
  66. nucliadb/ingest/consumer/pull.py +10 -8
  67. nucliadb/ingest/consumer/service.py +5 -30
  68. nucliadb/ingest/consumer/shard_creator.py +16 -5
  69. nucliadb/ingest/consumer/utils.py +1 -1
  70. nucliadb/ingest/fields/base.py +37 -49
  71. nucliadb/ingest/fields/conversation.py +55 -9
  72. nucliadb/ingest/fields/exceptions.py +1 -2
  73. nucliadb/ingest/fields/file.py +22 -8
  74. nucliadb/ingest/fields/link.py +7 -7
  75. nucliadb/ingest/fields/text.py +2 -3
  76. nucliadb/ingest/orm/brain_v2.py +89 -57
  77. nucliadb/ingest/orm/broker_message.py +2 -4
  78. nucliadb/ingest/orm/entities.py +10 -209
  79. nucliadb/ingest/orm/index_message.py +128 -113
  80. nucliadb/ingest/orm/knowledgebox.py +91 -59
  81. nucliadb/ingest/orm/processor/auditing.py +1 -3
  82. nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
  83. nucliadb/ingest/orm/processor/processor.py +98 -153
  84. nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
  85. nucliadb/ingest/orm/resource.py +82 -71
  86. nucliadb/ingest/orm/utils.py +1 -1
  87. nucliadb/ingest/partitions.py +12 -1
  88. nucliadb/ingest/processing.py +17 -17
  89. nucliadb/ingest/serialize.py +202 -145
  90. nucliadb/ingest/service/writer.py +15 -114
  91. nucliadb/ingest/settings.py +36 -15
  92. nucliadb/ingest/utils.py +1 -2
  93. nucliadb/learning_proxy.py +23 -26
  94. nucliadb/metrics_exporter.py +20 -6
  95. nucliadb/middleware/__init__.py +82 -1
  96. nucliadb/migrator/datamanager.py +4 -11
  97. nucliadb/migrator/migrator.py +1 -2
  98. nucliadb/migrator/models.py +1 -2
  99. nucliadb/migrator/settings.py +1 -2
  100. nucliadb/models/internal/augment.py +614 -0
  101. nucliadb/models/internal/processing.py +19 -19
  102. nucliadb/openapi.py +2 -2
  103. nucliadb/purge/__init__.py +3 -8
  104. nucliadb/purge/orphan_shards.py +1 -2
  105. nucliadb/reader/__init__.py +5 -0
  106. nucliadb/reader/api/models.py +6 -13
  107. nucliadb/reader/api/v1/download.py +59 -38
  108. nucliadb/reader/api/v1/export_import.py +4 -4
  109. nucliadb/reader/api/v1/knowledgebox.py +37 -9
  110. nucliadb/reader/api/v1/learning_config.py +33 -14
  111. nucliadb/reader/api/v1/resource.py +61 -9
  112. nucliadb/reader/api/v1/services.py +18 -14
  113. nucliadb/reader/app.py +3 -1
  114. nucliadb/reader/reader/notifications.py +1 -2
  115. nucliadb/search/api/v1/__init__.py +3 -0
  116. nucliadb/search/api/v1/ask.py +3 -4
  117. nucliadb/search/api/v1/augment.py +585 -0
  118. nucliadb/search/api/v1/catalog.py +15 -19
  119. nucliadb/search/api/v1/find.py +16 -22
  120. nucliadb/search/api/v1/hydrate.py +328 -0
  121. nucliadb/search/api/v1/knowledgebox.py +1 -2
  122. nucliadb/search/api/v1/predict_proxy.py +1 -2
  123. nucliadb/search/api/v1/resource/ask.py +28 -8
  124. nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
  125. nucliadb/search/api/v1/resource/search.py +9 -11
  126. nucliadb/search/api/v1/retrieve.py +130 -0
  127. nucliadb/search/api/v1/search.py +28 -32
  128. nucliadb/search/api/v1/suggest.py +11 -14
  129. nucliadb/search/api/v1/summarize.py +1 -2
  130. nucliadb/search/api/v1/utils.py +2 -2
  131. nucliadb/search/app.py +3 -2
  132. nucliadb/search/augmentor/__init__.py +21 -0
  133. nucliadb/search/augmentor/augmentor.py +232 -0
  134. nucliadb/search/augmentor/fields.py +704 -0
  135. nucliadb/search/augmentor/metrics.py +24 -0
  136. nucliadb/search/augmentor/paragraphs.py +334 -0
  137. nucliadb/search/augmentor/resources.py +238 -0
  138. nucliadb/search/augmentor/utils.py +33 -0
  139. nucliadb/search/lifecycle.py +3 -1
  140. nucliadb/search/predict.py +33 -19
  141. nucliadb/search/predict_models.py +8 -9
  142. nucliadb/search/requesters/utils.py +11 -10
  143. nucliadb/search/search/cache.py +19 -42
  144. nucliadb/search/search/chat/ask.py +131 -59
  145. nucliadb/search/search/chat/exceptions.py +3 -5
  146. nucliadb/search/search/chat/fetcher.py +201 -0
  147. nucliadb/search/search/chat/images.py +6 -4
  148. nucliadb/search/search/chat/old_prompt.py +1375 -0
  149. nucliadb/search/search/chat/parser.py +510 -0
  150. nucliadb/search/search/chat/prompt.py +563 -615
  151. nucliadb/search/search/chat/query.py +453 -32
  152. nucliadb/search/search/chat/rpc.py +85 -0
  153. nucliadb/search/search/fetch.py +3 -4
  154. nucliadb/search/search/filters.py +8 -11
  155. nucliadb/search/search/find.py +33 -31
  156. nucliadb/search/search/find_merge.py +124 -331
  157. nucliadb/search/search/graph_strategy.py +14 -12
  158. nucliadb/search/search/hydrator/__init__.py +49 -0
  159. nucliadb/search/search/hydrator/fields.py +217 -0
  160. nucliadb/search/search/hydrator/images.py +130 -0
  161. nucliadb/search/search/hydrator/paragraphs.py +323 -0
  162. nucliadb/search/search/hydrator/resources.py +60 -0
  163. nucliadb/search/search/ingestion_agents.py +5 -5
  164. nucliadb/search/search/merge.py +90 -94
  165. nucliadb/search/search/metrics.py +24 -7
  166. nucliadb/search/search/paragraphs.py +7 -9
  167. nucliadb/search/search/predict_proxy.py +44 -18
  168. nucliadb/search/search/query.py +14 -86
  169. nucliadb/search/search/query_parser/fetcher.py +51 -82
  170. nucliadb/search/search/query_parser/models.py +19 -48
  171. nucliadb/search/search/query_parser/old_filters.py +20 -19
  172. nucliadb/search/search/query_parser/parsers/ask.py +5 -6
  173. nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
  174. nucliadb/search/search/query_parser/parsers/common.py +21 -13
  175. nucliadb/search/search/query_parser/parsers/find.py +6 -29
  176. nucliadb/search/search/query_parser/parsers/graph.py +18 -28
  177. nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
  178. nucliadb/search/search/query_parser/parsers/search.py +15 -56
  179. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
  180. nucliadb/search/search/rank_fusion.py +18 -13
  181. nucliadb/search/search/rerankers.py +6 -7
  182. nucliadb/search/search/retrieval.py +300 -0
  183. nucliadb/search/search/summarize.py +5 -6
  184. nucliadb/search/search/utils.py +3 -4
  185. nucliadb/search/settings.py +1 -2
  186. nucliadb/standalone/api_router.py +1 -1
  187. nucliadb/standalone/app.py +4 -3
  188. nucliadb/standalone/auth.py +5 -6
  189. nucliadb/standalone/lifecycle.py +2 -2
  190. nucliadb/standalone/run.py +5 -4
  191. nucliadb/standalone/settings.py +5 -6
  192. nucliadb/standalone/versions.py +3 -4
  193. nucliadb/tasks/consumer.py +13 -8
  194. nucliadb/tasks/models.py +2 -1
  195. nucliadb/tasks/producer.py +3 -3
  196. nucliadb/tasks/retries.py +8 -7
  197. nucliadb/train/api/utils.py +1 -3
  198. nucliadb/train/api/v1/shards.py +1 -2
  199. nucliadb/train/api/v1/trainset.py +1 -2
  200. nucliadb/train/app.py +1 -1
  201. nucliadb/train/generator.py +4 -4
  202. nucliadb/train/generators/field_classifier.py +2 -2
  203. nucliadb/train/generators/field_streaming.py +6 -6
  204. nucliadb/train/generators/image_classifier.py +2 -2
  205. nucliadb/train/generators/paragraph_classifier.py +2 -2
  206. nucliadb/train/generators/paragraph_streaming.py +2 -2
  207. nucliadb/train/generators/question_answer_streaming.py +2 -2
  208. nucliadb/train/generators/sentence_classifier.py +4 -10
  209. nucliadb/train/generators/token_classifier.py +3 -2
  210. nucliadb/train/generators/utils.py +6 -5
  211. nucliadb/train/nodes.py +3 -3
  212. nucliadb/train/resource.py +6 -8
  213. nucliadb/train/settings.py +3 -4
  214. nucliadb/train/types.py +11 -11
  215. nucliadb/train/upload.py +3 -2
  216. nucliadb/train/uploader.py +1 -2
  217. nucliadb/train/utils.py +1 -2
  218. nucliadb/writer/api/v1/export_import.py +4 -1
  219. nucliadb/writer/api/v1/field.py +15 -14
  220. nucliadb/writer/api/v1/knowledgebox.py +18 -56
  221. nucliadb/writer/api/v1/learning_config.py +5 -4
  222. nucliadb/writer/api/v1/resource.py +9 -20
  223. nucliadb/writer/api/v1/services.py +10 -132
  224. nucliadb/writer/api/v1/upload.py +73 -72
  225. nucliadb/writer/app.py +8 -2
  226. nucliadb/writer/resource/basic.py +12 -15
  227. nucliadb/writer/resource/field.py +43 -5
  228. nucliadb/writer/resource/origin.py +7 -0
  229. nucliadb/writer/settings.py +2 -3
  230. nucliadb/writer/tus/__init__.py +2 -3
  231. nucliadb/writer/tus/azure.py +5 -7
  232. nucliadb/writer/tus/dm.py +3 -3
  233. nucliadb/writer/tus/exceptions.py +3 -4
  234. nucliadb/writer/tus/gcs.py +15 -22
  235. nucliadb/writer/tus/s3.py +2 -3
  236. nucliadb/writer/tus/storage.py +3 -3
  237. {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
  238. nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
  239. nucliadb/common/datamanagers/entities.py +0 -139
  240. nucliadb/common/external_index_providers/pinecone.py +0 -894
  241. nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
  242. nucliadb/search/search/hydrator.py +0 -197
  243. nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
  244. {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
  245. {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
  246. {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
@@ -17,13 +17,12 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- import asyncio
21
20
  import contextlib
22
21
  from time import time
23
- from typing import Annotated, Optional
22
+ from typing import Annotated
24
23
  from uuid import uuid4
25
24
 
26
- from fastapi import HTTPException, Query, Response
25
+ from fastapi import BackgroundTasks, HTTPException, Query, Response
27
26
  from fastapi_versioning import version
28
27
  from starlette.requests import Request
29
28
 
@@ -498,12 +497,10 @@ async def _reprocess_resource(
498
497
  @requires(NucliaDBRoles.WRITER)
499
498
  @version(1)
500
499
  async def delete_resource_rslug_prefix(
501
- request: Request,
502
- kbid: str,
503
- rslug: str,
500
+ request: Request, kbid: str, rslug: str, background: BackgroundTasks
504
501
  ):
505
502
  rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
506
- return await _delete_resource(request, kbid, rid)
503
+ return await _delete_resource(request, kbid, rid, background)
507
504
 
508
505
 
509
506
  @api.delete(
@@ -514,19 +511,11 @@ async def delete_resource_rslug_prefix(
514
511
  )
515
512
  @requires(NucliaDBRoles.WRITER)
516
513
  @version(1)
517
- async def delete_resource_rid_prefix(
518
- request: Request,
519
- kbid: str,
520
- rid: str,
521
- ):
522
- return await _delete_resource(request, kbid, rid)
514
+ async def delete_resource_rid_prefix(request: Request, kbid: str, rid: str, background: BackgroundTasks):
515
+ return await _delete_resource(request, kbid, rid, background)
523
516
 
524
517
 
525
- async def _delete_resource(
526
- request: Request,
527
- kbid: str,
528
- rid: str,
529
- ):
518
+ async def _delete_resource(request: Request, kbid: str, rid: str, background: BackgroundTasks):
530
519
  await validate_rid_exists_or_raise_error(kbid, rid)
531
520
 
532
521
  partitioning = get_partitioning()
@@ -541,7 +530,7 @@ async def _delete_resource(
541
530
  parse_audit(writer.audit, request)
542
531
  await transaction.commit(writer, partition)
543
532
  processing = get_processing()
544
- asyncio.create_task(processing.delete_from_processing(kbid=kbid, resource_id=rid))
533
+ background.add_task(processing.delete_from_processing, kbid=kbid, resource_id=rid)
545
534
 
546
535
  return Response(status_code=204)
547
536
 
@@ -637,7 +626,7 @@ def needs_resource_reindex(item: UpdateResourcePayload) -> bool:
637
626
  )
638
627
 
639
628
 
640
- async def maybe_send_to_process(toprocess: PushPayload, partition) -> Optional[int]:
629
+ async def maybe_send_to_process(toprocess: PushPayload, partition) -> int | None:
641
630
  if not needs_reprocess(toprocess):
642
631
  return None
643
632
 
@@ -17,152 +17,22 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from fastapi import HTTPException, Response
20
+ from fastapi import Body, HTTPException, Path, Response
21
21
  from fastapi_versioning import version
22
22
  from starlette.requests import Request
23
23
 
24
24
  from nucliadb.common import datamanagers
25
25
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
26
26
  from nucliadb.common.models_utils import to_proto
27
- from nucliadb.models.responses import (
28
- HTTPConflict,
29
- HTTPInternalServerError,
30
- HTTPNotFound,
31
- )
32
27
  from nucliadb.writer.api.v1.router import KB_PREFIX, api
33
28
  from nucliadb_models.configuration import SearchConfiguration
34
- from nucliadb_models.entities import (
35
- CreateEntitiesGroupPayload,
36
- UpdateEntitiesGroupPayload,
37
- )
38
29
  from nucliadb_models.labels import LabelSet
39
30
  from nucliadb_models.resource import NucliaDBRoles
40
31
  from nucliadb_models.synonyms import KnowledgeBoxSynonyms
41
32
  from nucliadb_protos import writer_pb2
42
33
  from nucliadb_protos.knowledgebox_pb2 import Label as LabelPB
43
34
  from nucliadb_protos.knowledgebox_pb2 import LabelSet as LabelSetPB
44
- from nucliadb_protos.writer_pb2 import (
45
- DelEntitiesRequest,
46
- NewEntitiesGroupRequest,
47
- NewEntitiesGroupResponse,
48
- OpStatusWriter,
49
- UpdateEntitiesGroupRequest,
50
- UpdateEntitiesGroupResponse,
51
- )
52
35
  from nucliadb_utils.authentication import requires
53
- from nucliadb_utils.utilities import get_ingest
54
-
55
-
56
- @api.post(
57
- f"/{KB_PREFIX}/{{kbid}}/entitiesgroups",
58
- status_code=200,
59
- summary="Create Knowledge Box Entities Group",
60
- tags=["Knowledge Box Services"],
61
- openapi_extra={"x-operation_order": 1},
62
- )
63
- @requires(NucliaDBRoles.WRITER)
64
- @version(1)
65
- async def create_entities_group(request: Request, kbid: str, item: CreateEntitiesGroupPayload):
66
- ingest = get_ingest()
67
-
68
- pbrequest: NewEntitiesGroupRequest = NewEntitiesGroupRequest()
69
- pbrequest.kb.uuid = kbid
70
- pbrequest.group = item.group
71
- pbrequest.entities.custom = True
72
- if item.title:
73
- pbrequest.entities.title = item.title
74
- if item.color:
75
- pbrequest.entities.color = item.color
76
-
77
- for key, entity in item.entities.items():
78
- entitypb = pbrequest.entities.entities[key]
79
- entitypb.value = entity.value
80
- entitypb.merged = entity.merged
81
- entitypb.deleted = False
82
- entitypb.represents.extend(entity.represents)
83
-
84
- status: NewEntitiesGroupResponse = await ingest.NewEntitiesGroup(pbrequest) # type: ignore
85
- if status.status == NewEntitiesGroupResponse.Status.OK:
86
- return
87
- elif status.status == NewEntitiesGroupResponse.Status.KB_NOT_FOUND:
88
- return HTTPNotFound(detail="Knowledge Box does not exist")
89
- elif status.status == NewEntitiesGroupResponse.Status.ALREADY_EXISTS:
90
- return HTTPConflict(
91
- detail=f"Entities group {item.group} already exists in this Knowledge box",
92
- )
93
- elif status.status == NewEntitiesGroupResponse.Status.ERROR:
94
- return HTTPInternalServerError(detail="Error on settings entities on a Knowledge box")
95
-
96
-
97
- @api.patch(
98
- f"/{KB_PREFIX}/{{kbid}}/entitiesgroup/{{group}}",
99
- status_code=200,
100
- summary="Update Knowledge Box Entities Group",
101
- tags=["Knowledge Box Services"],
102
- openapi_extra={"x-operation_order": 2},
103
- )
104
- @requires(NucliaDBRoles.WRITER)
105
- @version(1)
106
- async def update_entities_group(
107
- request: Request, kbid: str, group: str, item: UpdateEntitiesGroupPayload
108
- ):
109
- ingest = get_ingest()
110
-
111
- pbrequest: UpdateEntitiesGroupRequest = UpdateEntitiesGroupRequest()
112
- pbrequest.kb.uuid = kbid
113
- pbrequest.group = group
114
- pbrequest.title = item.title or ""
115
- pbrequest.color = item.color or ""
116
-
117
- for name, entity in item.add.items():
118
- entitypb = pbrequest.add[name]
119
- entitypb.value = entity.value
120
- entitypb.merged = entity.merged
121
- entitypb.represents.extend(entity.represents)
122
-
123
- for name, entity in item.update.items():
124
- entitypb = pbrequest.update[name]
125
- entitypb.value = entity.value
126
- entitypb.merged = entity.merged
127
- entitypb.represents.extend(entity.represents)
128
-
129
- pbrequest.delete.extend(item.delete)
130
-
131
- status: UpdateEntitiesGroupResponse = await ingest.UpdateEntitiesGroup(pbrequest) # type: ignore
132
- if status.status == UpdateEntitiesGroupResponse.Status.OK:
133
- return
134
- elif status.status == UpdateEntitiesGroupResponse.Status.KB_NOT_FOUND:
135
- return HTTPNotFound(detail="Knowledge Box does not exist")
136
- elif status.status == UpdateEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND:
137
- return HTTPNotFound(detail="Entities group does not exist")
138
- elif status.status == UpdateEntitiesGroupResponse.Status.ERROR:
139
- return HTTPInternalServerError(detail="Error on settings entities on a Knowledge box")
140
-
141
-
142
- @api.delete(
143
- f"/{KB_PREFIX}/{{kbid}}/entitiesgroup/{{group}}",
144
- status_code=200,
145
- summary="Delete Knowledge Box Entities",
146
- tags=["Knowledge Box Services"],
147
- openapi_extra={"x-operation_order": 3},
148
- )
149
- @requires(NucliaDBRoles.WRITER)
150
- @version(1)
151
- async def delete_entities(request: Request, kbid: str, group: str):
152
- ingest = get_ingest()
153
- pbrequest: DelEntitiesRequest = DelEntitiesRequest()
154
- pbrequest.kb.uuid = kbid
155
- pbrequest.group = group
156
-
157
- status: OpStatusWriter = await ingest.DelEntities(pbrequest) # type: ignore
158
- if status.status == OpStatusWriter.Status.OK:
159
- return None
160
- elif status.status == OpStatusWriter.Status.NOTFOUND:
161
- raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
162
- elif status.status == OpStatusWriter.Status.ERROR:
163
- raise HTTPException(status_code=500, detail="Error on deleting entities from a Knowledge box")
164
-
165
- return Response(status_code=204)
166
36
 
167
37
 
168
38
  @api.post(
@@ -174,7 +44,15 @@ async def delete_entities(request: Request, kbid: str, group: str):
174
44
  )
175
45
  @requires(NucliaDBRoles.WRITER)
176
46
  @version(1)
177
- async def set_labelset_endpoint(request: Request, kbid: str, labelset: str, item: LabelSet):
47
+ async def set_labelset_endpoint(
48
+ request: Request,
49
+ kbid: str,
50
+ labelset: str = Path(
51
+ title="The ID of the labelset to create or update. This is a unique identifier that should be used at search time.",
52
+ examples=["categories", "movie-genres", "document-types"],
53
+ ),
54
+ item: LabelSet = Body(...),
55
+ ):
178
56
  if item.title is None:
179
57
  item.title = labelset
180
58
 
@@ -18,12 +18,11 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import base64
21
- import pickle
22
21
  import uuid
23
22
  from datetime import datetime
24
23
  from hashlib import md5
25
24
  from io import BytesIO
26
- from typing import Annotated, Optional
25
+ from typing import Annotated
27
26
 
28
27
  from fastapi import HTTPException
29
28
  from fastapi.requests import Request
@@ -123,17 +122,17 @@ TUS_HEADERS = {
123
122
  def tus_options(
124
123
  request: Request,
125
124
  kbid: str,
126
- rid: Optional[str] = None,
127
- rslug: Optional[str] = None,
128
- upload_id: Optional[str] = None,
129
- field: Optional[str] = None,
125
+ rid: str | None = None,
126
+ rslug: str | None = None,
127
+ upload_id: str | None = None,
128
+ field: str | None = None,
130
129
  ) -> Response:
131
130
  return _tus_options()
132
131
 
133
132
 
134
133
  def _tus_options() -> Response:
135
134
  """
136
- Gather information about the Servers current configuration such as enabled extensions, version...
135
+ Gather information about the Server's current configuration such as enabled extensions, version...
137
136
  """
138
137
  resp = Response(headers=TUS_HEADERS, status_code=204)
139
138
  return resp
@@ -152,9 +151,9 @@ async def tus_post_rslug_prefix(
152
151
  kbid: str,
153
152
  rslug: str,
154
153
  field: FieldIdString,
155
- item: Optional[CreateResourcePayload] = None,
156
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
157
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
154
+ item: CreateResourcePayload | None = None,
155
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
156
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
158
157
  ) -> Response:
159
158
  rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
160
159
  return await _tus_post(
@@ -181,9 +180,9 @@ async def tus_post_rid_prefix(
181
180
  kbid: str,
182
181
  path_rid: str,
183
182
  field: FieldIdString,
184
- item: Optional[CreateResourcePayload] = None,
185
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
186
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
183
+ item: CreateResourcePayload | None = None,
184
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
185
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
187
186
  ) -> Response:
188
187
  return await _tus_post(
189
188
  request,
@@ -207,9 +206,9 @@ async def tus_post_rid_prefix(
207
206
  async def tus_post(
208
207
  request: Request,
209
208
  kbid: str,
210
- item: Optional[CreateResourcePayload] = None,
211
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
212
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
209
+ item: CreateResourcePayload | None = None,
210
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
211
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
213
212
  ) -> Response:
214
213
  return await _tus_post(
215
214
  request, kbid, item, extract_strategy=x_extract_strategy, split_strategy=x_split_strategy
@@ -220,11 +219,11 @@ async def tus_post(
220
219
  async def _tus_post(
221
220
  request: Request,
222
221
  kbid: str,
223
- item: Optional[CreateResourcePayload] = None,
224
- path_rid: Optional[str] = None,
225
- field_id: Optional[str] = None,
226
- extract_strategy: Optional[str] = None,
227
- split_strategy: Optional[str] = None,
222
+ item: CreateResourcePayload | None = None,
223
+ path_rid: str | None = None,
224
+ field_id: str | None = None,
225
+ extract_strategy: str | None = None,
226
+ split_strategy: str | None = None,
228
227
  ) -> Response:
229
228
  """
230
229
  An empty POST request is used to create a new upload resource.
@@ -265,7 +264,7 @@ async def _tus_post(
265
264
  try:
266
265
  metadata = parse_tus_metadata(request.headers["upload-metadata"])
267
266
  except InvalidTUSMetadata as exc:
268
- raise HTTPBadRequest(detail=f"Upload-Metadata header contains errors: {str(exc)}")
267
+ raise HTTPBadRequest(detail=f"Upload-Metadata header contains errors: {exc!s}")
269
268
  else:
270
269
  metadata = {}
271
270
 
@@ -309,8 +308,8 @@ async def _tus_post(
309
308
  metadata["implies_resource_creation"] = implies_resource_creation
310
309
 
311
310
  creation_payload = None
312
- if implies_resource_creation:
313
- creation_payload = base64.b64encode(pickle.dumps(item)).decode()
311
+ if implies_resource_creation and item is not None:
312
+ creation_payload = item.model_dump()
314
313
 
315
314
  await dm.load(upload_id)
316
315
  await dm.start(request)
@@ -339,7 +338,7 @@ async def _tus_post(
339
338
  return Response(
340
339
  status_code=201,
341
340
  headers={
342
- "Location": location, # noqa
341
+ "Location": location,
343
342
  "Tus-Resumable": "1.0.0",
344
343
  "Access-Control-Expose-Headers": "Location,Tus-Resumable",
345
344
  },
@@ -485,8 +484,8 @@ async def tus_patch(
485
484
  request: Request,
486
485
  kbid: str,
487
486
  upload_id: str,
488
- rid: Optional[str] = None,
489
- field: Optional[str] = None,
487
+ rid: str | None = None,
488
+ field: str | None = None,
490
489
  ):
491
490
  try:
492
491
  return await _tus_patch(
@@ -508,8 +507,8 @@ async def _tus_patch(
508
507
  request: Request,
509
508
  kbid: str,
510
509
  upload_id: str,
511
- rid: Optional[str] = None,
512
- field: Optional[str] = None,
510
+ rid: str | None = None,
511
+ field: str | None = None,
513
512
  ) -> Response:
514
513
  """
515
514
  Upload all bytes in the requests and append them in the specified offset
@@ -545,6 +544,13 @@ async def _tus_patch(
545
544
  )
546
545
 
547
546
  storage_manager = get_storage_manager()
547
+
548
+ # We consider this to be the last chunk if we have the size stored and we've reached it
549
+ current_chunk_size = int(request.headers["content-length"])
550
+ upload_finished = dm.get("size") is not None and dm.offset + current_chunk_size >= dm.get("size")
551
+ if not upload_finished:
552
+ validate_intermediate_tus_chunk(current_chunk_size, storage_manager)
553
+
548
554
  read_bytes = await storage_manager.append(
549
555
  dm,
550
556
  storage_manager.iterate_body_chunks(request, storage_manager.chunk_size),
@@ -563,8 +569,6 @@ async def _tus_patch(
563
569
  ["Upload-Offset", "Tus-Resumable", "Tus-Upload-Finished"]
564
570
  ),
565
571
  }
566
-
567
- upload_finished = dm.get("size") is not None and dm.offset >= dm.get("size")
568
572
  if upload_finished:
569
573
  rid = dm.get("rid", rid)
570
574
  if rid is None:
@@ -580,9 +584,7 @@ async def _tus_patch(
580
584
  item_payload = dm.get("item")
581
585
  creation_payload = None
582
586
  if item_payload is not None:
583
- if isinstance(item_payload, str):
584
- item_payload = item_payload.encode()
585
- creation_payload = pickle.loads(base64.b64decode(item_payload))
587
+ creation_payload = CreateResourcePayload.model_validate(item_payload)
586
588
 
587
589
  content_type = dm.get("metadata", {}).get("content_type")
588
590
  if content_type is not None and not content_types.valid(content_type):
@@ -616,7 +618,6 @@ async def _tus_patch(
616
618
 
617
619
  headers["NDB-Seq"] = f"{seqid}"
618
620
  else:
619
- validate_intermediate_tus_chunk(read_bytes, storage_manager)
620
621
  await dm.save()
621
622
 
622
623
  return Response(headers=headers)
@@ -643,12 +644,12 @@ async def upload_rslug_prefix(
643
644
  kbid: str,
644
645
  rslug: str,
645
646
  field: FieldIdString,
646
- x_filename: Annotated[Optional[str], X_FILENAME] = None,
647
- x_password: Annotated[Optional[str], X_PASSWORD] = None,
648
- x_language: Annotated[Optional[str], X_LANGUAGE] = None,
649
- x_md5: Annotated[Optional[str], X_MD5] = None,
650
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
651
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
647
+ x_filename: Annotated[str | None, X_FILENAME] = None,
648
+ x_password: Annotated[str | None, X_PASSWORD] = None,
649
+ x_language: Annotated[str | None, X_LANGUAGE] = None,
650
+ x_md5: Annotated[str | None, X_MD5] = None,
651
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
652
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
652
653
  ) -> ResourceFileUploaded:
653
654
  rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
654
655
  return await _upload(
@@ -679,12 +680,12 @@ async def upload_rid_prefix(
679
680
  kbid: str,
680
681
  path_rid: str,
681
682
  field: FieldIdString,
682
- x_filename: Annotated[Optional[str], X_FILENAME] = None,
683
- x_password: Annotated[Optional[str], X_PASSWORD] = None,
684
- x_language: Annotated[Optional[str], X_LANGUAGE] = None,
685
- x_md5: Annotated[Optional[str], X_MD5] = None,
686
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
687
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
683
+ x_filename: Annotated[str | None, X_FILENAME] = None,
684
+ x_password: Annotated[str | None, X_PASSWORD] = None,
685
+ x_language: Annotated[str | None, X_LANGUAGE] = None,
686
+ x_md5: Annotated[str | None, X_MD5] = None,
687
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
688
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
688
689
  ) -> ResourceFileUploaded:
689
690
  return await _upload(
690
691
  request,
@@ -712,12 +713,12 @@ async def upload_rid_prefix(
712
713
  async def upload(
713
714
  request: StarletteRequest,
714
715
  kbid: str,
715
- x_filename: Annotated[Optional[str], X_FILENAME] = None,
716
- x_password: Annotated[Optional[str], X_PASSWORD] = None,
717
- x_language: Annotated[Optional[str], X_LANGUAGE] = None,
718
- x_md5: Annotated[Optional[str], X_MD5] = None,
719
- x_extract_strategy: Annotated[Optional[str], X_EXTRACT_STRATEGY] = None,
720
- x_split_strategy: Annotated[Optional[str], X_SPLIT_STRATEGY] = None,
716
+ x_filename: Annotated[str | None, X_FILENAME] = None,
717
+ x_password: Annotated[str | None, X_PASSWORD] = None,
718
+ x_language: Annotated[str | None, X_LANGUAGE] = None,
719
+ x_md5: Annotated[str | None, X_MD5] = None,
720
+ x_extract_strategy: Annotated[str | None, X_EXTRACT_STRATEGY] = None,
721
+ x_split_strategy: Annotated[str | None, X_SPLIT_STRATEGY] = None,
721
722
  ) -> ResourceFileUploaded:
722
723
  return await _upload(
723
724
  request,
@@ -735,14 +736,14 @@ async def upload(
735
736
  async def _upload(
736
737
  request: StarletteRequest,
737
738
  kbid: str,
738
- path_rid: Optional[str] = None,
739
- field: Optional[str] = None,
740
- x_filename: Optional[str] = None,
741
- x_password: Optional[str] = None,
742
- x_language: Optional[str] = None,
743
- x_md5: Optional[str] = None,
744
- x_extract_strategy: Optional[str] = None,
745
- x_split_strategy: Optional[str] = None,
739
+ path_rid: str | None = None,
740
+ field: str | None = None,
741
+ x_filename: str | None = None,
742
+ x_password: str | None = None,
743
+ x_language: str | None = None,
744
+ x_md5: str | None = None,
745
+ x_extract_strategy: str | None = None,
746
+ x_split_strategy: str | None = None,
746
747
  ) -> ResourceFileUploaded:
747
748
  if path_rid is not None:
748
749
  await validate_rid_exists_or_raise_error(kbid, path_rid)
@@ -847,9 +848,9 @@ async def _upload(
847
848
 
848
849
  async def validate_field_upload(
849
850
  kbid: str,
850
- rid: Optional[str] = None,
851
- field: Optional[str] = None,
852
- md5: Optional[str] = None,
851
+ rid: str | None = None,
852
+ field: str | None = None,
853
+ md5: str | None = None,
853
854
  ):
854
855
  """Validate field upload and return blob storage path, rid and field id.
855
856
 
@@ -892,14 +893,14 @@ async def store_file_on_nuclia_db(
892
893
  field: str,
893
894
  content_type: str = "application/octet-stream",
894
895
  override_resource_title: bool = False,
895
- filename: Optional[str] = None,
896
- password: Optional[str] = None,
897
- language: Optional[str] = None,
898
- md5: Optional[str] = None,
899
- item: Optional[CreateResourcePayload] = None,
900
- extract_strategy: Optional[str] = None,
901
- split_strategy: Optional[str] = None,
902
- ) -> Optional[int]:
896
+ filename: str | None = None,
897
+ password: str | None = None,
898
+ language: str | None = None,
899
+ md5: str | None = None,
900
+ item: CreateResourcePayload | None = None,
901
+ extract_strategy: str | None = None,
902
+ split_strategy: str | None = None,
903
+ ) -> int | None:
903
904
  # File is on NucliaDB Storage at path
904
905
  partitioning = get_partitioning()
905
906
  processing = get_processing()
nucliadb/writer/app.py CHANGED
@@ -26,6 +26,7 @@ from starlette.middleware.authentication import AuthenticationMiddleware
26
26
  from starlette.requests import ClientDisconnect
27
27
  from starlette.responses import HTMLResponse
28
28
 
29
+ from nucliadb.middleware import ClientErrorPayloadLoggerMiddleware
29
30
  from nucliadb.writer import API_PREFIX
30
31
  from nucliadb.writer.api.v1.router import api as api_v1
31
32
  from nucliadb.writer.lifecycle import lifespan
@@ -41,14 +42,18 @@ from nucliadb_utils.settings import running_settings
41
42
 
42
43
  middleware = []
43
44
 
44
- middleware.extend([Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())])
45
+ middleware.extend(
46
+ [
47
+ Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend()),
48
+ Middleware(ClientErrorPayloadLoggerMiddleware),
49
+ ]
50
+ )
45
51
 
46
52
 
47
53
  errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
48
54
 
49
55
  fastapi_settings = dict(
50
56
  debug=running_settings.debug,
51
- middleware=middleware,
52
57
  lifespan=lifespan,
53
58
  exception_handlers={
54
59
  Exception: global_exception_handler,
@@ -70,6 +75,7 @@ def create_application() -> FastAPI:
70
75
  prefix_format=f"/{API_PREFIX}/v{{major}}",
71
76
  default_version=(1, 0),
72
77
  enable_latest=False,
78
+ middleware=middleware,
73
79
  kwargs=fastapi_settings,
74
80
  )
75
81
 
@@ -18,7 +18,6 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  from datetime import datetime
21
- from typing import Optional, Union
22
21
 
23
22
  from fastapi import HTTPException
24
23
 
@@ -122,18 +121,16 @@ def parse_basic_modify(bm: BrokerMessage, item: ComingResourcePayload, toprocess
122
121
  bm.basic.fieldmetadata.append(userfieldmetadata)
123
122
 
124
123
  if item.usermetadata is not None:
125
- # protobufers repeated fields don't support assignment
126
- # will allways be a clean basic
127
- bm.basic.usermetadata.classifications.extend(
128
- [
129
- Classification(
130
- labelset=x.labelset,
131
- label=x.label,
132
- cancelled_by_user=x.cancelled_by_user,
133
- )
134
- for x in item.usermetadata.classifications
135
- ]
136
- )
124
+ classifs = []
125
+ for classif in item.usermetadata.classifications:
126
+ classif_pb = Classification(
127
+ labelset=classif.labelset,
128
+ label=classif.label,
129
+ cancelled_by_user=classif.cancelled_by_user,
130
+ )
131
+ if classif_pb not in classifs:
132
+ classifs.append(classif_pb)
133
+ bm.basic.usermetadata.classifications.extend(classifs)
137
134
 
138
135
  relation_node_resource = RelationNode(value=bm.uuid, ntype=RelationNode.NodeType.RESOURCE)
139
136
  relations = []
@@ -180,7 +177,7 @@ def parse_basic_creation(
180
177
  bm: BrokerMessage,
181
178
  item: CreateResourcePayload,
182
179
  toprocess: PushPayload,
183
- kb_config: Optional[KnowledgeBoxConfig],
180
+ kb_config: KnowledgeBoxConfig | None,
184
181
  ):
185
182
  bm.basic.created.FromDatetime(datetime.now())
186
183
 
@@ -263,7 +260,7 @@ def build_question_answer_annotation_pb(
263
260
 
264
261
 
265
262
  def parse_user_classifications(
266
- item: Union[CreateResourcePayload, UpdateResourcePayload],
263
+ item: CreateResourcePayload | UpdateResourcePayload,
267
264
  ) -> list[ClassificationLabel]:
268
265
  return (
269
266
  [