nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -18,7 +18,6 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import base64
21
- import mimetypes
22
21
  import pickle
23
22
  import uuid
24
23
  from datetime import datetime
@@ -30,30 +29,23 @@ from fastapi import HTTPException
30
29
  from fastapi.params import Header
31
30
  from fastapi.requests import Request
32
31
  from fastapi.responses import Response
33
- from fastapi_versioning import version # type: ignore
34
- from grpc import StatusCode as GrpcStatusCode
35
- from grpc.aio import AioRpcError
36
- from nucliadb_protos.resources_pb2 import FieldFile
37
- from nucliadb_protos.writer_pb2 import (
38
- BrokerMessage,
39
- ResourceFieldExistsResponse,
40
- ResourceFieldId,
41
- )
32
+ from fastapi_versioning import version
42
33
  from starlette.requests import Request as StarletteRequest
43
34
 
35
+ from nucliadb.common import datamanagers
44
36
  from nucliadb.ingest.orm.utils import set_title
45
37
  from nucliadb.ingest.processing import PushPayload, Source
46
38
  from nucliadb.models.responses import HTTPClientError
47
39
  from nucliadb.writer import SERVICE_NAME
48
- from nucliadb.writer.api.v1.resource import get_rid_from_params_or_raise_error
49
- from nucliadb.writer.back_pressure import maybe_back_pressure
50
- from nucliadb.writer.exceptions import (
51
- ConflictError,
52
- IngestNotAvailable,
53
- ResourceNotFound,
40
+ from nucliadb.writer.api.v1 import transaction
41
+ from nucliadb.writer.api.v1.resource import (
42
+ get_rid_from_slug_or_raise_error,
43
+ validate_rid_exists_or_raise_error,
54
44
  )
45
+ from nucliadb.writer.api.v1.slug import ensure_slug_uniqueness, noop_context_manager
46
+ from nucliadb.writer.back_pressure import maybe_back_pressure
55
47
  from nucliadb.writer.resource.audit import parse_audit
56
- from nucliadb.writer.resource.basic import parse_basic, set_processing_info
48
+ from nucliadb.writer.resource.basic import parse_basic_creation
57
49
  from nucliadb.writer.resource.field import parse_fields
58
50
  from nucliadb.writer.resource.origin import parse_extra, parse_origin
59
51
  from nucliadb.writer.tus import TUSUPLOAD, UPLOAD, get_dm, get_storage_manager
@@ -62,23 +54,24 @@ from nucliadb.writer.tus.exceptions import (
62
54
  HTTPConflict,
63
55
  HTTPNotFound,
64
56
  HTTPPreconditionFailed,
65
- HTTPServiceUnavailable,
66
57
  InvalidTUSMetadata,
67
58
  ResumableURINotAvailable,
68
59
  )
69
- from nucliadb.writer.tus.storage import FileStorageManager # type: ignore
60
+ from nucliadb.writer.tus.storage import FileStorageManager
70
61
  from nucliadb.writer.tus.utils import parse_tus_metadata
71
62
  from nucliadb.writer.utilities import get_processing
63
+ from nucliadb_models import content_types
72
64
  from nucliadb_models.resource import NucliaDBRoles
65
+ from nucliadb_models.utils import FieldIdString
73
66
  from nucliadb_models.writer import CreateResourcePayload, ResourceFileUploaded
67
+ from nucliadb_protos.resources_pb2 import CloudFile, FieldFile, Metadata
68
+ from nucliadb_protos.writer_pb2 import BrokerMessage
74
69
  from nucliadb_utils.authentication import requires_one
75
70
  from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError
76
71
  from nucliadb_utils.storages.storage import KB_RESOURCE_FIELD
77
72
  from nucliadb_utils.utilities import (
78
- get_ingest,
79
73
  get_partitioning,
80
74
  get_storage,
81
- get_transaction_utility,
82
75
  )
83
76
 
84
77
  from .router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
@@ -105,14 +98,14 @@ TUS_HEADERS = {
105
98
  @api.options(
106
99
  f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/file/{{field}}/{TUSUPLOAD}",
107
100
  tags=["Resource field TUS uploads"],
108
- name="TUS Server information",
101
+ summary="TUS Server information",
109
102
  openapi_extra={"x-operation-order": 4},
110
103
  include_in_schema=False,
111
104
  )
112
105
  @api.options(
113
106
  f"/{KB_PREFIX}/{{kbid}}/{TUSUPLOAD}",
114
107
  tags=["Knowledge Box TUS uploads"],
115
- name="TUS Server information",
108
+ summary="TUS Server information",
116
109
  openapi_extra={"x-operation-order": 4},
117
110
  )
118
111
  @version(1)
@@ -138,7 +131,7 @@ def _tus_options() -> Response:
138
131
  @api.post(
139
132
  f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/file/{{field}}/{TUSUPLOAD}",
140
133
  tags=["Resource field TUS uploads"],
141
- name="Create new upload on a Resource (by slug)",
134
+ summary="Create new upload on a Resource (by slug)",
142
135
  openapi_extra={"x-operation-order": 1},
143
136
  )
144
137
  @requires_one([NucliaDBRoles.WRITER])
@@ -147,16 +140,17 @@ async def tus_post_rslug_prefix(
147
140
  request: Request,
148
141
  kbid: str,
149
142
  rslug: str,
150
- field: str,
143
+ field: FieldIdString,
151
144
  item: Optional[CreateResourcePayload] = None,
152
145
  ) -> Response:
153
- return await _tus_post(request, kbid, item=item, rslug=rslug, field=field)
146
+ rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
147
+ return await _tus_post(request, kbid, item, path_rid=rid, field_id=field)
154
148
 
155
149
 
156
150
  @api.post(
157
151
  f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{path_rid}}/file/{{field}}/{TUSUPLOAD}",
158
152
  tags=["Resource field TUS uploads"],
159
- name="Create new upload on a Resource (by id)",
153
+ summary="Create new upload on a Resource (by id)",
160
154
  openapi_extra={"x-operation-order": 1},
161
155
  )
162
156
  @requires_one([NucliaDBRoles.WRITER])
@@ -165,16 +159,16 @@ async def tus_post_rid_prefix(
165
159
  request: Request,
166
160
  kbid: str,
167
161
  path_rid: str,
168
- field: str,
162
+ field: FieldIdString,
169
163
  item: Optional[CreateResourcePayload] = None,
170
164
  ) -> Response:
171
- return await _tus_post(request, kbid, item=item, path_rid=path_rid, field=field)
165
+ return await _tus_post(request, kbid, item, path_rid=path_rid, field_id=field)
172
166
 
173
167
 
174
168
  @api.post(
175
169
  f"/{KB_PREFIX}/{{kbid}}/{TUSUPLOAD}",
176
170
  tags=["Knowledge Box TUS uploads"],
177
- name="Create new upload on a Knowledge Box",
171
+ summary="Create new upload on a Knowledge Box",
178
172
  openapi_extra={"x-operation-order": 1},
179
173
  )
180
174
  @requires_one([NucliaDBRoles.WRITER])
@@ -184,7 +178,7 @@ async def tus_post(
184
178
  kbid: str,
185
179
  item: Optional[CreateResourcePayload] = None,
186
180
  ) -> Response:
187
- return await _tus_post(request, kbid, item=item)
181
+ return await _tus_post(request, kbid, item)
188
182
 
189
183
 
190
184
  # called by one the three POST above - there are defined distinctly to produce clean API doc
@@ -193,23 +187,29 @@ async def _tus_post(
193
187
  kbid: str,
194
188
  item: Optional[CreateResourcePayload] = None,
195
189
  path_rid: Optional[str] = None,
196
- rslug: Optional[str] = None,
197
- field: Optional[str] = None,
190
+ field_id: Optional[str] = None,
198
191
  ) -> Response:
199
192
  """
200
193
  An empty POST request is used to create a new upload resource.
201
194
  The Upload-Length header indicates the size of the entire upload in bytes.
202
195
  """
196
+ if path_rid is not None:
197
+ await validate_rid_exists_or_raise_error(kbid, path_rid)
198
+
199
+ kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
200
+ if item and item.hidden and not (kb_config and kb_config.hidden_resources_enabled):
201
+ raise HTTPException(
202
+ status_code=422,
203
+ detail="Cannot hide a resource: the KB does not have hidden resources enabled",
204
+ )
205
+
206
+ await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
207
+
203
208
  dm = get_dm()
204
209
  storage_manager = get_storage_manager()
205
210
 
206
- if rslug is not None:
207
- path_rid = await get_rid_from_params_or_raise_error(kbid, slug=rslug)
208
-
209
211
  implies_resource_creation = path_rid is None
210
212
 
211
- await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
212
-
213
213
  deferred_length = False
214
214
  if request.headers.get("upload-defer-length") == "1":
215
215
  deferred_length = True
@@ -228,22 +228,11 @@ async def _tus_post(
228
228
  try:
229
229
  metadata = parse_tus_metadata(request.headers["upload-metadata"])
230
230
  except InvalidTUSMetadata as exc:
231
- raise HTTPBadRequest(
232
- detail=f"Upload-Metadata header contains errors: {str(exc)}"
233
- )
231
+ raise HTTPBadRequest(detail=f"Upload-Metadata header contains errors: {str(exc)}")
234
232
  else:
235
233
  metadata = {}
236
234
 
237
- try:
238
- path, rid, field = await start_upload_field(
239
- kbid, path_rid, field, metadata.get("md5")
240
- )
241
- except ResourceNotFound:
242
- raise HTTPNotFound("Resource is not found or not yet available")
243
- except ConflictError:
244
- raise HTTPConflict("A resource with the same uploaded file already exists")
245
- except IngestNotAvailable:
246
- raise HTTPServiceUnavailable("Upload not available right now, try again")
235
+ path, rid, field = await validate_field_upload(kbid, path_rid, field_id, metadata.get("md5"))
247
236
 
248
237
  if implies_resource_creation:
249
238
  # When uploading a file to a new kb resource, we want to allow multiple
@@ -269,8 +258,15 @@ async def _tus_post(
269
258
  request_content_type = None
270
259
  if item is None:
271
260
  request_content_type = request.headers.get("content-type")
272
- if not request_content_type:
273
- request_content_type = guess_content_type(metadata["filename"])
261
+ if request_content_type is None:
262
+ request_content_type = content_types.guess(metadata["filename"]) or "application/octet-stream"
263
+
264
+ if request_content_type is not None and not content_types.valid(request_content_type):
265
+ raise HTTPException(
266
+ status_code=415,
267
+ detail=f"Unsupported content type: {request_content_type}",
268
+ )
269
+
274
270
  metadata.setdefault("content_type", request_content_type)
275
271
 
276
272
  metadata["implies_resource_creation"] = implies_resource_creation
@@ -300,9 +296,7 @@ async def _tus_post(
300
296
  await dm.save()
301
297
 
302
298
  # Find the URL for upload, with the same parameter as this call
303
- location = api.url_path_for(
304
- "Upload information", upload_id=upload_id, **request.path_params
305
- )
299
+ location = api.url_path_for("Upload information", upload_id=upload_id, **request.path_params)
306
300
  return Response(
307
301
  status_code=201,
308
302
  headers={
@@ -319,6 +313,7 @@ async def _tus_post(
319
313
  status_code=200,
320
314
  openapi_extra={"x-operation-order": 3},
321
315
  name="Upload information",
316
+ summary="Upload information",
322
317
  )
323
318
  @requires_one([NucliaDBRoles.WRITER])
324
319
  @version(1)
@@ -326,7 +321,7 @@ async def tus_head_rslug_prefix(
326
321
  request: Request,
327
322
  kbid: str,
328
323
  rslug: str,
329
- field: str,
324
+ field: FieldIdString,
330
325
  upload_id: str,
331
326
  ) -> Response:
332
327
  return await _tus_head(upload_id)
@@ -338,6 +333,7 @@ async def tus_head_rslug_prefix(
338
333
  status_code=200,
339
334
  openapi_extra={"x-operation-order": 3},
340
335
  name="Upload information",
336
+ summary="Upload information",
341
337
  )
342
338
  @requires_one([NucliaDBRoles.WRITER])
343
339
  @version(1)
@@ -345,7 +341,7 @@ async def tus_head_rid_prefix(
345
341
  request: Request,
346
342
  kbid: str,
347
343
  path_rid: str,
348
- field: str,
344
+ field: FieldIdString,
349
345
  upload_id: str,
350
346
  ) -> Response:
351
347
  return await _tus_head(upload_id)
@@ -357,6 +353,7 @@ async def tus_head_rid_prefix(
357
353
  status_code=200,
358
354
  openapi_extra={"x-operation-order": 3},
359
355
  name="Upload information",
356
+ summary="Upload information",
360
357
  )
361
358
  @requires_one([NucliaDBRoles.WRITER])
362
359
  @version(1)
@@ -393,7 +390,7 @@ async def _tus_head(
393
390
  f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}",
394
391
  tags=["Resource field TUS uploads"],
395
392
  status_code=200,
396
- name="Upload data on a Resource (by slug)",
393
+ summary="Upload data on a Resource (by slug)",
397
394
  openapi_extra={"x-operation-order": 2},
398
395
  )
399
396
  @requires_one([NucliaDBRoles.WRITER])
@@ -402,20 +399,18 @@ async def tus_patch_rslug_prefix(
402
399
  request: Request,
403
400
  kbid: str,
404
401
  rslug: str,
405
- field: str,
402
+ field: FieldIdString,
406
403
  upload_id: str,
407
- x_synchronous: bool = Header(False), # type: ignore
408
404
  ) -> Response:
409
- return await tus_patch(
410
- request, kbid, upload_id, rslug=rslug, field=field, x_synchronous=x_synchronous
411
- )
405
+ rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
406
+ return await tus_patch(request, kbid, upload_id, rid=rid, field=field)
412
407
 
413
408
 
414
409
  @api.patch(
415
410
  f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}",
416
411
  tags=["Resource field TUS uploads"],
417
412
  status_code=200,
418
- name="Upload data on a Resource (by id)",
413
+ summary="Upload data on a Resource (by id)",
419
414
  openapi_extra={"x-operation-order": 2},
420
415
  )
421
416
  @requires_one([NucliaDBRoles.WRITER])
@@ -424,20 +419,17 @@ async def tus_patch_rid_prefix(
424
419
  request: Request,
425
420
  kbid: str,
426
421
  rid: str,
427
- field: str,
422
+ field: FieldIdString,
428
423
  upload_id: str,
429
- x_synchronous: bool = Header(False), # type: ignore
430
424
  ) -> Response:
431
- return await tus_patch(
432
- request, kbid, upload_id, rid=rid, field=field, x_synchronous=x_synchronous
433
- )
425
+ return await tus_patch(request, kbid, upload_id, rid=rid, field=field)
434
426
 
435
427
 
436
428
  @api.patch(
437
429
  f"/{KB_PREFIX}/{{kbid}}/{TUSUPLOAD}/{{upload_id}}",
438
430
  tags=["Knowledge Box TUS uploads"],
439
431
  status_code=200,
440
- name="Upload data on a Knowledge Box",
432
+ summary="Upload data on a Knowledge Box",
441
433
  openapi_extra={"x-operation-order": 2},
442
434
  )
443
435
  @requires_one([NucliaDBRoles.WRITER])
@@ -446,9 +438,8 @@ async def patch(
446
438
  request: Request,
447
439
  kbid: str,
448
440
  upload_id: str,
449
- x_synchronous: bool = Header(False), # type: ignore
450
441
  ) -> Response:
451
- return await tus_patch(request, kbid, upload_id, x_synchronous=x_synchronous)
442
+ return await tus_patch(request, kbid, upload_id)
452
443
 
453
444
 
454
445
  async def tus_patch(
@@ -456,9 +447,7 @@ async def tus_patch(
456
447
  kbid: str,
457
448
  upload_id: str,
458
449
  rid: Optional[str] = None,
459
- rslug: Optional[str] = None,
460
450
  field: Optional[str] = None,
461
- x_synchronous: bool = False,
462
451
  ):
463
452
  try:
464
453
  return await _tus_patch(
@@ -466,9 +455,7 @@ async def tus_patch(
466
455
  kbid,
467
456
  upload_id,
468
457
  rid=rid,
469
- rslug=rslug,
470
458
  field=field,
471
- x_synchronous=x_synchronous,
472
459
  )
473
460
  except ResumableURINotAvailable:
474
461
  return HTTPClientError(
@@ -483,15 +470,13 @@ async def _tus_patch(
483
470
  kbid: str,
484
471
  upload_id: str,
485
472
  rid: Optional[str] = None,
486
- rslug: Optional[str] = None,
487
473
  field: Optional[str] = None,
488
- x_synchronous: bool = False,
489
474
  ) -> Response:
490
475
  """
491
- Upload all bytes in the requests and append them in the specifyied offset
476
+ Upload all bytes in the requests and append them in the specified offset
492
477
  """
493
- if rslug is not None:
494
- rid = await get_rid_from_params_or_raise_error(kbid, slug=rslug)
478
+ if rid is not None:
479
+ await validate_rid_exists_or_raise_error(kbid, rid)
495
480
 
496
481
  dm = get_dm()
497
482
  await dm.load(upload_id)
@@ -517,8 +502,7 @@ async def _tus_patch(
517
502
 
518
503
  if offset != dm.offset:
519
504
  raise HTTPConflict(
520
- detail=f"Current upload offset({offset}) does not match "
521
- f"object offset {dm.offset}"
505
+ detail=f"Current upload offset({offset}) does not match " f"object offset {dm.offset}"
522
506
  )
523
507
 
524
508
  storage_manager = get_storage_manager()
@@ -530,9 +514,7 @@ async def _tus_patch(
530
514
 
531
515
  if to_upload and read_bytes != to_upload: # pragma: no cover
532
516
  # check length matches if provided
533
- raise HTTPPreconditionFailed(
534
- detail="Upload size does not match what was provided"
535
- )
517
+ raise HTTPPreconditionFailed(detail="Upload size does not match what was provided")
536
518
  await dm.update(offset=offset + read_bytes)
537
519
 
538
520
  headers = {
@@ -544,7 +526,6 @@ async def _tus_patch(
544
526
  }
545
527
 
546
528
  upload_finished = dm.get("size") is not None and dm.offset >= dm.get("size")
547
-
548
529
  if upload_finished:
549
530
  rid = dm.get("rid", rid)
550
531
  if rid is None:
@@ -563,13 +544,19 @@ async def _tus_patch(
563
544
  if isinstance(item_payload, str):
564
545
  item_payload = item_payload.encode()
565
546
  creation_payload = pickle.loads(base64.b64decode(item_payload))
547
+
548
+ content_type = dm.get("metadata", {}).get("content_type")
549
+ if content_type is not None and not content_types.valid(content_type):
550
+ return HTTPClientError(
551
+ status_code=415,
552
+ detail=f"Unsupported content type: {content_type}",
553
+ )
554
+
566
555
  try:
567
556
  seqid = await store_file_on_nuclia_db(
568
557
  size=dm.get("size"),
569
- content_type=dm.get("metadata", {}).get("content_type"),
570
- override_resource_title=dm.get("metadata", {}).get(
571
- "implies_resource_creation", False
572
- ),
558
+ content_type=content_type,
559
+ override_resource_title=dm.get("metadata", {}).get("implies_resource_creation", False),
573
560
  filename=dm.get("metadata", {}).get("filename"),
574
561
  password=dm.get("metadata", {}).get("password"),
575
562
  language=dm.get("metadata", {}).get("language"),
@@ -582,34 +569,30 @@ async def _tus_patch(
582
569
  request=request,
583
570
  bucket=storage_manager.storage.get_bucket_name(kbid),
584
571
  item=creation_payload,
585
- wait_on_commit=x_synchronous,
586
572
  )
587
573
  except LimitsExceededError as exc:
588
574
  raise HTTPException(status_code=exc.status_code, detail=exc.detail)
589
575
 
590
576
  headers["NDB-Seq"] = f"{seqid}"
591
577
  else:
592
- check_uploaded_chunk_size(read_bytes, storage_manager)
578
+ validate_intermediate_tus_chunk(read_bytes, storage_manager)
593
579
  await dm.save()
594
580
 
595
581
  return Response(headers=headers)
596
582
 
597
583
 
598
- def check_uploaded_chunk_size(read_bytes: int, storage_manager: FileStorageManager):
599
- if (
600
- storage_manager.min_upload_size is not None
601
- and read_bytes < storage_manager.min_upload_size
602
- ):
603
- raise HTTPPreconditionFailed(
604
- detail=f"Intermediate chunks cannot be smaller than {storage_manager.min_upload_size} bytes"
605
- )
584
+ def validate_intermediate_tus_chunk(read_bytes: int, storage_manager: FileStorageManager):
585
+ try:
586
+ storage_manager.validate_intermediate_chunk(read_bytes)
587
+ except ValueError as err:
588
+ raise HTTPPreconditionFailed(detail=str(err))
606
589
 
607
590
 
608
591
  @api.post(
609
592
  f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/file/{{field}}/{UPLOAD}",
610
593
  status_code=201,
611
594
  tags=["Resource fields"],
612
- name="Upload binary file on a Resource (by slug)",
595
+ summary="Upload binary file on a Resource (by slug)",
613
596
  description="Upload a file as a field on an existing resource, if the field exists will return a conflict (419)",
614
597
  )
615
598
  @requires_one([NucliaDBRoles.WRITER])
@@ -618,23 +601,22 @@ async def upload_rslug_prefix(
618
601
  request: StarletteRequest,
619
602
  kbid: str,
620
603
  rslug: str,
621
- field: str,
604
+ field: FieldIdString,
622
605
  x_filename: Optional[list[str]] = Header(None), # type: ignore
623
606
  x_password: Optional[list[str]] = Header(None), # type: ignore
624
607
  x_language: Optional[list[str]] = Header(None), # type: ignore
625
608
  x_md5: Optional[list[str]] = Header(None), # type: ignore
626
- x_synchronous: bool = Header(False), # type: ignore
627
609
  ) -> ResourceFileUploaded:
610
+ rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
628
611
  return await _upload(
629
612
  request,
630
613
  kbid,
631
- rslug=rslug,
614
+ path_rid=rid,
632
615
  field=field,
633
616
  x_filename=x_filename,
634
617
  x_password=x_password,
635
618
  x_language=x_language,
636
619
  x_md5=x_md5,
637
- x_synchronous=x_synchronous,
638
620
  )
639
621
 
640
622
 
@@ -642,7 +624,7 @@ async def upload_rslug_prefix(
642
624
  f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{path_rid}}/file/{{field}}/{UPLOAD}",
643
625
  status_code=201,
644
626
  tags=["Resource fields"],
645
- name="Upload binary file on a Resource (by id)",
627
+ summary="Upload binary file on a Resource (by id)",
646
628
  description="Upload a file as a field on an existing resource, if the field exists will return a conflict (419)",
647
629
  )
648
630
  @requires_one([NucliaDBRoles.WRITER])
@@ -651,12 +633,11 @@ async def upload_rid_prefix(
651
633
  request: StarletteRequest,
652
634
  kbid: str,
653
635
  path_rid: str,
654
- field: str,
636
+ field: FieldIdString,
655
637
  x_filename: Optional[list[str]] = Header(None), # type: ignore
656
638
  x_password: Optional[list[str]] = Header(None), # type: ignore
657
639
  x_language: Optional[list[str]] = Header(None), # type: ignore
658
640
  x_md5: Optional[list[str]] = Header(None), # type: ignore
659
- x_synchronous: bool = Header(False), # type: ignore
660
641
  ) -> ResourceFileUploaded:
661
642
  return await _upload(
662
643
  request,
@@ -667,7 +648,6 @@ async def upload_rid_prefix(
667
648
  x_password=x_password,
668
649
  x_language=x_language,
669
650
  x_md5=x_md5,
670
- x_synchronous=x_synchronous,
671
651
  )
672
652
 
673
653
 
@@ -675,7 +655,7 @@ async def upload_rid_prefix(
675
655
  f"/{KB_PREFIX}/{{kbid}}/{UPLOAD}",
676
656
  status_code=201,
677
657
  tags=["Knowledge Boxes"],
678
- name="Upload binary file on a Knowledge Box",
658
+ summary="Upload binary file on a Knowledge Box",
679
659
  description="Upload a file onto a Knowledge Box, field id will be file and rid will be autogenerated. ",
680
660
  )
681
661
  @requires_one([NucliaDBRoles.WRITER])
@@ -687,7 +667,6 @@ async def upload(
687
667
  x_password: Optional[list[str]] = Header(None), # type: ignore
688
668
  x_language: Optional[list[str]] = Header(None), # type: ignore
689
669
  x_md5: Optional[list[str]] = Header(None), # type: ignore
690
- x_synchronous: bool = Header(False), # type: ignore
691
670
  ) -> ResourceFileUploaded:
692
671
  return await _upload(
693
672
  request,
@@ -696,7 +675,6 @@ async def upload(
696
675
  x_password=x_password,
697
676
  x_language=x_language,
698
677
  x_md5=x_md5,
699
- x_synchronous=x_synchronous,
700
678
  )
701
679
 
702
680
 
@@ -705,30 +683,19 @@ async def _upload(
705
683
  request: StarletteRequest,
706
684
  kbid: str,
707
685
  path_rid: Optional[str] = None,
708
- rslug: Optional[str] = None,
709
686
  field: Optional[str] = None,
710
687
  x_filename: Optional[list[str]] = Header(None), # type: ignore
711
688
  x_password: Optional[list[str]] = Header(None), # type: ignore
712
689
  x_language: Optional[list[str]] = Header(None), # type: ignore
713
690
  x_md5: Optional[list[str]] = Header(None), # type: ignore
714
- x_synchronous: bool = Header(False), # type: ignore
715
691
  ) -> ResourceFileUploaded:
716
- if rslug is not None:
717
- path_rid = await get_rid_from_params_or_raise_error(kbid, slug=rslug)
692
+ if path_rid is not None:
693
+ await validate_rid_exists_or_raise_error(kbid, path_rid)
718
694
 
719
695
  await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
720
696
 
721
697
  md5_user = x_md5[0] if x_md5 is not None and len(x_md5) > 0 else None
722
- try:
723
- path, rid, valid_field = await start_upload_field(
724
- kbid, path_rid, field, md5_user
725
- )
726
- except ResourceNotFound:
727
- raise HTTPNotFound("Resource is not found or not yet available")
728
- except ConflictError:
729
- raise HTTPConflict("A resource with the same uploaded file already exists")
730
- except IngestNotAvailable:
731
- raise HTTPServiceUnavailable("Upload not available right now, try again")
698
+ path, rid, valid_field = await validate_field_upload(kbid, path_rid, field, md5_user)
732
699
  dm = get_dm()
733
700
  storage_manager = get_storage_manager()
734
701
 
@@ -757,8 +724,14 @@ async def _upload(
757
724
  # - content-type set by the user in the upload request header takes precedence.
758
725
  # - if not set, we will try to guess it from the filename and default to a generic binary content type otherwise
759
726
  content_type = request.headers.get("content-type")
760
- if not content_type:
761
- content_type = guess_content_type(filename)
727
+ if content_type is None:
728
+ content_type = content_types.guess(filename) or "application/octet-stream"
729
+
730
+ if not content_types.valid(content_type):
731
+ raise HTTPException(
732
+ status_code=415,
733
+ detail=f"Unsupported content type: {content_type}",
734
+ )
762
735
 
763
736
  metadata = {"content_type": content_type, "filename": filename}
764
737
 
@@ -808,7 +781,6 @@ async def _upload(
808
781
  path=path,
809
782
  request=request,
810
783
  bucket=storage_manager.storage.get_bucket_name(kbid),
811
- wait_on_commit=x_synchronous,
812
784
  )
813
785
  except LimitsExceededError as exc:
814
786
  raise HTTPException(status_code=exc.status_code, detail=exc.detail)
@@ -816,43 +788,37 @@ async def _upload(
816
788
  return ResourceFileUploaded(seqid=seqid, uuid=rid, field_id=valid_field)
817
789
 
818
790
 
819
- async def start_upload_field(
791
+ async def validate_field_upload(
820
792
  kbid: str,
821
793
  rid: Optional[str] = None,
822
794
  field: Optional[str] = None,
823
795
  md5: Optional[str] = None,
824
796
  ):
825
- ingest = get_ingest()
826
- pbrequest = ResourceFieldId()
827
- pbrequest.kbid = kbid
828
- if rid is not None:
829
- pbrequest.rid = rid
797
+ """Validate field upload and return blob storage path, rid and field id.
830
798
 
831
- elif rid is None and md5 is not None:
832
- pbrequest.rid = md5
799
+ This function assumes KB exists
800
+ """
833
801
 
834
- try:
835
- response: ResourceFieldExistsResponse = await ingest.ResourceFieldExists(pbrequest) # type: ignore
836
- except AioRpcError as exc:
837
- if exc.code() is GrpcStatusCode.UNAVAILABLE:
838
- raise IngestNotAvailable()
802
+ if rid is None:
803
+ # we are going to create a new resource and a field
804
+ if md5 is not None:
805
+ exists = await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=md5)
806
+ if exists:
807
+ raise HTTPConflict("A resource with the same uploaded file already exists")
808
+ rid = md5
839
809
  else:
840
- raise exc
841
-
842
- if response.found is False and rid is not None:
843
- raise ResourceNotFound()
844
- elif response.found is True and rid is None and md5 is not None:
845
- raise ConflictError()
846
-
847
- if rid is None and md5 is None:
848
- rid = uuid.uuid4().hex
849
- elif rid is None:
850
- rid = md5
851
-
852
- if field is None and md5 is None:
853
- field = uuid.uuid4().hex
854
- elif field is None:
855
- field = md5
810
+ rid = uuid.uuid4().hex
811
+ else:
812
+ # we're adding a field to a resource
813
+ exists = await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=rid)
814
+ if not exists:
815
+ raise HTTPNotFound("Resource is not found or not yet available")
816
+
817
+ if field is None:
818
+ if md5 is None:
819
+ field = uuid.uuid4().hex
820
+ else:
821
+ field = md5
856
822
 
857
823
  path = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, field=field)
858
824
  return path, rid, field
@@ -864,7 +830,7 @@ async def store_file_on_nuclia_db(
864
830
  path: str,
865
831
  request: Request,
866
832
  bucket: str,
867
- source: Source,
833
+ source: CloudFile.Source.ValueType,
868
834
  rid: str,
869
835
  field: str,
870
836
  content_type: str = "application/octet-stream",
@@ -874,12 +840,9 @@ async def store_file_on_nuclia_db(
874
840
  language: Optional[str] = None,
875
841
  md5: Optional[str] = None,
876
842
  item: Optional[CreateResourcePayload] = None,
877
- wait_on_commit: bool = False,
878
843
  ) -> Optional[int]:
879
844
  # File is on NucliaDB Storage at path
880
-
881
845
  partitioning = get_partitioning()
882
- transaction = get_transaction_utility()
883
846
  processing = get_processing()
884
847
  storage = await get_storage(service_name=SERVICE_NAME)
885
848
 
@@ -901,14 +864,17 @@ async def store_file_on_nuclia_db(
901
864
 
902
865
  parse_audit(writer.audit, request)
903
866
 
867
+ unique_slug_context_manager = noop_context_manager()
904
868
  if item is not None:
905
869
  if item.slug:
870
+ unique_slug_context_manager = ensure_slug_uniqueness(kbid, item.slug)
906
871
  writer.slug = item.slug
907
872
  toprocess.slug = item.slug
908
873
 
909
874
  toprocess.processing_options = item.processing_options
910
875
 
911
- parse_basic(writer, item, toprocess)
876
+ kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
877
+ parse_basic_creation(writer, item, toprocess, kb_config)
912
878
  if item.origin is not None:
913
879
  parse_origin(writer.origin, item.origin)
914
880
  if item.extra is not None:
@@ -924,52 +890,61 @@ async def store_file_on_nuclia_db(
924
890
  uuid=rid,
925
891
  x_skip_store=False,
926
892
  )
893
+ else:
894
+ # Use defaults for everything, but don't forget hidden which depends on KB config
895
+ kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
896
+ if kb_config and kb_config.hidden_resources_hide_on_creation:
897
+ writer.basic.hidden = True
898
+
899
+ async with unique_slug_context_manager:
900
+ if override_resource_title and filename is not None:
901
+ set_title(writer, toprocess, filename)
902
+
903
+ writer.basic.icon = content_type
904
+ writer.basic.created.FromDatetime(datetime.now())
905
+
906
+ # Update resource with file
907
+ file_field = FieldFile()
908
+ file_field.added.FromDatetime(datetime.now())
909
+ file_field.file.bucket_name = bucket
910
+ file_field.file.content_type = content_type
911
+ if filename is not None:
912
+ file_field.file.filename = filename
913
+ file_field.file.uri = path
914
+ file_field.file.source = source
915
+
916
+ if md5:
917
+ file_field.file.md5 = md5
918
+ if size:
919
+ file_field.file.size = size
920
+ if language:
921
+ file_field.language = language
922
+ if password:
923
+ file_field.password = password
924
+
925
+ writer.files[field].CopyFrom(file_field)
926
+ # Do not store passwords on maindb
927
+ writer.files[field].ClearField("password")
928
+
929
+ toprocess.filefield[field] = await processing.convert_internal_filefield_to_str(
930
+ file_field, storage=storage
931
+ )
927
932
 
928
- if override_resource_title and filename is not None:
929
- set_title(writer, toprocess, filename)
930
-
931
- writer.basic.icon = content_type
932
- writer.basic.created.FromDatetime(datetime.now())
933
-
934
- # Update resource with file
935
- file_field = FieldFile()
936
- file_field.added.FromDatetime(datetime.now())
937
- file_field.file.bucket_name = bucket
938
- file_field.file.content_type = content_type
939
- if filename is not None:
940
- file_field.file.filename = filename
941
- file_field.file.uri = path
942
- file_field.file.source = source
943
-
944
- if md5:
945
- file_field.file.md5 = md5
946
- if size:
947
- file_field.file.size = size
948
- if language:
949
- file_field.language = language
950
- if password:
951
- file_field.password = password
952
-
953
- writer.files[field].CopyFrom(file_field)
954
- # Do not store passwords on maindb
955
- writer.files[field].ClearField("password")
956
-
957
- toprocess.filefield[field] = await processing.convert_internal_filefield_to_str(
958
- file_field, storage=storage
959
- )
960
-
961
- try:
962
- processing_info = await processing.send_to_process(toprocess, partition)
963
- except LimitsExceededError as exc:
964
- raise HTTPException(status_code=exc.status_code, detail=exc.detail)
965
- except SendToProcessError:
966
- raise HTTPException(status_code=500, detail="Error while sending to process")
967
-
968
- writer.source = BrokerMessage.MessageSource.WRITER
969
- set_processing_info(writer, processing_info)
970
- await transaction.commit(writer, partition, wait=wait_on_commit)
933
+ writer.source = BrokerMessage.MessageSource.WRITER
934
+ writer.basic.metadata.status = Metadata.Status.PENDING
935
+ writer.basic.metadata.useful = True
936
+ await transaction.commit(writer, partition)
937
+ try:
938
+ processing_info = await processing.send_to_process(toprocess, partition)
939
+ except LimitsExceededError as exc:
940
+ raise HTTPException(status_code=exc.status_code, detail=exc.detail)
941
+ except SendToProcessError:
942
+ raise HTTPException(
943
+ status_code=500,
944
+ detail="Error while sending to process. Try calling /reprocess",
945
+ )
971
946
 
972
- return processing_info.seqid
947
+ return processing_info.seqid
973
948
 
974
949
 
975
950
  def maybe_b64decode(some_string: str) -> str:
@@ -978,9 +953,3 @@ def maybe_b64decode(some_string: str) -> str:
978
953
  except ValueError:
979
954
  # not b64encoded
980
955
  return some_string
981
-
982
-
983
- def guess_content_type(filename: str) -> str:
984
- default = "application/octet-stream"
985
- guessed, _ = mimetypes.guess_type(filename)
986
- return guessed or default