nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418)
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -18,21 +18,18 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import base64
21
- import mimetypes
22
21
  import pickle
23
22
  import uuid
24
23
  from datetime import datetime
25
24
  from hashlib import md5
26
25
  from io import BytesIO
27
- from typing import Optional
26
+ from typing import Annotated, Optional
28
27
 
29
28
  from fastapi import HTTPException
30
29
  from fastapi.params import Header
31
30
  from fastapi.requests import Request
32
31
  from fastapi.responses import Response
33
- from fastapi_versioning import version # type: ignore
34
- from nucliadb_protos.resources_pb2 import FieldFile, Metadata
35
- from nucliadb_protos.writer_pb2 import BrokerMessage
32
+ from fastapi_versioning import version
36
33
  from starlette.requests import Request as StarletteRequest
37
34
 
38
35
  from nucliadb.common import datamanagers
@@ -40,13 +37,15 @@ from nucliadb.ingest.orm.utils import set_title
40
37
  from nucliadb.ingest.processing import PushPayload, Source
41
38
  from nucliadb.models.responses import HTTPClientError
42
39
  from nucliadb.writer import SERVICE_NAME
40
+ from nucliadb.writer.api.v1 import transaction
43
41
  from nucliadb.writer.api.v1.resource import (
44
42
  get_rid_from_slug_or_raise_error,
45
43
  validate_rid_exists_or_raise_error,
46
44
  )
45
+ from nucliadb.writer.api.v1.slug import ensure_slug_uniqueness, noop_context_manager
47
46
  from nucliadb.writer.back_pressure import maybe_back_pressure
48
47
  from nucliadb.writer.resource.audit import parse_audit
49
- from nucliadb.writer.resource.basic import parse_basic
48
+ from nucliadb.writer.resource.basic import parse_basic_creation
50
49
  from nucliadb.writer.resource.field import parse_fields
51
50
  from nucliadb.writer.resource.origin import parse_extra, parse_origin
52
51
  from nucliadb.writer.tus import TUSUPLOAD, UPLOAD, get_dm, get_storage_manager
@@ -58,20 +57,21 @@ from nucliadb.writer.tus.exceptions import (
58
57
  InvalidTUSMetadata,
59
58
  ResumableURINotAvailable,
60
59
  )
61
- from nucliadb.writer.tus.storage import FileStorageManager # type: ignore
60
+ from nucliadb.writer.tus.storage import FileStorageManager
62
61
  from nucliadb.writer.tus.utils import parse_tus_metadata
63
62
  from nucliadb.writer.utilities import get_processing
63
+ from nucliadb_models import content_types
64
64
  from nucliadb_models.resource import NucliaDBRoles
65
65
  from nucliadb_models.utils import FieldIdString
66
66
  from nucliadb_models.writer import CreateResourcePayload, ResourceFileUploaded
67
+ from nucliadb_protos.resources_pb2 import CloudFile, FieldFile, Metadata
68
+ from nucliadb_protos.writer_pb2 import BrokerMessage
67
69
  from nucliadb_utils.authentication import requires_one
68
70
  from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError
69
71
  from nucliadb_utils.storages.storage import KB_RESOURCE_FIELD
70
- from nucliadb_utils.transaction import TransactionCommitTimeoutError
71
72
  from nucliadb_utils.utilities import (
72
73
  get_partitioning,
73
74
  get_storage,
74
- get_transaction_utility,
75
75
  )
76
76
 
77
77
  from .router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
@@ -82,6 +82,10 @@ TUS_HEADERS = {
82
82
  "Tus-Extension": "creation-defer-length",
83
83
  }
84
84
 
85
+ ExtractStrategyHeader = Header(
86
+ description="Extract strategy to use when uploading a file. If not provided, the default strategy will be used.",
87
+ )
88
+
85
89
 
86
90
  @api.options(
87
91
  f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}",
@@ -142,9 +146,12 @@ async def tus_post_rslug_prefix(
142
146
  rslug: str,
143
147
  field: FieldIdString,
144
148
  item: Optional[CreateResourcePayload] = None,
149
+ x_extract_strategy: Annotated[Optional[str], ExtractStrategyHeader] = None,
145
150
  ) -> Response:
146
151
  rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
147
- return await _tus_post(request, kbid, item, path_rid=rid, field_id=field)
152
+ return await _tus_post(
153
+ request, kbid, item, path_rid=rid, field_id=field, extract_strategy=x_extract_strategy
154
+ )
148
155
 
149
156
 
150
157
  @api.post(
@@ -161,8 +168,11 @@ async def tus_post_rid_prefix(
161
168
  path_rid: str,
162
169
  field: FieldIdString,
163
170
  item: Optional[CreateResourcePayload] = None,
171
+ x_extract_strategy: Annotated[Optional[str], ExtractStrategyHeader] = None,
164
172
  ) -> Response:
165
- return await _tus_post(request, kbid, item, path_rid=path_rid, field_id=field)
173
+ return await _tus_post(
174
+ request, kbid, item, path_rid=path_rid, field_id=field, extract_strategy=x_extract_strategy
175
+ )
166
176
 
167
177
 
168
178
  @api.post(
@@ -177,8 +187,9 @@ async def tus_post(
177
187
  request: Request,
178
188
  kbid: str,
179
189
  item: Optional[CreateResourcePayload] = None,
190
+ x_extract_strategy: Annotated[Optional[str], ExtractStrategyHeader] = None,
180
191
  ) -> Response:
181
- return await _tus_post(request, kbid, item)
192
+ return await _tus_post(request, kbid, item, extract_strategy=x_extract_strategy)
182
193
 
183
194
 
184
195
  # called by one the three POST above - there are defined distinctly to produce clean API doc
@@ -188,6 +199,7 @@ async def _tus_post(
188
199
  item: Optional[CreateResourcePayload] = None,
189
200
  path_rid: Optional[str] = None,
190
201
  field_id: Optional[str] = None,
202
+ extract_strategy: Optional[str] = None,
191
203
  ) -> Response:
192
204
  """
193
205
  An empty POST request is used to create a new upload resource.
@@ -196,6 +208,13 @@ async def _tus_post(
196
208
  if path_rid is not None:
197
209
  await validate_rid_exists_or_raise_error(kbid, path_rid)
198
210
 
211
+ kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
212
+ if item and item.hidden and not (kb_config and kb_config.hidden_resources_enabled):
213
+ raise HTTPException(
214
+ status_code=422,
215
+ detail="Cannot hide a resource: the KB does not have hidden resources enabled",
216
+ )
217
+
199
218
  await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
200
219
 
201
220
  dm = get_dm()
@@ -221,15 +240,11 @@ async def _tus_post(
221
240
  try:
222
241
  metadata = parse_tus_metadata(request.headers["upload-metadata"])
223
242
  except InvalidTUSMetadata as exc:
224
- raise HTTPBadRequest(
225
- detail=f"Upload-Metadata header contains errors: {str(exc)}"
226
- )
243
+ raise HTTPBadRequest(detail=f"Upload-Metadata header contains errors: {str(exc)}")
227
244
  else:
228
245
  metadata = {}
229
246
 
230
- path, rid, field = await validate_field_upload(
231
- kbid, path_rid, field_id, metadata.get("md5")
232
- )
247
+ path, rid, field = await validate_field_upload(kbid, path_rid, field_id, metadata.get("md5"))
233
248
 
234
249
  if implies_resource_creation:
235
250
  # When uploading a file to a new kb resource, we want to allow multiple
@@ -255,8 +270,15 @@ async def _tus_post(
255
270
  request_content_type = None
256
271
  if item is None:
257
272
  request_content_type = request.headers.get("content-type")
258
- if not request_content_type:
259
- request_content_type = guess_content_type(metadata["filename"])
273
+ if request_content_type is None:
274
+ request_content_type = content_types.guess(metadata["filename"]) or "application/octet-stream"
275
+
276
+ if request_content_type is not None and not content_types.valid(request_content_type):
277
+ raise HTTPException(
278
+ status_code=415,
279
+ detail=f"Unsupported content type: {request_content_type}",
280
+ )
281
+
260
282
  metadata.setdefault("content_type", request_content_type)
261
283
 
262
284
  metadata["implies_resource_creation"] = implies_resource_creation
@@ -275,6 +297,7 @@ async def _tus_post(
275
297
  deferred_length=deferred_length,
276
298
  offset=0,
277
299
  item=creation_payload,
300
+ extract_strategy=extract_strategy,
278
301
  )
279
302
 
280
303
  if size is not None:
@@ -286,9 +309,7 @@ async def _tus_post(
286
309
  await dm.save()
287
310
 
288
311
  # Find the URL for upload, with the same parameter as this call
289
- location = api.url_path_for(
290
- "Upload information", upload_id=upload_id, **request.path_params
291
- )
312
+ location = api.url_path_for("Upload information", upload_id=upload_id, **request.path_params)
292
313
  return Response(
293
314
  status_code=201,
294
315
  headers={
@@ -465,7 +486,7 @@ async def _tus_patch(
465
486
  field: Optional[str] = None,
466
487
  ) -> Response:
467
488
  """
468
- Upload all bytes in the requests and append them in the specifyied offset
489
+ Upload all bytes in the requests and append them in the specified offset
469
490
  """
470
491
  if rid is not None:
471
492
  await validate_rid_exists_or_raise_error(kbid, rid)
@@ -494,8 +515,7 @@ async def _tus_patch(
494
515
 
495
516
  if offset != dm.offset:
496
517
  raise HTTPConflict(
497
- detail=f"Current upload offset({offset}) does not match "
498
- f"object offset {dm.offset}"
518
+ detail=f"Current upload offset({offset}) does not match " f"object offset {dm.offset}"
499
519
  )
500
520
 
501
521
  storage_manager = get_storage_manager()
@@ -507,9 +527,7 @@ async def _tus_patch(
507
527
 
508
528
  if to_upload and read_bytes != to_upload: # pragma: no cover
509
529
  # check length matches if provided
510
- raise HTTPPreconditionFailed(
511
- detail="Upload size does not match what was provided"
512
- )
530
+ raise HTTPPreconditionFailed(detail="Upload size does not match what was provided")
513
531
  await dm.update(offset=offset + read_bytes)
514
532
 
515
533
  headers = {
@@ -521,7 +539,6 @@ async def _tus_patch(
521
539
  }
522
540
 
523
541
  upload_finished = dm.get("size") is not None and dm.offset >= dm.get("size")
524
-
525
542
  if upload_finished:
526
543
  rid = dm.get("rid", rid)
527
544
  if rid is None:
@@ -540,13 +557,19 @@ async def _tus_patch(
540
557
  if isinstance(item_payload, str):
541
558
  item_payload = item_payload.encode()
542
559
  creation_payload = pickle.loads(base64.b64decode(item_payload))
560
+
561
+ content_type = dm.get("metadata", {}).get("content_type")
562
+ if content_type is not None and not content_types.valid(content_type):
563
+ return HTTPClientError(
564
+ status_code=415,
565
+ detail=f"Unsupported content type: {content_type}",
566
+ )
567
+
543
568
  try:
544
569
  seqid = await store_file_on_nuclia_db(
545
570
  size=dm.get("size"),
546
- content_type=dm.get("metadata", {}).get("content_type"),
547
- override_resource_title=dm.get("metadata", {}).get(
548
- "implies_resource_creation", False
549
- ),
571
+ content_type=content_type,
572
+ override_resource_title=dm.get("metadata", {}).get("implies_resource_creation", False),
550
573
  filename=dm.get("metadata", {}).get("filename"),
551
574
  password=dm.get("metadata", {}).get("password"),
552
575
  language=dm.get("metadata", {}).get("language"),
@@ -559,26 +582,24 @@ async def _tus_patch(
559
582
  request=request,
560
583
  bucket=storage_manager.storage.get_bucket_name(kbid),
561
584
  item=creation_payload,
585
+ extract_strategy=dm.get("extract_strategy") or None,
562
586
  )
563
587
  except LimitsExceededError as exc:
564
588
  raise HTTPException(status_code=exc.status_code, detail=exc.detail)
565
589
 
566
590
  headers["NDB-Seq"] = f"{seqid}"
567
591
  else:
568
- check_uploaded_chunk_size(read_bytes, storage_manager)
592
+ validate_intermediate_tus_chunk(read_bytes, storage_manager)
569
593
  await dm.save()
570
594
 
571
595
  return Response(headers=headers)
572
596
 
573
597
 
574
- def check_uploaded_chunk_size(read_bytes: int, storage_manager: FileStorageManager):
575
- if (
576
- storage_manager.min_upload_size is not None
577
- and read_bytes < storage_manager.min_upload_size
578
- ):
579
- raise HTTPPreconditionFailed(
580
- detail=f"Intermediate chunks cannot be smaller than {storage_manager.min_upload_size} bytes"
581
- )
598
+ def validate_intermediate_tus_chunk(read_bytes: int, storage_manager: FileStorageManager):
599
+ try:
600
+ storage_manager.validate_intermediate_chunk(read_bytes)
601
+ except ValueError as err:
602
+ raise HTTPPreconditionFailed(detail=str(err))
582
603
 
583
604
 
584
605
  @api.post(
@@ -599,6 +620,7 @@ async def upload_rslug_prefix(
599
620
  x_password: Optional[list[str]] = Header(None), # type: ignore
600
621
  x_language: Optional[list[str]] = Header(None), # type: ignore
601
622
  x_md5: Optional[list[str]] = Header(None), # type: ignore
623
+ x_extract_strategy: Annotated[Optional[str], ExtractStrategyHeader] = None,
602
624
  ) -> ResourceFileUploaded:
603
625
  rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
604
626
  return await _upload(
@@ -610,6 +632,7 @@ async def upload_rslug_prefix(
610
632
  x_password=x_password,
611
633
  x_language=x_language,
612
634
  x_md5=x_md5,
635
+ x_extract_strategy=x_extract_strategy,
613
636
  )
614
637
 
615
638
 
@@ -631,6 +654,7 @@ async def upload_rid_prefix(
631
654
  x_password: Optional[list[str]] = Header(None), # type: ignore
632
655
  x_language: Optional[list[str]] = Header(None), # type: ignore
633
656
  x_md5: Optional[list[str]] = Header(None), # type: ignore
657
+ x_extract_strategy: Annotated[Optional[str], ExtractStrategyHeader] = None,
634
658
  ) -> ResourceFileUploaded:
635
659
  return await _upload(
636
660
  request,
@@ -641,6 +665,7 @@ async def upload_rid_prefix(
641
665
  x_password=x_password,
642
666
  x_language=x_language,
643
667
  x_md5=x_md5,
668
+ x_extract_strategy=x_extract_strategy,
644
669
  )
645
670
 
646
671
 
@@ -660,6 +685,7 @@ async def upload(
660
685
  x_password: Optional[list[str]] = Header(None), # type: ignore
661
686
  x_language: Optional[list[str]] = Header(None), # type: ignore
662
687
  x_md5: Optional[list[str]] = Header(None), # type: ignore
688
+ x_extract_strategy: Annotated[Optional[str], ExtractStrategyHeader] = None,
663
689
  ) -> ResourceFileUploaded:
664
690
  return await _upload(
665
691
  request,
@@ -668,6 +694,7 @@ async def upload(
668
694
  x_password=x_password,
669
695
  x_language=x_language,
670
696
  x_md5=x_md5,
697
+ x_extract_strategy=x_extract_strategy,
671
698
  )
672
699
 
673
700
 
@@ -681,6 +708,7 @@ async def _upload(
681
708
  x_password: Optional[list[str]] = Header(None), # type: ignore
682
709
  x_language: Optional[list[str]] = Header(None), # type: ignore
683
710
  x_md5: Optional[list[str]] = Header(None), # type: ignore
711
+ x_extract_strategy: Optional[str] = None,
684
712
  ) -> ResourceFileUploaded:
685
713
  if path_rid is not None:
686
714
  await validate_rid_exists_or_raise_error(kbid, path_rid)
@@ -688,9 +716,7 @@ async def _upload(
688
716
  await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
689
717
 
690
718
  md5_user = x_md5[0] if x_md5 is not None and len(x_md5) > 0 else None
691
- path, rid, valid_field = await validate_field_upload(
692
- kbid, path_rid, field, md5_user
693
- )
719
+ path, rid, valid_field = await validate_field_upload(kbid, path_rid, field, md5_user)
694
720
  dm = get_dm()
695
721
  storage_manager = get_storage_manager()
696
722
 
@@ -719,8 +745,14 @@ async def _upload(
719
745
  # - content-type set by the user in the upload request header takes precedence.
720
746
  # - if not set, we will try to guess it from the filename and default to a generic binary content type otherwise
721
747
  content_type = request.headers.get("content-type")
722
- if not content_type:
723
- content_type = guess_content_type(filename)
748
+ if content_type is None:
749
+ content_type = content_types.guess(filename) or "application/octet-stream"
750
+
751
+ if not content_types.valid(content_type):
752
+ raise HTTPException(
753
+ status_code=415,
754
+ detail=f"Unsupported content type: {content_type}",
755
+ )
724
756
 
725
757
  metadata = {"content_type": content_type, "filename": filename}
726
758
 
@@ -770,6 +802,7 @@ async def _upload(
770
802
  path=path,
771
803
  request=request,
772
804
  bucket=storage_manager.storage.get_bucket_name(kbid),
805
+ extract_strategy=x_extract_strategy,
773
806
  )
774
807
  except LimitsExceededError as exc:
775
808
  raise HTTPException(status_code=exc.status_code, detail=exc.detail)
@@ -791,13 +824,9 @@ async def validate_field_upload(
791
824
  if rid is None:
792
825
  # we are going to create a new resource and a field
793
826
  if md5 is not None:
794
- exists = await datamanagers.atomic.resources.resource_exists(
795
- kbid=kbid, rid=md5
796
- )
827
+ exists = await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=md5)
797
828
  if exists:
798
- raise HTTPConflict(
799
- "A resource with the same uploaded file already exists"
800
- )
829
+ raise HTTPConflict("A resource with the same uploaded file already exists")
801
830
  rid = md5
802
831
  else:
803
832
  rid = uuid.uuid4().hex
@@ -823,7 +852,7 @@ async def store_file_on_nuclia_db(
823
852
  path: str,
824
853
  request: Request,
825
854
  bucket: str,
826
- source: Source,
855
+ source: CloudFile.Source.ValueType,
827
856
  rid: str,
828
857
  field: str,
829
858
  content_type: str = "application/octet-stream",
@@ -833,11 +862,10 @@ async def store_file_on_nuclia_db(
833
862
  language: Optional[str] = None,
834
863
  md5: Optional[str] = None,
835
864
  item: Optional[CreateResourcePayload] = None,
865
+ extract_strategy: Optional[str] = None,
836
866
  ) -> Optional[int]:
837
867
  # File is on NucliaDB Storage at path
838
-
839
868
  partitioning = get_partitioning()
840
- transaction = get_transaction_utility()
841
869
  processing = get_processing()
842
870
  storage = await get_storage(service_name=SERVICE_NAME)
843
871
 
@@ -859,14 +887,17 @@ async def store_file_on_nuclia_db(
859
887
 
860
888
  parse_audit(writer.audit, request)
861
889
 
890
+ unique_slug_context_manager = noop_context_manager()
862
891
  if item is not None:
863
892
  if item.slug:
893
+ unique_slug_context_manager = ensure_slug_uniqueness(kbid, item.slug)
864
894
  writer.slug = item.slug
865
895
  toprocess.slug = item.slug
866
896
 
867
897
  toprocess.processing_options = item.processing_options
868
898
 
869
- parse_basic(writer, item, toprocess)
899
+ kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
900
+ parse_basic_creation(writer, item, toprocess, kb_config)
870
901
  if item.origin is not None:
871
902
  parse_origin(writer.origin, item.origin)
872
903
  if item.extra is not None:
@@ -882,62 +913,63 @@ async def store_file_on_nuclia_db(
882
913
  uuid=rid,
883
914
  x_skip_store=False,
884
915
  )
885
-
886
- if override_resource_title and filename is not None:
887
- set_title(writer, toprocess, filename)
888
-
889
- writer.basic.icon = content_type
890
- writer.basic.created.FromDatetime(datetime.now())
891
-
892
- # Update resource with file
893
- file_field = FieldFile()
894
- file_field.added.FromDatetime(datetime.now())
895
- file_field.file.bucket_name = bucket
896
- file_field.file.content_type = content_type
897
- if filename is not None:
898
- file_field.file.filename = filename
899
- file_field.file.uri = path
900
- file_field.file.source = source
901
-
902
- if md5:
903
- file_field.file.md5 = md5
904
- if size:
905
- file_field.file.size = size
906
- if language:
907
- file_field.language = language
908
- if password:
909
- file_field.password = password
910
-
911
- writer.files[field].CopyFrom(file_field)
912
- # Do not store passwords on maindb
913
- writer.files[field].ClearField("password")
914
-
915
- toprocess.filefield[field] = await processing.convert_internal_filefield_to_str(
916
- file_field, storage=storage
917
- )
918
-
919
- writer.source = BrokerMessage.MessageSource.WRITER
920
- writer.basic.metadata.status = Metadata.Status.PENDING
921
- writer.basic.metadata.useful = True
922
- try:
923
- await transaction.commit(writer, partition, wait=True)
924
- except TransactionCommitTimeoutError:
925
- raise HTTPException(
926
- status_code=501,
927
- detail="Inconsistent write. This resource will not be processed and may not be stored.",
916
+ else:
917
+ # Use defaults for everything, but don't forget hidden which depends on KB config
918
+ kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
919
+ if kb_config and kb_config.hidden_resources_hide_on_creation:
920
+ writer.basic.hidden = True
921
+
922
+ async with unique_slug_context_manager:
923
+ if override_resource_title and filename is not None:
924
+ set_title(writer, toprocess, filename)
925
+
926
+ writer.basic.icon = content_type
927
+ writer.basic.created.FromDatetime(datetime.now())
928
+
929
+ # Update resource with file
930
+ file_field = FieldFile()
931
+ file_field.added.FromDatetime(datetime.now())
932
+ file_field.file.bucket_name = bucket
933
+ file_field.file.content_type = content_type
934
+ if filename is not None:
935
+ file_field.file.filename = filename
936
+ file_field.file.uri = path
937
+ file_field.file.source = source
938
+
939
+ if md5:
940
+ file_field.file.md5 = md5
941
+ if size:
942
+ file_field.file.size = size
943
+ if language:
944
+ file_field.language = language
945
+ if password:
946
+ file_field.password = password
947
+ if extract_strategy is not None:
948
+ file_field.extract_strategy = extract_strategy
949
+
950
+ writer.files[field].CopyFrom(file_field)
951
+ # Do not store passwords on maindb
952
+ writer.files[field].ClearField("password")
953
+
954
+ toprocess.filefield[field] = await processing.convert_internal_filefield_to_str(
955
+ file_field, storage=storage
928
956
  )
929
957
 
930
- try:
931
- processing_info = await processing.send_to_process(toprocess, partition)
932
- except LimitsExceededError as exc:
933
- raise HTTPException(status_code=exc.status_code, detail=exc.detail)
934
- except SendToProcessError:
935
- raise HTTPException(
936
- status_code=500,
937
- detail="Error while sending to process. Try calling /reprocess",
938
- )
958
+ writer.source = BrokerMessage.MessageSource.WRITER
959
+ writer.basic.metadata.status = Metadata.Status.PENDING
960
+ writer.basic.metadata.useful = True
961
+ await transaction.commit(writer, partition)
962
+ try:
963
+ processing_info = await processing.send_to_process(toprocess, partition)
964
+ except LimitsExceededError as exc:
965
+ raise HTTPException(status_code=exc.status_code, detail=exc.detail)
966
+ except SendToProcessError:
967
+ raise HTTPException(
968
+ status_code=500,
969
+ detail="Error while sending to process. Try calling /reprocess",
970
+ )
939
971
 
940
- return processing_info.seqid
972
+ return processing_info.seqid
941
973
 
942
974
 
943
975
  def maybe_b64decode(some_string: str) -> str:
@@ -946,9 +978,3 @@ def maybe_b64decode(some_string: str) -> str:
946
978
  except ValueError:
947
979
  # not b64encoded
948
980
  return some_string
949
-
950
-
951
- def guess_content_type(filename: str) -> str:
952
- default = "application/octet-stream"
953
- guessed, _ = mimetypes.guess_type(filename)
954
- return guessed or default
nucliadb/writer/app.py CHANGED
@@ -18,61 +18,38 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
- import functools
21
+ import importlib.metadata
22
22
 
23
- import pkg_resources
24
23
  from fastapi import FastAPI
25
24
  from starlette.middleware import Middleware
26
25
  from starlette.middleware.authentication import AuthenticationMiddleware
27
- from starlette.middleware.cors import CORSMiddleware
28
26
  from starlette.requests import ClientDisconnect
29
27
  from starlette.responses import HTMLResponse
30
28
 
31
- from nucliadb.common.context.fastapi import get_app_context, set_app_context
32
29
  from nucliadb.writer import API_PREFIX
33
30
  from nucliadb.writer.api.v1.router import api as api_v1
34
- from nucliadb.writer.lifecycle import finalize, initialize
31
+ from nucliadb.writer.lifecycle import lifespan
35
32
  from nucliadb_telemetry import errors
36
33
  from nucliadb_telemetry.fastapi.utils import (
37
34
  client_disconnect_handler,
38
35
  global_exception_handler,
39
36
  )
40
- from nucliadb_utils import const
41
37
  from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
42
38
  from nucliadb_utils.fastapi.openapi import extend_openapi
43
39
  from nucliadb_utils.fastapi.versioning import VersionedFastAPI
44
- from nucliadb_utils.settings import http_settings, running_settings
45
- from nucliadb_utils.utilities import has_feature
40
+ from nucliadb_utils.settings import running_settings
46
41
 
47
42
  middleware = []
48
43
 
49
- if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
50
- middleware.append(
51
- Middleware(
52
- CORSMiddleware,
53
- allow_origins=http_settings.cors_origins,
54
- allow_methods=["*"],
55
- # Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
56
- # Browsers already showing deprecation notices, so it needs to be specified explicitly
57
- allow_headers=["*", "Authorization"],
58
- )
59
- )
60
-
61
- middleware.extend(
62
- [Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())]
63
- )
64
-
44
+ middleware.extend([Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())])
65
45
 
66
- errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
67
46
 
68
- on_startup = [initialize]
69
- on_shutdown = [finalize]
47
+ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
70
48
 
71
49
  fastapi_settings = dict(
72
50
  debug=running_settings.debug,
73
51
  middleware=middleware,
74
- on_startup=on_startup,
75
- on_shutdown=on_shutdown,
52
+ lifespan=lifespan,
76
53
  exception_handlers={
77
54
  Exception: global_exception_handler,
78
55
  ClientDisconnect: client_disconnect_handler,
@@ -102,18 +79,4 @@ def create_application() -> FastAPI:
102
79
  # Use raw starlette routes to avoid unnecessary overhead
103
80
  application.add_route("/", homepage)
104
81
 
105
- set_app_context(application)
106
- maybe_configure_back_pressure(application)
107
82
  return application
108
-
109
-
110
- def maybe_configure_back_pressure(application: FastAPI):
111
- from nucliadb.writer.back_pressure import start_materializer, stop_materializer
112
- from nucliadb.writer.settings import back_pressure_settings
113
- from nucliadb_utils.settings import is_onprem_nucliadb
114
-
115
- if back_pressure_settings.enabled and not is_onprem_nucliadb():
116
- context = get_app_context(application)
117
- start_materializer_with_context = functools.partial(start_materializer, context)
118
- application.add_event_handler("startup", start_materializer_with_context)
119
- application.add_event_handler("shutdown", stop_materializer)