nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -18,7 +18,6 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import base64
21
- import mimetypes
22
21
  import pickle
23
22
  import uuid
24
23
  from datetime import datetime
@@ -30,9 +29,7 @@ from fastapi import HTTPException
30
29
  from fastapi.params import Header
31
30
  from fastapi.requests import Request
32
31
  from fastapi.responses import Response
33
- from fastapi_versioning import version # type: ignore
34
- from nucliadb_protos.resources_pb2 import FieldFile, Metadata
35
- from nucliadb_protos.writer_pb2 import BrokerMessage
32
+ from fastapi_versioning import version
36
33
  from starlette.requests import Request as StarletteRequest
37
34
 
38
35
  from nucliadb.common import datamanagers
@@ -40,13 +37,15 @@ from nucliadb.ingest.orm.utils import set_title
40
37
  from nucliadb.ingest.processing import PushPayload, Source
41
38
  from nucliadb.models.responses import HTTPClientError
42
39
  from nucliadb.writer import SERVICE_NAME
40
+ from nucliadb.writer.api.v1 import transaction
43
41
  from nucliadb.writer.api.v1.resource import (
44
42
  get_rid_from_slug_or_raise_error,
45
43
  validate_rid_exists_or_raise_error,
46
44
  )
45
+ from nucliadb.writer.api.v1.slug import ensure_slug_uniqueness, noop_context_manager
47
46
  from nucliadb.writer.back_pressure import maybe_back_pressure
48
47
  from nucliadb.writer.resource.audit import parse_audit
49
- from nucliadb.writer.resource.basic import parse_basic
48
+ from nucliadb.writer.resource.basic import parse_basic_creation
50
49
  from nucliadb.writer.resource.field import parse_fields
51
50
  from nucliadb.writer.resource.origin import parse_extra, parse_origin
52
51
  from nucliadb.writer.tus import TUSUPLOAD, UPLOAD, get_dm, get_storage_manager
@@ -58,20 +57,21 @@ from nucliadb.writer.tus.exceptions import (
58
57
  InvalidTUSMetadata,
59
58
  ResumableURINotAvailable,
60
59
  )
61
- from nucliadb.writer.tus.storage import FileStorageManager # type: ignore
60
+ from nucliadb.writer.tus.storage import FileStorageManager
62
61
  from nucliadb.writer.tus.utils import parse_tus_metadata
63
62
  from nucliadb.writer.utilities import get_processing
63
+ from nucliadb_models import content_types
64
64
  from nucliadb_models.resource import NucliaDBRoles
65
65
  from nucliadb_models.utils import FieldIdString
66
66
  from nucliadb_models.writer import CreateResourcePayload, ResourceFileUploaded
67
+ from nucliadb_protos.resources_pb2 import CloudFile, FieldFile, Metadata
68
+ from nucliadb_protos.writer_pb2 import BrokerMessage
67
69
  from nucliadb_utils.authentication import requires_one
68
70
  from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError
69
71
  from nucliadb_utils.storages.storage import KB_RESOURCE_FIELD
70
- from nucliadb_utils.transaction import TransactionCommitTimeoutError
71
72
  from nucliadb_utils.utilities import (
72
73
  get_partitioning,
73
74
  get_storage,
74
- get_transaction_utility,
75
75
  )
76
76
 
77
77
  from .router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
@@ -196,6 +196,13 @@ async def _tus_post(
196
196
  if path_rid is not None:
197
197
  await validate_rid_exists_or_raise_error(kbid, path_rid)
198
198
 
199
+ kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
200
+ if item and item.hidden and not (kb_config and kb_config.hidden_resources_enabled):
201
+ raise HTTPException(
202
+ status_code=422,
203
+ detail="Cannot hide a resource: the KB does not have hidden resources enabled",
204
+ )
205
+
199
206
  await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
200
207
 
201
208
  dm = get_dm()
@@ -221,15 +228,11 @@ async def _tus_post(
221
228
  try:
222
229
  metadata = parse_tus_metadata(request.headers["upload-metadata"])
223
230
  except InvalidTUSMetadata as exc:
224
- raise HTTPBadRequest(
225
- detail=f"Upload-Metadata header contains errors: {str(exc)}"
226
- )
231
+ raise HTTPBadRequest(detail=f"Upload-Metadata header contains errors: {str(exc)}")
227
232
  else:
228
233
  metadata = {}
229
234
 
230
- path, rid, field = await validate_field_upload(
231
- kbid, path_rid, field_id, metadata.get("md5")
232
- )
235
+ path, rid, field = await validate_field_upload(kbid, path_rid, field_id, metadata.get("md5"))
233
236
 
234
237
  if implies_resource_creation:
235
238
  # When uploading a file to a new kb resource, we want to allow multiple
@@ -255,8 +258,15 @@ async def _tus_post(
255
258
  request_content_type = None
256
259
  if item is None:
257
260
  request_content_type = request.headers.get("content-type")
258
- if not request_content_type:
259
- request_content_type = guess_content_type(metadata["filename"])
261
+ if request_content_type is None:
262
+ request_content_type = content_types.guess(metadata["filename"]) or "application/octet-stream"
263
+
264
+ if request_content_type is not None and not content_types.valid(request_content_type):
265
+ raise HTTPException(
266
+ status_code=415,
267
+ detail=f"Unsupported content type: {request_content_type}",
268
+ )
269
+
260
270
  metadata.setdefault("content_type", request_content_type)
261
271
 
262
272
  metadata["implies_resource_creation"] = implies_resource_creation
@@ -286,9 +296,7 @@ async def _tus_post(
286
296
  await dm.save()
287
297
 
288
298
  # Find the URL for upload, with the same parameter as this call
289
- location = api.url_path_for(
290
- "Upload information", upload_id=upload_id, **request.path_params
291
- )
299
+ location = api.url_path_for("Upload information", upload_id=upload_id, **request.path_params)
292
300
  return Response(
293
301
  status_code=201,
294
302
  headers={
@@ -465,7 +473,7 @@ async def _tus_patch(
465
473
  field: Optional[str] = None,
466
474
  ) -> Response:
467
475
  """
468
- Upload all bytes in the requests and append them in the specifyied offset
476
+ Upload all bytes in the requests and append them in the specified offset
469
477
  """
470
478
  if rid is not None:
471
479
  await validate_rid_exists_or_raise_error(kbid, rid)
@@ -494,8 +502,7 @@ async def _tus_patch(
494
502
 
495
503
  if offset != dm.offset:
496
504
  raise HTTPConflict(
497
- detail=f"Current upload offset({offset}) does not match "
498
- f"object offset {dm.offset}"
505
+ detail=f"Current upload offset({offset}) does not match " f"object offset {dm.offset}"
499
506
  )
500
507
 
501
508
  storage_manager = get_storage_manager()
@@ -507,9 +514,7 @@ async def _tus_patch(
507
514
 
508
515
  if to_upload and read_bytes != to_upload: # pragma: no cover
509
516
  # check length matches if provided
510
- raise HTTPPreconditionFailed(
511
- detail="Upload size does not match what was provided"
512
- )
517
+ raise HTTPPreconditionFailed(detail="Upload size does not match what was provided")
513
518
  await dm.update(offset=offset + read_bytes)
514
519
 
515
520
  headers = {
@@ -521,7 +526,6 @@ async def _tus_patch(
521
526
  }
522
527
 
523
528
  upload_finished = dm.get("size") is not None and dm.offset >= dm.get("size")
524
-
525
529
  if upload_finished:
526
530
  rid = dm.get("rid", rid)
527
531
  if rid is None:
@@ -540,13 +544,19 @@ async def _tus_patch(
540
544
  if isinstance(item_payload, str):
541
545
  item_payload = item_payload.encode()
542
546
  creation_payload = pickle.loads(base64.b64decode(item_payload))
547
+
548
+ content_type = dm.get("metadata", {}).get("content_type")
549
+ if content_type is not None and not content_types.valid(content_type):
550
+ return HTTPClientError(
551
+ status_code=415,
552
+ detail=f"Unsupported content type: {content_type}",
553
+ )
554
+
543
555
  try:
544
556
  seqid = await store_file_on_nuclia_db(
545
557
  size=dm.get("size"),
546
- content_type=dm.get("metadata", {}).get("content_type"),
547
- override_resource_title=dm.get("metadata", {}).get(
548
- "implies_resource_creation", False
549
- ),
558
+ content_type=content_type,
559
+ override_resource_title=dm.get("metadata", {}).get("implies_resource_creation", False),
550
560
  filename=dm.get("metadata", {}).get("filename"),
551
561
  password=dm.get("metadata", {}).get("password"),
552
562
  language=dm.get("metadata", {}).get("language"),
@@ -565,20 +575,17 @@ async def _tus_patch(
565
575
 
566
576
  headers["NDB-Seq"] = f"{seqid}"
567
577
  else:
568
- check_uploaded_chunk_size(read_bytes, storage_manager)
578
+ validate_intermediate_tus_chunk(read_bytes, storage_manager)
569
579
  await dm.save()
570
580
 
571
581
  return Response(headers=headers)
572
582
 
573
583
 
574
- def check_uploaded_chunk_size(read_bytes: int, storage_manager: FileStorageManager):
575
- if (
576
- storage_manager.min_upload_size is not None
577
- and read_bytes < storage_manager.min_upload_size
578
- ):
579
- raise HTTPPreconditionFailed(
580
- detail=f"Intermediate chunks cannot be smaller than {storage_manager.min_upload_size} bytes"
581
- )
584
+ def validate_intermediate_tus_chunk(read_bytes: int, storage_manager: FileStorageManager):
585
+ try:
586
+ storage_manager.validate_intermediate_chunk(read_bytes)
587
+ except ValueError as err:
588
+ raise HTTPPreconditionFailed(detail=str(err))
582
589
 
583
590
 
584
591
  @api.post(
@@ -688,9 +695,7 @@ async def _upload(
688
695
  await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
689
696
 
690
697
  md5_user = x_md5[0] if x_md5 is not None and len(x_md5) > 0 else None
691
- path, rid, valid_field = await validate_field_upload(
692
- kbid, path_rid, field, md5_user
693
- )
698
+ path, rid, valid_field = await validate_field_upload(kbid, path_rid, field, md5_user)
694
699
  dm = get_dm()
695
700
  storage_manager = get_storage_manager()
696
701
 
@@ -719,8 +724,14 @@ async def _upload(
719
724
  # - content-type set by the user in the upload request header takes precedence.
720
725
  # - if not set, we will try to guess it from the filename and default to a generic binary content type otherwise
721
726
  content_type = request.headers.get("content-type")
722
- if not content_type:
723
- content_type = guess_content_type(filename)
727
+ if content_type is None:
728
+ content_type = content_types.guess(filename) or "application/octet-stream"
729
+
730
+ if not content_types.valid(content_type):
731
+ raise HTTPException(
732
+ status_code=415,
733
+ detail=f"Unsupported content type: {content_type}",
734
+ )
724
735
 
725
736
  metadata = {"content_type": content_type, "filename": filename}
726
737
 
@@ -791,13 +802,9 @@ async def validate_field_upload(
791
802
  if rid is None:
792
803
  # we are going to create a new resource and a field
793
804
  if md5 is not None:
794
- exists = await datamanagers.atomic.resources.resource_exists(
795
- kbid=kbid, rid=md5
796
- )
805
+ exists = await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=md5)
797
806
  if exists:
798
- raise HTTPConflict(
799
- "A resource with the same uploaded file already exists"
800
- )
807
+ raise HTTPConflict("A resource with the same uploaded file already exists")
801
808
  rid = md5
802
809
  else:
803
810
  rid = uuid.uuid4().hex
@@ -823,7 +830,7 @@ async def store_file_on_nuclia_db(
823
830
  path: str,
824
831
  request: Request,
825
832
  bucket: str,
826
- source: Source,
833
+ source: CloudFile.Source.ValueType,
827
834
  rid: str,
828
835
  field: str,
829
836
  content_type: str = "application/octet-stream",
@@ -835,9 +842,7 @@ async def store_file_on_nuclia_db(
835
842
  item: Optional[CreateResourcePayload] = None,
836
843
  ) -> Optional[int]:
837
844
  # File is on NucliaDB Storage at path
838
-
839
845
  partitioning = get_partitioning()
840
- transaction = get_transaction_utility()
841
846
  processing = get_processing()
842
847
  storage = await get_storage(service_name=SERVICE_NAME)
843
848
 
@@ -859,14 +864,17 @@ async def store_file_on_nuclia_db(
859
864
 
860
865
  parse_audit(writer.audit, request)
861
866
 
867
+ unique_slug_context_manager = noop_context_manager()
862
868
  if item is not None:
863
869
  if item.slug:
870
+ unique_slug_context_manager = ensure_slug_uniqueness(kbid, item.slug)
864
871
  writer.slug = item.slug
865
872
  toprocess.slug = item.slug
866
873
 
867
874
  toprocess.processing_options = item.processing_options
868
875
 
869
- parse_basic(writer, item, toprocess)
876
+ kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
877
+ parse_basic_creation(writer, item, toprocess, kb_config)
870
878
  if item.origin is not None:
871
879
  parse_origin(writer.origin, item.origin)
872
880
  if item.extra is not None:
@@ -882,62 +890,61 @@ async def store_file_on_nuclia_db(
882
890
  uuid=rid,
883
891
  x_skip_store=False,
884
892
  )
885
-
886
- if override_resource_title and filename is not None:
887
- set_title(writer, toprocess, filename)
888
-
889
- writer.basic.icon = content_type
890
- writer.basic.created.FromDatetime(datetime.now())
891
-
892
- # Update resource with file
893
- file_field = FieldFile()
894
- file_field.added.FromDatetime(datetime.now())
895
- file_field.file.bucket_name = bucket
896
- file_field.file.content_type = content_type
897
- if filename is not None:
898
- file_field.file.filename = filename
899
- file_field.file.uri = path
900
- file_field.file.source = source
901
-
902
- if md5:
903
- file_field.file.md5 = md5
904
- if size:
905
- file_field.file.size = size
906
- if language:
907
- file_field.language = language
908
- if password:
909
- file_field.password = password
910
-
911
- writer.files[field].CopyFrom(file_field)
912
- # Do not store passwords on maindb
913
- writer.files[field].ClearField("password")
914
-
915
- toprocess.filefield[field] = await processing.convert_internal_filefield_to_str(
916
- file_field, storage=storage
917
- )
918
-
919
- writer.source = BrokerMessage.MessageSource.WRITER
920
- writer.basic.metadata.status = Metadata.Status.PENDING
921
- writer.basic.metadata.useful = True
922
- try:
923
- await transaction.commit(writer, partition, wait=True)
924
- except TransactionCommitTimeoutError:
925
- raise HTTPException(
926
- status_code=501,
927
- detail="Inconsistent write. This resource will not be processed and may not be stored.",
893
+ else:
894
+ # Use defaults for everything, but don't forget hidden which depends on KB config
895
+ kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
896
+ if kb_config and kb_config.hidden_resources_hide_on_creation:
897
+ writer.basic.hidden = True
898
+
899
+ async with unique_slug_context_manager:
900
+ if override_resource_title and filename is not None:
901
+ set_title(writer, toprocess, filename)
902
+
903
+ writer.basic.icon = content_type
904
+ writer.basic.created.FromDatetime(datetime.now())
905
+
906
+ # Update resource with file
907
+ file_field = FieldFile()
908
+ file_field.added.FromDatetime(datetime.now())
909
+ file_field.file.bucket_name = bucket
910
+ file_field.file.content_type = content_type
911
+ if filename is not None:
912
+ file_field.file.filename = filename
913
+ file_field.file.uri = path
914
+ file_field.file.source = source
915
+
916
+ if md5:
917
+ file_field.file.md5 = md5
918
+ if size:
919
+ file_field.file.size = size
920
+ if language:
921
+ file_field.language = language
922
+ if password:
923
+ file_field.password = password
924
+
925
+ writer.files[field].CopyFrom(file_field)
926
+ # Do not store passwords on maindb
927
+ writer.files[field].ClearField("password")
928
+
929
+ toprocess.filefield[field] = await processing.convert_internal_filefield_to_str(
930
+ file_field, storage=storage
928
931
  )
929
932
 
930
- try:
931
- processing_info = await processing.send_to_process(toprocess, partition)
932
- except LimitsExceededError as exc:
933
- raise HTTPException(status_code=exc.status_code, detail=exc.detail)
934
- except SendToProcessError:
935
- raise HTTPException(
936
- status_code=500,
937
- detail="Error while sending to process. Try calling /reprocess",
938
- )
933
+ writer.source = BrokerMessage.MessageSource.WRITER
934
+ writer.basic.metadata.status = Metadata.Status.PENDING
935
+ writer.basic.metadata.useful = True
936
+ await transaction.commit(writer, partition)
937
+ try:
938
+ processing_info = await processing.send_to_process(toprocess, partition)
939
+ except LimitsExceededError as exc:
940
+ raise HTTPException(status_code=exc.status_code, detail=exc.detail)
941
+ except SendToProcessError:
942
+ raise HTTPException(
943
+ status_code=500,
944
+ detail="Error while sending to process. Try calling /reprocess",
945
+ )
939
946
 
940
- return processing_info.seqid
947
+ return processing_info.seqid
941
948
 
942
949
 
943
950
  def maybe_b64decode(some_string: str) -> str:
@@ -946,9 +953,3 @@ def maybe_b64decode(some_string: str) -> str:
946
953
  except ValueError:
947
954
  # not b64encoded
948
955
  return some_string
949
-
950
-
951
- def guess_content_type(filename: str) -> str:
952
- default = "application/octet-stream"
953
- guessed, _ = mimetypes.guess_type(filename)
954
- return guessed or default
nucliadb/writer/app.py CHANGED
@@ -18,61 +18,38 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
- import functools
21
+ import importlib.metadata
22
22
 
23
- import pkg_resources
24
23
  from fastapi import FastAPI
25
24
  from starlette.middleware import Middleware
26
25
  from starlette.middleware.authentication import AuthenticationMiddleware
27
- from starlette.middleware.cors import CORSMiddleware
28
26
  from starlette.requests import ClientDisconnect
29
27
  from starlette.responses import HTMLResponse
30
28
 
31
- from nucliadb.common.context.fastapi import get_app_context, set_app_context
32
29
  from nucliadb.writer import API_PREFIX
33
30
  from nucliadb.writer.api.v1.router import api as api_v1
34
- from nucliadb.writer.lifecycle import finalize, initialize
31
+ from nucliadb.writer.lifecycle import lifespan
35
32
  from nucliadb_telemetry import errors
36
33
  from nucliadb_telemetry.fastapi.utils import (
37
34
  client_disconnect_handler,
38
35
  global_exception_handler,
39
36
  )
40
- from nucliadb_utils import const
41
37
  from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
42
38
  from nucliadb_utils.fastapi.openapi import extend_openapi
43
39
  from nucliadb_utils.fastapi.versioning import VersionedFastAPI
44
- from nucliadb_utils.settings import http_settings, running_settings
45
- from nucliadb_utils.utilities import has_feature
40
+ from nucliadb_utils.settings import running_settings
46
41
 
47
42
  middleware = []
48
43
 
49
- if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
50
- middleware.append(
51
- Middleware(
52
- CORSMiddleware,
53
- allow_origins=http_settings.cors_origins,
54
- allow_methods=["*"],
55
- # Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
56
- # Browsers already showing deprecation notices, so it needs to be specified explicitly
57
- allow_headers=["*", "Authorization"],
58
- )
59
- )
60
-
61
- middleware.extend(
62
- [Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())]
63
- )
64
-
44
+ middleware.extend([Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())])
65
45
 
66
- errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
67
46
 
68
- on_startup = [initialize]
69
- on_shutdown = [finalize]
47
+ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
70
48
 
71
49
  fastapi_settings = dict(
72
50
  debug=running_settings.debug,
73
51
  middleware=middleware,
74
- on_startup=on_startup,
75
- on_shutdown=on_shutdown,
52
+ lifespan=lifespan,
76
53
  exception_handlers={
77
54
  Exception: global_exception_handler,
78
55
  ClientDisconnect: client_disconnect_handler,
@@ -102,18 +79,4 @@ def create_application() -> FastAPI:
102
79
  # Use raw starlette routes to avoid unnecessary overhead
103
80
  application.add_route("/", homepage)
104
81
 
105
- set_app_context(application)
106
- maybe_configure_back_pressure(application)
107
82
  return application
108
-
109
-
110
- def maybe_configure_back_pressure(application: FastAPI):
111
- from nucliadb.writer.back_pressure import start_materializer, stop_materializer
112
- from nucliadb.writer.settings import back_pressure_settings
113
- from nucliadb_utils.settings import is_onprem_nucliadb
114
-
115
- if back_pressure_settings.enabled and not is_onprem_nucliadb():
116
- context = get_app_context(application)
117
- start_materializer_with_context = functools.partial(start_materializer, context)
118
- application.add_event_handler("startup", start_materializer_with_context)
119
- application.add_event_handler("shutdown", stop_materializer)
@@ -28,7 +28,6 @@ from typing import Optional
28
28
  from async_lru import alru_cache
29
29
  from cachetools import TTLCache
30
30
  from fastapi import HTTPException, Request
31
- from nucliadb_protos.writer_pb2 import ShardObject
32
31
 
33
32
  from nucliadb.common import datamanagers
34
33
  from nucliadb.common.cluster.manager import get_index_nodes
@@ -37,6 +36,7 @@ from nucliadb.common.context.fastapi import get_app_context
37
36
  from nucliadb.common.http_clients.processing import ProcessingHTTPClient
38
37
  from nucliadb.writer import logger
39
38
  from nucliadb.writer.settings import back_pressure_settings as settings
39
+ from nucliadb_protos.writer_pb2 import ShardObject
40
40
  from nucliadb_telemetry import metrics
41
41
  from nucliadb_utils import const
42
42
  from nucliadb_utils.nats import NatsConnectionManager
@@ -112,9 +112,7 @@ def cached_back_pressure(kbid: str, resource_uuid: Optional[str] = None):
112
112
  if data is not None:
113
113
  try_after = data.try_after
114
114
  back_pressure_type = data.type
115
- RATE_LIMITED_REQUESTS_COUNTER.inc(
116
- {"type": back_pressure_type, "cached": "true"}
117
- )
115
+ RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "true"})
118
116
  logger.info(
119
117
  "Back pressure applied from cache",
120
118
  extra={
@@ -137,9 +135,7 @@ def cached_back_pressure(kbid: str, resource_uuid: Optional[str] = None):
137
135
  except BackPressureException as exc:
138
136
  try_after = exc.data.try_after
139
137
  back_pressure_type = exc.data.type
140
- RATE_LIMITED_REQUESTS_COUNTER.inc(
141
- {"type": back_pressure_type, "cached": "false"}
142
- )
138
+ RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "false"})
143
139
  _cache.set(cache_key, exc.data)
144
140
  raise HTTPException(
145
141
  status_code=429,
@@ -248,14 +244,10 @@ class Materializer:
248
244
  for node in get_index_nodes():
249
245
  try:
250
246
  with back_pressure_observer({"type": "get_indexing_pending"}):
251
- self.indexing_pending[node.id] = (
252
- await get_nats_consumer_pending_messages(
253
- self.nats_manager,
254
- stream=const.Streams.INDEX.name,
255
- consumer=const.Streams.INDEX.group.format(
256
- node=node.id
257
- ),
258
- )
247
+ self.indexing_pending[node.id] = await get_nats_consumer_pending_messages(
248
+ self.nats_manager,
249
+ stream=const.Streams.INDEX.name,
250
+ consumer=const.Streams.INDEX.group.format(node=node.id),
259
251
  )
260
252
  except Exception:
261
253
  logger.exception(
@@ -336,9 +328,7 @@ def get_materializer() -> Materializer:
336
328
  return MATERIALIZER
337
329
 
338
330
 
339
- async def maybe_back_pressure(
340
- request: Request, kbid: str, resource_uuid: Optional[str] = None
341
- ) -> None:
331
+ async def maybe_back_pressure(request: Request, kbid: str, resource_uuid: Optional[str] = None) -> None:
342
332
  """
343
333
  This function does system checks to see if we need to put back pressure on writes.
344
334
  In that case, a HTTP 429 will be raised with the estimated time to try again.
@@ -348,9 +338,7 @@ async def maybe_back_pressure(
348
338
  await back_pressure_checks(request, kbid, resource_uuid)
349
339
 
350
340
 
351
- async def back_pressure_checks(
352
- request: Request, kbid: str, resource_uuid: Optional[str] = None
353
- ):
341
+ async def back_pressure_checks(request: Request, kbid: str, resource_uuid: Optional[str] = None):
354
342
  """
355
343
  Will raise a 429 if back pressure is needed:
356
344
  - If the processing engine is behind.
@@ -361,9 +349,7 @@ async def back_pressure_checks(
361
349
  materializer = get_materializer()
362
350
  with cached_back_pressure(kbid, resource_uuid):
363
351
  check_ingest_behind(materializer.get_ingest_pending())
364
- await check_indexing_behind(
365
- context, kbid, resource_uuid, materializer.get_indexing_pending()
366
- )
352
+ await check_indexing_behind(context, kbid, resource_uuid, materializer.get_indexing_pending())
367
353
  await check_processing_behind(materializer, kbid)
368
354
 
369
355
 
@@ -418,9 +404,7 @@ async def check_indexing_behind(
418
404
 
419
405
  # Get nodes that are involved in the indexing of the request
420
406
  if resource_uuid is not None:
421
- nodes_to_check = await get_nodes_for_resource_shard(
422
- context, kbid, resource_uuid
423
- )
407
+ nodes_to_check = await get_nodes_for_resource_shard(context, kbid, resource_uuid)
424
408
  else:
425
409
  nodes_to_check = await get_nodes_for_kb_active_shards(context, kbid)
426
410
 
@@ -488,9 +472,7 @@ def estimate_try_after(rate: float, pending: int, max_wait: int) -> datetime:
488
472
 
489
473
 
490
474
  @alru_cache(maxsize=1024, ttl=60 * 15)
491
- async def get_nodes_for_kb_active_shards(
492
- context: ApplicationContext, kbid: str
493
- ) -> list[str]:
475
+ async def get_nodes_for_kb_active_shards(context: ApplicationContext, kbid: str) -> list[str]:
494
476
  with back_pressure_observer({"type": "get_kb_active_shard"}):
495
477
  active_shard = await get_kb_active_shard(context, kbid)
496
478
  if active_shard is None:
@@ -521,20 +503,16 @@ async def get_nats_consumer_pending_messages(
521
503
  return consumer_info.num_pending
522
504
 
523
505
 
524
- async def get_kb_active_shard(
525
- context: ApplicationContext, kbid: str
526
- ) -> Optional[ShardObject]:
527
- async with context.kv_driver.transaction() as txn:
506
+ async def get_kb_active_shard(context: ApplicationContext, kbid: str) -> Optional[ShardObject]:
507
+ async with context.kv_driver.transaction(read_only=True) as txn:
528
508
  return await context.shard_manager.get_current_active_shard(txn, kbid)
529
509
 
530
510
 
531
511
  async def get_resource_shard(
532
512
  context: ApplicationContext, kbid: str, resource_uuid: str
533
513
  ) -> Optional[ShardObject]:
534
- async with datamanagers.with_transaction(read_only=True) as txn:
535
- shard_id = await datamanagers.resources.get_resource_shard_id(
536
- txn, kbid=kbid, rid=resource_uuid
537
- )
514
+ async with datamanagers.with_ro_transaction() as txn:
515
+ shard_id = await datamanagers.resources.get_resource_shard_id(txn, kbid=kbid, rid=resource_uuid)
538
516
  if shard_id is None:
539
517
  # Resource does not exist
540
518
  logger.debug(
@@ -19,10 +19,6 @@
19
19
  #
20
20
 
21
21
 
22
- class ConflictError(Exception):
23
- pass
24
-
25
-
26
22
  class ResourceNotFound(Exception):
27
23
  pass
28
24