nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -17,13 +17,20 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from nucliadb.ingest.processing import start_processing_engine
20
+ from contextlib import asynccontextmanager
21
+
22
+ from fastapi import FastAPI
23
+
24
+ from nucliadb.common.context.fastapi import inject_app_context
25
+ from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
21
26
  from nucliadb.ingest.utils import start_ingest, stop_ingest
22
27
  from nucliadb.writer import SERVICE_NAME
28
+ from nucliadb.writer.back_pressure import start_materializer, stop_materializer
29
+ from nucliadb.writer.settings import back_pressure_settings
23
30
  from nucliadb.writer.tus import finalize as storage_finalize
24
31
  from nucliadb.writer.tus import initialize as storage_initialize
25
- from nucliadb.writer.utilities import get_processing
26
32
  from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry
33
+ from nucliadb_utils.settings import is_onprem_nucliadb
27
34
  from nucliadb_utils.utilities import (
28
35
  finalize_utilities,
29
36
  start_partitioning_utility,
@@ -32,29 +39,28 @@ from nucliadb_utils.utilities import (
32
39
  )
33
40
 
34
41
 
35
- async def initialize():
36
- await setup_telemetry(SERVICE_NAME)
42
+ @asynccontextmanager
43
+ async def lifespan(app: FastAPI):
44
+ back_pressure_enabled = back_pressure_settings.enabled and not is_onprem_nucliadb()
37
45
 
46
+ await setup_telemetry(SERVICE_NAME)
38
47
  await start_ingest(SERVICE_NAME)
39
-
40
48
  await start_processing_engine()
41
-
42
49
  start_partitioning_utility()
43
-
44
50
  await start_transaction_utility(SERVICE_NAME)
45
51
  await storage_initialize()
46
52
 
53
+ # Inject application context into the fastapi app's state
54
+ async with inject_app_context(app) as context:
55
+ if back_pressure_enabled:
56
+ await start_materializer(context)
57
+ yield
47
58
 
48
- async def finalize():
59
+ if back_pressure_enabled:
60
+ await stop_materializer()
49
61
  await stop_transaction_utility()
50
-
51
62
  await stop_ingest()
52
- processing = get_processing()
53
- if processing is not None:
54
- await processing.finalize()
55
-
63
+ await stop_processing_engine()
56
64
  await storage_finalize()
57
-
58
65
  await clean_telemetry(SERVICE_NAME)
59
-
60
66
  await finalize_utilities()
File without changes
@@ -19,9 +19,10 @@
19
19
  #
20
20
  from datetime import datetime
21
21
 
22
- from nucliadb_protos.writer_pb2 import Audit
23
22
  from starlette.requests import Request
24
23
 
24
+ from nucliadb_protos.writer_pb2 import Audit
25
+
25
26
 
26
27
  def parse_audit(audit: Audit, request: Request):
27
28
  audit.user = request.headers.get("X-NUCLIADB-USER", "")
@@ -18,8 +18,31 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  from datetime import datetime
21
+ from typing import Optional
21
22
 
22
23
  from fastapi import HTTPException
24
+
25
+ from nucliadb.common.models_utils import to_proto
26
+ from nucliadb.common.models_utils.from_proto import (
27
+ RelationNodeTypeMap,
28
+ RelationTypeMap,
29
+ )
30
+ from nucliadb.ingest.orm.utils import set_title
31
+ from nucliadb.ingest.processing import PushPayload
32
+ from nucliadb_models.content_types import GENERIC_MIME_TYPE
33
+ from nucliadb_models.file import FileField
34
+ from nucliadb_models.link import LinkField
35
+ from nucliadb_models.metadata import (
36
+ ParagraphAnnotation,
37
+ QuestionAnswerAnnotation,
38
+ )
39
+ from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE, PushTextFormat, Text
40
+ from nucliadb_models.writer import (
41
+ ComingResourcePayload,
42
+ CreateResourcePayload,
43
+ UpdateResourcePayload,
44
+ )
45
+ from nucliadb_protos.knowledgebox_pb2 import KnowledgeBoxConfig
23
46
  from nucliadb_protos.resources_pb2 import (
24
47
  Answers,
25
48
  Basic,
@@ -30,38 +53,19 @@ from nucliadb_protos.resources_pb2 import (
30
53
  Metadata,
31
54
  PageSelections,
32
55
  Paragraph,
56
+ TokenSplit,
57
+ UserFieldMetadata,
58
+ VisualSelection,
33
59
  )
34
60
  from nucliadb_protos.resources_pb2 import ParagraphAnnotation as PBParagraphAnnotation
35
61
  from nucliadb_protos.resources_pb2 import (
36
62
  QuestionAnswerAnnotation as PBQuestionAnswerAnnotation,
37
63
  )
38
- from nucliadb_protos.resources_pb2 import TokenSplit, UserFieldMetadata, VisualSelection
39
64
  from nucliadb_protos.utils_pb2 import Relation, RelationNode
40
65
  from nucliadb_protos.writer_pb2 import BrokerMessage
41
66
 
42
- from nucliadb.ingest.orm.utils import set_title
43
- from nucliadb.ingest.processing import PushPayload
44
- from nucliadb_models.common import FIELD_TYPES_MAP_REVERSE
45
- from nucliadb_models.file import FileField
46
- from nucliadb_models.link import LinkField
47
- from nucliadb_models.metadata import (
48
- ParagraphAnnotation,
49
- QuestionAnswerAnnotation,
50
- RelationNodeTypeMap,
51
- RelationTypeMap,
52
- )
53
- from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE, PushTextFormat, Text
54
- from nucliadb_models.writer import (
55
- GENERIC_MIME_TYPE,
56
- ComingResourcePayload,
57
- CreateResourcePayload,
58
- UpdateResourcePayload,
59
- )
60
-
61
67
 
62
- def parse_basic_modify(
63
- bm: BrokerMessage, item: ComingResourcePayload, toprocess: PushPayload
64
- ):
68
+ def parse_basic_modify(bm: BrokerMessage, item: ComingResourcePayload, toprocess: PushPayload):
65
69
  bm.basic.modified.FromDatetime(datetime.now())
66
70
  if item.title:
67
71
  set_title(bm, toprocess, item.title)
@@ -83,14 +87,9 @@ def parse_basic_modify(
83
87
  bm.basic.metadata.useful = True
84
88
  bm.basic.metadata.status = Metadata.Status.PENDING
85
89
 
86
- toprocess.genericfield["summary"] = Text(
87
- body=item.summary, format=PushTextFormat.PLAIN
88
- )
90
+ toprocess.genericfield["summary"] = Text(body=item.summary, format=PushTextFormat.PLAIN)
89
91
  if item.thumbnail:
90
92
  bm.basic.thumbnail = item.thumbnail
91
- if item.layout:
92
- bm.basic.layout = item.layout
93
-
94
93
  if item.metadata is not None:
95
94
  bm.basic.metadata.metadata.update(item.metadata.metadata)
96
95
  if item.metadata.language:
@@ -147,9 +146,8 @@ def parse_basic_modify(
147
146
  userfieldmetadata.question_answers.append(qa_annotation_pb)
148
147
 
149
148
  userfieldmetadata.field.field = fieldmetadata.field.field
150
- userfieldmetadata.field.field_type = FIELD_TYPES_MAP_REVERSE[ # type: ignore
151
- fieldmetadata.field.field_type.value
152
- ]
149
+
150
+ userfieldmetadata.field.field_type = to_proto.field_type(fieldmetadata.field.field_type)
153
151
 
154
152
  bm.basic.fieldmetadata.append(userfieldmetadata)
155
153
 
@@ -167,9 +165,7 @@ def parse_basic_modify(
167
165
  ]
168
166
  )
169
167
 
170
- relation_node_resource = RelationNode(
171
- value=bm.uuid, ntype=RelationNode.NodeType.RESOURCE
172
- )
168
+ relation_node_resource = RelationNode(value=bm.uuid, ntype=RelationNode.NodeType.RESOURCE)
173
169
  relations = []
174
170
  for relation in item.usermetadata.relations:
175
171
  if relation.from_ is None:
@@ -205,8 +201,16 @@ def parse_basic_modify(
205
201
  unique_groups = list(set(item.security.access_groups))
206
202
  bm.security.access_groups.extend(unique_groups)
207
203
 
204
+ if item.hidden is not None:
205
+ bm.basic.hidden = item.hidden
206
+
208
207
 
209
- def parse_basic(bm: BrokerMessage, item: CreateResourcePayload, toprocess: PushPayload):
208
+ def parse_basic_creation(
209
+ bm: BrokerMessage,
210
+ item: CreateResourcePayload,
211
+ toprocess: PushPayload,
212
+ kb_config: Optional[KnowledgeBoxConfig],
213
+ ):
210
214
  bm.basic.created.FromDatetime(datetime.now())
211
215
 
212
216
  if item.title is None:
@@ -215,6 +219,10 @@ def parse_basic(bm: BrokerMessage, item: CreateResourcePayload, toprocess: PushP
215
219
 
216
220
  parse_basic_modify(bm, item, toprocess)
217
221
 
222
+ if item.hidden is None:
223
+ if kb_config and kb_config.hidden_resources_hide_on_creation:
224
+ bm.basic.hidden = True
225
+
218
226
 
219
227
  def set_status(basic: Basic, item: CreateResourcePayload):
220
228
  basic.metadata.status = Metadata.Status.PENDING
@@ -227,15 +235,11 @@ def set_status_modify(basic: Basic, item: UpdateResourcePayload):
227
235
  def validate_classifications(paragraph: ParagraphAnnotation):
228
236
  classifications = paragraph.classifications
229
237
  if len(classifications) == 0:
230
- raise HTTPException(
231
- status_code=422, detail="ensure classifications has at least 1 items"
232
- )
238
+ raise HTTPException(status_code=422, detail="ensure classifications has at least 1 items")
233
239
 
234
- unique_classifications = {tuple(cf.dict().values()) for cf in classifications}
240
+ unique_classifications = {tuple(cf.model_dump().values()) for cf in classifications}
235
241
  if len(unique_classifications) != len(classifications):
236
- raise HTTPException(
237
- status_code=422, detail="Paragraph classifications need to be unique"
238
- )
242
+ raise HTTPException(status_code=422, detail="Paragraph classifications need to be unique")
239
243
 
240
244
 
241
245
  def compute_title(item: CreateResourcePayload, rid: str) -> str:
@@ -273,9 +277,7 @@ def build_question_answer_annotation_pb(
273
277
  pb.cancelled_by_user = qa_annotation.cancelled_by_user
274
278
  pb.question_answer.question.text = qa_annotation.question_answer.question.text
275
279
  if qa_annotation.question_answer.question.language is not None:
276
- pb.question_answer.question.language = (
277
- qa_annotation.question_answer.question.language
278
- )
280
+ pb.question_answer.question.language = qa_annotation.question_answer.question.language
279
281
  pb.question_answer.question.ids_paragraphs.extend(
280
282
  qa_annotation.question_answer.question.ids_paragraphs
281
283
  )
@@ -21,23 +21,23 @@ from datetime import datetime
21
21
  from typing import Optional, Union
22
22
 
23
23
  from google.protobuf.json_format import MessageToDict
24
- from nucliadb_protos.writer_pb2 import BrokerMessage
25
24
 
26
25
  import nucliadb_models as models
26
+ from nucliadb.common.models_utils import from_proto, to_proto
27
27
  from nucliadb.ingest.fields.conversation import Conversation
28
28
  from nucliadb.ingest.orm.resource import Resource as ORMResource
29
29
  from nucliadb.ingest.processing import PushPayload
30
30
  from nucliadb.writer import SERVICE_NAME
31
- from nucliadb.writer.layouts import serialize_blocks
32
31
  from nucliadb.writer.utilities import get_processing
33
- from nucliadb_models.common import FIELD_TYPES_MAP, FieldTypeName
32
+ from nucliadb_models.common import FieldTypeName
33
+ from nucliadb_models.content_types import GENERIC_MIME_TYPE
34
34
  from nucliadb_models.conversation import PushConversation
35
35
  from nucliadb_models.writer import (
36
- GENERIC_MIME_TYPE,
37
36
  CreateResourcePayload,
38
37
  UpdateResourcePayload,
39
38
  )
40
39
  from nucliadb_protos import resources_pb2
40
+ from nucliadb_protos.writer_pb2 import BrokerMessage
41
41
  from nucliadb_utils.storages.storage import StorageField
42
42
  from nucliadb_utils.utilities import get_storage
43
43
 
@@ -80,12 +80,11 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
80
80
  storage = await get_storage(service_name=SERVICE_NAME)
81
81
  await resource.get_fields()
82
82
  for (field_type, field_id), field in resource.fields.items():
83
- field_type_name = FIELD_TYPES_MAP[field_type]
83
+ field_type_name = from_proto.field_type_name(field_type)
84
84
 
85
85
  if field_type_name not in {
86
86
  FieldTypeName.TEXT,
87
87
  FieldTypeName.FILE,
88
- FieldTypeName.LAYOUT,
89
88
  FieldTypeName.CONVERSATION,
90
89
  FieldTypeName.LINK,
91
90
  }:
@@ -114,28 +113,7 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
114
113
  parsed_text["format"] = models.PushTextFormat[parsed_text["format"]]
115
114
  toprocess.textfield[field_id] = models.Text(**parsed_text)
116
115
 
117
- if field_type_name is FieldTypeName.LAYOUT:
118
- parsed_layout = MessageToDict(
119
- field_pb,
120
- preserving_proto_field_name=True,
121
- including_default_value_fields=True,
122
- )
123
- parsed_layout["format"] = resources_pb2.FieldLayout.Format.Value(
124
- parsed_layout["format"]
125
- )
126
-
127
- for blockid, block in parsed_layout["body"]["blocks"].items():
128
- cf = field_pb.body.blocks[blockid].file
129
- block["file"] = await processing.convert_internal_cf_to_str(cf, storage)
130
-
131
- parsed_layout["blocks"] = parsed_layout.get("body", {}).get("blocks", {})
132
- del parsed_layout["body"]
133
-
134
- toprocess.layoutfield[field_id] = models.LayoutDiff(**parsed_layout)
135
-
136
- if field_type_name is FieldTypeName.CONVERSATION and isinstance(
137
- field, Conversation
138
- ):
116
+ if field_type_name is FieldTypeName.CONVERSATION and isinstance(field, Conversation):
139
117
  metadata = await field.get_metadata()
140
118
  if metadata.pages == 0:
141
119
  continue
@@ -156,14 +134,13 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
156
134
  await processing.convert_internal_cf_to_str(cf, storage)
157
135
  for cf in message.content.attachments
158
136
  ]
159
- parsed_message["content"]["format"] = (
160
- resources_pb2.MessageContent.Format.Value(
161
- parsed_message["content"]["format"]
162
- )
163
- )
164
- full_conversation.messages.append(
165
- models.PushMessage(**parsed_message)
137
+ if "attachments_fields" in parsed_message["content"]:
138
+ # Not defined on the push payload
139
+ del parsed_message["content"]["attachments_fields"]
140
+ parsed_message["content"]["format"] = resources_pb2.MessageContent.Format.Value(
141
+ parsed_message["content"]["format"]
166
142
  )
143
+ full_conversation.messages.append(models.PushMessage(**parsed_message))
167
144
  toprocess.conversationfield[field_id] = full_conversation
168
145
 
169
146
 
@@ -176,9 +153,7 @@ async def parse_fields(
176
153
  x_skip_store: bool,
177
154
  ):
178
155
  for key, file_field in item.files.items():
179
- await parse_file_field(
180
- key, file_field, writer, toprocess, kbid, uuid, skip_store=x_skip_store
181
- )
156
+ await parse_file_field(key, file_field, writer, toprocess, kbid, uuid, skip_store=x_skip_store)
182
157
 
183
158
  for key, link_field in item.links.items():
184
159
  parse_link_field(key, link_field, writer, toprocess)
@@ -186,19 +161,8 @@ async def parse_fields(
186
161
  for key, text_field in item.texts.items():
187
162
  parse_text_field(key, text_field, writer, toprocess)
188
163
 
189
- for key, layout_field in item.layouts.items():
190
- await parse_layout_field(key, layout_field, writer, toprocess, kbid, uuid)
191
-
192
164
  for key, conversation_field in item.conversations.items():
193
- await parse_conversation_field(
194
- key, conversation_field, writer, toprocess, kbid, uuid
195
- )
196
-
197
- for key, datetime_field in item.datetimes.items():
198
- parse_datetime_field(key, datetime_field, writer, toprocess)
199
-
200
- for key, keywordset_field in item.keywordsets.items():
201
- parse_keywordset_field(key, keywordset_field, writer, toprocess)
165
+ await parse_conversation_field(key, conversation_field, writer, toprocess, kbid, uuid)
202
166
 
203
167
 
204
168
  def parse_text_field(
@@ -208,9 +172,7 @@ def parse_text_field(
208
172
  toprocess: PushPayload,
209
173
  ) -> None:
210
174
  writer.texts[key].body = text_field.body
211
- writer.texts[key].format = resources_pb2.FieldText.Format.Value(
212
- text_field.format.value
213
- )
175
+ writer.texts[key].format = resources_pb2.FieldText.Format.Value(text_field.format.value)
214
176
  etw = resources_pb2.ExtractedTextWrapper()
215
177
  etw.field.field = key
216
178
  etw.field.field_type = resources_pb2.FieldType.TEXT
@@ -338,78 +300,6 @@ def parse_link_field(
338
300
  )
339
301
 
340
302
 
341
- def parse_keywordset_field(
342
- key: str,
343
- keywordset_field: models.FieldKeywordset,
344
- writer: BrokerMessage,
345
- toprocess: PushPayload,
346
- ) -> None:
347
- if keywordset_field.keywords is None:
348
- return
349
-
350
- for keyword in keywordset_field.keywords:
351
- fieldpb = resources_pb2.Keyword()
352
- fieldpb.value = keyword.value
353
- writer.keywordsets[key].keywords.append(fieldpb)
354
-
355
-
356
- def parse_datetime_field(
357
- key: str,
358
- datetime_field: models.FieldDatetime,
359
- writer: BrokerMessage,
360
- toprocess: PushPayload,
361
- ) -> None:
362
- if datetime_field.value is None:
363
- return
364
-
365
- writer.datetimes[key].value.FromDatetime(datetime_field.value)
366
-
367
-
368
- async def parse_layout_field(
369
- key: str,
370
- layout_field: models.InputLayoutField,
371
- writer: BrokerMessage,
372
- toprocess: PushPayload,
373
- kbid: str,
374
- uuid: str,
375
- ) -> None:
376
- storage = await get_storage(service_name=SERVICE_NAME)
377
- processing = get_processing()
378
-
379
- lc: resources_pb2.FieldLayout = await serialize_blocks(
380
- layout_field, kbid, uuid, key, storage
381
- )
382
- writer.layouts[key].CopyFrom(lc)
383
-
384
- toprocess_blocks = {}
385
- for blockid, block in layout_field.body.blocks.items():
386
- sf_conv_field: StorageField = storage.layout_field(
387
- kbid, uuid, field=key, ident=block.ident
388
- )
389
- cf_conv_field = await storage.upload_b64file_to_cloudfile(
390
- sf_conv_field,
391
- block.file.payload.encode(),
392
- block.file.filename,
393
- block.file.content_type,
394
- block.file.md5,
395
- )
396
-
397
- toprocess_blocks[blockid] = models.PushLayoutBlock(
398
- x=block.x,
399
- y=block.y,
400
- cols=block.cols,
401
- rows=block.rows,
402
- type=block.type,
403
- ident=block.ident,
404
- payload=block.payload,
405
- file=await processing.convert_internal_cf_to_str(cf_conv_field, storage),
406
- )
407
-
408
- toprocess.layoutfield[key] = models.LayoutDiff(
409
- format=lc.format, blocks=toprocess_blocks # type: ignore
410
- )
411
-
412
-
413
303
  async def parse_conversation_field(
414
304
  key: str,
415
305
  conversation_field: models.InputConversationField,
@@ -441,8 +331,16 @@ async def parse_conversation_field(
441
331
  )
442
332
 
443
333
  cm.content.text = message.content.text
444
- cm.content.format = resources_pb2.MessageContent.Format.Value(
445
- message.content.format.value
334
+ cm.content.format = resources_pb2.MessageContent.Format.Value(message.content.format.value)
335
+ cm.content.attachments_fields.extend(
336
+ [
337
+ resources_pb2.FieldRef(
338
+ field_type=to_proto.field_type_name(attachment.field_type),
339
+ field_id=attachment.field_id,
340
+ split=attachment.split if attachment.split is not None else "",
341
+ )
342
+ for attachment in message.content.attachments_fields
343
+ ]
446
344
  )
447
345
 
448
346
  for count, file in enumerate(message.content.attachments):
@@ -16,10 +16,9 @@
16
16
  #
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- from nucliadb_protos.resources_pb2 import Origin
20
-
21
19
  from nucliadb_models import Extra, InputOrigin
22
20
  from nucliadb_protos import resources_pb2
21
+ from nucliadb_protos.resources_pb2 import Origin
23
22
 
24
23
 
25
24
  def parse_origin(origin: Origin, origin_payload: InputOrigin):
@@ -48,16 +48,20 @@ class BackPressureSettings(BaseSettings):
48
48
  description="Estimation of the processing rate in messages per second. This is used to calculate the try again in time", # noqa
49
49
  )
50
50
  max_indexing_pending: int = Field(
51
- default=100,
51
+ default=200,
52
52
  description="Max number of messages pending to index in a node queue before rate limiting writes. Set to 0 to disable indexing back pressure checks", # noqa
53
+ alias="back_pressure_max_indexing_pending",
53
54
  )
54
55
  max_ingest_pending: int = Field(
55
- default=1000,
56
+ # Disabled by default
57
+ default=0,
56
58
  description="Max number of messages pending to be ingested by processed consumers before rate limiting writes. Set to 0 to disable ingest back pressure checks", # noqa
59
+ alias="back_pressure_max_ingest_pending",
57
60
  )
58
61
  max_processing_pending: int = Field(
59
62
  default=1000,
60
63
  description="Max number of messages pending to process per Knowledge Box before rate limiting writes. Set to 0 to disable processing back pressure checks", # noqa
64
+ alias="back_pressure_max_processing_pending",
61
65
  )
62
66
  indexing_check_interval: int = Field(
63
67
  default=30,
@@ -23,10 +23,6 @@ from typing import Optional
23
23
  from nucliadb.writer.settings import settings as writer_settings
24
24
  from nucliadb.writer.tus.dm import FileDataManager, RedisFileDataManagerFactory
25
25
  from nucliadb.writer.tus.exceptions import ManagerNotAvailable
26
- from nucliadb.writer.tus.gcs import GCloudBlobStore, GCloudFileStorageManager
27
- from nucliadb.writer.tus.local import LocalBlobStore, LocalFileStorageManager
28
- from nucliadb.writer.tus.pg import PGBlobStore, PGFileStorageManager
29
- from nucliadb.writer.tus.s3 import S3BlobStore, S3FileStorageManager
30
26
  from nucliadb.writer.tus.storage import BlobStore, FileStorageManager
31
27
  from nucliadb_utils.exceptions import ConfigurationError
32
28
  from nucliadb_utils.settings import FileBackendConfig, storage_settings
@@ -48,6 +44,8 @@ REDIS_FILE_DATA_MANAGER_FACTORY: Optional[RedisFileDataManagerFactory] = None
48
44
  async def initialize():
49
45
  global DRIVER
50
46
  if storage_settings.file_backend == FileBackendConfig.GCS:
47
+ from nucliadb.writer.tus.gcs import GCloudBlobStore, GCloudFileStorageManager
48
+
51
49
  storage_backend = GCloudBlobStore()
52
50
 
53
51
  await storage_backend.initialize(
@@ -64,6 +62,8 @@ async def initialize():
64
62
  DRIVER = TusStorageDriver(backend=storage_backend, manager=storage_manager)
65
63
 
66
64
  elif storage_settings.file_backend == FileBackendConfig.S3:
65
+ from nucliadb.writer.tus.s3 import S3BlobStore, S3FileStorageManager
66
+
67
67
  storage_backend = S3BlobStore()
68
68
 
69
69
  await storage_backend.initialize(
@@ -83,6 +83,8 @@ async def initialize():
83
83
  DRIVER = TusStorageDriver(backend=storage_backend, manager=storage_manager)
84
84
 
85
85
  elif storage_settings.file_backend == FileBackendConfig.LOCAL:
86
+ from nucliadb.writer.tus.local import LocalBlobStore, LocalFileStorageManager
87
+
86
88
  storage_backend = LocalBlobStore(storage_settings.local_files)
87
89
 
88
90
  await storage_backend.initialize()
@@ -91,12 +93,18 @@ async def initialize():
91
93
 
92
94
  DRIVER = TusStorageDriver(backend=storage_backend, manager=storage_manager)
93
95
 
94
- elif storage_settings.file_backend == FileBackendConfig.PG:
95
- storage_backend = PGBlobStore(storage_settings.driver_pg_url)
96
+ elif storage_settings.file_backend == FileBackendConfig.AZURE:
97
+ from nucliadb.writer.tus.azure import AzureBlobStore, AzureFileStorageManager
96
98
 
97
- await storage_backend.initialize()
99
+ if storage_settings.azure_account_url is None:
100
+ raise ConfigurationError("AZURE_ACCOUNT_URL env variable not configured")
98
101
 
99
- storage_manager = PGFileStorageManager(storage_backend)
102
+ storage_backend = AzureBlobStore()
103
+ await storage_backend.initialize(
104
+ storage_settings.azure_account_url,
105
+ connection_string=storage_settings.azure_connection_string,
106
+ )
107
+ storage_manager = AzureFileStorageManager(storage_backend)
100
108
 
101
109
  DRIVER = TusStorageDriver(backend=storage_backend, manager=storage_manager)
102
110
 
@@ -117,7 +125,7 @@ async def finalize():
117
125
  REDIS_FILE_DATA_MANAGER_FACTORY = None
118
126
 
119
127
 
120
- def get_dm() -> FileDataManager: # type: ignore
128
+ def get_dm() -> FileDataManager:
121
129
  if writer_settings.dm_enabled:
122
130
  global REDIS_FILE_DATA_MANAGER_FACTORY
123
131
  if REDIS_FILE_DATA_MANAGER_FACTORY is None:
@@ -136,9 +144,3 @@ def get_storage_manager() -> FileStorageManager:
136
144
  if DRIVER is None:
137
145
  raise ManagerNotAvailable()
138
146
  return DRIVER.manager
139
-
140
-
141
- def clear_storage():
142
- global DRIVER
143
-
144
- DRIVER = None