nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/writer/app.py CHANGED
@@ -18,74 +18,38 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
- import functools
21
+ import importlib.metadata
22
22
 
23
- import pkg_resources
24
23
  from fastapi import FastAPI
25
- from fastapi.responses import JSONResponse
26
24
  from starlette.middleware import Middleware
27
25
  from starlette.middleware.authentication import AuthenticationMiddleware
28
- from starlette.middleware.cors import CORSMiddleware
29
- from starlette.requests import ClientDisconnect, Request
26
+ from starlette.requests import ClientDisconnect
30
27
  from starlette.responses import HTMLResponse
31
28
 
32
- from nucliadb.common.context.fastapi import get_app_context, set_app_context
33
29
  from nucliadb.writer import API_PREFIX
34
30
  from nucliadb.writer.api.v1.router import api as api_v1
35
- from nucliadb.writer.lifecycle import finalize, initialize
31
+ from nucliadb.writer.lifecycle import lifespan
36
32
  from nucliadb_telemetry import errors
37
- from nucliadb_utils import const
33
+ from nucliadb_telemetry.fastapi.utils import (
34
+ client_disconnect_handler,
35
+ global_exception_handler,
36
+ )
38
37
  from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
39
38
  from nucliadb_utils.fastapi.openapi import extend_openapi
40
39
  from nucliadb_utils.fastapi.versioning import VersionedFastAPI
41
- from nucliadb_utils.settings import http_settings, running_settings
42
- from nucliadb_utils.utilities import has_feature
40
+ from nucliadb_utils.settings import running_settings
43
41
 
44
42
  middleware = []
45
43
 
46
- if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
47
- middleware.append(
48
- Middleware(
49
- CORSMiddleware,
50
- allow_origins=http_settings.cors_origins,
51
- allow_methods=["*"],
52
- # Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
53
- # Browsers already showing deprecation notices, so it needs to be specified explicitly
54
- allow_headers=["*", "Authorization"],
55
- )
56
- )
57
-
58
- middleware.extend(
59
- [Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())]
60
- )
61
-
62
-
63
- errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
44
+ middleware.extend([Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())])
64
45
 
65
- on_startup = [initialize]
66
- on_shutdown = [finalize]
67
-
68
-
69
- async def global_exception_handler(request: Request, exc: Exception):
70
- errors.capture_exception(exc)
71
- return JSONResponse(
72
- status_code=500,
73
- content={"detail": "Something went wrong, please contact your administrator"},
74
- )
75
-
76
-
77
- async def client_disconnect_handler(request: Request, exc: ClientDisconnect):
78
- return JSONResponse(
79
- status_code=200,
80
- content={"detail": "Client disconnected while an operation was in course"},
81
- )
82
46
 
47
+ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
83
48
 
84
49
  fastapi_settings = dict(
85
50
  debug=running_settings.debug,
86
51
  middleware=middleware,
87
- on_startup=on_startup,
88
- on_shutdown=on_shutdown,
52
+ lifespan=lifespan,
89
53
  exception_handlers={
90
54
  Exception: global_exception_handler,
91
55
  ClientDisconnect: client_disconnect_handler,
@@ -115,18 +79,4 @@ def create_application() -> FastAPI:
115
79
  # Use raw starlette routes to avoid unnecessary overhead
116
80
  application.add_route("/", homepage)
117
81
 
118
- set_app_context(application)
119
- maybe_configure_back_pressure(application)
120
82
  return application
121
-
122
-
123
- def maybe_configure_back_pressure(application: FastAPI):
124
- from nucliadb.writer.back_pressure import start_materializer, stop_materializer
125
- from nucliadb.writer.settings import back_pressure_settings
126
- from nucliadb_utils.settings import is_onprem_nucliadb
127
-
128
- if back_pressure_settings.enabled and not is_onprem_nucliadb():
129
- context = get_app_context(application)
130
- start_materializer_with_context = functools.partial(start_materializer, context)
131
- application.add_event_handler("startup", start_materializer_with_context)
132
- application.add_event_handler("shutdown", stop_materializer)
@@ -28,7 +28,6 @@ from typing import Optional
28
28
  from async_lru import alru_cache
29
29
  from cachetools import TTLCache
30
30
  from fastapi import HTTPException, Request
31
- from nucliadb_protos.writer_pb2 import ShardObject
32
31
 
33
32
  from nucliadb.common import datamanagers
34
33
  from nucliadb.common.cluster.manager import get_index_nodes
@@ -37,11 +36,11 @@ from nucliadb.common.context.fastapi import get_app_context
37
36
  from nucliadb.common.http_clients.processing import ProcessingHTTPClient
38
37
  from nucliadb.writer import logger
39
38
  from nucliadb.writer.settings import back_pressure_settings as settings
39
+ from nucliadb_protos.writer_pb2 import ShardObject
40
40
  from nucliadb_telemetry import metrics
41
41
  from nucliadb_utils import const
42
42
  from nucliadb_utils.nats import NatsConnectionManager
43
43
  from nucliadb_utils.settings import is_onprem_nucliadb
44
- from nucliadb_utils.utilities import has_feature
45
44
 
46
45
  __all__ = ["maybe_back_pressure"]
47
46
 
@@ -49,7 +48,7 @@ __all__ = ["maybe_back_pressure"]
49
48
  back_pressure_observer = metrics.Observer("nucliadb_back_pressure", labels={"type": ""})
50
49
 
51
50
 
52
- rate_limited_requests_counter = metrics.Counter(
51
+ RATE_LIMITED_REQUESTS_COUNTER = metrics.Counter(
53
52
  "nucliadb_rate_limited_requests", labels={"type": "", "cached": ""}
54
53
  )
55
54
 
@@ -113,8 +112,15 @@ def cached_back_pressure(kbid: str, resource_uuid: Optional[str] = None):
113
112
  if data is not None:
114
113
  try_after = data.try_after
115
114
  back_pressure_type = data.type
116
- rate_limited_requests_counter.inc(
117
- {"type": back_pressure_type, "cached": "true"}
115
+ RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "true"})
116
+ logger.info(
117
+ "Back pressure applied from cache",
118
+ extra={
119
+ "type": back_pressure_type,
120
+ "try_after": try_after,
121
+ "kbid": kbid,
122
+ "resource_uuid": resource_uuid,
123
+ },
118
124
  )
119
125
  raise HTTPException(
120
126
  status_code=429,
@@ -129,9 +135,7 @@ def cached_back_pressure(kbid: str, resource_uuid: Optional[str] = None):
129
135
  except BackPressureException as exc:
130
136
  try_after = exc.data.try_after
131
137
  back_pressure_type = exc.data.type
132
- rate_limited_requests_counter.inc(
133
- {"type": back_pressure_type, "cached": "false"}
134
- )
138
+ RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "false"})
135
139
  _cache.set(cache_key, exc.data)
136
140
  raise HTTPException(
137
141
  status_code=429,
@@ -216,6 +220,11 @@ class Materializer:
216
220
  )
217
221
  return 0
218
222
 
223
+ if pending > 0:
224
+ logger.info(
225
+ f"Processing returned {pending} pending messages for KB",
226
+ extra={"kbid": kbid},
227
+ )
219
228
  self.processing_pending_cache[kbid] = pending
220
229
  return pending
221
230
 
@@ -235,9 +244,7 @@ class Materializer:
235
244
  for node in get_index_nodes():
236
245
  try:
237
246
  with back_pressure_observer({"type": "get_indexing_pending"}):
238
- self.indexing_pending[
239
- node.id
240
- ] = await get_nats_consumer_pending_messages(
247
+ self.indexing_pending[node.id] = await get_nats_consumer_pending_messages(
241
248
  self.nats_manager,
242
249
  stream=const.Streams.INDEX.name,
243
250
  consumer=const.Streams.INDEX.group.format(node=node.id),
@@ -321,22 +328,17 @@ def get_materializer() -> Materializer:
321
328
  return MATERIALIZER
322
329
 
323
330
 
324
- async def maybe_back_pressure(
325
- request: Request, kbid: str, resource_uuid: Optional[str] = None
326
- ) -> None:
331
+ async def maybe_back_pressure(request: Request, kbid: str, resource_uuid: Optional[str] = None) -> None:
327
332
  """
328
333
  This function does system checks to see if we need to put back pressure on writes.
329
334
  In that case, a HTTP 429 will be raised with the estimated time to try again.
330
335
  """
331
- if has_feature(const.Features.BACK_PRESSURE, context={"kbid": kbid}):
332
- if not is_back_pressure_enabled() or is_onprem_nucliadb():
333
- return
334
- await back_pressure_checks(request, kbid, resource_uuid)
336
+ if not is_back_pressure_enabled() or is_onprem_nucliadb():
337
+ return
338
+ await back_pressure_checks(request, kbid, resource_uuid)
335
339
 
336
340
 
337
- async def back_pressure_checks(
338
- request: Request, kbid: str, resource_uuid: Optional[str] = None
339
- ):
341
+ async def back_pressure_checks(request: Request, kbid: str, resource_uuid: Optional[str] = None):
340
342
  """
341
343
  Will raise a 429 if back pressure is needed:
342
344
  - If the processing engine is behind.
@@ -347,9 +349,7 @@ async def back_pressure_checks(
347
349
  materializer = get_materializer()
348
350
  with cached_back_pressure(kbid, resource_uuid):
349
351
  check_ingest_behind(materializer.get_ingest_pending())
350
- await check_indexing_behind(
351
- context, kbid, resource_uuid, materializer.get_indexing_pending()
352
- )
352
+ await check_indexing_behind(context, kbid, resource_uuid, materializer.get_indexing_pending())
353
353
  await check_processing_behind(materializer, kbid)
354
354
 
355
355
 
@@ -366,9 +366,19 @@ async def check_processing_behind(materializer: Materializer, kbid: str):
366
366
  kb_pending = await materializer.get_processing_pending(kbid)
367
367
  if kb_pending > max_pending:
368
368
  try_after = estimate_try_after(
369
- rate=settings.processing_rate, pending=kb_pending
369
+ rate=settings.processing_rate,
370
+ pending=kb_pending,
371
+ max_wait=settings.max_wait_time,
370
372
  )
371
373
  data = BackPressureData(type="processing", try_after=try_after)
374
+ logger.info(
375
+ "Processing back pressure applied",
376
+ extra={
377
+ "kbid": kbid,
378
+ "try_after": try_after,
379
+ "pending": kb_pending,
380
+ },
381
+ )
372
382
  raise BackPressureException(data)
373
383
 
374
384
 
@@ -394,9 +404,7 @@ async def check_indexing_behind(
394
404
 
395
405
  # Get nodes that are involved in the indexing of the request
396
406
  if resource_uuid is not None:
397
- nodes_to_check = await get_nodes_for_resource_shard(
398
- context, kbid, resource_uuid
399
- )
407
+ nodes_to_check = await get_nodes_for_resource_shard(context, kbid, resource_uuid)
400
408
  else:
401
409
  nodes_to_check = await get_nodes_for_kb_active_shards(context, kbid)
402
410
 
@@ -418,9 +426,20 @@ async def check_indexing_behind(
418
426
 
419
427
  if highest_pending > max_pending:
420
428
  try_after = estimate_try_after(
421
- rate=settings.indexing_rate, pending=highest_pending
429
+ rate=settings.indexing_rate,
430
+ pending=highest_pending,
431
+ max_wait=settings.max_wait_time,
422
432
  )
423
433
  data = BackPressureData(type="indexing", try_after=try_after)
434
+ logger.info(
435
+ "Indexing back pressure applied",
436
+ extra={
437
+ "kbid": kbid,
438
+ "resource_uuid": resource_uuid,
439
+ "try_after": try_after,
440
+ "pending": highest_pending,
441
+ },
442
+ )
424
443
  raise BackPressureException(data)
425
444
 
426
445
 
@@ -432,24 +451,28 @@ def check_ingest_behind(ingest_pending: int):
432
451
 
433
452
  if ingest_pending > max_pending:
434
453
  try_after = estimate_try_after(
435
- rate=settings.ingest_rate, pending=ingest_pending
454
+ rate=settings.ingest_rate,
455
+ pending=ingest_pending,
456
+ max_wait=settings.max_wait_time,
436
457
  )
437
458
  data = BackPressureData(type="ingest", try_after=try_after)
459
+ logger.info(
460
+ "Ingest back pressure applied",
461
+ extra={"try_after": try_after, "pending": ingest_pending},
462
+ )
438
463
  raise BackPressureException(data)
439
464
 
440
465
 
441
- def estimate_try_after(rate: float, pending: int) -> datetime:
466
+ def estimate_try_after(rate: float, pending: int, max_wait: int) -> datetime:
442
467
  """
443
468
  This function estimates the time to try again based on the rate and the number of pending messages.
444
469
  """
445
- delta_seconds = pending / rate
470
+ delta_seconds = min(pending / rate, max_wait)
446
471
  return datetime.utcnow() + timedelta(seconds=delta_seconds)
447
472
 
448
473
 
449
474
  @alru_cache(maxsize=1024, ttl=60 * 15)
450
- async def get_nodes_for_kb_active_shards(
451
- context: ApplicationContext, kbid: str
452
- ) -> list[str]:
475
+ async def get_nodes_for_kb_active_shards(context: ApplicationContext, kbid: str) -> list[str]:
453
476
  with back_pressure_observer({"type": "get_kb_active_shard"}):
454
477
  active_shard = await get_kb_active_shard(context, kbid)
455
478
  if active_shard is None:
@@ -480,20 +503,16 @@ async def get_nats_consumer_pending_messages(
480
503
  return consumer_info.num_pending
481
504
 
482
505
 
483
- async def get_kb_active_shard(
484
- context: ApplicationContext, kbid: str
485
- ) -> Optional[ShardObject]:
486
- async with context.kv_driver.transaction() as txn:
506
+ async def get_kb_active_shard(context: ApplicationContext, kbid: str) -> Optional[ShardObject]:
507
+ async with context.kv_driver.transaction(read_only=True) as txn:
487
508
  return await context.shard_manager.get_current_active_shard(txn, kbid)
488
509
 
489
510
 
490
511
  async def get_resource_shard(
491
512
  context: ApplicationContext, kbid: str, resource_uuid: str
492
513
  ) -> Optional[ShardObject]:
493
- async with datamanagers.with_transaction(read_only=True) as txn:
494
- shard_id = await datamanagers.resources.get_resource_shard_id(
495
- txn, kbid=kbid, rid=resource_uuid
496
- )
514
+ async with datamanagers.with_ro_transaction() as txn:
515
+ shard_id = await datamanagers.resources.get_resource_shard_id(txn, kbid=kbid, rid=resource_uuid)
497
516
  if shard_id is None:
498
517
  # Resource does not exist
499
518
  logger.debug(
@@ -19,10 +19,6 @@
19
19
  #
20
20
 
21
21
 
22
- class ConflictError(Exception):
23
- pass
24
-
25
-
26
22
  class ResourceNotFound(Exception):
27
23
  pass
28
24
 
@@ -17,13 +17,20 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from nucliadb.ingest.processing import start_processing_engine
20
+ from contextlib import asynccontextmanager
21
+
22
+ from fastapi import FastAPI
23
+
24
+ from nucliadb.common.context.fastapi import inject_app_context
25
+ from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
21
26
  from nucliadb.ingest.utils import start_ingest, stop_ingest
22
27
  from nucliadb.writer import SERVICE_NAME
28
+ from nucliadb.writer.back_pressure import start_materializer, stop_materializer
29
+ from nucliadb.writer.settings import back_pressure_settings
23
30
  from nucliadb.writer.tus import finalize as storage_finalize
24
31
  from nucliadb.writer.tus import initialize as storage_initialize
25
- from nucliadb.writer.utilities import get_processing
26
32
  from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry
33
+ from nucliadb_utils.settings import is_onprem_nucliadb
27
34
  from nucliadb_utils.utilities import (
28
35
  finalize_utilities,
29
36
  start_partitioning_utility,
@@ -32,29 +39,28 @@ from nucliadb_utils.utilities import (
32
39
  )
33
40
 
34
41
 
35
- async def initialize():
36
- await setup_telemetry(SERVICE_NAME)
42
+ @asynccontextmanager
43
+ async def lifespan(app: FastAPI):
44
+ back_pressure_enabled = back_pressure_settings.enabled and not is_onprem_nucliadb()
37
45
 
46
+ await setup_telemetry(SERVICE_NAME)
38
47
  await start_ingest(SERVICE_NAME)
39
-
40
48
  await start_processing_engine()
41
-
42
49
  start_partitioning_utility()
43
-
44
50
  await start_transaction_utility(SERVICE_NAME)
45
51
  await storage_initialize()
46
52
 
53
+ # Inject application context into the fastapi app's state
54
+ async with inject_app_context(app) as context:
55
+ if back_pressure_enabled:
56
+ await start_materializer(context)
57
+ yield
47
58
 
48
- async def finalize():
59
+ if back_pressure_enabled:
60
+ await stop_materializer()
49
61
  await stop_transaction_utility()
50
-
51
62
  await stop_ingest()
52
- processing = get_processing()
53
- if processing is not None:
54
- await processing.finalize()
55
-
63
+ await stop_processing_engine()
56
64
  await storage_finalize()
57
-
58
65
  await clean_telemetry(SERVICE_NAME)
59
-
60
66
  await finalize_utilities()
File without changes
@@ -19,9 +19,10 @@
19
19
  #
20
20
  from datetime import datetime
21
21
 
22
- from nucliadb_protos.writer_pb2 import Audit
23
22
  from starlette.requests import Request
24
23
 
24
+ from nucliadb_protos.writer_pb2 import Audit
25
+
25
26
 
26
27
  def parse_audit(audit: Audit, request: Request):
27
28
  audit.user = request.headers.get("X-NUCLIADB-USER", "")
@@ -18,8 +18,31 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  from datetime import datetime
21
+ from typing import Optional
21
22
 
22
23
  from fastapi import HTTPException
24
+
25
+ from nucliadb.common.models_utils import to_proto
26
+ from nucliadb.common.models_utils.from_proto import (
27
+ RelationNodeTypeMap,
28
+ RelationTypeMap,
29
+ )
30
+ from nucliadb.ingest.orm.utils import set_title
31
+ from nucliadb.ingest.processing import PushPayload
32
+ from nucliadb_models.content_types import GENERIC_MIME_TYPE
33
+ from nucliadb_models.file import FileField
34
+ from nucliadb_models.link import LinkField
35
+ from nucliadb_models.metadata import (
36
+ ParagraphAnnotation,
37
+ QuestionAnswerAnnotation,
38
+ )
39
+ from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE, PushTextFormat, Text
40
+ from nucliadb_models.writer import (
41
+ ComingResourcePayload,
42
+ CreateResourcePayload,
43
+ UpdateResourcePayload,
44
+ )
45
+ from nucliadb_protos.knowledgebox_pb2 import KnowledgeBoxConfig
23
46
  from nucliadb_protos.resources_pb2 import (
24
47
  Answers,
25
48
  Basic,
@@ -30,38 +53,19 @@ from nucliadb_protos.resources_pb2 import (
30
53
  Metadata,
31
54
  PageSelections,
32
55
  Paragraph,
56
+ TokenSplit,
57
+ UserFieldMetadata,
58
+ VisualSelection,
33
59
  )
34
60
  from nucliadb_protos.resources_pb2 import ParagraphAnnotation as PBParagraphAnnotation
35
61
  from nucliadb_protos.resources_pb2 import (
36
62
  QuestionAnswerAnnotation as PBQuestionAnswerAnnotation,
37
63
  )
38
- from nucliadb_protos.resources_pb2 import TokenSplit, UserFieldMetadata, VisualSelection
39
64
  from nucliadb_protos.utils_pb2 import Relation, RelationNode
40
65
  from nucliadb_protos.writer_pb2 import BrokerMessage
41
66
 
42
- from nucliadb.ingest.orm.utils import set_title
43
- from nucliadb.ingest.processing import ProcessingInfo, PushPayload
44
- from nucliadb_models.common import FIELD_TYPES_MAP_REVERSE
45
- from nucliadb_models.file import FileField
46
- from nucliadb_models.link import LinkField
47
- from nucliadb_models.metadata import (
48
- ParagraphAnnotation,
49
- QuestionAnswerAnnotation,
50
- RelationNodeTypeMap,
51
- RelationTypeMap,
52
- )
53
- from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE, PushTextFormat, Text
54
- from nucliadb_models.writer import (
55
- GENERIC_MIME_TYPE,
56
- ComingResourcePayload,
57
- CreateResourcePayload,
58
- UpdateResourcePayload,
59
- )
60
-
61
67
 
62
- def parse_basic_modify(
63
- bm: BrokerMessage, item: ComingResourcePayload, toprocess: PushPayload
64
- ):
68
+ def parse_basic_modify(bm: BrokerMessage, item: ComingResourcePayload, toprocess: PushPayload):
65
69
  bm.basic.modified.FromDatetime(datetime.now())
66
70
  if item.title:
67
71
  set_title(bm, toprocess, item.title)
@@ -83,14 +87,9 @@ def parse_basic_modify(
83
87
  bm.basic.metadata.useful = True
84
88
  bm.basic.metadata.status = Metadata.Status.PENDING
85
89
 
86
- toprocess.genericfield["summary"] = Text(
87
- body=item.summary, format=PushTextFormat.PLAIN
88
- )
90
+ toprocess.genericfield["summary"] = Text(body=item.summary, format=PushTextFormat.PLAIN)
89
91
  if item.thumbnail:
90
92
  bm.basic.thumbnail = item.thumbnail
91
- if item.layout:
92
- bm.basic.layout = item.layout
93
-
94
93
  if item.metadata is not None:
95
94
  bm.basic.metadata.metadata.update(item.metadata.metadata)
96
95
  if item.metadata.language:
@@ -147,9 +146,8 @@ def parse_basic_modify(
147
146
  userfieldmetadata.question_answers.append(qa_annotation_pb)
148
147
 
149
148
  userfieldmetadata.field.field = fieldmetadata.field.field
150
- userfieldmetadata.field.field_type = FIELD_TYPES_MAP_REVERSE[ # type: ignore
151
- fieldmetadata.field.field_type.value
152
- ]
149
+
150
+ userfieldmetadata.field.field_type = to_proto.field_type(fieldmetadata.field.field_type)
153
151
 
154
152
  bm.basic.fieldmetadata.append(userfieldmetadata)
155
153
 
@@ -167,9 +165,7 @@ def parse_basic_modify(
167
165
  ]
168
166
  )
169
167
 
170
- relation_node_resource = RelationNode(
171
- value=bm.uuid, ntype=RelationNode.NodeType.RESOURCE
172
- )
168
+ relation_node_resource = RelationNode(value=bm.uuid, ntype=RelationNode.NodeType.RESOURCE)
173
169
  relations = []
174
170
  for relation in item.usermetadata.relations:
175
171
  if relation.from_ is None:
@@ -205,8 +201,16 @@ def parse_basic_modify(
205
201
  unique_groups = list(set(item.security.access_groups))
206
202
  bm.security.access_groups.extend(unique_groups)
207
203
 
204
+ if item.hidden is not None:
205
+ bm.basic.hidden = item.hidden
208
206
 
209
- def parse_basic(bm: BrokerMessage, item: CreateResourcePayload, toprocess: PushPayload):
207
+
208
+ def parse_basic_creation(
209
+ bm: BrokerMessage,
210
+ item: CreateResourcePayload,
211
+ toprocess: PushPayload,
212
+ kb_config: Optional[KnowledgeBoxConfig],
213
+ ):
210
214
  bm.basic.created.FromDatetime(datetime.now())
211
215
 
212
216
  if item.title is None:
@@ -215,6 +219,10 @@ def parse_basic(bm: BrokerMessage, item: CreateResourcePayload, toprocess: PushP
215
219
 
216
220
  parse_basic_modify(bm, item, toprocess)
217
221
 
222
+ if item.hidden is None:
223
+ if kb_config and kb_config.hidden_resources_hide_on_creation:
224
+ bm.basic.hidden = True
225
+
218
226
 
219
227
  def set_status(basic: Basic, item: CreateResourcePayload):
220
228
  basic.metadata.status = Metadata.Status.PENDING
@@ -224,34 +232,14 @@ def set_status_modify(basic: Basic, item: UpdateResourcePayload):
224
232
  basic.metadata.status = Metadata.Status.PENDING
225
233
 
226
234
 
227
- def set_processing_info(bm: BrokerMessage, processing_info: ProcessingInfo):
228
- """
229
- Processing V2 does not have this awkward processing info data field and storage
230
- but keeping for b/w compatibility.
231
-
232
- Once V1 is removed, this code can be removed because status checking will be done
233
- in a separate API that is not part of NucliaDB.
234
- """
235
- if processing_info.seqid is not None:
236
- bm.basic.last_seqid = processing_info.seqid
237
- if processing_info.account_seq is not None:
238
- bm.basic.last_account_seq = processing_info.account_seq
239
- if processing_info.queue is not None:
240
- bm.basic.queue = bm.basic.QueueType.Value(processing_info.queue.name)
241
-
242
-
243
235
  def validate_classifications(paragraph: ParagraphAnnotation):
244
236
  classifications = paragraph.classifications
245
237
  if len(classifications) == 0:
246
- raise HTTPException(
247
- status_code=422, detail="ensure classifications has at least 1 items"
248
- )
238
+ raise HTTPException(status_code=422, detail="ensure classifications has at least 1 items")
249
239
 
250
- unique_classifications = {tuple(cf.dict().values()) for cf in classifications}
240
+ unique_classifications = {tuple(cf.model_dump().values()) for cf in classifications}
251
241
  if len(unique_classifications) != len(classifications):
252
- raise HTTPException(
253
- status_code=422, detail="Paragraph classifications need to be unique"
254
- )
242
+ raise HTTPException(status_code=422, detail="Paragraph classifications need to be unique")
255
243
 
256
244
 
257
245
  def compute_title(item: CreateResourcePayload, rid: str) -> str:
@@ -289,9 +277,7 @@ def build_question_answer_annotation_pb(
289
277
  pb.cancelled_by_user = qa_annotation.cancelled_by_user
290
278
  pb.question_answer.question.text = qa_annotation.question_answer.question.text
291
279
  if qa_annotation.question_answer.question.language is not None:
292
- pb.question_answer.question.language = (
293
- qa_annotation.question_answer.question.language
294
- )
280
+ pb.question_answer.question.language = qa_annotation.question_answer.question.language
295
281
  pb.question_answer.question.ids_paragraphs.extend(
296
282
  qa_annotation.question_answer.question.ids_paragraphs
297
283
  )