nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -81,9 +81,7 @@ class AuthHeaderAuthenticationBackend(NucliaCloudAuthenticationBackend):
81
81
  def __init__(self, settings: Settings) -> None:
82
82
  self.settings = settings
83
83
 
84
- async def authenticate(
85
- self, request: HTTPConnection
86
- ) -> Optional[tuple[AuthCredentials, BaseUser]]:
84
+ async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
87
85
  token_resp = await authenticate_auth_token(self.settings, request)
88
86
  if token_resp is not None:
89
87
  return token_resp
@@ -94,9 +92,7 @@ class AuthHeaderAuthenticationBackend(NucliaCloudAuthenticationBackend):
94
92
  user = request.headers[self.settings.auth_policy_user_header]
95
93
  nuclia_user: BaseUser = NucliaUser(username=user)
96
94
 
97
- auth_creds = AuthCredentials(
98
- get_mapped_roles(settings=self.settings, data={"user": user})
99
- )
95
+ auth_creds = AuthCredentials(get_mapped_roles(settings=self.settings, data={"user": user}))
100
96
 
101
97
  return auth_creds, nuclia_user
102
98
 
@@ -113,9 +109,7 @@ class OAuth2AuthenticationBackend(NucliaCloudAuthenticationBackend):
113
109
  def __init__(self, settings: Settings) -> None:
114
110
  self.settings = settings
115
111
 
116
- async def authenticate(
117
- self, request: HTTPConnection
118
- ) -> Optional[tuple[AuthCredentials, BaseUser]]:
112
+ async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
119
113
  token_resp = await authenticate_auth_token(self.settings, request)
120
114
  if token_resp is not None:
121
115
  return token_resp
@@ -133,9 +127,7 @@ class OAuth2AuthenticationBackend(NucliaCloudAuthenticationBackend):
133
127
  try:
134
128
  token_data = orjson.loads(base64.b64decode(token_split[1] + "==="))
135
129
  except Exception:
136
- logger.warning(
137
- f"Could not parse jwt bearer token value: {token}", exc_info=True
138
- )
130
+ logger.warning(f"Could not parse jwt bearer token value: {token}", exc_info=True)
139
131
  return None
140
132
 
141
133
  if "sub" not in token_data:
@@ -168,9 +160,7 @@ class BasicAuthAuthenticationBackend(NucliaCloudAuthenticationBackend):
168
160
  def __init__(self, settings: Settings) -> None:
169
161
  self.settings = settings
170
162
 
171
- async def authenticate(
172
- self, request: HTTPConnection
173
- ) -> Optional[tuple[AuthCredentials, BaseUser]]:
163
+ async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
174
164
  token_resp = await authenticate_auth_token(self.settings, request)
175
165
  if token_resp is not None:
176
166
  return token_resp
@@ -186,9 +176,7 @@ class BasicAuthAuthenticationBackend(NucliaCloudAuthenticationBackend):
186
176
  user = token.split(":")[0]
187
177
 
188
178
  nuclia_user: BaseUser = NucliaUser(username=user)
189
- auth_creds = AuthCredentials(
190
- get_mapped_roles(settings=self.settings, data={"user": user})
191
- )
179
+ auth_creds = AuthCredentials(get_mapped_roles(settings=self.settings, data={"user": user}))
192
180
 
193
181
  return auth_creds, nuclia_user
194
182
 
@@ -201,9 +189,7 @@ class UpstreamNaiveAuthenticationBackend(NucliaCloudAuthenticationBackend):
201
189
  user_header=settings.auth_policy_user_header,
202
190
  )
203
191
 
204
- async def authenticate(
205
- self, request: HTTPConnection
206
- ) -> Optional[tuple[AuthCredentials, BaseUser]]:
192
+ async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
207
193
  token_resp = await authenticate_auth_token(self.settings, request)
208
194
  if token_resp is not None:
209
195
  return token_resp
@@ -44,12 +44,9 @@ def config_standalone_driver(nucliadb_args: Settings):
44
44
 
45
45
  if ingest_settings.driver == DriverConfig.NOT_SET:
46
46
  # no driver specified, for standalone, we force defaulting to local here
47
- ingest_settings.driver = DriverConfig.LOCAL
47
+ ingest_settings.driver = DriverConfig.PG
48
48
 
49
- if (
50
- ingest_settings.driver == DriverConfig.LOCAL
51
- and ingest_settings.driver_local_url is None
52
- ):
49
+ if ingest_settings.driver == DriverConfig.LOCAL and ingest_settings.driver_local_url is None:
53
50
  # also provide default path for local driver when none provided
54
51
  ingest_settings.driver_local_url = "./data/main"
55
52
 
@@ -57,11 +54,11 @@ def config_standalone_driver(nucliadb_args: Settings):
57
54
  # no driver specified, for standalone, we try to automate some settings here
58
55
  storage_settings.file_backend = FileBackendConfig.LOCAL
59
56
 
60
- if (
61
- storage_settings.file_backend == FileBackendConfig.LOCAL
62
- and storage_settings.local_files is None
63
- ):
64
- storage_settings.local_files = "./data/blob"
57
+ if storage_settings.file_backend == FileBackendConfig.LOCAL:
58
+ if storage_settings.local_files is None:
59
+ storage_settings.local_files = "./data/blob"
60
+ if storage_settings.local_indexing_bucket is None:
61
+ storage_settings.local_indexing_bucket = "indexer"
65
62
 
66
63
  if ingest_settings.driver_local_url is not None and not os.path.isdir(
67
64
  ingest_settings.driver_local_url
@@ -18,48 +18,49 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import asyncio
21
+ from contextlib import asynccontextmanager
22
+
23
+ from fastapi import FastAPI
21
24
 
22
25
  from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
26
+ from nucliadb.common.context.fastapi import inject_app_context
23
27
  from nucliadb.ingest.app import initialize_grpc as initialize_ingest_grpc
24
28
  from nucliadb.ingest.app import initialize_pull_workers
25
29
  from nucliadb.ingest.settings import settings as ingest_settings
26
- from nucliadb.reader.lifecycle import finalize as finalize_reader
27
- from nucliadb.reader.lifecycle import initialize as initialize_reader
28
- from nucliadb.search.lifecycle import finalize as finalize_search
29
- from nucliadb.search.lifecycle import initialize as initialize_search
30
- from nucliadb.train.lifecycle import finalize as finalize_train
31
- from nucliadb.train.lifecycle import initialize as initialize_train
32
- from nucliadb.writer.lifecycle import finalize as finalize_writer
33
- from nucliadb.writer.lifecycle import initialize as initialize_writer
30
+ from nucliadb.reader.lifecycle import lifespan as reader_lifespan
31
+ from nucliadb.search.lifecycle import lifespan as search_lifespan
32
+ from nucliadb.train.lifecycle import lifespan as train_lifespan
33
+ from nucliadb.writer.lifecycle import lifespan as writer_lifespan
34
34
  from nucliadb_utils.utilities import finalize_utilities
35
35
 
36
36
  SYNC_FINALIZERS = []
37
37
 
38
38
 
39
- async def initialize():
39
+ @asynccontextmanager
40
+ async def lifespan(app: FastAPI):
40
41
  if ingest_settings.disable_pull_worker:
41
42
  finalizers = await initialize_ingest_grpc()
42
43
  else:
43
44
  finalizers = await initialize_pull_workers()
44
45
  SYNC_FINALIZERS.extend(finalizers)
45
- await initialize_writer()
46
- await initialize_reader()
47
- await initialize_search()
48
- await initialize_train()
49
- await setup_cluster()
50
46
 
47
+ async with (
48
+ writer_lifespan(app),
49
+ reader_lifespan(app),
50
+ search_lifespan(app),
51
+ train_lifespan(app),
52
+ inject_app_context(app),
53
+ ):
54
+ await setup_cluster()
55
+
56
+ yield
51
57
 
52
- async def finalize():
53
- for finalizer in SYNC_FINALIZERS:
54
- if asyncio.iscoroutinefunction(finalizer):
55
- await finalizer()
56
- else:
57
- finalizer()
58
- SYNC_FINALIZERS.clear()
58
+ for finalizer in SYNC_FINALIZERS:
59
+ if asyncio.iscoroutinefunction(finalizer):
60
+ await finalizer()
61
+ else:
62
+ finalizer()
63
+ SYNC_FINALIZERS.clear()
59
64
 
60
- await finalize_writer()
61
- await finalize_reader()
62
- await finalize_search()
63
- await finalize_train()
64
65
  await finalize_utilities()
65
66
  await teardown_cluster()
@@ -54,7 +54,5 @@ async def safe_run_migrations():
54
54
  await run_migrator(forever=False)
55
55
  break
56
56
  except locking.ResourceLocked:
57
- sys.stdout.write(
58
- "Another worker is already running migrations. Waiting...\n"
59
- )
57
+ sys.stdout.write("Another worker is already running migrations. Waiting...\n")
60
58
  continue
@@ -25,7 +25,7 @@ import argdantic
25
25
  from nucliadb.standalone.config import config_nucliadb
26
26
  from nucliadb.standalone.settings import Settings
27
27
 
28
- parser = argdantic.ArgParser()
28
+ parser: argdantic.ArgParser = argdantic.ArgParser()
29
29
 
30
30
 
31
31
  @parser.command(singleton=True, name="NucliaDB", help="NucliaDB Starting script")
File without changes
@@ -42,7 +42,7 @@ from nucliadb_utils.settings import nuclia_settings, storage_settings
42
42
  logger = logging.getLogger(__name__)
43
43
 
44
44
 
45
- parser = argdantic.ArgParser()
45
+ parser: argdantic.ArgParser = argdantic.ArgParser()
46
46
 
47
47
 
48
48
  @parser.command(singleton=True, name="NucliaDB", help="NucliaDB Starting script")
@@ -98,7 +98,7 @@ def run():
98
98
  "API": f"http://{settings.http_host}:{settings.http_port}/api",
99
99
  "Admin UI": f"http://{settings.http_host}:{settings.http_port}/admin",
100
100
  "Key-value backend": ingest_settings.driver.value,
101
- "Blog storage backend": storage_settings.file_backend.value,
101
+ "Blob storage backend": storage_settings.file_backend.value,
102
102
  "Cluster discovery mode": cluster_settings.cluster_discovery_mode.value,
103
103
  "Node replicas": cluster_settings.node_replicas,
104
104
  "Index data path": os.path.realpath(cluster_settings.data_path),
@@ -121,10 +121,7 @@ def run():
121
121
  settings_to_output["NUA API zone"] = nuclia_settings.nuclia_zone
122
122
 
123
123
  settings_to_output_fmted = "\n".join(
124
- [
125
- f"|| - {k}:{' ' * (27 - len(k))}{v}"
126
- for k, v in settings_to_output.items()
127
- ]
124
+ [f"|| - {k}:{' ' * (27 - len(k))}{v}" for k, v in settings_to_output.items()]
128
125
  )
129
126
 
130
127
  installed_version = versions.installed_nucliadb()
@@ -27,6 +27,7 @@ from nucliadb.ingest.settings import DriverSettings
27
27
  from nucliadb_models.resource import NucliaDBRoles
28
28
  from nucliadb_telemetry.settings import LogFormatType, LogLevel, LogOutputType
29
29
  from nucliadb_utils.settings import StorageSettings
30
+ from nucliadb_utils.storages.settings import Settings as ExtendedStorageSettings
30
31
 
31
32
 
32
33
  class StandaloneDiscoveryMode(Enum):
@@ -43,29 +44,23 @@ class AuthPolicy(Enum):
43
44
  UPSTREAM_BASICAUTH = "upstream_basicauth"
44
45
 
45
46
 
46
- class Settings(DriverSettings, StorageSettings):
47
+ class Settings(DriverSettings, StorageSettings, ExtendedStorageSettings):
47
48
  # be consistent here with DATA_PATH env var
48
- data_path: str = pydantic.Field(
49
- "./data/node", description="Path to node index files"
50
- )
49
+ data_path: str = pydantic.Field("./data/node", description="Path to node index files")
51
50
 
52
51
  # all settings here are mapped in to other env var settings used
53
52
  # in the app. These are helper settings to make things easier to
54
53
  # use with standalone app vs cluster app.
55
54
  nua_api_key: Optional[str] = pydantic.Field(
56
55
  default=None,
57
- description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/docs/using/understanding/intro#get-a-nua-key", # noqa
58
- )
59
- zone: Optional[str] = pydantic.Field(
60
- default=None, description="Nuclia Understanding API Zone ID"
56
+ description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/rag/advanced/understanding/intro#get-a-nua-key", # noqa
61
57
  )
58
+ zone: Optional[str] = pydantic.Field(default=None, description="Nuclia Understanding API Zone ID")
62
59
  http_host: str = pydantic.Field(default="0.0.0.0", description="HTTP Port")
63
60
  http_port: int = pydantic.Field(default=8080, description="HTTP Port")
64
61
  ingest_grpc_port: int = pydantic.Field(default=8030, description="Ingest GRPC Port")
65
62
  train_grpc_port: int = pydantic.Field(default=8031, description="Train GRPC Port")
66
- standalone_node_port: int = pydantic.Field(
67
- default=10009, description="Node GRPC Port"
68
- )
63
+ standalone_node_port: int = pydantic.Field(default=10009, description="Node GRPC Port")
69
64
 
70
65
  auth_policy: AuthPolicy = pydantic.Field(
71
66
  default=AuthPolicy.UPSTREAM_NAIVE,
@@ -93,10 +88,9 @@ class Settings(DriverSettings, StorageSettings):
93
88
  description="Default role to assign to user that is authenticated \
94
89
  upstream. Not used with `upstream_naive` auth policy.",
95
90
  )
96
- auth_policy_role_mapping: Optional[dict[str, dict[str, list[NucliaDBRoles]]]] = (
97
- pydantic.Field(
98
- default=None,
99
- description="""
91
+ auth_policy_role_mapping: Optional[dict[str, dict[str, list[NucliaDBRoles]]]] = pydantic.Field(
92
+ default=None,
93
+ description="""
100
94
  Role mapping for `upstream_auth_header`, `upstream_oauth2` and `upstream_basicauth` auth policies.
101
95
  Allows mapping different properties from the auth request to a role.
102
96
  Available roles are: `READER`, `WRITER`, `MANAGER`.
@@ -106,7 +100,6 @@ Examples:
106
100
  - `{"group": {"managers": "MANAGER"}}` will map the users that have a `group` claim of
107
101
  `managers` on the jwt provided by upstream to the role `MANAGER` on `upstream_oauth2` policies.
108
102
  """,
109
- )
110
103
  )
111
104
 
112
105
  jwk_key: Optional[str] = pydantic.Field(
@@ -18,10 +18,10 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import enum
21
+ import importlib.metadata
21
22
  import logging
22
23
  from typing import Optional
23
24
 
24
- import pkg_resources
25
25
  from cachetools import TTLCache
26
26
 
27
27
  from nucliadb.common.http_clients.pypi import PyPi
@@ -65,14 +65,24 @@ def is_newer_release(installed: str, latest: str) -> bool:
65
65
  >>> is_newer_release("1.2.3", "1.2.3.post1")
66
66
  False
67
67
  """
68
- parsed_installed = pkg_resources.parse_version(_release(installed))
69
- parsed_latest = pkg_resources.parse_version(_release(latest))
68
+
69
+ def parse_version(version: str) -> tuple[int, int, int]:
70
+ parts = version.split(".")
71
+ if len(parts) > 3:
72
+ raise ValueError(f"Invalid version string: {version}")
73
+ major = int(parts[0]) if len(parts) >= 1 else 0
74
+ minor = int(parts[1]) if len(parts) >= 2 else 0
75
+ patch = int(parts[2]) if len(parts) == 3 else 0
76
+ return (major, minor, patch)
77
+
78
+ parsed_installed = parse_version(_release(installed))
79
+ parsed_latest = parse_version(_release(latest))
70
80
  return parsed_latest > parsed_installed
71
81
 
72
82
 
73
83
  def _release(version: str) -> str:
74
84
  """
75
- Strips the .postX part of the version so that wecan compare major.minor.patch only.
85
+ Strips the .postX part of the version so that we can compare major.minor.patch only.
76
86
 
77
87
  >>> _release("1.2.3")
78
88
  '1.2.3'
@@ -83,7 +93,7 @@ def _release(version: str) -> str:
83
93
 
84
94
 
85
95
  def get_installed_version(package_name: str) -> str:
86
- return pkg_resources.get_distribution(package_name).version
96
+ return importlib.metadata.distribution(package_name).version
87
97
 
88
98
 
89
99
  async def get_latest_version(package: str) -> Optional[str]:
@@ -60,7 +60,9 @@ class NatsTaskConsumer:
60
60
  async def initialize(self, context: ApplicationContext):
61
61
  self.context = context
62
62
  await create_nats_stream_if_not_exists(
63
- self.context, self.stream.name, subjects=[self.stream.subject] # type: ignore
63
+ self.context,
64
+ self.stream.name, # type: ignore
65
+ subjects=[self.stream.subject], # type: ignore
64
66
  )
65
67
  await self._setup_nats_subscription()
66
68
  self.initialized = True
@@ -128,11 +130,9 @@ class NatsTaskConsumer:
128
130
  f"Message received: subject:{subject}, seqid: {seqid}, reply: {reply}",
129
131
  extra={"consumer_name": self.name},
130
132
  )
131
- async with MessageProgressUpdater(
132
- msg, nats_consumer_settings.nats_ack_wait * 0.66
133
- ):
133
+ async with MessageProgressUpdater(msg, nats_consumer_settings.nats_ack_wait * 0.66):
134
134
  try:
135
- task_msg = self.msg_type.parse_raw(msg.data)
135
+ task_msg = self.msg_type.model_validate_json(msg.data)
136
136
  except pydantic.ValidationError as e:
137
137
  errors.capture_exception(e)
138
138
  logger.error(
@@ -144,9 +144,7 @@ class NatsTaskConsumer:
144
144
  await msg.ack()
145
145
  return
146
146
 
147
- logger.info(
148
- f"Starting task consumption", extra={"consumer_name": self.name}
149
- )
147
+ logger.info(f"Starting task consumption", extra={"consumer_name": self.name})
150
148
  try:
151
149
  await self.callback(self.context, task_msg) # type: ignore
152
150
  except asyncio.CancelledError:
@@ -200,9 +198,7 @@ def create_consumer(
200
198
  return consumer
201
199
 
202
200
 
203
- async def start_consumer(
204
- task_name: str, context: ApplicationContext
205
- ) -> NatsTaskConsumer:
201
+ async def start_consumer(task_name: str, context: ApplicationContext) -> NatsTaskConsumer:
206
202
  """
207
203
  Returns an initialized consumer for the given task name, ready to consume messages from the task stream.
208
204
  """
@@ -214,7 +210,7 @@ async def start_consumer(
214
210
  name=f"{task_name}_consumer",
215
211
  stream=task.stream,
216
212
  callback=task.callback, # type: ignore
217
- msg_type=task.msg_type, # type: ignore
213
+ msg_type=task.msg_type,
218
214
  max_concurrent_messages=task.max_concurrent_messages,
219
215
  )
220
216
  await consumer.initialize(context)
@@ -44,11 +44,13 @@ class NatsTaskProducer:
44
44
  async def initialize(self, context: ApplicationContext):
45
45
  self.context = context
46
46
  await create_nats_stream_if_not_exists(
47
- self.context, self.stream.name, subjects=[self.stream.subject] # type: ignore
47
+ self.context,
48
+ self.stream.name, # type: ignore
49
+ subjects=[self.stream.subject], # type: ignore
48
50
  )
49
51
  self.initialized = True
50
52
 
51
- async def __call__(self, msg: MsgType) -> int: # type: ignore
53
+ async def __call__(self, msg: MsgType) -> int:
52
54
  """
53
55
  Publish message to the producer's nats stream.
54
56
  Returns the sequence number of the published message.
@@ -57,7 +59,8 @@ class NatsTaskProducer:
57
59
  raise RuntimeError("NatsTaskProducer not initialized")
58
60
  try:
59
61
  pub_ack = await self.context.nats_manager.js.publish( # type: ignore
60
- self.stream.subject, msg.json().encode("utf-8") # type: ignore
62
+ self.stream.subject, # type: ignore
63
+ msg.model_dump_json().encode("utf-8"), # type: ignore
61
64
  )
62
65
  logger.info(
63
66
  "Message sent to Nats",
@@ -93,8 +96,6 @@ async def get_producer(task_name: str, context: ApplicationContext) -> NatsTaskP
93
96
  task = get_registered_task(task_name)
94
97
  except KeyError:
95
98
  raise ValueError(f"Task {task_name} not registered")
96
- producer = create_producer(
97
- name=f"{task_name}_producer", stream=task.stream, msg_type=task.msg_type
98
- )
99
+ producer = create_producer(name=f"{task_name}_producer", stream=task.stream, msg_type=task.msg_type)
99
100
  await producer.initialize(context)
100
101
  return producer
@@ -0,0 +1,53 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ # This is a test fixture which is useful outside nucliadb tests. In particular
22
+ # it is used for the testbed. It is kept under src so it can be imported from outside the nucliadb tests.
23
def reset_config():
    """Restore the listed settings objects in place and clear the index nodes.

    For each settings object collected below, a fresh instance of the same
    class is constructed and every attribute yielded by iterating the live
    object is overwritten with the value found on that fresh instance.
    Finally the cluster manager's ``INDEX_NODES`` container is emptied.

    Imports are done inside the function body, so the settings modules are
    only loaded when the function is actually called.
    """
    from nucliadb.common.cluster import settings as cluster_settings
    from nucliadb.ingest import settings as ingest_settings
    from nucliadb.train import settings as train_settings
    from nucliadb.writer import settings as writer_settings
    from nucliadb_utils import settings as utils_settings
    from nucliadb_utils.cache import settings as cache_settings

    # Module-level settings objects that get restored in place.
    live_settings = (
        cluster_settings.settings,
        ingest_settings.settings,
        train_settings.settings,
        writer_settings.settings,
        cache_settings.settings,
        utils_settings.audit_settings,
        utils_settings.http_settings,
        utils_settings.indexing_settings,
        utils_settings.nuclia_settings,
        utils_settings.nucliadb_settings,
        utils_settings.storage_settings,
        utils_settings.transaction_settings,
    )
    for current in live_settings:
        # A new instance of the same class supplies the values to copy back.
        pristine = type(current)()
        for field_name, _ in current:
            setattr(current, field_name, getattr(pristine, field_name))

    from nucliadb.common.cluster import manager

    manager.INDEX_NODES.clear()
@@ -28,9 +28,7 @@ logger = logging.getLogger(SERVICE_NAME)
28
28
  class EndpointFilter(logging.Filter):
29
29
  def filter(self, record: logging.LogRecord) -> bool:
30
30
  return (
31
- record.args is not None
32
- and len(record.args) >= 3
33
- and record.args[2] not in ("/", "/metrics") # type: ignore
31
+ record.args is not None and len(record.args) >= 3 and record.args[2] not in ("/", "/metrics") # type: ignore
34
32
  )
35
33
 
36
34
 
@@ -21,9 +21,8 @@
21
21
 
22
22
  from typing import Optional
23
23
 
24
- from nucliadb_protos.dataset_pb2 import TrainSet
25
-
26
24
  from nucliadb.train.utils import get_shard_manager
25
+ from nucliadb_protos.dataset_pb2 import TrainSet
27
26
 
28
27
 
29
28
  async def get_kb_partitions(kbid: str, prefix: Optional[str] = None):
@@ -21,7 +21,7 @@
21
21
 
22
22
  from fastapi import HTTPException, Request
23
23
  from fastapi.responses import StreamingResponse
24
- from fastapi_versioning import version # type: ignore
24
+ from fastapi_versioning import version
25
25
 
26
26
  from nucliadb.train.api.utils import get_kb_partitions, get_train
27
27
  from nucliadb.train.api.v1.router import KB_PREFIX, api
@@ -21,7 +21,7 @@
21
21
  from typing import Optional
22
22
 
23
23
  from fastapi import Request
24
- from fastapi_versioning import version # type: ignore
24
+ from fastapi_versioning import version
25
25
 
26
26
  from nucliadb.train.api.utils import get_kb_partitions
27
27
  from nucliadb.train.api.v1.router import KB_PREFIX, api
@@ -52,9 +52,7 @@ async def get_partitions_all(request: Request, kbid: str) -> TrainSetPartitions:
52
52
  )
53
53
  @requires_one([NucliaDBRoles.READER])
54
54
  @version(1)
55
- async def get_partitions_prefix(
56
- request: Request, kbid: str, prefix: str
57
- ) -> TrainSetPartitions:
55
+ async def get_partitions_prefix(request: Request, kbid: str, prefix: str) -> TrainSetPartitions:
58
56
  return await get_partitions(kbid, prefix=prefix)
59
57
 
60
58
 
nucliadb/train/app.py CHANGED
@@ -17,65 +17,44 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- import pkg_resources
20
+ import importlib.metadata
21
+
21
22
  from fastapi import FastAPI
22
23
  from starlette.middleware import Middleware
23
24
  from starlette.middleware.authentication import AuthenticationMiddleware
24
- from starlette.middleware.cors import CORSMiddleware
25
25
  from starlette.requests import ClientDisconnect, Request
26
26
  from starlette.responses import HTMLResponse
27
27
 
28
- from nucliadb.middleware.transaction import ReadOnlyTransactionMiddleware
29
28
  from nucliadb.train import API_PREFIX
30
29
  from nucliadb.train.api.v1.router import api
31
- from nucliadb.train.lifecycle import finalize, initialize
30
+ from nucliadb.train.lifecycle import lifespan
32
31
  from nucliadb_telemetry import errors
33
32
  from nucliadb_telemetry.fastapi.utils import (
34
33
  client_disconnect_handler,
35
34
  global_exception_handler,
36
35
  )
37
- from nucliadb_utils import const
36
+ from nucliadb_utils.audit.stream import AuditMiddleware
38
37
  from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
39
38
  from nucliadb_utils.fastapi.openapi import extend_openapi
40
39
  from nucliadb_utils.fastapi.versioning import VersionedFastAPI
41
- from nucliadb_utils.settings import http_settings, running_settings
42
- from nucliadb_utils.utilities import has_feature
40
+ from nucliadb_utils.settings import running_settings
41
+ from nucliadb_utils.utilities import get_audit
43
42
 
44
43
  middleware = []
45
-
46
- if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
47
- middleware.append(
48
- Middleware(
49
- CORSMiddleware,
50
- allow_origins=http_settings.cors_origins,
51
- allow_methods=["*"],
52
- # Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
53
- # Browsers already showing deprecation notices, so it needs to be specified explicitly
54
- allow_headers=["*", "Authorization"],
55
- )
56
- )
57
-
58
44
  middleware.extend(
59
45
  [
60
- Middleware(
61
- AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend()
62
- ),
63
- Middleware(ReadOnlyTransactionMiddleware),
46
+ Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend()),
47
+ Middleware(AuditMiddleware, audit_utility_getter=get_audit),
64
48
  ]
65
49
  )
66
50
 
67
- errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
68
-
69
-
70
- on_startup = [initialize]
71
- on_shutdown = [finalize]
51
+ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
72
52
 
73
53
 
74
54
  fastapi_settings = dict(
75
55
  debug=running_settings.debug,
76
56
  middleware=middleware,
77
- on_startup=on_startup,
78
- on_shutdown=on_shutdown,
57
+ lifespan=lifespan,
79
58
  exception_handlers={
80
59
  Exception: global_exception_handler,
81
60
  ClientDisconnect: client_disconnect_handler,