nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -81,9 +81,7 @@ class AuthHeaderAuthenticationBackend(NucliaCloudAuthenticationBackend):
81
81
  def __init__(self, settings: Settings) -> None:
82
82
  self.settings = settings
83
83
 
84
- async def authenticate(
85
- self, request: HTTPConnection
86
- ) -> Optional[tuple[AuthCredentials, BaseUser]]:
84
+ async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
87
85
  token_resp = await authenticate_auth_token(self.settings, request)
88
86
  if token_resp is not None:
89
87
  return token_resp
@@ -94,9 +92,7 @@ class AuthHeaderAuthenticationBackend(NucliaCloudAuthenticationBackend):
94
92
  user = request.headers[self.settings.auth_policy_user_header]
95
93
  nuclia_user: BaseUser = NucliaUser(username=user)
96
94
 
97
- auth_creds = AuthCredentials(
98
- get_mapped_roles(settings=self.settings, data={"user": user})
99
- )
95
+ auth_creds = AuthCredentials(get_mapped_roles(settings=self.settings, data={"user": user}))
100
96
 
101
97
  return auth_creds, nuclia_user
102
98
 
@@ -113,9 +109,7 @@ class OAuth2AuthenticationBackend(NucliaCloudAuthenticationBackend):
113
109
  def __init__(self, settings: Settings) -> None:
114
110
  self.settings = settings
115
111
 
116
- async def authenticate(
117
- self, request: HTTPConnection
118
- ) -> Optional[tuple[AuthCredentials, BaseUser]]:
112
+ async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
119
113
  token_resp = await authenticate_auth_token(self.settings, request)
120
114
  if token_resp is not None:
121
115
  return token_resp
@@ -133,9 +127,7 @@ class OAuth2AuthenticationBackend(NucliaCloudAuthenticationBackend):
133
127
  try:
134
128
  token_data = orjson.loads(base64.b64decode(token_split[1] + "==="))
135
129
  except Exception:
136
- logger.warning(
137
- f"Could not parse jwt bearer token value: {token}", exc_info=True
138
- )
130
+ logger.warning(f"Could not parse jwt bearer token value: {token}", exc_info=True)
139
131
  return None
140
132
 
141
133
  if "sub" not in token_data:
@@ -168,9 +160,7 @@ class BasicAuthAuthenticationBackend(NucliaCloudAuthenticationBackend):
168
160
  def __init__(self, settings: Settings) -> None:
169
161
  self.settings = settings
170
162
 
171
- async def authenticate(
172
- self, request: HTTPConnection
173
- ) -> Optional[tuple[AuthCredentials, BaseUser]]:
163
+ async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
174
164
  token_resp = await authenticate_auth_token(self.settings, request)
175
165
  if token_resp is not None:
176
166
  return token_resp
@@ -186,9 +176,7 @@ class BasicAuthAuthenticationBackend(NucliaCloudAuthenticationBackend):
186
176
  user = token.split(":")[0]
187
177
 
188
178
  nuclia_user: BaseUser = NucliaUser(username=user)
189
- auth_creds = AuthCredentials(
190
- get_mapped_roles(settings=self.settings, data={"user": user})
191
- )
179
+ auth_creds = AuthCredentials(get_mapped_roles(settings=self.settings, data={"user": user}))
192
180
 
193
181
  return auth_creds, nuclia_user
194
182
 
@@ -201,9 +189,7 @@ class UpstreamNaiveAuthenticationBackend(NucliaCloudAuthenticationBackend):
201
189
  user_header=settings.auth_policy_user_header,
202
190
  )
203
191
 
204
- async def authenticate(
205
- self, request: HTTPConnection
206
- ) -> Optional[tuple[AuthCredentials, BaseUser]]:
192
+ async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
207
193
  token_resp = await authenticate_auth_token(self.settings, request)
208
194
  if token_resp is not None:
209
195
  return token_resp
@@ -37,19 +37,16 @@ def config_standalone_driver(nucliadb_args: Settings):
37
37
  )
38
38
 
39
39
  # update global settings with arg values
40
- for fieldname in DriverSettings.__fields__.keys():
40
+ for fieldname in DriverSettings.model_fields.keys():
41
41
  setattr(ingest_settings, fieldname, getattr(nucliadb_args, fieldname))
42
- for fieldname in StorageSettings.__fields__.keys():
42
+ for fieldname in StorageSettings.model_fields.keys():
43
43
  setattr(storage_settings, fieldname, getattr(nucliadb_args, fieldname))
44
44
 
45
45
  if ingest_settings.driver == DriverConfig.NOT_SET:
46
46
  # no driver specified, for standalone, we force defaulting to local here
47
- ingest_settings.driver = DriverConfig.LOCAL
47
+ ingest_settings.driver = DriverConfig.PG
48
48
 
49
- if (
50
- ingest_settings.driver == DriverConfig.LOCAL
51
- and ingest_settings.driver_local_url is None
52
- ):
49
+ if ingest_settings.driver == DriverConfig.LOCAL and ingest_settings.driver_local_url is None:
53
50
  # also provide default path for local driver when none provided
54
51
  ingest_settings.driver_local_url = "./data/main"
55
52
 
@@ -57,11 +54,11 @@ def config_standalone_driver(nucliadb_args: Settings):
57
54
  # no driver specified, for standalone, we try to automate some settings here
58
55
  storage_settings.file_backend = FileBackendConfig.LOCAL
59
56
 
60
- if (
61
- storage_settings.file_backend == FileBackendConfig.LOCAL
62
- and storage_settings.local_files is None
63
- ):
64
- storage_settings.local_files = "./data/blob"
57
+ if storage_settings.file_backend == FileBackendConfig.LOCAL:
58
+ if storage_settings.local_files is None:
59
+ storage_settings.local_files = "./data/blob"
60
+ if storage_settings.local_indexing_bucket is None:
61
+ storage_settings.local_indexing_bucket = "indexer"
65
62
 
66
63
  if ingest_settings.driver_local_url is not None and not os.path.isdir(
67
64
  ingest_settings.driver_local_url
@@ -34,7 +34,7 @@ from pydantic import BaseModel
34
34
 
35
35
  from nucliadb.common.cluster import manager as cluster_manager
36
36
  from nucliadb.standalone.settings import Settings
37
- from nucliadb_telemetry.settings import LogSettings
37
+ from nucliadb_telemetry.settings import LogOutputType, LogSettings
38
38
 
39
39
  MB = 1024 * 1024
40
40
  CHUNK_SIZE = 2 * MB
@@ -70,7 +70,7 @@ class NodeInfo(BaseModel):
70
70
  id: str
71
71
  address: str
72
72
  shard_count: int
73
- primary_id: Optional[str]
73
+ primary_id: Optional[str] = None
74
74
 
75
75
 
76
76
  class ClusterInfo(BaseModel):
@@ -86,7 +86,7 @@ async def stream_tar(app: FastAPI) -> AsyncGenerator[bytes, None]:
86
86
  await add_cluster_info(temp_dir, tar)
87
87
  settings: Settings = app.settings.copy() # type: ignore
88
88
  await add_settings(temp_dir, tar, settings)
89
- if settings.log_output_type == "file":
89
+ if settings.log_output_type == LogOutputType.FILE:
90
90
  await add_logs(tar)
91
91
 
92
92
  async for chunk in stream_out_tar(tar_file):
@@ -164,7 +164,7 @@ def _add_cluster_info_to_tar(temp_dir: str, tar: tarfile.TarFile):
164
164
  )
165
165
  cluster_info_file = os.path.join(temp_dir, "cluster_info.txt")
166
166
  with open(cluster_info_file, "w") as f:
167
- f.write(cluster_info.json(indent=4))
167
+ f.write(cluster_info.model_dump_json(indent=4))
168
168
  tar.add(cluster_info_file, arcname="cluster_info.txt")
169
169
 
170
170
 
@@ -177,7 +177,7 @@ def _add_settings_to_tar(temp_dir: str, tar: tarfile.TarFile, settings: Settings
177
177
  remove_sensitive_settings(settings)
178
178
  settings_file = os.path.join(temp_dir, "settings.json")
179
179
  with open(settings_file, "w") as f:
180
- f.write(settings.json(indent=4))
180
+ f.write(settings.model_dump_json(indent=4))
181
181
  tar.add(settings_file, arcname="settings.json")
182
182
 
183
183
 
@@ -18,48 +18,49 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import asyncio
21
+ from contextlib import asynccontextmanager
22
+
23
+ from fastapi import FastAPI
21
24
 
22
25
  from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
26
+ from nucliadb.common.context.fastapi import inject_app_context
23
27
  from nucliadb.ingest.app import initialize_grpc as initialize_ingest_grpc
24
28
  from nucliadb.ingest.app import initialize_pull_workers
25
29
  from nucliadb.ingest.settings import settings as ingest_settings
26
- from nucliadb.reader.lifecycle import finalize as finalize_reader
27
- from nucliadb.reader.lifecycle import initialize as initialize_reader
28
- from nucliadb.search.lifecycle import finalize as finalize_search
29
- from nucliadb.search.lifecycle import initialize as initialize_search
30
- from nucliadb.train.lifecycle import finalize as finalize_train
31
- from nucliadb.train.lifecycle import initialize as initialize_train
32
- from nucliadb.writer.lifecycle import finalize as finalize_writer
33
- from nucliadb.writer.lifecycle import initialize as initialize_writer
30
+ from nucliadb.reader.lifecycle import lifespan as reader_lifespan
31
+ from nucliadb.search.lifecycle import lifespan as search_lifespan
32
+ from nucliadb.train.lifecycle import lifespan as train_lifespan
33
+ from nucliadb.writer.lifecycle import lifespan as writer_lifespan
34
34
  from nucliadb_utils.utilities import finalize_utilities
35
35
 
36
36
  SYNC_FINALIZERS = []
37
37
 
38
38
 
39
- async def initialize():
39
+ @asynccontextmanager
40
+ async def lifespan(app: FastAPI):
40
41
  if ingest_settings.disable_pull_worker:
41
42
  finalizers = await initialize_ingest_grpc()
42
43
  else:
43
44
  finalizers = await initialize_pull_workers()
44
45
  SYNC_FINALIZERS.extend(finalizers)
45
- await initialize_writer()
46
- await initialize_reader()
47
- await initialize_search()
48
- await initialize_train()
49
- await setup_cluster()
50
46
 
47
+ async with (
48
+ writer_lifespan(app),
49
+ reader_lifespan(app),
50
+ search_lifespan(app),
51
+ train_lifespan(app),
52
+ inject_app_context(app),
53
+ ):
54
+ await setup_cluster()
55
+
56
+ yield
51
57
 
52
- async def finalize():
53
- for finalizer in SYNC_FINALIZERS:
54
- if asyncio.iscoroutinefunction(finalizer):
55
- await finalizer()
56
- else:
57
- finalizer()
58
- SYNC_FINALIZERS.clear()
58
+ for finalizer in SYNC_FINALIZERS:
59
+ if asyncio.iscoroutinefunction(finalizer):
60
+ await finalizer()
61
+ else:
62
+ finalizer()
63
+ SYNC_FINALIZERS.clear()
59
64
 
60
- await finalize_writer()
61
- await finalize_reader()
62
- await finalize_search()
63
- await finalize_train()
64
65
  await finalize_utilities()
65
66
  await teardown_cluster()
@@ -0,0 +1,58 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ import asyncio
21
+ import sys
22
+
23
+ from nucliadb.common import locking
24
+ from nucliadb.common.cluster.standalone.utils import is_worker_node
25
+ from nucliadb.migrator.command import run as run_migrator
26
+
27
+
28
+ def run_migrations():
29
+ """
30
+ Run migrations for the standalone mode.
31
+ """
32
+ loop = asyncio.new_event_loop()
33
+ loop.run_until_complete(safe_run_migrations())
34
+ loop.close()
35
+
36
+
37
+ async def safe_run_migrations():
38
+ """
39
+ Run migrations for the standalone mode, only if the node is a worker node.
40
+ The worker node will keep blocked until the migrations are run -- it relies
41
+ on the migrator's internal distributed lock.
42
+ """
43
+ if not is_worker_node():
44
+ return
45
+
46
+ sys.stdout.write(
47
+ """-------------------------------------------------
48
+ | Running Migrations for NucliaDB Standalone
49
+ -------------------------------------------------
50
+ """
51
+ )
52
+ while True:
53
+ try:
54
+ await run_migrator(forever=False)
55
+ break
56
+ except locking.ResourceLocked:
57
+ sys.stdout.write("Another worker is already running migrations. Waiting...\n")
58
+ continue
@@ -20,21 +20,22 @@
20
20
  # Standalone purge command
21
21
  import asyncio
22
22
 
23
- import pydantic_argparse
23
+ import argdantic
24
24
 
25
25
  from nucliadb.standalone.config import config_nucliadb
26
26
  from nucliadb.standalone.settings import Settings
27
27
 
28
+ parser: argdantic.ArgParser = argdantic.ArgParser()
29
+
30
+
31
+ @parser.command(singleton=True, name="NucliaDB", help="NucliaDB Starting script")
32
+ def setting(settings: Settings) -> Settings:
33
+ return settings
34
+
28
35
 
29
36
  def purge():
30
37
  from nucliadb.purge import main
31
38
 
32
- parser = pydantic_argparse.ArgumentParser(
33
- model=Settings,
34
- prog="NucliaDB",
35
- description="NucliaDB Starting script",
36
- )
37
- nucliadb_args = parser.parse_typed_args()
38
-
39
+ nucliadb_args = parser()
39
40
  config_nucliadb(nucliadb_args)
40
41
  asyncio.run(main())
File without changes
@@ -23,7 +23,7 @@ import os
23
23
  import sys
24
24
  from typing import Optional
25
25
 
26
- import pydantic_argparse
26
+ import argdantic
27
27
  import uvicorn # type: ignore
28
28
  from fastapi import FastAPI
29
29
 
@@ -31,6 +31,7 @@ from nucliadb.common.cluster.settings import settings as cluster_settings
31
31
  from nucliadb.ingest.settings import settings as ingest_settings
32
32
  from nucliadb.standalone import versions
33
33
  from nucliadb.standalone.config import config_nucliadb
34
+ from nucliadb.standalone.migrations import run_migrations
34
35
  from nucliadb.standalone.settings import Settings
35
36
  from nucliadb_telemetry import errors
36
37
  from nucliadb_telemetry.fastapi import instrument_app
@@ -41,14 +42,17 @@ from nucliadb_utils.settings import nuclia_settings, storage_settings
41
42
  logger = logging.getLogger(__name__)
42
43
 
43
44
 
45
+ parser: argdantic.ArgParser = argdantic.ArgParser()
46
+
47
+
48
+ @parser.command(singleton=True, name="NucliaDB", help="NucliaDB Starting script")
49
+ def setting_parser(settings: Settings) -> Settings:
50
+ return settings
51
+
52
+
44
53
  def setup() -> Settings:
45
54
  errors.setup_error_handling(versions.get_installed_version("nucliadb"))
46
- parser = pydantic_argparse.ArgumentParser(
47
- model=Settings,
48
- prog="NucliaDB",
49
- description="NucliaDB Starting script",
50
- )
51
- nucliadb_args = parser.parse_typed_args()
55
+ nucliadb_args = parser()
52
56
 
53
57
  log_settings = LogSettings(
54
58
  # change default settings for standalone
@@ -80,6 +84,7 @@ def get_server(settings: Settings) -> tuple[FastAPI, uvicorn.Server]:
80
84
 
81
85
  def run():
82
86
  settings = setup()
87
+ run_migrations()
83
88
  app, server = get_server(settings)
84
89
  instrument_app(app, excluded_urls=["/"], metrics=True)
85
90
 
@@ -92,29 +97,31 @@ def run():
92
97
  settings_to_output = {
93
98
  "API": f"http://{settings.http_host}:{settings.http_port}/api",
94
99
  "Admin UI": f"http://{settings.http_host}:{settings.http_port}/admin",
95
- "Key-value backend": ingest_settings.driver,
96
- "Blog storage backend": storage_settings.file_backend,
97
- "Cluster discovery mode": cluster_settings.cluster_discovery_mode,
100
+ "Key-value backend": ingest_settings.driver.value,
101
+ "Blob storage backend": storage_settings.file_backend.value,
102
+ "Cluster discovery mode": cluster_settings.cluster_discovery_mode.value,
98
103
  "Node replicas": cluster_settings.node_replicas,
99
104
  "Index data path": os.path.realpath(cluster_settings.data_path),
100
105
  "Node port": cluster_settings.standalone_node_port,
101
- "Auth policy": settings.auth_policy,
102
- "Log output type": settings.log_output_type,
103
- "Node role": cluster_settings.standalone_node_role,
106
+ "Auth policy": settings.auth_policy.value,
107
+ "Node role": cluster_settings.standalone_node_role.value,
108
+ }
109
+ log_settings = {
110
+ "Log output type": settings.log_output_type.value,
111
+ "Log format type": settings.log_format_type.value,
112
+ "Log level": settings.log_level.value,
104
113
  }
105
114
  if settings.log_output_type == LogOutputType.FILE:
106
115
  log_folder = os.path.realpath(os.path.dirname(LogSettings().access_log))
107
- settings_to_output["Log folder path"] = log_folder
116
+ log_settings["Log folder path"] = log_folder
117
+ settings_to_output.update(log_settings)
108
118
 
109
119
  if nuclia_settings.nuclia_service_account:
110
120
  settings_to_output["NUA API key"] = "Configured ✔"
111
121
  settings_to_output["NUA API zone"] = nuclia_settings.nuclia_zone
112
122
 
113
123
  settings_to_output_fmted = "\n".join(
114
- [
115
- f"|| - {k}:{' ' * (27 - len(k))}{v}"
116
- for k, v in settings_to_output.items()
117
- ]
124
+ [f"|| - {k}:{' ' * (27 - len(k))}{v}" for k, v in settings_to_output.items()]
118
125
  )
119
126
 
120
127
  installed_version = versions.installed_nucliadb()
@@ -27,42 +27,40 @@ from nucliadb.ingest.settings import DriverSettings
27
27
  from nucliadb_models.resource import NucliaDBRoles
28
28
  from nucliadb_telemetry.settings import LogFormatType, LogLevel, LogOutputType
29
29
  from nucliadb_utils.settings import StorageSettings
30
+ from nucliadb_utils.storages.settings import Settings as ExtendedStorageSettings
30
31
 
31
32
 
32
- class StandaloneDiscoveryMode(str, Enum):
33
+ class StandaloneDiscoveryMode(Enum):
33
34
  DEFAULT = "default"
34
35
  MANUAL = "manual"
35
36
  KUBERNETES = "kubernetes"
36
37
  SINGLE_NODE = "single_node"
37
38
 
38
39
 
39
- class AuthPolicy(str, Enum):
40
+ class AuthPolicy(Enum):
40
41
  UPSTREAM_NAIVE = "upstream_naive"
41
42
  UPSTREAM_AUTH_HEADER = "upstream_auth_header"
42
43
  UPSTREAM_OAUTH2 = "upstream_oauth2"
43
44
  UPSTREAM_BASICAUTH = "upstream_basicauth"
44
45
 
45
46
 
46
- class Settings(DriverSettings, StorageSettings):
47
+ class Settings(DriverSettings, StorageSettings, ExtendedStorageSettings):
47
48
  # be consistent here with DATA_PATH env var
48
- data_path: str = pydantic.Field(
49
- "./data/node", description="Path to node index files"
50
- )
49
+ data_path: str = pydantic.Field("./data/node", description="Path to node index files")
51
50
 
52
51
  # all settings here are mapped in to other env var settings used
53
52
  # in the app. These are helper settings to make things easier to
54
53
  # use with standalone app vs cluster app.
55
54
  nua_api_key: Optional[str] = pydantic.Field(
56
- description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/docs/using/understanding/intro#get-a-nua-key" # noqa
55
+ default=None,
56
+ description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/rag/advanced/understanding/intro#get-a-nua-key", # noqa
57
57
  )
58
- zone: Optional[str] = pydantic.Field(description="Nuclia Understanding API Zone ID")
58
+ zone: Optional[str] = pydantic.Field(default=None, description="Nuclia Understanding API Zone ID")
59
59
  http_host: str = pydantic.Field(default="0.0.0.0", description="HTTP Port")
60
60
  http_port: int = pydantic.Field(default=8080, description="HTTP Port")
61
61
  ingest_grpc_port: int = pydantic.Field(default=8030, description="Ingest GRPC Port")
62
62
  train_grpc_port: int = pydantic.Field(default=8031, description="Train GRPC Port")
63
- standalone_node_port: int = pydantic.Field(
64
- default=10009, description="Node GRPC Port"
65
- )
63
+ standalone_node_port: int = pydantic.Field(default=10009, description="Node GRPC Port")
66
64
 
67
65
  auth_policy: AuthPolicy = pydantic.Field(
68
66
  default=AuthPolicy.UPSTREAM_NAIVE,
@@ -90,9 +88,7 @@ class Settings(DriverSettings, StorageSettings):
90
88
  description="Default role to assign to user that is authenticated \
91
89
  upstream. Not used with `upstream_naive` auth policy.",
92
90
  )
93
- auth_policy_role_mapping: Optional[
94
- dict[str, dict[str, list[NucliaDBRoles]]]
95
- ] = pydantic.Field(
91
+ auth_policy_role_mapping: Optional[dict[str, dict[str, list[NucliaDBRoles]]]] = pydantic.Field(
96
92
  default=None,
97
93
  description="""
98
94
  Role mapping for `upstream_auth_header`, `upstream_oauth2` and `upstream_basicauth` auth policies.
@@ -18,10 +18,10 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import enum
21
+ import importlib.metadata
21
22
  import logging
22
23
  from typing import Optional
23
24
 
24
- import pkg_resources
25
25
  from cachetools import TTLCache
26
26
 
27
27
  from nucliadb.common.http_clients.pypi import PyPi
@@ -65,14 +65,24 @@ def is_newer_release(installed: str, latest: str) -> bool:
65
65
  >>> is_newer_release("1.2.3", "1.2.3.post1")
66
66
  False
67
67
  """
68
- parsed_installed = pkg_resources.parse_version(_release(installed))
69
- parsed_latest = pkg_resources.parse_version(_release(latest))
68
+
69
+ def parse_version(version: str) -> tuple[int, int, int]:
70
+ parts = version.split(".")
71
+ if len(parts) > 3:
72
+ raise ValueError(f"Invalid version string: {version}")
73
+ major = int(parts[0]) if len(parts) >= 1 else 0
74
+ minor = int(parts[1]) if len(parts) >= 2 else 0
75
+ patch = int(parts[2]) if len(parts) == 3 else 0
76
+ return (major, minor, patch)
77
+
78
+ parsed_installed = parse_version(_release(installed))
79
+ parsed_latest = parse_version(_release(latest))
70
80
  return parsed_latest > parsed_installed
71
81
 
72
82
 
73
83
  def _release(version: str) -> str:
74
84
  """
75
- Strips the .postX part of the version so that wecan compare major.minor.patch only.
85
+ Strips the .postX part of the version so that we can compare major.minor.patch only.
76
86
 
77
87
  >>> _release("1.2.3")
78
88
  '1.2.3'
@@ -83,7 +93,7 @@ def _release(version: str) -> str:
83
93
 
84
94
 
85
95
  def get_installed_version(package_name: str) -> str:
86
- return pkg_resources.get_distribution(package_name).version
96
+ return importlib.metadata.distribution(package_name).version
87
97
 
88
98
 
89
99
  async def get_latest_version(package: str) -> Optional[str]:
@@ -60,7 +60,9 @@ class NatsTaskConsumer:
60
60
  async def initialize(self, context: ApplicationContext):
61
61
  self.context = context
62
62
  await create_nats_stream_if_not_exists(
63
- self.context, self.stream.name, subjects=[self.stream.subject] # type: ignore
63
+ self.context,
64
+ self.stream.name, # type: ignore
65
+ subjects=[self.stream.subject], # type: ignore
64
66
  )
65
67
  await self._setup_nats_subscription()
66
68
  self.initialized = True
@@ -128,11 +130,9 @@ class NatsTaskConsumer:
128
130
  f"Message received: subject:{subject}, seqid: {seqid}, reply: {reply}",
129
131
  extra={"consumer_name": self.name},
130
132
  )
131
- async with MessageProgressUpdater(
132
- msg, nats_consumer_settings.nats_ack_wait * 0.66
133
- ):
133
+ async with MessageProgressUpdater(msg, nats_consumer_settings.nats_ack_wait * 0.66):
134
134
  try:
135
- task_msg = self.msg_type.parse_raw(msg.data)
135
+ task_msg = self.msg_type.model_validate_json(msg.data)
136
136
  except pydantic.ValidationError as e:
137
137
  errors.capture_exception(e)
138
138
  logger.error(
@@ -144,9 +144,7 @@ class NatsTaskConsumer:
144
144
  await msg.ack()
145
145
  return
146
146
 
147
- logger.info(
148
- f"Starting task consumption", extra={"consumer_name": self.name}
149
- )
147
+ logger.info(f"Starting task consumption", extra={"consumer_name": self.name})
150
148
  try:
151
149
  await self.callback(self.context, task_msg) # type: ignore
152
150
  except asyncio.CancelledError:
@@ -200,9 +198,7 @@ def create_consumer(
200
198
  return consumer
201
199
 
202
200
 
203
- async def start_consumer(
204
- task_name: str, context: ApplicationContext
205
- ) -> NatsTaskConsumer:
201
+ async def start_consumer(task_name: str, context: ApplicationContext) -> NatsTaskConsumer:
206
202
  """
207
203
  Returns an initialized consumer for the given task name, ready to consume messages from the task stream.
208
204
  """
@@ -214,7 +210,7 @@ async def start_consumer(
214
210
  name=f"{task_name}_consumer",
215
211
  stream=task.stream,
216
212
  callback=task.callback, # type: ignore
217
- msg_type=task.msg_type, # type: ignore
213
+ msg_type=task.msg_type,
218
214
  max_concurrent_messages=task.max_concurrent_messages,
219
215
  )
220
216
  await consumer.initialize(context)
@@ -44,11 +44,13 @@ class NatsTaskProducer:
44
44
  async def initialize(self, context: ApplicationContext):
45
45
  self.context = context
46
46
  await create_nats_stream_if_not_exists(
47
- self.context, self.stream.name, subjects=[self.stream.subject] # type: ignore
47
+ self.context,
48
+ self.stream.name, # type: ignore
49
+ subjects=[self.stream.subject], # type: ignore
48
50
  )
49
51
  self.initialized = True
50
52
 
51
- async def __call__(self, msg: MsgType) -> int: # type: ignore
53
+ async def __call__(self, msg: MsgType) -> int:
52
54
  """
53
55
  Publish message to the producer's nats stream.
54
56
  Returns the sequence number of the published message.
@@ -57,7 +59,8 @@ class NatsTaskProducer:
57
59
  raise RuntimeError("NatsTaskProducer not initialized")
58
60
  try:
59
61
  pub_ack = await self.context.nats_manager.js.publish( # type: ignore
60
- self.stream.subject, msg.json().encode("utf-8") # type: ignore
62
+ self.stream.subject, # type: ignore
63
+ msg.model_dump_json().encode("utf-8"), # type: ignore
61
64
  )
62
65
  logger.info(
63
66
  "Message sent to Nats",
@@ -93,8 +96,6 @@ async def get_producer(task_name: str, context: ApplicationContext) -> NatsTaskP
93
96
  task = get_registered_task(task_name)
94
97
  except KeyError:
95
98
  raise ValueError(f"Task {task_name} not registered")
96
- producer = create_producer(
97
- name=f"{task_name}_producer", stream=task.stream, msg_type=task.msg_type
98
- )
99
+ producer = create_producer(name=f"{task_name}_producer", stream=task.stream, msg_type=task.msg_type)
99
100
  await producer.initialize(context)
100
101
  return producer