nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,260 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ import os
22
+ from typing import Optional
23
+
24
+ from nidx_protos.nidx_pb2_grpc import NidxApiStub, NidxSearcherStub
25
+
26
+ from nucliadb.common.cluster.base import AbstractIndexNode
27
+ from nucliadb.common.cluster.settings import settings
28
+ from nucliadb.ingest.settings import DriverConfig
29
+ from nucliadb.ingest.settings import settings as ingest_settings
30
+ from nucliadb_protos.nodewriter_pb2 import (
31
+ IndexMessage,
32
+ )
33
+ from nucliadb_utils import logger
34
+ from nucliadb_utils.grpc import get_traced_grpc_channel
35
+ from nucliadb_utils.nats import NatsConnectionManager
36
+ from nucliadb_utils.settings import FileBackendConfig, indexing_settings, storage_settings
37
+ from nucliadb_utils.storages.settings import settings as extended_storage_settings
38
+ from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_utility
39
+
40
# Feature flag read once, at import time, from the NIDX_ENABLED environment variable.
# The raw value is parsed as a boolean flag instead of being passed to ``bool()``:
# ``bool("false")`` and ``bool("0")`` are both True, so an explicit "off" setting
# used to switch nidx on. Unset, empty, "0", "false", "no" and "off" (any case,
# surrounding whitespace ignored) disable it; every other value enables it.
_NIDX_DISABLED_VALUES = ("", "0", "false", "no", "off")
NIDX_ENABLED = os.environ.get("NIDX_ENABLED", "").strip().lower() not in _NIDX_DISABLED_VALUES
41
+
42
+
43
class NidxUtility:
    """Common interface of the nidx utilities defined in this module.

    Concrete utilities assign ``api_client`` and ``searcher_client`` in their
    ``initialize`` and must override ``initialize``, ``finalize`` and ``index``;
    the versions here only raise ``NotImplementedError``.
    """

    # gRPC stubs; they stay ``None`` until a subclass ``initialize`` sets them.
    api_client: Optional[NidxApiStub] = None
    searcher_client: Optional[NidxSearcherStub] = None

    async def initialize(self):
        """Prepare the utility for use. Must be overridden."""
        raise NotImplementedError()

    async def finalize(self):
        """Release whatever ``initialize`` acquired. Must be overridden."""
        raise NotImplementedError()

    async def index(self, msg: IndexMessage) -> int:
        """Submit ``msg`` for indexing and return an integer. Must be overridden."""
        raise NotImplementedError()

    def wait_for_sync(self):
        """Do nothing; subclasses may override this hook."""
        return None
58
+
59
+
60
def _storage_config(prefix: str, bucket: Optional[str]) -> dict[str, str]:
    """Build the object-store entries of a nidx configuration section.

    Every key is emitted as ``{prefix}__<NAME>``. The values come from the
    storage settings of the configured file backend (local, GCS or S3).

    Args:
        prefix: Section name placed in front of every key.
        bucket: Bucket to use; when falsy, the indexing bucket from the
            settings of the active backend is used instead.

    Returns:
        The configuration entries. The dict is empty when the file backend is
        none of LOCAL, GCS or S3.
    """
    backend = storage_settings.file_backend
    config: dict[str, str] = {}

    def key(name: str) -> str:
        return f"{prefix}__{name}"

    if backend == FileBackendConfig.LOCAL:
        local_bucket = bucket or storage_settings.local_indexing_bucket
        file_path = f"{storage_settings.local_files}/{local_bucket}"
        # The directory is created up front, before it is written into the config.
        os.makedirs(file_path, exist_ok=True)

        config[key("OBJECT_STORE")] = "file"
        config[key("FILE_PATH")] = file_path
    elif backend == FileBackendConfig.GCS:
        gcs_bucket = bucket or extended_storage_settings.gcs_indexing_bucket
        config[key("OBJECT_STORE")] = "gcs"
        # Optional GCS settings are only emitted when they are set.
        if gcs_bucket:
            config[key("BUCKET")] = gcs_bucket
        if storage_settings.gcs_base64_creds:
            config[key("BASE64_CREDS")] = storage_settings.gcs_base64_creds
        if storage_settings.gcs_endpoint_url:
            config[key("ENDPOINT")] = storage_settings.gcs_endpoint_url
    elif backend == FileBackendConfig.S3:
        s3_bucket = bucket or extended_storage_settings.s3_indexing_bucket
        config[key("OBJECT_STORE")] = "s3"
        if s3_bucket:
            config[key("BUCKET")] = s3_bucket
        # Credentials and region are always emitted, as "" when unset.
        config[key("CLIENT_ID")] = storage_settings.s3_client_id or ""
        config[key("CLIENT_SECRET")] = storage_settings.s3_client_secret or ""
        config[key("REGION_NAME")] = storage_settings.s3_region_name or ""
        if storage_settings.s3_endpoint:
            config[key("ENDPOINT")] = storage_settings.s3_endpoint

    return config
90
+
91
+
92
class NidxBindingUtility(NidxUtility):
    """Nidx utility implemented on top of the ``nidx_binding`` module.

    The binding is created from a configuration dict; the gRPC stubs connect to
    the ``localhost`` ports the binding reports.
    """

    def __init__(self):
        """Assemble the binding configuration.

        Raises:
            ValueError: If the ingest driver is not PG.
        """
        if ingest_settings.driver != DriverConfig.PG:
            raise ValueError("nidx_binding requires DRIVER=pg")

        config = {"METADATA__DATABASE_URL": ingest_settings.driver_pg_url}
        # The INDEXER section uses the bucket from settings, STORAGE a fixed "nidx" bucket.
        config.update(_storage_config("INDEXER", None))
        config.update(_storage_config("STORAGE", "nidx"))
        self.config = config

    async def initialize(self):
        """Create the binding and the gRPC stubs pointing at its local ports."""
        # NOTE(review): imported here rather than at module level, presumably so this
        # module stays importable where the binding is not installed; confirm.
        import nidx_binding  # type: ignore

        self.binding = nidx_binding.NidxBinding(self.config)

        api_channel = get_traced_grpc_channel(f"localhost:{self.binding.api_port}", "nidx_api")
        self.api_client = NidxApiStub(api_channel)

        searcher_channel = get_traced_grpc_channel(
            f"localhost:{self.binding.searcher_port}", "nidx_searcher"
        )
        self.searcher_client = NidxSearcherStub(searcher_channel)

    async def finalize(self):
        """Drop the reference to the binding created by ``initialize``."""
        del self.binding

    async def index(self, msg: IndexMessage) -> int:
        """Pass the serialized ``msg`` to the binding and return its result."""
        serialized = msg.SerializeToString()
        return self.binding.index(serialized)

    def wait_for_sync(self):
        """Delegate to the binding's ``wait_for_sync``."""
        self.binding.wait_for_sync()
124
+
125
+
126
class NidxServiceUtility(NidxUtility):
    """Nidx utility that talks to nidx over the network.

    Index messages are published to a NATS JetStream subject; the API and
    searcher stubs connect to the addresses given in the cluster settings.
    """

    def __init__(self):
        """Validate the required settings and create the NATS connection manager.

        Raises:
            ValueError: If the nidx index subject, the nidx API address or the
                nidx searcher address is not configured.
        """
        if indexing_settings.index_nidx_subject is None:
            raise ValueError("INDEX_NIDX_SUBJECT needed for nidx utility")

        if not (settings.nidx_api_address and settings.nidx_searcher_address):
            raise ValueError("NIDX_API_ADDRESS and NIDX_SEARCHER_ADDRESS are required")

        self.nats_connection_manager = NatsConnectionManager(
            service_name="NidxIndexer",
            nats_servers=indexing_settings.index_jetstream_servers,
            nats_creds=indexing_settings.index_jetstream_auth,
        )
        self.subject = indexing_settings.index_nidx_subject

    async def initialize(self):
        """Initialize the NATS connection manager and create both gRPC stubs."""
        await self.nats_connection_manager.initialize()

        api_channel = get_traced_grpc_channel(settings.nidx_api_address, "nidx_api")
        self.api_client = NidxApiStub(api_channel)

        searcher_channel = get_traced_grpc_channel(settings.nidx_searcher_address, "nidx_searcher")
        self.searcher_client = NidxSearcherStub(searcher_channel)

    async def finalize(self):
        """Finalize the NATS connection manager."""
        await self.nats_connection_manager.finalize()

    async def index(self, writer: IndexMessage) -> int:
        """Publish the serialized message to the nidx subject.

        Returns:
            The ``seq`` of the publish result.
        """
        publish = self.nats_connection_manager.js.publish
        res = await publish(self.subject, writer.SerializeToString())
        logger.info(
            f" = Pushed message to nidx shard: {writer.shard}, txid: {writer.txid} seqid: {res.seq}"  # noqa
        )
        return res.seq
159
+
160
+
161
async def start_nidx_utility() -> Optional[NidxUtility]:
    """Create, initialize and register the nidx utility.

    Returns:
        ``None`` when ``NIDX_ENABLED`` is false; the already registered utility
        when there is one; otherwise a newly initialized utility, which is
        registered under ``Utility.NIDX`` before being returned.
    """
    if not NIDX_ENABLED:
        return None

    registered = get_nidx()
    if registered:
        return registered

    # Standalone mode uses the binding; otherwise the network service is used.
    utility_cls = NidxBindingUtility if settings.standalone_mode else NidxServiceUtility
    nidx_utility: NidxUtility = utility_cls()

    await nidx_utility.initialize()
    set_utility(Utility.NIDX, nidx_utility)
    return nidx_utility
178
+
179
+
180
async def stop_nidx_utility():
    """Unregister the nidx utility, if one is registered, and finalize it."""
    nidx_utility = get_nidx()
    if not nidx_utility:
        return

    # The utility is removed from the registry before it is finalized.
    clean_utility(Utility.NIDX)
    await nidx_utility.finalize()
185
+
186
+
187
def get_nidx() -> Optional[NidxUtility]:
    """Return what ``get_utility`` yields for ``Utility.NIDX``.

    Callers in this module treat a falsy result as "no nidx utility registered".
    """
    nidx_utility = get_utility(Utility.NIDX)
    return nidx_utility
189
+
190
+
191
def get_nidx_api_client() -> Optional["NidxApiStub"]:
    """Return the registered utility's ``api_client``, or ``None`` without a utility."""
    nidx = get_nidx()
    return nidx.api_client if nidx else None
197
+
198
+
199
def get_nidx_searcher_client() -> Optional["NidxSearcherStub"]:
    """Return the registered utility's ``searcher_client``, or ``None`` without a utility."""
    nidx = get_nidx()
    return nidx.searcher_client if nidx else None
205
+
206
+
207
+ # TODO: Remove the index node abstraction
208
class NodeNidxAdapter:
    """Expose selected API and searcher client methods on a single object.

    Each attribute is a direct reference to the same-named attribute of the
    client it was taken from, so calling it calls the underlying client.
    """

    def __init__(self, api_client, searcher_client):
        """Copy the method references from both clients.

        Args:
            api_client: Object providing the shard, vector-set and metadata methods.
            searcher_client: Object providing the search methods.

        Raises:
            AttributeError: If either client lacks one of the copied attributes.
        """
        # Shard management (API client)
        self.GetShard = api_client.GetShard
        self.NewShard = api_client.NewShard
        self.DeleteShard = api_client.DeleteShard
        self.ListShards = api_client.ListShards

        # Vector sets and metadata (API client)
        self.AddVectorSet = api_client.AddVectorSet
        self.RemoveVectorSet = api_client.RemoveVectorSet
        self.ListVectorSets = api_client.ListVectorSets
        self.GetMetadata = api_client.GetMetadata

        # Queries (searcher client)
        self.Search = searcher_client.Search
        self.Suggest = searcher_client.Suggest
        self.Paragraphs = searcher_client.Paragraphs
        self.Documents = searcher_client.Documents
225
+
226
+
227
class FakeNode(AbstractIndexNode):
    """Index node whose reader and writer are one ``NodeNidxAdapter``.

    Its ``id``, ``address`` and ``primary_id`` are all the fixed string "nidx".
    """

    def __init__(self, api_client, searcher_client):
        """Wrap both clients in a single adapter stored as ``client``."""
        self.client = NodeNidxAdapter(api_client, searcher_client)

    @property
    def reader(self):
        """The adapter; the same object as ``writer``."""
        return self.client

    @property
    def writer(self):
        """The adapter; the same object as ``reader``."""
        return self.client

    def is_read_replica(self):
        """Always return ``False``."""
        return False

    @property
    def id(self):
        return "nidx"

    @property
    def address(self):
        return "nidx"

    @property
    def primary_id(self):
        return "nidx"
253
+
254
+
255
def get_nidx_fake_node() -> Optional[FakeNode]:
    """Return a new ``FakeNode`` over the registered utility's clients.

    Returns ``None`` when no nidx utility is registered.
    """
    nidx = get_nidx()
    if not nidx:
        return None
    return FakeNode(nidx.api_client, nidx.searcher_client)
@@ -17,6 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ import json
20
21
  from datetime import datetime
21
22
  from typing import AsyncGenerator, Union
22
23
 
@@ -53,25 +54,38 @@ class ExportImportDataManager:
53
54
 
54
55
  async def get_metadata(self, type: str, kbid: str, id: str) -> Metadata:
55
56
  key = self._get_maindb_metadata_key(type, kbid, id)
56
- async with self.driver.transaction() as txn:
57
+ async with self.driver.transaction(read_only=True) as txn:
57
58
  data = await txn.get(key)
58
59
  if data is None or data == b"":
59
60
  raise MetadataNotFound()
60
61
  decoded = data.decode("utf-8")
61
- model_type = {
62
- "export": ExportMetadata,
63
- "import": ImportMetadata,
64
- }[type]
65
- return model_type.parse_raw(decoded) # type: ignore
62
+ if type == "export":
63
+ model_type = ExportMetadata
64
+ elif type == "import":
65
+ model_type = ImportMetadata # type: ignore
66
+ else:
67
+ raise ValueError(f"Invalid type: {type}")
68
+ json_decoded = json.loads(decoded)
69
+
70
+ # For some reason, the total and processed fields are not always present in the metadata.
71
+ # This is to unblock already created exports that hit this bug.
72
+ if json_decoded.get("total") is None:
73
+ json_decoded["total"] = 0
74
+ if json_decoded.get("processed") is None:
75
+ json_decoded["processed"] = 0
76
+
77
+ return model_type.model_validate(json_decoded)
66
78
 
67
79
  async def set_metadata(
68
80
  self,
69
81
  type: str,
70
82
  metadata: Metadata,
71
83
  ):
84
+ metadata.processed = metadata.processed or 0
85
+ metadata.total = metadata.total or 0
72
86
  metadata.modified = datetime.utcnow()
73
87
  key = self._get_maindb_metadata_key(type, metadata.kbid, metadata.id)
74
- data = metadata.json().encode("utf-8")
88
+ data = metadata.model_dump_json().encode("utf-8")
75
89
  async with self.driver.transaction() as txn:
76
90
  await txn.set(key, data)
77
91
  await txn.commit()
@@ -97,9 +111,7 @@ class ExportImportDataManager:
97
111
  await self.storage.uploaditerator(export_bytes, field, cf)
98
112
  return cf.size
99
113
 
100
- async def download_export(
101
- self, kbid: str, export_id: str
102
- ) -> AsyncGenerator[bytes, None]:
114
+ async def download_export(self, kbid: str, export_id: str) -> AsyncGenerator[bytes, None]:
103
115
  key = STORAGE_EXPORT_KEY.format(export_id=export_id)
104
116
  bucket = self.storage.get_bucket_name(kbid)
105
117
  async for chunk in self.storage.download(bucket, key):
@@ -125,13 +137,9 @@ class ExportImportDataManager:
125
137
  async for chunk in self.storage.download(bucket, key):
126
138
  yield chunk
127
139
 
128
- def _get_storage_field(
129
- self, kbid: str, key: str, cf: resources_pb2.CloudFile
130
- ) -> StorageField:
140
+ def _get_storage_field(self, kbid: str, key: str, cf: resources_pb2.CloudFile) -> StorageField:
131
141
  bucket = self.storage.get_bucket_name(kbid)
132
- return self.storage.field_klass(
133
- storage=self.storage, bucket=bucket, fullkey=key, field=cf
134
- )
142
+ return self.storage.field_klass(storage=self.storage, bucket=bucket, fullkey=key, field=cf)
135
143
 
136
144
  async def delete_import(self, kbid: str, import_id: str):
137
145
  key = STORAGE_IMPORT_KEY.format(import_id=import_id)
@@ -151,6 +159,4 @@ class ExportImportDataManager:
151
159
  await func(kbid, id)
152
160
  except Exception as ex:
153
161
  errors.capture_exception(ex)
154
- logger.exception(
155
- f"Could not delete {type} {id} from storage", extra={"kbid": kbid}
156
- )
162
+ logger.exception(f"Could not delete {type} {id} from storage", extra={"kbid": kbid})
@@ -70,3 +70,11 @@ class WrongExportStreamFormat(Exception):
70
70
  """
71
71
 
72
72
  pass
73
+
74
+
75
+ class IncompatibleExport(Exception):
76
+ """
77
+ Raised when trying to import an export file that is incompatible with the destination knowledgebox.
78
+ """
79
+
80
+ pass
@@ -35,6 +35,7 @@ from nucliadb.export_import.utils import (
35
35
  get_cloud_files,
36
36
  get_entities,
37
37
  get_labels,
38
+ get_learning_config,
38
39
  iter_kb_resource_uuids,
39
40
  )
40
41
  from nucliadb_protos import writer_pb2
@@ -51,6 +52,9 @@ async def export_kb(
51
52
 
52
53
  If a metadata object is provided, uses it to resume the export if it was interrupted.
53
54
  """
55
+ async for chunk in export_learning_config(kbid):
56
+ yield chunk
57
+
54
58
  resources_iterator = export_resources(context, kbid)
55
59
  if metadata is not None:
56
60
  assert metadata.kbid == kbid
@@ -66,9 +70,7 @@ async def export_kb(
66
70
  yield chunk
67
71
 
68
72
 
69
- async def export_kb_to_blob_storage(
70
- context: ApplicationContext, msg: NatsTaskMessage
71
- ) -> None:
73
+ async def export_kb_to_blob_storage(context: ApplicationContext, msg: NatsTaskMessage) -> None:
72
74
  """
73
75
  Exports the data of a knowledgebox to the blob storage service.
74
76
  """
@@ -86,7 +88,7 @@ async def export_kb_to_blob_storage(
86
88
  export_size = await upload_export_retried(iterator, kbid, export_id)
87
89
 
88
90
  # Store export size
89
- metadata.total = metadata.processed = export_size
91
+ metadata.total = metadata.processed = export_size or 0
90
92
  await dm.set_metadata("export", metadata)
91
93
 
92
94
 
@@ -103,9 +105,7 @@ async def export_resources(
103
105
  yield chunk
104
106
 
105
107
 
106
- async def export_resources_resumable(
107
- context, metadata: ExportMetadata
108
- ) -> AsyncGenerator[bytes, None]:
108
+ async def export_resources_resumable(context, metadata: ExportMetadata) -> AsyncGenerator[bytes, None]:
109
109
  dm = ExportImportDataManager(context.kv_driver, context.blob_storage)
110
110
 
111
111
  kbid = metadata.kbid
@@ -189,3 +189,16 @@ async def export_labels(
189
189
  yield ExportedItemType.LABELS.encode("utf-8")
190
190
  yield len(data).to_bytes(4, byteorder="big")
191
191
  yield data
192
+
193
+
194
+ async def export_learning_config(
195
+ kbid: str,
196
+ ) -> AsyncGenerator[bytes, None]:
197
+ lconfig = await get_learning_config(kbid)
198
+ if lconfig is None:
199
+ logger.warning(f"No learning configuration found for kbid", extra={"kbid": kbid})
200
+ return
201
+ data = lconfig.model_dump_json().encode("utf-8")
202
+ yield ExportedItemType.LEARNING_CONFIG.encode("utf-8")
203
+ yield len(data).to_bytes(4, byteorder="big")
204
+ yield data
@@ -28,9 +28,7 @@ from nucliadb.export_import.models import (
28
28
  NatsTaskMessage,
29
29
  )
30
30
  from nucliadb.export_import.utils import (
31
- ExportStream,
32
31
  ExportStreamReader,
33
- IteratorExportStream,
34
32
  TaskRetryHandler,
35
33
  import_binary,
36
34
  import_broker_message,
@@ -47,7 +45,7 @@ BinaryStreamGenerator = Callable[[int], BinaryStream]
47
45
  async def import_kb(
48
46
  context: ApplicationContext,
49
47
  kbid: str,
50
- stream: ExportStream,
48
+ stream: AsyncGenerator[bytes, None],
51
49
  metadata: Optional[ImportMetadata] = None,
52
50
  ) -> None:
53
51
  """
@@ -91,28 +89,25 @@ async def import_kb(
91
89
  await dm.set_metadata("import", metadata)
92
90
 
93
91
  if metadata is not None:
94
- metadata.processed = stream_reader.read_bytes
92
+ metadata.processed = stream_reader.read_bytes or 0
95
93
  await dm.set_metadata("import", metadata)
96
94
 
97
95
 
98
- async def import_kb_from_blob_storage(
99
- context: ApplicationContext, msg: NatsTaskMessage
100
- ):
96
+ async def import_kb_from_blob_storage(context: ApplicationContext, msg: NatsTaskMessage):
101
97
  """
102
98
  Imports to a knowledgebox from an export stored in the blob storage service.
103
99
  """
104
100
  kbid, import_id = msg.kbid, msg.id
105
101
  dm = ExportImportDataManager(context.kv_driver, context.blob_storage)
106
102
  metadata = await dm.get_metadata(type="import", kbid=kbid, id=import_id)
107
- iterator = dm.download_import(kbid, import_id)
108
- stream = IteratorExportStream(iterator)
109
103
 
110
104
  retry_handler = TaskRetryHandler("import", dm, metadata)
111
105
 
112
106
  @retry_handler.wrap
113
- async def import_kb_retried(context, kbid, stream, metadata):
107
+ async def import_kb_retried(context, kbid, metadata):
108
+ stream = dm.download_import(kbid, import_id)
114
109
  await import_kb(context, kbid, stream, metadata)
115
110
 
116
- await import_kb_retried(context, kbid, stream, metadata) # type: ignore
111
+ await import_kb_retried(context, kbid, metadata)
117
112
 
118
113
  await dm.try_delete_from_storage("import", kbid, import_id)
@@ -17,7 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from datetime import datetime
20
+ import datetime
21
21
  from enum import Enum
22
22
  from typing import Any
23
23
 
@@ -36,6 +36,7 @@ class ExportedItemType(str, Enum):
36
36
  LABELS = "LAB"
37
37
  ENTITIES = "ENT"
38
38
  BINARY = "BIN"
39
+ LEARNING_CONFIG = "LEA"
39
40
 
40
41
 
41
42
  ExportItem = tuple[ExportedItemType, Any]
@@ -56,8 +57,8 @@ class Metadata(BaseModel):
56
57
  task: TaskMetadata = TaskMetadata(status=Status.SCHEDULED)
57
58
  total: int = 0
58
59
  processed: int = 0
59
- created: datetime = datetime.utcnow()
60
- modified: datetime = datetime.utcnow()
60
+ created: datetime.datetime = datetime.datetime.now(datetime.timezone.utc)
61
+ modified: datetime.datetime = datetime.datetime.now(datetime.timezone.utc)
61
62
 
62
63
 
63
64
  class ExportMetadata(Metadata):
@@ -65,8 +66,7 @@ class ExportMetadata(Metadata):
65
66
  exported_resources: list[str] = list()
66
67
 
67
68
 
68
- class ImportMetadata(Metadata):
69
- ...
69
+ class ImportMetadata(Metadata): ...
70
70
 
71
71
 
72
72
  class NatsTaskMessage(BaseModel):
@@ -32,7 +32,7 @@ def get_exports_consumer() -> NatsTaskConsumer:
32
32
  name="exports_consumer",
33
33
  stream=const.Streams.KB_EXPORTS, # type: ignore
34
34
  callback=export_kb_to_blob_storage, # type: ignore
35
- msg_type=NatsTaskMessage, # type: ignore
35
+ msg_type=NatsTaskMessage,
36
36
  max_concurrent_messages=10,
37
37
  )
38
38
 
@@ -41,7 +41,7 @@ async def get_exports_producer(context: ApplicationContext) -> NatsTaskProducer:
41
41
  producer = create_producer(
42
42
  name="exports_producer",
43
43
  stream=const.Streams.KB_EXPORTS, # type: ignore
44
- msg_type=NatsTaskMessage, # type: ignore
44
+ msg_type=NatsTaskMessage,
45
45
  )
46
46
  await producer.initialize(context)
47
47
  return producer
@@ -52,7 +52,7 @@ def get_imports_consumer() -> NatsTaskConsumer:
52
52
  name="imports_consumer",
53
53
  stream=const.Streams.KB_IMPORTS, # type: ignore
54
54
  callback=import_kb_from_blob_storage, # type: ignore
55
- msg_type=NatsTaskMessage, # type: ignore
55
+ msg_type=NatsTaskMessage,
56
56
  max_concurrent_messages=10,
57
57
  )
58
58
 
@@ -61,7 +61,7 @@ async def get_imports_producer(context: ApplicationContext) -> NatsTaskProducer:
61
61
  producer = create_producer(
62
62
  name="imports_producer",
63
63
  stream=const.Streams.KB_IMPORTS, # type: ignore
64
- msg_type=NatsTaskMessage, # type: ignore
64
+ msg_type=NatsTaskMessage,
65
65
  )
66
66
  await producer.initialize(context)
67
67
  return producer