nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -20,8 +20,7 @@
20
20
  from nucliadb.migrator.context import ExecutionContext
21
21
 
22
22
 
23
- async def migrate(context: ExecutionContext) -> None:
24
- ...
23
+ async def migrate(context: ExecutionContext) -> None: ...
25
24
 
26
25
 
27
26
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
@@ -17,46 +17,11 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import Optional
21
20
 
22
- from nucliadb_protos.resources_pb2 import AllFieldIDs, FieldID
23
-
24
- from nucliadb.common import datamanagers
25
21
  from nucliadb.migrator.context import ExecutionContext
26
- from nucliadb.migrator.migrator import logger
27
-
28
-
29
- async def migrate(context: ExecutionContext) -> None:
30
- ...
31
22
 
32
23
 
33
- async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
34
- async for resource_id in datamanagers.resources.iterate_resource_ids(kbid=kbid):
35
- async with context.kv_driver.transaction() as txn:
36
- resource = await datamanagers.resources.get_resource(
37
- txn, kbid=kbid, rid=resource_id
38
- )
39
- if resource is None:
40
- logger.warning(
41
- f"kb={kbid} rid={resource_id}: resource not found. Skipping..."
42
- )
43
- continue
24
+ async def migrate(context: ExecutionContext) -> None: ...
44
25
 
45
- all_fields: Optional[AllFieldIDs] = await resource.get_all_field_ids()
46
- if all_fields is not None:
47
- logger.warning(
48
- f"kb={kbid} rid={resource_id}: already has all fields key. Skipping..."
49
- )
50
- continue
51
26
 
52
- # Migrate resource
53
- logger.warning(f"kb={kbid} rid={resource_id}: migrating...")
54
- all_fields = AllFieldIDs()
55
- async for (
56
- field_type,
57
- field_id,
58
- ) in resource._deprecated_scan_fields_ids():
59
- fid = FieldID(field_type=field_type, field=field_id)
60
- all_fields.fields.append(fid)
61
- await resource.set_all_field_ids(all_fields)
62
- await txn.commit()
27
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None: ...
@@ -20,8 +20,7 @@
20
20
  from nucliadb.migrator.context import ExecutionContext
21
21
 
22
22
 
23
- async def migrate(context: ExecutionContext) -> None:
24
- ...
23
+ async def migrate(context: ExecutionContext) -> None: ...
25
24
 
26
25
 
27
26
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
@@ -20,8 +20,7 @@
20
20
  from nucliadb.migrator.context import ExecutionContext
21
21
 
22
22
 
23
- async def migrate(context: ExecutionContext) -> None:
24
- ...
23
+ async def migrate(context: ExecutionContext) -> None: ...
25
24
 
26
25
 
27
26
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
@@ -20,9 +20,7 @@
20
20
  from nucliadb.migrator.context import ExecutionContext
21
21
 
22
22
 
23
- async def migrate(context: ExecutionContext) -> None:
24
- ...
23
+ async def migrate(context: ExecutionContext) -> None: ...
25
24
 
26
25
 
27
- async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
28
- ...
26
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None: ...
@@ -28,8 +28,7 @@ from nucliadb.common.cluster.rollover import clean_rollover_status
28
28
  from nucliadb.migrator.context import ExecutionContext
29
29
 
30
30
 
31
- async def migrate(context: ExecutionContext) -> None:
32
- ...
31
+ async def migrate(context: ExecutionContext) -> None: ...
33
32
 
34
33
 
35
34
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
@@ -26,13 +26,14 @@ rollover will do the upgrade automatically.
26
26
 
27
27
  """
28
28
 
29
- from nucliadb.common.cluster.rollover import rollover_kb_shards
30
29
  from nucliadb.migrator.context import ExecutionContext
31
30
 
32
31
 
33
- async def migrate(context: ExecutionContext) -> None:
34
- ...
32
+ async def migrate(context: ExecutionContext) -> None: ...
35
33
 
36
34
 
37
35
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
38
- await rollover_kb_shards(context, kbid)
36
+ """
37
+ We only need 1 rollover migration defined at a time; otherwise, we will
38
+ possibly run many for a kb when we only ever need to run one
39
+ """
@@ -27,24 +27,23 @@ index data loss. Rollover affected KBs
27
27
 
28
28
  import logging
29
29
 
30
- from nucliadb.common.cluster.rollover import rollover_kb_shards
31
30
  from nucliadb.migrator.context import ExecutionContext
32
31
 
33
32
  logger = logging.getLogger(__name__)
34
33
 
35
- AFFECTED_KBS = [
36
- "1efc5a33-bc5a-490c-8b47-b190beee212d",
37
- "f11d6eb9-da5e-4519-ac3d-e304bfa5c354",
38
- "096d9070-f7be-40c8-a24c-19c89072e3ff",
39
- "848f01bc-341a-4346-b473-6b11b76b26eb",
40
- ]
34
+ # AFFECTED_KBS = [
35
+ # "1efc5a33-bc5a-490c-8b47-b190beee212d",
36
+ # "f11d6eb9-da5e-4519-ac3d-e304bfa5c354",
37
+ # "096d9070-f7be-40c8-a24c-19c89072e3ff",
38
+ # "848f01bc-341a-4346-b473-6b11b76b26eb",
39
+ # ]
41
40
 
42
41
 
43
- async def migrate(context: ExecutionContext) -> None:
44
- ...
42
+ async def migrate(context: ExecutionContext) -> None: ...
45
43
 
46
44
 
47
45
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
48
- if kbid in AFFECTED_KBS:
49
- logger.info(f"Rolling over affected KB: {kbid}")
50
- await rollover_kb_shards(context, kbid)
46
+ """
47
+ We only need 1 rollover migration defined at a time; otherwise, we will
48
+ possibly run many for a kb when we only ever need to run one
49
+ """
@@ -26,28 +26,12 @@ Tikv doesn't really like scanning a lot of keys, so we need to materialize the l
26
26
 
27
27
  import logging
28
28
 
29
- from nucliadb.common import datamanagers
30
29
  from nucliadb.migrator.context import ExecutionContext
31
30
 
32
31
  logger = logging.getLogger(__name__)
33
32
 
34
33
 
35
- async def migrate(context: ExecutionContext) -> None:
36
- ...
34
+ async def migrate(context: ExecutionContext) -> None: ...
37
35
 
38
36
 
39
- async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
40
- async with context.kv_driver.transaction() as txn:
41
- labelset_list = await datamanagers.labels._get_labelset_ids(txn, kbid=kbid)
42
- if labelset_list is not None:
43
- logger.info("No need for labelset list migration", extra={"kbid": kbid})
44
- return
45
-
46
- labelset_list = await datamanagers.labels._deprecated_scan_labelset_ids(
47
- txn, kbid=kbid
48
- )
49
- await datamanagers.labels._set_labelset_ids(
50
- txn, kbid=kbid, labelsets=labelset_list
51
- )
52
- logger.info("Labelset list migrated", extra={"kbid": kbid})
53
- await txn.commit()
37
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None: ...
@@ -23,26 +23,20 @@ Allow targeted rollover of KBs
23
23
  """
24
24
 
25
25
  import logging
26
- import os
27
26
 
28
- from nucliadb.common.cluster.rollover import rollover_kb_shards
29
27
  from nucliadb.migrator.context import ExecutionContext
30
28
 
31
29
  logger = logging.getLogger(__name__)
32
30
 
33
31
 
34
- AFFECTED_KBS = [
35
- kbid.strip()
36
- for kbid in os.environ.get("ROLLOVER_KBS", "").split(",")
37
- if kbid.strip()
38
- ]
32
+ # AFFECTED_KBS = [kbid.strip() for kbid in os.environ.get("ROLLOVER_KBS", "").split(",") if kbid.strip()]
39
33
 
40
34
 
41
- async def migrate(context: ExecutionContext) -> None:
42
- ...
35
+ async def migrate(context: ExecutionContext) -> None: ...
43
36
 
44
37
 
45
38
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
46
- if kbid in AFFECTED_KBS:
47
- logger.info(f"Rolling over affected KB: {kbid}")
48
- await rollover_kb_shards(context, kbid)
39
+ """
40
+ We only need 1 rollover migration defined at a time; otherwise, we will
41
+ possibly run many for a kb when we only ever need to run one
42
+ """
@@ -20,9 +20,7 @@
20
20
  from nucliadb.migrator.context import ExecutionContext
21
21
 
22
22
 
23
- async def migrate(context: ExecutionContext) -> None:
24
- ...
23
+ async def migrate(context: ExecutionContext) -> None: ...
25
24
 
26
25
 
27
- async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
28
- ...
26
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None: ...
@@ -23,19 +23,17 @@ Allow targeted rollover of KBs
23
23
  """
24
24
 
25
25
  import logging
26
- import os
27
26
 
28
- from nucliadb.common.cluster.rollover import rollover_kb_shards
29
27
  from nucliadb.migrator.context import ExecutionContext
30
28
 
31
29
  logger = logging.getLogger(__name__)
32
30
 
33
31
 
34
- async def migrate(context: ExecutionContext) -> None:
35
- ...
32
+ async def migrate(context: ExecutionContext) -> None: ...
36
33
 
37
34
 
38
35
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
39
- if os.environ.get("RUNNING_ENVIRONMENT", os.environ.get("ENVIRONMENT")) == "stage":
40
- logger.info(f"Rolling over affected KB: {kbid}")
41
- await rollover_kb_shards(context, kbid)
36
+ """
37
+ We only need 1 rollover migration defined at a time; otherwise, we will
38
+ possibly run many for a kb when we only ever need to run one
39
+ """
@@ -24,26 +24,20 @@ Targeted rollover for a specific KB
24
24
  """
25
25
 
26
26
  import logging
27
- import os
28
27
 
29
- from nucliadb.common.cluster.rollover import rollover_kb_shards
30
28
  from nucliadb.migrator.context import ExecutionContext
31
29
 
32
30
  logger = logging.getLogger(__name__)
33
31
 
34
32
 
35
- AFFECTED_KBS = [
36
- kbid.strip()
37
- for kbid in os.environ.get("ROLLOVER_KBS", "").split(",")
38
- if kbid.strip()
39
- ]
33
+ # AFFECTED_KBS = [kbid.strip() for kbid in os.environ.get("ROLLOVER_KBS", "").split(",") if kbid.strip()]
40
34
 
41
35
 
42
- async def migrate(context: ExecutionContext) -> None:
43
- ...
36
+ async def migrate(context: ExecutionContext) -> None: ...
44
37
 
45
38
 
46
39
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
47
- if kbid in AFFECTED_KBS:
48
- logger.info(f"Rolling over affected KB: {kbid}")
49
- await rollover_kb_shards(context, kbid)
40
+ """
41
+ We only need 1 rollover migration defined at a time; otherwise, we will
42
+ possibly run many for a kb when we only ever need to run one
43
+ """
@@ -25,47 +25,42 @@ Targeted rollover for a specific KBs which still don't have the latest version o
25
25
 
26
26
  import logging
27
27
 
28
- from nucliadb_protos.noderesources_pb2 import ShardCreated
29
-
30
- from nucliadb.common import datamanagers
31
- from nucliadb.common.cluster.rollover import rollover_kb_shards
32
28
  from nucliadb.migrator.context import ExecutionContext
33
29
 
34
30
  logger = logging.getLogger(__name__)
35
31
 
36
32
 
37
- class ShardsObjectNotFound(Exception):
38
- ...
33
+ class ShardsObjectNotFound(Exception): ...
39
34
 
40
35
 
41
- async def migrate(context: ExecutionContext) -> None:
42
- ...
36
+ async def migrate(context: ExecutionContext) -> None: ...
43
37
 
44
38
 
45
39
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
46
- try:
47
- if await has_old_paragraphs_index(context, kbid):
48
- logger.info("Rolling over affected KB", extra={"kbid": kbid})
49
- await rollover_kb_shards(context, kbid)
50
- else:
51
- logger.info(
52
- "KB already has the latest version of the paragraphs index, skipping rollover",
53
- extra={"kbid": kbid},
54
- )
55
- except ShardsObjectNotFound:
56
- logger.warning("KB not found, skipping rollover", extra={"kbid": kbid})
40
+ """
41
+ We only need 1 rollover migration defined at a time; otherwise, we will
42
+ possibly run many for a kb when we only ever need to run one
43
+ """
44
+ # try:
45
+ # if await has_old_paragraphs_index(context, kbid):
46
+ # logger.info("Rolling over affected KB", extra={"kbid": kbid})
47
+ # await rollover_kb_index(context, kbid)
48
+ # else:
49
+ # logger.info(
50
+ # "KB already has the latest version of the paragraphs index, skipping rollover",
51
+ # extra={"kbid": kbid},
52
+ # )
53
+ # except ShardsObjectNotFound:
54
+ # logger.warning("KB not found, skipping rollover", extra={"kbid": kbid})
57
55
 
58
56
 
59
- async def has_old_paragraphs_index(context: ExecutionContext, kbid: str) -> bool:
60
- async with context.kv_driver.transaction(read_only=True) as txn:
61
- shards_object = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
62
- if not shards_object:
63
- raise ShardsObjectNotFound()
64
- for shard in shards_object.shards:
65
- for replica in shard.replicas:
66
- if (
67
- replica.shard.paragraph_service
68
- != ShardCreated.ParagraphService.PARAGRAPH_V2
69
- ):
70
- return True
71
- return False
57
+ # async def has_old_paragraphs_index(context: ExecutionContext, kbid: str) -> bool:
58
+ # async with context.kv_driver.transaction(read_only=True) as txn:
59
+ # shards_object = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=False)
60
+ # if not shards_object:
61
+ # raise ShardsObjectNotFound()
62
+ # for shard in shards_object.shards:
63
+ # for replica in shard.replicas:
64
+ # if replica.shard.paragraph_service != ShardCreated.ParagraphService.PARAGRAPH_V2:
65
+ # return True
66
+ # return False
@@ -37,13 +37,12 @@ from nucliadb.migrator.context import ExecutionContext
37
37
  logger = logging.getLogger(__name__)
38
38
 
39
39
 
40
- async def migrate(context: ExecutionContext) -> None:
41
- ...
40
+ async def migrate(context: ExecutionContext) -> None: ...
42
41
 
43
42
 
44
43
  async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
45
44
  async with context.kv_driver.transaction() as txn:
46
- shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
45
+ shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=True)
47
46
  if shards is None:
48
47
  logger.error("KB without shards", extra={"kbid": kbid})
49
48
  return
@@ -53,9 +52,7 @@ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
53
52
  shards.shards[shards.actual].read_only = False
54
53
 
55
54
  # just ensure we're writing it correctly
56
- assert [shard_object.read_only for shard_object in shards.shards].count(
57
- False
58
- ) == 1
55
+ assert [shard_object.read_only for shard_object in shards.shards].count(False) == 1
59
56
 
60
57
  await datamanagers.cluster.update_kb_shards(txn, kbid=kbid, shards=shards)
61
58
  await txn.commit()
@@ -0,0 +1,59 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """Migration #18
22
+
23
+ Due to a bug on backend services, some kbslugs were not properly deleted and got
24
+ orphan. Let's delete them!
25
+
26
+ """
27
+
28
+ import logging
29
+
30
+ from nucliadb.common import datamanagers
31
+ from nucliadb.common.datamanagers.kb import KB_SLUGS_BASE
32
+ from nucliadb.migrator.context import ExecutionContext
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ async def migrate(context: ExecutionContext) -> None:
38
+ async with context.kv_driver.transaction() as txn:
39
+ async for key in txn.keys(KB_SLUGS_BASE):
40
+ slug = key.replace(KB_SLUGS_BASE, "")
41
+ value = await txn.get(key, for_update=False)
42
+ if value is None:
43
+ # KB with slug but without uuid? Seems wrong, let's remove it too
44
+ logger.info("Removing /kbslugs with empty value", extra={"maindb_key": key})
45
+ await txn.delete(key)
46
+ continue
47
+
48
+ kbid = value.decode()
49
+ if not (await datamanagers.kb.exists_kb(txn, kbid=kbid)):
50
+ # log data too just in case
51
+ logger.info(
52
+ "Removing orphan /kbslugs key",
53
+ extra={"kbid": kbid, "kb_slug": slug, "maindb_key": key},
54
+ )
55
+ await txn.delete(key)
56
+ await txn.commit()
57
+
58
+
59
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None: ...
@@ -0,0 +1,66 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """Migration #19
22
+
23
+ Targeted rollover for a specific KBs which still don't have the latest version of the paragraphs index
24
+ """
25
+
26
+ import logging
27
+
28
+ from nucliadb.migrator.context import ExecutionContext
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ class ShardsObjectNotFound(Exception): ...
34
+
35
+
36
+ async def migrate(context: ExecutionContext) -> None: ...
37
+
38
+
39
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
40
+ """
41
+ We only need 1 rollover migration defined at a time; otherwise, we will
42
+ possibly run many for a kb when we only ever need to run one
43
+ """
44
+ # try:
45
+ # if await has_old_paragraphs_index(context, kbid):
46
+ # logger.info("Rolling over affected KB", extra={"kbid": kbid})
47
+ # await rollover_kb_index(context, kbid)
48
+ # else:
49
+ # logger.info(
50
+ # "KB already has the latest version of the paragraphs index, skipping rollover",
51
+ # extra={"kbid": kbid},
52
+ # )
53
+ # except ShardsObjectNotFound:
54
+ # logger.warning("KB not found, skipping rollover", extra={"kbid": kbid})
55
+
56
+
57
+ # async def has_old_paragraphs_index(context: ExecutionContext, kbid: str) -> bool:
58
+ # async with context.kv_driver.transaction(read_only=True) as txn:
59
+ # shards_object = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
60
+ # if not shards_object:
61
+ # raise ShardsObjectNotFound()
62
+ # for shard in shards_object.shards:
63
+ # for replica in shard.replicas:
64
+ # if replica.shard.paragraph_service != ShardCreated.ParagraphService.PARAGRAPH_V3:
65
+ # return True
66
+ # return False
@@ -0,0 +1,83 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """Migration #20
22
+ This migration is for reducing the number of nodes in a cluster.
23
+ Essentially, it is a rollover shards migration only for KBs that have
24
+ shards in the nodes we want to remove from the cluster.
25
+ Will read the DRAIN_NODES envvar to get the list of nodes to drain, and will
26
+ create new shards in the remaining nodes.
27
+ """
28
+
29
+ import logging
30
+
31
+ from nucliadb.common import datamanagers
32
+ from nucliadb.common.cluster.rollover import rollover_kb_index
33
+ from nucliadb.common.cluster.settings import settings as cluster_settings
34
+ from nucliadb.migrator.context import ExecutionContext
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ async def migrate(context: ExecutionContext) -> None: ...
40
+
41
+
42
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
43
+ """
44
+ Rollover KB shards if any of the shards are on the nodes to drain
45
+ """
46
+ drain_node_ids = cluster_settings.drain_nodes
47
+ if len(drain_node_ids) == 0:
48
+ logger.info("Skipping migration because no drain_nodes are set")
49
+ return
50
+
51
+ if not await kb_has_shards_on_drain_nodes(kbid, drain_node_ids):
52
+ logger.info(
53
+ "KB does not have shards on the nodes to drain, skipping rollover",
54
+ extra={"kbid": kbid},
55
+ )
56
+ return
57
+
58
+ logger.info("Rolling over affected KB", extra={"kbid": kbid})
59
+ await rollover_kb_index(context, kbid, drain_nodes=drain_node_ids)
60
+
61
+
62
+ async def kb_has_shards_on_drain_nodes(kbid: str, drain_node_ids: list[str]) -> bool:
63
+ async with datamanagers.with_ro_transaction() as txn:
64
+ shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
65
+ if not shards:
66
+ logger.warning("Shards object not found", extra={"kbid": kbid})
67
+ return False
68
+ shard_in_drain_nodes = False
69
+ for shard in shards.shards:
70
+ for replica in shard.replicas:
71
+ if replica.node in drain_node_ids:
72
+ logger.info(
73
+ "Shard found in drain nodes, will rollover it",
74
+ extra={
75
+ "kbid": kbid,
76
+ "logical_shard": shard.shard,
77
+ "replica_shard_id": replica.shard.id,
78
+ "node": replica.node,
79
+ "drain_node_ids": drain_node_ids,
80
+ },
81
+ )
82
+ shard_in_drain_nodes = True
83
+ return shard_in_drain_nodes
@@ -17,29 +17,28 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from unittest import mock
21
20
 
22
- import pytest
21
+ """Migration #21
23
22
 
24
- from nucliadb.standalone.run import run, run_async_nucliadb
25
- from nucliadb.standalone.settings import Settings
23
+ With the new vectorsets implementation, we need to store some information on
24
+ maindb. As the key "/kbs/{kbid}/vectorsets" was already used at some point, this
25
+ migration will ensure to overwrite the key and set the new value
26
26
 
27
+ """
27
28
 
28
- @pytest.fixture(scope="function", autouse=True)
29
- def mocked_deps():
30
- with mock.patch("uvicorn.Server.run"), mock.patch(
31
- "pydantic_argparse.ArgumentParser.parse_typed_args", return_value=Settings()
32
- ), mock.patch(
33
- "nucliadb.standalone.run.get_latest_nucliadb", return_value="1.0.0"
34
- ), mock.patch(
35
- "uvicorn.Server.startup"
36
- ):
37
- yield
29
+ import logging
38
30
 
31
+ from nucliadb.common import datamanagers
32
+ from nucliadb.migrator.context import ExecutionContext
39
33
 
40
- def test_run():
41
- run()
34
+ logger = logging.getLogger(__name__)
42
35
 
43
36
 
44
- async def test_run_async_nucliadb():
45
- await run_async_nucliadb(Settings())
37
+ async def migrate(context: ExecutionContext) -> None: ...
38
+
39
+
40
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
41
+ async with context.kv_driver.transaction() as txn:
42
+ logger.info(f"Overwriting vectorsets key", extra={"kbid": kbid})
43
+ await datamanagers.vectorsets.initialize(txn, kbid=kbid)
44
+ await txn.commit()