nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/writer/tus/s3.py CHANGED
@@ -19,21 +19,22 @@
19
19
  #
20
20
  from __future__ import annotations
21
21
 
22
+ import base64
22
23
  import uuid
23
24
  from contextlib import AsyncExitStack
24
- from typing import AsyncIterator, Optional
25
+ from typing import Optional
25
26
 
26
27
  import aiobotocore # type: ignore
27
28
  import aiohttp
28
- import backoff # type: ignore
29
+ import backoff
29
30
  import botocore # type: ignore
30
31
  from aiobotocore.session import AioSession # type: ignore
31
- from nucliadb_protos.resources_pb2 import CloudFile
32
32
 
33
33
  from nucliadb.writer import logger
34
34
  from nucliadb.writer.tus.dm import FileDataManager
35
- from nucliadb.writer.tus.exceptions import CloudFileNotFound, ResumableURINotAvailable
35
+ from nucliadb.writer.tus.exceptions import ResumableURINotAvailable
36
36
  from nucliadb.writer.tus.storage import BlobStore, FileStorageManager
37
+ from nucliadb_protos.resources_pb2 import CloudFile
37
38
  from nucliadb_utils.storages.s3 import (
38
39
  CHUNK_SIZE,
39
40
  MIN_UPLOAD_SIZE,
@@ -53,9 +54,7 @@ class S3FileStorageManager(FileStorageManager):
53
54
  chunk_size = CHUNK_SIZE
54
55
  min_upload_size = MIN_UPLOAD_SIZE
55
56
 
56
- @backoff.on_exception(
57
- backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=3
58
- )
57
+ @backoff.on_exception(backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=3)
59
58
  async def _abort_multipart(self, dm: FileDataManager):
60
59
  try:
61
60
  mpu = dm.get("mpu")
@@ -72,21 +71,25 @@ class S3FileStorageManager(FileStorageManager):
72
71
  if dm.get("mpu") is not None:
73
72
  await self._abort_multipart(dm)
74
73
 
74
+ custom_metadata: dict[str, str] = {
75
+ "base64_filename": base64.b64encode((dm.filename or "").encode()).decode(),
76
+ "content_type": dm.content_type or "",
77
+ "size": str(dm.size),
78
+ }
79
+
75
80
  await dm.update(
76
81
  path=path,
77
82
  upload_file_id=upload_file_id,
78
83
  multipart={"Parts": []},
79
84
  block=1,
80
- mpu=await self._create_multipart(path, bucket),
85
+ mpu=await self._create_multipart(path, bucket, custom_metadata),
81
86
  bucket=bucket,
82
87
  )
83
88
 
84
- @backoff.on_exception(
85
- backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=3
86
- )
87
- async def _create_multipart(self, path, bucket):
89
+ @backoff.on_exception(backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=3)
90
+ async def _create_multipart(self, path, bucket, custom_metadata: dict[str, str]):
88
91
  return await self.storage._s3aioclient.create_multipart_upload(
89
- Bucket=bucket, Key=path
92
+ Bucket=bucket, Key=path, Metadata=custom_metadata
90
93
  )
91
94
 
92
95
  async def append(self, dm: FileDataManager, iterable, offset) -> int:
@@ -96,16 +99,12 @@ class S3FileStorageManager(FileStorageManager):
96
99
  size += len(chunk)
97
100
  part = await self._upload_part(dm, chunk)
98
101
  multipart = dm.get("multipart")
99
- multipart["Parts"].append(
100
- {"PartNumber": dm.get("block"), "ETag": part["ETag"]}
101
- )
102
+ multipart["Parts"].append({"PartNumber": dm.get("block"), "ETag": part["ETag"]})
102
103
  await dm.update(multipart=multipart, block=dm.get("block") + 1)
103
104
 
104
105
  return size
105
106
 
106
- @backoff.on_exception(
107
- backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=3
108
- )
107
+ @backoff.on_exception(backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=3)
109
108
  async def _upload_part(self, dm: FileDataManager, data):
110
109
  mpu = dm.get("mpu")
111
110
  if mpu is None:
@@ -128,18 +127,14 @@ class S3FileStorageManager(FileStorageManager):
128
127
  await dm.finish()
129
128
  return path
130
129
 
131
- @backoff.on_exception(
132
- backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=3
133
- )
130
+ @backoff.on_exception(backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=3)
134
131
  async def _complete_multipart_upload(self, dm: FileDataManager):
135
132
  # if blocks is 0, it means the file is of zero length so we need to
136
133
  # trick it to finish a multiple part with no data.
137
134
  if dm.get("block") == 1:
138
135
  part = await self._upload_part(dm, b"")
139
136
  multipart = dm.get("multipart")
140
- multipart["Parts"].append(
141
- {"PartNumber": dm.get("block"), "ETag": part["ETag"]}
142
- )
137
+ multipart["Parts"].append({"PartNumber": dm.get("block"), "ETag": part["ETag"]})
143
138
  await dm.update(multipart=multipart, block=dm.get("block") + 1)
144
139
  await self.storage._s3aioclient.complete_multipart_upload(
145
140
  Bucket=dm.get("bucket"),
@@ -148,45 +143,10 @@ class S3FileStorageManager(FileStorageManager):
148
143
  MultipartUpload=dm.get("multipart"),
149
144
  )
150
145
 
151
- @backoff.on_exception(
152
- backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=3
153
- )
146
+ @backoff.on_exception(backoff.expo, RETRIABLE_EXCEPTIONS, jitter=backoff.random_jitter, max_tries=3)
154
147
  async def _download(self, uri: str, kbid: str, **kwargs):
155
148
  bucket = self.storage.get_bucket_name(kbid)
156
- return await self.storage._s3aioclient.get_object(
157
- Bucket=bucket, Key=uri, **kwargs
158
- )
159
-
160
- async def iter_data(
161
- self, uri: str, kbid: str, headers: Optional[dict[str, str]] = None
162
- ):
163
- if headers is None:
164
- headers = {}
165
- try:
166
- downloader = await self._download(uri, kbid, **headers)
167
- except self.storage._s3aioclient.exceptions.NoSuchKey:
168
- raise CloudFileNotFound()
169
-
170
- # we do not want to timeout ever from this...
171
- # downloader['Body'].set_socket_timeout(999999)
172
- stream = downloader["Body"]
173
- data = await stream.read(CHUNK_SIZE)
174
- while True:
175
- if not data:
176
- break
177
- yield data
178
- data = await stream.read(CHUNK_SIZE)
179
-
180
- async def read_range(
181
- self, uri, kbid: str, start: int, end: int
182
- ) -> AsyncIterator[bytes]:
183
- """
184
- Iterate through ranges of data
185
- """
186
- async for chunk in self.iter_data(
187
- uri, kbid, headers={"Range": f"bytes={start}-{end - 1}"}
188
- ):
189
- yield chunk
149
+ return await self.storage._s3aioclient.get_object(Bucket=bucket, Key=uri, **kwargs)
190
150
 
191
151
  async def delete_upload(self, uri: str, kbid: str):
192
152
  bucket = self.storage.get_bucket_name(kbid)
@@ -198,6 +158,10 @@ class S3FileStorageManager(FileStorageManager):
198
158
  else:
199
159
  raise AttributeError("No valid uri")
200
160
 
161
+ def validate_intermediate_chunk(self, uploaded_bytes: int):
162
+ if uploaded_bytes % self.min_upload_size != 0:
163
+ raise ValueError(f"Intermediate chunks need to be multiples of {self.min_upload_size} bytes")
164
+
201
165
 
202
166
  class S3BlobStore(BlobStore):
203
167
  async def check_exists(self, bucket_name: str) -> bool:
@@ -213,9 +177,7 @@ class S3BlobStore(BlobStore):
213
177
  async def create_bucket(self, bucket):
214
178
  exists = await self.check_exists(bucket)
215
179
  if not exists:
216
- await create_bucket(
217
- self._s3aioclient, bucket, self.bucket_tags, self.region_name
218
- )
180
+ await create_bucket(self._s3aioclient, bucket, self.bucket_tags, self.region_name)
219
181
  return exists
220
182
 
221
183
  async def finalize(self):
@@ -247,9 +209,7 @@ class S3BlobStore(BlobStore):
247
209
  verify=verify_ssl,
248
210
  use_ssl=ssl,
249
211
  region_name=region_name,
250
- config=aiobotocore.config.AioConfig(
251
- None, max_pool_connections=max_pool_connections
252
- ),
212
+ config=aiobotocore.config.AioConfig(None, max_pool_connections=max_pool_connections),
253
213
  )
254
214
  session = AioSession()
255
215
  self._s3aioclient = await self._exit_stack.enter_async_context(
@@ -21,15 +21,8 @@ from __future__ import annotations
21
21
 
22
22
  from typing import AsyncIterator, Optional
23
23
 
24
- from lru import LRU # type: ignore
25
- from nucliadb_protos.resources_pb2 import CloudFile
26
- from starlette.responses import StreamingResponse
27
-
28
- from nucliadb.writer import logger
29
24
  from nucliadb.writer.tus.dm import FileDataManager
30
- from nucliadb.writer.tus.exceptions import HTTPRangeNotSatisfiable
31
-
32
- CACHED_BUCKETS = LRU(50) # type: ignore
25
+ from nucliadb_protos.resources_pb2 import CloudFile
33
26
 
34
27
 
35
28
  class BlobStore:
@@ -56,14 +49,9 @@ class FileStorageManager:
56
49
  chunk_size: int
57
50
  min_upload_size: Optional[int] = None
58
51
 
59
- def __init__(self, storage):
52
+ def __init__(self, storage: BlobStore):
60
53
  self.storage = storage
61
54
 
62
- def read_range(
63
- self, uri: str, kbid: str, start: int, end: int
64
- ) -> AsyncIterator[bytes]:
65
- raise NotImplementedError()
66
-
67
55
  def iter_data(
68
56
  self, uri: str, kbid: str, headers: Optional[dict[str, str]] = None
69
57
  ) -> AsyncIterator[bytes]:
@@ -81,48 +69,6 @@ class FileStorageManager:
81
69
  async def delete_upload(self, uri, kbid):
82
70
  raise NotImplementedError()
83
71
 
84
- async def full_download(self, content_length, content_type, upload_id):
85
- return StreamingResponse(
86
- self.iter_data(upload_id),
87
- media_type=content_type,
88
- headers={
89
- "Content-Length": str(content_length),
90
- "Content-Type": content_type,
91
- },
92
- )
93
-
94
- async def range_download(
95
- self, content_length, content_type, upload_id, range_header
96
- ):
97
- try:
98
- start, _, end = range_header.split("bytes=")[-1].partition("-")
99
- start = int(start)
100
- if len(end) == 0:
101
- # bytes=0- is valid
102
- end = content_length - 1
103
- end = int(end) + 1 # python is inclusive, http is exclusive
104
- except (IndexError, ValueError):
105
- # range errors fallback to full download
106
- raise HTTPRangeNotSatisfiable(detail=f"Range not parsable {range_header}")
107
- if start > end or start < 0:
108
- raise HTTPRangeNotSatisfiable(detail="Invalid range {start}-{end}")
109
- if end > content_length:
110
- raise HTTPRangeNotSatisfiable(
111
- detail="Invalid range {start}-{end}, too large end value"
112
- )
113
-
114
- logger.debug(f"Range request: {range_header}")
115
- headers = {
116
- "Content-Range": f"bytes {start}-{end - 1}/{content_length}",
117
- "Content-Type": content_type,
118
- }
119
-
120
- return StreamingResponse(
121
- self.read_range(upload_id, start, end),
122
- media_type=content_type,
123
- headers=headers,
124
- )
125
-
126
72
  async def iterate_body_chunks(self, request, chunk_size):
127
73
  partial = b""
128
74
  remaining = b""
@@ -146,3 +92,6 @@ class FileStorageManager:
146
92
 
147
93
  if partial or remaining:
148
94
  yield partial + remaining
95
+
96
+ def validate_intermediate_chunk(self, uploaded_bytes: int):
97
+ raise NotImplementedError()
@@ -0,0 +1,125 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ # Copyright (C) 2021 Bosutech XXI S.L.
21
+ #
22
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
23
+ # For commercial licensing, contact us at info@nuclia.com.
24
+ #
25
+ # AGPL:
26
+ # This program is free software: you can redistribute it and/or modify
27
+ # it under the terms of the GNU Affero General Public License as
28
+ # published by the Free Software Foundation, either version 3 of the
29
+ # License, or (at your option) any later version.
30
+ #
31
+ # This program is distributed in the hope that it will be useful,
32
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
33
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34
+ # GNU Affero General Public License for more details.
35
+ #
36
+ # You should have received a copy of the GNU Affero General Public License
37
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
38
+ #
39
+
40
+ from nucliadb import learning_proxy
41
+ from nucliadb.common import datamanagers
42
+ from nucliadb.ingest.orm.exceptions import VectorSetConflict
43
+ from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
44
+ from nucliadb.writer import logger
45
+ from nucliadb_protos import knowledgebox_pb2
46
+ from nucliadb_telemetry import errors
47
+ from nucliadb_utils.utilities import get_storage
48
+
49
+
50
+ async def add(kbid: str, vectorset_id: str) -> None:
51
+ # First off, add the vectorset to the learning configuration if it's not already there
52
+ lconfig = await learning_proxy.get_configuration(kbid)
53
+ assert lconfig is not None
54
+ semantic_models = lconfig.model_dump()["semantic_models"]
55
+ if vectorset_id not in semantic_models:
56
+ semantic_models.append(vectorset_id)
57
+ await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
58
+ lconfig = await learning_proxy.get_configuration(kbid)
59
+ assert lconfig is not None
60
+
61
+ # Then, add the vectorset to the index if it's not already there
62
+ async with datamanagers.with_rw_transaction() as txn:
63
+ kbobj = KnowledgeBox(txn, await get_storage(), kbid)
64
+ vectorset_config = get_vectorset_config(lconfig, vectorset_id)
65
+ try:
66
+ await kbobj.create_vectorset(vectorset_config)
67
+ await txn.commit()
68
+ except VectorSetConflict:
69
+ # Vectorset already exists, nothing to do
70
+ return
71
+
72
+
73
+ async def delete(kbid: str, vectorset_id: str) -> None:
74
+ lconfig = await learning_proxy.get_configuration(kbid)
75
+ if lconfig is not None:
76
+ semantic_models = lconfig.model_dump()["semantic_models"]
77
+ if vectorset_id in semantic_models:
78
+ semantic_models.remove(vectorset_id)
79
+ await learning_proxy.update_configuration(kbid, {"semantic_models": semantic_models})
80
+ try:
81
+ async with datamanagers.with_rw_transaction() as txn:
82
+ kbobj = KnowledgeBox(txn, await get_storage(), kbid)
83
+ await kbobj.delete_vectorset(vectorset_id=vectorset_id)
84
+ await txn.commit()
85
+ except Exception as ex:
86
+ errors.capture_exception(ex)
87
+ logger.exception(
88
+ "Could not delete vectorset from index", extra={"kbid": kbid, "vectorset_id": vectorset_id}
89
+ )
90
+
91
+
92
+ def get_vectorset_config(
93
+ learning_config: learning_proxy.LearningConfiguration, vectorset_id: str
94
+ ) -> knowledgebox_pb2.VectorSetConfig:
95
+ """
96
+ Create a VectorSetConfig from a LearningConfiguration for a given vectorset_id
97
+ """
98
+ vectorset_config = knowledgebox_pb2.VectorSetConfig(vectorset_id=vectorset_id)
99
+ vectorset_index_config = knowledgebox_pb2.VectorIndexConfig(
100
+ vector_type=knowledgebox_pb2.VectorType.DENSE_F32,
101
+ )
102
+ model_config = learning_config.semantic_model_configs[vectorset_id]
103
+
104
+ # Parse similarity function
105
+ parsed_similarity = learning_proxy.SimilarityFunction(model_config.similarity)
106
+ if parsed_similarity == learning_proxy.SimilarityFunction.COSINE.value:
107
+ vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.COSINE
108
+ elif parsed_similarity == learning_proxy.SimilarityFunction.DOT.value:
109
+ vectorset_index_config.similarity = knowledgebox_pb2.VectorSimilarity.DOT
110
+ else:
111
+ raise ValueError(
112
+ f"Unknown similarity function {model_config.similarity}, parsed as {parsed_similarity}"
113
+ )
114
+
115
+ # Parse vector dimension
116
+ vectorset_index_config.vector_dimension = model_config.size
117
+
118
+ # Parse matryoshka dimensions
119
+ if len(model_config.matryoshka_dims) > 0:
120
+ vectorset_index_config.normalize_vectors = True
121
+ vectorset_config.matryoshka_dimensions.extend(model_config.matryoshka_dims)
122
+ else:
123
+ vectorset_index_config.normalize_vectors = False
124
+ vectorset_config.vectorset_index_config.CopyFrom(vectorset_index_config)
125
+ return vectorset_config
@@ -0,0 +1,148 @@
1
+ Metadata-Version: 2.2
2
+ Name: nucliadb
3
+ Version: 6.2.1.post2777
4
+ Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
5
+ Author: NucliaDB Community
6
+ Author-email: nucliadb@nuclia.com
7
+ License: BSD
8
+ Project-URL: Nuclia, https://nuclia.com
9
+ Project-URL: Github, https://github.com/nuclia/nucliadb
10
+ Project-URL: Slack, https://nuclia-community.slack.com
11
+ Project-URL: API Reference, https://docs.nuclia.dev/docs/api
12
+ Keywords: search,semantic,AI
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Information Technology
16
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
17
+ Classifier: Programming Language :: Python
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3 :: Only
23
+ Requires-Python: >=3.9, <4
24
+ Description-Content-Type: text/markdown
25
+ Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2777
26
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2777
27
+ Requires-Dist: nucliadb-protos>=6.2.1.post2777
28
+ Requires-Dist: nucliadb-models>=6.2.1.post2777
29
+ Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
30
+ Requires-Dist: nucliadb-node-binding>=2.26.0
31
+ Requires-Dist: nuclia-models>=0.24.2
32
+ Requires-Dist: uvicorn
33
+ Requires-Dist: argdantic
34
+ Requires-Dist: aiohttp>=3.11.11
35
+ Requires-Dist: lru-dict>=1.1.7
36
+ Requires-Dist: backoff
37
+ Requires-Dist: aiofiles>=0.8.0
38
+ Requires-Dist: psutil>=5.9.7
39
+ Requires-Dist: types-psutil>=5.9.5.17
40
+ Requires-Dist: types-aiofiles>=0.8.3
41
+ Requires-Dist: protobuf>=4.22.3
42
+ Requires-Dist: types-protobuf<5,>=4.24
43
+ Requires-Dist: grpcio<1.63.0,>=1.44.0
44
+ Requires-Dist: grpcio-health-checking<1.63.0,>=1.44.0
45
+ Requires-Dist: grpcio-channelz<1.63.0,>=1.44.0
46
+ Requires-Dist: grpcio-status<1.63.0,>=1.44.0
47
+ Requires-Dist: grpcio-tools<1.63.0,>=1.44.0
48
+ Requires-Dist: grpcio-testing<1.63.0,>=1.44.0
49
+ Requires-Dist: grpcio-reflection<1.63.0,>=1.44.0
50
+ Requires-Dist: orjson>=3.6.7
51
+ Requires-Dist: types-setuptools
52
+ Requires-Dist: pydantic>=2.6
53
+ Requires-Dist: pydantic-settings>=2.2
54
+ Requires-Dist: aiobotocore>=2.9.0
55
+ Requires-Dist: botocore>=1.34.0
56
+ Requires-Dist: google-cloud-storage
57
+ Requires-Dist: gcloud
58
+ Requires-Dist: oauth2client
59
+ Requires-Dist: jwcrypto>=1.5.6
60
+ Requires-Dist: pyyaml>=5.1
61
+ Requires-Dist: fastapi-versioning>=0.10.0
62
+ Requires-Dist: fastapi>=0.95.2
63
+ Requires-Dist: sentry-sdk>=2.8.0
64
+ Requires-Dist: pyjwt>=2.4.0
65
+ Requires-Dist: mmh3>=3.0.0
66
+ Requires-Dist: httpx>=0.23.0
67
+ Requires-Dist: grpc-stubs>=1.44.0
68
+ Requires-Dist: aiodns>=3.0.0
69
+ Requires-Dist: types-orjson
70
+ Requires-Dist: psycopg[binary,pool]
71
+ Requires-Dist: multidict>=6.0.4
72
+ Requires-Dist: deprecated>=1.2.12
73
+ Requires-Dist: asgiref>=3.3.2
74
+ Requires-Dist: jmespath>=1.0.0
75
+ Requires-Dist: idna>=3.3
76
+ Requires-Dist: sniffio>=1.2.0
77
+ Requires-Dist: async_lru>=2.0.4
78
+ Requires-Dist: async-timeout>=4.0.3
79
+ Requires-Dist: cachetools>=5.3.2
80
+ Requires-Dist: types-cachetools>=5.3.0.5
81
+ Requires-Dist: kubernetes_asyncio<30.0.0
82
+ Provides-Extra: redis
83
+ Requires-Dist: redis>=4.3.4; extra == "redis"
84
+ Dynamic: author
85
+ Dynamic: author-email
86
+ Dynamic: classifier
87
+ Dynamic: description
88
+ Dynamic: description-content-type
89
+ Dynamic: home-page
90
+ Dynamic: keywords
91
+ Dynamic: license
92
+ Dynamic: project-url
93
+ Dynamic: provides-extra
94
+ Dynamic: requires-dist
95
+ Dynamic: requires-python
96
+
97
+ # nucliadb
98
+
99
+ This module contains most of the Python components for NucliaDB:
100
+
101
+ - ingest
102
+ - reader
103
+ - writer
104
+ - search
105
+ - train
106
+
107
+ # NucliaDB Migrations
108
+
109
+ This module is used to manage NucliaDB Migrations.
110
+
111
+ All migrations will be provided in the `migrations` folder and have a filename
112
+ that follows the structure: `[sequence]_[migration name].py`.
113
+ Here, `sequence` is the zero-padded order in which the migration should be run.
114
+ Example: `0001_migrate_data.py`.
115
+
116
+ Each migration should have the following:
117
+
118
+ ```python
119
+ from nucliadb.migrator.context import ExecutionContext
120
+
121
+
122
+ async def migrate(context: ExecutionContext) -> None:
123
+ """
124
+ Non-kb type of migration. Migrate global data.
125
+ """
126
+
127
+
128
+ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
129
+ """
130
+ Migrate kb.
131
+
132
+ Must have both types of migrations.
133
+ """
134
+ ```
135
+
136
+
137
+ ## How migrations are managed
138
+
139
+ - All migrations utilize a distributed lock to prevent simultaneously running jobs
140
+ - Global migration state:
141
+ - current version
142
+ - target version
143
+ - KBs to migrate
144
+ - KB Migration State:
145
+ - current version
146
+
147
+ - Migrations are currently run with a deployment and will be continuously retried on failure.
148
+ - Migrations are run in a deployment to make sure a migration does not prevent code deployment.