nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -17,7 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import AsyncGenerator, Optional
20
+ from typing import TYPE_CHECKING, AsyncGenerator, Optional
21
21
 
22
22
  import backoff
23
23
 
@@ -26,13 +26,15 @@ from nucliadb.common.maindb.driver import Transaction
26
26
  from nucliadb.common.maindb.exceptions import ConflictError, NotFoundError
27
27
 
28
28
  # These should be refactored
29
- from nucliadb.ingest.orm.resource import KB_RESOURCE_SLUG, KB_RESOURCE_SLUG_BASE
30
- from nucliadb.ingest.orm.resource import Resource as ResourceORM
31
29
  from nucliadb.ingest.settings import settings as ingest_settings
32
- from nucliadb_protos import noderesources_pb2, resources_pb2, writer_pb2
30
+ from nucliadb_protos import resources_pb2
33
31
  from nucliadb_utils.utilities import get_storage
34
32
 
35
- from .utils import with_transaction
33
+ from .utils import with_ro_transaction
34
+
35
+ if TYPE_CHECKING:
36
+ from nucliadb.ingest.orm.resource import Resource as ResourceORM
37
+
36
38
 
37
39
  KB_RESOURCE_BASIC = "/kbs/{kbid}/r/{uuid}"
38
40
  KB_RESOURCE_BASIC_FS = "/kbs/{kbid}/r/{uuid}/basic" # Only used on FS driver
@@ -41,11 +43,16 @@ KB_RESOURCE_EXTRA = "/kbs/{kbid}/r/{uuid}/extra"
41
43
  KB_RESOURCE_SECURITY = "/kbs/{kbid}/r/{uuid}/security"
42
44
  KB_RESOURCE_RELATIONS = "/kbs/{kbid}/r/{uuid}/relations"
43
45
 
44
- KB_RESOURCE_SHARD = "/kbs/{kbid}/r/{uuid}/shard"
46
+ KB_RESOURCE_SLUG_BASE = "/kbs/{kbid}/s/"
47
+ KB_RESOURCE_SLUG = f"{KB_RESOURCE_SLUG_BASE}{{slug}}"
48
+
49
+ KB_RESOURCE_FIELDS = "/kbs/{kbid}/r/{uuid}/f/"
45
50
 
46
51
  KB_RESOURCE_ALL_FIELDS = "/kbs/{kbid}/r/{uuid}/allfields"
47
52
  KB_MATERIALIZED_RESOURCES_COUNT = "/kbs/{kbid}/materialized/resources/count"
48
53
 
54
+ KB_RESOURCE_SHARD = "/kbs/{kbid}/r/{uuid}/shard"
55
+
49
56
 
50
57
  async def resource_exists(txn: Transaction, *, kbid: str, rid: str) -> bool:
51
58
  basic = await get_basic_raw(txn, kbid=kbid, rid=rid)
@@ -55,10 +62,8 @@ async def resource_exists(txn: Transaction, *, kbid: str, rid: str) -> bool:
55
62
  # id and slug
56
63
 
57
64
 
58
- async def get_resource_uuid_from_slug(
59
- txn: Transaction, *, kbid: str, slug: str
60
- ) -> Optional[str]:
61
- encoded_uuid = await txn.get(KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug))
65
+ async def get_resource_uuid_from_slug(txn: Transaction, *, kbid: str, slug: str) -> Optional[str]:
66
+ encoded_uuid = await txn.get(KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug, for_update=False))
62
67
  if not encoded_uuid:
63
68
  return None
64
69
  return encoded_uuid.decode()
@@ -95,13 +100,11 @@ async def modify_slug(txn: Transaction, *, kbid: str, rid: str, new_slug: str) -
95
100
  # resource-shard
96
101
 
97
102
 
98
- @backoff.on_exception(
99
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
100
- )
103
+ @backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
101
104
  async def get_resource_shard_id(
102
- txn: Transaction, *, kbid: str, rid: str
105
+ txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
103
106
  ) -> Optional[str]:
104
- shard = await txn.get(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid))
107
+ shard = await txn.get(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid, for_update=for_update))
105
108
  if shard is not None:
106
109
  return shard.decode()
107
110
  else:
@@ -115,9 +118,7 @@ async def set_resource_shard_id(txn: Transaction, *, kbid: str, rid: str, shard:
115
118
  # Basic
116
119
 
117
120
 
118
- async def get_basic(
119
- txn: Transaction, *, kbid: str, rid: str
120
- ) -> Optional[resources_pb2.Basic]:
121
+ async def get_basic(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Basic]:
121
122
  raw = await get_basic_raw(txn, kbid=kbid, rid=rid)
122
123
  if raw is None:
123
124
  return None
@@ -134,9 +135,7 @@ async def get_basic_raw(txn: Transaction, *, kbid: str, rid: str) -> Optional[by
134
135
  return raw_basic
135
136
 
136
137
 
137
- async def set_basic(
138
- txn: Transaction, *, kbid: str, rid: str, basic: resources_pb2.Basic
139
- ):
138
+ async def set_basic(txn: Transaction, *, kbid: str, rid: str, basic: resources_pb2.Basic):
140
139
  if ingest_settings.driver == "local":
141
140
  await txn.set(
142
141
  KB_RESOURCE_BASIC_FS.format(kbid=kbid, uuid=rid),
@@ -152,16 +151,12 @@ async def set_basic(
152
151
  # Origin
153
152
 
154
153
 
155
- async def get_origin(
156
- txn: Transaction, *, kbid: str, rid: str
157
- ) -> Optional[resources_pb2.Origin]:
154
+ async def get_origin(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Origin]:
158
155
  key = KB_RESOURCE_ORIGIN.format(kbid=kbid, uuid=rid)
159
156
  return await get_kv_pb(txn, key, resources_pb2.Origin)
160
157
 
161
158
 
162
- async def set_origin(
163
- txn: Transaction, *, kbid: str, rid: str, origin: resources_pb2.Origin
164
- ):
159
+ async def set_origin(txn: Transaction, *, kbid: str, rid: str, origin: resources_pb2.Origin):
165
160
  key = KB_RESOURCE_ORIGIN.format(kbid=kbid, uuid=rid)
166
161
  await txn.set(key, origin.SerializeToString())
167
162
 
@@ -169,16 +164,12 @@ async def set_origin(
169
164
  # Extra
170
165
 
171
166
 
172
- async def get_extra(
173
- txn: Transaction, *, kbid: str, rid: str
174
- ) -> Optional[resources_pb2.Extra]:
167
+ async def get_extra(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Extra]:
175
168
  key = KB_RESOURCE_EXTRA.format(kbid=kbid, uuid=rid)
176
169
  return await get_kv_pb(txn, key, resources_pb2.Extra)
177
170
 
178
171
 
179
- async def set_extra(
180
- txn: Transaction, *, kbid: str, rid: str, extra: resources_pb2.Extra
181
- ):
172
+ async def set_extra(txn: Transaction, *, kbid: str, rid: str, extra: resources_pb2.Extra):
182
173
  key = KB_RESOURCE_EXTRA.format(kbid=kbid, uuid=rid)
183
174
  await txn.set(key, extra.SerializeToString())
184
175
 
@@ -186,16 +177,12 @@ async def set_extra(
186
177
  # Security
187
178
 
188
179
 
189
- async def get_security(
190
- txn: Transaction, *, kbid: str, rid: str
191
- ) -> Optional[resources_pb2.Security]:
180
+ async def get_security(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Security]:
192
181
  key = KB_RESOURCE_SECURITY.format(kbid=kbid, uuid=rid)
193
182
  return await get_kv_pb(txn, key, resources_pb2.Security)
194
183
 
195
184
 
196
- async def set_security(
197
- txn: Transaction, *, kbid: str, rid: str, security: resources_pb2.Security
198
- ):
185
+ async def set_security(txn: Transaction, *, kbid: str, rid: str, security: resources_pb2.Security):
199
186
  key = KB_RESOURCE_SECURITY.format(kbid=kbid, uuid=rid)
200
187
  await txn.set(key, security.SerializeToString())
201
188
 
@@ -203,16 +190,12 @@ async def set_security(
203
190
  # Relations
204
191
 
205
192
 
206
- async def get_relations(
207
- txn: Transaction, *, kbid: str, rid: str
208
- ) -> Optional[resources_pb2.Relations]:
193
+ async def get_relations(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Relations]:
209
194
  key = KB_RESOURCE_RELATIONS.format(kbid=kbid, uuid=rid)
210
195
  return await get_kv_pb(txn, key, resources_pb2.Relations)
211
196
 
212
197
 
213
- async def set_relations(
214
- txn: Transaction, *, kbid: str, rid: str, relations: resources_pb2.Relations
215
- ):
198
+ async def set_relations(txn: Transaction, *, kbid: str, rid: str, relations: resources_pb2.Relations):
216
199
  key = KB_RESOURCE_RELATIONS.format(kbid=kbid, uuid=rid)
217
200
  await txn.set(key, relations.SerializeToString())
218
201
 
@@ -241,25 +224,17 @@ async def iterate_resource_ids(*, kbid: str) -> AsyncGenerator[str, None]:
241
224
  yield rid
242
225
 
243
226
 
244
- @backoff.on_exception(
245
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
246
- )
227
+ @backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
247
228
  async def _iter_resource_slugs(*, kbid: str) -> AsyncGenerator[str, None]:
248
- async with with_transaction() as txn:
249
- async for key in txn.keys(
250
- match=KB_RESOURCE_SLUG_BASE.format(kbid=kbid), count=-1
251
- ):
229
+ async with with_ro_transaction() as txn:
230
+ async for key in txn.keys(match=KB_RESOURCE_SLUG_BASE.format(kbid=kbid)):
252
231
  yield key.split("/")[-1]
253
232
 
254
233
 
255
- @backoff.on_exception(
256
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
257
- )
234
+ @backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
258
235
  async def _get_resource_ids_from_slugs(kbid: str, slugs: list[str]) -> list[str]:
259
- async with with_transaction() as txn:
260
- rids = await txn.batch_get(
261
- [KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug) for slug in slugs]
262
- )
236
+ async with with_ro_transaction() as txn:
237
+ rids = await txn.batch_get([KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug) for slug in slugs])
263
238
  return [rid.decode() for rid in rids if rid is not None]
264
239
 
265
240
 
@@ -288,26 +263,24 @@ async def get_number_of_resources(txn: Transaction, *, kbid: str) -> int:
288
263
  """
289
264
  Return cached number of resources in a knowledgebox.
290
265
  """
291
- raw_value = await txn.get(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid))
266
+ raw_value = await txn.get(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid), for_update=False)
292
267
  if raw_value is None:
293
268
  return -1
294
269
  return int(raw_value)
295
270
 
296
271
 
297
272
async def set_number_of_resources(txn: Transaction, kbid: str, value: int) -> None:
    """Store `value` as the materialized resource count of knowledge box `kbid`.

    The count is written as the encoded bytes of its decimal string form.
    """
    count_key = KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid)
    encoded_count = str(value).encode()
    await txn.set(count_key, encoded_count)
301
274
 
302
275
 
303
276
  # Fields (materialized key with all field ids)
304
277
 
305
278
 
306
279
async def get_all_field_ids(
    txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
) -> Optional[resources_pb2.AllFieldIDs]:
    """Read the materialized `AllFieldIDs` protobuf of resource `rid`.

    `for_update` is forwarded unchanged to `get_kv_pb`; it defaults to False
    here.
    """
    return await get_kv_pb(
        txn,
        KB_RESOURCE_ALL_FIELDS.format(kbid=kbid, uuid=rid),
        resources_pb2.AllFieldIDs,
        for_update=for_update,
    )
311
284
 
312
285
 
313
286
  async def set_all_field_ids(
@@ -317,9 +290,7 @@ async def set_all_field_ids(
317
290
  await txn.set(key, allfields.SerializeToString())
318
291
 
319
292
 
320
- async def has_field(
321
- txn: Transaction, *, kbid: str, rid: str, field_id: resources_pb2.FieldID
322
- ) -> bool:
293
+ async def has_field(txn: Transaction, *, kbid: str, rid: str, field_id: resources_pb2.FieldID) -> bool:
323
294
  fields = await get_all_field_ids(txn, kbid=kbid, rid=rid)
324
295
  if fields is None:
325
296
  return False
@@ -332,25 +303,8 @@ async def has_field(
332
303
  # ORM mix (these functions shouldn't belong here)
333
304
 
334
305
 
335
- async def get_broker_message(
336
- txn: Transaction, *, kbid: str, rid: str
337
- ) -> Optional[writer_pb2.BrokerMessage]:
338
- resource = await get_resource(txn, kbid=kbid, rid=rid)
339
- if resource is None:
340
- return None
341
-
342
- resource.disable_vectors = False
343
- resource.txn = txn
344
- bm = await resource.generate_broker_message()
345
- return bm
346
-
347
-
348
- @backoff.on_exception(
349
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
350
- )
351
- async def get_resource(
352
- txn: Transaction, *, kbid: str, rid: str
353
- ) -> Optional[ResourceORM]:
306
+ @backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
307
+ async def get_resource(txn: Transaction, *, kbid: str, rid: str) -> Optional["ResourceORM"]:
354
308
  """
355
309
  Not ideal to return Resource type here but refactoring would
356
310
  require a lot of changes.
@@ -362,19 +316,3 @@ async def get_resource(
362
316
 
363
317
  kb_orm = KnowledgeBoxORM(txn, await get_storage(), kbid)
364
318
  return await kb_orm.get(rid)
365
-
366
-
367
- @backoff.on_exception(
368
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
369
- )
370
- async def get_resource_index_message(
371
- txn: Transaction, *, kbid: str, rid: str
372
- ) -> Optional[noderesources_pb2.Resource]:
373
- # prevent circulat imports -- this is not ideal that we have the ORM mix here.
374
- from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
375
-
376
- kb_orm = KnowledgeBoxORM(txn, await get_storage(), kbid)
377
- res = await kb_orm.get(rid)
378
- if res is None:
379
- return None
380
- return (await res.generate_index_message()).brain
@@ -21,22 +21,42 @@ import logging
21
21
  from typing import AsyncGenerator, Optional
22
22
 
23
23
  import orjson
24
+ from pydantic import BaseModel
24
25
 
25
26
  from nucliadb.common.maindb.driver import Transaction
27
+ from nucliadb_protos import knowledgebox_pb2 as kb_pb2
26
28
  from nucliadb_protos import writer_pb2
27
29
 
28
- from .utils import get_kv_pb, with_transaction
30
+ from .utils import get_kv_pb, with_ro_transaction
29
31
 
30
32
  logger = logging.getLogger(__name__)
31
33
 
34
+ KB_ROLLOVER_STATE = "/kbs/{kbid}/rollover/state"
32
35
  KB_ROLLOVER_SHARDS = "/kbs/{kbid}/rollover/shards"
36
+ KB_ROLLOVER_EXTERNAL_INDEX_METADATA = "/kbs/{kbid}/rollover/external_index_metadata"
33
37
  KB_ROLLOVER_RESOURCES_TO_INDEX = "/kbs/{kbid}/rollover/to-index/{resource}"
34
38
  KB_ROLLOVER_RESOURCES_INDEXED = "/kbs/{kbid}/rollover/indexed/{resource}"
35
39
 
36
40
 
37
- async def get_kb_rollover_shards(
38
- txn: Transaction, *, kbid: str
39
- ) -> Optional[writer_pb2.Shards]:
41
class RolloverState(BaseModel):
    """Boolean flags describing the rollover of a knowledge box.

    Every flag defaults to False. The model is stored as JSON under
    `KB_ROLLOVER_STATE` by `set_rollover_state` and parsed back by
    `get_rollover_state`.
    """

    # NOTE(review): the flags presumably mark completed rollover steps, in
    # this order -- confirm against the rollover implementation.
    rollover_shards_created: bool = False
    external_index_created: bool = False
    resources_scheduled: bool = False
    resources_indexed: bool = False
    cutover_shards: bool = False
    cutover_external_index: bool = False
    resources_validated: bool = False
49
+
50
+
51
class RolloverStateNotFoundError(Exception):
    """Raised when the rollover state is not found."""
57
+
58
+
59
async def get_kb_rollover_shards(txn: Transaction, *, kbid: str) -> Optional[writer_pb2.Shards]:
    """Read the `Shards` protobuf stored under the rollover shards key of `kbid`.

    The lookup is delegated to `get_kv_pb`, whose result is returned unchanged.
    """
    return await get_kv_pb(txn, KB_ROLLOVER_SHARDS.format(kbid=kbid), writer_pb2.Shards)
42
62
 
@@ -106,7 +126,9 @@ async def get_indexed_data(
106
126
  val = await txn.get(key)
107
127
  if val is not None:
108
128
  data = orjson.loads(val)
109
- return tuple(data) # type: ignore
129
+ shard_id: str = data[0]
130
+ modification_time: int = data[1]
131
+ return shard_id, modification_time
110
132
  return None
111
133
 
112
134
 
@@ -122,15 +144,13 @@ async def iter_indexed_keys(*, kbid: str) -> AsyncGenerator[str, None]:
122
144
  internally managed
123
145
  """
124
146
  start_key = KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource="")
125
- async with with_transaction() as txn:
126
- async for key in txn.keys(match=start_key, count=-1):
147
+ async with with_ro_transaction() as txn:
148
+ async for key in txn.keys(match=start_key):
127
149
  yield key.split("/")[-1]
128
150
 
129
151
 
130
- async def _get_batch_indexed_data(
131
- *, kbid, batch: list[str]
132
- ) -> list[tuple[str, tuple[str, int]]]:
133
- async with with_transaction() as txn:
152
+ async def _get_batch_indexed_data(*, kbid, batch: list[str]) -> list[tuple[str, tuple[str, int]]]:
153
+ async with with_ro_transaction() as txn:
134
154
  values = await txn.batch_get(
135
155
  [
136
156
  KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource=resource_id)
@@ -140,14 +160,15 @@ async def _get_batch_indexed_data(
140
160
  results: list[tuple[str, tuple[str, int]]] = []
141
161
  for key, val in zip(batch, values):
142
162
  if val is not None:
143
- data: tuple[str, int] = tuple(orjson.loads(val)) # type: ignore
163
+ shard_id: str
164
+ modification_time: int
165
+ shard_id, modification_time = orjson.loads(val)
166
+ data = (shard_id, modification_time)
144
167
  results.append((key.split("/")[-1], data))
145
168
  return results
146
169
 
147
170
 
148
- async def iterate_indexed_data(
149
- *, kbid: str
150
- ) -> AsyncGenerator[tuple[str, tuple[str, int]], None]:
171
+ async def iterate_indexed_data(*, kbid: str) -> AsyncGenerator[tuple[str, tuple[str, int]], None]:
151
172
  """
152
173
  This function is optimized for reducing the time a transaction is open.
153
174
 
@@ -164,3 +185,43 @@ async def iterate_indexed_data(
164
185
  if len(batch) > 0:
165
186
  for key, val in await _get_batch_indexed_data(kbid=kbid, batch=batch):
166
187
  yield key, val
188
+
189
+
190
async def get_rollover_state(txn: Transaction, kbid: str) -> RolloverState:
    """Load and parse the rollover state stored for knowledge box `kbid`.

    Raises:
        RolloverStateNotFoundError: if nothing (or an empty value) is stored
            under the rollover state key. The exception carries `kbid`.
    """
    raw_state = await txn.get(KB_ROLLOVER_STATE.format(kbid=kbid))
    if raw_state:
        return RolloverState.model_validate_json(raw_state)
    # A missing key and an empty payload are both treated as "no state".
    raise RolloverStateNotFoundError(kbid)
196
+
197
+
198
async def set_rollover_state(txn: Transaction, kbid: str, state: RolloverState) -> None:
    """Store `state` as encoded JSON under the rollover state key of `kbid`."""
    serialized_state = state.model_dump_json().encode()
    await txn.set(KB_ROLLOVER_STATE.format(kbid=kbid), serialized_state)
201
+
202
+
203
async def clear_rollover_state(txn: Transaction, kbid: str) -> None:
    """Delete the rollover state key of knowledge box `kbid`."""
    await txn.delete(KB_ROLLOVER_STATE.format(kbid=kbid))
206
+
207
+
208
async def update_kb_rollover_external_index_metadata(
    txn: Transaction, *, kbid: str, metadata: kb_pb2.StoredExternalIndexProviderMetadata
) -> None:
    """Serialize `metadata` and store it under the rollover external index metadata key of `kbid`."""
    metadata_key = KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid)
    serialized_metadata = metadata.SerializeToString()
    await txn.set(metadata_key, serialized_metadata)
213
+
214
+
215
async def get_kb_rollover_external_index_metadata(
    txn: Transaction, *, kbid: str
) -> Optional[kb_pb2.StoredExternalIndexProviderMetadata]:
    """Read the rollover external index metadata stored for knowledge box `kbid`.

    Returns None when the key is missing or holds an empty value; otherwise
    the stored bytes parsed as `StoredExternalIndexProviderMetadata`.
    """
    raw_metadata = await txn.get(KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid))
    if raw_metadata:
        return kb_pb2.StoredExternalIndexProviderMetadata.FromString(raw_metadata)
    return None
223
+
224
+
225
async def delete_kb_rollover_external_index_metadata(txn: Transaction, *, kbid: str) -> None:
    """Delete the rollover external index metadata key of knowledge box `kbid`."""
    await txn.delete(KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid))
@@ -29,7 +29,7 @@ KB_SYNONYMS = "/kbs/{kbid}/synonyms"
29
29
 
30
30
async def get(txn: Transaction, *, kbid: str) -> Optional[knowledgebox_pb2.Synonyms]:
    """Read the `Synonyms` protobuf stored for knowledge box `kbid`.

    The read passes `for_update=False` to `get_kv_pb`.
    """
    return await get_kv_pb(
        txn,
        KB_SYNONYMS.format(kbid=kbid),
        knowledgebox_pb2.Synonyms,
        for_update=False,
    )
33
33
 
34
34
 
35
35
  async def set(txn: Transaction, *, kbid: str, synonyms: knowledgebox_pb2.Synonyms):
@@ -29,9 +29,9 @@ PB_TYPE = TypeVar("PB_TYPE", bound=Message)
29
29
 
30
30
 
31
31
  async def get_kv_pb(
32
- txn: Transaction, key: str, pb_type: Type[PB_TYPE]
32
+ txn: Transaction, key: str, pb_type: Type[PB_TYPE], for_update: bool = True
33
33
  ) -> Optional[PB_TYPE]:
34
- serialized: Optional[bytes] = await txn.get(key)
34
+ serialized: Optional[bytes] = await txn.get(key, for_update=for_update)
35
35
  if serialized is None:
36
36
  return None
37
37
  pb = pb_type()
@@ -40,9 +40,18 @@ async def get_kv_pb(
40
40
 
41
41
 
42
42
@contextlib.asynccontextmanager
async def with_rw_transaction():
    """Async context manager yielding a transaction opened with `read_only=False`.

    The transaction comes from the driver returned by `get_driver()`.
    """
    async with get_driver().transaction(read_only=False) as rw_txn:
        yield rw_txn
47
+
48
+
49
+ # For backwards compatibility
50
+ with_transaction = with_rw_transaction
51
+
52
+
53
@contextlib.asynccontextmanager
async def with_ro_transaction():
    """Async context manager yielding a transaction opened with `read_only=True`.

    The transaction comes from the driver returned by `get_driver()`.
    """
    async with get_driver().transaction(read_only=True) as txn:
        yield txn
@@ -0,0 +1,110 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ from typing import AsyncIterator, Optional
21
+
22
+ from nucliadb.common.datamanagers.utils import get_kv_pb
23
+ from nucliadb.common.maindb.driver import Transaction
24
+ from nucliadb_protos import knowledgebox_pb2
25
+
26
+ KB_VECTORSETS = "/kbs/{kbid}/vectorsets"
27
+
28
+
29
class BrokenInvariant(Exception):
    """Exception type exported by this module; nothing in this module raises it.

    NOTE(review): presumably raised by callers when a vectorset invariant is
    violated -- confirm at the call sites.
    """
31
+
32
+
33
async def initialize(txn: Transaction, *, kbid: str):
    """Write an empty `KnowledgeBoxVectorSetsConfig` under the vectorsets key of `kbid`.

    The write is a plain `txn.set`, so a value already stored under the key
    is replaced.
    """
    empty_config = knowledgebox_pb2.KnowledgeBoxVectorSetsConfig()
    await txn.set(KB_VECTORSETS.format(kbid=kbid), empty_config.SerializeToString())
36
+
37
+
38
async def get(
    txn: Transaction, *, kbid: str, vectorset_id: str
) -> Optional[knowledgebox_pb2.VectorSetConfig]:
    """Return the configuration of vectorset `vectorset_id` in `kbid`, or None if absent.

    The stored configuration is read with `for_update=False`.
    """
    stored = await _get_or_default(txn, kbid=kbid, for_update=False)
    position = _find_vectorset(stored, vectorset_id)
    return None if position is None else stored.vectorsets[position]
46
+
47
+
48
async def exists(txn: Transaction, *, kbid: str, vectorset_id: str) -> bool:
    """Tell whether knowledge box `kbid` has a vectorset with id `vectorset_id`.

    Args:
        txn: transaction used for the read (performed with `for_update=False`).
        kbid: knowledge box id.
        vectorset_id: id of the vectorset to look for.

    Returns:
        True if a stored vectorset configuration has that id, False otherwise.
    """
    kb_vectorsets = await _get_or_default(txn, kbid=kbid, for_update=False)
    return _find_vectorset(kb_vectorsets, vectorset_id) is not None
51
+
52
+
53
async def iter(
    txn: Transaction, *, kbid: str
) -> AsyncIterator[tuple[str, knowledgebox_pb2.VectorSetConfig]]:
    """Yield `(vectorset_id, config)` for every vectorset configured in `kbid`.

    The configuration is read once, with `for_update=False`, before any item
    is yielded.
    """
    stored = await _get_or_default(txn, kbid=kbid, for_update=False)
    for vectorset_config in stored.vectorsets:
        yield vectorset_config.vectorset_id, vectorset_config
59
+
60
+
61
async def set(txn: Transaction, *, kbid: str, config: knowledgebox_pb2.VectorSetConfig):
    """Create or update a vectorset configuration.

    The stored configuration is read with `for_update=True`, modified in
    memory and written back whole.
    """
    kb_vectorsets = await _get_or_default(txn, kbid=kbid, for_update=True)
    position = _find_vectorset(kb_vectorsets, config.vectorset_id)
    if position is not None:
        # Known vectorset id: overwrite the stored entry in place.
        kb_vectorsets.vectorsets[position].CopyFrom(config)
    else:
        # Unknown vectorset id: add it as a new entry.
        kb_vectorsets.vectorsets.append(config)

    await txn.set(KB_VECTORSETS.format(kbid=kbid), kb_vectorsets.SerializeToString())
74
+
75
+
76
async def delete(txn: Transaction, *, kbid: str, vectorset_id: str):
    """Remove vectorset `vectorset_id` from the configuration of `kbid`.

    Nothing is written when the vectorset is not present.
    """
    kb_vectorsets = await _get_or_default(txn, kbid=kbid, for_update=True)
    position = _find_vectorset(kb_vectorsets, vectorset_id)
    if position is not None:
        del kb_vectorsets.vectorsets[position]
        await txn.set(KB_VECTORSETS.format(kbid=kbid), kb_vectorsets.SerializeToString())
    # Otherwise the vectorset is already gone and there is nothing to store.
86
+
87
+
88
# XXX At some point in the vectorset epic, we should make this key mandatory and
# fail instead of providing a default
async def _get_or_default(
    txn: Transaction,
    *,
    kbid: str,
    for_update: bool = True,
) -> knowledgebox_pb2.KnowledgeBoxVectorSetsConfig:
    """Return the vectorsets configuration stored for `kbid`, or an empty one.

    `for_update` is forwarded unchanged to `get_kv_pb`. A falsy result from
    `get_kv_pb` (for instance None) is replaced by a new, empty
    `KnowledgeBoxVectorSetsConfig`.
    """
    stored = await get_kv_pb(
        txn,
        KB_VECTORSETS.format(kbid=kbid),
        knowledgebox_pb2.KnowledgeBoxVectorSetsConfig,
        for_update=for_update,
    )
    if not stored:
        return knowledgebox_pb2.KnowledgeBoxVectorSetsConfig()
    return stored
101
+
102
+
103
+ def _find_vectorset(
104
+ kb_vectorsets: knowledgebox_pb2.KnowledgeBoxVectorSetsConfig, vectorset_id: str
105
+ ) -> Optional[int]:
106
+ """Return the position of the vectorset in `vectorsets` or `None` if not found."""
107
+ for idx, vectorset in enumerate(kb_vectorsets.vectorsets):
108
+ if vectorset.vectorset_id == vectorset_id:
109
+ return idx
110
+ return None