nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -17,47 +17,191 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import AsyncGenerator, Optional
20
+ from typing import TYPE_CHECKING, AsyncGenerator, Optional
21
21
 
22
22
  import backoff
23
- from nucliadb_protos.resources_pb2 import Basic
24
23
 
24
+ from nucliadb.common.datamanagers.utils import get_kv_pb
25
25
  from nucliadb.common.maindb.driver import Transaction
26
26
  from nucliadb.common.maindb.exceptions import ConflictError, NotFoundError
27
27
 
28
28
  # These should be refactored
29
- from nucliadb.ingest.orm.resource import KB_RESOURCE_SLUG, KB_RESOURCE_SLUG_BASE
30
- from nucliadb.ingest.orm.resource import Resource as ResourceORM
31
- from nucliadb.ingest.orm.utils import get_basic, set_basic
32
- from nucliadb_protos import noderesources_pb2, writer_pb2
29
+ from nucliadb.ingest.settings import settings as ingest_settings
30
+ from nucliadb_protos import resources_pb2
33
31
  from nucliadb_utils.utilities import get_storage
34
32
 
35
- from .utils import with_transaction
33
+ from .utils import with_ro_transaction
36
34
 
35
+ if TYPE_CHECKING:
36
+ from nucliadb.ingest.orm.resource import Resource as ResourceORM
37
+
38
+
39
+ KB_RESOURCE_BASIC = "/kbs/{kbid}/r/{uuid}"
40
+ KB_RESOURCE_BASIC_FS = "/kbs/{kbid}/r/{uuid}/basic" # Only used on FS driver
41
+ KB_RESOURCE_ORIGIN = "/kbs/{kbid}/r/{uuid}/origin"
42
+ KB_RESOURCE_EXTRA = "/kbs/{kbid}/r/{uuid}/extra"
43
+ KB_RESOURCE_SECURITY = "/kbs/{kbid}/r/{uuid}/security"
44
+ KB_RESOURCE_RELATIONS = "/kbs/{kbid}/r/{uuid}/relations"
45
+
46
+ KB_RESOURCE_SLUG_BASE = "/kbs/{kbid}/s/"
47
+ KB_RESOURCE_SLUG = f"{KB_RESOURCE_SLUG_BASE}{{slug}}"
48
+
49
+ KB_RESOURCE_FIELDS = "/kbs/{kbid}/r/{uuid}/f/"
50
+
51
+ KB_RESOURCE_ALL_FIELDS = "/kbs/{kbid}/r/{uuid}/allfields"
37
52
  KB_MATERIALIZED_RESOURCES_COUNT = "/kbs/{kbid}/materialized/resources/count"
53
+
38
54
  KB_RESOURCE_SHARD = "/kbs/{kbid}/r/{uuid}/shard"
39
55
 
40
56
 
41
- @backoff.on_exception(
42
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
43
- )
44
- async def _iter_resource_slugs(*, kbid: str) -> AsyncGenerator[str, None]:
45
- async with with_transaction() as txn:
46
- async for key in txn.keys(
47
- match=KB_RESOURCE_SLUG_BASE.format(kbid=kbid), count=-1
48
- ):
49
- yield key.split("/")[-1]
57
+ async def resource_exists(txn: Transaction, *, kbid: str, rid: str) -> bool:
58
+ basic = await get_basic_raw(txn, kbid=kbid, rid=rid)
59
+ return basic is not None
50
60
 
51
61
 
52
- @backoff.on_exception(
53
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
54
- )
55
- async def _get_resource_ids_from_slugs(kbid: str, slugs: list[str]) -> list[str]:
56
- async with with_transaction() as txn:
57
- rids = await txn.batch_get(
58
- [KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug) for slug in slugs]
62
+ # id and slug
63
+
64
+
65
+ async def get_resource_uuid_from_slug(txn: Transaction, *, kbid: str, slug: str) -> Optional[str]:
66
+ encoded_uuid = await txn.get(KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug, for_update=False))
67
+ if not encoded_uuid:
68
+ return None
69
+ return encoded_uuid.decode()
70
+
71
+
72
+ async def slug_exists(txn: Transaction, *, kbid: str, slug: str) -> bool:
73
+ key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug)
74
+ encoded_slug: Optional[bytes] = await txn.get(key)
75
+ return encoded_slug not in (None, b"")
76
+
77
+
78
+ async def modify_slug(txn: Transaction, *, kbid: str, rid: str, new_slug: str) -> str:
79
+ basic = await get_basic(txn, kbid=kbid, rid=rid)
80
+ if basic is None:
81
+ raise NotFoundError()
82
+ old_slug = basic.slug
83
+
84
+ uuid_for_new_slug = await get_resource_uuid_from_slug(txn, kbid=kbid, slug=new_slug)
85
+ if uuid_for_new_slug is not None:
86
+ if uuid_for_new_slug == rid:
87
+ # Nothing to change
88
+ return old_slug
89
+ else:
90
+ raise ConflictError(f"Slug {new_slug} already exists")
91
+ key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=old_slug)
92
+ await txn.delete(key)
93
+ key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=new_slug)
94
+ await txn.set(key, rid.encode())
95
+ basic.slug = new_slug
96
+ await set_basic(txn, kbid=kbid, rid=rid, basic=basic)
97
+ return old_slug
98
+
99
+
100
+ # resource-shard
101
+
102
+
103
+ @backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
104
+ async def get_resource_shard_id(
105
+ txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
106
+ ) -> Optional[str]:
107
+ shard = await txn.get(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid, for_update=for_update))
108
+ if shard is not None:
109
+ return shard.decode()
110
+ else:
111
+ return None
112
+
113
+
114
+ async def set_resource_shard_id(txn: Transaction, *, kbid: str, rid: str, shard: str):
115
+ await txn.set(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid), shard.encode())
116
+
117
+
118
+ # Basic
119
+
120
+
121
+ async def get_basic(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Basic]:
122
+ raw = await get_basic_raw(txn, kbid=kbid, rid=rid)
123
+ if raw is None:
124
+ return None
125
+ basic = resources_pb2.Basic()
126
+ basic.ParseFromString(raw)
127
+ return basic
128
+
129
+
130
+ async def get_basic_raw(txn: Transaction, *, kbid: str, rid: str) -> Optional[bytes]:
131
+ if ingest_settings.driver == "local":
132
+ raw_basic = await txn.get(KB_RESOURCE_BASIC_FS.format(kbid=kbid, uuid=rid))
133
+ else:
134
+ raw_basic = await txn.get(KB_RESOURCE_BASIC.format(kbid=kbid, uuid=rid))
135
+ return raw_basic
136
+
137
+
138
+ async def set_basic(txn: Transaction, *, kbid: str, rid: str, basic: resources_pb2.Basic):
139
+ if ingest_settings.driver == "local":
140
+ await txn.set(
141
+ KB_RESOURCE_BASIC_FS.format(kbid=kbid, uuid=rid),
142
+ basic.SerializeToString(),
59
143
  )
60
- return [rid.decode() for rid in rids if rid is not None]
144
+ else:
145
+ await txn.set(
146
+ KB_RESOURCE_BASIC.format(kbid=kbid, uuid=rid),
147
+ basic.SerializeToString(),
148
+ )
149
+
150
+
151
+ # Origin
152
+
153
+
154
+ async def get_origin(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Origin]:
155
+ key = KB_RESOURCE_ORIGIN.format(kbid=kbid, uuid=rid)
156
+ return await get_kv_pb(txn, key, resources_pb2.Origin)
157
+
158
+
159
+ async def set_origin(txn: Transaction, *, kbid: str, rid: str, origin: resources_pb2.Origin):
160
+ key = KB_RESOURCE_ORIGIN.format(kbid=kbid, uuid=rid)
161
+ await txn.set(key, origin.SerializeToString())
162
+
163
+
164
+ # Extra
165
+
166
+
167
+ async def get_extra(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Extra]:
168
+ key = KB_RESOURCE_EXTRA.format(kbid=kbid, uuid=rid)
169
+ return await get_kv_pb(txn, key, resources_pb2.Extra)
170
+
171
+
172
+ async def set_extra(txn: Transaction, *, kbid: str, rid: str, extra: resources_pb2.Extra):
173
+ key = KB_RESOURCE_EXTRA.format(kbid=kbid, uuid=rid)
174
+ await txn.set(key, extra.SerializeToString())
175
+
176
+
177
+ # Security
178
+
179
+
180
+ async def get_security(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Security]:
181
+ key = KB_RESOURCE_SECURITY.format(kbid=kbid, uuid=rid)
182
+ return await get_kv_pb(txn, key, resources_pb2.Security)
183
+
184
+
185
+ async def set_security(txn: Transaction, *, kbid: str, rid: str, security: resources_pb2.Security):
186
+ key = KB_RESOURCE_SECURITY.format(kbid=kbid, uuid=rid)
187
+ await txn.set(key, security.SerializeToString())
188
+
189
+
190
+ # Relations
191
+
192
+
193
+ async def get_relations(txn: Transaction, *, kbid: str, rid: str) -> Optional[resources_pb2.Relations]:
194
+ key = KB_RESOURCE_RELATIONS.format(kbid=kbid, uuid=rid)
195
+ return await get_kv_pb(txn, key, resources_pb2.Relations)
196
+
197
+
198
+ async def set_relations(txn: Transaction, *, kbid: str, rid: str, relations: resources_pb2.Relations):
199
+ key = KB_RESOURCE_RELATIONS.format(kbid=kbid, uuid=rid)
200
+ await txn.set(key, relations.SerializeToString())
201
+
202
+
203
+ # KB resource ids (this functions use internal transactions, breaking the
204
+ # datamanager contract. We should rethink them at some point)
61
205
 
62
206
 
63
207
  async def iterate_resource_ids(*, kbid: str) -> AsyncGenerator[str, None]:
@@ -80,52 +224,21 @@ async def iterate_resource_ids(*, kbid: str) -> AsyncGenerator[str, None]:
80
224
  yield rid
81
225
 
82
226
 
83
- @backoff.on_exception(
84
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
85
- )
86
- async def get_resource_shard_id(
87
- txn: Transaction, *, kbid: str, rid: str
88
- ) -> Optional[str]:
89
- shard = await txn.get(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid))
90
- if shard is not None:
91
- return shard.decode()
92
- else:
93
- return None
94
-
95
-
96
- @backoff.on_exception(
97
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
98
- )
99
- async def get_resource(
100
- txn: Transaction, *, kbid: str, rid: str
101
- ) -> Optional[ResourceORM]:
102
- """
103
- Not ideal to return Resource type here but refactoring would
104
- require a lot of changes.
105
-
106
- At least this isolated that dependency here.
107
- """
108
- # prevent circulat imports -- this is not ideal that we have the ORM mix here.
109
- from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
227
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
async def _iter_resource_slugs(*, kbid: str) -> AsyncGenerator[str, None]:
    """
    Yield the last path segment of every key matching the slug base prefix
    of ``kbid``, reading through an internally opened read-only transaction.
    """
    # NOTE(review): this is an async generator, so the backoff wrapper presumably
    # only guards creation of the generator object, not errors raised while it is
    # being iterated -- confirm against the backoff documentation.
    slug_prefix = KB_RESOURCE_SLUG_BASE.format(kbid=kbid)
    async with with_ro_transaction() as txn:
        async for slug_key in txn.keys(match=slug_prefix):
            yield slug_key.rsplit("/", 1)[-1]
110
232
 
111
- kb_orm = KnowledgeBoxORM(txn, await get_storage(), kbid)
112
- return await kb_orm.get(rid)
113
233
 
234
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
async def _get_resource_ids_from_slugs(kbid: str, slugs: list[str]) -> list[str]:
    """
    Resolve ``slugs`` to resource ids with a single batch read in an
    internally opened read-only transaction.

    Slugs whose key returns ``None`` are dropped, so the result may be
    shorter than ``slugs``.
    """
    async with with_ro_transaction() as txn:
        slug_keys = [KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug) for slug in slugs]
        encoded_rids = await txn.batch_get(slug_keys)
        resolved = []
        for encoded in encoded_rids:
            if encoded is not None:
                resolved.append(encoded.decode())
        return resolved
114
239
 
115
- @backoff.on_exception(
116
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3
117
- )
118
- async def get_resource_index_message(
119
- txn: Transaction, *, kbid: str, rid: str
120
- ) -> Optional[noderesources_pb2.Resource]:
121
- # prevent circulat imports -- this is not ideal that we have the ORM mix here.
122
- from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
123
240
 
124
- kb_orm = KnowledgeBoxORM(txn, await get_storage(), kbid)
125
- res = await kb_orm.get(rid)
126
- if res is None:
127
- return None
128
- return (await res.generate_index_message()).brain
241
+ # KB resource count (materialized key)
129
242
 
130
243
 
131
244
  async def calculate_number_of_resources(txn: Transaction, *, kbid: str) -> int:
@@ -150,72 +263,56 @@ async def get_number_of_resources(txn: Transaction, *, kbid: str) -> int:
150
263
  """
151
264
  Return cached number of resources in a knowledgebox.
152
265
  """
153
- raw_value = await txn.get(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid))
266
+ raw_value = await txn.get(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid), for_update=False)
154
267
  if raw_value is None:
155
268
  return -1
156
269
  return int(raw_value)
157
270
 
158
271
 
159
272
  async def set_number_of_resources(txn: Transaction, kbid: str, value: int) -> None:
160
- await txn.set(
161
- KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid), str(value).encode()
162
- )
273
+ await txn.set(KB_MATERIALIZED_RESOURCES_COUNT.format(kbid=kbid), str(value).encode())
163
274
 
164
275
 
165
- async def get_broker_message(
166
- txn: Transaction, *, kbid: str, rid: str
167
- ) -> Optional[writer_pb2.BrokerMessage]:
168
- resource = await get_resource(txn, kbid=kbid, rid=rid)
169
- if resource is None:
170
- return None
276
+ # Fields (materialized key with all field ids)
171
277
 
172
- resource.disable_vectors = False
173
- resource.txn = txn
174
- bm = await resource.generate_broker_message()
175
- return bm
176
278
 
279
async def get_all_field_ids(
    txn: Transaction, *, kbid: str, rid: str, for_update: bool = False
) -> Optional[resources_pb2.AllFieldIDs]:
    """
    Read the materialized ``AllFieldIDs`` protobuf of resource ``rid`` in ``kbid``.

    ``for_update`` is forwarded unchanged to ``get_kv_pb``.
    """
    return await get_kv_pb(
        txn,
        KB_RESOURCE_ALL_FIELDS.format(kbid=kbid, uuid=rid),
        resources_pb2.AllFieldIDs,
        for_update=for_update,
    )
177
284
 
178
- async def get_resource_basic(
179
- txn: Transaction, *, kbid: str, rid: str
180
- ) -> Optional[Basic]:
181
- raw_basic = await get_basic(txn, kbid, rid)
182
- if not raw_basic:
183
- return None
184
- basic = Basic()
185
- basic.ParseFromString(raw_basic)
186
- return basic
187
285
 
286
async def set_all_field_ids(
    txn: Transaction, *, kbid: str, rid: str, allfields: resources_pb2.AllFieldIDs
):
    """Serialize ``allfields`` and write it under the all-fields key of resource ``rid`` in ``kbid``."""
    await txn.set(KB_RESOURCE_ALL_FIELDS.format(kbid=kbid, uuid=rid), allfields.SerializeToString())
188
291
 
189
- async def get_resource_uuid_from_slug(
190
- txn: Transaction, *, kbid: str, slug: str
191
- ) -> Optional[str]:
192
- encoded_uuid = await txn.get(KB_RESOURCE_SLUG.format(kbid=kbid, slug=slug))
193
- if not encoded_uuid:
194
- return None
195
- return encoded_uuid.decode()
196
292
 
293
async def has_field(txn: Transaction, *, kbid: str, rid: str, field_id: resources_pb2.FieldID) -> bool:
    """
    Tell whether ``field_id`` equals one of the entries in the resource's
    materialized list of field ids.

    Returns ``False`` when no list is stored for the resource.
    """
    all_fields = await get_all_field_ids(txn, kbid=kbid, rid=rid)
    if all_fields is None:
        return False
    return any(field_id == candidate for candidate in all_fields.fields)
197
301
 
198
- async def modify_slug(txn: Transaction, *, kbid: str, rid: str, new_slug: str) -> str:
199
- basic = await get_resource_basic(txn, kbid=kbid, rid=rid)
200
- if basic is None:
201
- raise NotFoundError()
202
- old_slug = basic.slug
203
302
 
204
- uuid_for_new_slug = await get_resource_uuid_from_slug(txn, kbid=kbid, slug=new_slug)
205
- if uuid_for_new_slug is not None:
206
- if uuid_for_new_slug == rid:
207
- # Nothing to change
208
- return old_slug
209
- else:
210
- raise ConflictError(f"Slug {new_slug} already exists")
211
- key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=old_slug)
212
- await txn.delete(key)
213
- key = KB_RESOURCE_SLUG.format(kbid=kbid, slug=new_slug)
214
- await txn.set(key, rid.encode())
215
- basic.slug = new_slug
216
- await set_basic(txn, kbid, rid, basic)
217
- return old_slug
303
+ # ORM mix (these functions shouldn't belong here)
218
304
 
219
305
 
220
- async def set_resource_shard_id(txn: Transaction, *, kbid: str, rid: str, shard: str):
221
- await txn.set(KB_RESOURCE_SHARD.format(kbid=kbid, uuid=rid), shard.encode())
306
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=3)
async def get_resource(txn: Transaction, *, kbid: str, rid: str) -> Optional["ResourceORM"]:
    """
    Load resource ``rid`` of knowledge box ``kbid`` through the KnowledgeBox ORM.

    Returning an ORM type from a datamanager is a known compromise: removing
    it would require a large refactor, so the dependency is at least kept
    confined to this function.
    """
    # Imported lazily to avoid a circular import with the ORM package.
    from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM

    storage = await get_storage()
    knowledgebox = KnowledgeBoxORM(txn, storage, kbid)
    return await knowledgebox.get(rid)
@@ -21,22 +21,42 @@ import logging
21
21
  from typing import AsyncGenerator, Optional
22
22
 
23
23
  import orjson
24
+ from pydantic import BaseModel
24
25
 
25
26
  from nucliadb.common.maindb.driver import Transaction
27
+ from nucliadb_protos import knowledgebox_pb2 as kb_pb2
26
28
  from nucliadb_protos import writer_pb2
27
29
 
28
- from .utils import get_kv_pb, with_transaction
30
+ from .utils import get_kv_pb, with_ro_transaction
29
31
 
30
32
  logger = logging.getLogger(__name__)
31
33
 
34
+ KB_ROLLOVER_STATE = "/kbs/{kbid}/rollover/state"
32
35
  KB_ROLLOVER_SHARDS = "/kbs/{kbid}/rollover/shards"
36
+ KB_ROLLOVER_EXTERNAL_INDEX_METADATA = "/kbs/{kbid}/rollover/external_index_metadata"
33
37
  KB_ROLLOVER_RESOURCES_TO_INDEX = "/kbs/{kbid}/rollover/to-index/{resource}"
34
38
  KB_ROLLOVER_RESOURCES_INDEXED = "/kbs/{kbid}/rollover/indexed/{resource}"
35
39
 
36
40
 
37
- async def get_kb_rollover_shards(
38
- txn: Transaction, *, kbid: str
39
- ) -> Optional[writer_pb2.Shards]:
41
class RolloverState(BaseModel):
    """
    Boolean flags describing a knowledge box rollover.

    Every flag starts as ``False``. The model is stored as JSON by
    ``set_rollover_state`` and parsed back by ``get_rollover_state``.
    """

    # Flags are presumably flipped to True as each rollover step completes -- verify against the rollover code.
    rollover_shards_created: bool = False
    external_index_created: bool = False
    resources_scheduled: bool = False
    resources_indexed: bool = False
    cutover_shards: bool = False
    cutover_external_index: bool = False
    resources_validated: bool = False
49
+
50
+
51
class RolloverStateNotFoundError(Exception):
    """Raised when no rollover state is found."""
57
+
58
+
59
async def get_kb_rollover_shards(txn: Transaction, *, kbid: str) -> Optional[writer_pb2.Shards]:
    """Read the ``Shards`` protobuf stored under the rollover shards key of ``kbid`` through ``get_kv_pb``."""
    return await get_kv_pb(txn, KB_ROLLOVER_SHARDS.format(kbid=kbid), writer_pb2.Shards)
42
62
 
@@ -90,7 +110,7 @@ async def add_indexed(
90
110
  kbid: str,
91
111
  resource_id: str,
92
112
  shard_id: str,
93
- modification_time: int
113
+ modification_time: int,
94
114
  ) -> None:
95
115
  to_index = KB_ROLLOVER_RESOURCES_TO_INDEX.format(kbid=kbid, resource=resource_id)
96
116
  indexed = KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource=resource_id)
@@ -106,7 +126,9 @@ async def get_indexed_data(
106
126
  val = await txn.get(key)
107
127
  if val is not None:
108
128
  data = orjson.loads(val)
109
- return tuple(data) # type: ignore
129
+ shard_id: str = data[0]
130
+ modification_time: int = data[1]
131
+ return shard_id, modification_time
110
132
  return None
111
133
 
112
134
 
@@ -122,15 +144,13 @@ async def iter_indexed_keys(*, kbid: str) -> AsyncGenerator[str, None]:
122
144
  internally managed
123
145
  """
124
146
  start_key = KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource="")
125
- async with with_transaction() as txn:
126
- async for key in txn.keys(match=start_key, count=-1):
147
+ async with with_ro_transaction() as txn:
148
+ async for key in txn.keys(match=start_key):
127
149
  yield key.split("/")[-1]
128
150
 
129
151
 
130
- async def _get_batch_indexed_data(
131
- *, kbid, batch: list[str]
132
- ) -> list[tuple[str, tuple[str, int]]]:
133
- async with with_transaction() as txn:
152
+ async def _get_batch_indexed_data(*, kbid, batch: list[str]) -> list[tuple[str, tuple[str, int]]]:
153
+ async with with_ro_transaction() as txn:
134
154
  values = await txn.batch_get(
135
155
  [
136
156
  KB_ROLLOVER_RESOURCES_INDEXED.format(kbid=kbid, resource=resource_id)
@@ -140,14 +160,15 @@ async def _get_batch_indexed_data(
140
160
  results: list[tuple[str, tuple[str, int]]] = []
141
161
  for key, val in zip(batch, values):
142
162
  if val is not None:
143
- data: tuple[str, int] = tuple(orjson.loads(val)) # type: ignore
163
+ shard_id: str
164
+ modification_time: int
165
+ shard_id, modification_time = orjson.loads(val)
166
+ data = (shard_id, modification_time)
144
167
  results.append((key.split("/")[-1], data))
145
168
  return results
146
169
 
147
170
 
148
- async def iterate_indexed_data(
149
- *, kbid: str
150
- ) -> AsyncGenerator[tuple[str, tuple[str, int]], None]:
171
+ async def iterate_indexed_data(*, kbid: str) -> AsyncGenerator[tuple[str, tuple[str, int]], None]:
151
172
  """
152
173
  This function is optimized for reducing the time a transaction is open.
153
174
 
@@ -164,3 +185,43 @@ async def iterate_indexed_data(
164
185
  if len(batch) > 0:
165
186
  for key, val in await _get_batch_indexed_data(kbid=kbid, batch=batch):
166
187
  yield key, val
188
+
189
+
190
async def get_rollover_state(txn: Transaction, kbid: str) -> RolloverState:
    """
    Load and parse the JSON rollover state stored for ``kbid``.

    Raises:
        RolloverStateNotFoundError: when the stored value is missing or empty.
    """
    raw_state = await txn.get(KB_ROLLOVER_STATE.format(kbid=kbid))
    if raw_state:
        return RolloverState.model_validate_json(raw_state)
    raise RolloverStateNotFoundError(kbid)
196
+
197
+
198
async def set_rollover_state(txn: Transaction, kbid: str, state: RolloverState) -> None:
    """Dump ``state`` to JSON, encode it to bytes and write it under the rollover state key of ``kbid``."""
    state_key = KB_ROLLOVER_STATE.format(kbid=kbid)
    payload = state.model_dump_json().encode()
    await txn.set(state_key, payload)
201
+
202
+
203
async def clear_rollover_state(txn: Transaction, kbid: str) -> None:
    """Delete the rollover state key of ``kbid``."""
    await txn.delete(KB_ROLLOVER_STATE.format(kbid=kbid))
206
+
207
+
208
async def update_kb_rollover_external_index_metadata(
    txn: Transaction, *, kbid: str, metadata: kb_pb2.StoredExternalIndexProviderMetadata
) -> None:
    """Serialize ``metadata`` and write it under the rollover external index metadata key of ``kbid``."""
    await txn.set(
        KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid),
        metadata.SerializeToString(),
    )
213
+
214
+
215
async def get_kb_rollover_external_index_metadata(
    txn: Transaction, *, kbid: str
) -> Optional[kb_pb2.StoredExternalIndexProviderMetadata]:
    """
    Read and parse the rollover external index metadata stored for ``kbid``.

    Returns ``None`` when the stored value is missing or empty.
    """
    stored = await txn.get(KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid))
    if stored:
        return kb_pb2.StoredExternalIndexProviderMetadata.FromString(stored)
    return None
223
+
224
+
225
async def delete_kb_rollover_external_index_metadata(txn: Transaction, *, kbid: str) -> None:
    """Delete the rollover external index metadata key of ``kbid``."""
    await txn.delete(KB_ROLLOVER_EXTERNAL_INDEX_METADATA.format(kbid=kbid))
@@ -17,38 +17,26 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from typing import Optional
21
20
 
22
- from nucliadb_protos.knowledgebox_pb2 import Synonyms as PBSynonyms
21
+ from typing import Optional
23
22
 
23
+ from nucliadb.common.datamanagers.utils import get_kv_pb
24
24
  from nucliadb.common.maindb.driver import Transaction
25
+ from nucliadb_protos import knowledgebox_pb2
25
26
 
26
27
  KB_SYNONYMS = "/kbs/{kbid}/synonyms"
27
28
 
28
29
 
29
- class Synonyms:
30
- def __init__(self, txn: Transaction, kbid: str):
31
- self.txn = txn
32
- self.kbid = kbid
33
-
34
- @property
35
- def key(self) -> str:
36
- return KB_SYNONYMS.format(kbid=self.kbid)
37
-
38
- async def set(self, synonyms: PBSynonyms):
39
- body = synonyms.SerializeToString()
40
- await self.txn.set(self.key, body)
41
-
42
- async def get(self) -> Optional[PBSynonyms]:
43
- try:
44
- payload = await self.txn.get(self.key)
45
- except KeyError:
46
- return None
47
- if payload is None:
48
- return None
49
- body = PBSynonyms()
50
- body.ParseFromString(payload)
51
- return body
52
-
53
- async def clear(self):
54
- await self.txn.delete(self.key)
30
async def get(txn: Transaction, *, kbid: str) -> Optional[knowledgebox_pb2.Synonyms]:
    """Read the ``Synonyms`` protobuf stored for ``kbid`` through ``get_kv_pb`` with ``for_update=False``."""
    return await get_kv_pb(
        txn,
        KB_SYNONYMS.format(kbid=kbid),
        knowledgebox_pb2.Synonyms,
        for_update=False,
    )
33
+
34
+
35
async def set(txn: Transaction, *, kbid: str, synonyms: knowledgebox_pb2.Synonyms):
    """Serialize ``synonyms`` and write it under the synonyms key of ``kbid``."""
    await txn.set(KB_SYNONYMS.format(kbid=kbid), synonyms.SerializeToString())
38
+
39
+
40
async def delete(txn: Transaction, *, kbid: str):
    """Delete the synonyms key of ``kbid``."""
    await txn.delete(KB_SYNONYMS.format(kbid=kbid))
@@ -29,21 +29,29 @@ PB_TYPE = TypeVar("PB_TYPE", bound=Message)
29
29
 
30
30
 
31
31
  async def get_kv_pb(
32
- txn: Transaction, key: str, pb_type: Type[PB_TYPE]
32
+ txn: Transaction, key: str, pb_type: Type[PB_TYPE], for_update: bool = True
33
33
  ) -> Optional[PB_TYPE]:
34
- kb_shards_bytes: Optional[bytes] = await txn.get(key)
35
- if kb_shards_bytes:
36
- kb_shards = pb_type()
37
- kb_shards.ParseFromString(kb_shards_bytes)
38
- return kb_shards
39
- else:
34
+ serialized: Optional[bytes] = await txn.get(key, for_update=for_update)
35
+ if serialized is None:
40
36
  return None
37
+ pb = pb_type()
38
+ pb.ParseFromString(serialized)
39
+ return pb
41
40
 
42
41
 
43
42
  @contextlib.asynccontextmanager
44
- async def with_transaction(read_only: bool = False, wait_for_abort: bool = True):
43
+ async def with_rw_transaction():
45
44
  driver = get_driver()
46
- async with driver.transaction(
47
- read_only=read_only, wait_for_abort=wait_for_abort
48
- ) as txn:
45
+ async with driver.transaction(read_only=False) as txn:
49
46
  yield txn
47
+
48
+
49
+ # For backwards compatibility
50
+ with_transaction = with_rw_transaction
51
+
52
+
53
@contextlib.asynccontextmanager
async def with_ro_transaction():
    """
    Async context manager that yields a transaction opened with
    ``read_only=True`` on the driver returned by ``get_driver()``.
    """
    async with get_driver().transaction(read_only=True) as ro_txn:
        yield ro_txn