nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,102 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """
22
+ Atomic datamanagers
23
+
24
+ This module aims to provide a simple way to call a datamanager function in a
25
+ single transaction, avoiding the need to wrap each call as in this example:
26
+
27
+ ```
28
+ async def <function>(...):
29
+ async with datamanagers.with_transaction() as txn:
30
+ await datamanagers.<module>.<function>(...)
31
+ ```
32
+
33
+ Or simply a more handy way to call a datamanager operation without caring about
34
+ its transaction
35
+
36
+ """
37
+
38
+ import sys
39
+ from functools import wraps
40
+
41
+ from . import kb as kb_dm
42
+ from . import labels as labels_dm
43
+ from . import resources as resources_dm
44
+ from . import synonyms as synonyms_dm
45
+ from .utils import with_ro_transaction, with_transaction
46
+
47
+ # XXX: we are using the non-exported _ParamSpec to support 3.9. Whenever we
48
+ # upgrade to >= 3.10 we'll be able to use ParamSpecKwargs and improve the
49
+ # typing. We are abusing ParamSpec anyway to better support text editors, so
50
+ # we also need to ignore some mypy complaints
51
+
52
+ __python_version = (sys.version_info.major, sys.version_info.minor)
53
+ if __python_version == (3, 9):
54
+ from typing_extensions import ParamSpec
55
+ else:
56
+ from typing import ParamSpec # type: ignore
57
+
58
+ P = ParamSpec("P")
59
+
60
+
61
+ def ro_txn_wrap(fun: P) -> P: # type: ignore
62
+ @wraps(fun)
63
+ async def wrapper(**kwargs: P.kwargs):
64
+ async with with_ro_transaction() as txn:
65
+ return await fun(txn, **kwargs)
66
+
67
+ return wrapper
68
+
69
+
70
+ def rw_txn_wrap(fun: P) -> P: # type: ignore
71
+ @wraps(fun)
72
+ async def wrapper(**kwargs: P.kwargs):
73
+ async with with_transaction() as txn:
74
+ result = await fun(txn, **kwargs)
75
+ await txn.commit()
76
+ return result
77
+
78
+ return wrapper
79
+
80
+
81
+ class kb:
82
+ exists_kb = ro_txn_wrap(kb_dm.exists_kb)
83
+ get_config = ro_txn_wrap(kb_dm.get_config)
84
+ get_external_index_provider_metadata = ro_txn_wrap(kb_dm.get_external_index_provider_metadata)
85
+
86
+
87
+ class resources:
88
+ get_resource_uuid_from_slug = ro_txn_wrap(resources_dm.get_resource_uuid_from_slug)
89
+ resource_exists = ro_txn_wrap(resources_dm.resource_exists)
90
+ slug_exists = ro_txn_wrap(resources_dm.slug_exists)
91
+
92
+
93
+ class labelset:
94
+ get = ro_txn_wrap(labels_dm.get_labelset)
95
+ set = rw_txn_wrap(labels_dm.set_labelset)
96
+ delete = rw_txn_wrap(labels_dm.delete_labelset)
97
+ get_all = ro_txn_wrap(labels_dm.get_labels)
98
+
99
+
100
+ class synonyms:
101
+ get = ro_txn_wrap(synonyms_dm.get)
102
+ set = rw_txn_wrap(synonyms_dm.set)
@@ -31,13 +31,13 @@ logger = logging.getLogger(__name__)
31
31
  KB_SHARDS = "/kbs/{kbid}/shards"
32
32
 
33
33
 
34
- async def get_kb_shards(txn: Transaction, *, kbid: str) -> Optional[writer_pb2.Shards]:
34
+ async def get_kb_shards(
35
+ txn: Transaction, *, kbid: str, for_update: bool = False
36
+ ) -> Optional[writer_pb2.Shards]:
35
37
  key = KB_SHARDS.format(kbid=kbid)
36
- return await get_kv_pb(txn, key, writer_pb2.Shards)
38
+ return await get_kv_pb(txn, key, writer_pb2.Shards, for_update=for_update)
37
39
 
38
40
 
39
- async def update_kb_shards(
40
- txn: Transaction, *, kbid: str, shards: writer_pb2.Shards
41
- ) -> None:
41
+ async def update_kb_shards(txn: Transaction, *, kbid: str, shards: writer_pb2.Shards) -> None:
42
42
  key = KB_SHARDS.format(kbid=kbid)
43
43
  await txn.set(key, shards.SerializeToString())
@@ -85,11 +85,9 @@ async def set_entities_group(
85
85
  await txn.set(key, entities.SerializeToString())
86
86
 
87
87
 
88
- async def iterate_entities_groups(
89
- txn: Transaction, *, kbid: str
90
- ) -> AsyncGenerator[str, None]:
88
+ async def iterate_entities_groups(txn: Transaction, *, kbid: str) -> AsyncGenerator[str, None]:
91
89
  entities_key = KB_ENTITIES.format(kbid=kbid)
92
- async for key in txn.keys(entities_key, count=-1):
90
+ async for key in txn.keys(entities_key):
93
91
  group = key.split("/")[-1]
94
92
  yield group
95
93
 
@@ -106,9 +104,7 @@ async def get_entities_group(
106
104
  return eg
107
105
 
108
106
 
109
- async def get_deleted_groups(
110
- txn: Transaction, *, kbid: str
111
- ) -> kb_pb2.DeletedEntitiesGroups:
107
+ async def get_deleted_groups(txn: Transaction, *, kbid: str) -> kb_pb2.DeletedEntitiesGroups:
112
108
  deleted_groups_key = KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid)
113
109
  payload = await txn.get(deleted_groups_key)
114
110
  deg = kb_pb2.DeletedEntitiesGroups()
@@ -122,18 +118,14 @@ async def mark_group_as_deleted(txn: Transaction, *, kbid: str, group: str) -> N
122
118
  deg = await get_deleted_groups(txn, kbid=kbid)
123
119
  if group not in deg.entities_groups:
124
120
  deg.entities_groups.append(group)
125
- await txn.set(
126
- KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString()
127
- )
121
+ await txn.set(KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString())
128
122
 
129
123
 
130
124
  async def unmark_group_as_deleted(txn: Transaction, *, kbid: str, group: str) -> None:
131
125
  deg = await get_deleted_groups(txn, kbid=kbid)
132
126
  if group in deg.entities_groups:
133
127
  deg.entities_groups.remove(group)
134
- await txn.set(
135
- KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString()
136
- )
128
+ await txn.set(KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString())
137
129
 
138
130
 
139
131
  async def get_entities_meta_cache(txn: Transaction, *, kbid: str) -> EntitiesMetaCache:
@@ -143,7 +135,5 @@ async def get_entities_meta_cache(txn: Transaction, *, kbid: str) -> EntitiesMet
143
135
  return pickle.loads(value)
144
136
 
145
137
 
146
- async def set_entities_meta_cache(
147
- txn: Transaction, kbid: str, cache: EntitiesMetaCache
148
- ) -> None:
138
+ async def set_entities_meta_cache(txn: Transaction, kbid: str, cache: EntitiesMetaCache) -> None:
149
139
  await txn.set(KB_ENTITIES_CACHE.format(kbid=kbid), pickle.dumps(cache, protocol=5))
@@ -0,0 +1,84 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from typing import Optional
22
+
23
+ from google.protobuf.message import Message
24
+
25
+ from nucliadb.common.datamanagers.utils import get_kv_pb
26
+ from nucliadb.common.maindb.driver import Transaction
27
+ from nucliadb_protos import writer_pb2
28
+
29
+ KB_RESOURCE_FIELD = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}"
30
+ KB_RESOURCE_FIELD_ERROR = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}/error"
31
+
32
+
33
+ async def get_raw(
34
+ txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str
35
+ ) -> Optional[bytes]:
36
+ key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
37
+ return await txn.get(key)
38
+
39
+
40
+ async def set(
41
+ txn: Transaction,
42
+ *,
43
+ kbid: str,
44
+ rid: str,
45
+ field_type: str,
46
+ field_id: str,
47
+ value: Message,
48
+ ):
49
+ key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
50
+ await txn.set(key, value.SerializeToString())
51
+
52
+
53
+ async def delete(txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str):
54
+ base_key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
55
+ # Make sure we explicitly delete the field and any nested key
56
+ keys_to_delete = []
57
+ async for key in txn.keys(base_key):
58
+ keys_to_delete.append(key)
59
+
60
+ for key in keys_to_delete:
61
+ await txn.delete(key)
62
+
63
+
64
+ # Error
65
+
66
+
67
+ async def get_error(
68
+ txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str
69
+ ) -> Optional[writer_pb2.Error]:
70
+ key = KB_RESOURCE_FIELD_ERROR.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
71
+ return await get_kv_pb(txn, key, writer_pb2.Error)
72
+
73
+
74
+ async def set_error(
75
+ txn: Transaction,
76
+ *,
77
+ kbid: str,
78
+ rid: str,
79
+ field_type: str,
80
+ field_id: str,
81
+ error: writer_pb2.Error,
82
+ ):
83
+ key = KB_RESOURCE_FIELD_ERROR.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
84
+ await txn.set(key, error.SerializeToString())
@@ -33,15 +33,43 @@ KB_SLUGS = KB_SLUGS_BASE + "{slug}"
33
33
  logger = logging.getLogger(__name__)
34
34
 
35
35
 
36
+ async def get_kbs(txn: Transaction, *, prefix: str = "") -> AsyncIterator[tuple[str, str]]:
37
+ async for key in txn.keys(KB_SLUGS.format(slug=prefix)):
38
+ slug = key.replace(KB_SLUGS_BASE, "")
39
+ uuid = await get_kb_uuid(txn, slug=slug)
40
+ if uuid is None:
41
+ logger.error(f"KB with slug ({slug}) but without uuid?")
42
+ continue
43
+ yield (uuid, slug)
44
+
45
+
36
46
  async def exists_kb(txn: Transaction, *, kbid: str) -> bool:
37
- return await get_config(txn, kbid=kbid) is not None
47
+ return await get_config(txn, kbid=kbid, for_update=False) is not None
48
+
49
+
50
+ async def get_kb_uuid(txn: Transaction, *, slug: str) -> Optional[str]:
51
+ uuid = await txn.get(KB_SLUGS.format(slug=slug), for_update=False)
52
+ if uuid is not None:
53
+ return uuid.decode()
54
+ else:
55
+ return None
56
+
57
+
58
+ async def set_kbid_for_slug(txn: Transaction, *, slug: str, kbid: str):
59
+ key = KB_SLUGS.format(slug=slug)
60
+ await txn.set(key, kbid.encode())
61
+
62
+
63
+ async def delete_kb_slug(txn: Transaction, *, slug: str):
64
+ key = KB_SLUGS.format(slug=slug)
65
+ await txn.delete(key)
38
66
 
39
67
 
40
68
  async def get_config(
41
- txn: Transaction, *, kbid: str
69
+ txn: Transaction, *, kbid: str, for_update: bool = False
42
70
  ) -> Optional[knowledgebox_pb2.KnowledgeBoxConfig]:
43
71
  key = KB_UUID.format(kbid=kbid)
44
- payload = await txn.get(key)
72
+ payload = await txn.get(key, for_update=for_update)
45
73
  if payload is None:
46
74
  return None
47
75
  response = knowledgebox_pb2.KnowledgeBoxConfig()
@@ -49,10 +77,18 @@ async def get_config(
49
77
  return response
50
78
 
51
79
 
52
- async def get_model_metadata(
53
- txn: Transaction, *, kbid: str
54
- ) -> knowledgebox_pb2.SemanticModelMetadata:
55
- shards_obj = await cluster.get_kb_shards(txn, kbid=kbid)
80
+ async def set_config(txn: Transaction, *, kbid: str, config: knowledgebox_pb2.KnowledgeBoxConfig):
81
+ key = KB_UUID.format(kbid=kbid)
82
+ await txn.set(key, config.SerializeToString())
83
+
84
+
85
+ async def delete_config(txn: Transaction, *, kbid: str) -> None:
86
+ key = KB_UUID.format(kbid=kbid)
87
+ await txn.delete(key)
88
+
89
+
90
+ async def get_model_metadata(txn: Transaction, *, kbid: str) -> knowledgebox_pb2.SemanticModelMetadata:
91
+ shards_obj = await cluster.get_kb_shards(txn, kbid=kbid, for_update=False)
56
92
  if shards_obj is None:
57
93
  raise KnowledgeBoxNotFound(kbid)
58
94
  if shards_obj.HasField("model"):
@@ -60,26 +96,67 @@ async def get_model_metadata(
60
96
  else:
61
97
  # B/c code for old KBs that do not have the `model` attribute set in the Shards object.
62
98
  # Cleanup this code after a migration is done unifying all fields under `model` (on-prem and cloud).
63
- return knowledgebox_pb2.SemanticModelMetadata(
64
- similarity_function=shards_obj.similarity
65
- )
99
+ return knowledgebox_pb2.SemanticModelMetadata(similarity_function=shards_obj.similarity)
66
100
 
67
101
 
68
- async def get_kb_uuid(txn: Transaction, *, slug: str) -> Optional[str]:
69
- uuid = await txn.get(KB_SLUGS.format(slug=slug))
70
- if uuid is not None:
71
- return uuid.decode()
102
+ # DEPRECATED: this function should be removed once the "default" vectorset
103
+ # concept is removed and processing sends us all messages with a vectorset_id
104
+ async def get_matryoshka_vector_dimension(
105
+ txn: Transaction,
106
+ *,
107
+ kbid: str,
108
+ vectorset_id: Optional[str] = None,
109
+ ) -> Optional[int]:
110
+ """Return vector dimension for matryoshka models"""
111
+ from . import vectorsets
112
+
113
+ async for _, vs in vectorsets.iter(txn, kbid=kbid):
114
+ if len(vs.matryoshka_dimensions) > 0 and vs.vectorset_index_config.vector_dimension:
115
+ if vs.vectorset_index_config.vector_dimension in vs.matryoshka_dimensions:
116
+ return vs.vectorset_index_config.vector_dimension
117
+ else:
118
+ logger.error(
119
+ "KB has an invalid matryoshka dimension!",
120
+ extra={
121
+ "kbid": kbid,
122
+ "vector_dimension": vs.vectorset_index_config.vector_dimension,
123
+ "matryoshka_dimensions": vs.matryoshka_dimensions,
124
+ },
125
+ )
126
+ return None
72
127
  else:
128
+ # fallback for KBs that don't have vectorset
129
+ model_metadata = await get_model_metadata(txn, kbid=kbid)
130
+ dimension = None
131
+ if len(model_metadata.matryoshka_dimensions) > 0 and model_metadata.vector_dimension:
132
+ if model_metadata.vector_dimension in model_metadata.matryoshka_dimensions:
133
+ dimension = model_metadata.vector_dimension
134
+ else:
135
+ logger.error(
136
+ "KB has an invalid matryoshka dimension!",
137
+ extra={
138
+ "kbid": kbid,
139
+ "vector_dimension": model_metadata.vector_dimension,
140
+ "matryoshka_dimensions": model_metadata.matryoshka_dimensions,
141
+ },
142
+ )
143
+ return dimension
144
+
145
+
146
+ async def get_external_index_provider_metadata(
147
+ txn: Transaction, *, kbid: str
148
+ ) -> Optional[knowledgebox_pb2.StoredExternalIndexProviderMetadata]:
149
+ kb_config = await get_config(txn, kbid=kbid)
150
+ if kb_config is None:
73
151
  return None
152
+ return kb_config.external_index_provider
74
153
 
75
154
 
76
- async def get_kbs(
77
- txn: Transaction, *, prefix: str = ""
78
- ) -> AsyncIterator[tuple[str, str]]:
79
- async for key in txn.keys(KB_SLUGS.format(slug=prefix), count=-1):
80
- slug = key.replace(KB_SLUGS_BASE, "")
81
- uuid = await get_kb_uuid(txn, slug=slug)
82
- if uuid is None:
83
- logger.error(f"KB with slug ({slug}) but without uuid?")
84
- continue
85
- yield (uuid, slug)
155
+ async def set_external_index_provider_metadata(
156
+ txn: Transaction, *, kbid: str, metadata: knowledgebox_pb2.StoredExternalIndexProviderMetadata
157
+ ):
158
+ kb_config = await get_config(txn, kbid=kbid)
159
+ if kb_config is None:
160
+ raise KnowledgeBoxNotFound(kbid)
161
+ kb_config.external_index_provider.CopyFrom(metadata)
162
+ await set_config(txn, kbid=kbid, config=kb_config)
@@ -17,6 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ import logging
20
21
  from typing import Optional
21
22
 
22
23
  import orjson
@@ -24,6 +25,8 @@ import orjson
24
25
  from nucliadb.common.maindb.driver import Transaction
25
26
  from nucliadb_protos import knowledgebox_pb2 as kb_pb2
26
27
 
28
+ logger = logging.getLogger(__name__)
29
+
27
30
  KB_LABELS = "/kbs/{kbid}/labels"
28
31
  KB_LABELSET = "/kbs/{kbid}/labels/{id}"
29
32
  KB_LABELSET_IDS = "/kbs/{kbid}/ids-labels"
@@ -34,7 +37,9 @@ async def get_labels(txn: Transaction, *, kbid: str) -> kb_pb2.Labels:
34
37
  Get all labels for a knowledge box (from multiple labelsets)
35
38
  """
36
39
  labels = kb_pb2.Labels()
37
- labelset_ids = await _get_labelset_ids_bw_compat(txn, kbid=kbid)
40
+ labelset_ids = await _get_labelset_ids(txn, kbid=kbid)
41
+ if labelset_ids is None:
42
+ return labels
38
43
  for labelset_id in labelset_ids:
39
44
  labelset = await txn.get(KB_LABELSET.format(kbid=kbid, id=labelset_id))
40
45
  if not labelset:
@@ -45,76 +50,41 @@ async def get_labels(txn: Transaction, *, kbid: str) -> kb_pb2.Labels:
45
50
  return labels
46
51
 
47
52
 
48
- async def _get_labelset_ids_bw_compat(txn: Transaction, *, kbid: str) -> list[str]:
49
- labelsets = await _get_labelset_ids(txn, kbid=kbid)
50
- if labelsets is not None:
51
- return labelsets
52
- # TODO: Remove this after migration #11
53
- return await _deprecated_scan_labelset_ids(txn, kbid=kbid)
54
-
55
-
56
- async def _deprecated_scan_labelset_ids(txn: Transaction, *, kbid: str) -> list[str]:
57
- labelsets = []
58
- labels_key = KB_LABELS.format(kbid=kbid)
59
- async for key in txn.keys(labels_key, count=-1, include_start=False):
60
- lsid = key.split("/")[-1]
61
- labelsets.append(lsid)
62
- return labelsets
63
-
64
-
65
53
  async def _get_labelset_ids(txn: Transaction, *, kbid: str) -> Optional[list[str]]:
66
54
  key = KB_LABELSET_IDS.format(kbid=kbid)
67
- data = await txn.get(key)
55
+ data = await txn.get(key, for_update=True)
68
56
  if not data:
69
57
  return None
70
58
  return orjson.loads(data)
71
59
 
72
60
 
73
- async def _add_to_labelset_ids(
74
- txn: Transaction, *, kbid: str, labelsets: list[str]
75
- ) -> None:
61
+ async def _add_to_labelset_ids(txn: Transaction, *, kbid: str, labelsets: list[str]) -> None:
62
+ updated = set(labelsets)
76
63
  previous = await _get_labelset_ids(txn, kbid=kbid)
77
- needs_set = False
78
- if previous is None:
79
- # TODO: Remove this after migration #11
80
- needs_set = True
81
- previous = await _deprecated_scan_labelset_ids(txn, kbid=kbid)
82
- for labelset in labelsets:
83
- if labelset not in previous:
84
- needs_set = True
85
- previous.append(labelset)
86
- if needs_set:
87
- await _set_labelset_ids(txn, kbid=kbid, labelsets=previous)
88
-
89
-
90
- async def _delete_from_labelset_ids(
91
- txn: Transaction, *, kbid: str, labelsets: list[str]
92
- ) -> None:
93
- needs_set = False
64
+ if previous is not None:
65
+ updated.update(previous)
66
+ if previous is None or previous != updated:
67
+ await _set_labelset_ids(txn, kbid=kbid, labelsets=list(updated))
68
+
69
+
70
+ async def _delete_from_labelset_ids(txn: Transaction, *, kbid: str, labelsets: list[str]) -> None:
94
71
  previous = await _get_labelset_ids(txn, kbid=kbid)
95
72
  if previous is None:
96
- # TODO: Remove this after migration #11
97
- needs_set = True
98
- previous = await _deprecated_scan_labelset_ids(txn, kbid=kbid)
99
- for labelset in labelsets:
100
- if labelset in previous:
101
- needs_set = True
102
- previous.remove(labelset)
103
- if needs_set:
104
- await _set_labelset_ids(txn, kbid=kbid, labelsets=previous)
105
-
106
-
107
- async def _set_labelset_ids(
108
- txn: Transaction, *, kbid: str, labelsets: list[str]
109
- ) -> None:
73
+ # Nothing to delete
74
+ return
75
+ previous_set = set(previous)
76
+ updated = previous_set - set(labelsets)
77
+ if previous_set != updated:
78
+ await _set_labelset_ids(txn, kbid=kbid, labelsets=list(updated))
79
+
80
+
81
+ async def _set_labelset_ids(txn: Transaction, *, kbid: str, labelsets: list[str]) -> None:
110
82
  key = KB_LABELSET_IDS.format(kbid=kbid)
111
83
  data = orjson.dumps(labelsets)
112
84
  await txn.set(key, data)
113
85
 
114
86
 
115
- async def get_labelset(
116
- txn: Transaction, *, kbid: str, labelset_id: str
117
- ) -> Optional[kb_pb2.LabelSet]:
87
+ async def get_labelset(txn: Transaction, *, kbid: str, labelset_id: str) -> Optional[kb_pb2.LabelSet]:
118
88
  labelset_key = KB_LABELSET.format(kbid=kbid, id=labelset_id)
119
89
  payload = await txn.get(labelset_key)
120
90
  if payload:
@@ -28,9 +28,7 @@ logger = logging.getLogger(__name__)
28
28
  PULL_PARTITION_OFFSET = "/processing/pull-offset/{pull_type_id}/{partition}"
29
29
 
30
30
 
31
- async def get_pull_offset(
32
- txn: Transaction, *, pull_type_id: str, partition: str
33
- ) -> Optional[int]:
31
+ async def get_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str) -> Optional[int]:
34
32
  key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
35
33
  val: Optional[bytes] = await txn.get(key)
36
34
  if val is not None:
@@ -38,8 +36,6 @@ async def get_pull_offset(
38
36
  return None
39
37
 
40
38
 
41
- async def set_pull_offset(
42
- txn: Transaction, *, pull_type_id: str, partition: str, offset: int
43
- ) -> None:
39
+ async def set_pull_offset(txn: Transaction, *, pull_type_id: str, partition: str, offset: int) -> None:
44
40
  key = PULL_PARTITION_OFFSET.format(pull_type_id=pull_type_id, partition=partition)
45
41
  await txn.set(key, str(offset).encode())