nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418)
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,52 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ from pydantic import Field
21
+ from pydantic_settings import BaseSettings
22
+
23
+
24
+ class ExternalIndexProvidersSettings(BaseSettings):
25
+ pinecone_upsert_parallelism: int = Field(
26
+ default=3,
27
+ title="Pinecone upsert parallelism",
28
+ description="Number of parallel upserts to Pinecone on each set resource operation",
29
+ )
30
+ pinecone_delete_parallelism: int = Field(
31
+ default=2,
32
+ title="Pinecone delete parallelism",
33
+ description="Number of parallel deletes to Pinecone on each delete resource operation",
34
+ )
35
+ pinecone_upsert_timeout: float = Field(
36
+ default=10.0,
37
+ title="Pinecone upsert timeout",
38
+ description="Timeout in seconds for each upsert operation to Pinecone",
39
+ )
40
+ pinecone_delete_timeout: float = Field(
41
+ default=10.0,
42
+ title="Pinecone delete timeout",
43
+ description="Timeout in seconds for each delete operation to Pinecone",
44
+ )
45
+ pinecone_query_timeout: float = Field(
46
+ default=10.0,
47
+ title="Pinecone query timeout",
48
+ description="Timeout in seconds for each query operation to Pinecone",
49
+ )
50
+
51
+
52
+ settings = ExternalIndexProvidersSettings()
@@ -44,14 +44,11 @@ class NucliaAuthHTTPClient:
44
44
  def __init__(self):
45
45
  self.session = aiohttp.ClientSession()
46
46
  self.base_url = (
47
- nuclia_settings.nuclia_public_url.format(zone=nuclia_settings.nuclia_zone)
48
- + "/api"
47
+ nuclia_settings.nuclia_public_url.format(zone=nuclia_settings.nuclia_zone) + "/api"
49
48
  )
50
49
  self.headers = {}
51
50
  if nuclia_settings.nuclia_service_account is not None:
52
- self.headers["X-NUCLIA-NUAKEY"] = (
53
- f"Bearer {nuclia_settings.nuclia_service_account}"
54
- )
51
+ self.headers["X-NUCLIA-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
55
52
 
56
53
  async def __aenter__(self):
57
54
  return self
@@ -67,4 +64,4 @@ class NucliaAuthHTTPClient:
67
64
  async with self.session.get(url, headers=self.headers) as resp:
68
65
  resp_text = await resp.text()
69
66
  check_status(resp, resp_text)
70
- return AuthInfoResponse.parse_raw(resp_text)
67
+ return AuthInfoResponse.model_validate_json(resp_text)
@@ -48,10 +48,7 @@ def get_processing_api_url() -> str:
48
48
  + "/api/v1/processing"
49
49
  )
50
50
  else:
51
- return (
52
- nuclia_settings.nuclia_processing_cluster_url
53
- + "/api/v1/internal/processing"
54
- )
51
+ return nuclia_settings.nuclia_processing_cluster_url + "/api/v1/internal/processing"
55
52
 
56
53
 
57
54
  class PullResponse(pydantic.BaseModel):
@@ -159,9 +156,7 @@ class ProcessingHTTPClient:
159
156
  self.base_url = get_processing_api_url()
160
157
  self.headers = {}
161
158
  if nuclia_settings.nuclia_service_account is not None:
162
- self.headers["X-STF-NUAKEY"] = (
163
- f"Bearer {nuclia_settings.nuclia_service_account}"
164
- )
159
+ self.headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
165
160
 
166
161
  async def __aenter__(self):
167
162
  return self
@@ -187,7 +182,7 @@ class ProcessingHTTPClient:
187
182
  async with self.session.get(url, headers=self.headers, params=params) as resp:
188
183
  resp_text = await resp.text()
189
184
  check_status(resp, resp_text)
190
- return PullResponse.parse_raw(resp_text)
185
+ return PullResponse.model_validate_json(resp_text)
191
186
 
192
187
  async def pull_position(self, partition: str) -> int:
193
188
  url = self.base_url + "/pull/position"
@@ -195,7 +190,7 @@ class ProcessingHTTPClient:
195
190
  async with self.session.get(url, headers=self.headers, params=params) as resp:
196
191
  resp_text = await resp.text()
197
192
  check_status(resp, resp_text)
198
- data = PullPosition.parse_raw(resp_text)
193
+ data = PullPosition.model_validate_json(resp_text)
199
194
  return data.cursor
200
195
 
201
196
  async def requests(
@@ -217,7 +212,7 @@ class ProcessingHTTPClient:
217
212
  async with self.session.get(url, headers=self.headers, params=params) as resp:
218
213
  resp_text = await resp.text()
219
214
  check_status(resp, resp_text)
220
- return RequestsResults.parse_raw(resp_text)
215
+ return RequestsResults.model_validate_json(resp_text)
221
216
 
222
217
  async def stats(self, kbid: str, timeout: Optional[float] = 1.0) -> StatsResponse:
223
218
  url = self.base_url + "/stats"
@@ -229,4 +224,4 @@ class ProcessingHTTPClient:
229
224
  ) as resp:
230
225
  resp_text = await resp.text()
231
226
  check_status(resp, resp_text)
232
- return StatsResponse.parse_raw(resp_text)
227
+ return StatsResponse.model_validate_json(resp_text)
@@ -30,9 +30,7 @@ def check_status(resp: aiohttp.ClientResponse, resp_text: str) -> None:
30
30
  elif resp.status == 404:
31
31
  raise exceptions.NotFoundException(f"Resource not found: {resp_text}")
32
32
  elif resp.status in (401, 403):
33
- raise exceptions.AuthorizationException(
34
- f"Unauthorized to access: {resp.status}"
35
- )
33
+ raise exceptions.AuthorizationException(f"Unauthorized to access: {resp.status}")
36
34
  elif resp.status == 429:
37
35
  raise exceptions.RateLimitException("Rate limited")
38
36
  else:
nucliadb/common/ids.py ADDED
@@ -0,0 +1,240 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """
22
+ This module aims to centralize how we build ids for resources, fields,
23
+ paragraphs... Avoiding spread of id construction and parsing everywhere
24
+ """
25
+
26
+ from dataclasses import dataclass
27
+ from typing import Optional
28
+
29
+ from nucliadb_protos.resources_pb2 import FieldType
30
+
31
# Single-letter field type codes, as they appear inside string ids, mapped to
# the corresponding protobuf field type.
FIELD_TYPE_STR_TO_PB: dict[str, FieldType.ValueType] = {
    "t": FieldType.TEXT,  # text
    "f": FieldType.FILE,  # file
    "u": FieldType.LINK,  # link
    "a": FieldType.GENERIC,  # generic
    "c": FieldType.CONVERSATION,  # conversation
}

# Reverse lookup: protobuf field type -> single-letter code.
FIELD_TYPE_PB_TO_STR = {pb_type: code for code, pb_type in FIELD_TYPE_STR_TO_PB.items()}
40
+
41
+
42
@dataclass
class FieldId:
    """
    Field ids are used to identify fields in resources. They usually have the following format:

        `rid/field_type/field_key`

    where field type is one of: `t`, `f`, `u`, `a`, `c` (text, file, link, generic, conversation)
    and field_key is an identifier for that field type on the resource, usually chosen by the user.

    In some cases, fields can have subfields, for example, in conversations, where each part of the
    conversation is a subfield. In those cases, the id has the following format:

        `rid/field_type/field_key/subfield_id`

    Examples:

    >>> FieldId(rid="rid", type="u", key="my-link")
    FieldId(rid/u/my-link)
    >>> FieldId.from_string("rid/u/my-link")
    FieldId(rid/u/my-link)
    >>> FieldId.from_string("rid/c/chat/part-1").subfield_id
    'part-1'
    """

    rid: str
    type: str
    key: str
    # also known as `split`, this indicates a part of a field in, for example, conversations
    subfield_id: Optional[str] = None

    def __repr__(self) -> str:
        return f"FieldId({self.full()})"

    def short_without_subfield(self) -> str:
        """Return `/{type}/{key}`: no resource id and no subfield id."""
        return f"/{self.type}/{self.key}"

    def full(self) -> str:
        """Return the complete string id, appending the subfield id when one is set."""
        if self.subfield_id is None:
            return f"{self.rid}/{self.type}/{self.key}"
        else:
            return f"{self.rid}/{self.type}/{self.key}/{self.subfield_id}"

    def __hash__(self) -> int:
        # Hash on the string form. Defined explicitly because a non-frozen
        # dataclass with the default eq=True would otherwise be unhashable.
        return hash(self.full())

    @property
    def pb_type(self) -> FieldType.ValueType:
        """Protobuf field type for `self.type`.

        Raises KeyError if `self.type` is not one of the known single-letter codes.
        """
        return FIELD_TYPE_STR_TO_PB[self.type]

    @classmethod
    def from_pb(
        cls, rid: str, field_type: FieldType.ValueType, key: str, subfield_id: Optional[str] = None
    ) -> "FieldId":
        """Build a FieldId from a protobuf field type instead of its single-letter code.

        Raises KeyError if `field_type` has no single-letter code in FIELD_TYPE_PB_TO_STR.
        """
        return cls(rid=rid, type=FIELD_TYPE_PB_TO_STR[field_type], key=key, subfield_id=subfield_id)

    @classmethod
    def from_string(cls, value: str) -> "FieldId":
        """
        Parse a FieldId from a string of the form `rid/type/key` or
        `rid/type/key/subfield_id`.

        The string is split on every `/`, so none of the components may contain
        a `/` themselves.

        Raises ValueError if the string does not have 3 or 4 components or if
        the field type is not a known single-letter code.

        Example:
        >>> fid = FieldId.from_string("rid/u/foo")
        >>> fid
        FieldId(rid/u/foo)
        >>> fid.type
        'u'
        >>> fid.key
        'foo'
        >>> FieldId.from_string("rid/u/foo/subfield_id").subfield_id
        'subfield_id'
        """
        parts = value.split("/")
        # Only `rid/type/key` and `rid/type/key/subfield_id` are valid shapes.
        if len(parts) not in (3, 4):
            raise ValueError(f"Invalid FieldId: {value}")
        rid, _type, key, *rest = parts
        if _type not in FIELD_TYPE_STR_TO_PB:
            raise ValueError(f"Invalid FieldId: {value}")
        # `rest` holds the subfield id when four components were given.
        return cls(rid=rid, type=_type, key=key, subfield_id=rest[0] if rest else None)
129
+
130
+
131
@dataclass
class ParagraphId:
    """
    Identifies a paragraph by the field that contains it plus a `start-end`
    range. Its string form is:

        `{field_id}/{paragraph_start}-{paragraph_end}`
    """

    field_id: FieldId
    paragraph_start: int
    paragraph_end: int

    def __hash__(self) -> int:
        # Hash on the string form so instances can be used in sets and as dict keys.
        return hash(self.full())

    def __repr__(self) -> str:
        return f"ParagraphId({self.full()})"

    @property
    def rid(self) -> str:
        """Resource id of the field this paragraph belongs to."""
        return self.field_id.rid

    def full(self) -> str:
        """Return the complete string id of the paragraph."""
        field_part = self.field_id.full()
        return f"{field_part}/{self.paragraph_start}-{self.paragraph_end}"

    @classmethod
    def from_string(cls, value: str) -> "ParagraphId":
        """
        Parse a ParagraphId: the last `/`-separated component is the
        `start-end` range and everything before it is the field id.
        """
        field_part, _, paragraph_range = value.rpartition("/")
        # The range is parsed before the field id, as in the original order.
        start, end = (int(bound) for bound in paragraph_range.split("-"))
        return cls(
            field_id=FieldId.from_string(field_part),
            paragraph_start=start,
            paragraph_end=end,
        )

    @classmethod
    def from_vector_id(cls, vid: "VectorId") -> "ParagraphId":
        """
        Returns a ParagraphId from a vector id (the index part of the vector id is ignored).
        >>> vid = VectorId.from_string("rid/u/field_id/0/0-1")
        >>> ParagraphId.from_vector_id(vid)
        ParagraphId(rid/u/field_id/0-1)
        """
        return cls(
            field_id=vid.field_id,
            paragraph_start=vid.vector_start,
            paragraph_end=vid.vector_end,
        )
171
+
172
+
173
@dataclass
class VectorId:
    """
    Ids of vectors are very similar to ParagraphIds, but for legacy reasons, they have an index
    indicating the position of the corresponding text block in the list of text blocks for the field.

    String form: `{field_id}/{index}/{vector_start}-{vector_end}`

    Examples:

    >>> VectorId.from_string("rid/u/field_id/0/0-10")
    VectorId(rid/u/field_id/0/0-10)
    >>> VectorId(
    ...     field_id=FieldId.from_string("rid/u/field_id"),
    ...     index=0,
    ...     vector_start=0,
    ...     vector_end=10,
    ... )
    VectorId(rid/u/field_id/0/0-10)
    """

    field_id: FieldId
    index: int
    vector_start: int
    vector_end: int

    def __hash__(self) -> int:
        # Hash on the string form so instances can be used in sets and as dict keys.
        return hash(self.full())

    def __repr__(self) -> str:
        return f"VectorId({self.full()})"

    @property
    def rid(self) -> str:
        """Resource id of the field this vector belongs to."""
        return self.field_id.rid

    def full(self) -> str:
        """Return the complete string id of the vector."""
        field_part = self.field_id.full()
        return f"{field_part}/{self.index}/{self.vector_start}-{self.vector_end}"

    @classmethod
    def from_string(cls, value: str) -> "VectorId":
        """
        Parse a VectorId: the last component is the `start-end` range, the one
        before it is the index, and everything earlier is the field id.
        """
        segments = value.split("/")
        start, end = (int(bound) for bound in segments[-1].split("-"))
        position = int(segments[-2])
        owner = FieldId.from_string("/".join(segments[:-2]))
        return cls(field_id=owner, index=position, vector_start=start, vector_end=end)
218
+
219
+
220
def extract_data_augmentation_id(generated_field_id: str) -> Optional[str]:
    """Extract the task id from a data augmentation generated field id.

    Data augmentation generated fields have a strict id with the following
    format:
    `da-{task_id}-{original:field_type}-{original:field_id}[-{original:split}]`

    Returns the `task_id`, or None when the id does not start with `da`, has
    fewer than four `-`-separated parts, or has an empty task id.

    ATTENTION: we are assuming ids have been properly generated and `-` is not a
    valid character inside any component, otherwise, this extraction would be
    wrong and a partial id would be returned.
    """
    # Only the first four pieces are ever inspected, so stop splitting there.
    parts = generated_field_id.split("-", 3)

    if len(parts) < 4 or parts[0] != "da":
        return None

    # An empty task id (e.g. "da--t-field") is reported as None.
    return parts[1] or None
@@ -35,6 +35,7 @@ logger = logging.getLogger(__name__)
35
35
 
36
36
  NEW_SHARD_LOCK = "new-shard-{kbid}"
37
37
  RESOURCE_INDEX_LOCK = "resource-index-{kbid}-{resource_id}"
38
+ RESOURCE_CREATION_SLUG_LOCK = "resource-creation-{kbid}-{resource_slug}"
38
39
  KB_SHARDS_LOCK = "shards-kb-{kbid}"
39
40
  MIGRATIONS_LOCK = "migration"
40
41
 
@@ -83,7 +84,7 @@ class _Lock:
83
84
  else:
84
85
  if time.time() > lock_data.expires_at:
85
86
  # if current time is greater than when it expires, take it over
86
- await self._set_lock_value(txn)
87
+ await self._update_lock_value(txn)
87
88
  await txn.commit()
88
89
  break
89
90
 
@@ -99,24 +100,36 @@ class _Lock:
99
100
  return self
100
101
 
101
102
  async def get_lock_data(self, txn: Transaction) -> Optional[LockValue]:
102
- existing_data = await txn.get(self.key)
103
+ existing_data = await txn.get(self.key, for_update=True)
103
104
  if existing_data is None:
104
105
  return None
105
106
  else:
106
107
  return LockValue(**orjson.loads(existing_data))
107
108
 
108
- async def _set_lock_value(self, txn: Transaction) -> None:
109
async def _update_lock_value(self, txn: Transaction) -> None:
    """
    Write the lock value for `self.key` through `txn.set`, pushing the
    second LockValue argument to now + `self.expire_timeout`.
    """
    expiry = time.time() + self.expire_timeout
    payload = orjson.dumps(LockValue(self.value, expiry))
    await txn.set(self.key, payload)
113
117
 
118
async def _set_lock_value(self, txn: Transaction) -> None:
    """
    Create the lock value for `self.key` through `txn.insert`.

    Per the original contract, an existing lock is not updated and a
    ConflictError is raised instead; that behaviour comes from `txn.insert`,
    not from this method.
    """
    expiry = time.time() + self.expire_timeout
    payload = orjson.dumps(LockValue(self.value, expiry))
    await txn.insert(self.key, payload)
126
+
114
127
  async def _refresh_task(self) -> None:
115
128
  while True:
116
129
  try:
117
130
  await asyncio.sleep(self.refresh_timeout)
118
131
  async with self.driver.transaction() as txn:
119
- await self._set_lock_value(txn)
132
+ await self._update_lock_value(txn)
120
133
  await txn.commit()
121
134
  except (asyncio.CancelledError, RuntimeError):
122
135
  return
@@ -137,10 +150,19 @@ class _Lock:
137
150
 
138
151
  def distributed_lock(
139
152
  key: str,
140
- lock_timeout: float = 60.0, # max time to wait for lock
141
- expire_timeout: float = 30.0, # how long by default the lock will be held without a refresh
142
- refresh_timeout: float = 10.0, # how often to refresh
153
+ lock_timeout: float = 60.0,
154
+ expire_timeout: float = 30.0,
155
+ refresh_timeout: float = 10.0,
143
156
  ) -> _Lock:
157
+ """
158
+ Context manager to get a distributed lock on a key.
159
+
160
+ Params:
161
+ - key: the key to lock with
162
+ - lock_timeout: maximum time to wait for the lock before ResourceLocked is raised.
163
+ - expire_timeout: how long by default the lock will be held without a refresh
164
+ - refresh_timeout: how often to refresh the lock
165
+ """
144
166
  return _Lock(
145
167
  key,
146
168
  lock_timeout=lock_timeout,
@@ -23,7 +23,7 @@ import asyncio
23
23
  from contextlib import asynccontextmanager
24
24
  from typing import AsyncGenerator, Optional
25
25
 
26
- DEFAULT_SCAN_LIMIT = 10
26
+ DEFAULT_SCAN_LIMIT = -1
27
27
  DEFAULT_BATCH_SCAN_LIMIT = 500
28
28
 
29
29
 
@@ -37,18 +37,24 @@ class Transaction:
37
37
  async def commit(self):
38
38
  raise NotImplementedError()
39
39
 
40
- async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]:
40
+ async def batch_get(self, keys: list[str], for_update: bool = False) -> list[Optional[bytes]]:
41
41
  raise NotImplementedError()
42
42
 
43
- async def get(self, key: str) -> Optional[bytes]:
43
+ async def get(self, key: str, for_update: bool = False) -> Optional[bytes]:
44
44
  raise NotImplementedError()
45
45
 
46
46
  async def set(self, key: str, value: bytes):
47
47
  raise NotImplementedError()
48
48
 
49
+ async def insert(self, key: str, value: bytes):
50
+ return await self.set(key, value)
51
+
49
52
  async def delete(self, key: str):
50
53
  raise NotImplementedError()
51
54
 
55
+ async def delete_by_prefix(self, prefix: str) -> None:
56
+ raise NotImplementedError()
57
+
52
58
  def keys(
53
59
  self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True
54
60
  ) -> AsyncGenerator[str, None]:
@@ -74,36 +80,6 @@ class Driver:
74
80
  except Exception:
75
81
  pass
76
82
 
77
- async def begin(self, read_only: bool = False) -> Transaction:
78
- raise NotImplementedError()
79
-
80
83
  @asynccontextmanager
81
- async def transaction(
82
- self, wait_for_abort: bool = True, read_only: bool = False
83
- ) -> AsyncGenerator[Transaction, None]:
84
- """
85
- Use to make sure transaction is always aborted.
86
-
87
- :param wait_for_abort: If True, wait for abort to finish before returning.
88
- If False, abort is done in background (unless there
89
- is an error)
90
- """
91
- txn: Optional[Transaction] = None
92
- error: bool = False
93
- try:
94
- txn = await self.begin(read_only=read_only)
95
- yield txn
96
- except Exception:
97
- error = True
98
- raise
99
- finally:
100
- if txn is not None and txn.open:
101
- if error or wait_for_abort:
102
- await txn.abort()
103
- else:
104
- self._async_abort(txn)
105
-
106
- def _async_abort(self, txn: Transaction):
107
- task = asyncio.create_task(txn.abort())
108
- task.add_done_callback(lambda task: self._abort_tasks.remove(task))
109
- self._abort_tasks.append(task)
84
+ async def transaction(self, read_only: bool = False) -> AsyncGenerator[Transaction, None]:
85
+ yield Transaction()
@@ -24,3 +24,6 @@ class NotFoundError(Exception): ...
24
24
 
25
25
 
26
26
  class UnsetUtility(Exception): ...
27
+
28
+
29
+ class MaindbServerError(Exception): ...
@@ -19,7 +19,8 @@
19
19
  #
20
20
  import glob
21
21
  import os
22
- from typing import Optional
22
+ from contextlib import asynccontextmanager
23
+ from typing import AsyncGenerator, Optional
23
24
 
24
25
  from nucliadb.common.maindb.driver import (
25
26
  DEFAULT_BATCH_SCAN_LIMIT,
@@ -105,7 +106,7 @@ class LocalTransaction(Transaction):
105
106
  self.clean()
106
107
  self.open = False
107
108
 
108
- async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]:
109
+ async def batch_get(self, keys: list[str], for_update: bool = False) -> list[Optional[bytes]]:
109
110
  results: list[Optional[bytes]] = []
110
111
  for key in keys:
111
112
  obj = await self.get(key)
@@ -124,7 +125,7 @@ class LocalTransaction(Transaction):
124
125
 
125
126
  return results
126
127
 
127
- async def get(self, key: str) -> Optional[bytes]:
128
+ async def get(self, key: str, for_update: bool = False) -> Optional[bytes]:
128
129
  if key in self.deleted_keys:
129
130
  raise KeyError(f"Not found {key}")
130
131
 
@@ -159,9 +160,15 @@ class LocalTransaction(Transaction):
159
160
  if key in self.modified_keys:
160
161
  del self.modified_keys[key]
161
162
 
162
- async def keys(
163
- self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True
164
- ):
163
async def delete_by_prefix(self, prefix: str) -> None:
    """Delete every key in `self.modified_keys` that starts with `prefix`.

    Only keys tracked in `self.modified_keys` are considered.
    """
    # Snapshot the matches first: `self.delete` may mutate `modified_keys`,
    # which must not happen while iterating over it.
    matching = [key for key in self.modified_keys if key.startswith(prefix)]
    for key in matching:
        await self.delete(key)
170
+
171
+ async def keys(self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True):
165
172
  prev_key = None
166
173
 
167
174
  get_all_keys = count == -1
@@ -195,7 +202,7 @@ class LocalTransaction(Transaction):
195
202
 
196
203
  async def count(self, match: str) -> int:
197
204
  value = 0
198
- async for _ in self.keys(match, count=-1):
205
+ async for _ in self.keys(match):
199
206
  value += 1
200
207
  return value
201
208
 
@@ -214,7 +221,13 @@ class LocalDriver(Driver):
214
221
  async def finalize(self):
215
222
  pass
216
223
 
217
- async def begin(self, read_only: bool = False) -> LocalTransaction:
224
+ @asynccontextmanager
225
+ async def transaction(self, read_only: bool = False) -> AsyncGenerator[Transaction, None]:
218
226
  if self.url is None:
219
227
  raise AttributeError("Invalid url")
220
- return LocalTransaction(self.url, self)
228
+ txn = LocalTransaction(self.url, self)
229
+ try:
230
+ yield txn
231
+ finally:
232
+ if txn.open:
233
+ await txn.abort()