nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,52 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ from pydantic import Field
21
+ from pydantic_settings import BaseSettings
22
+
23
+
24
+ class ExternalIndexProvidersSettings(BaseSettings):
25
+ pinecone_upsert_parallelism: int = Field(
26
+ default=3,
27
+ title="Pinecone upsert parallelism",
28
+ description="Number of parallel upserts to Pinecone on each set resource operation",
29
+ )
30
+ pinecone_delete_parallelism: int = Field(
31
+ default=2,
32
+ title="Pinecone delete parallelism",
33
+ description="Number of parallel deletes to Pinecone on each delete resource operation",
34
+ )
35
+ pinecone_upsert_timeout: float = Field(
36
+ default=10.0,
37
+ title="Pinecone upsert timeout",
38
+ description="Timeout in seconds for each upsert operation to Pinecone",
39
+ )
40
+ pinecone_delete_timeout: float = Field(
41
+ default=10.0,
42
+ title="Pinecone delete timeout",
43
+ description="Timeout in seconds for each delete operation to Pinecone",
44
+ )
45
+ pinecone_query_timeout: float = Field(
46
+ default=10.0,
47
+ title="Pinecone query timeout",
48
+ description="Timeout in seconds for each query operation to Pinecone",
49
+ )
50
+
51
+
52
+ settings = ExternalIndexProvidersSettings()
@@ -44,14 +44,11 @@ class NucliaAuthHTTPClient:
44
44
  def __init__(self):
45
45
  self.session = aiohttp.ClientSession()
46
46
  self.base_url = (
47
- nuclia_settings.nuclia_public_url.format(zone=nuclia_settings.nuclia_zone)
48
- + "/api"
47
+ nuclia_settings.nuclia_public_url.format(zone=nuclia_settings.nuclia_zone) + "/api"
49
48
  )
50
49
  self.headers = {}
51
50
  if nuclia_settings.nuclia_service_account is not None:
52
- self.headers[
53
- "X-NUCLIA-NUAKEY"
54
- ] = f"Bearer {nuclia_settings.nuclia_service_account}"
51
+ self.headers["X-NUCLIA-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
55
52
 
56
53
  async def __aenter__(self):
57
54
  return self
@@ -67,4 +64,4 @@ class NucliaAuthHTTPClient:
67
64
  async with self.session.get(url, headers=self.headers) as resp:
68
65
  resp_text = await resp.text()
69
66
  check_status(resp, resp_text)
70
- return AuthInfoResponse.parse_raw(resp_text)
67
+ return AuthInfoResponse.model_validate_json(resp_text)
@@ -48,10 +48,7 @@ def get_processing_api_url() -> str:
48
48
  + "/api/v1/processing"
49
49
  )
50
50
  else:
51
- return (
52
- nuclia_settings.nuclia_processing_cluster_url
53
- + "/api/v1/internal/processing"
54
- )
51
+ return nuclia_settings.nuclia_processing_cluster_url + "/api/v1/internal/processing"
55
52
 
56
53
 
57
54
  class PullResponse(pydantic.BaseModel):
@@ -159,9 +156,7 @@ class ProcessingHTTPClient:
159
156
  self.base_url = get_processing_api_url()
160
157
  self.headers = {}
161
158
  if nuclia_settings.nuclia_service_account is not None:
162
- self.headers[
163
- "X-STF-NUAKEY"
164
- ] = f"Bearer {nuclia_settings.nuclia_service_account}"
159
+ self.headers["X-STF-NUAKEY"] = f"Bearer {nuclia_settings.nuclia_service_account}"
165
160
 
166
161
  async def __aenter__(self):
167
162
  return self
@@ -187,7 +182,7 @@ class ProcessingHTTPClient:
187
182
  async with self.session.get(url, headers=self.headers, params=params) as resp:
188
183
  resp_text = await resp.text()
189
184
  check_status(resp, resp_text)
190
- return PullResponse.parse_raw(resp_text)
185
+ return PullResponse.model_validate_json(resp_text)
191
186
 
192
187
  async def pull_position(self, partition: str) -> int:
193
188
  url = self.base_url + "/pull/position"
@@ -195,7 +190,7 @@ class ProcessingHTTPClient:
195
190
  async with self.session.get(url, headers=self.headers, params=params) as resp:
196
191
  resp_text = await resp.text()
197
192
  check_status(resp, resp_text)
198
- data = PullPosition.parse_raw(resp_text)
193
+ data = PullPosition.model_validate_json(resp_text)
199
194
  return data.cursor
200
195
 
201
196
  async def requests(
@@ -217,7 +212,7 @@ class ProcessingHTTPClient:
217
212
  async with self.session.get(url, headers=self.headers, params=params) as resp:
218
213
  resp_text = await resp.text()
219
214
  check_status(resp, resp_text)
220
- return RequestsResults.parse_raw(resp_text)
215
+ return RequestsResults.model_validate_json(resp_text)
221
216
 
222
217
  async def stats(self, kbid: str, timeout: Optional[float] = 1.0) -> StatsResponse:
223
218
  url = self.base_url + "/stats"
@@ -229,4 +224,4 @@ class ProcessingHTTPClient:
229
224
  ) as resp:
230
225
  resp_text = await resp.text()
231
226
  check_status(resp, resp_text)
232
- return StatsResponse.parse_raw(resp_text)
227
+ return StatsResponse.model_validate_json(resp_text)
@@ -30,9 +30,7 @@ def check_status(resp: aiohttp.ClientResponse, resp_text: str) -> None:
30
30
  elif resp.status == 404:
31
31
  raise exceptions.NotFoundException(f"Resource not found: {resp_text}")
32
32
  elif resp.status in (401, 403):
33
- raise exceptions.AuthorizationException(
34
- f"Unauthorized to access: {resp.status}"
35
- )
33
+ raise exceptions.AuthorizationException(f"Unauthorized to access: {resp.status}")
36
34
  elif resp.status == 429:
37
35
  raise exceptions.RateLimitException("Rate limited")
38
36
  else:
nucliadb/common/ids.py ADDED
@@ -0,0 +1,240 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """
22
+ This module aims to centralize how we build ids for resources, fields,
23
+ paragraphs, etc., so that id construction and parsing are not spread everywhere
24
+ """
25
+
26
+ from dataclasses import dataclass
27
+ from typing import Optional
28
+
29
+ from nucliadb_protos.resources_pb2 import FieldType
30
+
31
+ FIELD_TYPE_STR_TO_PB: dict[str, FieldType.ValueType] = {
32
+ "t": FieldType.TEXT,
33
+ "f": FieldType.FILE,
34
+ "u": FieldType.LINK,
35
+ "a": FieldType.GENERIC,
36
+ "c": FieldType.CONVERSATION,
37
+ }
38
+
39
+ FIELD_TYPE_PB_TO_STR = {v: k for k, v in FIELD_TYPE_STR_TO_PB.items()}
40
+
41
+
42
+ @dataclass
43
+ class FieldId:
44
+ """
45
+ Field ids are used to identify fields in resources. They usually have the following format:
46
+
47
+ `rid/field_type/field_key`
48
+
49
+ where field type is one of: `t`, `f`, `u`, `a`, `c` (text, file, link, generic, conversation)
50
+ and field_key is an identifier for that field type on the resource, usually chosen by the user.
51
+
52
+ In some cases, fields can have subfields, for example, in conversations, where each part of the
53
+ conversation is a subfield. In those cases, the id has the following format:
54
+
55
+ `rid/field_type/field_key/subfield_id`
56
+
57
+ Examples:
58
+
59
+ >>> FieldId(rid="rid", type="u", key="my-link")
60
+ FieldId("rid/u/my-link")
61
+ >>> FieldId.from_string("rid/u/my-link")
62
+ FieldId("rid/u/my-link")
63
+ """
64
+
65
+ rid: str
66
+ type: str
67
+ key: str
68
+ # also known as `split`, this indicates a part of a field in, for example, conversations
69
+ subfield_id: Optional[str] = None
70
+
71
+ def __repr__(self) -> str:
72
+ return f"FieldId({self.full()})"
73
+
74
+ def short_without_subfield(self) -> str:
75
+ return f"/{self.type}/{self.key}"
76
+
77
+ def full(self) -> str:
78
+ if self.subfield_id is None:
79
+ return f"{self.rid}/{self.type}/{self.key}"
80
+ else:
81
+ return f"{self.rid}/{self.type}/{self.key}/{self.subfield_id}"
82
+
83
+ def __hash__(self) -> int:
84
+ return hash(self.full())
85
+
86
+ @property
87
+ def pb_type(self) -> FieldType.ValueType:
88
+ return FIELD_TYPE_STR_TO_PB[self.type]
89
+
90
+ @classmethod
91
+ def from_pb(
92
+ cls, rid: str, field_type: FieldType.ValueType, key: str, subfield_id: Optional[str] = None
93
+ ) -> "FieldId":
94
+ return cls(rid=rid, type=FIELD_TYPE_PB_TO_STR[field_type], key=key, subfield_id=subfield_id)
95
+
96
+ @classmethod
97
+ def from_string(cls, value: str) -> "FieldId":
98
+ """
99
+ Parse a FieldId from a string
100
+ Example:
101
+ >>> fid = FieldId.from_string("rid/u/foo")
102
+ >>> fid
103
+ FieldId("rid/u/foo")
104
+ >>> fid.type
105
+ 'u'
106
+ >>> fid.key
107
+ 'foo'
108
+ >>> FieldId.from_string("rid/u/foo/subfield_id").subfield_id
109
+ 'subfield_id'
110
+ """
111
+ parts = value.split("/")
112
+ if len(parts) == 3:
113
+ rid, _type, key = parts
114
+ if _type not in FIELD_TYPE_STR_TO_PB:
115
+ raise ValueError(f"Invalid FieldId: {value}")
116
+ return cls(rid=rid, type=_type, key=key)
117
+ elif len(parts) == 4:
118
+ rid, _type, key, subfield_id = parts
119
+ if _type not in FIELD_TYPE_STR_TO_PB:
120
+ raise ValueError(f"Invalid FieldId: {value}")
121
+ return cls(
122
+ rid=rid,
123
+ type=_type,
124
+ key=key,
125
+ subfield_id=subfield_id,
126
+ )
127
+ else:
128
+ raise ValueError(f"Invalid FieldId: {value}")
129
+
130
+
131
+ @dataclass
132
+ class ParagraphId:
133
+ field_id: FieldId
134
+ paragraph_start: int
135
+ paragraph_end: int
136
+
137
+ def __repr__(self) -> str:
138
+ return f"ParagraphId({self.full()})"
139
+
140
+ def full(self) -> str:
141
+ return f"{self.field_id.full()}/{self.paragraph_start}-{self.paragraph_end}"
142
+
143
+ def __hash__(self) -> int:
144
+ return hash(self.full())
145
+
146
+ @property
147
+ def rid(self) -> str:
148
+ return self.field_id.rid
149
+
150
+ @classmethod
151
+ def from_string(cls, value: str) -> "ParagraphId":
152
+ parts = value.split("/")
153
+ paragraph_range = parts[-1]
154
+ start, end = map(int, paragraph_range.split("-"))
155
+ field_id = FieldId.from_string("/".join(parts[:-1]))
156
+ return cls(field_id=field_id, paragraph_start=start, paragraph_end=end)
157
+
158
+ @classmethod
159
+ def from_vector_id(cls, vid: "VectorId") -> "ParagraphId":
160
+ """
161
+ Returns a ParagraphId from a vector_key (the index part of the vector_key is ignored).
162
+ >>> vid = VectorId.from_string("rid/u/field_id/0/0-1")
163
+ >>> ParagraphId.from_vector_id(vid)
164
+ ParagraphId("rid/u/field_id/0-1")
165
+ """
166
+ return cls(
167
+ field_id=vid.field_id,
168
+ paragraph_start=vid.vector_start,
169
+ paragraph_end=vid.vector_end,
170
+ )
171
+
172
+
173
+ @dataclass
174
+ class VectorId:
175
+ """
176
+ Ids of vectors are very similar to ParagraphIds, but for legacy reasons, they have an index
177
+ indicating the position of the corresponding text block in the list of text blocks for the field.
178
+
179
+ Examples:
180
+
181
+ >>> VectorId.from_string("rid/u/field_id/0/0-10")
182
+ VectorId("rid/u/field_id/0/0-10")
183
+ >>> VectorId(
184
+ ... field_id=FieldId.from_string("rid/u/field_id"),
185
+ ... index=0,
186
+ ... vector_start=0,
187
+ ... vector_end=10,
188
+ ... )
189
+ VectorId("rid/u/field_id/0/0-10")
190
+ """
191
+
192
+ field_id: FieldId
193
+ index: int
194
+ vector_start: int
195
+ vector_end: int
196
+
197
+ def __repr__(self) -> str:
198
+ return f"VectorId({self.full()})"
199
+
200
+ def full(self) -> str:
201
+ return f"{self.field_id.full()}/{self.index}/{self.vector_start}-{self.vector_end}"
202
+
203
+ def __hash__(self) -> int:
204
+ return hash(self.full())
205
+
206
+ @property
207
+ def rid(self) -> str:
208
+ return self.field_id.rid
209
+
210
+ @classmethod
211
+ def from_string(cls, value: str) -> "VectorId":
212
+ parts = value.split("/")
213
+ vector_range = parts[-1]
214
+ start, end = map(int, vector_range.split("-"))
215
+ index = int(parts[-2])
216
+ field_id = FieldId.from_string("/".join(parts[:-2]))
217
+ return cls(field_id=field_id, index=index, vector_start=start, vector_end=end)
218
+
219
+
220
+ def extract_data_augmentation_id(generated_field_id: str) -> Optional[str]:
221
+ """Data augmentation generated fields have a strict id with the following
222
+ format:
223
+ `da-{task_id}-{original:field_type}-{original:field_id}[-{original:split}]`
224
+
225
+ @return the `task_id`
226
+
227
+ ATTENTION: we are assuming ids have been properly generated and `-` is not a
228
+ valid character, otherwise, this extraction would be wrong and a partial id
229
+ would be returned.
230
+
231
+ """
232
+ parts = generated_field_id.split("-")
233
+
234
+ if len(parts) < 4:
235
+ return None
236
+
237
+ if parts[0] != "da":
238
+ return None
239
+
240
+ return parts[1] or None
@@ -35,11 +35,15 @@ logger = logging.getLogger(__name__)
35
35
 
36
36
  NEW_SHARD_LOCK = "new-shard-{kbid}"
37
37
  RESOURCE_INDEX_LOCK = "resource-index-{kbid}-{resource_id}"
38
+ RESOURCE_CREATION_SLUG_LOCK = "resource-creation-{kbid}-{resource_slug}"
38
39
  KB_SHARDS_LOCK = "shards-kb-{kbid}"
40
+ MIGRATIONS_LOCK = "migration"
39
41
 
40
42
 
41
43
  class ResourceLocked(Exception):
42
- ...
44
+ def __init__(self, key: str):
45
+ self.key = key
46
+ super().__init__(f"{key} is locked")
43
47
 
44
48
 
45
49
  @dataclass
@@ -59,7 +63,8 @@ class _Lock:
59
63
  expire_timeout: float,
60
64
  refresh_timeout: float,
61
65
  ):
62
- self.key = "/distributed/locks/" + key
66
+ self.user_key = key
67
+ self.key = "/distributed/locks/" + self.user_key
63
68
  self.lock_timeout = lock_timeout
64
69
  self.expire_timeout = expire_timeout
65
70
  self.refresh_timeout = refresh_timeout
@@ -79,40 +84,52 @@ class _Lock:
79
84
  else:
80
85
  if time.time() > lock_data.expires_at:
81
86
  # if current time is greater than when it expires, take it over
82
- await self._set_lock_value(txn)
87
+ await self._update_lock_value(txn)
83
88
  await txn.commit()
84
89
  break
85
90
 
86
91
  if time.time() > start + self.lock_timeout:
87
92
  # if current time > start time + lock timeout
88
93
  # we've waited too long: raise an exception because we can't get the lock
89
- raise ResourceLocked()
94
+ raise ResourceLocked(key=self.user_key)
90
95
  except ConflictError:
91
96
  # if we get a conflict error, retry
92
97
  pass
93
- await asyncio.sleep(0.1) # sleep before trying againt
98
+ await asyncio.sleep(0.1) # sleep before trying again
94
99
  self.task = asyncio.create_task(self._refresh_task())
95
100
  return self
96
101
 
97
102
  async def get_lock_data(self, txn: Transaction) -> Optional[LockValue]:
98
- existing_data = await txn.get(self.key)
103
+ existing_data = await txn.get(self.key, for_update=True)
99
104
  if existing_data is None:
100
105
  return None
101
106
  else:
102
107
  return LockValue(**orjson.loads(existing_data))
103
108
 
104
- async def _set_lock_value(self, txn: Transaction) -> None:
109
+ async def _update_lock_value(self, txn: Transaction) -> None:
110
+ """
111
+ Update the value for the lock.
112
+ """
105
113
  await txn.set(
106
114
  self.key,
107
115
  orjson.dumps(LockValue(self.value, time.time() + self.expire_timeout)),
108
116
  )
109
117
 
118
+ async def _set_lock_value(self, txn: Transaction) -> None:
119
+ """
120
+ Set the value for the lock. If lock already exists, it doesn't update and raises a ConflictError.
121
+ """
122
+ await txn.insert(
123
+ self.key,
124
+ orjson.dumps(LockValue(self.value, time.time() + self.expire_timeout)),
125
+ )
126
+
110
127
  async def _refresh_task(self) -> None:
111
128
  while True:
112
129
  try:
113
130
  await asyncio.sleep(self.refresh_timeout)
114
131
  async with self.driver.transaction() as txn:
115
- await self._set_lock_value(txn)
132
+ await self._update_lock_value(txn)
116
133
  await txn.commit()
117
134
  except (asyncio.CancelledError, RuntimeError):
118
135
  return
@@ -125,21 +142,34 @@ class _Lock:
125
142
  await txn.delete(self.key)
126
143
  await txn.commit()
127
144
 
128
- async def is_locked(self, key: str) -> bool:
145
+ async def is_locked(self) -> bool:
129
146
  async with get_driver().transaction(read_only=True) as txn:
130
147
  lock_data = await self.get_lock_data(txn)
131
- return lock_data is None or time.time() > lock_data.expires_at
148
+ return lock_data is not None and time.time() < lock_data.expires_at
132
149
 
133
150
 
134
151
  def distributed_lock(
135
152
  key: str,
136
- lock_timeout: float = 60.0, # max time to wait for lock
137
- expire_timeout: float = 30.0, # how long by default the lock will be held without a refresh
138
- refresh_timeout: float = 10.0, # how often to refresh
153
+ lock_timeout: float = 60.0,
154
+ expire_timeout: float = 30.0,
155
+ refresh_timeout: float = 10.0,
139
156
  ) -> _Lock:
157
+ """
158
+ Context manager to get a distributed lock on a key.
159
+
160
+ Params:
161
+ - key: the key to lock with
162
+ - lock_timeout: maximum time to wait for the lock before ResourceLocked is raised.
163
+ - expire_timeout: how long by default the lock will be held without a refresh
164
+ - refresh_timeout: how often to refresh the lock
165
+ """
140
166
  return _Lock(
141
167
  key,
142
168
  lock_timeout=lock_timeout,
143
169
  expire_timeout=expire_timeout,
144
170
  refresh_timeout=refresh_timeout,
145
171
  )
172
+
173
+
174
+ async def is_locked(key: str) -> bool:
175
+ return await distributed_lock(key).is_locked()
@@ -23,7 +23,7 @@ import asyncio
23
23
  from contextlib import asynccontextmanager
24
24
  from typing import AsyncGenerator, Optional
25
25
 
26
- DEFAULT_SCAN_LIMIT = 10
26
+ DEFAULT_SCAN_LIMIT = -1
27
27
  DEFAULT_BATCH_SCAN_LIMIT = 500
28
28
 
29
29
 
@@ -37,18 +37,24 @@ class Transaction:
37
37
  async def commit(self):
38
38
  raise NotImplementedError()
39
39
 
40
- async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]:
40
+ async def batch_get(self, keys: list[str], for_update: bool = False) -> list[Optional[bytes]]:
41
41
  raise NotImplementedError()
42
42
 
43
- async def get(self, key: str) -> Optional[bytes]:
43
+ async def get(self, key: str, for_update: bool = False) -> Optional[bytes]:
44
44
  raise NotImplementedError()
45
45
 
46
46
  async def set(self, key: str, value: bytes):
47
47
  raise NotImplementedError()
48
48
 
49
+ async def insert(self, key: str, value: bytes):
50
+ return await self.set(key, value)
51
+
49
52
  async def delete(self, key: str):
50
53
  raise NotImplementedError()
51
54
 
55
+ async def delete_by_prefix(self, prefix: str) -> None:
56
+ raise NotImplementedError()
57
+
52
58
  def keys(
53
59
  self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True
54
60
  ) -> AsyncGenerator[str, None]:
@@ -74,36 +80,6 @@ class Driver:
74
80
  except Exception:
75
81
  pass
76
82
 
77
- async def begin(self, read_only: bool = False) -> Transaction:
78
- raise NotImplementedError()
79
-
80
83
  @asynccontextmanager
81
- async def transaction(
82
- self, wait_for_abort: bool = True, read_only: bool = False
83
- ) -> AsyncGenerator[Transaction, None]:
84
- """
85
- Use to make sure transaction is always aborted.
86
-
87
- :param wait_for_abort: If True, wait for abort to finish before returning.
88
- If False, abort is done in background (unless there
89
- is an error)
90
- """
91
- txn: Optional[Transaction] = None
92
- error: bool = False
93
- try:
94
- txn = await self.begin(read_only=read_only)
95
- yield txn
96
- except Exception:
97
- error = True
98
- raise
99
- finally:
100
- if txn is not None and txn.open:
101
- if error or wait_for_abort:
102
- await txn.abort()
103
- else:
104
- self._async_abort(txn)
105
-
106
- def _async_abort(self, txn: Transaction):
107
- task = asyncio.create_task(txn.abort())
108
- task.add_done_callback(lambda task: self._abort_tasks.remove(task))
109
- self._abort_tasks.append(task)
84
+ async def transaction(self, read_only: bool = False) -> AsyncGenerator[Transaction, None]:
85
+ yield Transaction()
@@ -17,13 +17,13 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- class ConflictError(Exception):
21
- ...
20
+ class ConflictError(Exception): ...
22
21
 
23
22
 
24
- class NotFoundError(Exception):
25
- ...
23
+ class NotFoundError(Exception): ...
26
24
 
27
25
 
28
- class UnsetUtility(Exception):
29
- ...
26
+ class UnsetUtility(Exception): ...
27
+
28
+
29
+ class MaindbServerError(Exception): ...
@@ -19,7 +19,8 @@
19
19
  #
20
20
  import glob
21
21
  import os
22
- from typing import Optional
22
+ from contextlib import asynccontextmanager
23
+ from typing import AsyncGenerator, Optional
23
24
 
24
25
  from nucliadb.common.maindb.driver import (
25
26
  DEFAULT_BATCH_SCAN_LIMIT,
@@ -105,7 +106,7 @@ class LocalTransaction(Transaction):
105
106
  self.clean()
106
107
  self.open = False
107
108
 
108
- async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]:
109
+ async def batch_get(self, keys: list[str], for_update: bool = False) -> list[Optional[bytes]]:
109
110
  results: list[Optional[bytes]] = []
110
111
  for key in keys:
111
112
  obj = await self.get(key)
@@ -124,7 +125,7 @@ class LocalTransaction(Transaction):
124
125
 
125
126
  return results
126
127
 
127
- async def get(self, key: str) -> Optional[bytes]:
128
+ async def get(self, key: str, for_update: bool = False) -> Optional[bytes]:
128
129
  if key in self.deleted_keys:
129
130
  raise KeyError(f"Not found {key}")
130
131
 
@@ -159,9 +160,15 @@ class LocalTransaction(Transaction):
159
160
  if key in self.modified_keys:
160
161
  del self.modified_keys[key]
161
162
 
162
- async def keys(
163
- self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True
164
- ):
163
+ async def delete_by_prefix(self, prefix: str) -> None:
164
+ keys = []
165
+ for key in self.modified_keys.keys():
166
+ if key.startswith(prefix):
167
+ keys.append(key)
168
+ for key in keys:
169
+ await self.delete(key)
170
+
171
+ async def keys(self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True):
165
172
  prev_key = None
166
173
 
167
174
  get_all_keys = count == -1
@@ -195,7 +202,7 @@ class LocalTransaction(Transaction):
195
202
 
196
203
  async def count(self, match: str) -> int:
197
204
  value = 0
198
- async for _ in self.keys(match, count=-1):
205
+ async for _ in self.keys(match):
199
206
  value += 1
200
207
  return value
201
208
 
@@ -214,7 +221,13 @@ class LocalDriver(Driver):
214
221
  async def finalize(self):
215
222
  pass
216
223
 
217
- async def begin(self, read_only: bool = False) -> LocalTransaction:
224
+ @asynccontextmanager
225
+ async def transaction(self, read_only: bool = False) -> AsyncGenerator[Transaction, None]:
218
226
  if self.url is None:
219
227
  raise AttributeError("Invalid url")
220
- return LocalTransaction(self.url, self)
228
+ txn = LocalTransaction(self.url, self)
229
+ try:
230
+ yield txn
231
+ finally:
232
+ if txn.open:
233
+ await txn.abort()