nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,194 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from typing import Any, Optional
21
-
22
- from nucliadb.common.maindb.driver import (
23
- DEFAULT_BATCH_SCAN_LIMIT,
24
- DEFAULT_SCAN_LIMIT,
25
- Driver,
26
- Transaction,
27
- )
28
-
29
- try:
30
- from redis import asyncio as aioredis
31
-
32
- REDIS = True
33
- except ImportError: # pragma: no cover
34
- REDIS = False
35
-
36
-
37
- class RedisTransaction(Transaction):
38
- modified_keys: dict[str, bytes]
39
- visited_keys: dict[str, bytes]
40
- deleted_keys: list[str]
41
-
42
- def __init__(self, redis: Any, driver: Driver):
43
- self.redis = redis
44
- self.driver = driver
45
- self.modified_keys = {}
46
- self.visited_keys = {}
47
- self.deleted_keys = []
48
- self.open = True
49
-
50
- def clean(self):
51
- self.modified_keys.clear()
52
- self.visited_keys.clear()
53
- self.deleted_keys.clear()
54
-
55
- async def abort(self):
56
- self.clean()
57
- self.open = False
58
-
59
- async def commit(self):
60
- if len(self.modified_keys) == 0 and len(self.deleted_keys) == 0:
61
- self.clean()
62
- return
63
-
64
- not_to_check = []
65
- async with self.redis.pipeline(transaction=True) as pipe:
66
- count = 0
67
- for key, value in self.modified_keys.items():
68
- pipe = pipe.set(key.encode(), value)
69
- count += 1
70
- for key in self.deleted_keys:
71
- pipe = pipe.delete(key.encode())
72
- not_to_check.append(count)
73
- count += 1
74
- oks = await pipe.execute()
75
-
76
- for index, ok in enumerate(oks):
77
- # We do no check deleted if its already deleted
78
- if index not in not_to_check:
79
- assert ok
80
- self.clean()
81
- self.open = False
82
-
83
- async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]:
84
- if len(keys) == 0:
85
- return []
86
-
87
- bytes_keys: list[bytes] = [x.encode() for x in keys]
88
- results = await self.redis.mget(bytes_keys)
89
-
90
- for idx, key in enumerate(keys):
91
- if key in self.deleted_keys:
92
- results[idx] = None
93
- if key in self.modified_keys:
94
- results[idx] = self.modified_keys[key]
95
- if key in self.visited_keys:
96
- results[idx] = self.visited_keys[key]
97
-
98
- return results
99
-
100
- async def get(self, key: str) -> Optional[bytes]:
101
- if key in self.deleted_keys:
102
- raise KeyError(f"Not found {key}")
103
-
104
- if key in self.modified_keys:
105
- return self.modified_keys[key]
106
-
107
- if key in self.visited_keys:
108
- return self.visited_keys[key]
109
-
110
- else:
111
- obj = await self.redis.get(key.encode())
112
- self.visited_keys[key] = obj
113
- return obj
114
-
115
- async def set(self, key: str, value: bytes):
116
- if key in self.deleted_keys:
117
- self.deleted_keys.remove(key)
118
-
119
- if key in self.visited_keys:
120
- del self.visited_keys[key]
121
-
122
- self.modified_keys[key] = value
123
-
124
- async def delete(self, key: str):
125
- if key not in self.deleted_keys:
126
- self.deleted_keys.append(key)
127
-
128
- if key in self.visited_keys:
129
- del self.visited_keys[key]
130
-
131
- if key in self.modified_keys:
132
- del self.modified_keys[key]
133
-
134
- async def keys(
135
- self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True
136
- ):
137
- prev_key = None
138
-
139
- get_all_keys = count == -1
140
- _count = DEFAULT_BATCH_SCAN_LIMIT if get_all_keys else count
141
-
142
- async with self.redis.client() as conn:
143
- async for key in conn.scan_iter(match=match.encode() + b"*", count=_count):
144
- str_key = key.decode()
145
- if str_key in self.deleted_keys:
146
- continue
147
- for new_key in self.modified_keys.keys():
148
- if (
149
- match in new_key
150
- and prev_key is not None
151
- and prev_key < new_key
152
- and new_key < str_key
153
- ):
154
- yield new_key
155
-
156
- yield str_key
157
- prev_key = str_key
158
- if prev_key is None:
159
- for new_key in self.modified_keys.keys():
160
- if match in new_key:
161
- yield new_key
162
-
163
- async def count(self, match: str) -> int:
164
- """
165
- This is not efficient but it works and redis is mostly for experiments
166
- and should not be used for production environments
167
- """
168
- value = 0
169
- async for _ in self.keys(match, count=-1):
170
- value += 1
171
- return value
172
-
173
-
174
- class RedisDriver(Driver):
175
- redis = None
176
- url = None
177
-
178
- def __init__(self, url: str):
179
- if REDIS is False:
180
- raise ImportError("Redis is not installed")
181
- self.url = url
182
-
183
- async def initialize(self):
184
- if self.initialized is False and self.redis is None:
185
- self.redis = aioredis.from_url(self.url)
186
- self.initialized = True
187
-
188
- async def finalize(self):
189
- if self.initialized is True:
190
- await self.redis.close()
191
- self.initialized = False
192
-
193
- async def begin(self, read_only: bool = False) -> RedisTransaction:
194
- return RedisTransaction(self.redis, driver=self)
@@ -1,412 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from __future__ import annotations
21
-
22
- import asyncio
23
- import contextlib
24
- import logging
25
- import random
26
- from typing import Any, List, Optional, Union
27
-
28
- import backoff
29
-
30
- from nucliadb.common.maindb.driver import (
31
- DEFAULT_BATCH_SCAN_LIMIT,
32
- DEFAULT_SCAN_LIMIT,
33
- Driver,
34
- Transaction,
35
- )
36
- from nucliadb.common.maindb.exceptions import ConflictError
37
- from nucliadb_telemetry import metrics
38
-
39
- try:
40
- from tikv_client import asynchronous # type: ignore
41
-
42
- TiKV = True
43
- except ImportError: # pragma: no cover
44
- TiKV = False
45
-
46
-
47
class LeaderNotFoundError(Exception):
    """
    Signals that the tikv client reported that the leader of a region was not found.

    The condition is transient, so the failed operation should be retried.
    """
54
-
55
-
56
# Metrics observer shared by every tikv client call in this module. Call
# sites pass the operation name through the "type" label (e.g. "get",
# "commit"). `error_mappings` associates a name with each exception class
# (presumably used to tag failed operations; confirm in nucliadb_telemetry).
tikv_observer = metrics.Observer(
    "tikv_client",
    labels={"type": ""},
    error_mappings={
        "conflict_error": ConflictError,
        "timeout_error": TimeoutError,
        "leader_not_found_error": LeaderNotFoundError,
    },
)
# Module-level logger.
logger = logging.getLogger(__name__)
66
-
67
-
68
class TiKVDataLayer:
    """
    Wrapper around a tikv connection that records each call with
    `tikv_observer` and translates the client's generic exceptions
    (see `tikv_error_handler`).
    """

    def __init__(
        self, connection: Union[asynchronous.RawClient, asynchronous.Transaction]
    ):
        self.connection = connection

    async def abort(self):
        """Roll the connection back; a failing rollback is logged, not raised."""
        with tikv_observer({"type": "rollback"}):
            try:
                await self.connection.rollback()
            except Exception:
                logger.exception("Error rolling back transaction")

    async def commit(self):
        """Commit the connection, translating tikv errors."""
        with tikv_observer({"type": "commit"}):
            with self.tikv_error_handler():
                await self.connection.commit()

    async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]:
        """
        Fetch several keys in one call.

        The result follows the order of `keys`; keys the client did not
        return are reported as None.
        """
        encoded_keys: list[bytes] = [key.encode() for key in keys]
        with tikv_observer({"type": "batch_get"}):
            with self.tikv_error_handler():
                pairs = await self.connection.batch_get(encoded_keys)
                found = {raw_key.decode(): value for raw_key, value in pairs}
        return [found.get(key) for key in keys]

    # Retried on TimeoutError / LeaderNotFoundError, at most 2 tries in total.
    @backoff.on_exception(
        backoff.expo,
        (TimeoutError, LeaderNotFoundError),
        jitter=backoff.random_jitter,
        max_tries=2,
    )
    async def get(self, key: str) -> Optional[bytes]:
        """Return the value stored under `key` as given by the client."""
        with tikv_observer({"type": "get"}):
            with self.tikv_error_handler():
                return await self.connection.get(key.encode())

    @contextlib.contextmanager
    def tikv_error_handler(self):
        """
        Translate tikv_client errors into specific exception types.

        The tikv_client library raises the generic Exception class with
        different messages instead of dedicated exceptions, so the message
        text has to be inspected to tell the errors apart. Anything that is
        not recognized is re-raised untouched.
        """
        try:
            yield
        except Exception as exc:
            message = str(exc)
            if "WriteConflict" in message:
                raise ConflictError(message) from exc
            if "4-DEADLINE_EXCEEDED" in message:
                raise TimeoutError(message) from exc
            if "Leader of region" in message and "not found" in message:
                raise LeaderNotFoundError(message) from exc
            raise

    async def set(self, key: str, value: bytes) -> None:
        """Store `value` under `key`."""
        with tikv_observer({"type": "put"}):
            with self.tikv_error_handler():
                await self.connection.put(key.encode(), value)

    async def delete(self, key: str) -> None:
        """Delete `key`."""
        with tikv_observer({"type": "delete"}):
            with self.tikv_error_handler():
                await self.connection.delete(key.encode())

    async def keys(
        self,
        match: str,
        count: int = DEFAULT_SCAN_LIMIT,
        include_start: bool = True,
    ):
        """
        Yield keys starting with `match`, up to a configurable limit.

        With count=-1 the scan keeps going in batches of
        DEFAULT_BATCH_SCAN_LIMIT until every matching key was yielded.
        With any other value a single scan of at most `count` keys is done.
        """
        fetch_everything = count == -1
        batch_limit = DEFAULT_BATCH_SCAN_LIMIT if fetch_everything else count
        cursor = match.encode()
        inclusive = include_start

        while True:
            with tikv_observer({"type": "scan_keys"}), self.tikv_error_handler():
                batch = await self.connection.scan_keys(
                    start=cursor,
                    end=None,
                    limit=batch_limit,
                    include_start=inclusive,
                )
            for raw_key in batch:
                decoded = raw_key.decode()
                if not decoded.startswith(match):
                    # The scan has no upper bound, so the first key without
                    # the prefix marks the end of the matching range.
                    return
                yield decoded

            # A full batch may be followed by more keys, but only keep
            # scanning when the caller asked for everything.
            if not (fetch_everything and len(batch) == batch_limit):
                return
            cursor = batch[-1]
            inclusive = False

    async def count(self, match: str) -> int:
        """
        Count the keys starting with `match`, scanning in batches of 5000
        and using a binary search on the batch that crosses the end of the
        matching range.
        """
        prefix = match.encode()
        cursor = prefix
        inclusive = True
        batch_size = 5000

        total = 0
        while True:
            with tikv_observer({"type": "scan_keys"}), self.tikv_error_handler():
                batch = await self.connection.scan_keys(
                    start=cursor,
                    end=None,
                    limit=batch_size,
                    include_start=inclusive,
                )
            if len(batch) == 0:
                break

            if batch[-1].startswith(prefix):
                # The whole batch matches.
                total += len(batch)
                if len(batch) != batch_size:
                    # Short batch: nothing left to scan.
                    break
                cursor = batch[-1]
                inclusive = False
                continue

            # The batch runs past the matching range: locate the last
            # matching position with a binary search and stop.
            low, high = 0, len(batch) - 1
            last_match = -1
            while low <= high:
                middle = low + (high - low) // 2
                if batch[middle].startswith(prefix):
                    last_match = middle
                    low = middle + 1
                else:
                    high = middle - 1
            total += last_match + 1
            break
        return total
226
-
227
-
228
class TiKVTransaction(Transaction):
    """Read/write transaction that delegates its operations to a `TiKVDataLayer`."""

    driver: TiKVDriver

    def __init__(self, txn: Any, driver: TiKVDriver):
        self.txn = txn
        self.driver = driver
        self.data_layer = TiKVDataLayer(txn)
        self.open = True

    async def abort(self):
        """Roll back the transaction; does nothing if it is already closed."""
        if self.open:
            await self.data_layer.abort()
            self.open = False

    async def commit(self):
        """Commit the transaction and mark it closed."""
        assert self.open
        await self.data_layer.commit()
        self.open = False

    async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]:
        assert self.open
        values = await self.data_layer.batch_get(keys)
        return values

    # Retried on TimeoutError / LeaderNotFoundError, at most 2 tries in total.
    @backoff.on_exception(
        backoff.expo,
        (TimeoutError, LeaderNotFoundError),
        jitter=backoff.random_jitter,
        max_tries=2,
    )
    async def get(self, key: str) -> Optional[bytes]:
        assert self.open
        value = await self.data_layer.get(key)
        return value

    async def set(self, key: str, value: bytes) -> None:
        assert self.open
        return await self.data_layer.set(key, value)

    async def delete(self, key: str) -> None:
        assert self.open
        return await self.data_layer.delete(key)

    async def keys(
        self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True
    ):
        """Yield keys starting with `match`, read from a fresh snapshot."""
        assert self.open
        # XXX the scan must use a connection outside of the current txn,
        # so a snapshot is taken from one of the driver's connection holders.
        snapshot = await self.driver.get_connection_holder().get_snapshot()
        snapshot_layer = TiKVDataLayer(snapshot)

        async for key in snapshot_layer.keys(match, count, include_start):
            yield key

    async def count(self, match: str) -> int:
        assert self.open
        total = await self.data_layer.count(match)
        return total
285
-
286
-
287
class ReadOnlyTiKVTransaction(Transaction):
    """Transaction over a tikv snapshot: reads are allowed, writes raise."""

    driver: TiKVDriver

    def __init__(self, connection: asynchronous.Snapshot, driver: TiKVDriver):
        self.connection = connection
        self.data_layer = TiKVDataLayer(connection)
        self.driver = driver
        self.open = True

    async def abort(self):
        # Read only transactions are implemented as snapshots, which are
        # read only and isolated: there is nothing to roll back, the
        # transaction is simply flagged as closed.
        self.open = False

    async def commit(self):
        raise Exception("Cannot commit transaction in read only mode")

    async def batch_get(self, keys: list[str]) -> list[Optional[bytes]]:
        assert self.open
        values = await self.data_layer.batch_get(keys)
        return values

    async def get(self, key: str) -> Optional[bytes]:
        assert self.open
        value = await self.data_layer.get(key)
        return value

    async def set(self, key: str, value: bytes) -> None:
        raise Exception("Cannot set in read only transaction")

    async def delete(self, key: str) -> None:
        raise Exception("Cannot delete in read only transaction")

    async def keys(
        self, match: str, count: int = DEFAULT_SCAN_LIMIT, include_start: bool = True
    ):
        """Yield keys starting with `match` as seen by the snapshot."""
        assert self.open
        matching = self.data_layer.keys(match, count, include_start)
        async for key in matching:
            yield key

    async def count(self, match: str) -> int:
        assert self.open
        total = await self.data_layer.count(match)
        return total
328
-
329
-
330
class ConnectionHolder:
    """
    Owns one tikv `TransactionClient` and reconnects it when obtaining a
    snapshot or a transaction fails.
    """

    _txn_connection: asynchronous.TransactionClient

    def __init__(self, url: list[str]):
        self.url = url
        self.connect_lock = asyncio.Lock()

    async def initialize(self) -> None:
        """Connect to tikv and store the resulting client."""
        self._txn_connection = await asynchronous.TransactionClient.connect(self.url)

    async def _wait_for_reconnect(self) -> None:
        """If a reconnection holds the lock, wait until it is released."""
        if self.connect_lock.locked():  # pragma: no cover
            async with self.connect_lock:
                ...

    async def get_snapshot(
        self, timestamp: Optional[float] = None, retried: bool = False
    ) -> asynchronous.Snapshot:
        """
        Return a snapshot at `timestamp` (the client's current timestamp
        when None). On failure the connection is re-established and the
        call is retried once; a failure of the retry is raised.
        """
        await self._wait_for_reconnect()
        try:
            if timestamp is None:
                with tikv_observer({"type": "current_timestamp"}):
                    timestamp = await self._txn_connection.current_timestamp()
            return self._txn_connection.snapshot(timestamp, pessimistic=False)
        except Exception:
            if retried:
                raise
            logger.exception(
                "Error getting snapshot for tikv. Retrying once and then failing."
            )
            await self.reinitialize()
            return await self.get_snapshot(timestamp, retried=True)

    async def begin_transaction(self) -> asynchronous.Transaction:
        """
        Begin an optimistic transaction. On failure the connection is
        re-established and `begin` is attempted one more time.
        """
        await self._wait_for_reconnect()
        try:
            # pessimistic=False means faster but more conflicts
            with tikv_observer({"type": "begin"}):
                return await self._txn_connection.begin(pessimistic=False)
        except Exception:
            logger.exception(
                "Error getting transaction for tikv. Retrying once and then failing."
            )
            await self.reinitialize()
            return await self._txn_connection.begin(pessimistic=False)

    async def reinitialize(self) -> None:
        """
        Re-establish the connection under `connect_lock`.

        When the lock is already held, someone else is establishing the
        connection: wait for the lock and return without connecting again.
        """
        already_connecting = self.connect_lock.locked()
        async with self.connect_lock:
            if already_connecting:
                return
            logger.warning("Reconnecting to TiKV")
            await self.initialize()
384
-
385
-
386
class TiKVDriver(Driver):
    """Driver that keeps a pool of `ConnectionHolder`s and opens transactions on them."""

    def __init__(self, url: List[str], pool_size: int = 3):
        # TiKV is the module-level flag set by the guarded tikv_client import.
        if TiKV is False:
            raise ImportError("TiKV is not installed")
        self.url = url
        self.pool: list[ConnectionHolder] = []
        self.pool_size = pool_size

    async def initialize(self):
        """Create `pool_size` connection holders and connect them one by one."""
        holders = [ConnectionHolder(self.url) for _ in range(self.pool_size)]
        self.pool = holders
        for holder in holders:
            await holder.reinitialize()

    async def finalize(self):
        """Empty the connection pool."""
        self.pool.clear()

    def get_connection_holder(self) -> ConnectionHolder:
        """Pick a random holder from the pool."""
        return random.choice(self.pool)

    async def begin(
        self, read_only: bool = False
    ) -> Union[TiKVTransaction, ReadOnlyTiKVTransaction]:
        """
        Open a transaction on a randomly chosen connection holder.

        NOTE: `read_only` is currently ignored. The snapshot-based path
        (`ReadOnlyTiKVTransaction(await conn.get_snapshot(), self)`) is
        disabled, so a regular `TiKVTransaction` is always returned.
        """
        holder = self.get_connection_holder()
        txn = await holder.begin_transaction()
        return TiKVTransaction(txn, self)
@@ -1,58 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from nucliadb_protos.resources_pb2 import CloudFile, FieldLayout
21
-
22
- from nucliadb.ingest.fields.base import Field
23
- from nucliadb_utils.storages.storage import StorageField
24
-
25
-
26
class NotTheSameFormat(Exception):
    """Raised when a layout payload's format differs from the stored layout's format."""
28
-
29
-
30
class Layout(Field):
    """Layout field: stores a `FieldLayout` and applies incoming payloads as diffs."""

    pbklass = FieldLayout
    value: FieldLayout
    type: str = "l"

    async def set_value(self, payload: FieldLayout):
        """
        Merge `payload` into the stored layout and persist the result.

        Blocks listed in `payload.body.deleted_blocks` are removed, blocks in
        `payload.body.blocks` are added or overwritten. Raises
        NotTheSameFormat when a stored layout exists with another format.
        """
        # Diff support: start from what is stored, if anything.
        stored = await self.get_value()
        if stored and payload.format != stored.format:
            raise NotTheSameFormat()
        if stored is None:
            stored = FieldLayout()
            stored.format = payload.format

        for removed in payload.body.deleted_blocks:
            if removed in stored.body.blocks:
                del stored.body.blocks[removed]

        for ident, pbblock in payload.body.blocks.items():
            if self.storage.needs_move(pbblock.file, self.kbid):
                # The block's file is normalized into this field's storage
                # location and the block is updated to reference the result.
                destination: StorageField = self.storage.layout_field(
                    self.kbid, self.uuid, self.id, ident
                )
                normalized: CloudFile = await self.storage.normalize_binary(
                    pbblock.file, destination
                )
                pbblock.file.CopyFrom(normalized)
            stored.body.blocks[ident].CopyFrom(pbblock)
        await self.db_set_value(stored)

    async def get_value(self) -> FieldLayout:
        """Return the stored layout as returned by `db_get_value`."""
        stored = await self.db_get_value()
        return stored
@@ -1,30 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
# Plugin modules pytest loads for this test package; pytest imports each
# dotted module name listed here.
pytest_plugins = [
    "pytest_docker_fixtures",
    "nucliadb_utils.tests.nats",
    "nucliadb.ingest.tests.fixtures",
    "nucliadb.tests.fixtures",
    "nucliadb.tests.tikv",
    "nucliadb_utils.tests.conftest",
    "nucliadb_utils.tests.gcs",
    "nucliadb_utils.tests.s3",
    "nucliadb_telemetry.tests.telemetry",
]