nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418)
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -22,23 +22,20 @@ import logging
22
22
  from typing import Optional
23
23
 
24
24
  from nucliadb.common import locking
25
- from nucliadb.common.cluster.rollover import rollover_kb_shards
25
+ from nucliadb.common.cluster.rollover import rollover_kb_index
26
26
  from nucliadb.common.cluster.settings import in_standalone_mode
27
+ from nucliadb.common.maindb.pg import PGDriver
27
28
  from nucliadb.migrator.context import ExecutionContext
28
- from nucliadb.migrator.utils import get_migrations
29
+ from nucliadb.migrator.utils import get_migrations, get_pg_migrations
29
30
  from nucliadb_telemetry import errors, metrics
30
31
 
31
- migration_observer = metrics.Observer(
32
- "nucliadb_migrations", labels={"type": "kb", "target_version": ""}
33
- )
32
+ migration_observer = metrics.Observer("nucliadb_migrations", labels={"type": "kb", "target_version": ""})
34
33
 
35
34
 
36
35
  logger = logging.getLogger(__name__)
37
36
 
38
37
 
39
- async def run_kb_migrations(
40
- context: ExecutionContext, kbid: str, target_version: int
41
- ) -> None:
38
+ async def run_kb_migrations(context: ExecutionContext, kbid: str, target_version: int) -> None:
42
39
  async with locking.distributed_lock(f"migration-{kbid}"):
43
40
  kb_info = await context.data_manager.get_kb_info(kbid)
44
41
  if kb_info is None:
@@ -46,9 +43,7 @@ async def run_kb_migrations(
46
43
  await context.data_manager.delete_kb_migration(kbid=kbid)
47
44
  return
48
45
 
49
- migrations = get_migrations(
50
- from_version=kb_info.current_version, to_version=target_version
51
- )
46
+ migrations = get_migrations(from_version=kb_info.current_version, to_version=target_version)
52
47
 
53
48
  for migration in migrations:
54
49
  migration_info = {
@@ -59,14 +54,10 @@ async def run_kb_migrations(
59
54
 
60
55
  try:
61
56
  logger.info("Migrating KB", extra=migration_info)
62
- with migration_observer(
63
- {"type": "kb", "target_version": str(migration.version)}
64
- ):
65
- await migration.module.migrate_kb(context, kbid) # type: ignore
57
+ with migration_observer({"type": "kb", "target_version": str(migration.version)}):
58
+ await migration.module.migrate_kb(context, kbid)
66
59
  logger.info("Finished KB Migration", extra=migration_info)
67
- await context.data_manager.update_kb_info(
68
- kbid=kbid, current_version=migration.version
69
- )
60
+ await context.data_manager.update_kb_info(kbid=kbid, current_version=migration.version)
70
61
  except Exception as exc:
71
62
  errors.capture_exception(exc)
72
63
  logger.exception("Failed to migrate KB", extra=migration_info)
@@ -74,9 +65,7 @@ async def run_kb_migrations(
74
65
 
75
66
  refreshed_kb_info = await context.data_manager.get_kb_info(kbid=kbid)
76
67
  if refreshed_kb_info is None:
77
- logger.warning(
78
- "KB not found. This should not happen.", extra={"kbid": kbid}
79
- )
68
+ logger.warning("KB not found. This should not happen.", extra={"kbid": kbid})
80
69
  return
81
70
  assert refreshed_kb_info.current_version == target_version
82
71
 
@@ -88,7 +77,7 @@ async def run_all_kb_migrations(context: ExecutionContext, target_version: int)
88
77
  Schedule all KB migrations to run in parallel. Only a certain number of migrations will run at the same time.
89
78
  If any of the migrations fail, the whole process will fail.
90
79
  """
91
- to_migrate = await context.data_manager.get_kb_migrations(limit=-1)
80
+ to_migrate = await context.data_manager.get_kb_migrations()
92
81
 
93
82
  if len(to_migrate) == 0:
94
83
  return
@@ -154,13 +143,9 @@ async def run_global_migrations(context: ExecutionContext, target_version: int)
154
143
  }
155
144
  try:
156
145
  logger.info("Migrating", extra=migration_info)
157
- with migration_observer(
158
- {"type": "global", "target_version": str(migration.version)}
159
- ):
160
- await migration.module.migrate(context) # type: ignore
161
- await context.data_manager.update_global_info(
162
- current_version=migration.version
163
- )
146
+ with migration_observer({"type": "global", "target_version": str(migration.version)}):
147
+ await migration.module.migrate(context)
148
+ await context.data_manager.update_global_info(current_version=migration.version)
164
149
  logger.info("Finished migration", extra=migration_info)
165
150
  except Exception as exc:
166
151
  errors.capture_exception(exc)
@@ -177,7 +162,7 @@ async def run_rollover_in_parallel(
177
162
  ) -> None:
178
163
  async with max_concurrent:
179
164
  try:
180
- await rollover_kb_shards(context, kbid)
165
+ await rollover_kb_index(context, kbid)
181
166
  await context.data_manager.delete_kb_rollover(kbid=kbid)
182
167
  except Exception as exc:
183
168
  errors.capture_exception(exc)
@@ -221,7 +206,39 @@ async def run_rollovers(context: ExecutionContext) -> None:
221
206
  raise Exception(f"Failed to migrate KBs. Failures: {failures}")
222
207
 
223
208
 
209
+ async def run_pg_schema_migrations(driver: PGDriver):
210
+ migrations = get_pg_migrations()
211
+
212
+ # The migration uses two transactions. The former is only used to get a lock (pg_advisory_lock)
213
+ # without having to worry about correctly unlocking it (postgres unlocks it when the transaction ends)
214
+ async with driver.transaction() as tx_lock, tx_lock.connection.cursor() as cur_lock: # type: ignore[attr-defined]
215
+ await cur_lock.execute(
216
+ "CREATE TABLE IF NOT EXISTS migrations (version INT PRIMARY KEY, migrated_at TIMESTAMP NOT NULL DEFAULT NOW())"
217
+ )
218
+ await tx_lock.commit()
219
+ await cur_lock.execute("SELECT pg_advisory_xact_lock(3116614845278015934)")
220
+
221
+ await cur_lock.execute("SELECT version FROM migrations")
222
+ migrated = [r[0] for r in await cur_lock.fetchall()]
223
+
224
+ for version, migration in migrations:
225
+ if version in migrated:
226
+ continue
227
+
228
+ # Gets a new transaction for each migration, so if they get interrupted we at least
229
+ # save the state of the last finished transaction
230
+ async with driver.transaction() as tx, tx.connection.cursor() as cur: # type: ignore[attr-defined]
231
+ await migration.migrate(tx)
232
+ await cur.execute("INSERT INTO migrations (version) VALUES (%s)", (version,))
233
+ await tx.commit()
234
+
235
+
224
236
  async def run(context: ExecutionContext, target_version: Optional[int] = None) -> None:
237
+ # Run schema migrations first, since they create the `resources` table needed for the lock below
238
+ # Schema migrations use their own locking system
239
+ if isinstance(context.kv_driver, PGDriver):
240
+ await run_pg_schema_migrations(context.kv_driver)
241
+
225
242
  async with locking.distributed_lock(locking.MIGRATIONS_LOCK):
226
243
  # before we move to managed migrations, see if there are any rollovers
227
244
  # scheduled and run them
@@ -17,13 +17,12 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ import importlib
20
21
  import logging
21
22
  import os
22
23
  import types
23
24
  from functools import lru_cache
24
25
 
25
- import migrations
26
-
27
26
  from .models import Migration
28
27
 
29
28
  logger = logging.getLogger(__name__)
@@ -33,14 +32,27 @@ MIGRATION_DIR = os.path.sep.join(
33
32
  )
34
33
 
35
34
 
35
+ def get_pg_migrations() -> list[tuple[int, types.ModuleType]]:
36
+ output = []
37
+ for filename in os.listdir(os.path.join(MIGRATION_DIR, "pg")):
38
+ if filename.endswith(".py") and filename != "__init__.py":
39
+ module_name = filename[:-3]
40
+ version = int(module_name.split("_")[0])
41
+ module = importlib.import_module(f"migrations.pg.{module_name}")
42
+ if not hasattr(module, "migrate"):
43
+ raise Exception(f"Missing `migrate` function in {module_name}")
44
+ output.append((version, module))
45
+ output.sort()
46
+ return output
47
+
48
+
36
49
  def get_migration_modules() -> list[tuple[types.ModuleType, int]]:
37
50
  output = []
38
51
  for filename in os.listdir(MIGRATION_DIR):
39
52
  if filename.endswith(".py") and filename != "__init__.py":
40
53
  module_name = filename[:-3]
41
54
  version = int(module_name.split("_")[0])
42
- __import__(f"migrations.{module_name}")
43
- module = getattr(migrations, module_name)
55
+ module = importlib.import_module(f"migrations.{module_name}")
44
56
  if not hasattr(module, "migrate"):
45
57
  raise Exception(f"Missing `migrate` function in {module_name}")
46
58
  if not hasattr(module, "migrate_kb"):
@@ -49,17 +61,13 @@ def get_migration_modules() -> list[tuple[types.ModuleType, int]]:
49
61
  return output
50
62
 
51
63
 
52
- def get_migrations(
53
- from_version: int = 0, to_version: int = 99999999
54
- ) -> list[Migration]:
64
+ def get_migrations(from_version: int = 0, to_version: int = 99999999) -> list[Migration]:
55
65
  migrations: list[Migration] = []
56
66
  for module, version in get_migration_modules():
57
67
  migrations.append(Migration(version=version, module=module))
58
68
 
59
69
  migrations.sort(key=lambda m: m.version)
60
- return [
61
- m for m in migrations if m.version > from_version and m.version <= to_version
62
- ]
70
+ return [m for m in migrations if m.version > from_version and m.version <= to_version]
63
71
 
64
72
 
65
73
  @lru_cache(maxsize=None)
@@ -18,10 +18,9 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import asyncio
21
+ import importlib.metadata
21
22
  from typing import AsyncGenerator
22
23
 
23
- import pkg_resources
24
-
25
24
  from nucliadb.common.cluster.exceptions import NodeError, ShardNotFound
26
25
  from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
27
26
  from nucliadb.common.maindb.driver import Driver
@@ -31,6 +30,9 @@ from nucliadb.ingest.orm.knowledgebox import (
31
30
  KB_TO_DELETE,
32
31
  KB_TO_DELETE_BASE,
33
32
  KB_TO_DELETE_STORAGE_BASE,
33
+ KB_VECTORSET_TO_DELETE,
34
+ KB_VECTORSET_TO_DELETE_BASE,
35
+ RESOURCE_TO_DELETE_STORAGE_BASE,
34
36
  KnowledgeBox,
35
37
  )
36
38
  from nucliadb_telemetry import errors
@@ -41,7 +43,7 @@ from nucliadb_utils.utilities import get_storage
41
43
 
42
44
  async def _iter_keys(driver: Driver, match: str) -> AsyncGenerator[str, None]:
43
45
  async with driver.transaction(read_only=True) as keys_txn:
44
- async for key in keys_txn.keys(match=match, count=-1):
46
+ async for key in keys_txn.keys(match=match):
45
47
  yield key
46
48
 
47
49
 
@@ -52,9 +54,7 @@ async def purge_kb(driver: Driver):
52
54
  try:
53
55
  kbid = key.split("/")[2]
54
56
  except Exception:
55
- logger.warning(
56
- f" X Skipping purge {key}, wrong key format, expected {KB_TO_DELETE_BASE}"
57
- )
57
+ logger.warning(f" X Skipping purge {key}, wrong key format, expected {KB_TO_DELETE_BASE}")
58
58
  continue
59
59
 
60
60
  try:
@@ -62,15 +62,11 @@ async def purge_kb(driver: Driver):
62
62
  logger.info(f" √ Successfully Purged {kbid}")
63
63
  except ShardNotFound as exc:
64
64
  errors.capture_exception(exc)
65
- logger.error(
66
- f" X At least one shard was unavailable while purging {kbid}, skipping"
67
- )
65
+ logger.error(f" X At least one shard was unavailable while purging {kbid}, skipping")
68
66
  continue
69
67
  except NodeError as exc:
70
68
  errors.capture_exception(exc)
71
- logger.error(
72
- f" X At least one node was unavailable while purging {kbid}, skipping"
73
- )
69
+ logger.error(f" X At least one node was unavailable while purging {kbid}, skipping")
74
70
  continue
75
71
 
76
72
  except Exception as exc:
@@ -82,10 +78,10 @@ async def purge_kb(driver: Driver):
82
78
 
83
79
  # Now delete the tikv delete mark
84
80
  try:
85
- txn = await driver.begin()
86
- key_to_purge = KB_TO_DELETE.format(kbid=kbid)
87
- await txn.delete(key_to_purge)
88
- await txn.commit()
81
+ async with driver.transaction() as txn:
82
+ key_to_purge = KB_TO_DELETE.format(kbid=kbid)
83
+ await txn.delete(key_to_purge)
84
+ await txn.commit()
89
85
  logger.info(f" √ Deleted {key_to_purge}")
90
86
  except Exception as exc:
91
87
  errors.capture_exception(exc)
@@ -112,16 +108,12 @@ async def purge_kb_storage(driver: Driver, storage: Storage):
112
108
 
113
109
  delete_marker = False
114
110
  if conflict:
115
- logger.info(
116
- f" . Nothing was deleted for {key}, (Bucket not yet empty), will try next time"
117
- )
111
+ logger.info(f" . Nothing was deleted for {key}, (Bucket not yet empty), will try next time")
118
112
  # Just in case something failed while setting a lifecycle policy to
119
113
  # remove all elements from the bucket, reschedule it
120
114
  await storage.schedule_delete_kb(kbid)
121
115
  elif not deleted:
122
- logger.info(
123
- f" ! Expected bucket for {key} was not found, will delete marker"
124
- )
116
+ logger.info(f" ! Expected bucket for {key} was not found, will delete marker")
125
117
  delete_marker = True
126
118
  elif deleted:
127
119
  logger.info(" √ Bucket successfully deleted")
@@ -129,19 +121,122 @@ async def purge_kb_storage(driver: Driver, storage: Storage):
129
121
 
130
122
  if delete_marker:
131
123
  try:
132
- txn = await driver.begin()
133
- await txn.delete(key)
124
+ async with driver.transaction() as txn:
125
+ await txn.delete(key)
126
+ await txn.commit()
134
127
  logger.info(f" √ Deleted storage deletion marker {key}")
135
128
  except Exception as exc:
136
129
  errors.capture_exception(exc)
137
130
  logger.info(f" X Error while deleting key {key}")
138
- await txn.abort()
139
- else:
140
- await txn.commit()
141
131
 
142
132
  logger.info("FINISH PURGING KB STORAGE")
143
133
 
144
134
 
135
+ async def purge_deleted_resource_storage(driver: Driver, storage: Storage) -> None:
136
+ """
137
+ Remove from storage all resources marked as deleted.
138
+
139
+ Does not return a value; the number of resources purged in each batch is logged.
140
+ """
141
+ logger.info("Starting purge of deleted resource storage")
142
+ to_purge = await _count_resources_storage_to_purge(driver)
143
+ logger.info(f"Found {to_purge} resources to purge")
144
+ while True:
145
+ try:
146
+ purged = await _purge_resources_storage_batch(driver, storage, batch_size=100)
147
+ if not purged:
148
+ logger.info("No more resources to purge found")
149
+ return
150
+ logger.info(f"Purged {purged} resources")
151
+
152
+ except asyncio.CancelledError:
153
+ logger.info("Purge of deleted resource storage was cancelled")
154
+ return
155
+
156
+
157
+ async def _count_resources_storage_to_purge(driver: Driver) -> int:
158
+ """
159
+ Count the number of resources marked as deleted in storage.
160
+ """
161
+ async with driver.transaction(read_only=True) as txn:
162
+ return await txn.count(match=RESOURCE_TO_DELETE_STORAGE_BASE)
163
+
164
+
165
+ async def _purge_resources_storage_batch(driver: Driver, storage: Storage, batch_size: int = 100) -> int:
166
+ """
167
+ Remove from storage a batch of resources marked as deleted. Returns the
168
+ number of resources purged.
169
+ """
170
+ # Get the keys of the resources to delete in batches of batch_size (default 100)
171
+ to_delete_batch = []
172
+ async with driver.transaction(read_only=True) as txn:
173
+ async for key in txn.keys(match=RESOURCE_TO_DELETE_STORAGE_BASE, count=batch_size):
174
+ to_delete_batch.append(key)
175
+
176
+ if not to_delete_batch:
177
+ return 0
178
+
179
+ # Delete the resources blobs from storage
180
+ logger.info(f"Purging {len(to_delete_batch)} deleted resources")
181
+ tasks = []
182
+ for key in to_delete_batch:
183
+ kbid, resource_id = key.split("/")[-2:]
184
+ tasks.append(asyncio.create_task(storage.delete_resource(kbid, resource_id)))
185
+ await asyncio.gather(*tasks)
186
+
187
+ # Delete the schedule-to-delete keys
188
+ async with driver.transaction() as txn:
189
+ for key in to_delete_batch:
190
+ await txn.delete(key)
191
+ await txn.commit()
192
+
193
+ return len(to_delete_batch)
194
+
195
+
196
+ async def purge_kb_vectorsets(driver: Driver, storage: Storage):
197
+ """Vectors for a vectorset are stored in a key inside each resource. Iterate
198
+ through all resources of the KB and remove any storage object containing
199
+ vectors for the specific vectorset to purge.
200
+
201
+ """
202
+ logger.info("START PURGING KB VECTORSETS")
203
+
204
+ purged = []
205
+ async for key in _iter_keys(driver, KB_VECTORSET_TO_DELETE_BASE):
206
+ logger.info(f"Purging vectorsets {key}")
207
+ try:
208
+ _base, kbid, vectorset = key.lstrip("/").split("/")
209
+ except ValueError:
210
+ logger.info(f" X Skipping purge {key}, wrong key format, expected {KB_VECTORSET_TO_DELETE}")
211
+ continue
212
+
213
+ try:
214
+ async with driver.transaction(read_only=True) as txn:
215
+ kb = KnowledgeBox(txn, storage, kbid)
216
+ async for resource in kb.iterate_resources():
217
+ fields = await resource.get_fields(force=True)
218
+ # we don't need the maindb transaction anymore to remove vectors from storage
219
+ for field in fields.values():
220
+ await field.delete_vectors(vectorset)
221
+ except Exception as exc:
222
+ errors.capture_exception(exc)
223
+ logger.error(
224
+ f" X ERROR while executing KB vectorset purge, skipping",
225
+ exc_info=exc,
226
+ extra={"kbid": kbid},
227
+ )
228
+ continue
229
+
230
+ purged.append(key)
231
+
232
+ async with driver.transaction() as txn:
233
+ for key in purged:
234
+ await txn.delete(key)
235
+ await txn.commit()
236
+
237
+ logger.info("FINISH PURGING KB VECTORSETS")
238
+
239
+
145
240
  async def main():
146
241
  """
147
242
  This script will purge all knowledge boxes marked to be deleted in maindb.
@@ -153,17 +248,28 @@ async def main():
153
248
  service_name=SERVICE_NAME,
154
249
  )
155
250
  try:
251
+ purge_resources_storage_task = asyncio.create_task(
252
+ purge_deleted_resource_storage(driver, storage)
253
+ )
156
254
  await purge_kb(driver)
157
255
  await purge_kb_storage(driver, storage)
256
+ await purge_kb_vectorsets(driver, storage)
257
+ await purge_resources_storage_task
258
+ except Exception as ex: # pragma: no cover
259
+ logger.exception("Unhandled exception on purge command")
260
+ errors.capture_exception(ex)
158
261
  finally:
159
- await storage.finalize()
160
- await teardown_driver()
161
- await teardown_cluster()
262
+ try:
263
+ purge_resources_storage_task.cancel()
264
+ await storage.finalize()
265
+ await teardown_driver()
266
+ await teardown_cluster()
267
+ except Exception: # pragma: no cover
268
+ logger.exception("Error tearing down utilities on purge command")
269
+ pass
162
270
 
163
271
 
164
272
  def run() -> int: # pragma: no cover
165
273
  setup_logging()
166
-
167
- errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
168
-
274
+ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
169
275
  return asyncio.run(main())
@@ -19,11 +19,11 @@
19
19
 
20
20
  import argparse
21
21
  import asyncio
22
+ import importlib.metadata
22
23
  from dataclasses import dataclass
23
24
  from typing import Optional
24
25
 
25
- import pkg_resources
26
- from grpc.aio import AioRpcError # type: ignore
26
+ from grpc.aio import AioRpcError
27
27
 
28
28
  from nucliadb.common import datamanagers
29
29
  from nucliadb.common.cluster import manager
@@ -86,7 +86,7 @@ async def detect_orphan_shards(driver: Driver) -> dict[str, ShardLocation]:
86
86
  orphan_shard_ids = indexed_shards.keys() - stored_shards.keys()
87
87
  orphan_shards: dict[str, ShardLocation] = {}
88
88
  unavailable_nodes: set[str] = set()
89
- async with datamanagers.with_transaction() as txn:
89
+ async with datamanagers.with_ro_transaction() as txn:
90
90
  for shard_id in orphan_shard_ids:
91
91
  node_id = indexed_shards[shard_id].node_id
92
92
  node = manager.get_index_node(node_id) # type: ignore
@@ -99,9 +99,7 @@ async def detect_orphan_shards(driver: Driver) -> dict[str, ShardLocation]:
99
99
  # Shards with known KB ids can be checked and ignore those coming from
100
100
  # an ongoing migration/rollover
101
101
  if kbid != UNKNOWN_KB:
102
- skip = await datamanagers.rollover.is_rollover_shard(
103
- txn, kbid=kbid, shard_id=shard_id
104
- )
102
+ skip = await datamanagers.rollover.is_rollover_shard(txn, kbid=kbid, shard_id=shard_id)
105
103
  if skip:
106
104
  continue
107
105
 
@@ -133,18 +131,14 @@ async def _get_stored_shards(driver: Driver) -> dict[str, ShardLocation]:
133
131
  try:
134
132
  kb_shards = await shards_manager.get_shards_by_kbid(kbid)
135
133
  except ShardsNotFound:
136
- logger.warning(
137
- "KB not found while looking for orphan shards", extra={"kbid": kbid}
138
- )
134
+ logger.warning("KB not found while looking for orphan shards", extra={"kbid": kbid})
139
135
  continue
140
136
  else:
141
137
  for shard_object_pb in kb_shards:
142
138
  for shard_replica_pb in shard_object_pb.replicas:
143
139
  shard_replica_id = shard_replica_pb.shard.id
144
140
  node_id = shard_replica_pb.node
145
- stored_shards[shard_replica_id] = ShardLocation(
146
- kbid=kbid, node_id=node_id
147
- )
141
+ stored_shards[shard_replica_id] = ShardLocation(kbid=kbid, node_id=node_id)
148
142
  return stored_shards
149
143
 
150
144
 
@@ -264,6 +258,6 @@ async def main():
264
258
  def run() -> int: # pragma: no cover
265
259
  setup_logging()
266
260
 
267
- errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
261
+ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
268
262
 
269
263
  return asyncio.run(main())
@@ -29,9 +29,7 @@ API_PREFIX = "api"
29
29
  class EndpointFilter(logging.Filter):
30
30
  def filter(self, record: logging.LogRecord) -> bool:
31
31
  return (
32
- record.args is not None
33
- and len(record.args) >= 3
34
- and record.args[2] not in ("/", "/metrics") # type: ignore
32
+ record.args is not None and len(record.args) >= 3 and record.args[2] not in ("/", "/metrics") # type: ignore
35
33
  )
36
34
 
37
35
 
@@ -22,15 +22,12 @@ from typing import TYPE_CHECKING, Any, Optional, Union
22
22
  from pydantic import BaseModel
23
23
 
24
24
  import nucliadb_models as models
25
- from nucliadb_models.common import FIELD_TYPES_MAP, FieldTypeName
25
+ from nucliadb_models.common import FieldTypeName
26
26
  from nucliadb_models.resource import (
27
27
  ConversationFieldExtractedData,
28
- DatetimeFieldExtractedData,
29
28
  Error,
30
29
  ExtractedDataType,
31
30
  FileFieldExtractedData,
32
- KeywordsetFieldExtractedData,
33
- LayoutFieldExtractedData,
34
31
  LinkFieldExtractedData,
35
32
  TextFieldExtractedData,
36
33
  )
@@ -41,10 +38,7 @@ if TYPE_CHECKING: # pragma: no cover
41
38
  models.FieldText,
42
39
  models.FieldFile,
43
40
  models.FieldLink,
44
- models.FieldLayout,
45
41
  models.Conversation,
46
- models.FieldKeywordset,
47
- models.FieldDatetime,
48
42
  ]
49
43
  ]
50
44
  else:
@@ -60,14 +54,9 @@ class ResourceField(BaseModel):
60
54
  error: Optional[Error] = None
61
55
 
62
56
 
63
- FIELD_NAMES_TO_PB_TYPE_MAP = {v: k for k, v in FIELD_TYPES_MAP.items()}
64
-
65
57
  FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP: dict[FieldTypeName, Any] = {
66
58
  FieldTypeName.TEXT: TextFieldExtractedData,
67
59
  FieldTypeName.FILE: FileFieldExtractedData,
68
60
  FieldTypeName.LINK: LinkFieldExtractedData,
69
- FieldTypeName.DATETIME: DatetimeFieldExtractedData,
70
- FieldTypeName.KEYWORDSET: KeywordsetFieldExtractedData,
71
- FieldTypeName.LAYOUT: LayoutFieldExtractedData,
72
61
  FieldTypeName.CONVERSATION: ConversationFieldExtractedData,
73
62
  }
@@ -20,7 +20,6 @@
20
20
  from . import download # noqa
21
21
  from . import export_import # noqa
22
22
  from . import knowledgebox # noqa
23
- from . import learning_collector # noqa
24
23
  from . import learning_config # noqa
25
24
  from . import resource # noqa
26
25
  from . import services # noqa