nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -16,701 +16,6 @@
16
16
  #
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import asyncio
21
- import logging
22
- from typing import Optional
23
-
24
- import aiohttp.client_exceptions
25
-
26
- from nucliadb.common import datamanagers, locking
27
- from nucliadb.common.cluster.settings import settings as cluster_settings
28
- from nucliadb.common.cluster.utils import get_shard_manager
29
- from nucliadb.common.maindb.driver import Driver, Transaction
30
- from nucliadb.common.maindb.exceptions import ConflictError
31
- from nucliadb.ingest.orm.exceptions import (
32
- DeadletteredError,
33
- KnowledgeBoxConflict,
34
- ResourceNotIndexable,
35
- SequenceOrderViolation,
36
- )
37
- from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
38
- from nucliadb.ingest.orm.metrics import processor_observer
39
- from nucliadb.ingest.orm.processor import sequence_manager
40
- from nucliadb.ingest.orm.resource import Resource
41
- from nucliadb_protos import (
42
- knowledgebox_pb2,
43
- noderesources_pb2,
44
- nodewriter_pb2,
45
- resources_pb2,
46
- utils_pb2,
47
- writer_pb2,
48
- )
49
- from nucliadb_telemetry import errors
50
- from nucliadb_utils import const
51
- from nucliadb_utils.cache.pubsub import PubSubDriver
52
- from nucliadb_utils.storages.storage import Storage
53
- from nucliadb_utils.utilities import get_storage
54
-
55
- logger = logging.getLogger(__name__)
56
-
57
-
58
- MESSAGE_TO_NOTIFICATION_SOURCE = {
59
- writer_pb2.BrokerMessage.MessageSource.WRITER: writer_pb2.NotificationSource.WRITER,
60
- writer_pb2.BrokerMessage.MessageSource.PROCESSOR: writer_pb2.NotificationSource.PROCESSOR,
61
- }
62
-
63
-
64
- def validate_indexable_resource(resource: noderesources_pb2.Resource) -> None:
65
- """
66
- It would be more optimal to move this to another layer but it'd also make the code
67
- more difficult to grok and test because we'd need to move processable check and throw
68
- an exception in the middle of a bunch of processing logic.
69
-
70
- As it is implemented right now, we just do the check if a resource is indexable right
71
- before we actually try to index it and not buried it somewhere else in the code base.
72
-
73
- This is still an edge case.
74
- """
75
- num_paragraphs = 0
76
- for _, fparagraph in resource.paragraphs.items():
77
- # this count should not be very expensive to do since we don't have
78
- # a lot of different fields and we just do a count on a dict
79
- num_paragraphs += len(fparagraph.paragraphs)
80
-
81
- if num_paragraphs > cluster_settings.max_resource_paragraphs:
82
- raise ResourceNotIndexable(
83
- "Resource has too many paragraphs. "
84
- f"Supported: {cluster_settings.max_resource_paragraphs} , Number: {num_paragraphs}"
85
- )
86
-
87
-
88
- class Processor:
89
- """
90
- This class is responsible for processing messages from the broker
91
- and attempts to manage sequencing correctly with a txn id implementation.
92
-
93
- The "txn" in this implementation is oriented around the sequence id of
94
- messages coming through the message broker.
95
-
96
- Not all writes are going to have a transaction id. For example, writes
97
- coming from processor can be coming through a different channel
98
- and can not use the txn id
99
- """
100
-
101
- messages: dict[str, list[writer_pb2.BrokerMessage]]
102
-
103
- def __init__(
104
- self,
105
- driver: Driver,
106
- storage: Storage,
107
- pubsub: Optional[PubSubDriver] = None,
108
- partition: Optional[str] = None,
109
- ):
110
- self.messages = {}
111
- self.driver = driver
112
- self.storage = storage
113
- self.partition = partition
114
- self.pubsub = pubsub
115
- self.shard_manager = get_shard_manager()
116
-
117
- async def process(
118
- self,
119
- message: writer_pb2.BrokerMessage,
120
- seqid: int,
121
- partition: Optional[str] = None,
122
- transaction_check: bool = True,
123
- ) -> None:
124
- partition = partition if self.partition is None else self.partition
125
- if partition is None:
126
- raise AttributeError("Can't process message from unknown partition")
127
-
128
- # When running in transactional mode, we need to check that
129
- # that the current message doesn't violate the sequence order for the
130
- # current partition
131
- if transaction_check:
132
- last_seqid = await sequence_manager.get_last_seqid(self.driver, partition)
133
- if last_seqid is not None and seqid <= last_seqid:
134
- raise SequenceOrderViolation(last_seqid)
135
-
136
- if message.type == writer_pb2.BrokerMessage.MessageType.DELETE:
137
- await self.delete_resource(message, seqid, partition, transaction_check)
138
- elif message.type == writer_pb2.BrokerMessage.MessageType.AUTOCOMMIT:
139
- await self.txn([message], seqid, partition, transaction_check)
140
- elif message.type == writer_pb2.BrokerMessage.MessageType.MULTI:
141
- # XXX Not supported right now
142
- # MULTI, COMMIT and ROLLBACK are all not supported in transactional mode right now
143
- # This concept is probably not tenable with current architecture because
144
- # of how nats works and how we would need to manage rollbacks.
145
- # XXX Should this be removed?
146
- await self.multi(message, seqid)
147
- elif message.type == writer_pb2.BrokerMessage.MessageType.COMMIT:
148
- await self.commit(message, seqid, partition)
149
- elif message.type == writer_pb2.BrokerMessage.MessageType.ROLLBACK:
150
- await self.rollback(message, seqid, partition)
151
-
152
- async def get_resource_uuid(
153
- self, kb: KnowledgeBox, message: writer_pb2.BrokerMessage
154
- ) -> str:
155
- if message.uuid is None:
156
- uuid = await kb.get_resource_uuid_by_slug(message.slug)
157
- else:
158
- uuid = message.uuid
159
- return uuid
160
-
161
- @processor_observer.wrap({"type": "delete_resource"})
162
- async def delete_resource(
163
- self,
164
- message: writer_pb2.BrokerMessage,
165
- seqid: int,
166
- partition: str,
167
- transaction_check: bool = True,
168
- ) -> None:
169
- txn = await self.driver.begin()
170
- try:
171
- kb = KnowledgeBox(txn, self.storage, message.kbid)
172
-
173
- uuid = await self.get_resource_uuid(kb, message)
174
- async with locking.distributed_lock(
175
- locking.RESOURCE_INDEX_LOCK.format(kbid=message.kbid, resource_id=uuid)
176
- ):
177
- # we need to have a lock at indexing time because we don't know if
178
- # a resource was in the process of being moved when a delete occurred
179
- shard_id = await datamanagers.resources.get_resource_shard_id(
180
- txn, kbid=message.kbid, rid=uuid
181
- )
182
- if shard_id is None:
183
- logger.warning(f"Resource {uuid} does not exist")
184
- else:
185
- shard = await kb.get_resource_shard(shard_id)
186
- if shard is None:
187
- raise AttributeError("Shard not available")
188
-
189
- await self.shard_manager.delete_resource(
190
- shard, message.uuid, seqid, partition, message.kbid
191
- )
192
- try:
193
- await kb.delete_resource(message.uuid)
194
- except Exception as exc:
195
- await txn.abort()
196
- await self.notify_abort(
197
- partition=partition,
198
- seqid=seqid,
199
- multi=message.multiid,
200
- kbid=message.kbid,
201
- rid=message.uuid,
202
- source=message.source,
203
- )
204
- raise exc
205
- finally:
206
- if txn.open:
207
- if transaction_check:
208
- await sequence_manager.set_last_seqid(txn, partition, seqid)
209
- await txn.commit()
210
- await self.notify_commit(
211
- partition=partition,
212
- seqid=seqid,
213
- multi=message.multiid,
214
- message=message,
215
- write_type=writer_pb2.Notification.WriteType.DELETED,
216
- )
217
-
218
- @processor_observer.wrap({"type": "commit_slug"})
219
- async def commit_slug(self, resource: Resource) -> None:
220
- # Slug may have conflicts as its not partitioned properly,
221
- # so we commit it in a different transaction to make it as short as possible
222
- prev_txn = resource.txn
223
- try:
224
- async with self.driver.transaction() as txn:
225
- resource.txn = txn
226
- await resource.set_slug()
227
- await txn.commit()
228
- finally:
229
- resource.txn = prev_txn
230
-
231
@processor_observer.wrap({"type": "txn"})
async def txn(
    self,
    messages: list[writer_pb2.BrokerMessage],
    seqid: int,
    partition: str,
    transaction_check: bool = True,
) -> None:
    """Apply a batch of broker messages to one resource in a single transaction.

    All messages are applied to the same resource. When the resource ends up
    modified it is indexed, the transaction is committed and a commit
    notification is published; when it is left unmodified the transaction is
    aborted and an abort notification is published.

    Args:
        messages: broker messages of the batch; an empty list is a no-op.
        seqid: sequence id of the batch; ``-1`` makes generic failures re-raise
            the original exception instead of ``DeadletteredError``.
        partition: partition the batch was read from.
        transaction_check: when true, ``seqid`` is stored as the last processed
            sequence id of ``partition`` before committing.

    Raises:
        DeadletteredError: a generic failure happened and the messages were
            sent to the deadletter storage (only when ``seqid != -1``).
        Exception: timeouts, cancellations, client errors and conflicts are
            re-raised as-is after publishing an abort notification.
    """
    if not messages:
        return None

    txn = await self.driver.begin()
    kbid = messages[0].kbid
    if not await datamanagers.kb.exists_kb(txn, kbid=kbid):
        logger.warning(f"KB {kbid} is deleted: skiping txn")
        if transaction_check:
            await sequence_manager.set_last_seqid(txn, partition, seqid)
            await txn.commit()
        else:
            # Nothing will be committed: release the transaction instead of
            # leaving it open.
            await txn.abort()
        return None

    # Bind everything the exception handlers and the ``finally`` clause read
    # *before* entering the ``try``; otherwise an early failure (for instance
    # while resolving the resource uuid) would surface as UnboundLocalError
    # and hide the real error.
    multi = messages[0].multiid
    message = messages[0]
    # Fallback id for abort notifications if the uuid lookup itself fails.
    uuid = messages[0].uuid
    resource: Optional[Resource] = None
    handled_exception = None
    created = False

    try:
        kb = KnowledgeBox(txn, self.storage, kbid)
        uuid = await self.get_resource_uuid(kb, messages[0])

        for message in messages:
            if resource is not None:
                assert resource.uuid == message.uuid
            result = await self.apply_resource(message, kb, resource)

            if result is None:
                continue

            resource, _created = result
            created = created or _created

        if resource:
            await resource.compute_global_text()
            await resource.compute_global_tags(resource.indexer)
            await resource.compute_security(resource.indexer)
            if message.reindex:
                # when reindexing, let's just generate full new index message
                resource.replace_indexer(await resource.generate_index_message())

        if resource and resource.modified:
            await self.index_resource(  # noqa
                resource=resource,
                txn=txn,
                uuid=uuid,
                kbid=kbid,
                seqid=seqid,
                partition=partition,
                kb=kb,
                source=messages_source(messages),
            )

            if transaction_check:
                await sequence_manager.set_last_seqid(txn, partition, seqid)
            await txn.commit()

            if created:
                await self.commit_slug(resource)

            await self.notify_commit(
                partition=partition,
                seqid=seqid,
                multi=multi,
                message=message,
                write_type=(
                    writer_pb2.Notification.WriteType.CREATED
                    if created
                    else writer_pb2.Notification.WriteType.MODIFIED
                ),
            )
        elif resource and resource.modified is False:
            await txn.abort()
            await self.notify_abort(
                partition=partition,
                seqid=seqid,
                multi=multi,
                kbid=kbid,
                rid=uuid,
                source=message.source,
            )
            logger.warning("This message did not modify the resource")
    except (
        asyncio.TimeoutError,
        asyncio.CancelledError,
        aiohttp.client_exceptions.ClientError,
        ConflictError,
    ):  # pragma: no cover
        # Unhandled exceptions here that should bubble and hard fail
        # XXX We swallow too many exceptions here!
        await self.notify_abort(
            partition=partition,
            seqid=seqid,
            multi=multi,
            kbid=kbid,
            rid=uuid,
            source=message.source,
        )
        raise
    except Exception as exc:
        # As we are in the middle of a transaction, we cannot let the exception raise directly
        # as we need to do some cleanup. The exception will be reraised at the end of the function
        # and then handled by the top caller, so errors can be handled in the same place.
        await self.deadletter(messages, partition, seqid)
        await self.notify_abort(
            partition=partition,
            seqid=seqid,
            multi=multi,
            kbid=kbid,
            rid=uuid,
            source=message.source,
        )
        handled_exception = exc
    finally:
        if resource is not None:
            resource.clean()
        # txn should be already commited or aborted, but in the event of an exception
        # it could be left open. Make sure to close it if it's still open
        if txn.open:
            await txn.abort()

    if handled_exception is not None:
        if seqid == -1:
            raise handled_exception
        else:
            if resource is not None:
                # ``kb`` is necessarily bound here: a resource can only exist
                # once the KnowledgeBox object has been created.
                await self._mark_resource_error(kb, resource, partition, seqid)
            raise DeadletteredError() from handled_exception

    return None
367
-
368
@processor_observer.wrap({"type": "index_resource"})
async def index_resource(
    self,
    resource: Resource,
    txn: Transaction,
    uuid: str,
    kbid: str,
    seqid: int,
    partition: str,
    kb: KnowledgeBox,
    source: nodewriter_pb2.IndexMessageSource.ValueType,
) -> None:
    """Send the resource's index message to the shard that holds the resource.

    The shard currently assigned to the resource is used when there is one.
    Otherwise the resource is placed on the current active shard of the KB (a
    new shard is created when none is available) and that assignment is stored
    through ``txn``.

    Raises:
        AttributeError: no shard could be resolved for the resource.
    """
    validate_indexable_resource(resource.indexer.brain)

    lock_name = locking.RESOURCE_INDEX_LOCK.format(kbid=kbid, resource_id=uuid)
    async with locking.distributed_lock(lock_name):
        # The lock is required at indexing time: the resource could be moved
        # to another shard while it is being indexed.
        assigned_shard_id = await datamanagers.resources.get_resource_shard_id(
            txn, kbid=kbid, rid=uuid
        )

        shard = (
            await kb.get_resource_shard(assigned_shard_id)
            if assigned_shard_id is not None
            else None
        )

        if shard is None:
            # New resource: place it on the current active shard.
            shard = await self.shard_manager.get_current_active_shard(txn, kbid)
            if shard is None:
                # No shard available yet, create one for this KB.
                model = await datamanagers.kb.get_model_metadata(txn, kbid=kbid)
                config = await kb.get_config()
                release_channel = (
                    config.release_channel
                    if config is not None
                    else utils_pb2.ReleaseChannel.STABLE
                )
                shard = await self.shard_manager.create_shard_by_kbid(
                    txn,
                    kbid,
                    semantic_model=model,
                    release_channel=release_channel,
                )
            await datamanagers.resources.set_resource_shard_id(
                txn, kbid=kbid, rid=uuid, shard=shard.shard
            )

        if shard is None:
            raise AttributeError("Shard is not available")

        await self.shard_manager.add_resource(
            shard,
            resource.indexer.brain,
            seqid,
            partition=partition,
            kb=kbid,
            source=source,
        )
430
-
431
async def multi(self, message: writer_pb2.BrokerMessage, seqid: int) -> None:
    """Buffer ``message`` under its multi id until the batch is committed or rolled back."""
    pending = self.messages.setdefault(message.multiid, [])
    pending.append(message)
433
-
434
async def commit(
    self, message: writer_pb2.BrokerMessage, seqid: int, partition: str
) -> None:
    """Process the messages buffered under ``message.multiid``.

    When nothing is buffered for that multi id, an error is logged and
    ``message`` itself is sent to the deadletter storage.
    """
    if message.multiid in self.messages:
        # NOTE(review): the buffered batch is not removed after processing;
        # confirm whether something else evicts it.
        await self.txn(self.messages[message.multiid], seqid, partition)
        return

    # Error
    logger.error(f"Closed multi {message.multiid}")
    await self.deadletter([message], partition, seqid)
443
-
444
async def rollback(
    self, message: writer_pb2.BrokerMessage, seqid: int, partition: str
) -> None:
    """Discard the messages buffered under ``message.multiid`` and notify the abort.

    A rollback for a multi id with no buffered messages is tolerated: the
    abort notification is still published instead of failing with ``KeyError``
    before anything is notified.
    """
    # Error
    logger.error(f"Closed multi {message.multiid}")
    # ``pop`` with a default never raises, unlike ``del`` on a missing key.
    self.messages.pop(message.multiid, None)
    await self.notify_abort(
        partition=partition,
        seqid=seqid,
        multi=message.multiid,
        kbid=message.kbid,
        rid=message.uuid,
        source=message.source,
    )
458
-
459
async def deadletter(
    self, messages: list[writer_pb2.BrokerMessage], partition: str, seqid: int
) -> None:
    """Send every message of the batch, with its position in it, to the deadletter storage."""
    for position, failed_message in enumerate(messages):
        await self.storage.deadletter(failed_message, position, seqid, partition)
464
-
465
@processor_observer.wrap({"type": "apply_resource"})
async def apply_resource(
    self,
    message: writer_pb2.BrokerMessage,
    kb: KnowledgeBox,
    resource: Optional[Resource] = None,
) -> Optional[tuple[Resource, bool]]:
    """
    Convert a broker message into a resource object, and apply it to the database

    Args:
        message: broker message to apply.
        kb: knowledge box the resource belongs to.
        resource: resource already loaded by a previous message of the same
            batch; when ``None`` the resource is loaded from ``kb`` or created.

    Returns:
        ``(resource, created)`` where ``created`` tells whether the resource
        was added by this call, or ``None`` when there is no resource to apply
        the message to.
    """
    created = False

    if resource is None:
        # Make sure we load the resource in case it already exists on db
        # NOTE(review): if ``uuid`` is a plain protobuf string field it is
        # presumably never ``None`` (unset reads as ""), which would make the
        # slug lookup unreachable - confirm against the .proto definition.
        if message.uuid is None and message.slug:
            uuid = await kb.get_resource_uuid_by_slug(message.slug)
        else:
            uuid = message.uuid
        resource = await kb.get(uuid)

        # Enum values are compared by equality: identity (``is``) on integer
        # values only works by accident of the interpreter's small-int cache.
        if resource is None and message.source == message.MessageSource.WRITER:
            # It's a new resource
            resource = await kb.add_resource(uuid, message.slug, message.basic)
            created = True
        elif resource is not None:
            # It's an update of an existing resource, can come either from writer or
            # from processing
            await self.maybe_update_resource_basic(resource, message)
        elif resource is None and message.source == message.MessageSource.PROCESSOR:
            # It's a new resource, and somehow we received the message coming from processing before
            # the "fast" one, this shouldn't happen
            logger.info(
                f"Secondary message for resource {message.uuid} and resource does not exist, ignoring"
            )
            return None

    if resource is None:
        return None

    if message.HasField("origin"):
        await resource.set_origin(message.origin)

    if message.HasField("extra"):
        await resource.set_extra(message.extra)

    if message.HasField("security"):
        await resource.set_security(message.security)

    await resource.apply_fields(message)
    await resource.apply_extracted(message)
    return (resource, created)
516
-
517
async def maybe_update_resource_basic(
    self, resource: Resource, message: writer_pb2.BrokerMessage
) -> None:
    """Update the resource's basic metadata when the message carries relevant changes.

    The update happens only if the message has a ``basic`` field or lists at
    least one deleted field; otherwise the resource is left untouched.
    """
    carries_basic = message.HasField("basic")
    carries_deletions = len(message.delete_fields) > 0
    if carries_basic or carries_deletions:
        await resource.set_basic(
            message.basic,
            deleted_fields=message.delete_fields,  # type: ignore
        )
529
-
530
async def notify_commit(
    self,
    *,
    partition: str,
    seqid: int,
    multi: str,
    message: writer_pb2.BrokerMessage,
    write_type: writer_pb2.Notification.WriteType.ValueType,
):
    """Publish a COMMIT notification for ``message`` on its KB notification channel."""
    channel = const.PubSubChannels.RESOURCE_NOTIFY.format(kbid=message.kbid)
    commit_notification = writer_pb2.Notification(
        partition=int(partition),
        seqid=seqid,
        multi=multi,
        uuid=message.uuid,
        kbid=message.kbid,
        action=writer_pb2.Notification.Action.COMMIT,
        write_type=write_type,
        source=MESSAGE_TO_NOTIFICATION_SOURCE[message.source],
        # The original payload is embedded again so that consumers of the
        # notification can react to it with the full message at hand.
        message=message,
        processing_errors=len(message.errors) > 0,
    )
    await self.notify(channel, commit_notification.SerializeToString())
558
-
559
async def notify_abort(
    self,
    *,
    partition: str,
    seqid: int,
    multi: str,
    kbid: str,
    rid: str,
    source: writer_pb2.BrokerMessage.MessageSource.ValueType,
):
    """Publish an ABORT notification for resource ``rid`` on the KB notification channel."""
    channel = const.PubSubChannels.RESOURCE_NOTIFY.format(kbid=kbid)
    abort_notification = writer_pb2.Notification(
        partition=int(partition),
        seqid=seqid,
        multi=multi,
        uuid=rid,
        kbid=kbid,
        action=writer_pb2.Notification.ABORT,
        source=MESSAGE_TO_NOTIFICATION_SOURCE[source],
    )
    await self.notify(channel, abort_notification.SerializeToString())
582
-
583
async def notify(self, channel, payload: bytes):
    """Publish ``payload`` on ``channel``; do nothing when no pubsub is configured."""
    if self.pubsub is None:
        return
    await self.pubsub.publish(channel, payload)
586
-
587
async def _mark_resource_error(
    self, kb: KnowledgeBox, resource: Optional[Resource], partition: str, seqid: int
) -> None:
    """
    Unhandled error processing, try to mark resource as error

    Best effort: any exception raised while doing so is logged as a warning
    and not propagated.
    """
    if resource is None or resource.basic is None:
        logger.info(
            f"Skip when resource does not even have basic metadata: {resource}"
        )
        return

    try:
        async with self.driver.transaction() as error_txn:
            # Both objects are re-pointed at the fresh transaction.
            kb.txn = resource.txn = error_txn

            shard_id = await datamanagers.resources.get_resource_shard_id(
                error_txn, kbid=kb.kbid, rid=resource.uuid
            )
            shard = (
                await kb.get_resource_shard(shard_id) if shard_id is not None else None
            )
            if shard is None:
                logger.warning(
                    "Unable to mark resource as error, shard is None. "
                    "This should not happen so you did something special to get here."
                )
                return

            resource.basic.metadata.status = resources_pb2.Metadata.Status.ERROR
            await resource.set_basic(resource.basic)
            await error_txn.commit()

        # Once the status is stored, propagate it to the index as well.
        resource.indexer.set_processing_status(
            basic=resource.basic, previous_status=resource._previous_status
        )
        await self.shard_manager.add_resource(
            shard, resource.indexer.brain, seqid, partition=partition, kb=kb.kbid
        )
    except Exception:
        logger.warning("Error while marking resource as error", exc_info=True)
627
-
628
- # KB tools
629
- # XXX: Why are these utility functions here?
630
async def get_kb_obj(
    self, txn: Transaction, kbid: knowledgebox_pb2.KnowledgeBoxID
) -> Optional[KnowledgeBox]:
    """Return the ``KnowledgeBox`` identified by ``kbid``, or ``None`` when it does not exist.

    The uuid carried by ``kbid`` is used when it is not empty; otherwise the
    uuid is looked up from the slug.
    """
    kb_uuid: Optional[str] = kbid.uuid
    if kb_uuid == "":
        kb_uuid = await datamanagers.kb.get_kb_uuid(txn, slug=kbid.slug)

    # Unknown slug, or a uuid that does not match an existing KB.
    if kb_uuid is None or not (await datamanagers.kb.exists_kb(txn, kbid=kb_uuid)):
        return None

    storage = await get_storage()
    return KnowledgeBox(txn, storage, kb_uuid)
646
-
647
@processor_observer.wrap({"type": "create_kb"})
async def create_kb(
    self,
    slug: str,
    config: Optional[knowledgebox_pb2.KnowledgeBoxConfig],
    semantic_model: knowledgebox_pb2.SemanticModelMetadata,
    forceuuid: Optional[str] = None,
    release_channel: utils_pb2.ReleaseChannel.ValueType = utils_pb2.ReleaseChannel.STABLE,
) -> str:
    """Create a knowledge box and return its uuid.

    Raises:
        KnowledgeBoxConflict: propagated untouched from the creation call.
        Exception: any other failure, after being reported through
            ``errors.capture_exception``.
    """
    async with self.driver.transaction() as txn:
        try:
            kb_uuid, failed = await KnowledgeBox.create(
                txn,
                slug,
                semantic_model,
                uuid=forceuuid,
                config=config,
                release_channel=release_channel,
            )
            if failed:
                raise Exception("Failed to create KB")
            await txn.commit()
            return kb_uuid
        except Exception as exc:
            # Conflicts are expected and bubble up as-is; everything else is
            # reported before being re-raised.
            if not isinstance(exc, KnowledgeBoxConflict):
                errors.capture_exception(exc)
            raise
675
-
676
async def update_kb(
    self,
    kbid: str,
    slug: str,
    config: Optional[knowledgebox_pb2.KnowledgeBoxConfig],
) -> str:
    """Update a knowledge box inside its own transaction and return the uuid given by the update."""
    async with self.driver.transaction() as txn:
        updated_uuid = await KnowledgeBox.update(txn, kbid, slug, config=config)
        await txn.commit()
    return updated_uuid
686
-
687
async def delete_kb(self, kbid: str = "", slug: str = "") -> str:
    """Delete a knowledge box inside its own transaction and return the uuid given by the deletion."""
    async with self.driver.transaction() as txn:
        deleted_uuid = await KnowledgeBox.delete_kb(txn, kbid=kbid, slug=slug)
        await txn.commit()
    return deleted_uuid
692
-
693
19
 
694
def messages_source(messages: list[writer_pb2.BrokerMessage]):
    """Return the index message source matching the source shared by ``messages``.

    All messages coming from the writer (or an empty list) map to ``WRITER``
    and all messages coming from the processor map to ``PROCESSOR``. A mix of
    sources is logged and reported as an error, and treated as ``PROCESSOR``.
    """
    distinct_sources = {message.source for message in messages}

    if distinct_sources <= {writer_pb2.BrokerMessage.MessageSource.WRITER}:
        return nodewriter_pb2.IndexMessageSource.WRITER

    if distinct_sources <= {writer_pb2.BrokerMessage.MessageSource.PROCESSOR}:
        return nodewriter_pb2.IndexMessageSource.PROCESSOR

    # pragma: nocover
    msg = "Processor received multiple broker messages with different sources in the same txn!"
    logger.error(msg)
    errors.capture_exception(Exception(msg))
    return nodewriter_pb2.IndexMessageSource.PROCESSOR
20
+ # reexports
21
+ from .processor import Processor, validate_indexable_resource # noqa: F401