nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -21,57 +21,54 @@ from __future__ import annotations
21
21
 
22
22
  import enum
23
23
  from datetime import datetime
24
- from typing import Any, Optional, Type
24
+ from typing import Any, Generic, Optional, Type, TypeVar
25
25
 
26
+ from google.protobuf.message import DecodeError, Message
27
+
28
+ from nucliadb.common import datamanagers
29
+ from nucliadb.ingest.fields.exceptions import InvalidFieldClass, InvalidPBClass
26
30
  from nucliadb_protos.resources_pb2 import (
27
31
  CloudFile,
28
32
  ExtractedTextWrapper,
29
33
  ExtractedVectorsWrapper,
34
+ FieldAuthor,
30
35
  FieldComputedMetadata,
31
36
  FieldComputedMetadataWrapper,
37
+ FieldQuestionAnswers,
32
38
  FieldQuestionAnswerWrapper,
33
39
  LargeComputedMetadata,
34
40
  LargeComputedMetadataWrapper,
35
41
  QuestionAnswers,
36
- UserVectorsWrapper,
37
- )
38
- from nucliadb_protos.utils_pb2 import (
39
- ExtractedText,
40
- UserVectorSet,
41
- UserVectorsList,
42
- VectorObject,
43
42
  )
43
+ from nucliadb_protos.utils_pb2 import ExtractedText, VectorObject
44
44
  from nucliadb_protos.writer_pb2 import Error
45
-
46
- from nucliadb.ingest.fields.exceptions import InvalidFieldClass, InvalidPBClass
47
45
  from nucliadb_utils.storages.storage import Storage, StorageField
48
46
 
49
- KB_RESOURCE_FIELD = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}"
50
- KB_RESOURCE_ERROR = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}/error"
51
-
52
- SUBFIELDFIELDS = ["l", "c"]
47
+ SUBFIELDFIELDS = ("c",)
53
48
 
54
49
 
55
50
  class FieldTypes(str, enum.Enum):
56
51
  FIELD_TEXT = "extracted_text"
57
52
  FIELD_VECTORS = "extracted_vectors"
58
- USER_FIELD_VECTORS = "user_vectors"
53
+ FIELD_VECTORSET = "{vectorset}/extracted_vectors"
59
54
  FIELD_METADATA = "metadata"
60
55
  FIELD_LARGE_METADATA = "large_metadata"
61
56
  THUMBNAIL = "thumbnail"
62
57
  QUESTION_ANSWERS = "question_answers"
63
58
 
64
59
 
65
- class Field:
66
- pbklass: Optional[Type] = None
60
+ PbType = TypeVar("PbType", bound=Message)
61
+
62
+
63
+ class Field(Generic[PbType]):
64
+ pbklass: Type[PbType]
67
65
  type: str = "x"
68
66
  value: Optional[Any]
69
67
  extracted_text: Optional[ExtractedText]
70
- extracted_vectors: Optional[VectorObject]
68
+ extracted_vectors: dict[Optional[str], VectorObject]
71
69
  computed_metadata: Optional[FieldComputedMetadata]
72
70
  large_computed_metadata: Optional[LargeComputedMetadata]
73
- extracted_user_vectors: Optional[UserVectorSet]
74
- question_answers: Optional[QuestionAnswers]
71
+ question_answers: Optional[FieldQuestionAnswers]
75
72
 
76
73
  def __init__(
77
74
  self,
@@ -85,10 +82,9 @@ class Field:
85
82
 
86
83
  self.value = None
87
84
  self.extracted_text: Optional[ExtractedText] = None
88
- self.extracted_vectors = None
85
+ self.extracted_vectors = {}
89
86
  self.computed_metadata = None
90
87
  self.large_computed_metadata = None
91
- self.extracted_user_vectors = None
92
88
  self.question_answers = None
93
89
 
94
90
  self.id: str = id
@@ -121,44 +117,51 @@ class Field:
121
117
  return f"{self.uuid}/{self.type}/{self.id}"
122
118
 
123
119
  def get_storage_field(self, field_type: FieldTypes) -> StorageField:
124
- return self.storage.file_extracted(
125
- self.kbid, self.uuid, self.type, self.id, field_type.value
126
- )
120
+ return self.storage.file_extracted(self.kbid, self.uuid, self.type, self.id, field_type.value)
121
+
122
+ def _get_extracted_vectors_storage_field(self, vectorset: Optional[str] = None) -> StorageField:
123
+ if vectorset:
124
+ key = FieldTypes.FIELD_VECTORSET.value.format(vectorset=vectorset)
125
+ else:
126
+ key = FieldTypes.FIELD_VECTORS.value
127
+ return self.storage.file_extracted(self.kbid, self.uuid, self.type, self.id, key)
127
128
 
128
- async def db_get_value(self):
129
+ async def db_get_value(self) -> Optional[PbType]:
129
130
  if self.value is None:
130
- payload = await self.resource.txn.get(
131
- KB_RESOURCE_FIELD.format(
132
- kbid=self.kbid, uuid=self.uuid, type=self.type, field=self.id
133
- )
131
+ payload = await datamanagers.fields.get_raw(
132
+ self.resource.txn,
133
+ kbid=self.kbid,
134
+ rid=self.uuid,
135
+ field_type=self.type,
136
+ field_id=self.id,
134
137
  )
135
138
  if payload is None:
136
- return
139
+ return None
137
140
 
138
141
  self.value = self.pbklass()
139
142
  self.value.ParseFromString(payload)
140
143
  return self.value
141
144
 
142
145
  async def db_set_value(self, payload: Any):
143
- await self.resource.txn.set(
144
- KB_RESOURCE_FIELD.format(
145
- kbid=self.kbid, uuid=self.uuid, type=self.type, field=self.id
146
- ),
147
- payload.SerializeToString(),
146
+ await datamanagers.fields.set(
147
+ self.resource.txn,
148
+ kbid=self.kbid,
149
+ rid=self.uuid,
150
+ field_type=self.type,
151
+ field_id=self.id,
152
+ value=payload,
148
153
  )
149
154
  self.value = payload
150
155
  self.resource.modified = True
151
156
 
152
157
  async def delete(self):
153
- field_base_key = KB_RESOURCE_FIELD.format(
154
- kbid=self.kbid, uuid=self.uuid, type=self.type, field=self.id
158
+ await datamanagers.fields.delete(
159
+ self.resource.txn,
160
+ kbid=self.kbid,
161
+ rid=self.uuid,
162
+ field_type=self.type,
163
+ field_id=self.id,
155
164
  )
156
- # Make sure we explicitly delete the field and any nested key
157
- keys_to_delete = []
158
- async for key in self.resource.txn.keys(field_base_key):
159
- keys_to_delete.append(key)
160
- for key in keys_to_delete:
161
- await self.resource.txn.delete(key)
162
165
  await self.delete_extracted_text()
163
166
  await self.delete_vectors()
164
167
  await self.delete_metadata()
@@ -178,9 +181,9 @@ class Field:
178
181
  except KeyError:
179
182
  pass
180
183
 
181
- async def delete_vectors(self) -> None:
184
+ async def delete_vectors(self, vectorset: Optional[str] = None) -> None:
182
185
  # Try delete vectors
183
- sf = self.get_storage_field(FieldTypes.FIELD_VECTORS)
186
+ sf = self._get_extracted_vectors_storage_field(vectorset)
184
187
  try:
185
188
  await self.storage.delete_upload(sf.key, sf.bucket)
186
189
  except KeyError:
@@ -194,53 +197,79 @@ class Field:
194
197
  pass
195
198
 
196
199
  async def get_error(self) -> Optional[Error]:
197
- payload = await self.resource.txn.get(
198
- KB_RESOURCE_ERROR.format(
199
- kbid=self.kbid, uuid=self.uuid, type=self.type, field=self.id
200
- )
200
+ return await datamanagers.fields.get_error(
201
+ self.resource.txn,
202
+ kbid=self.kbid,
203
+ rid=self.uuid,
204
+ field_type=self.type,
205
+ field_id=self.id,
201
206
  )
202
- if payload is None:
203
- return None
204
- pberror = Error()
205
- pberror.ParseFromString(payload)
206
- return pberror
207
207
 
208
208
  async def set_error(self, error: Error) -> None:
209
- await self.resource.txn.set(
210
- KB_RESOURCE_ERROR.format(
211
- kbid=self.kbid, uuid=self.uuid, type=self.type, field=self.id
212
- ),
213
- error.SerializeToString(),
209
+ await datamanagers.fields.set_error(
210
+ self.resource.txn,
211
+ kbid=self.kbid,
212
+ rid=self.uuid,
213
+ field_type=self.type,
214
+ field_id=self.id,
215
+ error=error,
214
216
  )
215
217
 
216
- async def get_question_answers(self) -> Optional[QuestionAnswers]:
217
- if self.question_answers is None:
218
+ async def get_question_answers(self, force=False) -> Optional[FieldQuestionAnswers]:
219
+ if self.question_answers is None or force:
218
220
  sf = self.get_storage_field(FieldTypes.QUESTION_ANSWERS)
219
- payload = await self.storage.download_pb(sf, QuestionAnswers)
221
+ try:
222
+ payload = await self.storage.download_pb(sf, FieldQuestionAnswers)
223
+ except DecodeError:
224
+ deprecated_payload = await self.storage.download_pb(sf, QuestionAnswers)
225
+ if deprecated_payload is not None:
226
+ payload = FieldQuestionAnswers()
227
+ payload.question_answers.CopyFrom(deprecated_payload)
220
228
  if payload is not None:
221
229
  self.question_answers = payload
222
230
  return self.question_answers
223
231
 
224
232
  async def set_question_answers(self, payload: FieldQuestionAnswerWrapper) -> None:
233
+ if self.type in SUBFIELDFIELDS:
234
+ try:
235
+ actual_payload: Optional[FieldQuestionAnswers] = await self.get_question_answers(
236
+ force=True
237
+ )
238
+ except KeyError:
239
+ actual_payload = None
240
+ else:
241
+ actual_payload = None
225
242
  sf = self.get_storage_field(FieldTypes.QUESTION_ANSWERS)
226
243
 
227
- if payload.HasField("file"):
228
- raw_payload = await self.storage.downloadbytescf(payload.file)
229
- pb = QuestionAnswers()
230
- pb.ParseFromString(raw_payload.read())
231
- raw_payload.flush()
232
- self.question_answers = pb
244
+ if actual_payload is None:
245
+ # Its first question answer
246
+ if payload.HasField("file"):
247
+ await self.storage.normalize_binary(payload.file, sf)
248
+ else:
249
+ await self.storage.upload_pb(sf, payload.question_answers)
250
+ self.question_answers = payload.question_answers
233
251
  else:
234
- self.question_answers = payload.question_answers
235
-
236
- await self.storage.upload_pb(sf, self.question_answers)
252
+ if payload.HasField("file"):
253
+ raw_payload = await self.storage.downloadbytescf(payload.file)
254
+ pb = FieldQuestionAnswers()
255
+ pb.ParseFromString(raw_payload.read())
256
+ raw_payload.flush()
257
+ payload.question_answers.CopyFrom(pb)
258
+ # We know its payload.question_answers
259
+ for key, value in payload.question_answers.split_question_answers.items():
260
+ actual_payload.split_question_answers[key] = value
261
+ for key in payload.question_answers.deleted_splits:
262
+ if key in actual_payload.split_question_answers:
263
+ del actual_payload.split_question_answers[key]
264
+ if payload.question_answers.HasField("question_answers") != "":
265
+ actual_payload.question_answers.CopyFrom(payload.question_answers.question_answers)
266
+ await self.storage.upload_pb(sf, actual_payload)
267
+ self.question_answers = actual_payload
237
268
 
238
269
  async def set_extracted_text(self, payload: ExtractedTextWrapper) -> None:
239
270
  if self.type in SUBFIELDFIELDS:
240
271
  try:
241
- actual_payload: Optional[ExtractedText] = await self.get_extracted_text(
242
- force=True
243
- )
272
+ actual_payload: Optional[ExtractedText] = await self.get_extracted_text(force=True)
244
273
  except KeyError:
245
274
  actual_payload = None
246
275
  else:
@@ -280,30 +309,21 @@ class Field:
280
309
  self.extracted_text = payload
281
310
  return self.extracted_text
282
311
 
283
- async def get_extracted_text_cf(self) -> Optional[CloudFile]:
284
- sf = self.get_storage_field(FieldTypes.FIELD_TEXT)
285
- if await sf.exists() is not None:
286
- return sf.build_cf()
287
- else:
288
- return None
289
-
290
- async def set_vectors(
291
- self, payload: ExtractedVectorsWrapper
292
- ) -> tuple[Optional[VectorObject], bool, list[str]]:
312
+ async def set_vectors(self, payload: ExtractedVectorsWrapper) -> Optional[VectorObject]:
313
+ vectorset = payload.vectorset_id or None
293
314
  if self.type in SUBFIELDFIELDS:
294
315
  try:
295
316
  actual_payload: Optional[VectorObject] = await self.get_vectors(
296
- force=True
317
+ vectorset=vectorset,
318
+ force=True,
297
319
  )
298
320
  except KeyError:
299
321
  actual_payload = None
300
322
  else:
301
323
  actual_payload = None
302
324
 
303
- sf = self.get_storage_field(FieldTypes.FIELD_VECTORS)
325
+ sf = self._get_extracted_vectors_storage_field(vectorset)
304
326
  vo: Optional[VectorObject] = None
305
- replace_field: bool = True
306
- replace_splits = []
307
327
  if actual_payload is None:
308
328
  # Its first extracted text
309
329
  if payload.HasField("file"):
@@ -312,7 +332,7 @@ class Field:
312
332
  else:
313
333
  await self.storage.upload_pb(sf, payload.vectors)
314
334
  vo = payload.vectors
315
- self.extracted_vectors = payload.vectors
335
+ self.extracted_vectors[vectorset] = payload.vectors
316
336
  else:
317
337
  if payload.HasField("file"):
318
338
  raw_payload = await self.storage.downloadbytescf(payload.file)
@@ -320,88 +340,39 @@ class Field:
320
340
  pb.ParseFromString(raw_payload.read())
321
341
  raw_payload.flush()
322
342
  payload.vectors.CopyFrom(pb)
323
- vo = payload.vectors
343
+ vo = actual_payload
324
344
  # We know its payload.body
325
345
  for key, value in payload.vectors.split_vectors.items():
326
346
  actual_payload.split_vectors[key].CopyFrom(value)
327
347
  for key in payload.vectors.deleted_splits:
328
348
  if key in actual_payload.split_vectors:
329
- replace_splits.append(key)
330
349
  del actual_payload.split_vectors[key]
331
350
  if len(payload.vectors.vectors.vectors) > 0:
332
- replace_field = True
333
351
  actual_payload.vectors.CopyFrom(payload.vectors.vectors)
334
352
  await self.storage.upload_pb(sf, actual_payload)
335
- self.extracted_vectors = actual_payload
336
- return vo, replace_field, replace_splits
337
-
338
- async def get_vectors(self, force=False) -> Optional[VectorObject]:
339
- if self.extracted_vectors is None or force:
340
- sf = self.get_storage_field(FieldTypes.FIELD_VECTORS)
353
+ self.extracted_vectors[vectorset] = actual_payload
354
+ return vo
355
+
356
+ async def get_vectors(
357
+ self, vectorset: Optional[str] = None, force: bool = False
358
+ ) -> Optional[VectorObject]:
359
+ # compat with vectorsets coming from protobuffers where no value is
360
+ # empty string instead of None. This shouldn't be handled here but we
361
+ # have to make sure it gets the correct vectorset
362
+ vectorset = vectorset or None
363
+ if self.extracted_vectors.get(vectorset, None) is None or force:
364
+ sf = self._get_extracted_vectors_storage_field(vectorset)
341
365
  payload = await self.storage.download_pb(sf, VectorObject)
342
366
  if payload is not None:
343
- self.extracted_vectors = payload
344
- return self.extracted_vectors
345
-
346
- async def set_user_vectors(
347
- self, user_vectors: UserVectorsWrapper
348
- ) -> tuple[UserVectorSet, dict[str, UserVectorsList]]:
349
- try:
350
- actual_payload: Optional[UserVectorSet] = await self.get_user_vectors(
351
- force=True
352
- )
353
- except KeyError:
354
- actual_payload = None
355
-
356
- sf = self.get_storage_field(FieldTypes.USER_FIELD_VECTORS)
357
-
358
- vectors_to_delete: dict[str, UserVectorsList] = {}
359
- if actual_payload is not None:
360
- for vectorset, user_vector in user_vectors.vectors.vectors.items():
361
- for key, vector in user_vector.vectors.items():
362
- if key in actual_payload.vectors[vectorset].vectors.keys():
363
- if vectorset not in vectors_to_delete:
364
- vectors_to_delete[vectorset] = UserVectorsList()
365
- vectors_to_delete[vectorset].vectors.append(key)
366
- actual_payload.vectors[vectorset].vectors[key].CopyFrom(vector)
367
- for vectorset, delete_vectors in user_vectors.vectors_to_delete.items():
368
- for vector_to_delete in delete_vectors.vectors:
369
- if (
370
- actual_payload.vectors.get(vectorset).vectors.get(
371
- vector_to_delete
372
- )
373
- is not None
374
- ):
375
- del actual_payload.vectors[vectorset].vectors[vector_to_delete]
376
- else:
377
- actual_payload = user_vectors.vectors
378
- await self.storage.upload_pb(sf, actual_payload)
379
- self.extracted_user_vectors = actual_payload
380
- return actual_payload, vectors_to_delete
381
-
382
- async def get_user_vectors(self, force=False) -> Optional[UserVectorSet]:
383
- if self.extracted_user_vectors is None or force:
384
- sf = self.get_storage_field(FieldTypes.USER_FIELD_VECTORS)
385
- payload = await self.storage.download_pb(sf, UserVectorSet)
386
- if payload is not None:
387
- self.extracted_user_vectors = payload
388
- return self.extracted_user_vectors
389
-
390
- async def get_vectors_cf(self) -> Optional[CloudFile]:
391
- sf = self.get_storage_field(FieldTypes.FIELD_VECTORS)
392
- if await sf.exists() is not None:
393
- return sf.build_cf()
394
- else:
395
- return None
367
+ self.extracted_vectors[vectorset] = payload
368
+ return self.extracted_vectors.get(vectorset, None)
396
369
 
397
- async def set_field_metadata(
398
- self, payload: FieldComputedMetadataWrapper
399
- ) -> tuple[FieldComputedMetadata, list[str], dict[str, list[str]]]:
370
+ async def set_field_metadata(self, payload: FieldComputedMetadataWrapper) -> FieldComputedMetadata:
400
371
  if self.type in SUBFIELDFIELDS:
401
372
  try:
402
- actual_payload: Optional[
403
- FieldComputedMetadata
404
- ] = await self.get_field_metadata(force=True)
373
+ actual_payload: Optional[FieldComputedMetadata] = await self.get_field_metadata(
374
+ force=True
375
+ )
405
376
  except KeyError:
406
377
  actual_payload = None
407
378
  else:
@@ -426,8 +397,6 @@ class Field:
426
397
  metadata.thumbnail.CopyFrom(cf_split)
427
398
  metadata.last_index.FromDatetime(datetime.now())
428
399
 
429
- replace_field = []
430
- replace_splits = {}
431
400
  if actual_payload is None:
432
401
  # Its first metadata
433
402
  await self.storage.upload_pb(sf, payload.metadata)
@@ -438,22 +407,15 @@ class Field:
438
407
  actual_payload.split_metadata[key].CopyFrom(value)
439
408
  for key in payload.metadata.deleted_splits:
440
409
  if key in actual_payload.split_metadata:
441
- replace_splits[key] = [
442
- f"{x.start}-{x.end}"
443
- for x in actual_payload.split_metadata[key].paragraphs
444
- ]
445
410
  del actual_payload.split_metadata[key]
446
411
  if payload.metadata.metadata:
447
412
  actual_payload.metadata.CopyFrom(payload.metadata.metadata)
448
- replace_field = [f"{x.start}-{x.end}" for x in metadata.paragraphs]
449
413
  await self.storage.upload_pb(sf, actual_payload)
450
414
  self.computed_metadata = actual_payload
451
415
 
452
- return self.computed_metadata, replace_field, replace_splits
416
+ return self.computed_metadata
453
417
 
454
- async def get_field_metadata(
455
- self, force: bool = False
456
- ) -> Optional[FieldComputedMetadata]:
418
+ async def get_field_metadata(self, force: bool = False) -> Optional[FieldComputedMetadata]:
457
419
  if self.computed_metadata is None or force:
458
420
  sf = self.get_storage_field(FieldTypes.FIELD_METADATA)
459
421
  payload = await self.storage.download_pb(sf, FieldComputedMetadata)
@@ -461,19 +423,12 @@ class Field:
461
423
  self.computed_metadata = payload
462
424
  return self.computed_metadata
463
425
 
464
- async def get_field_metadata_cf(self) -> Optional[CloudFile]:
465
- sf = self.get_storage_field(FieldTypes.FIELD_METADATA)
466
- if await sf.exists() is not None:
467
- return sf.build_cf()
468
- else:
469
- return None
470
-
471
426
  async def set_large_field_metadata(self, payload: LargeComputedMetadataWrapper):
472
427
  if self.type in SUBFIELDFIELDS:
473
428
  try:
474
- actual_payload: Optional[
475
- LargeComputedMetadata
476
- ] = await self.get_large_field_metadata(force=True)
429
+ actual_payload: Optional[LargeComputedMetadata] = await self.get_large_field_metadata(
430
+ force=True
431
+ )
477
432
  except KeyError:
478
433
  actual_payload = None
479
434
  else:
@@ -508,9 +463,7 @@ class Field:
508
463
 
509
464
  return self.large_computed_metadata
510
465
 
511
- async def get_large_field_metadata(
512
- self, force: bool = False
513
- ) -> Optional[LargeComputedMetadata]:
466
+ async def get_large_field_metadata(self, force: bool = False) -> Optional[LargeComputedMetadata]:
514
467
  if self.large_computed_metadata is None or force:
515
468
  sf = self.get_storage_field(FieldTypes.FIELD_LARGE_METADATA)
516
469
  payload = await self.storage.download_pb(
@@ -521,12 +474,10 @@ class Field:
521
474
  self.large_computed_metadata = payload
522
475
  return self.large_computed_metadata
523
476
 
524
- async def get_large_field_metadata_cf(self) -> Optional[CloudFile]:
525
- sf = self.get_storage_field(FieldTypes.FIELD_LARGE_METADATA)
526
- if await sf.exists() is not None:
527
- return sf.build_cf()
528
- else:
529
- return None
477
+ async def generated_by(self) -> FieldAuthor:
478
+ author = FieldAuthor()
479
+ author.user.SetInParent()
480
+ return author
530
481
 
531
482
  def serialize(self):
532
483
  return self.value.SerializeToString()
@@ -20,11 +20,9 @@
20
20
  import uuid
21
21
  from typing import Any, Optional
22
22
 
23
- from nucliadb_protos.resources_pb2 import CloudFile
24
- from nucliadb_protos.resources_pb2 import Conversation as PBConversation
25
- from nucliadb_protos.resources_pb2 import FieldConversation
26
-
27
23
  from nucliadb.ingest.fields.base import Field
24
+ from nucliadb_protos.resources_pb2 import CloudFile, FieldConversation
25
+ from nucliadb_protos.resources_pb2 import Conversation as PBConversation
28
26
  from nucliadb_utils.storages.storage import StorageField
29
27
 
30
28
  PAGE_SIZE = 200
@@ -36,7 +34,7 @@ class PageNotFound(Exception):
36
34
  pass
37
35
 
38
36
 
39
- class Conversation(Field):
37
+ class Conversation(Field[PBConversation]):
40
38
  pbklass = PBConversation
41
39
  type: str = "c"
42
40
  value: dict[int, PBConversation]
@@ -120,6 +118,21 @@ class Conversation(Field):
120
118
  except PageNotFound:
121
119
  return None
122
120
 
121
+ async def get_full_conversation(self) -> Optional[PBConversation]:
122
+ """
123
+ Messages of a conversations may be stored across several pages.
124
+ This method fetches them all and returns a single complete conversation.
125
+ """
126
+ full_conv = PBConversation()
127
+ n_page = 1
128
+ while True:
129
+ page = await self.get_value(page=n_page)
130
+ if page is None:
131
+ break
132
+ full_conv.messages.extend(page.messages)
133
+ n_page += 1
134
+ return full_conv
135
+
123
136
  async def get_metadata(self) -> FieldConversation:
124
137
  if self.metadata is None:
125
138
  payload = await self.resource.txn.get(
@@ -28,7 +28,4 @@ class InvalidPBClass(Exception):
28
28
  def __init__(self, source: Type, destination: Type):
29
29
  self.source = source
30
30
  self.destination = destination
31
- super().__init__(
32
- "Source and destination does not match "
33
- f"{self.source} - {self.destination}"
34
- )
31
+ super().__init__("Source and destination does not match " f"{self.source} - {self.destination}")
@@ -19,15 +19,14 @@
19
19
  #
20
20
  from typing import Any, Optional
21
21
 
22
- from nucliadb_protos.resources_pb2 import CloudFile, FieldFile, FileExtractedData
23
-
24
22
  from nucliadb.ingest.fields.base import Field
23
+ from nucliadb_protos.resources_pb2 import CloudFile, FieldFile, FileExtractedData
25
24
  from nucliadb_utils.storages.storage import StorageField
26
25
 
27
26
  FILE_METADATA = "file_metadata"
28
27
 
29
28
 
30
- class File(Field):
29
+ class File(Field[FieldFile]):
31
30
  pbklass = FieldFile
32
31
  value: FieldFile
33
32
  type: str = "f"
@@ -52,15 +51,13 @@ class File(Field):
52
51
 
53
52
  is_external_file = payload.file.source == CloudFile.Source.EXTERNAL
54
53
  if not is_external_file:
55
- sf: StorageField = self.storage.file_field(
56
- self.kbid, self.uuid, self.id, old_cf
57
- )
54
+ sf: StorageField = self.storage.file_field(self.kbid, self.uuid, self.id, old_cf)
58
55
  cf: CloudFile = await self.storage.normalize_binary(payload.file, sf)
59
56
  payload.file.CopyFrom(cf)
60
57
 
61
58
  await self.db_set_value(payload)
62
59
 
63
- async def get_value(self) -> FieldFile:
60
+ async def get_value(self) -> Optional[FieldFile]:
64
61
  return await self.db_get_value()
65
62
 
66
63
  async def set_file_extracted_data(self, file_extracted_data: FileExtractedData):
@@ -80,17 +77,13 @@ class File(Field):
80
77
  cf_file_page_preview: CloudFile = await self.storage.normalize_binary(
81
78
  preview, sf_file_page_preview
82
79
  )
83
- file_extracted_data.file_pages_previews.pages[page].CopyFrom(
84
- cf_file_page_preview
85
- )
80
+ file_extracted_data.file_pages_previews.pages[page].CopyFrom(cf_file_page_preview)
86
81
 
87
82
  for fileid, origincf in file_extracted_data.file_generated.items():
88
83
  sf_generated: StorageField = self.storage.file_extracted(
89
84
  self.kbid, self.uuid, self.type, self.id, f"generated/{fileid}"
90
85
  )
91
- cf_generated: CloudFile = await self.storage.normalize_binary(
92
- origincf, sf_generated
93
- )
86
+ cf_generated: CloudFile = await self.storage.normalize_binary(origincf, sf_generated)
94
87
  file_extracted_data.file_generated[fileid].CopyFrom(cf_generated)
95
88
 
96
89
  if file_extracted_data.HasField("file_thumbnail"):
@@ -113,16 +106,5 @@ class File(Field):
113
106
  sf: StorageField = self.storage.file_extracted(
114
107
  self.kbid, self.uuid, self.type, self.id, FILE_METADATA
115
108
  )
116
- self.file_extracted_data = await self.storage.download_pb(
117
- sf, FileExtractedData
118
- )
109
+ self.file_extracted_data = await self.storage.download_pb(sf, FileExtractedData)
119
110
  return self.file_extracted_data
120
-
121
- async def get_file_extracted_data_cf(self) -> Optional[CloudFile]:
122
- sf: StorageField = self.storage.file_extracted(
123
- self.kbid, self.uuid, self.type, self.id, FILE_METADATA
124
- )
125
- if await sf.exists() is not None:
126
- return sf.build_cf()
127
- else:
128
- return None