nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,138 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- from unittest.mock import AsyncMock, MagicMock, patch
21
-
22
- import pytest
23
-
24
- from nucliadb import purge
25
- from nucliadb.common.cluster.exceptions import NodeError, ShardNotFound
26
-
27
- pytestmark = pytest.mark.asyncio
28
-
29
-
30
- class DataIterator:
31
- def __init__(self, data):
32
- self.data = data
33
-
34
- def __call__(self, *args, **kwargs):
35
- return self
36
-
37
- async def __aiter__(self):
38
- for item in self.data:
39
- yield item
40
-
41
-
42
- @pytest.fixture
43
- def keys():
44
- yield []
45
-
46
-
47
- @pytest.fixture
48
- def txn(keys):
49
- mock = AsyncMock()
50
- mock.keys = DataIterator(keys)
51
- yield mock
52
-
53
-
54
- @pytest.fixture
55
- def driver(txn):
56
- mock = AsyncMock()
57
- cm = AsyncMock()
58
- cm.__aenter__.return_value = txn
59
- mock.transaction = MagicMock(return_value=cm)
60
- yield mock
61
-
62
-
63
- @pytest.fixture
64
- def storage():
65
- mock = AsyncMock()
66
- mock.delete_kb.return_value = True, False
67
- yield mock
68
-
69
-
70
- @pytest.fixture(autouse=True)
71
- def kb():
72
- mock = AsyncMock()
73
- with patch("nucliadb.purge.KnowledgeBox", mock):
74
- yield mock
75
-
76
-
77
- async def test_purge(kb, keys, driver):
78
- keys.append("/pathto/kbid")
79
-
80
- await purge.purge_kb(driver)
81
-
82
- kb.purge.assert_called_once_with(driver, "kbid")
83
- driver.begin.return_value.commit.assert_called_once()
84
-
85
-
86
- async def test_purge_handle_errors(kb, keys, driver):
87
- keys.append("/failed")
88
- keys.append("/pathto/failed")
89
- keys.append("/pathto/failed")
90
- keys.append("/pathto/failed")
91
- keys.append("/pathto/failed")
92
-
93
- kb.purge.side_effect = [ShardNotFound(), NodeError(), Exception(), None]
94
- driver.begin.return_value.delete.side_effect = Exception()
95
-
96
- await purge.purge_kb(driver)
97
-
98
- driver.begin.return_value.commit.assert_not_called()
99
- driver.begin.return_value.abort.assert_called_once()
100
-
101
-
102
- async def test_purge_kb_storage(
103
- keys,
104
- driver,
105
- storage,
106
- ):
107
- keys.append("/pathto/kbid")
108
-
109
- await purge.purge_kb_storage(driver, storage)
110
-
111
- driver.begin.return_value.commit.assert_called_once()
112
-
113
-
114
- async def test_purge_kb_storage_handle_errors(keys, driver, storage):
115
- keys.append("/failed")
116
- keys.append("/pathto/failed")
117
-
118
- driver.begin.return_value.delete.side_effect = Exception()
119
-
120
- await purge.purge_kb_storage(driver, storage)
121
-
122
- driver.begin.return_value.commit.assert_not_called()
123
-
124
-
125
- async def test_main(driver, storage):
126
- with patch("nucliadb.purge.purge_kb", AsyncMock()) as purge_kb, patch(
127
- "nucliadb.purge.purge_kb_storage", AsyncMock()
128
- ) as purge_kb_storage, patch(
129
- "nucliadb.purge.get_storage", return_value=storage
130
- ), patch(
131
- "nucliadb.purge.setup_driver", return_value=driver
132
- ), patch(
133
- "nucliadb.purge.setup_cluster", return_value=driver
134
- ):
135
- await purge.main()
136
-
137
- purge_kb.assert_called_once_with(driver)
138
- purge_kb_storage.assert_called_once_with(driver, storage)
@@ -1,74 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import uuid
21
- from datetime import datetime
22
-
23
- from nucliadb_protos.writer_pb2 import BrokerMessage, OpStatusWriter
24
- from nucliadb_protos.writer_pb2_grpc import WriterStub
25
-
26
- from nucliadb_protos import resources_pb2 as rpb
27
-
28
-
29
- def broker_resource(
30
- kbid: str, rid=None, slug=None, title=None, summary=None
31
- ) -> BrokerMessage:
32
- """
33
- Returns a broker resource with barebones metadata.
34
- """
35
- rid = rid or str(uuid.uuid4())
36
- slug = slug or f"{rid}slug1"
37
- bm: BrokerMessage = BrokerMessage(
38
- kbid=kbid,
39
- uuid=rid,
40
- slug=slug,
41
- type=BrokerMessage.AUTOCOMMIT,
42
- )
43
- title = title or "Title Resource"
44
- summary = summary or "Summary of document"
45
- bm.basic.icon = "text/plain"
46
- bm.basic.title = title
47
- bm.basic.summary = summary
48
- bm.basic.thumbnail = "doc"
49
- bm.basic.layout = "default"
50
- bm.basic.metadata.useful = True
51
- bm.basic.metadata.language = "es"
52
- bm.basic.created.FromDatetime(datetime.now())
53
- bm.basic.modified.FromDatetime(datetime.now())
54
- bm.origin.source = rpb.Origin.Source.WEB
55
-
56
- etw = rpb.ExtractedTextWrapper()
57
- etw.body.text = title
58
- etw.field.field = "title"
59
- etw.field.field_type = rpb.FieldType.GENERIC
60
- bm.extracted_text.append(etw)
61
-
62
- etw = rpb.ExtractedTextWrapper()
63
- etw.body.text = summary
64
- etw.field.field = "summary"
65
- etw.field.field_type = rpb.FieldType.GENERIC
66
- bm.extracted_text.append(etw)
67
-
68
- bm.source = BrokerMessage.MessageSource.WRITER
69
- return bm
70
-
71
-
72
- async def inject_message(writer: WriterStub, message: BrokerMessage):
73
- resp = await writer.ProcessMessage([message]) # type: ignore
74
- assert resp.status == OpStatusWriter.Status.OK
@@ -1,44 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from unittest.mock import AsyncMock, Mock
21
-
22
-
23
- def get_mocked_session(
24
- http_method: str, status: int, text=None, json=None, read=None, context_manager=True
25
- ):
26
- response = Mock(status=status)
27
- if text is not None:
28
- response.text = AsyncMock(return_value=text)
29
- if json is not None:
30
- response.json = AsyncMock(return_value=json)
31
- if read is not None:
32
- if isinstance(read, str):
33
- read = read.encode()
34
- response.read = AsyncMock(return_value=read)
35
- if context_manager:
36
- # For when async with self.session.post() as response: is called
37
- session = Mock()
38
- http_method_mock = AsyncMock(__aenter__=AsyncMock(return_value=response))
39
- getattr(session, http_method.lower()).return_value = http_method_mock
40
- else:
41
- # For when await self.session.post() is called
42
- session = AsyncMock()
43
- getattr(session, http_method.lower()).return_value = response
44
- return session
@@ -1,167 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- from datetime import datetime
22
- from typing import Optional
23
- from uuid import uuid4
24
-
25
- from nucliadb_protos import resources_pb2 as rpb
26
- from nucliadb_protos import writer_pb2 as wpb
27
-
28
- from .fields import FieldBuilder
29
- from .helpers import labels_to_classifications
30
-
31
-
32
class BrokerMessageBuilder:
    """Test helper that assembles a ``BrokerMessage`` step by step.

    A new builder already carries default basic and origin metadata plus a
    default title and summary. The ``with_*`` methods override or extend those
    values and ``build()`` returns the resulting message.
    """

    def __init__(
        self,
        *,
        kbid: str,
        rid: Optional[str] = None,
        slug: Optional[str] = None,
    ):
        # Missing identifiers are derived: a random uuid, then "<rid>-slug".
        resource_id = str(uuid4()) if rid is None else rid
        resource_slug = f"{resource_id}-slug" if slug is None else slug

        self.bm = wpb.BrokerMessage()
        # Field builders keyed by (field id, field type).
        self.fields: dict[tuple[str, rpb.FieldType.ValueType], FieldBuilder] = {}

        self.bm.kbid = kbid
        self.bm.type = wpb.BrokerMessage.AUTOCOMMIT
        # if first BM comes from PROCESSOR, it'll be ignored as it's out of order
        self.bm.source = wpb.BrokerMessage.MessageSource.WRITER
        self.bm.uuid = resource_id
        self.bm.slug = resource_slug

        self._default_basic()
        self._default_origin()

    def build(self) -> wpb.BrokerMessage:
        """Flush every registered field builder into the message and return it."""
        self._apply_fields()
        return self.bm

    def add_field_builder(self, field: FieldBuilder):
        """Register ``field``, replacing any builder with the same id and type."""
        key = (field.id.field, field.id.field_type)
        self.fields[key] = field

    def field_builder(
        self, field_id: str, field_type: rpb.FieldType.ValueType
    ) -> FieldBuilder:
        """Return the registered builder for the field; raises ``KeyError`` if absent."""
        key = (field_id, field_type)
        return self.fields[key]

    def with_title(self, title: str):
        """Set the basic title and register a ``title`` generic field for it."""
        builder = FieldBuilder("title", rpb.FieldType.GENERIC)
        builder.with_extracted_text(title)
        # we do this to writer BMs in write resource API endpoint
        whole_text = rpb.Paragraph(
            start=0,
            end=len(title),
            kind=rpb.Paragraph.TypeParagraph.TITLE,
        )
        builder.with_extracted_paragraph_metadata(whole_text)
        self.bm.basic.title = title
        self.add_field_builder(builder)

    def with_summary(self, summary: str):
        """Set the basic summary and register a ``summary`` generic field for it."""
        builder = FieldBuilder("summary", rpb.FieldType.GENERIC)
        builder.with_extracted_text(summary)
        # we do this to writer BMs in write resource API endpoint
        whole_text = rpb.Paragraph(
            start=0,
            end=len(summary),
            kind=rpb.Paragraph.TypeParagraph.DESCRIPTION,
        )
        builder.with_extracted_paragraph_metadata(whole_text)
        self.bm.basic.summary = summary
        self.add_field_builder(builder)

    def with_resource_labels(self, labelset: str, labels: list[str]):
        """Append one user classification per label to the resource."""
        self.bm.basic.usermetadata.classifications.extend(
            labels_to_classifications(labelset, labels)
        )

    def _default_basic(self):
        """Fill ``bm.basic`` with fixed defaults, then add the default title and summary."""
        basic = self.bm.basic
        basic.icon = "text/plain"
        basic.thumbnail = "doc"
        basic.layout = "default"

        metadata = basic.metadata
        metadata.useful = True
        metadata.language = "en"
        metadata.status = rpb.Metadata.Status.PROCESSED
        metadata.metadata["key"] = "value"

        # NOTE(review): datetime.now() is naive local time; protobuf's
        # FromDatetime presumably treats naive values as UTC - confirm intent.
        basic.created.FromDatetime(datetime.now())
        basic.modified.FromDatetime(datetime.now())

        self.with_title("Default test resource title")
        self.with_summary("Default test resource summary")

    def _default_origin(self):
        """Fill ``bm.origin`` with a fixed API source and current timestamps."""
        origin = self.bm.origin
        origin.source = rpb.Origin.Source.API
        origin.source_id = "My Source"
        origin.created.FromDatetime(datetime.now())
        origin.modified.FromDatetime(datetime.now())

    def _apply_fields(self):
        """Copy the output of every registered field builder into the message."""

        def upsert(entries, field_id: rpb.FieldID, item):
            # Overwrite the first entry for this field id, or append a new one.
            existing = next((entry for entry in entries if entry.field == field_id), None)
            if existing is None:
                entries.append(item)
                return
            existing.Clear()
            existing.CopyFrom(item)

        for builder in self.fields.values():
            built = builder.build()
            kind = built.id.field_type

            if kind == rpb.FieldType.FILE:
                file_field = self.bm.files[built.id.field]
                file_field.added.FromDatetime(datetime.now())
                file_field.file.source = rpb.CloudFile.Source.EXTERNAL
            elif kind != rpb.FieldType.GENERIC:
                # Generic fields need no extra entry; anything else is unsupported.
                raise Exception("Unsupported field type")

            user_metadata = built.user.metadata
            if user_metadata is not None:
                upsert(self.bm.basic.fieldmetadata, built.id, user_metadata)

            extracted = built.extracted
            if extracted.metadata is not None:
                upsert(self.bm.field_metadata, built.id, extracted.metadata)
            if extracted.text is not None:
                upsert(self.bm.extracted_text, built.id, extracted.text)
            if extracted.question_answers is not None:
                upsert(self.bm.question_answers, built.id, extracted.question_answers)
@@ -1,181 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- import dataclasses
22
- from datetime import datetime
23
- from typing import Optional
24
-
25
- from nucliadb_protos import resources_pb2 as rpb
26
-
27
- from .helpers import labels_to_classifications
28
-
29
-
30
@dataclasses.dataclass
class FieldUser:
    """Holds the user-level protobuf attached to a field."""

    # Stays None until user metadata has been produced for the field.
    metadata: Optional[rpb.UserFieldMetadata] = dataclasses.field(default=None)
33
-
34
-
35
@dataclasses.dataclass
class FieldExtracted:
    """Holds the extracted protobufs attached to a field; each is optional."""

    # Every member stays None until the corresponding data has been produced.
    metadata: Optional[rpb.FieldComputedMetadataWrapper] = dataclasses.field(default=None)
    text: Optional[rpb.ExtractedTextWrapper] = dataclasses.field(default=None)
    question_answers: Optional[rpb.FieldQuestionAnswerWrapper] = dataclasses.field(
        default=None
    )
40
-
41
-
42
@dataclasses.dataclass
class Field:
    """A field id together with its user and extracted data containers."""

    id: rpb.FieldID
    # Each instance gets its own empty containers via the default factories.
    user: FieldUser = dataclasses.field(default_factory=lambda: FieldUser())
    extracted: FieldExtracted = dataclasses.field(default_factory=lambda: FieldExtracted())
47
-
48
-
49
class FieldBuilder:
    """Incrementally builds the protobuf payloads attached to a single field.

    Each payload (extracted metadata, extracted text, question/answers, user
    metadata) is created lazily the first time a ``with_*``/``add_*`` method
    needs it. ``build()`` returns copies of the payloads that were created and
    leaves the others as ``None``.
    """

    def __init__(self, field: str, field_type: rpb.FieldType.ValueType):
        self._field_id = rpb.FieldID(field=field, field_type=field_type)
        # Backing storage for the lazily created payloads (see properties below).
        self.__extracted_metadata: Optional[rpb.FieldComputedMetadataWrapper] = None
        self.__extracted_text: Optional[rpb.ExtractedTextWrapper] = None
        self.__user_metadata: Optional[rpb.UserFieldMetadata] = None
        self.__question_answers: Optional[rpb.FieldQuestionAnswerWrapper] = None

    @property
    def id(self) -> rpb.FieldID:
        """The ``FieldID`` (field name and type) this builder targets."""
        return self._field_id

    # properties to generate a default value per pb

    @property
    def _extracted_metadata(self) -> rpb.FieldComputedMetadataWrapper:
        """Extracted metadata wrapper, created on first access with timestamps set."""
        if self.__extracted_metadata is None:
            # NOTE(review): datetime.now() is naive local time; protobuf's
            # FromDatetime presumably treats naive values as UTC - confirm intent.
            now = datetime.now()
            self.__extracted_metadata = rpb.FieldComputedMetadataWrapper(
                field=self._field_id,
            )
            self.__extracted_metadata.metadata.metadata.last_index.FromDatetime(now)
            self.__extracted_metadata.metadata.metadata.last_understanding.FromDatetime(
                now
            )
            self.__extracted_metadata.metadata.metadata.last_extract.FromDatetime(now)
        return self.__extracted_metadata

    @property
    def _extracted_text(self) -> rpb.ExtractedTextWrapper:
        """Extracted text wrapper, created on first access."""
        if self.__extracted_text is None:
            self.__extracted_text = rpb.ExtractedTextWrapper(field=self._field_id)
        return self.__extracted_text

    @property
    def _question_answers(self) -> rpb.FieldQuestionAnswerWrapper:
        """Question/answer wrapper, created on first access."""
        if self.__question_answers is None:
            self.__question_answers = rpb.FieldQuestionAnswerWrapper(
                field=self._field_id
            )
        return self.__question_answers

    @property
    def _user_metadata(self) -> rpb.UserFieldMetadata:
        """User field metadata, created on first access."""
        if self.__user_metadata is None:
            self.__user_metadata = rpb.UserFieldMetadata(field=self._field_id)
        return self.__user_metadata

    def build(self) -> Field:
        """Return a ``Field`` holding copies of every payload created so far.

        Payloads that were never touched stay ``None`` on the result. Copies
        are returned so later builder calls do not alter an already built field.
        """
        field = Field(id=self._field_id)

        if self.__extracted_metadata is not None:
            field.extracted.metadata = rpb.FieldComputedMetadataWrapper()
            field.extracted.metadata.CopyFrom(self.__extracted_metadata)

        if self.__extracted_text is not None:
            field.extracted.text = rpb.ExtractedTextWrapper()
            field.extracted.text.CopyFrom(self.__extracted_text)

        if self.__question_answers is not None:
            field.extracted.question_answers = rpb.FieldQuestionAnswerWrapper()
            field.extracted.question_answers.CopyFrom(self.__question_answers)

        if self.__user_metadata is not None:
            field.user.metadata = rpb.UserFieldMetadata()
            field.user.metadata.CopyFrom(self.__user_metadata)

        return field

    def with_extracted_labels(self, labelset: str, labels: list[str]):
        """Append one extracted classification per label in ``labelset``."""
        classifications = labels_to_classifications(labelset, labels)
        self._extracted_metadata.metadata.metadata.classifications.extend(
            classifications
        )

    def with_extracted_text(self, text: str):
        """Set the extracted text body of the field."""
        self._extracted_text.body.text = text

    def with_extracted_paragraph_metadata(self, paragraph: rpb.Paragraph):
        """Append ``paragraph`` to the extracted metadata paragraphs."""
        self._extracted_metadata.metadata.metadata.paragraphs.append(paragraph)

    def with_user_entity(self, klass: str, name: str, *, start: int, end: int):
        """Append a user token annotation of class ``klass`` spanning ``start``..``end``."""
        entity = rpb.TokenSplit(
            klass=klass,
            token=name,
            start=start,
            end=end,
        )
        self._user_metadata.token.append(entity)

    def with_extracted_entity(
        self, klass: str, name: str, *, positions: list[rpb.Position]
    ):
        """Record ``positions`` for the extracted entity keyed as ``"<klass>/<name>"``."""
        entity = self._extracted_metadata.metadata.metadata.positions[f"{klass}/{name}"]
        entity.entity = name
        entity.position.extend(positions)

    def with_user_paragraph_labels(self, key: str, labelset: str, labels: list[str]):
        """Append a user paragraph annotation for ``key`` with the given labels."""
        classifications = labels_to_classifications(labelset, labels)
        pa = rpb.ParagraphAnnotation()
        pa.key = key
        pa.classifications.extend(classifications)
        self._user_metadata.paragraphs.append(pa)

    def add_question_answer(
        self,
        question: str,
        answer: str,
        question_lang: str = "en",
        question_paragraph_ids: Optional[list[str]] = None,
        answer_lang: str = "en",
        answer_paragraph_ids: Optional[list[str]] = None,
    ):
        """Add ``answer`` to ``question``, creating the question entry if needed.

        If an identical question (same text, language and paragraph ids) was
        already added, the answer is appended to it; otherwise a new
        question/answer pair is created.

        ``question_paragraph_ids`` and ``answer_paragraph_ids`` default to no
        paragraph ids. ``None`` is used as the default instead of a literal
        ``[]`` so that no list object is shared between calls.
        """
        question_ids = [] if question_paragraph_ids is None else question_paragraph_ids
        answer_ids = [] if answer_paragraph_ids is None else answer_paragraph_ids

        question_pb = rpb.Question(
            text=question,
            language=question_lang,
            ids_paragraphs=question_ids,
        )
        answer_pb = rpb.Answers(
            text=answer,
            language=answer_lang,
            ids_paragraphs=answer_ids,
        )

        # check if is another answer for an already added question
        for question_answer in self._question_answers.question_answers.question_answer:
            if question_answer.question == question_pb:
                question_answer.answers.append(answer_pb)
                return

        question_answer = rpb.QuestionAnswer(question=question_pb)
        question_answer.answers.append(answer_pb)
        self._question_answers.question_answers.question_answer.append(question_answer)
@@ -1,33 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- from nucliadb_protos.resources_pb2 import Classification
22
-
23
-
24
def labels_to_classifications(labelset: str, labels: list[str]) -> list[Classification]:
    """Wrap every entry of ``labels`` in a ``Classification`` under ``labelset``.

    Each classification is created with ``cancelled_by_user=False``; the result
    keeps the order of ``labels``.
    """
    return [
        Classification(labelset=labelset, label=name, cancelled_by_user=False)
        for name in labels
    ]