nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,136 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- from unittest.mock import AsyncMock, MagicMock, patch
21
-
22
- import pytest
23
-
24
- from nucliadb import purge
25
- from nucliadb.common.cluster.exceptions import NodeError, ShardNotFound
26
-
27
- pytestmark = pytest.mark.asyncio
28
-
29
-
30
- class DataIterator:
31
- def __init__(self, data):
32
- self.data = data
33
-
34
- def __call__(self, *args, **kwargs):
35
- return self
36
-
37
- async def __aiter__(self):
38
- for item in self.data:
39
- yield item
40
-
41
-
42
- @pytest.fixture
43
- def keys():
44
- yield []
45
-
46
-
47
- @pytest.fixture
48
- def txn(keys):
49
- mock = AsyncMock()
50
- mock.keys = DataIterator(keys)
51
- yield mock
52
-
53
-
54
- @pytest.fixture
55
- def driver(txn):
56
- mock = AsyncMock()
57
- cm = AsyncMock()
58
- cm.__aenter__.return_value = txn
59
- mock.transaction = MagicMock(return_value=cm)
60
- yield mock
61
-
62
-
63
- @pytest.fixture
64
- def storage():
65
- mock = AsyncMock()
66
- mock.delete_kb.return_value = True, False
67
- yield mock
68
-
69
-
70
- @pytest.fixture(autouse=True)
71
- def kb():
72
- mock = AsyncMock()
73
- with patch("nucliadb.purge.KnowledgeBox", mock):
74
- yield mock
75
-
76
-
77
- async def test_purge(kb, keys, driver):
78
- keys.append("/pathto/kbid")
79
-
80
- await purge.purge_kb(driver)
81
-
82
- kb.purge.assert_called_once_with(driver, "kbid")
83
- driver.begin.return_value.commit.assert_called_once()
84
-
85
-
86
- async def test_purge_handle_errors(kb, keys, driver):
87
- keys.append("/failed")
88
- keys.append("/pathto/failed")
89
- keys.append("/pathto/failed")
90
- keys.append("/pathto/failed")
91
- keys.append("/pathto/failed")
92
-
93
- kb.purge.side_effect = [ShardNotFound(), NodeError(), Exception(), None]
94
- driver.begin.return_value.delete.side_effect = Exception()
95
-
96
- await purge.purge_kb(driver)
97
-
98
- driver.begin.return_value.commit.assert_not_called()
99
- driver.begin.return_value.abort.assert_called_once()
100
-
101
-
102
- async def test_purge_kb_storage(
103
- keys,
104
- driver,
105
- storage,
106
- ):
107
- keys.append("/pathto/kbid")
108
-
109
- await purge.purge_kb_storage(driver, storage)
110
-
111
- driver.begin.return_value.commit.assert_called_once()
112
-
113
-
114
- async def test_purge_kb_storage_handle_errors(keys, driver, storage):
115
- keys.append("/failed")
116
- keys.append("/pathto/failed")
117
-
118
- driver.begin.return_value.delete.side_effect = Exception()
119
-
120
- await purge.purge_kb_storage(driver, storage)
121
-
122
- driver.begin.return_value.commit.assert_not_called()
123
-
124
-
125
- async def test_main(driver, storage):
126
- with (
127
- patch("nucliadb.purge.purge_kb", AsyncMock()) as purge_kb,
128
- patch("nucliadb.purge.purge_kb_storage", AsyncMock()) as purge_kb_storage,
129
- patch("nucliadb.purge.get_storage", return_value=storage),
130
- patch("nucliadb.purge.setup_driver", return_value=driver),
131
- patch("nucliadb.purge.setup_cluster", return_value=driver),
132
- ):
133
- await purge.main()
134
-
135
- purge_kb.assert_called_once_with(driver)
136
- purge_kb_storage.assert_called_once_with(driver, storage)
@@ -1,74 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import uuid
21
- from datetime import datetime
22
-
23
- from nucliadb_protos.writer_pb2 import BrokerMessage, OpStatusWriter
24
- from nucliadb_protos.writer_pb2_grpc import WriterStub
25
-
26
- from nucliadb_protos import resources_pb2 as rpb
27
-
28
-
29
- def broker_resource(
30
- kbid: str, rid=None, slug=None, title=None, summary=None
31
- ) -> BrokerMessage:
32
- """
33
- Returns a broker resource with barebones metadata.
34
- """
35
- rid = rid or str(uuid.uuid4())
36
- slug = slug or f"{rid}slug1"
37
- bm: BrokerMessage = BrokerMessage(
38
- kbid=kbid,
39
- uuid=rid,
40
- slug=slug,
41
- type=BrokerMessage.AUTOCOMMIT,
42
- )
43
- title = title or "Title Resource"
44
- summary = summary or "Summary of document"
45
- bm.basic.icon = "text/plain"
46
- bm.basic.title = title
47
- bm.basic.summary = summary
48
- bm.basic.thumbnail = "doc"
49
- bm.basic.layout = "default"
50
- bm.basic.metadata.useful = True
51
- bm.basic.metadata.language = "es"
52
- bm.basic.created.FromDatetime(datetime.now())
53
- bm.basic.modified.FromDatetime(datetime.now())
54
- bm.origin.source = rpb.Origin.Source.WEB
55
-
56
- etw = rpb.ExtractedTextWrapper()
57
- etw.body.text = title
58
- etw.field.field = "title"
59
- etw.field.field_type = rpb.FieldType.GENERIC
60
- bm.extracted_text.append(etw)
61
-
62
- etw = rpb.ExtractedTextWrapper()
63
- etw.body.text = summary
64
- etw.field.field = "summary"
65
- etw.field.field_type = rpb.FieldType.GENERIC
66
- bm.extracted_text.append(etw)
67
-
68
- bm.source = BrokerMessage.MessageSource.WRITER
69
- return bm
70
-
71
-
72
- async def inject_message(writer: WriterStub, message: BrokerMessage):
73
- resp = await writer.ProcessMessage([message]) # type: ignore
74
- assert resp.status == OpStatusWriter.Status.OK
@@ -1,44 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from unittest.mock import AsyncMock, Mock
21
-
22
-
23
- def get_mocked_session(
24
- http_method: str, status: int, text=None, json=None, read=None, context_manager=True
25
- ):
26
- response = Mock(status=status)
27
- if text is not None:
28
- response.text = AsyncMock(return_value=text)
29
- if json is not None:
30
- response.json = AsyncMock(return_value=json)
31
- if read is not None:
32
- if isinstance(read, str):
33
- read = read.encode()
34
- response.read = AsyncMock(return_value=read)
35
- if context_manager:
36
- # For when async with self.session.post() as response: is called
37
- session = Mock()
38
- http_method_mock = AsyncMock(__aenter__=AsyncMock(return_value=response))
39
- getattr(session, http_method.lower()).return_value = http_method_mock
40
- else:
41
- # For when await self.session.post() is called
42
- session = AsyncMock()
43
- getattr(session, http_method.lower()).return_value = response
44
- return session
@@ -1,171 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- from datetime import datetime
22
- from typing import Optional
23
- from uuid import uuid4
24
-
25
- from nucliadb_protos import resources_pb2 as rpb
26
- from nucliadb_protos import writer_pb2 as wpb
27
-
28
- from .fields import FieldBuilder
29
- from .helpers import labels_to_classifications
30
-
31
-
32
- class BrokerMessageBuilder:
33
- """Helper to deal with broker message creation. It allows customized
34
- creation of broker messages with sensible defaults and default title and
35
- summary.
36
-
37
- """
38
-
39
- def __init__(
40
- self,
41
- *,
42
- kbid: str,
43
- rid: Optional[str] = None,
44
- slug: Optional[str] = None,
45
- ):
46
- self.bm = wpb.BrokerMessage()
47
- self.fields: dict[tuple[str, rpb.FieldType.ValueType], FieldBuilder] = {}
48
-
49
- self.bm.kbid = kbid
50
- self.bm.type = wpb.BrokerMessage.AUTOCOMMIT
51
-
52
- # if first BM comes from PROCESSOR, it'll be ignored as it's out of order
53
- self.bm.source = wpb.BrokerMessage.MessageSource.WRITER
54
-
55
- if rid is None:
56
- rid = str(uuid4())
57
- self.bm.uuid = rid
58
-
59
- if slug is None:
60
- slug = f"{rid}-slug"
61
- self.bm.slug = slug
62
-
63
- self._default_basic()
64
- self._default_origin()
65
-
66
- def build(self) -> wpb.BrokerMessage:
67
- self._apply_fields()
68
- return self.bm
69
-
70
- def add_field_builder(self, field: FieldBuilder):
71
- self.fields[(field.id.field, field.id.field_type)] = field
72
-
73
- def field_builder(
74
- self, field_id: str, field_type: rpb.FieldType.ValueType
75
- ) -> FieldBuilder:
76
- return self.fields[(field_id, field_type)]
77
-
78
- def with_title(self, title: str):
79
- title_builder = FieldBuilder("title", rpb.FieldType.GENERIC)
80
- title_builder.with_extracted_text(title)
81
- # we do this to writer BMs in write resource API endpoint
82
- title_builder.with_extracted_paragraph_metadata(
83
- rpb.Paragraph(
84
- start=0,
85
- end=len(title),
86
- kind=rpb.Paragraph.TypeParagraph.TITLE,
87
- )
88
- )
89
- self.bm.basic.title = title
90
- self.add_field_builder(title_builder)
91
-
92
- def with_summary(self, summary: str):
93
- summary_builder = FieldBuilder("summary", rpb.FieldType.GENERIC)
94
- summary_builder.with_extracted_text(summary)
95
- # we do this to writer BMs in write resource API endpoint
96
- summary_builder.with_extracted_paragraph_metadata(
97
- rpb.Paragraph(
98
- start=0,
99
- end=len(summary),
100
- kind=rpb.Paragraph.TypeParagraph.DESCRIPTION,
101
- )
102
- )
103
- self.bm.basic.summary = summary
104
- self.add_field_builder(summary_builder)
105
-
106
- def with_resource_labels(self, labelset: str, labels: list[str]):
107
- classifications = labels_to_classifications(labelset, labels)
108
- self.bm.basic.usermetadata.classifications.extend(classifications)
109
-
110
- def _default_basic(self):
111
- self.bm.basic.icon = "text/plain"
112
- self.bm.basic.thumbnail = "doc"
113
- self.bm.basic.layout = "default"
114
- self.bm.basic.metadata.useful = True
115
- self.bm.basic.metadata.language = "en"
116
- self.bm.basic.metadata.status = rpb.Metadata.Status.PROCESSED
117
- self.bm.basic.metadata.metadata["key"] = "value"
118
- self.bm.basic.created.FromDatetime(datetime.now())
119
- self.bm.basic.modified.FromDatetime(datetime.now())
120
-
121
- self.with_title("Default test resource title")
122
- self.with_summary("Default test resource summary")
123
-
124
- def _default_origin(self):
125
- self.bm.origin.source = rpb.Origin.Source.API
126
- self.bm.origin.source_id = "My Source"
127
- self.bm.origin.created.FromDatetime(datetime.now())
128
- self.bm.origin.modified.FromDatetime(datetime.now())
129
-
130
- def _apply_fields(self):
131
- def replace_if_exists(mut_iterable, field_id: rpb.FieldID, item):
132
- for obj in mut_iterable:
133
- if obj.field == field_id:
134
- obj.Clear()
135
- obj.CopyFrom(item)
136
- break
137
- else:
138
- mut_iterable.append(item)
139
-
140
- for field_builder in self.fields.values():
141
- field = field_builder.build()
142
-
143
- if field.id.field_type == rpb.FieldType.GENERIC:
144
- pass
145
- elif field.id.field_type == rpb.FieldType.FILE:
146
- file_field = self.bm.files[field.id.field]
147
- file_field.added.FromDatetime(datetime.now())
148
- file_field.file.source = rpb.CloudFile.Source.EXTERNAL
149
- else:
150
- raise Exception("Unsupported field type")
151
-
152
- if field.user.metadata is not None:
153
- replace_if_exists(
154
- self.bm.basic.fieldmetadata, field.id, field.user.metadata
155
- )
156
- if field.extracted.metadata is not None:
157
- replace_if_exists(
158
- self.bm.field_metadata, field.id, field.extracted.metadata
159
- )
160
- if field.extracted.text is not None:
161
- replace_if_exists(
162
- self.bm.extracted_text, field.id, field.extracted.text
163
- )
164
- if field.extracted.vectors is not None:
165
- replace_if_exists(
166
- self.bm.field_vectors, field.id, field.extracted.vectors
167
- )
168
- if field.extracted.question_answers is not None:
169
- replace_if_exists(
170
- self.bm.question_answers, field.id, field.extracted.question_answers
171
- )
@@ -1,197 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- import dataclasses
22
- from datetime import datetime
23
- from typing import Optional
24
-
25
- from nucliadb_protos import resources_pb2 as rpb
26
- from nucliadb_protos import utils_pb2
27
-
28
- from .helpers import labels_to_classifications
29
-
30
-
31
- @dataclasses.dataclass
32
- class FieldUser:
33
- metadata: Optional[rpb.UserFieldMetadata] = None
34
-
35
-
36
- @dataclasses.dataclass
37
- class FieldExtracted:
38
- metadata: Optional[rpb.FieldComputedMetadataWrapper] = None
39
- text: Optional[rpb.ExtractedTextWrapper] = None
40
- vectors: Optional[rpb.ExtractedVectorsWrapper] = None
41
- question_answers: Optional[rpb.FieldQuestionAnswerWrapper] = None
42
-
43
-
44
- @dataclasses.dataclass
45
- class Field:
46
- id: rpb.FieldID
47
- user: FieldUser = dataclasses.field(default_factory=FieldUser)
48
- extracted: FieldExtracted = dataclasses.field(default_factory=FieldExtracted)
49
-
50
-
51
- class FieldBuilder:
52
- def __init__(self, field: str, field_type: rpb.FieldType.ValueType):
53
- self._field_id = rpb.FieldID(field=field, field_type=field_type)
54
- self.__extracted_metadata: Optional[rpb.FieldComputedMetadataWrapper] = None
55
- self.__extracted_text: Optional[rpb.ExtractedTextWrapper] = None
56
- self.__extracted_vectors: Optional[rpb.ExtractedVectorsWrapper] = None
57
- self.__user_metadata: Optional[rpb.UserFieldMetadata] = None
58
- self.__question_answers: Optional[rpb.FieldQuestionAnswerWrapper] = None
59
-
60
- @property
61
- def id(self) -> rpb.FieldID:
62
- return self._field_id
63
-
64
- # properties to generate a default value per pb
65
-
66
- @property
67
- def _extracted_metadata(self) -> rpb.FieldComputedMetadataWrapper:
68
- if self.__extracted_metadata is None:
69
- now = datetime.now()
70
- self.__extracted_metadata = rpb.FieldComputedMetadataWrapper(
71
- field=self._field_id,
72
- )
73
- self.__extracted_metadata.metadata.metadata.last_index.FromDatetime(now)
74
- self.__extracted_metadata.metadata.metadata.last_understanding.FromDatetime(
75
- now
76
- )
77
- self.__extracted_metadata.metadata.metadata.last_extract.FromDatetime(now)
78
- return self.__extracted_metadata
79
-
80
- @property
81
- def _extracted_text(self) -> rpb.ExtractedTextWrapper:
82
- if self.__extracted_text is None:
83
- self.__extracted_text = rpb.ExtractedTextWrapper(field=self._field_id)
84
- return self.__extracted_text
85
-
86
- @property
87
- def _extracted_vectors(self) -> rpb.ExtractedVectorsWrapper:
88
- if self.__extracted_vectors is None:
89
- self.__extracted_vectors = rpb.ExtractedVectorsWrapper(field=self._field_id)
90
- return self.__extracted_vectors
91
-
92
- @property
93
- def _question_answers(self) -> rpb.FieldQuestionAnswerWrapper:
94
- if self.__question_answers is None:
95
- self.__question_answers = rpb.FieldQuestionAnswerWrapper(
96
- field=self._field_id
97
- )
98
- return self.__question_answers
99
-
100
- @property
101
- def _user_metadata(self) -> rpb.UserFieldMetadata:
102
- if self.__user_metadata is None:
103
- self.__user_metadata = rpb.UserFieldMetadata(field=self._field_id)
104
- return self.__user_metadata
105
-
106
- def build(self) -> Field:
107
- field = Field(id=self._field_id)
108
-
109
- if self.__extracted_metadata is not None:
110
- field.extracted.metadata = rpb.FieldComputedMetadataWrapper()
111
- field.extracted.metadata.CopyFrom(self.__extracted_metadata)
112
-
113
- if self.__extracted_text is not None:
114
- field.extracted.text = rpb.ExtractedTextWrapper()
115
- field.extracted.text.CopyFrom(self.__extracted_text)
116
-
117
- if self.__extracted_vectors is not None:
118
- field.extracted.vectors = rpb.ExtractedVectorsWrapper()
119
- field.extracted.vectors.CopyFrom(self.__extracted_vectors)
120
-
121
- if self.__question_answers is not None:
122
- field.extracted.question_answers = rpb.FieldQuestionAnswerWrapper()
123
- field.extracted.question_answers.CopyFrom(self.__question_answers)
124
-
125
- if self.__user_metadata is not None:
126
- field.user.metadata = rpb.UserFieldMetadata()
127
- field.user.metadata.CopyFrom(self.__user_metadata)
128
-
129
- return field
130
-
131
- def with_extracted_labels(self, labelset: str, labels: list[str]):
132
- classifications = labels_to_classifications(labelset, labels)
133
- self._extracted_metadata.metadata.metadata.classifications.extend(
134
- classifications
135
- )
136
-
137
- def with_extracted_text(self, text: str):
138
- self._extracted_text.body.text = text
139
-
140
- def with_extracted_vectors(self, vectors: list[utils_pb2.Vector]):
141
- self._extracted_vectors.vectors.vectors.vectors.extend(vectors)
142
-
143
- def with_extracted_paragraph_metadata(self, paragraph: rpb.Paragraph):
144
- self._extracted_metadata.metadata.metadata.paragraphs.append(paragraph)
145
-
146
- def with_user_entity(self, klass: str, name: str, *, start: int, end: int):
147
- entity = rpb.TokenSplit(
148
- klass=klass,
149
- token=name,
150
- start=start,
151
- end=end,
152
- )
153
- self._user_metadata.token.append(entity)
154
-
155
- def with_extracted_entity(
156
- self, klass: str, name: str, *, positions: list[rpb.Position]
157
- ):
158
- entity = self._extracted_metadata.metadata.metadata.positions[f"{klass}/{name}"]
159
- entity.entity = name
160
- entity.position.extend(positions)
161
-
162
- def with_user_paragraph_labels(self, key: str, labelset: str, labels: list[str]):
163
- classifications = labels_to_classifications(labelset, labels)
164
- pa = rpb.ParagraphAnnotation()
165
- pa.key = key
166
- pa.classifications.extend(classifications)
167
- self._user_metadata.paragraphs.append(pa)
168
-
169
- def add_question_answer(
170
- self,
171
- question: str,
172
- answer: str,
173
- question_lang: str = "en",
174
- question_paragraph_ids: list[str] = [],
175
- answer_lang: str = "en",
176
- answer_paragraph_ids: list[str] = [],
177
- ):
178
- question_pb = rpb.Question(
179
- text=question,
180
- language=question_lang,
181
- ids_paragraphs=question_paragraph_ids,
182
- )
183
- answer_pb = rpb.Answers(
184
- text=answer,
185
- language=answer_lang,
186
- ids_paragraphs=answer_paragraph_ids,
187
- )
188
-
189
- # check if is another answer for an already added question
190
- for question_answer in self._question_answers.question_answers.question_answer:
191
- if question_answer.question == question_pb:
192
- question_answer.answers.append(answer_pb)
193
- return
194
-
195
- question_answer = rpb.QuestionAnswer(question=question_pb)
196
- question_answer.answers.append(answer_pb)
197
- self._question_answers.question_answers.question_answer.append(question_answer)
@@ -1,33 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- from nucliadb_protos.resources_pb2 import Classification
22
-
23
-
24
- def labels_to_classifications(labelset: str, labels: list[str]) -> list[Classification]:
25
- classifications = [
26
- Classification(
27
- labelset=labelset,
28
- label=label,
29
- cancelled_by_user=False,
30
- )
31
- for label in labels
32
- ]
33
- return classifications