nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,80 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import uuid
21
- from unittest.mock import AsyncMock, Mock
22
-
23
- import pytest
24
- from nucliadb_protos.train_pb2 import GetEntitiesRequest
25
- from nucliadb_protos.train_pb2_grpc import TrainStub
26
- from nucliadb_protos.writer_pb2 import GetEntitiesResponse
27
-
28
-
29
- @pytest.fixture(scope="function")
30
- async def entities_manager_mock():
31
- from nucliadb.train import nodes
32
-
33
- original = nodes.EntitiesManager
34
-
35
- mock = Mock()
36
- nodes.EntitiesManager = Mock(return_value=mock)
37
-
38
- yield mock
39
-
40
- nodes.EntitiesManager = original
41
-
42
-
43
- @pytest.mark.asyncio
44
- async def test_get_entities(
45
- train_client: TrainStub,
46
- knowledgebox_ingest: str,
47
- entities_manager_mock: Mock,
48
- ) -> None:
49
- def get_entities_mock(response):
50
- response.groups["group1"].entities["entity1"].value = "PERSON"
51
-
52
- entities_manager_mock.get_entities = AsyncMock(side_effect=get_entities_mock)
53
-
54
- req = GetEntitiesRequest()
55
- req.kb.uuid = knowledgebox_ingest
56
- entities: GetEntitiesResponse = await train_client.GetEntities(req) # type: ignore
57
-
58
- assert entities.groups["group1"].entities["entity1"].value == "PERSON"
59
-
60
-
61
- @pytest.mark.asyncio
62
- async def test_get_entities_kb_not_found(train_client: TrainStub) -> None:
63
- req = GetEntitiesRequest()
64
- req.kb.uuid = str(uuid.uuid4())
65
- entities: GetEntitiesResponse = await train_client.GetEntities(req) # type: ignore
66
- assert entities.status == GetEntitiesResponse.Status.NOTFOUND
67
-
68
-
69
- @pytest.mark.asyncio
70
- async def test_get_entities_error(
71
- train_client: TrainStub, knowledgebox_ingest: str, entities_manager_mock
72
- ) -> None:
73
- entities_manager_mock.get_entities = AsyncMock(
74
- side_effect=Exception("Testing exception on ingest")
75
- )
76
-
77
- req = GetEntitiesRequest()
78
- req.kb.uuid = knowledgebox_ingest
79
- entities: GetEntitiesResponse = await train_client.GetEntities(req) # type: ignore
80
- assert entities.status == GetEntitiesResponse.Status.ERROR
@@ -1,51 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import sys
21
-
22
- import pytest
23
- from aioresponses import aioresponses
24
- from nucliadb_protos.train_pb2 import GetInfoRequest, TrainInfo
25
- from nucliadb_protos.train_pb2_grpc import TrainStub
26
-
27
- VERSION = sys.version_info
28
- PY_GEQ_3_11 = VERSION.major > 3 or VERSION.major == 3 and VERSION.minor >= 11
29
-
30
-
31
- @pytest.mark.asyncio
32
- @pytest.mark.skipif(
33
- PY_GEQ_3_11, reason="aioresponses not compatible with python 3.11 yet"
34
- )
35
- async def test_get_info(
36
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
37
- ) -> None:
38
- req = GetInfoRequest()
39
- req.kb.uuid = knowledgebox_ingest
40
-
41
- with aioresponses() as m:
42
- m.get(
43
- f"http://search.nuclia.svc.cluster.local:8030/api/v1/kb/{knowledgebox_ingest}/counters",
44
- payload={"resources": 4, "paragraphs": 89, "fields": 4, "sentences": 90},
45
- )
46
-
47
- labels: TrainInfo = await train_client.GetInfo(req) # type: ignore
48
- assert labels.fields == 4
49
- assert labels.resources == 4
50
- assert labels.paragraphs == 89
51
- assert labels.sentences == 90
@@ -1,34 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
- from nucliadb_protos.train_pb2 import GetLabelsRequest
22
- from nucliadb_protos.train_pb2_grpc import TrainStub
23
- from nucliadb_protos.writer_pb2 import GetLabelsResponse
24
-
25
-
26
- @pytest.mark.asyncio
27
- async def test_get_ontology(
28
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
29
- ) -> None:
30
- req = GetLabelsRequest()
31
- req.kb.uuid = knowledgebox_ingest
32
-
33
- labels: GetLabelsResponse = await train_client.GetOntology(req) # type: ignore
34
- assert labels.labels.labelset["label1"].labels[0].title == "label1"
@@ -1,63 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import sys
21
-
22
- import pytest
23
- from aioresponses import aioresponses
24
- from nucliadb_protos.train_pb2 import GetLabelsetsCountRequest, LabelsetsCount
25
- from nucliadb_protos.train_pb2_grpc import TrainStub
26
-
27
- VERSION = sys.version_info
28
- PY_GEQ_3_11 = VERSION.major > 3 or VERSION.major == 3 and VERSION.minor >= 11
29
-
30
-
31
- @pytest.mark.asyncio
32
- @pytest.mark.skipif(
33
- PY_GEQ_3_11, reason="aioresponses not compatible with python 3.11 yet"
34
- )
35
- async def test_get_ontology_count(
36
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
37
- ) -> None:
38
- req = GetLabelsetsCountRequest()
39
- req.kb.uuid = knowledgebox_ingest
40
-
41
- with aioresponses() as m:
42
- m.get(
43
- f"http://search.nuclia.svc.cluster.local:8030/api/v1/kb/{knowledgebox_ingest}/search?faceted=/l/my-labelset", # noqa
44
- payload={
45
- "resources": {},
46
- "sentences": {"results": [], "facets": {}},
47
- "paragraphs": {
48
- "results": [],
49
- "facets": {
50
- "/l/my-labelset": {
51
- "facetresults": [
52
- {"tag": "/l/my-labelset/Label 1", "total": 1}
53
- ]
54
- }
55
- },
56
- },
57
- "fulltext": {"results": [], "facets": {}},
58
- },
59
- )
60
-
61
- req.resource_labelsets.append("my-labelset")
62
- labels: LabelsetsCount = await train_client.GetOntologyCount(req) # type: ignore
63
- assert labels.labelsets["/l/my-labelset"].paragraphs["Label 1"] == 1
@@ -1,221 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import asyncio
21
- import base64
22
- import json
23
- import os
24
- from typing import Any
25
- from unittest.mock import AsyncMock, patch
26
-
27
- import aiohttp
28
- import pytest
29
- from nucliadb_protos.dataset_pb2 import ImageClassificationBatch, TaskType, TrainSet
30
- from nucliadb_protos.resources_pb2 import (
31
- CloudFile,
32
- FileExtractedData,
33
- FilePages,
34
- PageStructure,
35
- PageStructurePage,
36
- PageStructureToken,
37
- )
38
- from nucliadb_protos.writer_pb2 import BrokerMessage, OpStatusWriter
39
- from nucliadb_protos.writer_pb2_grpc import WriterStub
40
-
41
- from nucliadb.train import API_PREFIX
42
- from nucliadb.train.api.v1.router import KB_PREFIX
43
- from nucliadb.train.tests.utils import get_batches_from_train_response_stream
44
- from nucliadb_utils.utilities import Utility, get_utility, set_utility
45
-
46
- _dir = os.path.dirname(__file__)
47
- _testdata_dir = os.path.join(_dir, "..", "..", "tests", "testdata")
48
-
49
- INVOICE_FILENAME = os.path.join(_testdata_dir, "invoice.pdf")
50
- INVOICE_SELECTIONS_FILENAME = os.path.join(_testdata_dir, "invoice_selections.json")
51
-
52
-
53
- @pytest.mark.asyncio
54
- @pytest.mark.parametrize("knowledgebox", ["STABLE", "EXPERIMENTAL"], indirect=True)
55
- async def test_generation_image_classification(
56
- train_rest_api: aiohttp.ClientSession,
57
- knowledgebox: str,
58
- image_classification_resource,
59
- ):
60
- async with train_rest_api.get(
61
- f"/{API_PREFIX}/v1/{KB_PREFIX}/{knowledgebox}/trainset"
62
- ) as partitions:
63
- assert partitions.status == 200
64
- data = await partitions.json()
65
- assert len(data["partitions"]) == 1
66
- partition_id = data["partitions"][0]
67
-
68
- trainset = TrainSet()
69
- trainset.type = TaskType.IMAGE_CLASSIFICATION
70
- trainset.batch_size = 10
71
-
72
- await asyncio.sleep(0.1)
73
- async with train_rest_api.post(
74
- f"/{API_PREFIX}/v1/{KB_PREFIX}/{knowledgebox}/trainset/{partition_id}",
75
- data=trainset.SerializeToString(),
76
- ) as response:
77
- assert response.status == 200
78
- batches = []
79
- async for batch in get_batches_from_train_response_stream(
80
- response, ImageClassificationBatch
81
- ):
82
- batches.append(batch)
83
- assert len(batch.data) == 1
84
- selections = json.loads(batch.data[0].selections)
85
- assert selections["width"] == 10
86
- assert selections["height"] == 10
87
- assert len(selections["tokens"]) == 87
88
- assert len(selections["annotations"]) == 18
89
- assert batch.data[0].page_uri == "DUMMY-URI"
90
- assert len(batches) == 1
91
-
92
-
93
- @pytest.fixture
94
- @pytest.mark.asyncio
95
- async def image_classification_resource(
96
- writer_rest_api: aiohttp.ClientSession, nucliadb_grpc: WriterStub, knowledgebox: str
97
- ):
98
- kbid = knowledgebox
99
- field_id = "invoice"
100
-
101
- with open(INVOICE_SELECTIONS_FILENAME) as f:
102
- selections = json.load(f)
103
- assert len(selections["tokens"]) == 87
104
- assert len(selections["annotations"]) == 18
105
-
106
- fieldmetadata = generate_image_classification_fieldmetadata(selections, field_id)
107
-
108
- with open(INVOICE_FILENAME, "rb") as f:
109
- invoice_content = f.read()
110
-
111
- resp = await writer_rest_api.post(
112
- f"/{API_PREFIX}/v1/{KB_PREFIX}/{knowledgebox}/resources",
113
- json={
114
- "title": "My invoice",
115
- "files": {
116
- field_id: {
117
- "file": {
118
- "filename": "invoice.pdf",
119
- "content_type": "application/pdf",
120
- "payload": base64.b64encode(invoice_content).decode(),
121
- }
122
- }
123
- },
124
- "fieldmetadata": fieldmetadata,
125
- },
126
- )
127
- assert resp.status == 201
128
- body = await resp.json()
129
- rid = body["uuid"]
130
-
131
- broker_message = generate_image_classification_broker_message(
132
- selections, kbid, rid, field_id
133
- )
134
-
135
- original_storage = get_utility(Utility.STORAGE)
136
- set_utility(Utility.STORAGE, AsyncMock())
137
- mock_set = AsyncMock(return_value=None)
138
- mock_get = AsyncMock(return_value=broker_message.file_extracted_data[0])
139
- with (
140
- patch(
141
- "nucliadb.ingest.fields.file.File.set_file_extracted_data", new=mock_set
142
- ) as _,
143
- patch(
144
- "nucliadb.ingest.fields.file.File.get_file_extracted_data", new=mock_get
145
- ) as _,
146
- ):
147
- resp = await nucliadb_grpc.ProcessMessage( # type: ignore
148
- iter([broker_message]), timeout=10, wait_for_ready=True
149
- )
150
- assert resp.status == OpStatusWriter.Status.OK
151
- yield
152
-
153
- set_utility(Utility.STORAGE, original_storage)
154
-
155
-
156
- def generate_image_classification_fieldmetadata(
157
- selections: dict, field_id: str
158
- ) -> list[dict[str, Any]]:
159
- selections_by_page = {} # type: ignore
160
- for annotation in selections["annotations"]:
161
- page_selections = selections_by_page.setdefault(annotation["page"], [])
162
- page_selections.append(
163
- {
164
- "label": annotation["label"]["text"],
165
- "top": annotation["bounds"]["top"],
166
- "left": annotation["bounds"]["left"],
167
- "right": annotation["bounds"]["right"],
168
- "bottom": annotation["bounds"]["bottom"],
169
- "token_ids": [token["tokenIndex"] for token in annotation["tokens"]],
170
- }
171
- )
172
-
173
- fieldmetadata = {
174
- "field": {"field": field_id, "field_type": "file"},
175
- "selections": [
176
- {
177
- "page": page,
178
- "visual": selections,
179
- }
180
- for page, selections in selections_by_page.items()
181
- ],
182
- }
183
- return [fieldmetadata]
184
-
185
-
186
- def generate_image_classification_broker_message(
187
- selections: dict, kbid: str, rid: str, field_id: str
188
- ) -> BrokerMessage:
189
- bm = BrokerMessage(
190
- kbid=kbid,
191
- uuid=rid,
192
- source=BrokerMessage.MessageSource.PROCESSOR,
193
- file_extracted_data=[
194
- FileExtractedData(
195
- field=field_id,
196
- file_pages_previews=FilePages(
197
- pages=[
198
- CloudFile(uri="DUMMY-URI"),
199
- ],
200
- structures=[
201
- PageStructure(
202
- page=PageStructurePage(width=10, height=10),
203
- tokens=[
204
- PageStructureToken(
205
- x=token["x"],
206
- y=token["y"],
207
- width=token["width"],
208
- height=token["height"],
209
- text=token["text"],
210
- line=0,
211
- )
212
- for token in selections["tokens"]
213
- ],
214
- )
215
- ],
216
- ),
217
- )
218
- ],
219
- )
220
-
221
- return bm
@@ -1,39 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
- from nucliadb_protos.train_pb2 import GetFieldsRequest
22
- from nucliadb_protos.train_pb2_grpc import TrainStub
23
-
24
-
25
- @pytest.mark.asyncio
26
- async def test_list_fields(
27
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
28
- ) -> None:
29
- req = GetFieldsRequest()
30
- req.kb.uuid = knowledgebox_ingest
31
- req.metadata.entities = True
32
- req.metadata.labels = True
33
- req.metadata.text = True
34
- req.metadata.vector = True
35
- count = 0
36
- async for _ in train_client.GetParagraphs(req): # type: ignore
37
- count += 1
38
-
39
- assert count == 30
@@ -1,73 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
- from nucliadb_protos.train_pb2 import GetParagraphsRequest
22
- from nucliadb_protos.train_pb2_grpc import TrainStub
23
-
24
-
25
- @pytest.mark.asyncio
26
- async def test_list_paragraphs(
27
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
28
- ) -> None:
29
- req = GetParagraphsRequest()
30
- req.kb.uuid = knowledgebox_ingest
31
- req.metadata.entities = True
32
- req.metadata.labels = True
33
- req.metadata.text = True
34
- req.metadata.vector = True
35
- count = 0
36
- async for _ in train_client.GetParagraphs(req): # type: ignore
37
- count += 1
38
-
39
- assert count == 30
40
-
41
-
42
- @pytest.mark.asyncio
43
- async def test_list_paragraphs_shows_ners_with_positions(
44
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
45
- ) -> None:
46
- req = GetParagraphsRequest()
47
- req.kb.uuid = knowledgebox_ingest
48
- req.metadata.entities = True
49
- req.metadata.labels = True
50
- req.metadata.text = True
51
- req.metadata.vector = True
52
-
53
- found_barcelona = found_manresa = False
54
- async for paragraph in train_client.GetParagraphs(req): # type: ignore
55
- if "Barcelona" in paragraph.metadata.text:
56
- found_barcelona = True
57
- assert paragraph.metadata.entities == {"Barcelona": "CITY"}
58
- positions = paragraph.metadata.entity_positions["CITY/Barcelona"]
59
- assert positions.entity == "Barcelona"
60
- assert len(positions.positions) == 1
61
- assert positions.positions[0].start == 43
62
- assert positions.positions[0].end == 52
63
- elif "Manresa" in paragraph.metadata.text:
64
- found_manresa = True
65
- assert paragraph.metadata.entities == {"Manresa": "CITY"}
66
- positions = paragraph.metadata.entity_positions["CITY/Manresa"]
67
- assert positions.entity == "Manresa"
68
- assert len(positions.positions) == 2
69
- assert positions.positions[0].start == 22
70
- assert positions.positions[0].end == 29
71
- assert positions.positions[1].start == 38
72
- assert positions.positions[1].end == 45
73
- assert found_manresa and found_barcelona
@@ -1,39 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
- from nucliadb_protos.train_pb2 import GetResourcesRequest
22
- from nucliadb_protos.train_pb2_grpc import TrainStub
23
-
24
-
25
- @pytest.mark.asyncio
26
- async def test_list_resource(
27
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
28
- ) -> None:
29
- req = GetResourcesRequest()
30
- req.kb.uuid = knowledgebox_ingest
31
- req.metadata.entities = True
32
- req.metadata.labels = True
33
- req.metadata.text = True
34
- req.metadata.vector = True
35
- count = 0
36
- async for _ in train_client.GetResources(req): # type: ignore
37
- count += 1
38
-
39
- assert count == 10
@@ -1,71 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
- from nucliadb_protos.train_pb2 import GetSentencesRequest
22
- from nucliadb_protos.train_pb2_grpc import TrainStub
23
-
24
-
25
- @pytest.mark.asyncio
26
- async def test_list_sentences(
27
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
28
- ) -> None:
29
- req = GetSentencesRequest()
30
- req.kb.uuid = knowledgebox_ingest
31
- req.metadata.entities = True
32
- req.metadata.labels = True
33
- req.metadata.text = True
34
- req.metadata.vector = True
35
- count = 0
36
-
37
- async for _ in train_client.GetSentences(req): # type: ignore
38
- count += 1
39
-
40
- assert count == 40
41
-
42
-
43
- @pytest.mark.asyncio
44
- @pytest.mark.parametrize("knowledgebox", ["STABLE", "EXPERIMENTAL"], indirect=True)
45
- async def test_list_sentences_shows_ners_with_positions(
46
- train_client: TrainStub, knowledgebox: str, test_pagination_resources
47
- ) -> None:
48
- req = GetSentencesRequest()
49
- req.kb.uuid = knowledgebox
50
- req.metadata.entities = True
51
- async for sentence in train_client.GetSentences(req): # type: ignore
52
- if "Barcelona" in sentence.metadata.text:
53
- assert sentence.metadata.entities == {"Barcelona": "CITY"}
54
- positions = sentence.metadata.entity_positions["CITY/Barcelona"]
55
- assert positions.entity == "Barcelona"
56
- assert len(positions.positions) == 1
57
- assert positions.positions[0].start == 43
58
- assert positions.positions[0].end == 52
59
- elif "Manresa" in sentence.metadata.text:
60
- assert sentence.metadata.entities == {"Manresa": "CITY"}
61
- positions = sentence.metadata.entity_positions["CITY/Manresa"]
62
- assert positions.entity == "Manresa"
63
- assert len(positions.positions) == 2
64
- assert positions.positions[0].start == 22
65
- assert positions.positions[0].end == 29
66
- assert positions.positions[1].start == 38
67
- assert positions.positions[1].end == 45
68
- else:
69
- # Other sentences should not have entities nor positions
70
- assert sentence.metadata.entities == {}
71
- assert sentence.metadata.entity_positions == {}