nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,80 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import uuid
21
- from unittest.mock import AsyncMock, Mock
22
-
23
- import pytest
24
- from nucliadb_protos.train_pb2 import GetEntitiesRequest
25
- from nucliadb_protos.train_pb2_grpc import TrainStub
26
- from nucliadb_protos.writer_pb2 import GetEntitiesResponse
27
-
28
-
29
- @pytest.fixture(scope="function")
30
- async def entities_manager_mock():
31
- from nucliadb.train import nodes
32
-
33
- original = nodes.EntitiesManager
34
-
35
- mock = Mock()
36
- nodes.EntitiesManager = Mock(return_value=mock)
37
-
38
- yield mock
39
-
40
- nodes.EntitiesManager = original
41
-
42
-
43
- @pytest.mark.asyncio
44
- async def test_get_entities(
45
- train_client: TrainStub,
46
- knowledgebox_ingest: str,
47
- entities_manager_mock: Mock,
48
- ) -> None:
49
- def get_entities_mock(response):
50
- response.groups["group1"].entities["entity1"].value = "PERSON"
51
-
52
- entities_manager_mock.get_entities = AsyncMock(side_effect=get_entities_mock)
53
-
54
- req = GetEntitiesRequest()
55
- req.kb.uuid = knowledgebox_ingest
56
- entities: GetEntitiesResponse = await train_client.GetEntities(req) # type: ignore
57
-
58
- assert entities.groups["group1"].entities["entity1"].value == "PERSON"
59
-
60
-
61
- @pytest.mark.asyncio
62
- async def test_get_entities_kb_not_found(train_client: TrainStub) -> None:
63
- req = GetEntitiesRequest()
64
- req.kb.uuid = str(uuid.uuid4())
65
- entities: GetEntitiesResponse = await train_client.GetEntities(req) # type: ignore
66
- assert entities.status == GetEntitiesResponse.Status.NOTFOUND
67
-
68
-
69
- @pytest.mark.asyncio
70
- async def test_get_entities_error(
71
- train_client: TrainStub, knowledgebox_ingest: str, entities_manager_mock
72
- ) -> None:
73
- entities_manager_mock.get_entities = AsyncMock(
74
- side_effect=Exception("Testing exception on ingest")
75
- )
76
-
77
- req = GetEntitiesRequest()
78
- req.kb.uuid = knowledgebox_ingest
79
- entities: GetEntitiesResponse = await train_client.GetEntities(req) # type: ignore
80
- assert entities.status == GetEntitiesResponse.Status.ERROR
@@ -1,51 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import sys
21
-
22
- import pytest
23
- from aioresponses import aioresponses
24
- from nucliadb_protos.train_pb2 import GetInfoRequest, TrainInfo
25
- from nucliadb_protos.train_pb2_grpc import TrainStub
26
-
27
- VERSION = sys.version_info
28
- PY_GEQ_3_11 = VERSION.major > 3 or VERSION.major == 3 and VERSION.minor >= 11
29
-
30
-
31
- @pytest.mark.asyncio
32
- @pytest.mark.skipif(
33
- PY_GEQ_3_11, reason="aioresponses not compatible with python 3.11 yet"
34
- )
35
- async def test_get_info(
36
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
37
- ) -> None:
38
- req = GetInfoRequest()
39
- req.kb.uuid = knowledgebox_ingest
40
-
41
- with aioresponses() as m:
42
- m.get(
43
- f"http://search.nuclia.svc.cluster.local:8030/api/v1/kb/{knowledgebox_ingest}/counters",
44
- payload={"resources": 4, "paragraphs": 89, "fields": 4, "sentences": 90},
45
- )
46
-
47
- labels: TrainInfo = await train_client.GetInfo(req) # type: ignore
48
- assert labels.fields == 4
49
- assert labels.resources == 4
50
- assert labels.paragraphs == 89
51
- assert labels.sentences == 90
@@ -1,34 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
- from nucliadb_protos.train_pb2 import GetLabelsRequest
22
- from nucliadb_protos.train_pb2_grpc import TrainStub
23
- from nucliadb_protos.writer_pb2 import GetLabelsResponse
24
-
25
-
26
- @pytest.mark.asyncio
27
- async def test_get_ontology(
28
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
29
- ) -> None:
30
- req = GetLabelsRequest()
31
- req.kb.uuid = knowledgebox_ingest
32
-
33
- labels: GetLabelsResponse = await train_client.GetOntology(req) # type: ignore
34
- assert labels.labels.labelset["label1"].labels[0].title == "label1"
@@ -1,63 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import sys
21
-
22
- import pytest
23
- from aioresponses import aioresponses
24
- from nucliadb_protos.train_pb2 import GetLabelsetsCountRequest, LabelsetsCount
25
- from nucliadb_protos.train_pb2_grpc import TrainStub
26
-
27
- VERSION = sys.version_info
28
- PY_GEQ_3_11 = VERSION.major > 3 or VERSION.major == 3 and VERSION.minor >= 11
29
-
30
-
31
- @pytest.mark.asyncio
32
- @pytest.mark.skipif(
33
- PY_GEQ_3_11, reason="aioresponses not compatible with python 3.11 yet"
34
- )
35
- async def test_get_ontology_count(
36
- train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
37
- ) -> None:
38
- req = GetLabelsetsCountRequest()
39
- req.kb.uuid = knowledgebox_ingest
40
-
41
- with aioresponses() as m:
42
- m.get(
43
- f"http://search.nuclia.svc.cluster.local:8030/api/v1/kb/{knowledgebox_ingest}/search?faceted=/l/my-labelset", # noqa
44
- payload={
45
- "resources": {},
46
- "sentences": {"results": [], "facets": {}},
47
- "paragraphs": {
48
- "results": [],
49
- "facets": {
50
- "/l/my-labelset": {
51
- "facetresults": [
52
- {"tag": "/l/my-labelset/Label 1", "total": 1}
53
- ]
54
- }
55
- },
56
- },
57
- "fulltext": {"results": [], "facets": {}},
58
- },
59
- )
60
-
61
- req.resource_labelsets.append("my-labelset")
62
- labels: LabelsetsCount = await train_client.GetOntologyCount(req) # type: ignore
63
- assert labels.labelsets["/l/my-labelset"].paragraphs["Label 1"] == 1
@@ -1,222 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import asyncio
21
- import base64
22
- import json
23
- import os
24
- from typing import Any
25
- from unittest.mock import AsyncMock, patch
26
-
27
- import aiohttp
28
- import pytest
29
- from nucliadb_protos.dataset_pb2 import ImageClassificationBatch, TaskType, TrainSet
30
- from nucliadb_protos.resources_pb2 import (
31
- CloudFile,
32
- FileExtractedData,
33
- FilePages,
34
- PageStructure,
35
- PageStructurePage,
36
- PageStructureToken,
37
- )
38
- from nucliadb_protos.writer_pb2 import BrokerMessage, OpStatusWriter
39
- from nucliadb_protos.writer_pb2_grpc import WriterStub
40
-
41
- from nucliadb.train import API_PREFIX
42
- from nucliadb.train.api.v1.router import KB_PREFIX
43
- from nucliadb.train.tests.utils import get_batches_from_train_response_stream
44
- from nucliadb_utils.utilities import Utility, get_utility, set_utility
45
-
46
# Directory containing this test module.
_dir = os.path.dirname(__file__)
# Shared test data lives two levels up, under tests/testdata.
_testdata_dir = os.path.join(_dir, *("..", "..", "tests", "testdata"))

# Sample PDF and its annotation/selection data used by the fixtures below.
INVOICE_FILENAME, INVOICE_SELECTIONS_FILENAME = (
    os.path.join(_testdata_dir, basename)
    for basename in ("invoice.pdf", "invoice_selections.json")
)
51
-
52
-
53
@pytest.mark.asyncio
@pytest.mark.parametrize("knowledgebox", ["STABLE", "EXPERIMENTAL"], indirect=True)
async def test_generation_image_classification(
    train_rest_api: aiohttp.ClientSession,
    knowledgebox: str,
    image_classification_resource,
):
    """Request an image-classification trainset and validate the one batch.

    Lists the trainset partitions (exactly one is expected), posts a
    serialized ``TrainSet`` for that partition and checks that the streamed
    response holds a single batch whose only item carries the page size,
    tokens, annotations and page URI expected for the invoice resource.
    """
    trainset_url = f"/{API_PREFIX}/v1/{KB_PREFIX}/{knowledgebox}/trainset"

    async with train_rest_api.get(trainset_url) as partitions_response:
        assert partitions_response.status == 200
        partition_ids = (await partitions_response.json())["partitions"]
        assert len(partition_ids) == 1
        partition_id = partition_ids[0]

    request = TrainSet()
    request.type = TaskType.IMAGE_CLASSIFICATION
    request.batch_size = 10

    # Short pause carried over unchanged; its purpose is not stated in this file.
    await asyncio.sleep(0.1)
    async with train_rest_api.post(
        f"{trainset_url}/{partition_id}",
        data=request.SerializeToString(),
    ) as response:
        assert response.status == 200
        received = []
        async for batch in get_batches_from_train_response_stream(
            response, ImageClassificationBatch
        ):
            received.append(batch)
            assert len(batch.data) == 1
            item = batch.data[0]
            page_selections = json.loads(item.selections)
            assert page_selections["width"] == 10
            assert page_selections["height"] == 10
            assert len(page_selections["tokens"]) == 87
            assert len(page_selections["annotations"]) == 18
            assert item.page_uri == "DUMMY-URI"
        assert len(received) == 1
91
-
92
-
93
@pytest.fixture
async def image_classification_resource(
    writer_rest_api: aiohttp.ClientSession, nucliadb_grpc: WriterStub, knowledgebox: str
):
    """Create an invoice resource with visual selections and processed pages.

    Steps:
      1. Load the selections JSON and the invoice PDF from the test data.
      2. Create the resource through the writer REST API, attaching the
         selections as field metadata.
      3. Swap the STORAGE utility for an ``AsyncMock`` and patch the file
         field's extracted-data accessors, then send a processor broker
         message through the writer gRPC API.

    The patches stay active while the dependent test runs (the fixture
    yields inside the ``with`` block). The original STORAGE utility is
    restored in a ``finally`` clause so that a failure during setup does not
    leave the mock installed.

    Note: no ``pytest.mark.asyncio`` is applied here; pytest documents marks
    on fixture functions as having no effect and deprecates them.
    """
    kbid = knowledgebox
    field_id = "invoice"

    with open(INVOICE_SELECTIONS_FILENAME, encoding="utf-8") as f:
        selections = json.load(f)
    # Sanity-check the test data the assertions in the test rely on.
    assert len(selections["tokens"]) == 87
    assert len(selections["annotations"]) == 18

    fieldmetadata = generate_image_classification_fieldmetadata(selections, field_id)

    with open(INVOICE_FILENAME, "rb") as f:
        invoice_content = f.read()

    resp = await writer_rest_api.post(
        f"/{API_PREFIX}/v1/{KB_PREFIX}/{knowledgebox}/resources",
        headers={"x-synchronous": "true"},
        json={
            "title": "My invoice",
            "files": {
                field_id: {
                    "file": {
                        "filename": "invoice.pdf",
                        "content_type": "application/pdf",
                        "payload": base64.b64encode(invoice_content).decode(),
                    }
                }
            },
            "fieldmetadata": fieldmetadata,
        },
    )
    assert resp.status == 201
    body = await resp.json()
    rid = body["uuid"]

    broker_message = generate_image_classification_broker_message(
        selections, kbid, rid, field_id
    )

    original_storage = get_utility(Utility.STORAGE)
    set_utility(Utility.STORAGE, AsyncMock())
    try:
        mock_set = AsyncMock(return_value=None)
        # Reads of the extracted data return what the broker message carries.
        mock_get = AsyncMock(return_value=broker_message.file_extracted_data[0])
        with (
            patch(
                "nucliadb.ingest.fields.file.File.set_file_extracted_data", new=mock_set
            ),
            patch(
                "nucliadb.ingest.fields.file.File.get_file_extracted_data", new=mock_get
            ),
        ):
            resp = await nucliadb_grpc.ProcessMessage(  # type: ignore
                iter([broker_message]), timeout=10, wait_for_ready=True
            )
            assert resp.status == OpStatusWriter.Status.OK
            yield
    finally:
        # Always put the real storage utility back, even if setup failed.
        set_utility(Utility.STORAGE, original_storage)
155
-
156
-
157
def generate_image_classification_fieldmetadata(
    selections: dict, field_id: str
) -> list[dict[str, Any]]:
    """Build the ``fieldmetadata`` payload for a file field's visual selections.

    Each entry of ``selections["annotations"]`` becomes one visual selection
    (label text, bounding box and token indices), grouped by the page the
    annotation belongs to. Pages keep the order in which they are first seen.

    Returns a single-element list holding the metadata for ``field_id``.
    """
    visuals_by_page: dict[Any, list[dict[str, Any]]] = {}
    for annotation in selections["annotations"]:
        page_visuals = visuals_by_page.setdefault(annotation["page"], [])
        visual = {"label": annotation["label"]["text"]}
        for side in ("top", "left", "right", "bottom"):
            visual[side] = annotation["bounds"][side]
        visual["token_ids"] = [token["tokenIndex"] for token in annotation["tokens"]]
        page_visuals.append(visual)

    per_page = [
        {"page": page_number, "visual": visuals}
        for page_number, visuals in visuals_by_page.items()
    ]
    return [
        {
            "field": {"field": field_id, "field_type": "file"},
            "selections": per_page,
        }
    ]
185
-
186
-
187
def generate_image_classification_broker_message(
    selections: dict, kbid: str, rid: str, field_id: str
) -> BrokerMessage:
    """Build the processor broker message carrying page previews for a file.

    The message holds one ``FileExtractedData`` entry for ``field_id`` with a
    single preview page (``DUMMY-URI``) and a 10x10 page structure whose
    tokens are copied from ``selections["tokens"]``, all placed on line 0.
    """
    page_tokens = [
        PageStructureToken(
            x=token["x"],
            y=token["y"],
            width=token["width"],
            height=token["height"],
            text=token["text"],
            line=0,
        )
        for token in selections["tokens"]
    ]
    page_structure = PageStructure(
        page=PageStructurePage(width=10, height=10),
        tokens=page_tokens,
    )
    previews = FilePages(
        pages=[CloudFile(uri="DUMMY-URI")],
        structures=[page_structure],
    )
    extracted = FileExtractedData(field=field_id, file_pages_previews=previews)

    return BrokerMessage(
        kbid=kbid,
        uuid=rid,
        source=BrokerMessage.MessageSource.PROCESSOR,
        file_extracted_data=[extracted],
    )
@@ -1,39 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
- from nucliadb_protos.train_pb2 import GetFieldsRequest
22
- from nucliadb_protos.train_pb2_grpc import TrainStub
23
-
24
-
25
@pytest.mark.asyncio
async def test_list_fields(
    train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
) -> None:
    """Stream results for a fully populated ``GetFieldsRequest``; expect 30."""
    request = GetFieldsRequest()
    request.kb.uuid = knowledgebox_ingest
    # Ask for every kind of metadata.
    for metadata_flag in ("entities", "labels", "text", "vector"):
        setattr(request.metadata, metadata_flag, True)

    # NOTE(review): the request is a GetFieldsRequest but the RPC invoked is
    # GetParagraphs -- confirm whether GetFields was intended (the expected
    # count may differ if so).
    streamed = [item async for item in train_client.GetParagraphs(request)]  # type: ignore

    assert len(streamed) == 30
@@ -1,73 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
- from nucliadb_protos.train_pb2 import GetParagraphsRequest
22
- from nucliadb_protos.train_pb2_grpc import TrainStub
23
-
24
-
25
@pytest.mark.asyncio
async def test_list_paragraphs(
    train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
) -> None:
    """Stream every paragraph of the knowledge box and expect 30 of them."""
    request = GetParagraphsRequest()
    request.kb.uuid = knowledgebox_ingest
    # Ask for every kind of metadata.
    for metadata_flag in ("entities", "labels", "text", "vector"):
        setattr(request.metadata, metadata_flag, True)

    paragraphs = [
        paragraph
        async for paragraph in train_client.GetParagraphs(request)  # type: ignore
    ]

    assert len(paragraphs) == 30
40
-
41
-
42
@pytest.mark.asyncio
async def test_list_paragraphs_shows_ners_with_positions(
    train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
) -> None:
    """Check entity labels and character positions on streamed paragraphs.

    Paragraphs whose text mentions Barcelona or Manresa must expose that city
    as a ``CITY`` entity with the expected (start, end) spans, and both
    cities must be seen at least once.
    """
    # (text marker / entity name, entity_positions key, expected spans);
    # checked in this order, so Barcelona wins if both appear in one text.
    expectations = (
        ("Barcelona", "CITY/Barcelona", [(43, 52)]),
        ("Manresa", "CITY/Manresa", [(22, 29), (38, 45)]),
    )

    request = GetParagraphsRequest()
    request.kb.uuid = knowledgebox_ingest
    request.metadata.entities = True
    request.metadata.labels = True
    request.metadata.text = True
    request.metadata.vector = True

    cities_seen = set()
    async for paragraph in train_client.GetParagraphs(request):  # type: ignore
        metadata = paragraph.metadata
        matched = next(
            (entry for entry in expectations if entry[0] in metadata.text), None
        )
        if matched is None:
            continue
        city, positions_key, expected_spans = matched
        cities_seen.add(city)
        assert metadata.entities == {city: "CITY"}
        entity_positions = metadata.entity_positions[positions_key]
        assert entity_positions.entity == city
        actual_spans = [(pos.start, pos.end) for pos in entity_positions.positions]
        assert actual_spans == expected_spans

    assert "Manresa" in cities_seen and "Barcelona" in cities_seen
@@ -1,39 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
- from nucliadb_protos.train_pb2 import GetResourcesRequest
22
- from nucliadb_protos.train_pb2_grpc import TrainStub
23
-
24
-
25
@pytest.mark.asyncio
async def test_list_resource(
    train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
) -> None:
    """Stream every resource of the knowledge box and expect 10 of them."""
    request = GetResourcesRequest()
    request.kb.uuid = knowledgebox_ingest
    # Ask for every kind of metadata.
    for metadata_flag in ("entities", "labels", "text", "vector"):
        setattr(request.metadata, metadata_flag, True)

    resources = [
        resource
        async for resource in train_client.GetResources(request)  # type: ignore
    ]

    assert len(resources) == 10
@@ -1,71 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
- from nucliadb_protos.train_pb2 import GetSentencesRequest
22
- from nucliadb_protos.train_pb2_grpc import TrainStub
23
-
24
-
25
@pytest.mark.asyncio
async def test_list_sentences(
    train_client: TrainStub, knowledgebox_ingest: str, test_pagination_resources
) -> None:
    """Stream every sentence of the knowledge box and expect 40 of them."""
    request = GetSentencesRequest()
    request.kb.uuid = knowledgebox_ingest
    # Ask for every kind of metadata.
    for metadata_flag in ("entities", "labels", "text", "vector"):
        setattr(request.metadata, metadata_flag, True)

    sentences = [
        sentence
        async for sentence in train_client.GetSentences(request)  # type: ignore
    ]

    assert len(sentences) == 40
41
-
42
-
43
@pytest.mark.asyncio
@pytest.mark.parametrize("knowledgebox", ["STABLE", "EXPERIMENTAL"], indirect=True)
async def test_list_sentences_shows_ners_with_positions(
    train_client: TrainStub, knowledgebox: str, test_pagination_resources
) -> None:
    """Check entity labels and character positions on streamed sentences.

    Sentences whose text mentions Barcelona or Manresa must expose that city
    as a ``CITY`` entity with the expected (start, end) spans; every other
    sentence must carry no entities and no entity positions.
    """
    # (text marker / entity name, entity_positions key, expected spans);
    # checked in this order, so Barcelona wins if both appear in one text.
    expectations = (
        ("Barcelona", "CITY/Barcelona", [(43, 52)]),
        ("Manresa", "CITY/Manresa", [(22, 29), (38, 45)]),
    )

    request = GetSentencesRequest()
    request.kb.uuid = knowledgebox
    request.metadata.entities = True

    # NOTE(review): nothing below fails if no streamed sentence mentions
    # either city -- confirm whether a "both cities were seen" check is wanted.
    async for sentence in train_client.GetSentences(request):  # type: ignore
        metadata = sentence.metadata
        matched = next(
            (entry for entry in expectations if entry[0] in metadata.text), None
        )
        if matched is None:
            # Other sentences should not have entities nor positions
            assert metadata.entities == {}
            assert metadata.entity_positions == {}
            continue

        city, positions_key, expected_spans = matched
        assert metadata.entities == {city: "CITY"}
        entity_positions = metadata.entity_positions[positions_key]
        assert entity_positions.entity == city
        actual_spans = [(pos.start, pos.end) for pos in entity_positions.positions]
        assert actual_spans == expected_spans