nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,546 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from datetime import datetime
21
- from typing import Any, Callable, Optional
22
- from unittest.mock import AsyncMock # type: ignore
23
-
24
- import pytest
25
- from httpx import AsyncClient
26
- from nucliadb_protos.writer_pb2 import ResourceFieldId
27
-
28
- import nucliadb_models
29
- from nucliadb.common.maindb.local import LocalDriver
30
- from nucliadb.common.maindb.redis import RedisDriver
31
- from nucliadb.ingest.orm.resource import Resource
32
- from nucliadb.ingest.processing import PushPayload
33
- from nucliadb.writer.api.v1.router import (
34
- KB_PREFIX,
35
- RESOURCE_PREFIX,
36
- RESOURCES_PREFIX,
37
- RSLUG_PREFIX,
38
- )
39
- from nucliadb.writer.tests.test_fields import (
40
- TEST_CONVERSATION_PAYLOAD,
41
- TEST_DATETIMES_PAYLOAD,
42
- TEST_EXTERNAL_FILE_PAYLOAD,
43
- TEST_FILE_PAYLOAD,
44
- TEST_KEYWORDSETS_PAYLOAD,
45
- TEST_LAYOUT_PAYLOAD,
46
- TEST_LINK_PAYLOAD,
47
- TEST_TEXT_PAYLOAD,
48
- )
49
- from nucliadb_models.resource import NucliaDBRoles
50
- from nucliadb_utils.utilities import get_ingest
51
-
52
-
53
- @pytest.mark.asyncio
54
- async def test_resource_crud_min(
55
- writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
56
- ):
57
- knowledgebox_id = knowledgebox_writer
58
- async with writer_api([NucliaDBRoles.WRITER]) as client:
59
- resp = await client.post(
60
- f"/{KB_PREFIX}/{knowledgebox_id}/vectorset/base",
61
- json={"dimension": 3, "similarity": "dot"},
62
- )
63
- assert resp.status_code == 200
64
- # Test create resource
65
- resp = await client.post(
66
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
67
- json={
68
- "uservectors": [
69
- {
70
- "vectors": {
71
- "base": {
72
- "vector1": {
73
- "vector": [4.0, 2.0, 3.0],
74
- "positions": [0, 0],
75
- }
76
- }
77
- },
78
- "field": {"field_type": "file", "field": "field1"},
79
- }
80
- ]
81
- },
82
- )
83
- assert resp.status_code == 201
84
-
85
-
86
- @pytest.mark.asyncio
87
- async def test_resource_crud_min_no_vectorset(
88
- writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
89
- ):
90
- knowledgebox_id = knowledgebox_writer
91
- async with writer_api([NucliaDBRoles.WRITER]) as client:
92
- # Test create resource
93
- resp = await client.post(
94
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
95
- json={
96
- "uservectors": [
97
- {
98
- "vectors": {
99
- "base": {
100
- "vector1": {
101
- "vector": [4.0, 2.0, 3.0],
102
- "positions": [0, 0],
103
- }
104
- }
105
- },
106
- "field": {"field_type": "file", "field": "field1"},
107
- }
108
- ]
109
- },
110
- )
111
- assert resp.status_code == 201
112
-
113
-
114
- @pytest.mark.asyncio
115
- async def test_resource_crud(
116
- writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
117
- ):
118
- knowledgebox_id = knowledgebox_writer
119
- async with writer_api([NucliaDBRoles.WRITER]) as client:
120
- # Test create resource
121
- resp = await client.post(
122
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
123
- headers={"X-SYNCHRONOUS": "True"},
124
- json={
125
- "slug": "resource1",
126
- "title": "My resource",
127
- "summary": "Some summary",
128
- "icon": "image/png",
129
- "layout": "layout",
130
- "metadata": {
131
- "language": "en",
132
- "metadata": {"key1": "value1", "key2": "value2"},
133
- },
134
- "fieldmetadata": [
135
- {
136
- "paragraphs": [
137
- {
138
- "key": "paragraph1",
139
- "classifications": [
140
- {"labelset": "ls1", "label": "label1"}
141
- ],
142
- }
143
- ],
144
- "token": [
145
- {"token": "token1", "klass": "klass1", "start": 1, "end": 2}
146
- ],
147
- "field": {"field": "text1", "field_type": "text"},
148
- }
149
- ],
150
- "usermetadata": {
151
- "classifications": [{"labelset": "ls1", "label": "label1"}],
152
- "relations": [
153
- {
154
- "relation": "CHILD",
155
- "to": {
156
- "type": "resource",
157
- "value": "resource_uuid",
158
- },
159
- }
160
- ],
161
- },
162
- "origin": {
163
- "source_id": "source_id",
164
- "url": "http://some_source",
165
- "created": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
166
- "modified": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
167
- "metadata": {"key1": "value1", "key2": "value2"},
168
- "tags": ["tag1", "tag2"],
169
- "collaborators": ["col1", "col2"],
170
- "filename": "file.pdf",
171
- "related": ["related1"],
172
- },
173
- "texts": {"text1": TEST_TEXT_PAYLOAD},
174
- "links": {"link1": TEST_LINK_PAYLOAD},
175
- "files": {
176
- "file1": TEST_FILE_PAYLOAD,
177
- "external1": TEST_EXTERNAL_FILE_PAYLOAD,
178
- },
179
- "layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
180
- "conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
181
- "keywordsets": {"keywordset1": TEST_KEYWORDSETS_PAYLOAD},
182
- "datetimes": {"datetime1": TEST_DATETIMES_PAYLOAD},
183
- },
184
- )
185
-
186
- assert resp.status_code == 201
187
- data = resp.json()
188
- assert "uuid" in data
189
- assert "seqid" in data
190
- rid = data["uuid"]
191
-
192
- # Test update resource
193
- resp = await client.patch(
194
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
195
- json={},
196
- )
197
- assert resp.status_code == 200
198
-
199
- data = resp.json()
200
-
201
- assert "seqid" in data
202
-
203
- # Test delete resource
204
- resp = await client.delete(
205
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
206
- )
207
- assert resp.status_code == 204
208
-
209
-
210
- @pytest.mark.asyncio
211
- async def test_resource_crud_sync(
212
- writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
213
- ):
214
- knowledgebox_id = knowledgebox_writer
215
- async with writer_api([NucliaDBRoles.WRITER]) as client:
216
- # Test create resource
217
- resp = await client.post(
218
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
219
- headers={"X-SYNCHRONOUS": "True"},
220
- json={
221
- "slug": "resource1",
222
- "title": "My resource",
223
- "summary": "Some summary",
224
- "icon": "image/png",
225
- "layout": "layout",
226
- "metadata": {
227
- "language": "en",
228
- "metadata": {"key1": "value1", "key2": "value2"},
229
- },
230
- "fieldmetadata": [
231
- {
232
- "paragraphs": [
233
- {
234
- "key": "paragraph1",
235
- "classifications": [
236
- {"labelset": "ls1", "label": "label1"}
237
- ],
238
- }
239
- ],
240
- "token": [
241
- {"token": "token1", "klass": "klass1", "start": 1, "end": 2}
242
- ],
243
- "field": {"field": "text1", "field_type": "text"},
244
- }
245
- ],
246
- "usermetadata": {
247
- "classifications": [{"labelset": "ls1", "label": "label1"}],
248
- "relations": [
249
- {
250
- "relation": "CHILD",
251
- "to": {
252
- "type": "resource",
253
- "value": "resource_uuid",
254
- },
255
- }
256
- ],
257
- },
258
- "origin": {
259
- "source_id": "source_id",
260
- "url": "http://some_source",
261
- "created": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
262
- "modified": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
263
- "metadata": {"key1": "value1", "key2": "value2"},
264
- "tags": ["tag1", "tag2"],
265
- "collaborators": ["col1", "col2"],
266
- "filename": "file.pdf",
267
- "related": ["related1"],
268
- },
269
- "texts": {"text1": TEST_TEXT_PAYLOAD},
270
- "links": {"link1": TEST_LINK_PAYLOAD},
271
- "files": {"file1": TEST_FILE_PAYLOAD},
272
- "layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
273
- "conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
274
- "keywordsets": {"keywordset1": TEST_KEYWORDSETS_PAYLOAD},
275
- "datetimes": {"datetime1": TEST_DATETIMES_PAYLOAD},
276
- },
277
- )
278
-
279
- assert resp.status_code == 201
280
- data = resp.json()
281
- assert "uuid" in data
282
- assert "seqid" in data
283
- assert "elapsed" in data
284
- rid = data["uuid"]
285
-
286
- ingest = get_ingest()
287
- pbrequest = ResourceFieldId()
288
- pbrequest.kbid = knowledgebox_id
289
- pbrequest.rid = rid
290
-
291
- res = await ingest.ResourceFieldExists(pbrequest) # type: ignore
292
- assert res.found
293
-
294
- # Test update resource
295
- resp = await client.patch(
296
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
297
- headers={"X-SYNCHRONOUS": "True"},
298
- json={},
299
- )
300
- assert resp.status_code == 200
301
-
302
- # Test delete resource
303
-
304
- resp = await client.delete(
305
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/resource1",
306
- headers={"X-SYNCHRONOUS": "True"},
307
- )
308
-
309
- assert resp.status_code == 404
310
-
311
- resp = await client.delete(
312
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
313
- headers={"X-SYNCHRONOUS": "True"},
314
- )
315
- assert resp.status_code == 204
316
-
317
- res = await ingest.ResourceFieldExists(pbrequest) # type: ignore
318
- assert not res.found
319
-
320
-
321
- @pytest.mark.asyncio
322
- async def test_reprocess_resource(
323
- writer_api: Callable[..., AsyncClient],
324
- test_resource: Resource,
325
- mocker,
326
- maindb_driver,
327
- ) -> None:
328
- if isinstance(maindb_driver, (LocalDriver, RedisDriver)):
329
- pytest.skip("Keys might not be ordered correctly in this driver")
330
-
331
- rsc = test_resource
332
- kbid = rsc.kb.kbid
333
- rid = rsc.uuid
334
-
335
- from nucliadb.writer.utilities import get_processing
336
-
337
- processing = get_processing()
338
- processing.values.clear() # type: ignore
339
-
340
- original = processing.send_to_process
341
- mocker.patch.object(processing, "send_to_process", AsyncMock(side_effect=original))
342
-
343
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
344
- resp = await client.post(
345
- f"/{KB_PREFIX}/{kbid}/resource/{rid}/reprocess",
346
- )
347
- assert resp.status_code == 202
348
-
349
- assert processing.send_to_process.call_count == 1 # type: ignore
350
- payload = processing.send_to_process.call_args[0][0] # type: ignore
351
- assert isinstance(payload, PushPayload)
352
- assert payload.uuid == rid
353
- assert payload.kbid == kbid
354
-
355
- assert isinstance(payload.filefield.get("file1"), str)
356
- assert payload.filefield["file1"] == "convert_internal_filefield_to_str,0"
357
- assert isinstance(payload.linkfield.get("link1"), nucliadb_models.LinkUpload)
358
- assert isinstance(payload.textfield.get("text1"), nucliadb_models.Text)
359
- assert isinstance(
360
- payload.layoutfield.get("layout1"), nucliadb_models.LayoutDiff
361
- )
362
- assert (
363
- payload.layoutfield["layout1"].blocks["field1"].file
364
- == "convert_internal_cf_to_str,2"
365
- )
366
- assert isinstance(
367
- payload.conversationfield.get("conv1"), nucliadb_models.PushConversation
368
- )
369
- assert (
370
- payload.conversationfield["conv1"].messages[33].content.attachments[0]
371
- == "convert_internal_cf_to_str,0"
372
- )
373
- assert (
374
- payload.conversationfield["conv1"].messages[33].content.attachments[1]
375
- == "convert_internal_cf_to_str,1"
376
- )
377
-
378
-
379
- @pytest.mark.asyncio
380
- @pytest.mark.parametrize(
381
- "method,endpoint,payload",
382
- [
383
- ["patch", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", {}],
384
- ["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reprocess", None],
385
- ["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reindex", None],
386
- ["delete", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", None],
387
- ],
388
- )
389
- async def test_resource_endpoints_by_slug(
390
- writer_api: Callable[[list[str]], AsyncClient],
391
- knowledgebox_ingest: str,
392
- method: str,
393
- endpoint: str,
394
- payload: Optional[dict[Any, Any]],
395
- ):
396
- async with writer_api([NucliaDBRoles.WRITER]) as client:
397
- slug = "my-resource"
398
- resp = await client.post(
399
- f"/{KB_PREFIX}/{knowledgebox_ingest}/{RESOURCES_PREFIX}",
400
- headers={"X-SYNCHRONOUS": "True"},
401
- json={
402
- "slug": slug,
403
- "texts": {"text1": {"body": "test1", "format": "PLAIN"}},
404
- },
405
- )
406
- assert resp.status_code == 201
407
-
408
- endpoint = endpoint.format(
409
- KB_PREFIX=KB_PREFIX,
410
- kb=knowledgebox_ingest,
411
- RSLUG_PREFIX=RSLUG_PREFIX,
412
- slug=slug,
413
- )
414
- extra_params = {}
415
- if payload is not None:
416
- extra_params["json"] = payload
417
-
418
- op = getattr(client, method)
419
- resp = await op(endpoint, **extra_params)
420
-
421
- assert resp.status_code in (200, 202, 204)
422
-
423
-
424
- @pytest.mark.asyncio
425
- @pytest.mark.parametrize(
426
- "method,endpoint,payload",
427
- [
428
- ["patch", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", {}],
429
- ["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reprocess", None],
430
- ["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reindex", None],
431
- ["delete", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", None],
432
- ],
433
- )
434
- async def test_resource_endpoints_by_slug_404(
435
- writer_api,
436
- knowledgebox_ingest,
437
- method,
438
- endpoint,
439
- payload,
440
- ):
441
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
442
- endpoint = endpoint.format(
443
- KB_PREFIX=KB_PREFIX,
444
- kb=knowledgebox_ingest,
445
- RSLUG_PREFIX=RSLUG_PREFIX,
446
- slug="idonotexist",
447
- )
448
- extra_params = {}
449
- if payload is not None:
450
- extra_params["json"] = payload
451
-
452
- op = getattr(client, method)
453
- resp = await op(endpoint, **extra_params)
454
-
455
- assert resp.status_code == 404
456
- assert resp.json()["detail"] == "Resource does not exist"
457
-
458
-
459
- @pytest.mark.asyncio
460
- async def test_reindex(writer_api, test_resource):
461
- rsc = test_resource
462
- kbid = rsc.kb.kbid
463
- rid = rsc.uuid
464
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
465
- resp = await client.post(
466
- f"/{KB_PREFIX}/{kbid}/resource/{rid}/reindex",
467
- )
468
- assert resp.status_code == 200
469
-
470
- resp = await client.post(
471
- f"/{KB_PREFIX}/{kbid}/resource/{rid}/reindex?reindex_vectors=True",
472
- )
473
- assert resp.status_code == 200
474
-
475
-
476
- @pytest.mark.asyncio
477
- async def test_paragraph_annotations(writer_api, knowledgebox_writer):
478
- kbid = knowledgebox_writer
479
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
480
- # Must have at least one classification
481
- resp = await client.post(
482
- f"/{KB_PREFIX}/{kbid}/resources",
483
- headers={"X-SYNCHRONOUS": "True"},
484
- json={
485
- "texts": {"text1": TEST_TEXT_PAYLOAD},
486
- "fieldmetadata": [
487
- {
488
- "paragraphs": [
489
- {
490
- "key": "paragraph1",
491
- "classifications": [],
492
- }
493
- ],
494
- "field": {"field": "text1", "field_type": "text"},
495
- }
496
- ],
497
- },
498
- )
499
- assert resp.status_code == 422
500
- body = resp.json()
501
- assert body["detail"] == "ensure classifications has at least 1 items"
502
-
503
- classification = {"label": "label", "labelset": "ls"}
504
-
505
- resp = await client.post(
506
- f"/{KB_PREFIX}/{kbid}/resources",
507
- headers={"X-SYNCHRONOUS": "True"},
508
- json={
509
- "texts": {"text1": TEST_TEXT_PAYLOAD},
510
- "fieldmetadata": [
511
- {
512
- "paragraphs": [
513
- {
514
- "key": "paragraph1",
515
- "classifications": [classification],
516
- }
517
- ],
518
- "field": {"field": "text1", "field_type": "text"},
519
- }
520
- ],
521
- },
522
- )
523
- assert resp.status_code == 201
524
- rid = resp.json()["uuid"]
525
-
526
- # Classifications need to be unique
527
- resp = await client.patch(
528
- f"/{KB_PREFIX}/{kbid}/resource/{rid}",
529
- headers={"X-SYNCHRONOUS": "True"},
530
- json={
531
- "fieldmetadata": [
532
- {
533
- "paragraphs": [
534
- {
535
- "key": "paragraph1",
536
- "classifications": [classification, classification],
537
- }
538
- ],
539
- "field": {"field": "text1", "field_type": "text"},
540
- }
541
- ],
542
- },
543
- )
544
- assert resp.status_code == 422
545
- body = resp.json()
546
- assert body["detail"] == "Paragraph classifications need to be unique"
@@ -1,137 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
-
22
- from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX
23
- from nucliadb_models.entities import CreateEntitiesGroupPayload, Entity
24
- from nucliadb_models.labels import Label, LabelSet
25
- from nucliadb_models.resource import NucliaDBRoles
26
- from nucliadb_protos import knowledgebox_pb2, writer_pb2
27
- from nucliadb_utils.utilities import get_ingest
28
-
29
-
30
- @pytest.mark.asyncio
31
- async def test_service_lifecycle_entities(writer_api, entities_manager_mock):
32
- async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
33
- resp = await client.post(
34
- f"/{KBS_PREFIX}",
35
- json={
36
- "slug": "kbid1",
37
- "title": "My Knowledge Box",
38
- },
39
- )
40
- assert resp.status_code == 201
41
- data = resp.json()
42
- assert data["slug"] == "kbid1"
43
- kbid = data["uuid"]
44
-
45
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
46
- eg = CreateEntitiesGroupPayload(
47
- group="0",
48
- title="My group",
49
- color="#0000000",
50
- entities={
51
- "ent1": Entity(value="asd", merged=False),
52
- "ent2": Entity(value="asd", merged=False),
53
- "ent3": Entity(value="asd", merged=False),
54
- },
55
- )
56
-
57
- resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
58
- assert resp.status_code == 200
59
-
60
- ingest = get_ingest()
61
- result = await ingest.GetEntities(
62
- writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
63
- )
64
- assert set(result.groups.keys()) == {"0"}
65
- assert result.groups["0"].title == eg.title
66
- assert result.groups["0"].color == eg.color
67
- assert set(result.groups["0"].entities.keys()) == {"ent1", "ent2", "ent3"}
68
- assert result.groups["0"].entities["ent1"].value == "asd"
69
-
70
- eg.group = "1"
71
- resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
72
- assert resp.status_code == 200
73
- result = await ingest.GetEntities(
74
- writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
75
- )
76
- assert set(result.groups.keys()) == {"0", "1"}
77
-
78
-
79
- @pytest.mark.asyncio
80
- async def test_entities_custom_field_for_user_defined_groups(
81
- writer_api, entities_manager_mock
82
- ):
83
- """
84
- Test description:
85
-
86
- - Create an entity group and check that the default value for the `custom`
87
- field is True
88
- """
89
- async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
90
- resp = await client.post(
91
- f"/{KBS_PREFIX}",
92
- json={
93
- "slug": "kbid1",
94
- "title": "My Knowledge Box",
95
- },
96
- )
97
- assert resp.status_code == 201
98
- data = resp.json()
99
- kbid = data["uuid"]
100
-
101
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
102
- eg = CreateEntitiesGroupPayload(group="0")
103
- resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
104
- assert resp.status_code == 200
105
-
106
- ingest = get_ingest()
107
- result = await ingest.GetEntities(
108
- writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
109
- )
110
- assert result.groups["0"].custom is True
111
-
112
-
113
- @pytest.mark.asyncio
114
- async def test_service_lifecycle_labels(writer_api):
115
- async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
116
- resp = await client.post(
117
- f"/{KBS_PREFIX}",
118
- json={
119
- "slug": "kbid1",
120
- "title": "My Knowledge Box",
121
- },
122
- )
123
- assert resp.status_code == 201
124
- data = resp.json()
125
- assert data["slug"] == "kbid1"
126
- kbid = data["uuid"]
127
-
128
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
129
- ls = LabelSet(
130
- title="My labelset", color="#0000000", multiple=False, kind=["RESOURCES"]
131
- )
132
- ls.labels.append(Label(title="asd"))
133
- ls.labels.append(Label(title="asd"))
134
- resp = await client.post(f"/{KB_PREFIX}/{kbid}/labelset/ls1", json=ls.dict())
135
- assert resp.status_code == 200
136
- resp = await client.post(f"/{KB_PREFIX}/{kbid}/labelset/ls2", json=ls.dict())
137
- assert resp.status_code == 200