nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,49 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
-
22
- from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX
23
- from nucliadb_models.resource import NucliaDBRoles
24
-
25
-
26
- @pytest.mark.asyncio
27
- async def test_knowledgebox_lifecycle(writer_api):
28
- async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
29
- resp = await client.post(
30
- f"/{KBS_PREFIX}",
31
- json={
32
- "slug": "kbid1",
33
- "title": "My Knowledge Box",
34
- "description": "My lovely knowledgebox",
35
- },
36
- )
37
- assert resp.status_code == 201
38
- data = resp.json()
39
- assert data["slug"] == "kbid1"
40
- kbid = data["uuid"]
41
-
42
- resp = await client.patch(
43
- f"/{KB_PREFIX}/{kbid}",
44
- json={
45
- "slug": "kbid2",
46
- "description": "My lovely knowledgebox2",
47
- },
48
- )
49
- assert resp.status_code == 200
@@ -1,133 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from typing import AsyncIterator
21
- from unittest.mock import AsyncMock
22
-
23
- import pytest
24
-
25
- from nucliadb.common import datamanagers
26
- from nucliadb.ingest.processing import ProcessingInfo
27
- from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RESOURCES_PREFIX
28
- from nucliadb.writer.tests.utils import load_file_as_FileB64_payload
29
- from nucliadb.writer.utilities import get_processing
30
- from nucliadb_models.resource import NucliaDBRoles, QueueType
31
-
32
-
33
- @pytest.fixture(scope="function")
34
- def processing_mock(mocker):
35
- processing = get_processing()
36
- mocker.patch.object(
37
- processing,
38
- "send_to_process",
39
- AsyncMock(
40
- return_value=ProcessingInfo(seqid=0, account_seq=0, queue=QueueType.SHARED)
41
- ),
42
- )
43
- yield processing
44
-
45
-
46
- @pytest.fixture(scope="function")
47
- @pytest.mark.asyncio
48
- async def file_field(
49
- writer_api, knowledgebox_writer: str
50
- ) -> AsyncIterator[tuple[str, str, str]]:
51
- kbid = knowledgebox_writer
52
- field_id = "myfile"
53
-
54
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
55
- resp = await client.post(
56
- f"/{KB_PREFIX}/{kbid}/{RESOURCES_PREFIX}",
57
- json={
58
- "slug": "resource",
59
- "title": "My resource",
60
- "files": {
61
- field_id: {
62
- "language": "en",
63
- "password": "xxxxxx",
64
- "file": load_file_as_FileB64_payload(
65
- "assets/text001.txt", "text/plain"
66
- ),
67
- }
68
- },
69
- },
70
- )
71
- assert resp.status_code == 201
72
- rid = resp.json()["uuid"]
73
-
74
- assert (
75
- await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=rid)
76
- ) is True
77
-
78
- yield kbid, rid, field_id
79
-
80
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
81
- resp = await client.delete(
82
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}",
83
- )
84
- assert resp.status_code == 204
85
-
86
-
87
- @pytest.mark.asyncio
88
- async def test_reprocess_nonexistent_file_field(
89
- writer_api, knowledgebox_writer: str, resource: str
90
- ):
91
- kbid = knowledgebox_writer
92
- rid = resource
93
- field_id = "nonexistent-field"
94
-
95
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
96
- resp = await client.post(
97
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/reprocess",
98
- )
99
- assert resp.status_code == 404
100
-
101
-
102
- @pytest.mark.asyncio
103
- async def test_reprocess_file_field_with_password(
104
- writer_api, file_field: tuple[str, str, str], processing_mock
105
- ):
106
- kbid, rid, field_id = file_field
107
- password = "secret-password"
108
-
109
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
110
- resp = await client.post(
111
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/reprocess",
112
- headers={
113
- "X-FILE-PASSWORD": password,
114
- },
115
- )
116
- assert resp.status_code == 202
117
-
118
- assert processing_mock.send_to_process.await_count == 1
119
-
120
-
121
- @pytest.mark.asyncio
122
- async def test_reprocess_file_field_without_password(
123
- writer_api, file_field: tuple[str, str, str], processing_mock
124
- ):
125
- kbid, rid, field_id = file_field
126
-
127
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
128
- resp = await client.post(
129
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/reprocess",
130
- )
131
- assert resp.status_code == 202
132
-
133
- assert processing_mock.send_to_process.await_count == 1
@@ -1,476 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from datetime import datetime
21
- from typing import Any, Callable, Optional
22
- from unittest.mock import AsyncMock # type: ignore
23
-
24
- import pytest
25
- from httpx import AsyncClient
26
-
27
- import nucliadb_models
28
- from nucliadb.common import datamanagers
29
- from nucliadb.common.maindb.local import LocalDriver
30
- from nucliadb.common.maindb.redis import RedisDriver
31
- from nucliadb.ingest.orm.resource import Resource
32
- from nucliadb.ingest.processing import PushPayload
33
- from nucliadb.writer.api.v1.router import (
34
- KB_PREFIX,
35
- RESOURCE_PREFIX,
36
- RESOURCES_PREFIX,
37
- RSLUG_PREFIX,
38
- )
39
- from nucliadb.writer.tests.test_fields import (
40
- TEST_CONVERSATION_PAYLOAD,
41
- TEST_DATETIMES_PAYLOAD,
42
- TEST_EXTERNAL_FILE_PAYLOAD,
43
- TEST_FILE_PAYLOAD,
44
- TEST_KEYWORDSETS_PAYLOAD,
45
- TEST_LAYOUT_PAYLOAD,
46
- TEST_LINK_PAYLOAD,
47
- TEST_TEXT_PAYLOAD,
48
- )
49
- from nucliadb_models.resource import NucliaDBRoles
50
-
51
-
52
- @pytest.mark.asyncio
53
- async def test_resource_crud(
54
- writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
55
- ):
56
- knowledgebox_id = knowledgebox_writer
57
- async with writer_api([NucliaDBRoles.WRITER]) as client:
58
- # Test create resource
59
- resp = await client.post(
60
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
61
- json={
62
- "slug": "resource1",
63
- "title": "My resource",
64
- "summary": "Some summary",
65
- "icon": "image/png",
66
- "layout": "layout",
67
- "metadata": {
68
- "language": "en",
69
- "metadata": {"key1": "value1", "key2": "value2"},
70
- },
71
- "fieldmetadata": [
72
- {
73
- "paragraphs": [
74
- {
75
- "key": "paragraph1",
76
- "classifications": [
77
- {"labelset": "ls1", "label": "label1"}
78
- ],
79
- }
80
- ],
81
- "token": [
82
- {"token": "token1", "klass": "klass1", "start": 1, "end": 2}
83
- ],
84
- "field": {"field": "text1", "field_type": "text"},
85
- }
86
- ],
87
- "usermetadata": {
88
- "classifications": [{"labelset": "ls1", "label": "label1"}],
89
- "relations": [
90
- {
91
- "relation": "CHILD",
92
- "to": {
93
- "type": "resource",
94
- "value": "resource_uuid",
95
- },
96
- }
97
- ],
98
- },
99
- "origin": {
100
- "source_id": "source_id",
101
- "url": "http://some_source",
102
- "created": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
103
- "modified": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
104
- "metadata": {"key1": "value1", "key2": "value2"},
105
- "tags": ["tag1", "tag2"],
106
- "collaborators": ["col1", "col2"],
107
- "filename": "file.pdf",
108
- "related": ["related1"],
109
- },
110
- "texts": {"text1": TEST_TEXT_PAYLOAD},
111
- "links": {"link1": TEST_LINK_PAYLOAD},
112
- "files": {
113
- "file1": TEST_FILE_PAYLOAD,
114
- "external1": TEST_EXTERNAL_FILE_PAYLOAD,
115
- },
116
- "layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
117
- "conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
118
- "keywordsets": {"keywordset1": TEST_KEYWORDSETS_PAYLOAD},
119
- "datetimes": {"datetime1": TEST_DATETIMES_PAYLOAD},
120
- },
121
- )
122
-
123
- assert resp.status_code == 201
124
- data = resp.json()
125
- assert "uuid" in data
126
- assert "seqid" in data
127
- rid = data["uuid"]
128
-
129
- # Test update resource
130
- resp = await client.patch(
131
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
132
- json={},
133
- )
134
- assert resp.status_code == 200
135
-
136
- data = resp.json()
137
-
138
- assert "seqid" in data
139
-
140
- # Test delete resource
141
- resp = await client.delete(
142
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
143
- )
144
- assert resp.status_code == 204
145
-
146
-
147
- @pytest.mark.asyncio
148
- async def test_resource_crud_sync(
149
- writer_api: Callable[[list[str]], AsyncClient], knowledgebox_writer: str
150
- ):
151
- knowledgebox_id = knowledgebox_writer
152
- async with writer_api([NucliaDBRoles.WRITER]) as client:
153
- # Test create resource
154
- resp = await client.post(
155
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
156
- json={
157
- "slug": "resource1",
158
- "title": "My resource",
159
- "summary": "Some summary",
160
- "icon": "image/png",
161
- "layout": "layout",
162
- "metadata": {
163
- "language": "en",
164
- "metadata": {"key1": "value1", "key2": "value2"},
165
- },
166
- "fieldmetadata": [
167
- {
168
- "paragraphs": [
169
- {
170
- "key": "paragraph1",
171
- "classifications": [
172
- {"labelset": "ls1", "label": "label1"}
173
- ],
174
- }
175
- ],
176
- "token": [
177
- {"token": "token1", "klass": "klass1", "start": 1, "end": 2}
178
- ],
179
- "field": {"field": "text1", "field_type": "text"},
180
- }
181
- ],
182
- "usermetadata": {
183
- "classifications": [{"labelset": "ls1", "label": "label1"}],
184
- "relations": [
185
- {
186
- "relation": "CHILD",
187
- "to": {
188
- "type": "resource",
189
- "value": "resource_uuid",
190
- },
191
- }
192
- ],
193
- },
194
- "origin": {
195
- "source_id": "source_id",
196
- "url": "http://some_source",
197
- "created": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
198
- "modified": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
199
- "metadata": {"key1": "value1", "key2": "value2"},
200
- "tags": ["tag1", "tag2"],
201
- "collaborators": ["col1", "col2"],
202
- "filename": "file.pdf",
203
- "related": ["related1"],
204
- },
205
- "texts": {"text1": TEST_TEXT_PAYLOAD},
206
- "links": {"link1": TEST_LINK_PAYLOAD},
207
- "files": {"file1": TEST_FILE_PAYLOAD},
208
- "layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
209
- "conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
210
- "keywordsets": {"keywordset1": TEST_KEYWORDSETS_PAYLOAD},
211
- "datetimes": {"datetime1": TEST_DATETIMES_PAYLOAD},
212
- },
213
- )
214
-
215
- assert resp.status_code == 201
216
- data = resp.json()
217
- assert "uuid" in data
218
- assert "seqid" in data
219
- assert "elapsed" in data
220
- rid = data["uuid"]
221
-
222
- assert (
223
- await datamanagers.atomic.resources.resource_exists(
224
- kbid=knowledgebox_id, rid=rid
225
- )
226
- ) is True
227
-
228
- # Test update resource
229
- resp = await client.patch(
230
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
231
- json={},
232
- )
233
- assert resp.status_code == 200
234
-
235
- # Test delete resource
236
-
237
- resp = await client.delete(
238
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/resource1",
239
- )
240
-
241
- assert resp.status_code == 404
242
-
243
- resp = await client.delete(
244
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}",
245
- )
246
- assert resp.status_code == 204
247
-
248
- assert (
249
- await datamanagers.atomic.resources.resource_exists(
250
- kbid=knowledgebox_id, rid=rid
251
- )
252
- ) is False
253
-
254
-
255
- @pytest.mark.asyncio
256
- async def test_reprocess_resource(
257
- writer_api: Callable[..., AsyncClient],
258
- test_resource: Resource,
259
- mocker,
260
- maindb_driver,
261
- ) -> None:
262
- if isinstance(maindb_driver, (LocalDriver, RedisDriver)):
263
- pytest.skip("Keys might not be ordered correctly in this driver")
264
-
265
- rsc = test_resource
266
- kbid = rsc.kb.kbid
267
- rid = rsc.uuid
268
-
269
- from nucliadb.writer.utilities import get_processing
270
-
271
- processing = get_processing()
272
- processing.values.clear() # type: ignore
273
-
274
- original = processing.send_to_process
275
- mocker.patch.object(processing, "send_to_process", AsyncMock(side_effect=original))
276
-
277
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
278
- resp = await client.post(
279
- f"/{KB_PREFIX}/{kbid}/resource/{rid}/reprocess",
280
- )
281
- assert resp.status_code == 202
282
-
283
- assert processing.send_to_process.call_count == 1 # type: ignore
284
- payload = processing.send_to_process.call_args[0][0] # type: ignore
285
- assert isinstance(payload, PushPayload)
286
- assert payload.uuid == rid
287
- assert payload.kbid == kbid
288
-
289
- assert isinstance(payload.filefield.get("file1"), str)
290
- assert payload.filefield["file1"] == "convert_internal_filefield_to_str,0"
291
- assert isinstance(payload.linkfield.get("link1"), nucliadb_models.LinkUpload)
292
- assert isinstance(payload.textfield.get("text1"), nucliadb_models.Text)
293
- assert isinstance(
294
- payload.layoutfield.get("layout1"), nucliadb_models.LayoutDiff
295
- )
296
- assert (
297
- payload.layoutfield["layout1"].blocks["field1"].file
298
- == "convert_internal_cf_to_str,2"
299
- )
300
- assert isinstance(
301
- payload.conversationfield.get("conv1"), nucliadb_models.PushConversation
302
- )
303
- assert (
304
- payload.conversationfield["conv1"].messages[33].content.attachments[0]
305
- == "convert_internal_cf_to_str,0"
306
- )
307
- assert (
308
- payload.conversationfield["conv1"].messages[33].content.attachments[1]
309
- == "convert_internal_cf_to_str,1"
310
- )
311
-
312
-
313
- @pytest.mark.asyncio
314
- @pytest.mark.parametrize(
315
- "method,endpoint,payload",
316
- [
317
- ["patch", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", {}],
318
- ["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reprocess", None],
319
- ["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reindex", None],
320
- ["delete", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", None],
321
- ],
322
- )
323
- async def test_resource_endpoints_by_slug(
324
- writer_api: Callable[[list[str]], AsyncClient],
325
- knowledgebox_ingest: str,
326
- method: str,
327
- endpoint: str,
328
- payload: Optional[dict[Any, Any]],
329
- ):
330
- async with writer_api([NucliaDBRoles.WRITER]) as client:
331
- slug = "my-resource"
332
- resp = await client.post(
333
- f"/{KB_PREFIX}/{knowledgebox_ingest}/{RESOURCES_PREFIX}",
334
- json={
335
- "slug": slug,
336
- "texts": {"text1": {"body": "test1", "format": "PLAIN"}},
337
- },
338
- )
339
- assert resp.status_code == 201
340
-
341
- endpoint = endpoint.format(
342
- KB_PREFIX=KB_PREFIX,
343
- kb=knowledgebox_ingest,
344
- RSLUG_PREFIX=RSLUG_PREFIX,
345
- slug=slug,
346
- )
347
- extra_params = {}
348
- if payload is not None:
349
- extra_params["json"] = payload
350
-
351
- op = getattr(client, method)
352
- resp = await op(endpoint, **extra_params)
353
-
354
- assert resp.status_code in (200, 202, 204)
355
-
356
-
357
- @pytest.mark.asyncio
358
- @pytest.mark.parametrize(
359
- "method,endpoint,payload",
360
- [
361
- ["patch", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", {}],
362
- ["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reprocess", None],
363
- ["post", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}/reindex", None],
364
- ["delete", "/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{slug}", None],
365
- ],
366
- )
367
- async def test_resource_endpoints_by_slug_404(
368
- writer_api,
369
- knowledgebox_ingest,
370
- method,
371
- endpoint,
372
- payload,
373
- ):
374
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
375
- endpoint = endpoint.format(
376
- KB_PREFIX=KB_PREFIX,
377
- kb=knowledgebox_ingest,
378
- RSLUG_PREFIX=RSLUG_PREFIX,
379
- slug="idonotexist",
380
- )
381
- extra_params = {}
382
- if payload is not None:
383
- extra_params["json"] = payload
384
-
385
- op = getattr(client, method)
386
- resp = await op(endpoint, **extra_params)
387
-
388
- assert resp.status_code == 404
389
- assert resp.json()["detail"] == "Resource does not exist"
390
-
391
-
392
- @pytest.mark.asyncio
393
- async def test_reindex(writer_api, test_resource):
394
- rsc = test_resource
395
- kbid = rsc.kb.kbid
396
- rid = rsc.uuid
397
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
398
- resp = await client.post(
399
- f"/{KB_PREFIX}/{kbid}/resource/{rid}/reindex",
400
- )
401
- assert resp.status_code == 200
402
-
403
- resp = await client.post(
404
- f"/{KB_PREFIX}/{kbid}/resource/{rid}/reindex?reindex_vectors=True",
405
- )
406
- assert resp.status_code == 200
407
-
408
-
409
- @pytest.mark.asyncio
410
- async def test_paragraph_annotations(writer_api, knowledgebox_writer):
411
- kbid = knowledgebox_writer
412
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
413
- # Must have at least one classification
414
- resp = await client.post(
415
- f"/{KB_PREFIX}/{kbid}/resources",
416
- json={
417
- "texts": {"text1": TEST_TEXT_PAYLOAD},
418
- "fieldmetadata": [
419
- {
420
- "paragraphs": [
421
- {
422
- "key": "paragraph1",
423
- "classifications": [],
424
- }
425
- ],
426
- "field": {"field": "text1", "field_type": "text"},
427
- }
428
- ],
429
- },
430
- )
431
- assert resp.status_code == 422
432
- body = resp.json()
433
- assert body["detail"] == "ensure classifications has at least 1 items"
434
-
435
- classification = {"label": "label", "labelset": "ls"}
436
-
437
- resp = await client.post(
438
- f"/{KB_PREFIX}/{kbid}/resources",
439
- json={
440
- "texts": {"text1": TEST_TEXT_PAYLOAD},
441
- "fieldmetadata": [
442
- {
443
- "paragraphs": [
444
- {
445
- "key": "paragraph1",
446
- "classifications": [classification],
447
- }
448
- ],
449
- "field": {"field": "text1", "field_type": "text"},
450
- }
451
- ],
452
- },
453
- )
454
- assert resp.status_code == 201
455
- rid = resp.json()["uuid"]
456
-
457
- # Classifications need to be unique
458
- resp = await client.patch(
459
- f"/{KB_PREFIX}/{kbid}/resource/{rid}",
460
- json={
461
- "fieldmetadata": [
462
- {
463
- "paragraphs": [
464
- {
465
- "key": "paragraph1",
466
- "classifications": [classification, classification],
467
- }
468
- ],
469
- "field": {"field": "text1", "field_type": "text"},
470
- }
471
- ],
472
- },
473
- )
474
- assert resp.status_code == 422
475
- body = resp.json()
476
- assert body["detail"] == "Paragraph classifications need to be unique"