nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -1,136 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import asyncio
21
-
22
- import aiohttp
23
- import pytest
24
- from nucliadb_protos.dataset_pb2 import TaskType, TokenClassificationBatch, TrainSet
25
- from nucliadb_protos.resources_pb2 import Position
26
- from nucliadb_protos.writer_pb2 import BrokerMessage
27
- from nucliadb_protos.writer_pb2_grpc import WriterStub
28
-
29
- from nucliadb.tests.utils import inject_message
30
- from nucliadb.tests.utils.broker_messages import BrokerMessageBuilder, FieldBuilder
31
- from nucliadb.train import API_PREFIX
32
- from nucliadb.train.api.v1.router import KB_PREFIX
33
- from nucliadb.train.tests.utils import get_batches_from_train_response_stream
34
- from nucliadb_protos import resources_pb2 as rpb
35
-
36
-
37
- @pytest.mark.asyncio
38
- @pytest.mark.parametrize("knowledgebox", ["STABLE", "EXPERIMENTAL"], indirect=True)
39
- async def test_generator_token_classification(
40
- train_rest_api: aiohttp.ClientSession,
41
- knowledgebox_with_entities: str,
42
- nucliadb_grpc: WriterStub,
43
- ):
44
- kbid = knowledgebox_with_entities
45
-
46
- await inject_resource_with_token_classification(kbid, nucliadb_grpc)
47
-
48
- async with train_rest_api.get(
49
- f"/{API_PREFIX}/v1/{KB_PREFIX}/{kbid}/trainset"
50
- ) as partitions:
51
- assert partitions.status == 200
52
- data = await partitions.json()
53
- assert len(data["partitions"]) == 1
54
- partition_id = data["partitions"][0]
55
-
56
- trainset = TrainSet()
57
- trainset.type = TaskType.TOKEN_CLASSIFICATION
58
- trainset.batch_size = 2
59
- trainset.filter.labels.append("PERSON")
60
- trainset.filter.labels.append("ORG")
61
- async with train_rest_api.post(
62
- f"/{API_PREFIX}/v1/{KB_PREFIX}/{kbid}/trainset/{partition_id}",
63
- data=trainset.SerializeToString(),
64
- ) as response:
65
- assert response.status == 200
66
- batches: list[TokenClassificationBatch] = []
67
- async for batch in get_batches_from_train_response_stream(
68
- response, TokenClassificationBatch
69
- ):
70
- batches.append(batch)
71
-
72
- for batch in batches:
73
- if batch.data[0].token == "Eudald":
74
- assert batch.data[0].label == "B-PERSON"
75
- assert batch.data[1].label == "I-PERSON"
76
- assert batch.data[2].label == "O"
77
- if batch.data[0].token == "This":
78
- assert batch.data[4].label == "B-PERSON"
79
- assert batch.data[5].label == "I-PERSON"
80
- if batch.data[0].token == "Where":
81
- assert batch.data[3].label == "B-ORG"
82
- assert batch.data[4].label == "I-ORG"
83
- assert batch.data[5].label == "I-ORG"
84
- if batch.data[0].token == "Summary":
85
- assert batch.data[2].label == "B-ORG"
86
- assert batch.data[4].label == "B-ORG"
87
- if batch.data[0].token == "My":
88
- assert batch.data[3].label == "B-PERSON"
89
- assert batch.data[12].label == "B-ORG"
90
-
91
-
92
- async def inject_resource_with_token_classification(knowledgebox, writer):
93
- bm = broker_resource(knowledgebox)
94
- await inject_message(writer, bm)
95
- await asyncio.sleep(0.1)
96
- return bm.uuid
97
-
98
-
99
- def broker_resource(knowledgebox: str) -> BrokerMessage:
100
- bmb = BrokerMessageBuilder(kbid=knowledgebox)
101
-
102
- bmb.with_title("This is a bird, its a plane, no, its el Super Fran")
103
- title_field = bmb.field_builder("title", rpb.FieldType.GENERIC)
104
- title_field.with_extracted_entity(
105
- "PERSON", "el Super Fran", positions=[Position(start=37, end=50)]
106
- )
107
-
108
- bmb.with_summary("Summary of Nuclia using Debian")
109
- summary_field = bmb.field_builder("summary", rpb.FieldType.GENERIC)
110
- summary_field.with_extracted_entity(
111
- "ORG", "Nuclia", positions=[Position(start=11, end=17)]
112
- )
113
- summary_field.with_extracted_entity(
114
- "ORG", "Debian", positions=[Position(start=24, end=30)]
115
- )
116
-
117
- file_field = FieldBuilder("file", rpb.FieldType.FILE)
118
- file_field.with_extracted_text(
119
- "My own text Ramon. This is great to be at Nuclia. \n Where is the Generalitat de Catalunya? Eudald Camprubi, do you want to go shooping? This is a test Carmen Iniesta!" # noqa
120
- )
121
- file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=0, end=49))
122
- file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=50, end=90))
123
- file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=91, end=135))
124
- file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=136, end=166))
125
-
126
- file_field.with_user_entity("PERSON", "Ramon", start=12, end=17)
127
- file_field.with_user_entity("ORG", "Nuclia", start=42, end=48)
128
- file_field.with_user_entity("ORG", "Generalitat de Catalunya", start=65, end=89)
129
- file_field.with_user_entity("PERSON", "Eudald", start=91, end=106)
130
- file_field.with_user_entity("PERSON", "Carmen Iniesta", start=151, end=165)
131
-
132
- bmb.add_field_builder(file_field)
133
-
134
- bm = bmb.build()
135
-
136
- return bm
@@ -1,101 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- from typing import AsyncGenerator, overload
21
-
22
- import aiohttp
23
- from nucliadb_protos.dataset_pb2 import (
24
- FieldClassificationBatch,
25
- ImageClassificationBatch,
26
- ParagraphClassificationBatch,
27
- ParagraphStreamingBatch,
28
- QuestionAnswerStreamingBatch,
29
- SentenceClassificationBatch,
30
- TokenClassificationBatch,
31
- )
32
-
33
- from nucliadb.train.types import TrainBatch, TrainBatchType
34
-
35
- # NOTE: we use def instead of async def to make mypy happy. Otherwise, it
36
- # considers the overloaded functions as corountines returning async iterators
37
- # instead of async iterators themselves and complains about it
38
-
39
-
40
- @overload
41
- def get_batches_from_train_response_stream(
42
- response: aiohttp.ClientResponse,
43
- pb_klass: type[FieldClassificationBatch],
44
- ) -> AsyncGenerator[FieldClassificationBatch, None]: ...
45
-
46
-
47
- @overload
48
- def get_batches_from_train_response_stream(
49
- response: aiohttp.ClientResponse,
50
- pb_klass: type[ImageClassificationBatch],
51
- ) -> AsyncGenerator[ImageClassificationBatch, None]: ...
52
-
53
-
54
- @overload
55
- def get_batches_from_train_response_stream(
56
- response: aiohttp.ClientResponse,
57
- pb_klass: type[ParagraphClassificationBatch],
58
- ) -> AsyncGenerator[ParagraphClassificationBatch, None]: ...
59
-
60
-
61
- @overload
62
- def get_batches_from_train_response_stream(
63
- response: aiohttp.ClientResponse,
64
- pb_klass: type[ParagraphStreamingBatch],
65
- ) -> AsyncGenerator[ParagraphStreamingBatch, None]: ...
66
-
67
-
68
- @overload
69
- def get_batches_from_train_response_stream(
70
- response: aiohttp.ClientResponse,
71
- pb_klass: type[QuestionAnswerStreamingBatch],
72
- ) -> AsyncGenerator[QuestionAnswerStreamingBatch, None]: ...
73
-
74
-
75
- @overload
76
- def get_batches_from_train_response_stream(
77
- response: aiohttp.ClientResponse,
78
- pb_klass: type[SentenceClassificationBatch],
79
- ) -> AsyncGenerator[SentenceClassificationBatch, None]: ...
80
-
81
-
82
- @overload
83
- def get_batches_from_train_response_stream(
84
- response: aiohttp.ClientResponse,
85
- pb_klass: type[TokenClassificationBatch],
86
- ) -> AsyncGenerator[TokenClassificationBatch, None]: ...
87
-
88
-
89
- async def get_batches_from_train_response_stream(
90
- response: aiohttp.ClientResponse,
91
- pb_klass: TrainBatchType,
92
- ) -> AsyncGenerator[TrainBatch, None]:
93
- while True:
94
- header = await response.content.read(4)
95
- if header == b"":
96
- break
97
- payload_size = int.from_bytes(header, byteorder="big", signed=False)
98
- payload = await response.content.read(payload_size)
99
- batch = pb_klass()
100
- batch.ParseFromString(payload)
101
- yield batch
@@ -1,51 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from typing import Any, Callable, Coroutine
21
-
22
- from nucliadb_protos.resources_pb2 import FieldLayout
23
-
24
- import nucliadb_models as models
25
- from nucliadb_utils.storages.storage import Storage
26
-
27
- VERSION: dict[
28
- int,
29
- Callable[
30
- [models.InputLayoutField, str, str, str, Storage],
31
- Coroutine[Any, Any, FieldLayout],
32
- ],
33
- ] = {}
34
-
35
- import nucliadb.writer.layouts.v1 # noqa isort:skip
36
-
37
-
38
async def serialize_blocks(
    layout_field: models.InputLayoutField,
    kbid: str,
    uuid: str,
    field: str,
    storage: Storage,
) -> FieldLayout:
    """Serialize ``layout_field`` with the serializer registered for its format.

    The serializer is looked up in the module-level ``VERSION`` registry
    using ``layout_field.format`` and awaited with all the arguments
    received here.

    Raises:
        KeyError: If no serializer is registered for the layout format.
    """
    if layout_field.format not in VERSION:
        raise KeyError("Invalid version")
    serializer = VERSION[layout_field.format]
    return await serializer(layout_field, kbid, uuid, field, storage)
@@ -1,59 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from nucliadb_protos.resources_pb2 import Block as PBBlock
21
- from nucliadb_protos.resources_pb2 import FieldLayout
22
-
23
- import nucliadb_models as models
24
- from nucliadb.writer.layouts import VERSION
25
- from nucliadb_utils.storages.storage import Storage
26
-
27
-
28
async def serialize_block(
    layout_field: models.InputLayoutField,
    kbid: str,
    uuid: str,
    field: str,
    storage: Storage,
) -> FieldLayout:
    """Convert an input layout field into a ``FieldLayout`` protobuf.

    For every block in ``layout_field.body.blocks`` a protobuf block is
    filled in, the block's file is uploaded through ``storage``, and the
    protobuf block is copied into the resulting layout under the same key.
    """
    layout_pb = FieldLayout()
    for block_key, source in layout_field.body.blocks.items():
        block_pb = PBBlock()
        block_pb.x = source.x
        block_pb.y = source.y
        block_pb.cols = source.cols
        block_pb.rows = source.rows
        block_pb.type = PBBlock.TypeBlock.Value(source.type)
        # Blocks without an explicit identifier are identified by their key.
        block_pb.ident = source.ident or block_key
        block_pb.payload = source.payload

        destination = storage.layout_field(kbid, uuid, field, block_key)
        await storage.upload_b64file_to_cloudfile(
            destination,
            source.file.payload.encode(),
            source.file.filename,
            source.file.content_type,
            source.file.md5,
        )
        layout_pb.body.blocks[block_key].CopyFrom(block_pb)

    layout_pb.format = FieldLayout.Format.Value(layout_field.format.value)
    return layout_pb
57
-
58
-
59
- VERSION[models.LayoutFormat.NUCLIAv1] = serialize_block
@@ -1,19 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
@@ -1,31 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- pytest_plugins = [
21
- "pytest_mock",
22
- "pytest_docker_fixtures",
23
- "nucliadb_utils.tests.nats",
24
- "nucliadb.tests.fixtures",
25
- "nucliadb.tests.tikv",
26
- "nucliadb.ingest.tests.fixtures", # should be refactored out
27
- "nucliadb.writer.tests.fixtures",
28
- "nucliadb_utils.tests.conftest",
29
- "nucliadb_utils.tests.gcs",
30
- "nucliadb_utils.tests.s3",
31
- ]
@@ -1,191 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from enum import Enum
21
- from typing import AsyncIterator, Callable, Optional
22
- from unittest import mock
23
-
24
- import pytest
25
- from httpx import AsyncClient
26
- from pytest_lazy_fixtures import lazy_fixture
27
- from redis import asyncio as aioredis
28
-
29
- from nucliadb.ingest.tests.fixtures import IngestFixture
30
- from nucliadb.writer import API_PREFIX
31
- from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX
32
- from nucliadb.writer.settings import settings
33
- from nucliadb.writer.tus import clear_storage
34
- from nucliadb_models.resource import NucliaDBRoles
35
- from nucliadb_utils.settings import (
36
- FileBackendConfig,
37
- nuclia_settings,
38
- nucliadb_settings,
39
- storage_settings,
40
- )
41
- from nucliadb_utils.tests.conftest import get_testing_storage_backend
42
- from nucliadb_utils.utilities import Utility, clean_utility, set_utility
43
-
44
-
45
@pytest.fixture(scope="function")
def disabled_back_pressure():
    """Patch ``is_back_pressure_enabled`` to return ``False`` during the test.

    Yields the mock that replaces the patched function.
    """
    patcher = mock.patch(
        "nucliadb.writer.back_pressure.is_back_pressure_enabled",
        return_value=False,
    )
    with patcher as patched:
        yield patched
51
-
52
-
53
@pytest.fixture(scope="function")
async def writer_api(
    disabled_back_pressure,
    redis,
    storage_writer,
    grpc_servicer: IngestFixture,
    transaction_utility,
    processing_utility,
    tus_manager,
) -> AsyncIterator[Callable[[list[Enum], str, str], AsyncClient]]:
    """Start the writer application and yield a factory of HTTP test clients.

    The yielded callable builds an ``AsyncClient`` bound to the application,
    with the roles and user headers already set. Redis is flushed before the
    application starts and again after it shuts down.
    """
    nucliadb_settings.nucliadb_ingest = grpc_servicer.host
    from nucliadb.writer.app import create_application

    application = create_application()

    def make_client_fixture(
        roles: Optional[list[Enum]] = None,
        user: str = "",
        version: str = "1",
    ) -> AsyncClient:
        """Build a client for API ``version`` acting as ``user`` with ``roles``."""
        granted = roles or []
        base_url = f"http://test/{API_PREFIX}/v{version}"

        client = AsyncClient(app=application, base_url=base_url)  # type: ignore
        # Roles travel as a single header value separated by semicolons.
        client.headers["X-NUCLIADB-ROLES"] = ";".join(role.value for role in granted)
        client.headers["X-NUCLIADB-USER"] = user

        return client

    redis_client = aioredis.from_url(f"redis://{redis[0]}:{redis[1]}")
    await redis_client.flushall()

    await application.router.startup()

    yield make_client_fixture

    await application.router.shutdown()
    clear_storage()

    await redis_client.flushall()
    await redis_client.close(close_connection_pool=True)
97
-
98
-
99
@pytest.fixture(scope="function")
def gcs_storage_writer(gcs):
    """Point the global storage settings at the GCS endpoint given by ``gcs``."""
    storage_settings.file_backend = FileBackendConfig.GCS
    # The bucket value keeps a literal ``{kbid}`` placeholder.
    storage_settings.gcs_bucket = "test_{kbid}"
    storage_settings.gcs_endpoint_url = gcs
104
-
105
-
106
@pytest.fixture(scope="function")
def s3_storage_writer(s3):
    """Point the global storage settings at the S3 endpoint given by ``s3``.

    Client id and secret are set to empty strings.
    """
    storage_settings.file_backend = FileBackendConfig.S3
    storage_settings.s3_endpoint = s3
    # The bucket value keeps a literal ``{kbid}`` placeholder.
    storage_settings.s3_bucket = "test-{kbid}"
    storage_settings.s3_client_id = ""
    storage_settings.s3_client_secret = ""
113
-
114
-
115
@pytest.fixture(scope="function")
def pg_storage_writer(pg):
    """Select the PG file backend and set its connection URL from ``pg``.

    ``pg[0]`` and ``pg[1]`` are used as the host and port of the URL.
    """
    storage_settings.file_backend = FileBackendConfig.PG
    storage_settings.driver_pg_url = (
        f"postgresql://postgres:postgres@{pg[0]}:{pg[1]}/postgres"
    )
120
-
121
-
122
def lazy_storage_writer_fixture():
    """Return the one-element params list used by the ``storage_writer`` fixture.

    The element is a lazy reference to the backend fixture matching
    ``get_testing_storage_backend()``. An unrecognized backend is reported
    on stdout and the GCS fixture is used instead.
    """
    backend = get_testing_storage_backend()
    candidates = (
        ("gcs", "gcs_storage_writer"),
        ("s3", "s3_storage_writer"),
        ("pg", "pg_storage_writer"),
    )
    for backend_name, fixture_name in candidates:
        if backend == backend_name:
            return [lazy_fixture.lf(fixture_name)]

    print(f"Unknown storage backend {backend}, using gcs")
    return [lazy_fixture.lf("gcs_storage_writer")]
133
-
134
-
135
@pytest.fixture(scope="function", params=lazy_storage_writer_fixture())
async def storage_writer(request):
    """
    Backend-agnostic storage fixture, so the same tests can run against
    different storage backends.

    Registers ``request.param`` as the ``Utility.STORAGE`` utility for the
    test and removes that utility afterwards.
    """
    selected = request.param
    set_utility(Utility.STORAGE, selected)

    yield selected

    clean_utility(Utility.STORAGE)
146
-
147
-
148
@pytest.fixture(scope="function")
async def knowledgebox_writer(writer_api):
    """Create a knowledge box through the writer API and yield its uuid.

    The request is sent with the MANAGER role and must answer 201 with a
    non-null ``uuid`` in the JSON body.
    """
    async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
        payload = {
            "slug": "kbid1",
            "title": "My Knowledge Box",
        }
        resp = await client.post(f"/{KBS_PREFIX}", json=payload)
        assert resp.status_code == 201
        kbid = resp.json().get("uuid")
        assert kbid is not None
        yield kbid
162
-
163
-
164
@pytest.fixture(scope="function")
async def resource(redis, writer_api, knowledgebox_writer):
    """Create a resource in the test knowledge box and return its uuid.

    The request is sent with the WRITER role and must answer 201.
    """
    async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
        payload = {
            "slug": "resource1",
            "title": "Resource 1",
        }
        resp = await client.post(
            f"/{KB_PREFIX}/{knowledgebox_writer}/resources",
            json=payload,
        )
        assert resp.status_code == 201
        resource_uuid = resp.json()["uuid"]

    return resource_uuid
178
-
179
-
180
@pytest.fixture(scope="function")
async def processing_utility():
    """Set the global Nuclia settings used by these tests.

    Enables ``dummy_processing`` and ``onprem`` and sets a fixed JWT key.
    """
    nuclia_settings.nuclia_jwt_key = "foobarkey"
    nuclia_settings.onprem = True
    nuclia_settings.dummy_processing = True
185
-
186
-
187
@pytest.fixture(scope="function")
async def tus_manager(redis):
    """Set the writer's ``dm_redis_*`` settings from the ``redis`` fixture.

    ``redis[0]`` is stored as the host and ``redis[1]`` as the port; the
    fixture yields no value.
    """
    settings.dm_redis_host = redis[0]
    settings.dm_redis_port = redis[1]
    yield None