nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,63 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from fastapi import Request
21
- from fastapi_versioning import version
22
-
23
- from nucliadb.learning_proxy import learning_collector_proxy
24
- from nucliadb.reader.api.v1.router import KB_PREFIX, api
25
- from nucliadb_models.resource import NucliaDBRoles
26
- from nucliadb_utils.authentication import requires
27
-
28
-
29
- @api.get(
30
- path=f"/{KB_PREFIX}/{{kbid}}/feedback/{{month}}",
31
- status_code=200,
32
- summary="Download feedback of a Knowledge Box",
33
- description="Download the feedback of a particular month in a Knowledge Box", # noqa
34
- response_model=None,
35
- tags=["Knowledge Boxes"],
36
- )
37
- @requires(NucliaDBRoles.READER)
38
- @version(1)
39
- async def feedback_download(
40
- request: Request,
41
- kbid: str,
42
- month: str,
43
- ):
44
- return await learning_collector_proxy(
45
- request, "GET", f"/collect/feedback/{kbid}/{month}"
46
- )
47
-
48
-
49
- @api.get(
50
- path=f"/{KB_PREFIX}/{{kbid}}/feedback",
51
- status_code=200,
52
- summary="Feedback avalaible months",
53
- description="List of months within the last year with feedback data",
54
- response_model=None,
55
- tags=["Knowledge Boxes"],
56
- )
57
- @requires(NucliaDBRoles.READER)
58
- @version(1)
59
- async def feedback_list_months(
60
- request: Request,
61
- kbid: str,
62
- ):
63
- return await learning_collector_proxy(request, "GET", f"/collect/feedback/{kbid}")
@@ -1,19 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
@@ -1,31 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- pytest_plugins = [
21
- "pytest_docker_fixtures",
22
- "nucliadb.tests.fixtures",
23
- "nucliadb.tests.tikv",
24
- "nucliadb.ingest.tests.fixtures", # should be refactored out
25
- "nucliadb.reader.tests.fixtures",
26
- "nucliadb_utils.tests.nats",
27
- "nucliadb_utils.tests.conftest",
28
- "nucliadb_utils.tests.gcs",
29
- "nucliadb_utils.tests.s3",
30
- "nucliadb_utils.tests.asyncbenchmark",
31
- ]
@@ -1,136 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import uuid
21
- from datetime import datetime
22
- from enum import Enum
23
- from typing import Optional
24
-
25
- import pytest
26
- from httpx import AsyncClient
27
- from nucliadb_protos.writer_pb2 import BrokerMessage
28
-
29
- from nucliadb.ingest.orm.resource import KB_RESOURCE_SLUG_BASE
30
- from nucliadb.reader import API_PREFIX
31
- from nucliadb_utils.utilities import clear_global_cache
32
-
33
-
34
- @pytest.fixture(scope="function")
35
- def test_settings_reader(cache, gcs, fake_node, maindb_driver): # type: ignore
36
- from nucliadb_utils.settings import (
37
- FileBackendConfig,
38
- running_settings,
39
- storage_settings,
40
- )
41
-
42
- running_settings.debug = False
43
- print(f"Driver ready at {maindb_driver.url}")
44
-
45
- storage_settings.gcs_endpoint_url = gcs
46
- storage_settings.file_backend = FileBackendConfig.GCS
47
- storage_settings.gcs_bucket = "test"
48
-
49
- yield
50
-
51
-
52
- @pytest.fixture(scope="function")
53
- async def reader_api(test_settings_reader: None, local_files): # type: ignore
54
- from nucliadb.reader.app import create_application
55
-
56
- application = create_application()
57
-
58
- def make_client_fixture(
59
- roles: Optional[list[Enum]] = None,
60
- user: str = "",
61
- version: str = "1",
62
- ) -> AsyncClient:
63
- roles = roles or []
64
- client_base_url = "http://test"
65
- client_base_url = f"{client_base_url}/{API_PREFIX}/v{version}"
66
-
67
- client = AsyncClient(app=application, base_url=client_base_url) # type: ignore
68
- client.headers["X-NUCLIADB-ROLES"] = ";".join([role.value for role in roles])
69
- client.headers["X-NUCLIADB-USER"] = user
70
-
71
- return client
72
-
73
- await application.router.startup()
74
- yield make_client_fixture
75
- await application.router.shutdown()
76
- clear_global_cache()
77
-
78
-
79
- def broker_simple_resource(knowledgebox: str, number: int) -> BrokerMessage:
80
- rid = str(uuid.uuid4())
81
- message1: BrokerMessage = BrokerMessage(
82
- kbid=knowledgebox,
83
- uuid=rid,
84
- slug=str(number),
85
- type=BrokerMessage.AUTOCOMMIT,
86
- )
87
-
88
- message1.basic.icon = "text/plain"
89
- message1.basic.title = str(number)
90
- message1.basic.summary = "Summary of document"
91
- message1.basic.thumbnail = "doc"
92
- message1.basic.layout = "default"
93
- message1.basic.metadata.useful = True
94
- message1.basic.metadata.language = "es"
95
- message1.basic.created.FromDatetime(datetime.utcnow())
96
- message1.basic.modified.FromDatetime(datetime.utcnow())
97
- message1.source = BrokerMessage.MessageSource.WRITER
98
-
99
- return message1
100
-
101
-
102
- @pytest.fixture(scope="function")
103
- async def test_resources(processor, knowledgebox_ingest, test_settings_reader):
104
- """
105
- Create a set of resources with only basic information to test pagination
106
- """
107
- resources = []
108
- amount = 10
109
- for i in range(1, 10 + 1):
110
- message = broker_simple_resource(knowledgebox_ingest, i)
111
- await processor.process(message=message, seqid=i)
112
- resources.append(message.uuid)
113
- # Give processed data some time to reach the node
114
-
115
- from time import time
116
-
117
- from nucliadb.common.maindb.utils import get_driver
118
-
119
- driver = get_driver()
120
-
121
- t0 = time()
122
-
123
- while time() - t0 < 30: # wait max 30 seconds for it
124
- txn = await driver.begin()
125
- count = 0
126
- async for key in txn.keys(
127
- match=KB_RESOURCE_SLUG_BASE.format(kbid=knowledgebox_ingest), count=-1
128
- ):
129
- count += 1
130
-
131
- await txn.abort()
132
- if count == amount:
133
- break
134
- print(f"got {count}, retrying")
135
-
136
- yield knowledgebox_ingest, resources
@@ -1,75 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from typing import Callable, Optional
21
-
22
- import pytest
23
- from httpx import AsyncClient
24
-
25
- from nucliadb.reader.api import DEFAULT_RESOURCE_LIST_PAGE_SIZE
26
- from nucliadb.reader.api.v1.router import KB_PREFIX
27
- from nucliadb_models.resource import NucliaDBRoles
28
-
29
- # All this scenarios are meant to match a total of 10 resources
30
- # coming from test_pagination_resources. Tests uses redis so order
31
- # is not guaranteed
32
- PAGINATION_TEST_SCENARIOS = [
33
- (None, None, 10, True), # Get first (also last) page with default values
34
- (0, 20, 10, True), # Get first (also last)page explicitly
35
- (1, 20, 0, True), # Get invalid page
36
- (0, 5, 5, False), # Get first non-last page
37
- (1, 5, 5, True), # Get last full page
38
- (1, 6, 4, True), # Get last partial page
39
- ]
40
-
41
-
42
- @pytest.mark.asyncio
43
- @pytest.mark.parametrize(
44
- "page, size, expected_resources_count, expected_is_last_page",
45
- PAGINATION_TEST_SCENARIOS,
46
- )
47
- async def test_list_resources(
48
- reader_api: Callable[..., AsyncClient],
49
- test_resources: tuple[str, list[str]],
50
- page: Optional[int],
51
- size: Optional[int],
52
- expected_resources_count: int,
53
- expected_is_last_page: bool,
54
- ) -> None:
55
- kbid = test_resources[0]
56
-
57
- query_params = {}
58
- if page is not None:
59
- query_params["page"] = page
60
-
61
- if size is not None:
62
- query_params["size"] = size
63
-
64
- async with reader_api(roles=[NucliaDBRoles.READER]) as client:
65
- resp = await client.get(f"/{KB_PREFIX}/{kbid}/resources", params=query_params)
66
- assert resp.status_code == 200
67
- resources = resp.json()["resources"]
68
- pagination = resp.json()["pagination"]
69
-
70
- assert len(resources) == expected_resources_count
71
- assert pagination["last"] == expected_is_last_page
72
- assert pagination["page"] == query_params.get("page", 0)
73
- assert pagination["size"] == query_params.get(
74
- "size", DEFAULT_RESOURCE_LIST_PAGE_SIZE
75
- )
@@ -1,273 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import os
21
- from typing import Callable
22
-
23
- import pytest
24
- from httpx import AsyncClient
25
- from nucliadb_protos.resources_pb2 import FieldType
26
-
27
- import nucliadb.ingest.tests.fixtures
28
- from nucliadb.ingest.orm.resource import Resource
29
- from nucliadb.ingest.tests.fixtures import TEST_CLOUDFILE, THUMBNAIL
30
- from nucliadb.reader.api.v1.download import parse_media_range, safe_http_header_encode
31
- from nucliadb.reader.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX
32
- from nucliadb_models.resource import NucliaDBRoles
33
-
34
- BASE = ("field_id", "field_type")
35
- VALUE = ("value",)
36
- EXTRACTED = ("extracted",)
37
-
38
-
39
- @pytest.mark.asyncio
40
- async def test_resource_download_extracted_file(
41
- reader_api: Callable[..., AsyncClient], test_resource: Resource
42
- ) -> None:
43
- rsc = test_resource
44
- kbid = rsc.kb.kbid
45
- rid = rsc.uuid
46
- field_type = "text"
47
- field_id = "text1"
48
- download_type = "extracted"
49
- download_field = "thumbnail"
50
-
51
- async with reader_api(roles=[NucliaDBRoles.READER]) as client:
52
- resp = await client.get(
53
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/{field_type}/{field_id}/download/{download_type}/{download_field}", # noqa
54
- )
55
- assert resp.status_code == 200
56
- filename = f"{os.path.dirname(nucliadb.ingest.tests.fixtures.__file__)}{THUMBNAIL.bucket_name}/{THUMBNAIL.uri}"
57
-
58
- open(filename, "rb").read() == resp.content
59
-
60
-
61
- @pytest.mark.asyncio
62
- async def test_resource_download_field_file(
63
- reader_api: Callable[..., AsyncClient], test_resource: Resource
64
- ) -> None:
65
- rsc = test_resource
66
- kbid = rsc.kb.kbid
67
- rid = rsc.uuid
68
- field_id = "file1"
69
-
70
- async with reader_api(roles=[NucliaDBRoles.READER]) as client:
71
- resp = await client.get(
72
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}?show=values",
73
- )
74
- assert (
75
- resp.json()["data"]["files"]["file1"]["value"]["file"]["filename"]
76
- == "text.pb"
77
- )
78
-
79
- # Check that invalid range is handled
80
- resp = await client.get(
81
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/download/field",
82
- headers={"range": "bytes=invalid-range"},
83
- )
84
- assert resp.status_code == 416
85
- assert resp.json()["detail"]["reason"] == "rangeNotParsable"
86
-
87
- # Check that multipart ranges not implemented is handled
88
- resp = await client.get(
89
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/download/field",
90
- headers={"range": "bytes=0-50, 100-150"},
91
- )
92
- assert resp.status_code == 416
93
- assert resp.json()["detail"]["reason"] == "rangeNotSupported"
94
-
95
- resp = await client.get(
96
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/download/field",
97
- headers={"range": "bytes=0-"},
98
- )
99
- assert resp.status_code == 206
100
- assert resp.headers["Content-Disposition"]
101
-
102
- filename = f"{os.path.dirname(nucliadb.ingest.tests.fixtures.__file__)}/{TEST_CLOUDFILE.bucket_name}/{TEST_CLOUDFILE.uri}" # noqa
103
-
104
- open(filename, "rb").read() == resp.content
105
-
106
- resp = await client.get(
107
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}?show=values",
108
- )
109
- assert resp.status_code == 200
110
-
111
- assert (
112
- resp.json()["data"]["texts"]["text1"]["value"]["md5"]
113
- == "74a3187271f1d526b1f6271bfb7df52e"
114
- )
115
- assert (
116
- resp.json()["data"]["files"]["file1"]["value"]["file"]["md5"]
117
- == "01cca3f53edb934a445a3112c6caa652"
118
- )
119
-
120
-
121
- @pytest.mark.asyncio
122
- async def test_resource_download_field_layout(
123
- reader_api: Callable[..., AsyncClient], test_resource: Resource
124
- ) -> None:
125
- rsc = test_resource
126
- kbid = rsc.kb.kbid
127
- rid = rsc.uuid
128
- field_id = "layout1"
129
- download_field = "field1"
130
-
131
- async with reader_api(roles=[NucliaDBRoles.READER]) as client:
132
- resp = await client.get(
133
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/layout/{field_id}/download/field/{download_field}",
134
- )
135
- assert resp.status_code == 200
136
- filename = f"{os.path.dirname(nucliadb.ingest.tests.fixtures.__file__)}/{TEST_CLOUDFILE.bucket_name}/{TEST_CLOUDFILE.uri}" # noqa
137
-
138
- open(filename, "rb").read() == resp.content
139
-
140
-
141
- @pytest.mark.asyncio
142
- async def test_resource_download_field_conversation(
143
- reader_api: Callable[..., AsyncClient], test_resource: Resource
144
- ) -> None:
145
- rsc = test_resource
146
- kbid = rsc.kb.kbid
147
- rid = rsc.uuid
148
- field_id = "conv1"
149
-
150
- msg_id, file_id = await _get_message_with_file(test_resource)
151
-
152
- async with reader_api(roles=[NucliaDBRoles.READER]) as client:
153
- resp = await client.get(
154
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/conversation/{field_id}/download/field/{msg_id}/{file_id}",
155
- )
156
- assert resp.status_code == 200
157
- filename = f"{os.path.dirname(nucliadb.ingest.tests.fixtures.__file__)}/{THUMBNAIL.bucket_name}/{THUMBNAIL.uri}" # noqa
158
- assert open(filename, "rb").read() == resp.content
159
-
160
-
161
- @pytest.mark.parametrize(
162
- "endpoint_part,endpoint_params",
163
- [
164
- [
165
- "{field_type}/{field_id}/download/extracted/{download_field}",
166
- {"field_type": "text", "field_id": "text1", "download_field": "thumbnail"},
167
- ], # noqa
168
- ["file/{field_id}/download/field", {"field_id": "file1"}],
169
- [
170
- "layout/{field_id}/download/field/{download_field}",
171
- {"field_id": "layout1", "download_field": "field1"},
172
- ],
173
- [
174
- "conversation/{field_id}/download/field/{message_id}/{file_num}",
175
- {"field_id": "conv1"},
176
- ],
177
- ],
178
- )
179
- @pytest.mark.asyncio
180
- async def test_download_fields_by_resource_slug(
181
- reader_api, test_resource, endpoint_part, endpoint_params
182
- ):
183
- rsc = test_resource
184
- kbid = rsc.kb.kbid
185
- slug = rsc.basic.slug
186
- if endpoint_part.startswith("conversation"):
187
- # For conversations, we need to get a message id and a file number
188
- msg_id, file_num = await _get_message_with_file(test_resource)
189
- endpoint_params["message_id"] = msg_id
190
- endpoint_params["file_num"] = file_num
191
-
192
- async with reader_api(roles=[NucliaDBRoles.READER]) as client:
193
- resource_path = f"/{KB_PREFIX}/{kbid}/{RSLUG_PREFIX}/{slug}"
194
- endpoint = endpoint_part.format(**endpoint_params)
195
- resp = await client.get(
196
- f"{resource_path}/{endpoint}",
197
- )
198
- assert resp.status_code == 200
199
-
200
- # Check that 404 is returned when a slug does not exist
201
- unexisting_resource_path = f"/{KB_PREFIX}/{kbid}/{RSLUG_PREFIX}/idonotexist"
202
- resp = await client.get(
203
- f"{unexisting_resource_path}/{endpoint}",
204
- )
205
- assert resp.status_code == 404
206
- assert resp.json()["detail"] == "Resource does not exist"
207
-
208
-
209
- async def _get_message_with_file(test_resource):
210
- conversation_field = await test_resource.get_field("conv1", FieldType.CONVERSATION)
211
- conversations = await conversation_field.get_value(page=1)
212
- message_with_files = conversations.messages[33]
213
- msg_id, file_num = message_with_files.content.attachments[1].uri.split("/")[-2:]
214
- return msg_id, file_num
215
-
216
-
217
- @pytest.mark.parametrize(
218
- "range_request,filesize,start,end,range_size,exception",
219
- [
220
- # No end range specified
221
- ("bytes=0-", 10, 0, 9, 10, None),
222
- # End range == file size
223
- (f"bytes=0-10", 10, 0, 9, 10, None),
224
- # End range < file size
225
- (f"bytes=0-5", 10, 0, 5, 6, None),
226
- # End range > file size
227
- (f"bytes=0-11", 10, 0, 9, 10, None),
228
- # Starting at a middle point until the end
229
- (f"bytes=2-", 10, 2, 9, 8, None),
230
- # A slice of bytes in the middle of the file
231
- (f"bytes=2-8", 10, 2, 8, 7, None),
232
- # Invalid range
233
- ("bytes=something", 10, None, None, None, ValueError),
234
- # Multi-part ranges not supported yet
235
- ("bytes=0-50, 100-150", 10, None, None, None, NotImplementedError),
236
- ],
237
- )
238
- def test_parse_media_range(range_request, filesize, start, end, range_size, exception):
239
- if not exception:
240
- result = parse_media_range(range_request, filesize)
241
- assert result == (start, end, range_size)
242
- else:
243
- with pytest.raises(exception):
244
- parse_media_range(range_request, filesize)
245
-
246
-
247
- @pytest.mark.asyncio
248
- async def test_resource_download_field_file_content_disposition(
249
- reader_api: Callable[..., AsyncClient], test_resource: Resource
250
- ) -> None:
251
- rsc = test_resource
252
- kbid = rsc.kb.kbid
253
- rid = rsc.uuid
254
- field_id = "file1"
255
- download_url = (
256
- f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/download/field"
257
- )
258
- async with reader_api(roles=[NucliaDBRoles.READER]) as client:
259
- # Defaults to attachment
260
- resp = await client.get(download_url)
261
- assert resp.status_code == 200
262
- assert resp.headers["Content-Disposition"].startswith("attachment; filename=")
263
-
264
- resp = await client.get(f"{download_url}?inline=true")
265
- assert resp.status_code == 200
266
- assert resp.headers["Content-Disposition"] == "inline"
267
-
268
-
269
- @pytest.mark.parametrize("text", ["ÇŞĞIİÖÜ"])
270
- def test_safe_http_header_encode(text):
271
- safe_text = safe_http_header_encode(text)
272
- # This is how startette encodes the headers
273
- safe_text.lower().encode("latin-1")