nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,743 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import asyncio
21
- import base64
22
- import io
23
- import os
24
- from typing import Callable
25
-
26
- import pytest
27
- from httpx import AsyncClient
28
- from nucliadb_protos.resources_pb2 import FieldType
29
- from nucliadb_protos.writer_pb2 import BrokerMessage, ResourceFieldId
30
-
31
- from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX
32
- from nucliadb.writer.api.v1.upload import maybe_b64decode
33
- from nucliadb.writer.tus import TUSUPLOAD, UPLOAD, get_storage_manager
34
- from nucliadb_models.resource import NucliaDBRoles
35
- from nucliadb_utils import const
36
- from nucliadb_utils.utilities import get_ingest, get_storage, get_transaction_utility
37
-
38
- ASSETS_PATH = os.path.dirname(__file__) + "/assets"
39
-
40
-
41
- @pytest.mark.asyncio
42
- async def test_knowledgebox_file_tus_options(
43
- writer_api: Callable[[list[NucliaDBRoles]], AsyncClient], knowledgebox_writer: str
44
- ):
45
- client: AsyncClient
46
- async with writer_api([NucliaDBRoles.WRITER]) as client:
47
- resp = await client.options(
48
- f"/{KB_PREFIX}/{knowledgebox_writer}/resource/xxx/file/xxx/{TUSUPLOAD}/xxx"
49
- )
50
- assert resp.status_code == 204
51
- assert resp.headers["tus-resumable"] == "1.0.0"
52
- assert resp.headers["tus-version"] == "1.0.0"
53
- assert resp.headers["tus-extension"] == "creation-defer-length"
54
-
55
- resp = await client.options(
56
- f"/{KB_PREFIX}/{knowledgebox_writer}/resource/xxx/file/xxx/{TUSUPLOAD}"
57
- )
58
- assert resp.status_code == 204
59
- assert resp.headers["tus-resumable"] == "1.0.0"
60
- assert resp.headers["tus-version"] == "1.0.0"
61
- assert resp.headers["tus-extension"] == "creation-defer-length"
62
-
63
- resp = await client.options(f"/{KB_PREFIX}/{knowledgebox_writer}/{TUSUPLOAD}")
64
- assert resp.status_code == 204
65
- assert resp.headers["tus-resumable"] == "1.0.0"
66
- assert resp.headers["tus-version"] == "1.0.0"
67
- assert resp.headers["tus-extension"] == "creation-defer-length"
68
-
69
- resp = await client.options(
70
- f"/{KB_PREFIX}/{knowledgebox_writer}/{TUSUPLOAD}/xxx"
71
- )
72
- assert resp.status_code == 204
73
- assert resp.headers["tus-resumable"] == "1.0.0"
74
- assert resp.headers["tus-version"] == "1.0.0"
75
- assert resp.headers["tus-extension"] == "creation-defer-length"
76
-
77
-
78
- @pytest.mark.asyncio
79
- async def test_knowledgebox_file_tus_upload_root(writer_api, knowledgebox_writer):
80
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
81
- language = base64.b64encode(b"ca").decode()
82
- filename = base64.b64encode(b"image.jpg").decode()
83
- md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
84
- resp = await client.post(
85
- f"/{KB_PREFIX}/{knowledgebox_writer}/{TUSUPLOAD}",
86
- headers={
87
- "tus-resumable": "1.0.0",
88
- "upload-metadata": f"filename {filename},language {language},md5 {md5}",
89
- "content-type": "image/jpg",
90
- "upload-defer-length": "1",
91
- },
92
- )
93
- assert resp.status_code == 201
94
- url = resp.headers["location"]
95
-
96
- offset = 0
97
-
98
- # We upload a file that spans across more than one chunk
99
- min_chunk_size = get_storage_manager().min_upload_size
100
- raw_bytes = b"x" * min_chunk_size + b"y" * 500
101
- io_bytes = io.BytesIO(raw_bytes)
102
- data = io_bytes.read(min_chunk_size)
103
- while data != b"":
104
- resp = await client.head(url)
105
- assert resp.headers["Upload-Length"] == f"0"
106
- assert resp.headers["Upload-Offset"] == f"{offset}"
107
-
108
- headers = {
109
- "upload-offset": f"{offset}",
110
- "content-length": f"{len(data)}",
111
- }
112
- is_last_chunk = len(data) < min_chunk_size
113
- if is_last_chunk:
114
- headers["upload-length"] = f"{offset + len(data)}"
115
-
116
- resp = await client.patch(
117
- url,
118
- content=data,
119
- headers=headers,
120
- )
121
- offset += len(data)
122
- data = io_bytes.read(min_chunk_size)
123
-
124
- assert resp.headers["Tus-Upload-Finished"] == "1"
125
-
126
- transaction = get_transaction_utility()
127
-
128
- sub = await transaction.js.pull_subscribe(
129
- const.Streams.INGEST.subject.format(partition="1"), "auto"
130
- )
131
- msgs = await sub.fetch(1)
132
-
133
- writer = BrokerMessage()
134
- writer.ParseFromString(msgs[0].data)
135
- await msgs[0].ack()
136
-
137
- path = resp.headers["ndb-field"]
138
- field = path.split("/")[-1]
139
- rid = path.split("/")[-3]
140
- assert writer.uuid == rid
141
- assert writer.basic.icon == "image/jpg"
142
- assert writer.basic.title == "image.jpg"
143
- assert writer.files[field].language == "ca"
144
- assert writer.files[field].file.size == len(raw_bytes)
145
- assert writer.files[field].file.filename == "image.jpg"
146
- assert writer.files[field].file.md5 == "7af0916dba8b70e29d99e72941923529"
147
-
148
- storage = await get_storage()
149
- data = await storage.downloadbytes(
150
- bucket=writer.files[field].file.bucket_name,
151
- key=writer.files[field].file.uri,
152
- )
153
- assert len(data.read()) == len(raw_bytes)
154
- await asyncio.sleep(1)
155
-
156
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
157
- resp = await client.post(
158
- f"/{KB_PREFIX}/{knowledgebox_writer}/{TUSUPLOAD}",
159
- headers={
160
- "tus-resumable": "1.0.0",
161
- "upload-metadata": f"filename {filename},language {language},md5 {md5}",
162
- "content-type": "image/jpg",
163
- "upload-defer-length": "1",
164
- },
165
- )
166
- assert resp.status_code == 409
167
-
168
-
169
- @pytest.mark.asyncio
170
- async def test_knowledgebox_file_upload_root(
171
- writer_api: Callable[[list[NucliaDBRoles]], AsyncClient],
172
- knowledgebox_writer: str,
173
- ):
174
- async with writer_api([NucliaDBRoles.WRITER]) as client:
175
- with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
176
- resp = await client.post(
177
- f"/{KB_PREFIX}/{knowledgebox_writer}/{UPLOAD}",
178
- content=f.read(),
179
- headers={
180
- "content-type": "image/jpg",
181
- "X-MD5": "7af0916dba8b70e29d99e72941923529",
182
- },
183
- )
184
- assert resp.status_code == 201
185
-
186
- transaction = get_transaction_utility()
187
-
188
- assert transaction.js is not None
189
- sub = await transaction.js.pull_subscribe(
190
- const.Streams.INGEST.subject.format(partition="1"), "auto"
191
- )
192
- msgs = await sub.fetch(1)
193
- writer = BrokerMessage()
194
- writer.ParseFromString(msgs[0].data)
195
- await msgs[0].ack()
196
-
197
- body = resp.json()
198
- field = body["field_id"]
199
- rid = body["uuid"]
200
- assert writer.uuid == rid
201
- assert writer.basic.icon == "image/jpg"
202
- assert writer.files[field].file.size == 30472
203
-
204
- storage = await get_storage()
205
- data = await storage.downloadbytes(
206
- bucket=writer.files[field].file.bucket_name,
207
- key=writer.files[field].file.uri,
208
- )
209
- assert len(data.read()) == 30472
210
- await asyncio.sleep(1)
211
-
212
- async with writer_api([NucliaDBRoles.WRITER]) as client:
213
- with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
214
- resp = await client.post(
215
- f"/{KB_PREFIX}/{knowledgebox_writer}/{UPLOAD}",
216
- content=f.read(),
217
- headers={
218
- "content-type": "image/jpg",
219
- "X-MD5": "7af0916dba8b70e29d99e72941923529",
220
- },
221
- )
222
- assert resp.status_code == 409
223
-
224
-
225
- @pytest.mark.asyncio
226
- async def test_knowledgebox_file_upload_root_headers(
227
- writer_api: Callable[[list[NucliaDBRoles]], AsyncClient],
228
- knowledgebox_writer: str,
229
- ):
230
- async with writer_api([NucliaDBRoles.WRITER]) as client:
231
- filename = base64.b64encode(b"image.jpg").decode()
232
- with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
233
- resp = await client.post(
234
- f"/{KB_PREFIX}/{knowledgebox_writer}/{UPLOAD}",
235
- content=f.read(),
236
- headers={
237
- "X-FILENAME": filename,
238
- "X-LANGUAGE": "ca",
239
- "X-MD5": "7af0916dba8b70e29d99e72941923529",
240
- "content-type": "image/jpg",
241
- },
242
- )
243
- assert resp.status_code == 201
244
-
245
- transaction = get_transaction_utility()
246
-
247
- assert transaction.js is not None
248
- sub = await transaction.js.pull_subscribe(
249
- const.Streams.INGEST.subject.format(partition="1"), "auto"
250
- )
251
- msgs = await sub.fetch(1)
252
- writer = BrokerMessage()
253
- writer.ParseFromString(msgs[0].data)
254
- await msgs[0].ack()
255
-
256
- body = resp.json()
257
- field = body["field_id"]
258
- rid = body["uuid"]
259
- assert writer.uuid == rid
260
- assert writer.basic.icon == "image/jpg"
261
- assert writer.basic.title == "image.jpg"
262
- assert writer.files[field].language == "ca"
263
- assert writer.files[field].file.size == 30472
264
-
265
- storage = await get_storage()
266
- data = await storage.downloadbytes(
267
- bucket=writer.files[field].file.bucket_name,
268
- key=writer.files[field].file.uri,
269
- )
270
- assert len(data.read()) == 30472
271
-
272
-
273
- @pytest.mark.asyncio
274
- async def test_knowledgebox_file_tus_upload_field(
275
- writer_api, knowledgebox_writer, resource
276
- ):
277
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
278
- language = base64.b64encode(b"ca").decode()
279
- filename = base64.b64encode(b"image.jpg").decode()
280
- md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
281
-
282
- resp = await client.post(
283
- f"/{KB_PREFIX}/{knowledgebox_writer}/resource/invalidresource/file/field1/{TUSUPLOAD}",
284
- headers={
285
- "tus-resumable": "1.0.0",
286
- "upload-metadata": f"filename {filename},language {language},md5 {md5}",
287
- "content-type": "image/jpg",
288
- "upload-defer-length": "1",
289
- },
290
- )
291
- assert resp.status_code == 404
292
- await asyncio.sleep(1)
293
-
294
- resp = await client.post(
295
- f"/{KB_PREFIX}/{knowledgebox_writer}/resource/{resource}/file/field1/{TUSUPLOAD}",
296
- headers={
297
- "tus-resumable": "1.0.0",
298
- "upload-metadata": f"filename {filename},language {language},md5 {md5}",
299
- "content-type": "image/jpg",
300
- "upload-defer-length": "1",
301
- },
302
- )
303
- assert resp.status_code == 201
304
- url = resp.headers["location"]
305
-
306
- offset = 0
307
- # We upload a file that spans across more than one chunk
308
- min_chunk_size = get_storage_manager().min_upload_size
309
- raw_bytes = b"x" * min_chunk_size + b"y" * 500
310
- io_bytes = io.BytesIO(raw_bytes)
311
- data = io_bytes.read(min_chunk_size)
312
- while data != b"":
313
- resp = await client.head(url)
314
-
315
- assert resp.headers["Upload-Length"] == f"0"
316
- assert resp.headers["Upload-Offset"] == f"{offset}"
317
-
318
- headers = {
319
- "upload-offset": f"{offset}",
320
- "content-length": f"{len(data)}",
321
- }
322
- is_last_chunk = len(data) < min_chunk_size
323
- if is_last_chunk:
324
- headers["upload-length"] = f"{offset + len(data)}"
325
-
326
- resp = await client.patch(
327
- url,
328
- content=data,
329
- headers=headers,
330
- )
331
- assert resp.status_code == 200
332
- offset += len(data)
333
- data = io_bytes.read(min_chunk_size)
334
-
335
- assert resp.headers["Tus-Upload-Finished"] == "1"
336
-
337
- transaction = get_transaction_utility()
338
-
339
- sub = await transaction.js.pull_subscribe(
340
- const.Streams.INGEST.subject.format(partition="1"), "auto"
341
- )
342
- msgs = await sub.fetch(2)
343
-
344
- writer = BrokerMessage()
345
- writer.ParseFromString(msgs[1].data)
346
- await msgs[1].ack()
347
-
348
- path = resp.headers["ndb-field"]
349
- field = path.split("/")[-1]
350
- rid = path.split("/")[-3]
351
- assert writer.uuid == rid
352
- assert writer.basic.icon == "image/jpg"
353
- assert writer.basic.title == ""
354
- assert writer.files[field].language == "ca"
355
- assert writer.files[field].file.size == len(raw_bytes)
356
- assert writer.files[field].file.filename == "image.jpg"
357
- assert writer.files[field].file.md5 == "7af0916dba8b70e29d99e72941923529"
358
-
359
- storage = await get_storage()
360
- data = await storage.downloadbytes(
361
- bucket=writer.files[field].file.bucket_name,
362
- key=writer.files[field].file.uri,
363
- )
364
- assert len(data.read()) == len(raw_bytes)
365
-
366
-
367
- @pytest.mark.asyncio
368
- async def test_knowledgebox_file_upload_field_headers(
369
- writer_api, knowledgebox_writer, resource
370
- ):
371
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
372
- filename = "image.jpg"
373
- encoded_filename = base64.b64encode(filename.encode()).decode()
374
- with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
375
- resp = await client.post(
376
- f"/{KB_PREFIX}/{knowledgebox_writer}/resource/{resource}/file/field1/{UPLOAD}",
377
- content=f.read(),
378
- headers={
379
- "X-FILENAME": encoded_filename,
380
- "X-LANGUAGE": "ca",
381
- "X-MD5": "7af0916dba8b70e29d99e72941923529",
382
- "content-type": "image/jpg",
383
- },
384
- )
385
- assert resp.status_code == 201
386
-
387
- transaction = get_transaction_utility()
388
-
389
- sub = await transaction.js.pull_subscribe(
390
- const.Streams.INGEST.subject.format(partition="1"), "auto"
391
- )
392
- msgs = await sub.fetch(2)
393
- writer = BrokerMessage()
394
- writer.ParseFromString(msgs[1].data)
395
- await msgs[1].ack()
396
-
397
- body = resp.json()
398
- field = body["field_id"]
399
- rid = body["uuid"]
400
- assert writer.uuid == rid
401
- assert writer.basic.icon == "image/jpg"
402
- assert writer.basic.title == ""
403
- assert writer.files[field].language == "ca"
404
- assert writer.files[field].file.size == 30472
405
- assert writer.files[field].file.filename == filename
406
-
407
- storage = await get_storage()
408
- data = await storage.downloadbytes(
409
- bucket=writer.files[field].file.bucket_name,
410
- key=writer.files[field].file.uri,
411
- )
412
- assert len(data.read()) == 30472
413
-
414
-
415
- @pytest.mark.asyncio
416
- async def test_knowledgebox_file_upload_field_sync(
417
- writer_api, knowledgebox_writer, resource
418
- ):
419
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
420
- filename = "image.jpg"
421
- with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
422
- resp = await client.post(
423
- f"/{KB_PREFIX}/{knowledgebox_writer}/resource/{resource}/file/field1/{UPLOAD}",
424
- content=f.read(),
425
- headers={
426
- "X-FILENAME": filename,
427
- "X-LANGUAGE": "ca",
428
- "X-MD5": "7af0916dba8b70e29d99e72941923529",
429
- "content-type": "image/jpg",
430
- "X-SYNCHRONOUS": "True",
431
- },
432
- )
433
- assert resp.status_code == 201
434
-
435
- ingest = get_ingest()
436
- pbrequest = ResourceFieldId()
437
- pbrequest.kbid = knowledgebox_writer
438
- pbrequest.rid = resource
439
- pbrequest.field_type = FieldType.FILE
440
- pbrequest.field = "field1"
441
-
442
- res = await ingest.ResourceFieldExists(pbrequest)
443
- assert res.found
444
-
445
-
446
- @pytest.mark.asyncio
447
- async def test_file_tus_upload_field_by_slug(writer_api, knowledgebox_writer, resource):
448
- kb = knowledgebox_writer
449
- rslug = "resource1"
450
-
451
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
452
- language = base64.b64encode(b"ca").decode()
453
- filename = base64.b64encode(b"image.jpg").decode()
454
- md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
455
- headers = {
456
- "tus-resumable": "1.0.0",
457
- "upload-metadata": f"filename {filename},language {language},md5 {md5}",
458
- "content-type": "image/jpg",
459
- "upload-defer-length": "1",
460
- }
461
-
462
- resp = await client.post(
463
- f"/{KB_PREFIX}/{kb}/slug/idonotexist/file/field1/{TUSUPLOAD}",
464
- headers=headers,
465
- )
466
- assert resp.status_code == 404
467
-
468
- resp = await client.post(
469
- f"/{KB_PREFIX}/{kb}/slug/{rslug}/file/field1/{TUSUPLOAD}",
470
- headers=headers,
471
- )
472
- assert resp.status_code == 201
473
- url = resp.headers["location"]
474
-
475
- # Check that we are using the slug for the whole file upload
476
- assert f"{RSLUG_PREFIX}/{rslug}" in url
477
-
478
- offset = 0
479
- min_chunk_size = get_storage_manager().min_upload_size
480
- raw_bytes = b"x" * min_chunk_size + b"y" * 500
481
- io_bytes = io.BytesIO(raw_bytes)
482
- data = io_bytes.read(min_chunk_size)
483
- while data != b"":
484
- resp = await client.head(url)
485
-
486
- assert resp.headers["Upload-Length"] == f"0"
487
- assert resp.headers["Upload-Offset"] == f"{offset}"
488
-
489
- headers = {
490
- "upload-offset": f"{offset}",
491
- "content-length": f"{len(data)}",
492
- }
493
- is_last_chunk = len(data) < min_chunk_size
494
- if is_last_chunk:
495
- headers["upload-length"] = f"{offset + len(data)}"
496
-
497
- resp = await client.patch(
498
- url,
499
- content=data,
500
- headers=headers,
501
- )
502
- assert resp.status_code == 200
503
- offset += len(data)
504
- data = io_bytes.read(min_chunk_size)
505
-
506
- assert resp.headers["Tus-Upload-Finished"] == "1"
507
-
508
- transaction = get_transaction_utility()
509
-
510
- sub = await transaction.js.pull_subscribe(
511
- const.Streams.INGEST.subject.format(partition="1"), "auto"
512
- )
513
- msgs = await sub.fetch(2)
514
-
515
- writer = BrokerMessage()
516
- writer.ParseFromString(msgs[1].data)
517
- await msgs[1].ack()
518
-
519
- path = resp.headers["ndb-field"]
520
- field = path.split("/")[-1]
521
- rid = path.split("/")[-3]
522
- assert writer.uuid == rid
523
- assert writer.basic.icon == "image/jpg"
524
- assert writer.basic.title == ""
525
- assert writer.files[field].language == "ca"
526
- assert writer.files[field].file.size == len(raw_bytes)
527
- assert writer.files[field].file.filename == "image.jpg"
528
- assert writer.files[field].file.md5 == "7af0916dba8b70e29d99e72941923529"
529
-
530
- storage = await get_storage()
531
- data = await storage.downloadbytes(
532
- bucket=writer.files[field].file.bucket_name,
533
- key=writer.files[field].file.uri,
534
- )
535
- assert len(data.read()) == len(raw_bytes)
536
-
537
-
538
- @pytest.mark.asyncio
539
- async def test_file_tus_upload_urls_field_by_resource_id(
540
- writer_api, knowledgebox_writer, resource
541
- ):
542
- kb = knowledgebox_writer
543
-
544
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
545
- language = base64.b64encode(b"ca").decode()
546
- filename = base64.b64encode(b"image.jpg").decode()
547
- md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
548
- headers = {
549
- "tus-resumable": "1.0.0",
550
- "upload-metadata": f"filename {filename},language {language},md5 {md5}",
551
- "content-type": "image/jpg",
552
- "upload-defer-length": "1",
553
- }
554
-
555
- resp = await client.post(
556
- f"/{KB_PREFIX}/{kb}/resource/idonotexist/file/field1/{TUSUPLOAD}",
557
- headers=headers,
558
- )
559
- assert resp.status_code == 404
560
-
561
- resp = await client.post(
562
- f"/{KB_PREFIX}/{kb}/resource/{resource}/file/field1/{TUSUPLOAD}",
563
- headers=headers,
564
- )
565
- assert resp.status_code == 201
566
- url = resp.headers["location"]
567
-
568
- # Check that we are using the resource for the whole file upload
569
- assert f"{RESOURCE_PREFIX}/{resource}" in url
570
-
571
- # Make sure the returned URL works
572
- resp = await client.head(url)
573
- assert resp.status_code == 200
574
-
575
- assert resp.headers["Upload-Length"] == "0"
576
- assert resp.headers["Upload-Offset"] == "0"
577
-
578
-
579
- @pytest.mark.asyncio
580
- async def test_multiple_tus_file_upload_tries(
581
- writer_api, knowledgebox_writer, resource
582
- ):
583
- kb = knowledgebox_writer
584
- rslug = "resource1"
585
-
586
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
587
- headers = {
588
- "tus-resumable": "1.0.0",
589
- "content-type": "image/jpg",
590
- "upload-defer-length": "1",
591
- }
592
-
593
- resp = await client.post(
594
- f"/{KB_PREFIX}/{kb}/slug/{rslug}/file/field1/{TUSUPLOAD}",
595
- headers=headers,
596
- )
597
- assert resp.status_code == 201
598
- url = resp.headers["location"]
599
-
600
- # Check that we are using the slug for the whole file upload
601
- assert f"{RSLUG_PREFIX}/{rslug}" in url
602
- resp = await client.patch(
603
- url,
604
- content=b"x" * 10000,
605
- headers={
606
- "upload-offset": "0",
607
- "content-length": "10000",
608
- "upload-length": "10000",
609
- },
610
- )
611
- assert resp.status_code == 200
612
-
613
- assert resp.headers["Tus-Upload-Finished"] == "1"
614
-
615
- # next one should work as well
616
- resp = await client.post(
617
- f"/{KB_PREFIX}/{kb}/slug/{rslug}/file/field1/{TUSUPLOAD}",
618
- headers=headers,
619
- )
620
- assert resp.status_code == 201
621
- url = resp.headers["location"]
622
-
623
- # Check that we are using the slug for the whole file upload
624
- assert f"{RSLUG_PREFIX}/{rslug}" in url
625
- resp = await client.patch(
626
- url,
627
- content=b"x" * 10000,
628
- headers={
629
- "upload-offset": "0",
630
- "content-length": "10000",
631
- "upload-length": "10000",
632
- },
633
- )
634
- assert resp.status_code == 200
635
-
636
- assert resp.headers["Tus-Upload-Finished"] == "1"
637
-
638
-
639
- @pytest.mark.asyncio
640
- async def test_file_upload_by_slug(writer_api, knowledgebox_writer):
641
- kb = knowledgebox_writer
642
- rslug = "myslug"
643
-
644
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
645
- resp = await client.post(
646
- f"/{KB_PREFIX}/{kb}/resources",
647
- headers={
648
- "X-Synchronous": "True",
649
- },
650
- json={
651
- "slug": rslug,
652
- },
653
- )
654
- assert str(resp.status_code).startswith("2")
655
-
656
- filename = "image.jpg"
657
- with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
658
- resp = await client.post(
659
- f"/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{rslug}/file/file1/{UPLOAD}",
660
- content=f.read(),
661
- headers={
662
- "X-FILENAME": filename,
663
- "content-type": "image/jpg",
664
- "X-MD5": "7af0916dba8b70e29d99e72941923529",
665
- },
666
- )
667
- assert resp.status_code == 201
668
-
669
- transaction = get_transaction_utility()
670
-
671
- sub = await transaction.js.pull_subscribe(
672
- const.Streams.INGEST.subject.format(partition="1"), "auto"
673
- )
674
- msgs = await sub.fetch(2)
675
-
676
- writer = BrokerMessage()
677
- writer.ParseFromString(msgs[-1].data)
678
- await msgs[-1].ack()
679
-
680
- body = resp.json()
681
- field = body["field_id"]
682
- rid = body["uuid"]
683
-
684
- assert writer.uuid == rid
685
- assert writer.basic.icon == "image/jpg"
686
- assert writer.files[field].file.size == 30472
687
- assert writer.files[field].file.filename == filename
688
-
689
- storage = await get_storage()
690
- data = await storage.downloadbytes(
691
- bucket=writer.files[field].file.bucket_name,
692
- key=writer.files[field].file.uri,
693
- )
694
- assert len(data.read()) == 30472
695
-
696
-
697
- def test_maybe_b64decode():
698
- something = "something"
699
- something_encoded = base64.b64encode(something.encode())
700
- assert maybe_b64decode(something_encoded) == something
701
- assert maybe_b64decode(something) == something
702
-
703
-
704
- @pytest.mark.asyncio
705
- async def test_tus_validates_intermediate_chunks_length(
706
- writer_api, knowledgebox_writer
707
- ):
708
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
709
- language = base64.b64encode(b"ca").decode()
710
- filename = base64.b64encode(b"image.jpg").decode()
711
- md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
712
- resp = await client.post(
713
- f"/{KB_PREFIX}/{knowledgebox_writer}/{TUSUPLOAD}",
714
- headers={
715
- "tus-resumable": "1.0.0",
716
- "upload-metadata": f"filename {filename},language {language},md5 {md5}",
717
- "content-type": "image/jpg",
718
- "upload-defer-length": "1",
719
- },
720
- )
721
- assert resp.status_code == 201
722
- url = resp.headers["location"]
723
- # We upload a chunk that is smaller than the minimum chunk size
724
- min_chunk_size = get_storage_manager().min_upload_size
725
- raw_bytes = b"x" * min_chunk_size + b"y" * 500
726
- io_bytes = io.BytesIO(raw_bytes)
727
- chunk = io_bytes.read(min_chunk_size - 10)
728
-
729
- resp = await client.head(url)
730
-
731
- headers = {
732
- "upload-offset": f"0",
733
- "content-length": f"{len(chunk)}",
734
- }
735
- resp = await client.patch(
736
- url,
737
- content=chunk,
738
- headers=headers,
739
- )
740
- assert resp.status_code == 412
741
- assert resp.json()["detail"].startswith(
742
- "Intermediate chunks cannot be smaller than"
743
- )