nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -1,331 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from unittest.mock import AsyncMock, MagicMock, Mock, patch
21
-
22
- import pytest
23
- from nucliadb_protos.resources_pb2 import (
24
- AllFieldIDs,
25
- Basic,
26
- CloudFile,
27
- FieldID,
28
- FieldText,
29
- FieldType,
30
- FileExtractedData,
31
- PagePositions,
32
- )
33
- from nucliadb_protos.writer_pb2 import BrokerMessage
34
-
35
- from nucliadb.ingest.orm.resource import (
36
- Resource,
37
- get_file_page_positions,
38
- get_text_field_mimetype,
39
- maybe_update_basic_icon,
40
- maybe_update_basic_summary,
41
- maybe_update_basic_thumbnail,
42
- update_basic_languages,
43
- )
44
- from nucliadb_protos import utils_pb2, writer_pb2
45
-
46
-
47
- @pytest.mark.asyncio
48
- async def test_get_file_page_positions():
49
- extracted_data = FileExtractedData()
50
- extracted_data.file_pages_previews.positions.extend(
51
- [PagePositions(start=0, end=10), PagePositions(start=11, end=20)]
52
- )
53
- file_field = AsyncMock(
54
- get_file_extracted_data=AsyncMock(return_value=extracted_data)
55
- )
56
- assert await get_file_page_positions(file_field) == {0: (0, 10), 1: (11, 20)}
57
-
58
-
59
- @pytest.mark.parametrize(
60
- "basic,summary,updated",
61
- [
62
- (Basic(), "new_summary", True),
63
- (Basic(summary="summary"), "new_summary", False),
64
- (Basic(summary="summary"), "", False),
65
- ],
66
- )
67
- def test_maybe_update_basic_summary(basic, summary, updated):
68
- assert maybe_update_basic_summary(basic, summary) is updated
69
- if updated:
70
- assert basic.summary == summary
71
- else:
72
- assert basic.summary != summary
73
-
74
-
75
- def test_update_basic_languages():
76
- basic = Basic()
77
- # Languages are updated the first time
78
- assert update_basic_languages(basic, ["en"]) is True
79
- assert basic.metadata.language == "en"
80
- assert basic.metadata.languages == ["en"]
81
-
82
- # Languages are not updated
83
- assert update_basic_languages(basic, ["en"]) is False
84
- assert basic.metadata.language == "en"
85
- assert basic.metadata.languages == ["en"]
86
-
87
- # Main language is not updated but new language is added
88
- assert update_basic_languages(basic, ["de"]) is True
89
- assert basic.metadata.language == "en"
90
- assert basic.metadata.languages == ["en", "de"]
91
-
92
- # Null values
93
- assert update_basic_languages(basic, [""]) is False
94
- assert update_basic_languages(basic, [None]) is False # type: ignore
95
- assert basic.metadata.language == "en"
96
- assert basic.metadata.languages == ["en", "de"]
97
-
98
-
99
- @pytest.mark.parametrize(
100
- "basic,thumbnail,updated",
101
- [
102
- (Basic(), CloudFile(uri="new_thumbnail_url"), True),
103
- (
104
- Basic(thumbnail="old_thumbnail_url"),
105
- CloudFile(uri="new_thumbnail_url"),
106
- False,
107
- ),
108
- (Basic(thumbnail="old_thumbnail_url"), None, False),
109
- ],
110
- )
111
- def test_maybe_update_basic_thumbnail(basic, thumbnail, updated):
112
- assert maybe_update_basic_thumbnail(basic, thumbnail) == updated
113
- if updated:
114
- assert basic.thumbnail == thumbnail.uri
115
- else:
116
- assert basic.thumbnail == "old_thumbnail_url"
117
-
118
-
119
- @pytest.mark.parametrize(
120
- "text_format,mimetype",
121
- [
122
- (None, None),
123
- (FieldText.Format.PLAIN, "text/plain"),
124
- (FieldText.Format.HTML, "text/html"),
125
- (FieldText.Format.RST, "text/x-rst"),
126
- (FieldText.Format.MARKDOWN, "text/markdown"),
127
- (FieldText.Format.KEEP_MARKDOWN, "text/markdown"),
128
- ],
129
- )
130
- def test_get_text_field_mimetype(text_format, mimetype):
131
- message = BrokerMessage()
132
- if text_format is not None:
133
- message.texts["foo"].body = "foo"
134
- message.texts["foo"].format = text_format
135
- assert get_text_field_mimetype(message) == mimetype
136
-
137
-
138
- @pytest.mark.parametrize(
139
- "basic,icon,updated",
140
- [
141
- (Basic(), None, False),
142
- (Basic(icon="text/plain"), "text/html", False),
143
- (Basic(), "text/html", True),
144
- (Basic(icon=""), "text/html", True),
145
- (Basic(icon="application/octet-stream"), "text/html", True),
146
- ],
147
- )
148
- def test_maybe_update_basic_icon(basic, icon, updated):
149
- assert maybe_update_basic_icon(basic, icon) == updated
150
- if updated:
151
- assert basic.icon == icon
152
-
153
-
154
- class Transaction:
155
- def __init__(self):
156
- self.kv = {}
157
-
158
- async def get(self, key):
159
- return self.kv.get(key)
160
-
161
- async def set(self, key, value):
162
- self.kv[key] = value
163
-
164
-
165
- @pytest.fixture(scope="function")
166
- def txn():
167
- return Transaction()
168
-
169
-
170
- @pytest.fixture(scope="function")
171
- def storage():
172
- mock = AsyncMock()
173
- return mock
174
-
175
-
176
- @pytest.fixture(scope="function")
177
- def kb():
178
- mock = AsyncMock()
179
- mock.kbid = "mock-kbid"
180
- return mock
181
-
182
-
183
- async def test_get_fields_ids_caches_keys(txn, storage, kb):
184
- resource = Resource(txn, storage, kb, "rid")
185
- cached_field_keys = [(0, "foo"), (1, "bar")]
186
- new_field_keys = [(2, "baz")]
187
- resource._inner_get_fields_ids = AsyncMock(return_value=new_field_keys) # type: ignore
188
- resource.all_fields_keys = cached_field_keys
189
-
190
- assert await resource.get_fields_ids() == cached_field_keys
191
- resource._inner_get_fields_ids.assert_not_awaited()
192
-
193
- assert await resource.get_fields_ids(force=True) == new_field_keys
194
- resource._inner_get_fields_ids.assert_awaited_once()
195
- assert resource.all_fields_keys == new_field_keys
196
-
197
- # If the all_field_keys is an empty list,
198
- # we should not be calling the inner_get_fields_ids
199
- resource.all_fields_keys = []
200
- resource._inner_get_fields_ids.reset_mock()
201
- assert await resource.get_fields_ids() == []
202
- resource._inner_get_fields_ids.assert_not_awaited()
203
-
204
-
205
- async def test_get_set_all_field_ids(txn, storage, kb):
206
- resource = Resource(txn, storage, kb, "rid")
207
-
208
- assert await resource.get_all_field_ids() is None
209
-
210
- all_fields = AllFieldIDs()
211
- all_fields.fields.append(FieldID(field_type=FieldType.TEXT, field="text"))
212
-
213
- await resource.set_all_field_ids(all_fields)
214
-
215
- assert await resource.get_all_field_ids() == all_fields
216
-
217
-
218
- async def test_update_all_fields_key(txn, storage, kb):
219
- resource = Resource(txn, storage, kb, "rid")
220
-
221
- await resource.update_all_field_ids(updated=[], deleted=[])
222
-
223
- # Initial value is Empty
224
- assert (await resource.get_all_field_ids()) == AllFieldIDs()
225
-
226
- all_fields = AllFieldIDs()
227
- all_fields.fields.append(FieldID(field_type=FieldType.TEXT, field="text1"))
228
- all_fields.fields.append(FieldID(field_type=FieldType.TEXT, field="text2"))
229
-
230
- await resource.update_all_field_ids(updated=all_fields.fields)
231
-
232
- # Check updates
233
- assert await resource.get_all_field_ids() == all_fields
234
-
235
- file_field = FieldID(field_type=FieldType.FILE, field="file")
236
- await resource.update_all_field_ids(updated=[file_field])
237
-
238
- result = await resource.get_all_field_ids()
239
- assert list(result.fields) == list(all_fields.fields) + [file_field]
240
-
241
- # Check deletes
242
- await resource.update_all_field_ids(deleted=[file_field])
243
-
244
- assert await resource.get_all_field_ids() == all_fields
245
-
246
-
247
- async def test_apply_fields_calls_update_all_field_ids(txn, storage, kb):
248
- resource = Resource(txn, storage, kb, "rid")
249
- resource.update_all_field_ids = AsyncMock() # type: ignore
250
- resource.set_field = AsyncMock() # type: ignore
251
-
252
- bm = MagicMock()
253
- bm.layouts = {"layout": MagicMock()}
254
- bm.texts = {"text": MagicMock()}
255
- bm.keywordsets = {"keywordset": MagicMock()}
256
- bm.datetimes = {"datetime": MagicMock()}
257
- bm.links = {"link": MagicMock()}
258
- bm.files = {"file": MagicMock()}
259
- bm.conversations = {"conversation": MagicMock()}
260
- bm.delete_fields.append(FieldID(field_type=FieldType.LAYOUT, field="to_delete"))
261
-
262
- await resource.apply_fields(bm)
263
-
264
- resource.update_all_field_ids.assert_awaited_once()
265
-
266
- resource.update_all_field_ids.call_args[1]["updated"] == [
267
- FieldID(field_type=FieldType.LAYOUT, field="layout"),
268
- FieldID(field_type=FieldType.TEXT, field="text"),
269
- FieldID(field_type=FieldType.KEYWORDSET, field="keywordset"),
270
- FieldID(field_type=FieldType.DATETIME, field="datetime"),
271
- FieldID(field_type=FieldType.LINK, field="link"),
272
- FieldID(field_type=FieldType.FILE, field="file"),
273
- FieldID(field_type=FieldType.CONVERSATION, field="conversation"),
274
- ]
275
- resource.update_all_field_ids.call_args[1]["deleted"] == [
276
- FieldID(field_type=FieldType.LAYOUT, field="to_delete"),
277
- ]
278
-
279
-
280
- async def test_apply_extracted_vectors_matryoshka_embeddings(txn, storage, kb):
281
- STORED_VECTOR_DIMENSION = 100
282
- MATRYOSHKA_DIMENSION = 10
283
-
284
- mock_field = AsyncMock()
285
- vectors = utils_pb2.VectorObject(
286
- vectors=utils_pb2.Vectors(
287
- vectors=[
288
- utils_pb2.Vector(
289
- start=0,
290
- end=10,
291
- start_paragraph=0,
292
- end_paragraph=10,
293
- vector=[1.0] * STORED_VECTOR_DIMENSION,
294
- )
295
- ]
296
- )
297
- )
298
- mock_field.set_vectors.return_value = (vectors, False, [])
299
-
300
- resource = Resource(txn, storage, kb, "matryoshka-rid")
301
- with (
302
- patch.object(resource, "has_field", Mock(return_value=True)),
303
- patch.object(resource, "get_field", AsyncMock(return_value=mock_field)),
304
- patch.object(resource, "generate_field_id", Mock(return_value="field_id")),
305
- patch("nucliadb.ingest.orm.resource.datamanagers") as mock_datamanagers,
306
- patch.object(
307
- resource.indexer, "apply_field_vectors", AsyncMock()
308
- ) as apply_field_vectors,
309
- ):
310
- mock_datamanagers.kb.get_matryoshka_vector_dimension = AsyncMock(
311
- return_value=None
312
- )
313
- await resource._apply_extracted_vectors(
314
- writer_pb2.ExtractedVectorsWrapper(vectors=vectors)
315
- )
316
- assert apply_field_vectors.call_count == 1
317
- assert (
318
- apply_field_vectors.call_args.kwargs["matryoshka_vector_dimension"] is None
319
- )
320
-
321
- mock_datamanagers.kb.get_matryoshka_vector_dimension = AsyncMock(
322
- return_value=MATRYOSHKA_DIMENSION
323
- )
324
- await resource._apply_extracted_vectors(
325
- writer_pb2.ExtractedVectorsWrapper(vectors=vectors)
326
- )
327
- assert apply_field_vectors.call_count == 2
328
- assert (
329
- apply_field_vectors.call_args.kwargs["matryoshka_vector_dimension"]
330
- == MATRYOSHKA_DIMENSION
331
- )
@@ -1,31 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from nucliadb.common.cluster.index_node import READ_CONNECTIONS, WRITE_CONNECTIONS
21
- from nucliadb.ingest.cache import clear_ingest_cache
22
-
23
-
24
- def test_clear_ingest_cache():
25
- READ_CONNECTIONS["addr1"] = "conn1"
26
- WRITE_CONNECTIONS["addr2"] = "conn2"
27
-
28
- clear_ingest_cache()
29
-
30
- assert len(READ_CONNECTIONS) == 0
31
- assert len(WRITE_CONNECTIONS) == 0
@@ -1,40 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import json
21
- import os
22
-
23
- import pytest
24
-
25
- from nucliadb.ingest.partitions import assign_partitions
26
-
27
-
28
- @pytest.mark.asyncio
29
- async def test_assign_partitions(partition_settings):
30
- expected_partition_list = []
31
- part = partition_settings.replica_number
32
-
33
- while part < partition_settings.nuclia_partitions:
34
- expected_partition_list.append(str(part + 1))
35
- part += partition_settings.total_replicas
36
-
37
- assign_partitions(partition_settings)
38
-
39
- assert partition_settings.partitions == expected_partition_list
40
- assert os.environ["PARTITIONS"] == json.dumps(expected_partition_list)
@@ -1,171 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from unittest.mock import Mock
21
-
22
- import pytest
23
- from nucliadb_protos.resources_pb2 import CloudFile
24
-
25
- from nucliadb.ingest.processing import (
26
- DummyProcessingEngine,
27
- ProcessingEngine,
28
- PushPayload,
29
- )
30
- from nucliadb.tests.utils.aiohttp_session import get_mocked_session
31
- from nucliadb_models import File, FileField
32
- from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError
33
-
34
- TEST_FILE = FileField(
35
- password="mypassword", file=File(filename="myfile.pdf", payload="")
36
- )
37
-
38
- TEST_CLOUD_FILE = CloudFile(
39
- uri="file.png",
40
- source=CloudFile.Source.LOCAL,
41
- bucket_name="/integration/ingest/assets",
42
- size=4,
43
- content_type="image/png",
44
- filename="file.png",
45
- )
46
-
47
- TEST_ITEM = PushPayload(uuid="foo", kbid="bar", userid="baz", partition=1)
48
-
49
-
50
- @pytest.mark.asyncio
51
- async def test_dummy_processing_engine():
52
- engine = DummyProcessingEngine()
53
- await engine.initialize()
54
- await engine.finalize()
55
- await engine.convert_filefield_to_str(None)
56
- engine.convert_external_filefield_to_str(None)
57
- await engine.convert_internal_filefield_to_str(None, None)
58
- await engine.convert_internal_cf_to_str(None, None)
59
- await engine.send_to_process(Mock(kbid="foo"), 1)
60
-
61
-
62
- @pytest.fixture(scope="function")
63
- def engine():
64
- pe = ProcessingEngine(
65
- onprem=True,
66
- nuclia_processing_cluster_url="cluster_url",
67
- nuclia_public_url="public_url",
68
- )
69
- yield pe
70
-
71
-
72
- async def test_convert_filefield_to_str_200(engine):
73
- engine.session = get_mocked_session("POST", 200, text="jwt")
74
-
75
- assert await engine.convert_filefield_to_str(TEST_FILE) == "jwt"
76
-
77
-
78
- async def test_convert_filefield_to_str_402(engine):
79
- engine.session = get_mocked_session("POST", 402, json={"detail": "limits exceeded"})
80
-
81
- with pytest.raises(LimitsExceededError) as exc:
82
- await engine.convert_filefield_to_str(TEST_FILE)
83
- assert exc.value.status_code == 402
84
-
85
-
86
- async def test_convert_filefield_to_str_429(engine):
87
- engine.session = get_mocked_session("POST", 429, json={"detail": "limits exceeded"})
88
-
89
- with pytest.raises(LimitsExceededError) as exc:
90
- await engine.convert_filefield_to_str(TEST_FILE)
91
- assert exc.value.status_code == 429
92
-
93
-
94
- async def test_convert_filefield_to_str_500(engine):
95
- engine.session = get_mocked_session("POST", 500, text="error")
96
-
97
- with pytest.raises(Exception) as exc:
98
- await engine.convert_filefield_to_str(TEST_FILE)
99
- assert str(exc.value) == "STATUS: 500 - error"
100
-
101
-
102
- async def test_convert_internal_cf_to_str_200(engine):
103
- engine.session = get_mocked_session("POST", 200, text="jwt")
104
-
105
- assert await engine.convert_internal_cf_to_str(TEST_CLOUD_FILE, Mock()) == "jwt"
106
-
107
-
108
- async def test_convert_internal_cf_to_str_402(engine):
109
- engine.session = get_mocked_session("POST", 402, json={"detail": "limits exceeded"})
110
-
111
- with pytest.raises(LimitsExceededError) as exc:
112
- await engine.convert_internal_cf_to_str(TEST_CLOUD_FILE, Mock())
113
- assert exc.value.status_code == 402
114
-
115
-
116
- async def test_convert_internal_cf_to_str_429(engine):
117
- engine.session = get_mocked_session("POST", 429, json={"detail": "limits exceeded"})
118
-
119
- with pytest.raises(LimitsExceededError) as exc:
120
- await engine.convert_internal_cf_to_str(TEST_CLOUD_FILE, Mock())
121
- assert exc.value.status_code == 429
122
-
123
-
124
- async def test_convert_internal_cf_to_str_500(engine):
125
- engine.session = get_mocked_session("POST", 500, text="error")
126
-
127
- with pytest.raises(Exception) as exc:
128
- await engine.convert_internal_cf_to_str(TEST_CLOUD_FILE, Mock())
129
- assert str(exc.value) == "STATUS: 500 - error"
130
-
131
-
132
- async def test_send_to_process_200(engine):
133
- json_data = {"seqid": 11, "account_seq": 22, "queue": "private"}
134
- engine.session = get_mocked_session(
135
- "POST", 200, json=json_data, context_manager=False
136
- )
137
-
138
- processing_info = await engine.send_to_process(TEST_ITEM, 1)
139
- assert processing_info.seqid == 11
140
- assert processing_info.account_seq == 22
141
- assert processing_info.queue == "private"
142
-
143
-
144
- @pytest.mark.parametrize("status", [402, 413])
145
- async def test_send_to_process_limits_exceeded(status, engine):
146
- engine.session = get_mocked_session(
147
- "POST", status, json={"detail": "limits exceeded"}, context_manager=False
148
- )
149
-
150
- with pytest.raises(LimitsExceededError) as exc:
151
- await engine.send_to_process(TEST_ITEM, 1)
152
- assert exc.value.status_code == status
153
-
154
-
155
- async def test_send_to_process_limits_exceeded_429(engine):
156
- engine.session = get_mocked_session(
157
- "POST", 429, json={"detail": "limits exceeded"}, context_manager=False
158
- )
159
-
160
- with pytest.raises(LimitsExceededError) as exc:
161
- await engine.send_to_process(TEST_ITEM, 1)
162
- assert exc.value.status_code == 429
163
-
164
-
165
- async def test_send_to_process_500(engine):
166
- engine.session = get_mocked_session(
167
- "POST", 500, text="error", context_manager=False
168
- )
169
-
170
- with pytest.raises(SendToProcessError):
171
- await engine.send_to_process(TEST_ITEM, 1)
@@ -1,117 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import asyncio
21
- from contextvars import ContextVar
22
- from typing import Optional
23
-
24
- from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
25
- from starlette.requests import Request
26
- from starlette.responses import Response
27
-
28
- from nucliadb.common.maindb.driver import Transaction
29
- from nucliadb.common.maindb.utils import get_driver
30
-
31
- txn_manager: ContextVar[Optional["ReadOnlyTransactionManager"]] = ContextVar(
32
- "txn_manager", default=None
33
- )
34
-
35
-
36
- class ReadOnlyTransactionMiddleware(BaseHTTPMiddleware):
37
- """
38
- This middleware provides a unique read-only transaction for each request. The transaction is
39
- created lazily, so if it's not used, it's not created. The middleware also ensures that the
40
- transaction is aborted at the end of the request.
41
-
42
- This is useful, for instance, on search endpoints where we want to minimize the number
43
- of transactions that are created.
44
-
45
- Usage:
46
- - Add this middleware to the FastAPI app:
47
-
48
- app = FastAPI()
49
- app.add_middleware(ReadOnlyTransactionMiddleware)
50
-
51
- - Where needed, get the transaction:
52
-
53
- txn = await get_read_only_transaction()
54
- """
55
-
56
- async def dispatch(
57
- self, request: Request, call_next: RequestResponseEndpoint
58
- ) -> Response:
59
- mgr = ReadOnlyTransactionManager()
60
- txn_manager.set(mgr)
61
- try:
62
- return await call_next(request)
63
- finally:
64
- await mgr.maybe_abort()
65
- txn_manager.set(None)
66
-
67
-
68
- class TransactionNotFoundException(Exception):
69
- pass
70
-
71
-
72
- class ReadOnlyTransactionManager:
73
- def __init__(self):
74
- self._transaction: Optional[Transaction] = None
75
- self._lock = asyncio.Lock()
76
- self.aborted: bool = False
77
-
78
- async def get_transaction(self) -> Transaction:
79
- if self.aborted:
80
- raise TransactionNotFoundException("Transaction was aborted")
81
-
82
- if self._transaction is not None:
83
- return self._transaction
84
-
85
- async with self._lock:
86
- # Check again in case it was set while waiting for the lock
87
- if self._transaction is not None:
88
- return self._transaction
89
-
90
- self._transaction = await self._get_transaction()
91
- return self._transaction
92
-
93
- async def _get_transaction(self) -> Transaction:
94
- driver = get_driver()
95
- txn = await driver.begin(read_only=True)
96
- return txn
97
-
98
- async def maybe_abort(self):
99
- if self.aborted or self._transaction is None:
100
- return
101
-
102
- await self._transaction.abort()
103
- self._transaction = None
104
- self._lock = None
105
- self.aborted = True
106
-
107
-
108
- async def get_read_only_transaction() -> Transaction:
109
- """
110
- Returns the read-only transaction for the current request
111
- """
112
- manager: Optional[ReadOnlyTransactionManager] = txn_manager.get()
113
- if manager is None:
114
- raise TransactionNotFoundException(
115
- "Context var is not set. Did you forget to add the ReadOnlyTransactionMiddleware to the app?"
116
- )
117
- return await manager.get_transaction()