nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,275 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from unittest.mock import AsyncMock, MagicMock
21
-
22
- import pytest
23
- from nucliadb_protos.resources_pb2 import (
24
- AllFieldIDs,
25
- Basic,
26
- CloudFile,
27
- FieldID,
28
- FieldText,
29
- FieldType,
30
- FileExtractedData,
31
- PagePositions,
32
- )
33
- from nucliadb_protos.writer_pb2 import BrokerMessage
34
-
35
- from nucliadb.ingest.orm.resource import (
36
- Resource,
37
- get_file_page_positions,
38
- get_text_field_mimetype,
39
- maybe_update_basic_icon,
40
- maybe_update_basic_summary,
41
- maybe_update_basic_thumbnail,
42
- update_basic_languages,
43
- )
44
-
45
-
46
- @pytest.mark.asyncio
47
- async def test_get_file_page_positions():
48
- extracted_data = FileExtractedData()
49
- extracted_data.file_pages_previews.positions.extend(
50
- [PagePositions(start=0, end=10), PagePositions(start=11, end=20)]
51
- )
52
- file_field = AsyncMock(
53
- get_file_extracted_data=AsyncMock(return_value=extracted_data)
54
- )
55
- assert await get_file_page_positions(file_field) == {0: (0, 10), 1: (11, 20)}
56
-
57
-
58
- @pytest.mark.parametrize(
59
- "basic,summary,updated",
60
- [
61
- (Basic(), "new_summary", True),
62
- (Basic(summary="summary"), "new_summary", False),
63
- (Basic(summary="summary"), "", False),
64
- ],
65
- )
66
- def test_maybe_update_basic_summary(basic, summary, updated):
67
- assert maybe_update_basic_summary(basic, summary) is updated
68
- if updated:
69
- assert basic.summary == summary
70
- else:
71
- assert basic.summary != summary
72
-
73
-
74
- def test_update_basic_languages():
75
- basic = Basic()
76
- # Languages are updated the first time
77
- assert update_basic_languages(basic, ["en"]) is True
78
- assert basic.metadata.language == "en"
79
- assert basic.metadata.languages == ["en"]
80
-
81
- # Languages are not updated
82
- assert update_basic_languages(basic, ["en"]) is False
83
- assert basic.metadata.language == "en"
84
- assert basic.metadata.languages == ["en"]
85
-
86
- # Main language is not updated but new language is added
87
- assert update_basic_languages(basic, ["de"]) is True
88
- assert basic.metadata.language == "en"
89
- assert basic.metadata.languages == ["en", "de"]
90
-
91
- # Null values
92
- assert update_basic_languages(basic, [""]) is False
93
- assert update_basic_languages(basic, [None]) is False # type: ignore
94
- assert basic.metadata.language == "en"
95
- assert basic.metadata.languages == ["en", "de"]
96
-
97
-
98
- @pytest.mark.parametrize(
99
- "basic,thumbnail,updated",
100
- [
101
- (Basic(), CloudFile(uri="new_thumbnail_url"), True),
102
- (
103
- Basic(thumbnail="old_thumbnail_url"),
104
- CloudFile(uri="new_thumbnail_url"),
105
- False,
106
- ),
107
- (Basic(thumbnail="old_thumbnail_url"), None, False),
108
- ],
109
- )
110
- def test_maybe_update_basic_thumbnail(basic, thumbnail, updated):
111
- assert maybe_update_basic_thumbnail(basic, thumbnail) == updated
112
- if updated:
113
- assert basic.thumbnail == thumbnail.uri
114
- else:
115
- assert basic.thumbnail == "old_thumbnail_url"
116
-
117
-
118
- @pytest.mark.parametrize(
119
- "text_format,mimetype",
120
- [
121
- (None, None),
122
- (FieldText.Format.PLAIN, "text/plain"),
123
- (FieldText.Format.HTML, "text/html"),
124
- (FieldText.Format.RST, "text/x-rst"),
125
- (FieldText.Format.MARKDOWN, "text/markdown"),
126
- (FieldText.Format.KEEP_MARKDOWN, "text/markdown"),
127
- ],
128
- )
129
- def test_get_text_field_mimetype(text_format, mimetype):
130
- message = BrokerMessage()
131
- if text_format is not None:
132
- message.texts["foo"].body = "foo"
133
- message.texts["foo"].format = text_format
134
- assert get_text_field_mimetype(message) == mimetype
135
-
136
-
137
- @pytest.mark.parametrize(
138
- "basic,icon,updated",
139
- [
140
- (Basic(), None, False),
141
- (Basic(icon="text/plain"), "text/html", False),
142
- (Basic(), "text/html", True),
143
- (Basic(icon=""), "text/html", True),
144
- (Basic(icon="application/octet-stream"), "text/html", True),
145
- ],
146
- )
147
- def test_maybe_update_basic_icon(basic, icon, updated):
148
- assert maybe_update_basic_icon(basic, icon) == updated
149
- if updated:
150
- assert basic.icon == icon
151
-
152
-
153
- class Transaction:
154
- def __init__(self):
155
- self.kv = {}
156
-
157
- async def get(self, key):
158
- return self.kv.get(key)
159
-
160
- async def set(self, key, value):
161
- self.kv[key] = value
162
-
163
-
164
- @pytest.fixture(scope="function")
165
- def txn():
166
- return Transaction()
167
-
168
-
169
- @pytest.fixture(scope="function")
170
- def storage():
171
- mock = AsyncMock()
172
- return mock
173
-
174
-
175
- @pytest.fixture(scope="function")
176
- def kb():
177
- mock = AsyncMock()
178
- return mock
179
-
180
-
181
- async def test_get_fields_ids_caches_keys(txn, storage, kb):
182
- resource = Resource(txn, storage, kb, "rid")
183
- cached_field_keys = [(0, "foo"), (1, "bar")]
184
- new_field_keys = [(2, "baz")]
185
- resource._inner_get_fields_ids = AsyncMock(return_value=new_field_keys) # type: ignore
186
- resource.all_fields_keys = cached_field_keys
187
-
188
- assert await resource.get_fields_ids() == cached_field_keys
189
- resource._inner_get_fields_ids.assert_not_awaited()
190
-
191
- assert await resource.get_fields_ids(force=True) == new_field_keys
192
- resource._inner_get_fields_ids.assert_awaited_once()
193
- assert resource.all_fields_keys == new_field_keys
194
-
195
- # If the all_field_keys is an empty list,
196
- # we should not be calling the inner_get_fields_ids
197
- resource.all_fields_keys = []
198
- resource._inner_get_fields_ids.reset_mock()
199
- assert await resource.get_fields_ids() == []
200
- resource._inner_get_fields_ids.assert_not_awaited()
201
-
202
-
203
- async def test_get_set_all_field_ids(txn, storage, kb):
204
- resource = Resource(txn, storage, kb, "rid")
205
-
206
- assert await resource.get_all_field_ids() is None
207
-
208
- all_fields = AllFieldIDs()
209
- all_fields.fields.append(FieldID(field_type=FieldType.TEXT, field="text"))
210
-
211
- await resource.set_all_field_ids(all_fields)
212
-
213
- assert await resource.get_all_field_ids() == all_fields
214
-
215
-
216
- async def test_update_all_fields_key(txn, storage, kb):
217
- resource = Resource(txn, storage, kb, "rid")
218
-
219
- await resource.update_all_field_ids(updated=[], deleted=[])
220
-
221
- # Initial value is Empty
222
- assert (await resource.get_all_field_ids()) == AllFieldIDs()
223
-
224
- all_fields = AllFieldIDs()
225
- all_fields.fields.append(FieldID(field_type=FieldType.TEXT, field="text1"))
226
- all_fields.fields.append(FieldID(field_type=FieldType.TEXT, field="text2"))
227
-
228
- await resource.update_all_field_ids(updated=all_fields.fields)
229
-
230
- # Check updates
231
- assert await resource.get_all_field_ids() == all_fields
232
-
233
- file_field = FieldID(field_type=FieldType.FILE, field="file")
234
- await resource.update_all_field_ids(updated=[file_field])
235
-
236
- result = await resource.get_all_field_ids()
237
- assert list(result.fields) == list(all_fields.fields) + [file_field]
238
-
239
- # Check deletes
240
- await resource.update_all_field_ids(deleted=[file_field])
241
-
242
- assert await resource.get_all_field_ids() == all_fields
243
-
244
-
245
- async def test_apply_fields_calls_update_all_field_ids(txn, storage, kb):
246
- resource = Resource(txn, storage, kb, "rid")
247
- resource.update_all_field_ids = AsyncMock() # type: ignore
248
- resource.set_field = AsyncMock() # type: ignore
249
-
250
- bm = MagicMock()
251
- bm.layouts = {"layout": MagicMock()}
252
- bm.texts = {"text": MagicMock()}
253
- bm.keywordsets = {"keywordset": MagicMock()}
254
- bm.datetimes = {"datetime": MagicMock()}
255
- bm.links = {"link": MagicMock()}
256
- bm.files = {"file": MagicMock()}
257
- bm.conversations = {"conversation": MagicMock()}
258
- bm.delete_fields.append(FieldID(field_type=FieldType.LAYOUT, field="to_delete"))
259
-
260
- await resource.apply_fields(bm)
261
-
262
- resource.update_all_field_ids.assert_awaited_once()
263
-
264
- resource.update_all_field_ids.call_args[1]["updated"] == [
265
- FieldID(field_type=FieldType.LAYOUT, field="layout"),
266
- FieldID(field_type=FieldType.TEXT, field="text"),
267
- FieldID(field_type=FieldType.KEYWORDSET, field="keywordset"),
268
- FieldID(field_type=FieldType.DATETIME, field="datetime"),
269
- FieldID(field_type=FieldType.LINK, field="link"),
270
- FieldID(field_type=FieldType.FILE, field="file"),
271
- FieldID(field_type=FieldType.CONVERSATION, field="conversation"),
272
- ]
273
- resource.update_all_field_ids.call_args[1]["deleted"] == [
274
- FieldID(field_type=FieldType.LAYOUT, field="to_delete"),
275
- ]
@@ -1,40 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import json
21
- import os
22
-
23
- import pytest
24
-
25
- from nucliadb.ingest.partitions import assign_partitions
26
-
27
-
28
@pytest.mark.asyncio
async def test_assign_partitions(partition_settings):
    """assign_partitions must store the partition list and export it via env."""
    # Expected ids are computed before the call under test: start at the
    # replica number, step by the replica count, and shift each value by one.
    expected_partition_list = [
        str(index + 1)
        for index in range(
            partition_settings.replica_number,
            partition_settings.nuclia_partitions,
            partition_settings.total_replicas,
        )
    ]

    assign_partitions(partition_settings)

    assert partition_settings.partitions == expected_partition_list
    assert os.environ["PARTITIONS"] == json.dumps(expected_partition_list)
@@ -1,171 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from unittest.mock import Mock
21
-
22
- import pytest
23
- from nucliadb_protos.resources_pb2 import CloudFile
24
-
25
- from nucliadb.ingest.processing import (
26
- DummyProcessingEngine,
27
- ProcessingEngine,
28
- PushPayload,
29
- )
30
- from nucliadb.tests.utils.aiohttp_session import get_mocked_session
31
- from nucliadb_models import File, FileField
32
- from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError
33
-
34
# Shared inputs for the processing engine tests below.

# File field payload handed to convert_filefield_to_str.
TEST_FILE = FileField(
    file=File(filename="myfile.pdf", payload=""),
    password="mypassword",
)

# Cloud file descriptor handed to convert_internal_cf_to_str.
TEST_CLOUD_FILE = CloudFile(
    filename="file.png",
    content_type="image/png",
    size=4,
    bucket_name="/integration/ingest/assets",
    source=CloudFile.Source.LOCAL,
    uri="file.png",
)

# Push payload handed to send_to_process.
TEST_ITEM = PushPayload(
    uuid="foo",
    kbid="bar",
    userid="baz",
    partition=1,
)
48
-
49
-
50
@pytest.mark.asyncio
async def test_dummy_processing_engine():
    """Smoke test: every DummyProcessingEngine entry point runs without raising."""
    dummy = DummyProcessingEngine()

    await dummy.initialize()
    await dummy.finalize()

    await dummy.convert_filefield_to_str(None)
    # The external file field conversion is called without await here.
    dummy.convert_external_filefield_to_str(None)
    await dummy.convert_internal_filefield_to_str(None, None)
    await dummy.convert_internal_cf_to_str(None, None)

    await dummy.send_to_process(Mock(kbid="foo"), 1)
60
-
61
-
62
@pytest.fixture(scope="function")
def engine():
    """Provide a fresh on-prem ProcessingEngine with placeholder URLs per test."""
    yield ProcessingEngine(
        nuclia_public_url="public_url",
        nuclia_processing_cluster_url="cluster_url",
        onprem=True,
    )
70
-
71
-
72
async def test_convert_filefield_to_str_200(engine):
    """With a mocked 200 response whose text is "jwt", the call returns "jwt"."""
    engine.session = get_mocked_session("POST", 200, text="jwt")

    token = await engine.convert_filefield_to_str(TEST_FILE)

    assert token == "jwt"
76
-
77
-
78
async def test_convert_filefield_to_str_402(engine):
    """A mocked 402 response must raise LimitsExceededError with status 402."""
    limits_payload = {"detail": "limits exceeded"}
    engine.session = get_mocked_session("POST", 402, json=limits_payload)

    with pytest.raises(LimitsExceededError) as raised:
        await engine.convert_filefield_to_str(TEST_FILE)

    # Inspected once the context manager has captured the exception.
    assert raised.value.status_code == 402
84
-
85
-
86
async def test_convert_filefield_to_str_429(engine):
    """A mocked 429 response must raise LimitsExceededError with status 429."""
    limits_payload = {"detail": "limits exceeded"}
    engine.session = get_mocked_session("POST", 429, json=limits_payload)

    with pytest.raises(LimitsExceededError) as raised:
        await engine.convert_filefield_to_str(TEST_FILE)

    # Inspected once the context manager has captured the exception.
    assert raised.value.status_code == 429
92
-
93
-
94
async def test_convert_filefield_to_str_500(engine):
    """A mocked 500 response must raise with the status and body in the message."""
    engine.session = get_mocked_session("POST", 500, text="error")

    with pytest.raises(Exception) as raised:
        await engine.convert_filefield_to_str(TEST_FILE)

    message = str(raised.value)
    assert message == "STATUS: 500 - error"
100
-
101
-
102
async def test_convert_internal_cf_to_str_200(engine):
    """With a mocked 200 response whose text is "jwt", the call returns "jwt"."""
    engine.session = get_mocked_session("POST", 200, text="jwt")

    # The second argument is a bare Mock standing in for the storage object.
    token = await engine.convert_internal_cf_to_str(TEST_CLOUD_FILE, Mock())

    assert token == "jwt"
106
-
107
-
108
async def test_convert_internal_cf_to_str_402(engine):
    """A mocked 402 response must raise LimitsExceededError with status 402."""
    limits_payload = {"detail": "limits exceeded"}
    engine.session = get_mocked_session("POST", 402, json=limits_payload)

    with pytest.raises(LimitsExceededError) as raised:
        await engine.convert_internal_cf_to_str(TEST_CLOUD_FILE, Mock())

    # Inspected once the context manager has captured the exception.
    assert raised.value.status_code == 402
114
-
115
-
116
async def test_convert_internal_cf_to_str_429(engine):
    """A mocked 429 response must raise LimitsExceededError with status 429."""
    limits_payload = {"detail": "limits exceeded"}
    engine.session = get_mocked_session("POST", 429, json=limits_payload)

    with pytest.raises(LimitsExceededError) as raised:
        await engine.convert_internal_cf_to_str(TEST_CLOUD_FILE, Mock())

    # Inspected once the context manager has captured the exception.
    assert raised.value.status_code == 429
122
-
123
-
124
async def test_convert_internal_cf_to_str_500(engine):
    """A mocked 500 response must raise with the status and body in the message."""
    engine.session = get_mocked_session("POST", 500, text="error")

    with pytest.raises(Exception) as raised:
        await engine.convert_internal_cf_to_str(TEST_CLOUD_FILE, Mock())

    message = str(raised.value)
    assert message == "STATUS: 500 - error"
130
-
131
-
132
async def test_send_to_process_200(engine):
    """The seqid, account_seq and queue of a 200 JSON reply are exposed on the result."""
    reply = {"seqid": 11, "account_seq": 22, "queue": "private"}
    engine.session = get_mocked_session(
        "POST",
        200,
        json=reply,
        context_manager=False,
    )

    processing_info = await engine.send_to_process(TEST_ITEM, 1)

    assert processing_info.seqid == 11
    assert processing_info.account_seq == 22
    assert processing_info.queue == "private"
142
-
143
-
144
@pytest.mark.parametrize("status", [402, 413])
async def test_send_to_process_limits_exceeded(status, engine):
    """Mocked 402 and 413 responses must raise LimitsExceededError with that status."""
    limits_payload = {"detail": "limits exceeded"}
    engine.session = get_mocked_session(
        "POST",
        status,
        json=limits_payload,
        context_manager=False,
    )

    with pytest.raises(LimitsExceededError) as raised:
        await engine.send_to_process(TEST_ITEM, 1)

    # Inspected once the context manager has captured the exception.
    assert raised.value.status_code == status
153
-
154
-
155
async def test_send_to_process_limits_exceeded_429(engine):
    """A mocked 429 response must raise LimitsExceededError with status 429."""
    limits_payload = {"detail": "limits exceeded"}
    engine.session = get_mocked_session(
        "POST",
        429,
        json=limits_payload,
        context_manager=False,
    )

    with pytest.raises(LimitsExceededError) as raised:
        await engine.send_to_process(TEST_ITEM, 1)

    # Inspected once the context manager has captured the exception.
    assert raised.value.status_code == 429
163
-
164
-
165
async def test_send_to_process_500(engine):
    """A mocked 500 response must raise SendToProcessError."""
    failing_session = get_mocked_session(
        "POST",
        500,
        text="error",
        context_manager=False,
    )
    engine.session = failing_session

    with pytest.raises(SendToProcessError):
        await engine.send_to_process(TEST_ITEM, 1)
@@ -1,117 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- import asyncio
21
- from contextvars import ContextVar
22
- from typing import Optional
23
-
24
- from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
25
- from starlette.requests import Request
26
- from starlette.responses import Response
27
-
28
- from nucliadb.common.maindb.driver import Transaction
29
- from nucliadb.common.maindb.utils import get_driver
30
-
31
# Context variable holding the transaction manager bound by the middleware;
# its default is None, which get_read_only_transaction treats as "not set".
txn_manager: ContextVar[Optional["ReadOnlyTransactionManager"]] = ContextVar(
    "txn_manager",
    default=None,
)
34
-
35
-
36
class ReadOnlyTransactionMiddleware(BaseHTTPMiddleware):
    """
    Give every request its own lazily opened read-only transaction.

    A new ReadOnlyTransactionManager is bound to the ``txn_manager`` context
    variable before the request is dispatched. No transaction is opened until
    something asks for it, and whatever was opened is aborted once the
    downstream handler returns or raises.

    Handy for endpoints (search, for instance) that want to keep the number
    of transactions they open to a minimum.

    Usage:
    - Register the middleware on the FastAPI app:

        app = FastAPI()
        app.add_middleware(ReadOnlyTransactionMiddleware)

    - Fetch the transaction wherever it is needed:

        txn = await get_read_only_transaction()
    """

    async def dispatch(
        self, request: Request, call_next: RequestResponseEndpoint
    ) -> Response:
        manager = ReadOnlyTransactionManager()
        txn_manager.set(manager)
        try:
            response = await call_next(request)
        finally:
            # Runs on success and on error alike: abort whatever was opened
            # and clear the context variable.
            await manager.maybe_abort()
            txn_manager.set(None)
        return response
66
-
67
-
68
class TransactionNotFoundException(Exception):
    """Raised when no usable read-only transaction is available."""
70
-
71
-
72
class ReadOnlyTransactionManager:
    """Lazily open, cache and abort a single read-only transaction."""

    def __init__(self):
        self.aborted: bool = False
        self._lock = asyncio.Lock()
        self._transaction: Optional[Transaction] = None

    async def get_transaction(self) -> Transaction:
        """Return the cached transaction, opening it on first use.

        Raises TransactionNotFoundException once the manager has aborted a
        transaction.
        """
        if self.aborted:
            raise TransactionNotFoundException("Transaction was aborted")

        txn = self._transaction
        if txn is None:
            async with self._lock:
                # Re-read under the lock: another waiter may have opened the
                # transaction while this coroutine was queued.
                txn = self._transaction
                if txn is None:
                    txn = self._transaction = await self._get_transaction()
        return txn

    async def _get_transaction(self) -> Transaction:
        """Begin a new read-only transaction on the driver from get_driver()."""
        return await get_driver().begin(read_only=True)

    async def maybe_abort(self):
        """Abort the cached transaction, if one was opened and not yet aborted."""
        txn = self._transaction
        if self.aborted or txn is None:
            return

        await txn.abort()
        # State is only flipped after a successful abort.
        self._transaction = None
        self._lock = None
        self.aborted = True
106
-
107
-
108
async def get_read_only_transaction() -> Transaction:
    """
    Return the read-only transaction of the manager bound to ``txn_manager``.

    Raises TransactionNotFoundException when the context variable holds no
    manager.
    """
    current = txn_manager.get()
    if current is not None:
        return await current.get_transaction()

    raise TransactionNotFoundException(
        "Context var is not set. Did you forget to add the ReadOnlyTransactionMiddleware to the app?"
    )
@@ -1,63 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from fastapi import Request
21
- from fastapi_versioning import version
22
-
23
- from nucliadb.learning_proxy import learning_collector_proxy
24
- from nucliadb.reader.api.v1.router import KB_PREFIX, api
25
- from nucliadb_models.resource import NucliaDBRoles
26
- from nucliadb_utils.authentication import requires
27
-
28
-
29
@api.get(
    path=f"/{KB_PREFIX}/{{kbid}}/feedback/{{month}}",
    status_code=200,
    name="Download feedback of a Knowledge Box",
    description="Download the feedback of a particular month in a Knowledge Box",  # noqa
    response_model=None,
    tags=["Knowledge Boxes"],
)
@requires(NucliaDBRoles.READER)
@version(1)
async def feedback_download(
    request: Request,
    kbid: str,
    month: str,
):
    # Thin proxy: the request is forwarded as a GET to the learning collector,
    # on the path built from the Knowledge Box id and the month.
    collector_path = f"/collect/feedback/{kbid}/{month}"
    return await learning_collector_proxy(request, "GET", collector_path)
47
-
48
-
49
@api.get(
    path=f"/{KB_PREFIX}/{{kbid}}/feedback",
    status_code=200,
    name="Feedback avalaible months",
    description="List of months within the last year with feedback data",
    response_model=None,
    tags=["Knowledge Boxes"],
)
@requires(NucliaDBRoles.READER)
@version(1)
async def feedback_list_months(
    request: Request,
    kbid: str,
):
    # Thin proxy: the request is forwarded as a GET to the learning collector,
    # on the path built from the Knowledge Box id.
    collector_path = f"/collect/feedback/{kbid}"
    return await learning_collector_proxy(request, "GET", collector_path)
@@ -1,19 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #