nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,95 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- from uuid import uuid4
22
-
23
- import pytest
24
-
25
- from nucliadb.tests.utils.aiohttp_session import get_mocked_session
26
-
27
-
28
- @pytest.mark.parametrize("onprem", [True, False])
29
- @pytest.mark.parametrize(
30
- "mock_payload",
31
- [
32
- {"seqid": 1, "account_seq": 1, "queue": "private"},
33
- {"seqid": 1, "account_seq": 1, "queue": "shared"},
34
- {"seqid": 1, "account_seq": None, "queue": "private"},
35
- {"seqid": 1, "account_seq": None, "queue": "shared"},
36
- {"seqid": 1, "queue": "private"},
37
- {"seqid": 1, "queue": "shared"},
38
- ],
39
- )
40
- @pytest.mark.asyncio
41
- async def test_send_to_process(onprem, mock_payload):
42
- """
43
- Test that send_to_process does not fail
44
- """
45
-
46
- from nucliadb.ingest.processing import ProcessingEngine, PushPayload
47
-
48
- fake_nuclia_proxy_url = "http://fake_proxy"
49
- processing_engine = ProcessingEngine(
50
- onprem=onprem,
51
- nuclia_processing_cluster_url=fake_nuclia_proxy_url,
52
- nuclia_public_url=fake_nuclia_proxy_url,
53
- )
54
- await processing_engine.initialize()
55
-
56
- payload = PushPayload(
57
- uuid=str(uuid4()), kbid=str(uuid4()), userid=str(uuid4()), partition=0
58
- )
59
-
60
- processing_engine.session = get_mocked_session(
61
- "POST", 200, json=mock_payload, context_manager=False
62
- )
63
- await processing_engine.send_to_process(payload, partition=0)
64
-
65
- await processing_engine.finalize()
66
-
67
-
68
- @pytest.mark.parametrize("onprem", [True, False])
69
- @pytest.mark.asyncio
70
- async def test_delete_from_processing(onprem):
71
- """
72
- Test that send_to_process does not fail
73
- """
74
-
75
- from nucliadb.ingest.processing import ProcessingEngine
76
-
77
- fake_nuclia_proxy_url = "http://fake_proxy"
78
- processing_engine = ProcessingEngine(
79
- onprem=onprem,
80
- nuclia_processing_cluster_url=fake_nuclia_proxy_url,
81
- nuclia_public_url=fake_nuclia_proxy_url,
82
- )
83
- await processing_engine.initialize()
84
-
85
- processing_engine.session = get_mocked_session(
86
- "POST",
87
- 200,
88
- json={"kbid": "kbid", "resource_id": "resource_id"},
89
- context_manager=False,
90
- )
91
- await processing_engine.delete_from_processing(
92
- kbid="kbid", resource_id="resource_id"
93
- )
94
-
95
- await processing_engine.finalize()
@@ -1,272 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import uuid
21
-
22
- import pytest
23
- from nucliadb_protos.resources_pb2 import (
24
- Classification,
25
- FieldComputedMetadataWrapper,
26
- FieldID,
27
- FieldText,
28
- FieldType,
29
- )
30
- from nucliadb_protos.utils_pb2 import Relation, RelationNode
31
- from nucliadb_protos.writer_pb2 import BrokerMessage
32
-
33
- from nucliadb.ingest import SERVICE_NAME
34
- from nucliadb_utils.utilities import get_indexing, get_storage
35
-
36
-
37
- @pytest.mark.asyncio
38
- async def test_ingest_relations_indexing(
39
- fake_node, local_files, storage, knowledgebox_ingest, processor
40
- ):
41
- rid = str(uuid.uuid4())
42
- bm = BrokerMessage(
43
- kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
44
- )
45
-
46
- e0 = RelationNode(value="E0", ntype=RelationNode.NodeType.ENTITY, subtype="")
47
- e1 = RelationNode(
48
- value="E1", ntype=RelationNode.NodeType.ENTITY, subtype="Official"
49
- )
50
- e2 = RelationNode(
51
- value="E2", ntype=RelationNode.NodeType.ENTITY, subtype="Propaganda"
52
- )
53
- r0 = Relation(
54
- relation=Relation.RelationType.CHILD, source=e1, to=e2, relation_label="R0"
55
- )
56
- r1 = Relation(
57
- relation=Relation.RelationType.ENTITY, source=e0, to=e2, relation_label="R1"
58
- )
59
- r2 = Relation(
60
- relation=Relation.RelationType.CHILD, source=e0, to=e1, relation_label="R2"
61
- )
62
-
63
- bm.relations.extend([r0, r1, r2])
64
-
65
- await processor.process(message=bm, seqid=1)
66
-
67
- index = get_indexing()
68
- storage = await get_storage(service_name=SERVICE_NAME)
69
-
70
- pb = await storage.get_indexing(index._calls[0][1])
71
-
72
- assert len(pb.relations) == 3
73
- assert pb.relations[0] == r0
74
- assert pb.relations[1] == r1
75
- assert pb.relations[2] == r2
76
-
77
-
78
- @pytest.mark.asyncio
79
- async def test_ingest_label_relation_extraction(
80
- fake_node, local_files, storage, knowledgebox_ingest, processor
81
- ):
82
- rid = str(uuid.uuid4())
83
- bm = BrokerMessage(
84
- kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
85
- )
86
-
87
- labels = [
88
- ("labelset-1", "label-1"),
89
- ("labelset-1", "label-2"),
90
- ("labelset-2", "label-1"),
91
- ("labelset-2", "label-3"),
92
- ]
93
- bm.basic.usermetadata.classifications.extend(
94
- [Classification(labelset=labelset, label=label) for labelset, label in labels]
95
- )
96
-
97
- await processor.process(message=bm, seqid=1)
98
-
99
- index = get_indexing()
100
- storage = await get_storage(service_name=SERVICE_NAME)
101
-
102
- pb = await storage.get_indexing(index._calls[0][1])
103
-
104
- for i, (labelset, label) in enumerate(labels):
105
- assert pb.relations[i].relation == Relation.RelationType.ABOUT
106
- assert pb.relations[i].source.value == rid
107
- assert pb.relations[i].to.value == f"{labelset}/{label}"
108
-
109
-
110
- @pytest.mark.asyncio
111
- async def test_ingest_colab_relation_extraction(
112
- fake_node, local_files, storage, knowledgebox_ingest, processor
113
- ):
114
- rid = str(uuid.uuid4())
115
- bm = BrokerMessage(
116
- kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
117
- )
118
-
119
- collaborators = ["Alice", "Bob", "Trudy"]
120
- bm.origin.colaborators.extend(collaborators)
121
-
122
- await processor.process(message=bm, seqid=1)
123
-
124
- index = get_indexing()
125
- storage = await get_storage(service_name=SERVICE_NAME)
126
-
127
- pb = await storage.get_indexing(index._calls[0][1])
128
-
129
- for i, collaborator in enumerate(collaborators):
130
- assert pb.relations[i].relation == Relation.RelationType.COLAB
131
- assert pb.relations[i].source.value == rid
132
- assert pb.relations[i].to.value == collaborator
133
-
134
-
135
- @pytest.mark.asyncio
136
- async def test_ingest_field_metadata_relation_extraction(
137
- fake_node, local_files, storage, knowledgebox_ingest, processor
138
- ):
139
- rid = str(uuid.uuid4())
140
- bm = BrokerMessage(
141
- kbid=knowledgebox_ingest,
142
- uuid=rid,
143
- slug="slug-1",
144
- type=BrokerMessage.AUTOCOMMIT,
145
- texts={
146
- "title": FieldText(
147
- body="Title with metadata",
148
- format=FieldText.Format.PLAIN,
149
- )
150
- },
151
- )
152
-
153
- fcmw = FieldComputedMetadataWrapper(
154
- field=FieldID(
155
- field_type=FieldType.TEXT,
156
- field="title",
157
- )
158
- )
159
- fcmw.metadata.metadata.positions["subtype-1/value-1"].entity = "value-1"
160
- fcmw.metadata.metadata.positions["subtype-1/value-2"].entity = "value-2"
161
-
162
- fcmw.metadata.metadata.classifications.extend(
163
- [
164
- Classification(labelset="ls1", label="label1"),
165
- ]
166
- )
167
-
168
- bm.field_metadata.append(fcmw)
169
-
170
- await processor.process(message=bm, seqid=1)
171
-
172
- index = get_indexing()
173
- storage = await get_storage(service_name=SERVICE_NAME)
174
-
175
- pb = await storage.get_indexing(index._calls[0][1])
176
-
177
- generated_relations = [
178
- # From ner metadata
179
- Relation(
180
- relation=Relation.RelationType.ENTITY,
181
- source=RelationNode(value=rid, ntype=RelationNode.NodeType.RESOURCE),
182
- to=RelationNode(
183
- value="value-1", ntype=RelationNode.NodeType.ENTITY, subtype="subtype-1"
184
- ),
185
- ),
186
- Relation(
187
- relation=Relation.RelationType.ENTITY,
188
- source=RelationNode(value=rid, ntype=RelationNode.NodeType.RESOURCE),
189
- to=RelationNode(
190
- value="value-2", ntype=RelationNode.NodeType.ENTITY, subtype="subtype-1"
191
- ),
192
- ),
193
- # From classification metadata
194
- Relation(
195
- relation=Relation.RelationType.ABOUT,
196
- source=RelationNode(value=rid, ntype=RelationNode.NodeType.RESOURCE),
197
- to=RelationNode(
198
- value="ls1/label1",
199
- ntype=RelationNode.NodeType.LABEL,
200
- ),
201
- ),
202
- ]
203
- for generated_relation in generated_relations:
204
- assert generated_relation in pb.relations
205
-
206
-
207
- @pytest.mark.asyncio
208
- async def test_ingest_field_relations_relation_extraction(
209
- fake_node, local_files, storage, knowledgebox_ingest, processor
210
- ):
211
- rid = str(uuid.uuid4())
212
- bm = BrokerMessage(
213
- kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
214
- )
215
-
216
- relationnode = RelationNode(
217
- value=rid, ntype=RelationNode.NodeType.RESOURCE, subtype="subtype-1"
218
- )
219
- test_relations = [
220
- Relation(
221
- relation=Relation.RelationType.CHILD,
222
- source=relationnode,
223
- to=RelationNode(
224
- value="document",
225
- ntype=RelationNode.NodeType.RESOURCE,
226
- ),
227
- ),
228
- Relation(
229
- relation=Relation.RelationType.ABOUT,
230
- source=relationnode,
231
- to=RelationNode(
232
- value="label",
233
- ntype=RelationNode.NodeType.LABEL,
234
- ),
235
- ),
236
- Relation(
237
- relation=Relation.RelationType.ENTITY,
238
- source=relationnode,
239
- to=RelationNode(
240
- value="entity",
241
- ntype=RelationNode.NodeType.ENTITY,
242
- ),
243
- ),
244
- Relation(
245
- relation=Relation.RelationType.COLAB,
246
- source=relationnode,
247
- to=RelationNode(
248
- value="user",
249
- ntype=RelationNode.NodeType.USER,
250
- ),
251
- ),
252
- Relation(
253
- relation=Relation.RelationType.OTHER,
254
- source=relationnode,
255
- to=RelationNode(
256
- value="other",
257
- ntype=RelationNode.NodeType.RESOURCE,
258
- ),
259
- ),
260
- ]
261
- bm.relations.extend(test_relations)
262
-
263
- await processor.process(message=bm, seqid=1)
264
-
265
- index = get_indexing()
266
- storage = await get_storage(service_name=SERVICE_NAME)
267
-
268
- pb = await storage.get_indexing(index._calls[0][1])
269
-
270
- assert len(pb.relations) == len(test_relations)
271
- for relation in test_relations:
272
- assert relation in pb.relations
@@ -1,18 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -1,139 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- import asyncio
22
- from unittest.mock import AsyncMock, MagicMock, patch
23
-
24
- import pytest
25
- from nucliadb_protos.audit_pb2 import AuditKBCounter, AuditRequest
26
- from nucliadb_protos.writer_pb2 import Audit, BrokerMessage, Notification, ShardObject
27
-
28
- from nucliadb.ingest.consumer import auditing
29
- from nucliadb_protos import nodereader_pb2
30
-
31
- pytestmark = pytest.mark.asyncio
32
-
33
-
34
- @pytest.fixture()
35
- def pubsub():
36
- mock = AsyncMock()
37
- mock.parse = lambda x: x
38
- yield mock
39
-
40
-
41
- @pytest.fixture()
42
- def reader():
43
- yield AsyncMock()
44
-
45
-
46
- @pytest.fixture()
47
- def shard_manager(reader):
48
- nm = MagicMock()
49
- node = MagicMock(reader=reader)
50
- nm.get_shards_by_kbid = AsyncMock(return_value=[ShardObject()])
51
- with (
52
- patch("nucliadb.ingest.consumer.auditing.get_shard_manager", return_value=nm),
53
- patch(
54
- "nucliadb.ingest.consumer.auditing.choose_node",
55
- return_value=(node, "shard_id"),
56
- ),
57
- ):
58
- yield nm
59
-
60
-
61
- @pytest.fixture()
62
- def audit():
63
- yield AsyncMock()
64
-
65
-
66
- @pytest.fixture()
67
- async def index_audit_handler(pubsub, audit, shard_manager):
68
- iah = auditing.IndexAuditHandler(
69
- audit=audit,
70
- pubsub=pubsub,
71
- check_delay=0.05,
72
- )
73
- await iah.initialize()
74
- yield iah
75
- await iah.finalize()
76
-
77
-
78
- @pytest.fixture()
79
- async def writes_audit_handler(pubsub, audit, shard_manager):
80
- rwah = auditing.ResourceWritesAuditHandler(
81
- storage=AsyncMock(),
82
- audit=audit,
83
- pubsub=pubsub,
84
- )
85
- await rwah.initialize()
86
- yield rwah
87
- await rwah.finalize()
88
-
89
-
90
- async def test_handle_message(
91
- index_audit_handler: auditing.IndexAuditHandler, reader, audit
92
- ):
93
- reader.GetShard.return_value = nodereader_pb2.Shard(fields=5, paragraphs=6)
94
-
95
- notif = Notification(
96
- kbid="kbid",
97
- action=Notification.Action.INDEXED,
98
- )
99
- await index_audit_handler.handle_message(notif.SerializeToString())
100
-
101
- await asyncio.sleep(0.06)
102
-
103
- audit.report.assert_called_with(
104
- kbid="kbid",
105
- audit_type=AuditRequest.AuditType.INDEXED,
106
- kb_counter=AuditKBCounter(fields=5, paragraphs=6),
107
- )
108
-
109
-
110
- async def test_handle_message_ignore_not_indexed(
111
- index_audit_handler: auditing.IndexAuditHandler, audit
112
- ):
113
- notif = Notification(
114
- kbid="kbid",
115
- action=Notification.Action.COMMIT,
116
- )
117
- await index_audit_handler.handle_message(notif.SerializeToString())
118
-
119
- await index_audit_handler.finalize()
120
-
121
- audit.report.assert_not_called()
122
-
123
-
124
- async def test_resource_handle_message_processor_messages_are_not_audited(
125
- writes_audit_handler: auditing.ResourceWritesAuditHandler, audit
126
- ):
127
- message_audit = Audit()
128
- message_audit.message_source = BrokerMessage.MessageSource.PROCESSOR
129
- notif = Notification(
130
- kbid="kbid",
131
- action=Notification.Action.COMMIT,
132
- write_type=Notification.WriteType.MODIFIED,
133
- message_audit=message_audit,
134
- )
135
- await writes_audit_handler.handle_message(notif.SerializeToString())
136
-
137
- await writes_audit_handler.finalize()
138
-
139
- audit.report.assert_not_called()
@@ -1,69 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from unittest.mock import AsyncMock, MagicMock, Mock
21
-
22
- import pytest
23
- from nucliadb_protos.writer_pb2 import BrokerMessage, BrokerMessageBlobReference
24
-
25
- from nucliadb.ingest.consumer.consumer import IngestConsumer
26
-
27
-
28
- @pytest.fixture()
29
- def storage():
30
- mock = MagicMock()
31
- mock.get_stream_message = AsyncMock()
32
- mock.del_stream_message = AsyncMock()
33
- yield mock
34
-
35
-
36
- @pytest.fixture()
37
- def consumer(storage):
38
- yield IngestConsumer(None, "partition", storage, None)
39
-
40
-
41
- @pytest.mark.asyncio
42
- async def test_get_broker_message(consumer: IngestConsumer, storage):
43
- bm = BrokerMessage(kbid="kbid")
44
- msg = Mock(data=bm.SerializeToString(), headers={})
45
- assert bm == await consumer.get_broker_message(msg)
46
- storage.get_stream_message.assert_not_called()
47
-
48
-
49
- @pytest.mark.asyncio
50
- async def test_get_broker_message_proxied(consumer: IngestConsumer, storage):
51
- bm = BrokerMessage(kbid="kbid")
52
- bmr = BrokerMessageBlobReference(kbid="kbid", storage_key="storage_key")
53
- msg = Mock(data=bmr.SerializeToString(), headers={"X-MESSAGE-TYPE": "PROXY"})
54
-
55
- storage.get_stream_message.return_value = bm.SerializeToString()
56
-
57
- assert bm == await consumer.get_broker_message(msg)
58
-
59
- storage.get_stream_message.assert_awaited_once_with("storage_key")
60
-
61
-
62
- @pytest.mark.asyncio
63
- async def test_clean_broker_message_proxied(consumer: IngestConsumer, storage):
64
- bmr = BrokerMessageBlobReference(kbid="kbid", storage_key="storage_key")
65
- msg = Mock(data=bmr.SerializeToString(), headers={"X-MESSAGE-TYPE": "PROXY"})
66
-
67
- await consumer.clean_broker_message(msg)
68
-
69
- storage.del_stream_message.assert_awaited_once_with("storage_key")
@@ -1,60 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- from unittest.mock import AsyncMock, MagicMock, patch
20
-
21
- import pytest
22
-
23
- from nucliadb.ingest.consumer.pull import PullWorker
24
-
25
-
26
- class TestPullWorker:
27
- """
28
- It's a complex class so this might get a little messy with mocks
29
-
30
- It should be refactor at some point and these tests be rewritten/removed
31
- """
32
-
33
- @pytest.fixture()
34
- def processor(self):
35
- processor = AsyncMock()
36
- with patch("nucliadb.ingest.consumer.pull.Processor", return_value=processor):
37
- yield processor
38
-
39
- @pytest.fixture()
40
- def nats_conn(self):
41
- conn = MagicMock()
42
- conn.jetstream.return_value = AsyncMock()
43
- conn.drain = AsyncMock()
44
- conn.close = AsyncMock()
45
- with patch("nucliadb.ingest.consumer.pull.nats.connect", return_value=conn):
46
- yield conn
47
-
48
- @pytest.fixture()
49
- def worker(self, processor):
50
- yield PullWorker(
51
- driver=AsyncMock(),
52
- partition="1",
53
- storage=AsyncMock(),
54
- pull_time_error_backoff=100,
55
- zone="zone",
56
- nuclia_processing_cluster_url="nuclia_processing_cluster_url",
57
- nuclia_public_url="nuclia_public_url",
58
- audit=None,
59
- onprem=False,
60
- )