nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,95 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- from uuid import uuid4
22
-
23
- import pytest
24
-
25
- from nucliadb.tests.utils.aiohttp_session import get_mocked_session
26
-
27
-
28
- @pytest.mark.parametrize("onprem", [True, False])
29
- @pytest.mark.parametrize(
30
- "mock_payload",
31
- [
32
- {"seqid": 1, "account_seq": 1, "queue": "private"},
33
- {"seqid": 1, "account_seq": 1, "queue": "shared"},
34
- {"seqid": 1, "account_seq": None, "queue": "private"},
35
- {"seqid": 1, "account_seq": None, "queue": "shared"},
36
- {"seqid": 1, "queue": "private"},
37
- {"seqid": 1, "queue": "shared"},
38
- ],
39
- )
40
- @pytest.mark.asyncio
41
- async def test_send_to_process(onprem, mock_payload):
42
- """
43
- Test that send_to_process does not fail
44
- """
45
-
46
- from nucliadb.ingest.processing import ProcessingEngine, PushPayload
47
-
48
- fake_nuclia_proxy_url = "http://fake_proxy"
49
- processing_engine = ProcessingEngine(
50
- onprem=onprem,
51
- nuclia_processing_cluster_url=fake_nuclia_proxy_url,
52
- nuclia_public_url=fake_nuclia_proxy_url,
53
- )
54
- await processing_engine.initialize()
55
-
56
- payload = PushPayload(
57
- uuid=str(uuid4()), kbid=str(uuid4()), userid=str(uuid4()), partition=0
58
- )
59
-
60
- processing_engine.session = get_mocked_session(
61
- "POST", 200, json=mock_payload, context_manager=False
62
- )
63
- await processing_engine.send_to_process(payload, partition=0)
64
-
65
- await processing_engine.finalize()
66
-
67
-
68
- @pytest.mark.parametrize("onprem", [True, False])
69
- @pytest.mark.asyncio
70
- async def test_delete_from_processing(onprem):
71
- """
72
- Test that send_to_process does not fail
73
- """
74
-
75
- from nucliadb.ingest.processing import ProcessingEngine
76
-
77
- fake_nuclia_proxy_url = "http://fake_proxy"
78
- processing_engine = ProcessingEngine(
79
- onprem=onprem,
80
- nuclia_processing_cluster_url=fake_nuclia_proxy_url,
81
- nuclia_public_url=fake_nuclia_proxy_url,
82
- )
83
- await processing_engine.initialize()
84
-
85
- processing_engine.session = get_mocked_session(
86
- "POST",
87
- 200,
88
- json={"kbid": "kbid", "resource_id": "resource_id"},
89
- context_manager=False,
90
- )
91
- await processing_engine.delete_from_processing(
92
- kbid="kbid", resource_id="resource_id"
93
- )
94
-
95
- await processing_engine.finalize()
@@ -1,272 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import uuid
21
-
22
- import pytest
23
- from nucliadb_protos.resources_pb2 import (
24
- Classification,
25
- FieldComputedMetadataWrapper,
26
- FieldID,
27
- FieldText,
28
- FieldType,
29
- )
30
- from nucliadb_protos.utils_pb2 import Relation, RelationNode
31
- from nucliadb_protos.writer_pb2 import BrokerMessage
32
-
33
- from nucliadb.ingest import SERVICE_NAME
34
- from nucliadb_utils.utilities import get_indexing, get_storage
35
-
36
-
37
- @pytest.mark.asyncio
38
- async def test_ingest_relations_indexing(
39
- fake_node, local_files, storage, knowledgebox_ingest, processor
40
- ):
41
- rid = str(uuid.uuid4())
42
- bm = BrokerMessage(
43
- kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
44
- )
45
-
46
- e0 = RelationNode(value="E0", ntype=RelationNode.NodeType.ENTITY, subtype="")
47
- e1 = RelationNode(
48
- value="E1", ntype=RelationNode.NodeType.ENTITY, subtype="Official"
49
- )
50
- e2 = RelationNode(
51
- value="E2", ntype=RelationNode.NodeType.ENTITY, subtype="Propaganda"
52
- )
53
- r0 = Relation(
54
- relation=Relation.RelationType.CHILD, source=e1, to=e2, relation_label="R0"
55
- )
56
- r1 = Relation(
57
- relation=Relation.RelationType.ENTITY, source=e0, to=e2, relation_label="R1"
58
- )
59
- r2 = Relation(
60
- relation=Relation.RelationType.CHILD, source=e0, to=e1, relation_label="R2"
61
- )
62
-
63
- bm.relations.extend([r0, r1, r2])
64
-
65
- await processor.process(message=bm, seqid=1)
66
-
67
- index = get_indexing()
68
- storage = await get_storage(service_name=SERVICE_NAME)
69
-
70
- pb = await storage.get_indexing(index._calls[0][1])
71
-
72
- assert len(pb.relations) == 3
73
- assert pb.relations[0] == r0
74
- assert pb.relations[1] == r1
75
- assert pb.relations[2] == r2
76
-
77
-
78
- @pytest.mark.asyncio
79
- async def test_ingest_label_relation_extraction(
80
- fake_node, local_files, storage, knowledgebox_ingest, processor
81
- ):
82
- rid = str(uuid.uuid4())
83
- bm = BrokerMessage(
84
- kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
85
- )
86
-
87
- labels = [
88
- ("labelset-1", "label-1"),
89
- ("labelset-1", "label-2"),
90
- ("labelset-2", "label-1"),
91
- ("labelset-2", "label-3"),
92
- ]
93
- bm.basic.usermetadata.classifications.extend(
94
- [Classification(labelset=labelset, label=label) for labelset, label in labels]
95
- )
96
-
97
- await processor.process(message=bm, seqid=1)
98
-
99
- index = get_indexing()
100
- storage = await get_storage(service_name=SERVICE_NAME)
101
-
102
- pb = await storage.get_indexing(index._calls[0][1])
103
-
104
- for i, (labelset, label) in enumerate(labels):
105
- assert pb.relations[i].relation == Relation.RelationType.ABOUT
106
- assert pb.relations[i].source.value == rid
107
- assert pb.relations[i].to.value == f"{labelset}/{label}"
108
-
109
-
110
- @pytest.mark.asyncio
111
- async def test_ingest_colab_relation_extraction(
112
- fake_node, local_files, storage, knowledgebox_ingest, processor
113
- ):
114
- rid = str(uuid.uuid4())
115
- bm = BrokerMessage(
116
- kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
117
- )
118
-
119
- collaborators = ["Alice", "Bob", "Trudy"]
120
- bm.origin.colaborators.extend(collaborators)
121
-
122
- await processor.process(message=bm, seqid=1)
123
-
124
- index = get_indexing()
125
- storage = await get_storage(service_name=SERVICE_NAME)
126
-
127
- pb = await storage.get_indexing(index._calls[0][1])
128
-
129
- for i, collaborator in enumerate(collaborators):
130
- assert pb.relations[i].relation == Relation.RelationType.COLAB
131
- assert pb.relations[i].source.value == rid
132
- assert pb.relations[i].to.value == collaborator
133
-
134
-
135
- @pytest.mark.asyncio
136
- async def test_ingest_field_metadata_relation_extraction(
137
- fake_node, local_files, storage, knowledgebox_ingest, processor
138
- ):
139
- rid = str(uuid.uuid4())
140
- bm = BrokerMessage(
141
- kbid=knowledgebox_ingest,
142
- uuid=rid,
143
- slug="slug-1",
144
- type=BrokerMessage.AUTOCOMMIT,
145
- texts={
146
- "title": FieldText(
147
- body="Title with metadata",
148
- format=FieldText.Format.PLAIN,
149
- )
150
- },
151
- )
152
-
153
- fcmw = FieldComputedMetadataWrapper(
154
- field=FieldID(
155
- field_type=FieldType.TEXT,
156
- field="title",
157
- )
158
- )
159
- fcmw.metadata.metadata.positions["subtype-1/value-1"].entity = "value-1"
160
- fcmw.metadata.metadata.positions["subtype-1/value-2"].entity = "value-2"
161
-
162
- fcmw.metadata.metadata.classifications.extend(
163
- [
164
- Classification(labelset="ls1", label="label1"),
165
- ]
166
- )
167
-
168
- bm.field_metadata.append(fcmw)
169
-
170
- await processor.process(message=bm, seqid=1)
171
-
172
- index = get_indexing()
173
- storage = await get_storage(service_name=SERVICE_NAME)
174
-
175
- pb = await storage.get_indexing(index._calls[0][1])
176
-
177
- generated_relations = [
178
- # From ner metadata
179
- Relation(
180
- relation=Relation.RelationType.ENTITY,
181
- source=RelationNode(value=rid, ntype=RelationNode.NodeType.RESOURCE),
182
- to=RelationNode(
183
- value="value-1", ntype=RelationNode.NodeType.ENTITY, subtype="subtype-1"
184
- ),
185
- ),
186
- Relation(
187
- relation=Relation.RelationType.ENTITY,
188
- source=RelationNode(value=rid, ntype=RelationNode.NodeType.RESOURCE),
189
- to=RelationNode(
190
- value="value-2", ntype=RelationNode.NodeType.ENTITY, subtype="subtype-1"
191
- ),
192
- ),
193
- # From classification metadata
194
- Relation(
195
- relation=Relation.RelationType.ABOUT,
196
- source=RelationNode(value=rid, ntype=RelationNode.NodeType.RESOURCE),
197
- to=RelationNode(
198
- value="ls1/label1",
199
- ntype=RelationNode.NodeType.LABEL,
200
- ),
201
- ),
202
- ]
203
- for generated_relation in generated_relations:
204
- assert generated_relation in pb.relations
205
-
206
-
207
- @pytest.mark.asyncio
208
- async def test_ingest_field_relations_relation_extraction(
209
- fake_node, local_files, storage, knowledgebox_ingest, processor
210
- ):
211
- rid = str(uuid.uuid4())
212
- bm = BrokerMessage(
213
- kbid=knowledgebox_ingest, uuid=rid, slug="slug-1", type=BrokerMessage.AUTOCOMMIT
214
- )
215
-
216
- relationnode = RelationNode(
217
- value=rid, ntype=RelationNode.NodeType.RESOURCE, subtype="subtype-1"
218
- )
219
- test_relations = [
220
- Relation(
221
- relation=Relation.RelationType.CHILD,
222
- source=relationnode,
223
- to=RelationNode(
224
- value="document",
225
- ntype=RelationNode.NodeType.RESOURCE,
226
- ),
227
- ),
228
- Relation(
229
- relation=Relation.RelationType.ABOUT,
230
- source=relationnode,
231
- to=RelationNode(
232
- value="label",
233
- ntype=RelationNode.NodeType.LABEL,
234
- ),
235
- ),
236
- Relation(
237
- relation=Relation.RelationType.ENTITY,
238
- source=relationnode,
239
- to=RelationNode(
240
- value="entity",
241
- ntype=RelationNode.NodeType.ENTITY,
242
- ),
243
- ),
244
- Relation(
245
- relation=Relation.RelationType.COLAB,
246
- source=relationnode,
247
- to=RelationNode(
248
- value="user",
249
- ntype=RelationNode.NodeType.USER,
250
- ),
251
- ),
252
- Relation(
253
- relation=Relation.RelationType.OTHER,
254
- source=relationnode,
255
- to=RelationNode(
256
- value="other",
257
- ntype=RelationNode.NodeType.RESOURCE,
258
- ),
259
- ),
260
- ]
261
- bm.relations.extend(test_relations)
262
-
263
- await processor.process(message=bm, seqid=1)
264
-
265
- index = get_indexing()
266
- storage = await get_storage(service_name=SERVICE_NAME)
267
-
268
- pb = await storage.get_indexing(index._calls[0][1])
269
-
270
- assert len(pb.relations) == len(test_relations)
271
- for relation in test_relations:
272
- assert relation in pb.relations
@@ -1,18 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -1,140 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- import asyncio
22
- from unittest.mock import AsyncMock, MagicMock, patch
23
-
24
- import pytest
25
- from nucliadb_protos.audit_pb2 import AuditKBCounter, AuditRequest
26
- from nucliadb_protos.writer_pb2 import BrokerMessage, Notification, ShardObject
27
-
28
- from nucliadb.ingest.consumer import auditing
29
- from nucliadb_protos import nodereader_pb2
30
-
31
- pytestmark = pytest.mark.asyncio
32
-
33
-
34
- @pytest.fixture()
35
- def pubsub():
36
- mock = AsyncMock()
37
- mock.parse = lambda x: x
38
- yield mock
39
-
40
-
41
- @pytest.fixture()
42
- def reader():
43
- yield AsyncMock()
44
-
45
-
46
- @pytest.fixture()
47
- def shard_manager(reader):
48
- nm = MagicMock()
49
- node = MagicMock(reader=reader)
50
- nm.get_shards_by_kbid = AsyncMock(return_value=[ShardObject()])
51
- with patch(
52
- "nucliadb.ingest.consumer.auditing.get_shard_manager", return_value=nm
53
- ), patch(
54
- "nucliadb.ingest.consumer.auditing.choose_node",
55
- return_value=(node, "shard_id"),
56
- ):
57
- yield nm
58
-
59
-
60
- @pytest.fixture()
61
- def audit():
62
- yield AsyncMock()
63
-
64
-
65
- @pytest.fixture()
66
- async def index_audit_handler(pubsub, audit, shard_manager):
67
- iah = auditing.IndexAuditHandler(
68
- driver=AsyncMock(transaction=MagicMock(return_value=AsyncMock())),
69
- audit=audit,
70
- pubsub=pubsub,
71
- check_delay=0.05,
72
- )
73
- await iah.initialize()
74
- yield iah
75
- await iah.finalize()
76
-
77
-
78
- @pytest.fixture()
79
- async def writes_audit_handler(pubsub, audit, shard_manager):
80
- rwah = auditing.ResourceWritesAuditHandler(
81
- driver=AsyncMock(transaction=MagicMock(return_value=AsyncMock())),
82
- storage=AsyncMock(),
83
- audit=audit,
84
- pubsub=pubsub,
85
- )
86
- await rwah.initialize()
87
- yield rwah
88
- await rwah.finalize()
89
-
90
-
91
- async def test_handle_message(
92
- index_audit_handler: auditing.IndexAuditHandler, reader, audit
93
- ):
94
- reader.GetShard.return_value = nodereader_pb2.Shard(fields=5, paragraphs=6)
95
-
96
- notif = Notification(
97
- kbid="kbid",
98
- action=Notification.Action.INDEXED,
99
- )
100
- await index_audit_handler.handle_message(notif.SerializeToString())
101
-
102
- await asyncio.sleep(0.06)
103
-
104
- audit.report.assert_called_with(
105
- kbid="kbid",
106
- audit_type=AuditRequest.AuditType.INDEXED,
107
- kb_counter=AuditKBCounter(fields=5, paragraphs=6),
108
- )
109
-
110
-
111
- async def test_handle_message_ignore_not_indexed(
112
- index_audit_handler: auditing.IndexAuditHandler, audit
113
- ):
114
- notif = Notification(
115
- kbid="kbid",
116
- action=Notification.Action.COMMIT,
117
- )
118
- await index_audit_handler.handle_message(notif.SerializeToString())
119
-
120
- await index_audit_handler.finalize()
121
-
122
- audit.report.assert_not_called()
123
-
124
-
125
- async def test_resource_handle_message_processor_messages_are_not_audited(
126
- writes_audit_handler: auditing.ResourceWritesAuditHandler, audit
127
- ):
128
- message = BrokerMessage()
129
- message.source = BrokerMessage.MessageSource.PROCESSOR
130
- notif = Notification(
131
- kbid="kbid",
132
- action=Notification.Action.COMMIT,
133
- message=message,
134
- write_type=Notification.WriteType.MODIFIED,
135
- )
136
- await writes_audit_handler.handle_message(notif.SerializeToString())
137
-
138
- await writes_audit_handler.finalize()
139
-
140
- audit.report.assert_not_called()
@@ -1,69 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from unittest.mock import AsyncMock, MagicMock, Mock
21
-
22
- import pytest
23
- from nucliadb_protos.writer_pb2 import BrokerMessage, BrokerMessageBlobReference
24
-
25
- from nucliadb.ingest.consumer.consumer import IngestConsumer
26
-
27
-
28
- @pytest.fixture()
29
- def storage():
30
- mock = MagicMock()
31
- mock.get_stream_message = AsyncMock()
32
- mock.del_stream_message = AsyncMock()
33
- yield mock
34
-
35
-
36
- @pytest.fixture()
37
- def consumer(storage):
38
- yield IngestConsumer(None, "partition", storage, None)
39
-
40
-
41
- @pytest.mark.asyncio
42
- async def test_get_broker_message(consumer: IngestConsumer, storage):
43
- bm = BrokerMessage(kbid="kbid")
44
- msg = Mock(data=bm.SerializeToString(), headers={})
45
- assert bm == await consumer.get_broker_message(msg)
46
- storage.get_stream_message.assert_not_called()
47
-
48
-
49
- @pytest.mark.asyncio
50
- async def test_get_broker_message_proxied(consumer: IngestConsumer, storage):
51
- bm = BrokerMessage(kbid="kbid")
52
- bmr = BrokerMessageBlobReference(kbid="kbid", storage_key="storage_key")
53
- msg = Mock(data=bmr.SerializeToString(), headers={"X-MESSAGE-TYPE": "PROXY"})
54
-
55
- storage.get_stream_message.return_value = bm.SerializeToString()
56
-
57
- assert bm == await consumer.get_broker_message(msg)
58
-
59
- storage.get_stream_message.assert_awaited_once_with("storage_key")
60
-
61
-
62
- @pytest.mark.asyncio
63
- async def test_clean_broker_message_proxied(consumer: IngestConsumer, storage):
64
- bmr = BrokerMessageBlobReference(kbid="kbid", storage_key="storage_key")
65
- msg = Mock(data=bmr.SerializeToString(), headers={"X-MESSAGE-TYPE": "PROXY"})
66
-
67
- await consumer.clean_broker_message(msg)
68
-
69
- storage.del_stream_message.assert_awaited_once_with("storage_key")
@@ -1,60 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- from unittest.mock import AsyncMock, MagicMock, patch
20
-
21
- import pytest
22
-
23
- from nucliadb.ingest.consumer.pull import PullWorker
24
-
25
-
26
- class TestPullWorker:
27
- """
28
- It's a complex class so this might get a little messy with mocks
29
-
30
- It should be refactor at some point and these tests be rewritten/removed
31
- """
32
-
33
- @pytest.fixture()
34
- def processor(self):
35
- processor = AsyncMock()
36
- with patch("nucliadb.ingest.consumer.pull.Processor", return_value=processor):
37
- yield processor
38
-
39
- @pytest.fixture()
40
- def nats_conn(self):
41
- conn = MagicMock()
42
- conn.jetstream.return_value = AsyncMock()
43
- conn.drain = AsyncMock()
44
- conn.close = AsyncMock()
45
- with patch("nucliadb.ingest.consumer.pull.nats.connect", return_value=conn):
46
- yield conn
47
-
48
- @pytest.fixture()
49
- def worker(self, processor):
50
- yield PullWorker(
51
- driver=AsyncMock(),
52
- partition="1",
53
- storage=AsyncMock(),
54
- pull_time_error_backoff=100,
55
- zone="zone",
56
- nuclia_processing_cluster_url="nuclia_processing_cluster_url",
57
- nuclia_public_url="nuclia_public_url",
58
- audit=None,
59
- onprem=False,
60
- )