nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,139 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- import asyncio
22
- from unittest.mock import AsyncMock, MagicMock, patch
23
-
24
- import pytest
25
- from nucliadb_protos.writer_pb2 import Notification, ShardObject, Shards
26
-
27
- from nucliadb.common.cluster.settings import settings
28
- from nucliadb.ingest.consumer import shard_creator
29
- from nucliadb_protos import nodereader_pb2
30
-
31
- pytestmark = pytest.mark.asyncio
32
-
33
-
34
- @pytest.fixture()
35
- def pubsub():
36
- mock = AsyncMock()
37
- mock.parse = lambda x: x
38
- yield mock
39
-
40
-
41
- @pytest.fixture()
42
- def reader():
43
- yield AsyncMock()
44
-
45
-
46
- @pytest.fixture()
47
- def kbdm():
48
- mock = MagicMock()
49
- mock.get_model_metadata = AsyncMock(return_value="model")
50
- with patch("nucliadb.common.cluster.manager.datamanagers.kb", return_value=mock):
51
- yield mock
52
-
53
-
54
- @pytest.fixture()
55
- def shard_manager(reader):
56
- sm = MagicMock()
57
- node = MagicMock(reader=reader)
58
- shards = Shards(shards=[ShardObject(read_only=False)], actual=0)
59
- sm.get_current_active_shard = AsyncMock(return_value=shards.shards[0])
60
- sm.maybe_create_new_shard = AsyncMock()
61
- with patch(
62
- "nucliadb.ingest.consumer.shard_creator.get_shard_manager", return_value=sm
63
- ), patch(
64
- "nucliadb.ingest.consumer.shard_creator.choose_node",
65
- return_value=(node, "shard_id"),
66
- ), patch(
67
- "nucliadb.ingest.consumer.shard_creator.datamanagers.cluster.get_kb_shards",
68
- AsyncMock(return_value=shards),
69
- ), patch(
70
- "nucliadb.ingest.consumer.shard_creator.locking.distributed_lock",
71
- return_value=AsyncMock(),
72
- ):
73
- yield sm
74
-
75
-
76
- @pytest.fixture()
77
- async def shard_creator_handler(pubsub, shard_manager):
78
- sc = shard_creator.ShardCreatorHandler(
79
- driver=AsyncMock(transaction=MagicMock(return_value=AsyncMock())),
80
- storage=AsyncMock(),
81
- pubsub=pubsub,
82
- check_delay=0.05,
83
- )
84
- await sc.initialize()
85
- yield sc
86
- await sc.finalize()
87
-
88
-
89
- async def test_handle_message_create_new_shard(
90
- shard_creator_handler: shard_creator.ShardCreatorHandler,
91
- reader,
92
- kbdm,
93
- shard_manager,
94
- ):
95
- reader.GetShard.return_value = nodereader_pb2.Shard(
96
- paragraphs=settings.max_shard_paragraphs + 1
97
- )
98
-
99
- notif = Notification(
100
- kbid="kbid",
101
- action=Notification.Action.INDEXED,
102
- )
103
- await shard_creator_handler.handle_message(notif.SerializeToString())
104
- await asyncio.sleep(0.06)
105
- shard_manager.maybe_create_new_shard.assert_called_with(
106
- "kbid", settings.max_shard_paragraphs + 1, 0, 0
107
- )
108
-
109
-
110
- async def test_handle_message_do_not_create(
111
- shard_creator_handler: shard_creator.ShardCreatorHandler, reader, shard_manager
112
- ):
113
- reader.GetShard.return_value = nodereader_pb2.Shard(
114
- paragraphs=settings.max_shard_paragraphs - 1
115
- )
116
-
117
- notif = Notification(
118
- kbid="kbid",
119
- action=Notification.Action.INDEXED,
120
- )
121
- await shard_creator_handler.handle_message(notif.SerializeToString())
122
-
123
- await shard_creator_handler.finalize()
124
-
125
- shard_manager.create_shard_by_kbid.assert_not_called()
126
-
127
-
128
- async def test_handle_message_ignore_not_indexed(
129
- shard_creator_handler: shard_creator.ShardCreatorHandler, shard_manager
130
- ):
131
- notif = Notification(
132
- kbid="kbid",
133
- action=Notification.Action.COMMIT,
134
- )
135
- await shard_creator_handler.handle_message(notif.SerializeToString())
136
-
137
- await shard_creator_handler.finalize()
138
-
139
- shard_manager.create_shard_by_kbid.assert_not_called()
@@ -1,67 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- import asyncio
22
-
23
- import pytest
24
-
25
- from nucliadb.ingest.consumer import utils
26
-
27
- pytestmark = pytest.mark.asyncio
28
-
29
-
30
- async def test_delay_task_handler():
31
- dth = utils.DelayedTaskHandler(0.05)
32
- await dth.initialize()
33
-
34
- counter = 0
35
-
36
- async def handler():
37
- await asyncio.sleep(0.1)
38
- nonlocal counter
39
- counter += 1
40
-
41
- dth.schedule("key1", handler)
42
- dth.schedule("key1", handler)
43
- dth.schedule("key1", handler)
44
- dth.schedule("key2", handler)
45
- dth.schedule("key3", handler)
46
- dth.schedule("key4", handler)
47
-
48
- # all should be scheduled and duplicates ignored
49
- assert len(dth.to_process) == 4
50
-
51
- await asyncio.sleep(0.06)
52
- # they should all be running now
53
- assert len(dth.outstanding_tasks) == 4
54
-
55
- # schedule a couple more
56
- dth.schedule("key1", handler) # duplicate key, should get rescheduled at end
57
- dth.schedule("key5", handler)
58
- dth.schedule("key6", handler)
59
-
60
- await asyncio.sleep(0.1)
61
- # original set should be finished now
62
- assert counter == 4
63
-
64
- # finish everything now
65
- await dth.finalize()
66
-
67
- assert counter == 7
@@ -1,19 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
@@ -1,247 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from uuid import uuid4
21
-
22
- import pytest
23
- from nucliadb_protos.noderesources_pb2 import Resource as PBResource
24
- from nucliadb_protos.resources_pb2 import (
25
- Basic,
26
- ExtractedText,
27
- FieldComputedMetadataWrapper,
28
- FieldID,
29
- FieldType,
30
- Metadata,
31
- Paragraph,
32
- Sentence,
33
- )
34
-
35
- from nucliadb.ingest.orm.brain import ParagraphPages, ResourceBrain
36
- from nucliadb_protos import resources_pb2
37
-
38
-
39
- def test_apply_field_metadata_marks_duplicated_paragraphs():
40
- # Simulate a field with two paragraphs that contain the same text
41
- br = ResourceBrain(rid=str(uuid4()))
42
- field_key = "text1"
43
- fcmw = FieldComputedMetadataWrapper()
44
- fcmw.field.CopyFrom(FieldID(field_type=FieldType.TEXT, field=field_key))
45
- paragraph = "Some paragraph here. "
46
- text_1 = f"{paragraph}{paragraph}"
47
- first_occurrence = [0, len(paragraph)]
48
- second_occurrence = [len(paragraph), len(paragraph) * 2]
49
-
50
- et = ExtractedText(text=text_1)
51
- p1 = Paragraph(start=first_occurrence[0], end=first_occurrence[1])
52
- p1.sentences.append(
53
- Sentence(start=first_occurrence[0], end=first_occurrence[1], key="test")
54
- )
55
- p2 = Paragraph(start=second_occurrence[0], end=second_occurrence[1])
56
- p2.sentences.append(
57
- Sentence(start=second_occurrence[0], end=second_occurrence[1], key="test")
58
- )
59
- fcmw.metadata.metadata.paragraphs.append(p1)
60
- fcmw.metadata.metadata.paragraphs.append(p2)
61
-
62
- br.apply_field_metadata(
63
- field_key,
64
- fcmw.metadata,
65
- replace_field=[],
66
- replace_splits={},
67
- page_positions={},
68
- extracted_text=et,
69
- )
70
-
71
- assert len(br.brain.paragraphs[field_key].paragraphs) == 2
72
- for key, paragraph in br.brain.paragraphs[field_key].paragraphs.items():
73
- if f"{first_occurrence[0]}-{first_occurrence[1]}" in key:
74
- # Only the first time that a paragraph is found should be set to false
75
- assert paragraph.repeated_in_field is False
76
- else:
77
- assert paragraph.repeated_in_field is True
78
-
79
-
80
- def test_apply_field_metadata_marks_duplicated_paragraphs_on_split_metadata():
81
- # # Test now the split text path
82
- br = ResourceBrain(rid=str(uuid4()))
83
- field_key = "text1"
84
- split_key = "subfield"
85
- fcmw = FieldComputedMetadataWrapper()
86
- fcmw.field.CopyFrom(FieldID(field_type=FieldType.TEXT, field=field_key))
87
- paragraph = "Some paragraph here. "
88
- text_1 = f"{paragraph}{paragraph}"
89
- first_occurrence = [0, len(paragraph)]
90
- second_occurrence = [len(paragraph), len(paragraph) * 2]
91
-
92
- et = ExtractedText()
93
- et.split_text[split_key] = text_1
94
- p1 = Paragraph(start=first_occurrence[0], end=first_occurrence[1])
95
- p1.sentences.append(
96
- Sentence(start=first_occurrence[0], end=first_occurrence[1], key="test")
97
- )
98
- p2 = Paragraph(start=second_occurrence[0], end=second_occurrence[1])
99
- p2.sentences.append(
100
- Sentence(start=second_occurrence[0], end=second_occurrence[1], key="test")
101
- )
102
- fcmw.metadata.split_metadata[split_key].paragraphs.append(p1)
103
- fcmw.metadata.split_metadata[split_key].paragraphs.append(p2)
104
-
105
- br.apply_field_metadata(
106
- field_key,
107
- fcmw.metadata,
108
- replace_field=[],
109
- replace_splits={},
110
- page_positions={},
111
- extracted_text=et,
112
- )
113
-
114
- assert len(br.brain.paragraphs[field_key].paragraphs) == 2
115
- for key, paragraph in br.brain.paragraphs[field_key].paragraphs.items():
116
- if f"{first_occurrence[0]}-{first_occurrence[1]}" in key:
117
- # Only the first time that a paragraph is found should be set to false
118
- assert paragraph.repeated_in_field is False
119
- else:
120
- assert paragraph.repeated_in_field is True
121
-
122
-
123
- def test_get_page_number():
124
- page_numbers = ParagraphPages(
125
- {
126
- 0: (0, 99),
127
- 1: (100, 199),
128
- 2: (200, 299),
129
- }
130
- )
131
- assert page_numbers.get(10) == 0
132
- assert page_numbers.get(100) == 1
133
- assert page_numbers.get(500) == 2
134
-
135
-
136
- @pytest.mark.parametrize(
137
- "new_status,previous_status,expected_brain_status",
138
- [
139
- # No previous_status
140
- (Metadata.Status.PENDING, None, PBResource.PENDING),
141
- (Metadata.Status.PROCESSED, None, PBResource.PROCESSED),
142
- (Metadata.Status.ERROR, None, PBResource.PROCESSED),
143
- (Metadata.Status.BLOCKED, None, PBResource.PROCESSED),
144
- (Metadata.Status.EXPIRED, None, PBResource.PROCESSED),
145
- # previous_status = PENDING
146
- (Metadata.Status.PENDING, Metadata.Status.PENDING, PBResource.PENDING),
147
- (Metadata.Status.PROCESSED, Metadata.Status.PENDING, PBResource.PROCESSED),
148
- (Metadata.Status.ERROR, Metadata.Status.PENDING, PBResource.PROCESSED),
149
- (Metadata.Status.BLOCKED, Metadata.Status.PENDING, PBResource.PROCESSED),
150
- (Metadata.Status.EXPIRED, Metadata.Status.PENDING, PBResource.PROCESSED),
151
- # previous_status = PROCESSED
152
- (Metadata.Status.PROCESSED, Metadata.Status.PROCESSED, PBResource.PROCESSED),
153
- (Metadata.Status.ERROR, Metadata.Status.PROCESSED, PBResource.PROCESSED),
154
- (Metadata.Status.BLOCKED, Metadata.Status.PROCESSED, PBResource.PROCESSED),
155
- (Metadata.Status.PENDING, Metadata.Status.PROCESSED, PBResource.PROCESSED),
156
- (Metadata.Status.EXPIRED, Metadata.Status.PROCESSED, PBResource.PROCESSED),
157
- # previous_status = ERROR
158
- (Metadata.Status.PENDING, Metadata.Status.ERROR, PBResource.PROCESSED),
159
- (Metadata.Status.PROCESSED, Metadata.Status.ERROR, PBResource.PROCESSED),
160
- (Metadata.Status.ERROR, Metadata.Status.ERROR, PBResource.PROCESSED),
161
- (Metadata.Status.BLOCKED, Metadata.Status.ERROR, PBResource.PROCESSED),
162
- (Metadata.Status.EXPIRED, Metadata.Status.ERROR, PBResource.PROCESSED),
163
- # previous_status = BLOCKED
164
- (Metadata.Status.PENDING, Metadata.Status.BLOCKED, PBResource.PROCESSED),
165
- (Metadata.Status.PROCESSED, Metadata.Status.BLOCKED, PBResource.PROCESSED),
166
- (Metadata.Status.ERROR, Metadata.Status.BLOCKED, PBResource.PROCESSED),
167
- (Metadata.Status.BLOCKED, Metadata.Status.BLOCKED, PBResource.PROCESSED),
168
- (Metadata.Status.EXPIRED, Metadata.Status.BLOCKED, PBResource.PROCESSED),
169
- # previous_status = EXPIRED
170
- (Metadata.Status.PENDING, Metadata.Status.EXPIRED, PBResource.PROCESSED),
171
- (Metadata.Status.PROCESSED, Metadata.Status.EXPIRED, PBResource.PROCESSED),
172
- (Metadata.Status.ERROR, Metadata.Status.EXPIRED, PBResource.PROCESSED),
173
- (Metadata.Status.BLOCKED, Metadata.Status.EXPIRED, PBResource.PROCESSED),
174
- (Metadata.Status.EXPIRED, Metadata.Status.EXPIRED, PBResource.PROCESSED),
175
- ],
176
- )
177
- def test_set_processing_status(new_status, previous_status, expected_brain_status):
178
- br = ResourceBrain(rid="foo")
179
- basic = Basic()
180
- basic.metadata.status = new_status
181
- br.set_processing_status(basic, previous_status)
182
- assert br.brain.status == expected_brain_status
183
-
184
-
185
- def test_apply_field_metadata_populates_page_number():
186
- br = ResourceBrain(rid="foo")
187
- field_key = "text1"
188
-
189
- fcmw = FieldComputedMetadataWrapper()
190
- fcmw.field.CopyFrom(FieldID(field_type=FieldType.TEXT, field=field_key))
191
-
192
- p1 = Paragraph(
193
- start=40, end=54, start_seconds=[0], end_seconds=[10], text="Some text here"
194
- )
195
- p1.sentences.append(Sentence(start=40, end=54, key="test"))
196
- fcmw.metadata.metadata.paragraphs.append(p1)
197
-
198
- # Add it to the split too
199
- fcmw.metadata.split_metadata["subfield"].paragraphs.append(p1)
200
-
201
- page_positions = {
202
- 0: (0, 20),
203
- 1: (21, 39),
204
- 2: (40, 100),
205
- }
206
- br.apply_field_metadata(
207
- field_key,
208
- fcmw.metadata,
209
- replace_field=[],
210
- replace_splits={},
211
- page_positions=page_positions,
212
- extracted_text=None,
213
- )
214
-
215
- assert len(br.brain.paragraphs[field_key].paragraphs) == 2
216
- for paragraph in br.brain.paragraphs[field_key].paragraphs.values():
217
- assert paragraph.metadata.position.page_number == 2
218
- assert paragraph.metadata.position.start == 40
219
- assert paragraph.metadata.position.end == 54
220
- assert paragraph.metadata.position.start_seconds == [0]
221
- assert paragraph.metadata.position.end_seconds == [10]
222
-
223
-
224
- def test_set_resource_metadata_promotes_origin_dates():
225
- resource_brain = ResourceBrain("rid")
226
- basic = Basic()
227
- basic.created.seconds = 1
228
- basic.modified.seconds = 2
229
- origin = resources_pb2.Origin()
230
- origin.created.seconds = 3
231
- origin.modified.seconds = 4
232
-
233
- resource_brain.set_resource_metadata(basic, origin)
234
-
235
- assert resource_brain.brain.metadata.created.seconds == 3
236
- assert resource_brain.brain.metadata.modified.seconds == 4
237
-
238
-
239
- def test_set_resource_metadata_handles_timestamp_not_present():
240
- resource_brain = ResourceBrain("rid")
241
- basic = Basic()
242
- resource_brain.set_resource_metadata(basic, None)
243
- created = resource_brain.brain.metadata.created.seconds
244
- modified = resource_brain.brain.metadata.modified.seconds
245
- assert created > 0
246
- assert modified > 0
247
- assert modified >= created
@@ -1,131 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
-
20
- from unittest.mock import AsyncMock, MagicMock, Mock, patch
21
-
22
- import pytest
23
-
24
- from nucliadb.common.cluster.settings import settings as cluster_settings
25
- from nucliadb.ingest.orm.exceptions import ResourceNotIndexable
26
- from nucliadb.ingest.orm.processor import Processor, validate_indexable_resource
27
- from nucliadb_protos import noderesources_pb2
28
-
29
-
30
- @pytest.fixture()
31
- def txn():
32
- yield AsyncMock()
33
-
34
-
35
- @pytest.fixture()
36
- def driver(txn):
37
- mock = MagicMock()
38
- mock.transaction.return_value.__aenter__.return_value = txn
39
- yield mock
40
-
41
-
42
- @pytest.fixture()
43
- def sm():
44
- mock = AsyncMock()
45
- mock.add_resource = AsyncMock()
46
- with patch("nucliadb.ingest.orm.processor.get_shard_manager", return_value=mock):
47
- yield mock
48
-
49
-
50
- @pytest.fixture()
51
- def processor(driver, sm):
52
- yield Processor(driver, None)
53
-
54
-
55
- @pytest.fixture()
56
- def resource():
57
- mock = MagicMock()
58
- mock.set_basic = AsyncMock()
59
- yield mock
60
-
61
-
62
- @pytest.fixture()
63
- def kb():
64
- mock = MagicMock(kbid="kbid")
65
- mock.get_resource_shard_id = AsyncMock()
66
- mock.get_resource_shard = AsyncMock()
67
- yield mock
68
-
69
-
70
- async def test_commit_slug(processor: Processor, txn, resource):
71
- another_txn = Mock()
72
- resource.txn = another_txn
73
- resource.set_slug = AsyncMock()
74
-
75
- await processor.commit_slug(resource)
76
-
77
- resource.set_slug.assert_awaited_once()
78
- txn.commit.assert_awaited_once()
79
- assert resource.txn is another_txn
80
-
81
-
82
- async def test_mark_resource_error(processor: Processor, txn, resource, kb, sm):
83
- await processor._mark_resource_error(kb, resource, partition="partition", seqid=1)
84
- txn.commit.assert_called_once()
85
- resource.set_basic.assert_awaited_once()
86
- sm.add_resource.assert_awaited_once_with(
87
- kb.get_resource_shard.return_value,
88
- resource.indexer.brain,
89
- 1,
90
- partition="partition",
91
- kb="kbid",
92
- )
93
-
94
-
95
- async def test_mark_resource_error_handle_error(
96
- processor: Processor, kb, resource, txn
97
- ):
98
- resource.set_basic.side_effect = Exception("test")
99
- await processor._mark_resource_error(kb, resource, partition="partition", seqid=1)
100
- txn.commit.assert_not_called()
101
-
102
-
103
- async def test_mark_resource_error_skip_no_shard(
104
- processor: Processor, resource, driver, kb, txn
105
- ):
106
- kb.get_resource_shard.return_value = None
107
- await processor._mark_resource_error(kb, resource, partition="partition", seqid=1)
108
- txn.commit.assert_not_called()
109
-
110
-
111
- async def test_mark_resource_error_skip_no_resource(
112
- processor: Processor, kb, driver, txn
113
- ):
114
- await processor._mark_resource_error(kb, None, partition="partition", seqid=1)
115
- txn.commit.assert_not_called()
116
-
117
-
118
- def test_validate_indexable_resource():
119
- resource = noderesources_pb2.Resource()
120
- resource.paragraphs["test"].paragraphs["test"].sentences["test"].vector.append(1.0)
121
- validate_indexable_resource(resource)
122
-
123
-
124
- def test_validate_indexable_resource_throws_error_for_max():
125
- resource = noderesources_pb2.Resource()
126
- for i in range(cluster_settings.max_resource_paragraphs + 1):
127
- resource.paragraphs["test"].paragraphs[f"test{i}"].sentences[
128
- "test"
129
- ].vector.append(1.0)
130
- with pytest.raises(ResourceNotIndexable):
131
- validate_indexable_resource(resource)