nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,764 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import logging
21
- import uuid
22
- from dataclasses import dataclass
23
- from datetime import datetime
24
- from os.path import dirname, getsize
25
- from typing import Optional
26
- from unittest.mock import AsyncMock, patch
27
-
28
- import nats
29
- import pytest
30
- from grpc import aio
31
- from nucliadb_protos.knowledgebox_pb2 import SemanticModelMetadata
32
- from nucliadb_protos.writer_pb2 import BrokerMessage
33
-
34
- from nucliadb.common.cluster import manager
35
- from nucliadb.common.cluster.settings import settings as cluster_settings
36
- from nucliadb.common.maindb.driver import Driver
37
- from nucliadb.ingest.consumer import service as consumer_service
38
- from nucliadb.ingest.fields.base import Field
39
- from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
40
- from nucliadb.ingest.orm.processor import Processor
41
- from nucliadb.ingest.orm.resource import KB_REVERSE, Resource
42
- from nucliadb.ingest.service.writer import WriterServicer
43
- from nucliadb.ingest.settings import settings
44
- from nucliadb.ingest.tests.vectors import V1, V2, V3
45
- from nucliadb.learning_proxy import LearningConfiguration
46
- from nucliadb_protos import resources_pb2 as rpb
47
- from nucliadb_protos import utils_pb2 as upb
48
- from nucliadb_protos import writer_pb2_grpc
49
- from nucliadb_utils import const
50
- from nucliadb_utils.audit.basic import BasicAuditStorage
51
- from nucliadb_utils.audit.stream import StreamAuditStorage
52
- from nucliadb_utils.cache.nats import NatsPubsub
53
- from nucliadb_utils.indexing import IndexingUtility
54
- from nucliadb_utils.settings import indexing_settings, transaction_settings
55
- from nucliadb_utils.storages.settings import settings as storage_settings
56
- from nucliadb_utils.storages.storage import Storage
57
- from nucliadb_utils.utilities import (
58
- Utility,
59
- clean_utility,
60
- clear_global_cache,
61
- get_utility,
62
- set_utility,
63
- start_nats_manager,
64
- start_transaction_utility,
65
- stop_nats_manager,
66
- stop_transaction_utility,
67
- )
68
-
69
- logger = logging.getLogger(__name__)
70
-
71
-
72
- @pytest.fixture(scope="function")
73
- async def processor(maindb_driver, storage, pubsub):
74
- proc = Processor(maindb_driver, storage, pubsub, partition="1")
75
- yield proc
76
-
77
-
78
- @pytest.fixture(scope="function")
79
- async def stream_processor(maindb_driver, storage, pubsub):
80
- proc = Processor(maindb_driver, storage, pubsub, partition="1")
81
- yield proc
82
-
83
-
84
- @pytest.fixture(scope="function")
85
- async def local_files():
86
- storage_settings.local_testing_files = f"{dirname(__file__)}"
87
-
88
-
89
- @dataclass
90
- class IngestFixture:
91
- servicer: WriterServicer
92
- channel: aio.Channel
93
- host: str
94
- serv: aio.Server
95
-
96
-
97
- @pytest.fixture(scope="function")
98
- async def ingest_consumers(
99
- redis_config, transaction_utility, storage, fake_node, nats_manager
100
- ):
101
- ingest_consumers_finalizer = await consumer_service.start_ingest_consumers()
102
-
103
- yield
104
-
105
- await ingest_consumers_finalizer()
106
- clear_global_cache()
107
-
108
-
109
- @pytest.fixture(scope="function")
110
- async def ingest_processed_consumer(
111
- redis_config, transaction_utility, storage, fake_node, nats_manager
112
- ):
113
- ingest_consumer_finalizer = await consumer_service.start_ingest_processed_consumer()
114
-
115
- yield
116
-
117
- await ingest_consumer_finalizer()
118
- clear_global_cache()
119
-
120
-
121
- @pytest.fixture(scope="function")
122
- async def grpc_servicer(
123
- maindb_driver, ingest_consumers, ingest_processed_consumer, learning_config
124
- ):
125
- servicer = WriterServicer()
126
- await servicer.initialize()
127
-
128
- server = aio.server()
129
- port = server.add_insecure_port("[::]:0")
130
- writer_pb2_grpc.add_WriterServicer_to_server(servicer, server)
131
- await server.start()
132
- _channel = aio.insecure_channel(f"127.0.0.1:{port}")
133
- yield IngestFixture(
134
- channel=_channel,
135
- serv=server,
136
- servicer=servicer,
137
- host=f"127.0.0.1:{port}",
138
- )
139
- await servicer.finalize()
140
- await _channel.close()
141
- await server.stop(None)
142
-
143
-
144
- @pytest.fixture(scope="function")
145
- async def pubsub(natsd):
146
- pubsub = get_utility(Utility.PUBSUB)
147
- if pubsub is None:
148
- pubsub = NatsPubsub(hosts=[natsd])
149
- await pubsub.initialize()
150
- set_utility(Utility.PUBSUB, pubsub)
151
-
152
- yield pubsub
153
- clean_utility(Utility.PUBSUB)
154
- await pubsub.finalize()
155
-
156
-
157
- @pytest.fixture(scope="function")
158
- async def fake_node(indexing_utility, shard_manager):
159
- manager.INDEX_NODES.clear()
160
- manager.add_index_node(
161
- id=str(uuid.uuid4()),
162
- address="nohost",
163
- shard_count=0,
164
- available_disk=100,
165
- dummy=True,
166
- )
167
- manager.add_index_node(
168
- id=str(uuid.uuid4()),
169
- address="nohost",
170
- shard_count=0,
171
- available_disk=100,
172
- dummy=True,
173
- )
174
-
175
- with patch.object(cluster_settings, "standalone_mode", False):
176
- yield
177
-
178
- manager.INDEX_NODES.clear()
179
-
180
-
181
- @pytest.fixture()
182
- def learning_config():
183
- lconfig = LearningConfiguration(
184
- semantic_model="multilingual",
185
- semantic_threshold=None,
186
- semantic_vector_size=None,
187
- semantic_vector_similarity="cosine",
188
- )
189
- with patch("nucliadb.ingest.service.writer.learning_proxy") as mocked:
190
- mocked.set_configuration = AsyncMock(return_value=None)
191
- mocked.get_configuration = AsyncMock(return_value=lconfig)
192
- mocked.delete_configuration = AsyncMock(return_value=None)
193
- yield mocked
194
-
195
-
196
- @pytest.fixture(scope="function")
197
- async def knowledgebox_ingest(
198
- storage, maindb_driver: Driver, shard_manager, learning_config
199
- ):
200
- kbid = str(uuid.uuid4())
201
- kbslug = str(uuid.uuid4())
202
- async with maindb_driver.transaction() as txn:
203
- model = SemanticModelMetadata(similarity_function=upb.VectorSimilarity.COSINE)
204
- await KnowledgeBox.create(txn, kbslug, model, uuid=kbid)
205
- await txn.commit()
206
-
207
- yield kbid
208
-
209
- async with maindb_driver.transaction() as txn:
210
- await KnowledgeBox.delete_kb(txn, kbid)
211
- await txn.commit()
212
-
213
-
214
- @pytest.fixture(scope="function")
215
- async def audit():
216
- return BasicAuditStorage()
217
-
218
-
219
- @pytest.fixture(scope="function")
220
- async def stream_audit(natsd: str):
221
- from nucliadb_utils.settings import audit_settings
222
-
223
- audit = StreamAuditStorage(
224
- [natsd],
225
- audit_settings.audit_jetstream_target, # type: ignore
226
- audit_settings.audit_partitions,
227
- audit_settings.audit_hash_seed,
228
- )
229
- await audit.initialize()
230
- yield audit
231
- await audit.finalize()
232
-
233
-
234
- @pytest.fixture(scope="function")
235
- async def indexing_utility(natsd, _clean_natsd):
236
- indexing_utility = IndexingUtility(
237
- nats_creds=indexing_settings.index_jetstream_auth,
238
- nats_servers=indexing_settings.index_jetstream_servers,
239
- dummy=True,
240
- )
241
- await indexing_utility.initialize()
242
- set_utility(Utility.INDEXING, indexing_utility)
243
-
244
- yield
245
-
246
- clean_utility(Utility.INDEXING)
247
- await indexing_utility.finalize()
248
-
249
-
250
- @pytest.fixture(scope="function")
251
- async def _clean_natsd(natsd):
252
- nc = await nats.connect(servers=[natsd])
253
- js = nc.jetstream()
254
-
255
- consumers = [
256
- (const.Streams.INGEST.name, const.Streams.INGEST.group.format(partition="1")),
257
- (const.Streams.INGEST_PROCESSED.name, const.Streams.INGEST_PROCESSED.group),
258
- (const.Streams.INDEX.name, const.Streams.INDEX.group.format(node="1")),
259
- ]
260
- for stream, consumer in consumers:
261
- try:
262
- await js.delete_consumer(stream, consumer)
263
- except nats.js.errors.NotFoundError:
264
- pass
265
-
266
- streams = [
267
- (const.Streams.INGEST.name, const.Streams.INGEST.subject.format(partition=">")),
268
- (const.Streams.INDEX.name, const.Streams.INDEX.subject.format(node="*")),
269
- ]
270
- for stream, subject in streams:
271
- try:
272
- await js.delete_stream(stream)
273
- except nats.js.errors.NotFoundError:
274
- pass
275
-
276
- await js.add_stream(name=stream, subjects=[subject])
277
-
278
- await nc.drain()
279
- await nc.close()
280
-
281
- indexing_settings.index_jetstream_servers = [natsd]
282
-
283
- yield
284
-
285
-
286
- @pytest.fixture(scope="function")
287
- async def nats_manager(natsd):
288
- ncm = await start_nats_manager("service_name", [natsd], None)
289
- yield ncm
290
- await stop_nats_manager()
291
-
292
-
293
- @pytest.fixture(scope="function")
294
- async def transaction_utility(natsd, pubsub):
295
- transaction_settings.transaction_jetstream_servers = [natsd]
296
- util = await start_transaction_utility()
297
- yield util
298
- await stop_transaction_utility()
299
-
300
-
301
- THUMBNAIL = rpb.CloudFile(
302
- uri="thumbnail.png",
303
- source=rpb.CloudFile.Source.LOCAL,
304
- bucket_name="/integration/orm/assets",
305
- size=getsize(f"{dirname(__file__)}/integration/orm/assets/thumbnail.png"),
306
- content_type="image/png",
307
- filename="thumbnail.png",
308
- )
309
-
310
- TEST_CLOUDFILE_FILENAME = "text.pb"
311
- TEST_CLOUDFILE = rpb.CloudFile(
312
- uri=TEST_CLOUDFILE_FILENAME,
313
- source=rpb.CloudFile.Source.LOCAL,
314
- bucket_name="/integration/orm/assets",
315
- size=getsize(
316
- f"{dirname(__file__)}/integration/orm/assets/{TEST_CLOUDFILE_FILENAME}"
317
- ),
318
- content_type="application/octet-stream",
319
- filename=TEST_CLOUDFILE_FILENAME,
320
- md5="01cca3f53edb934a445a3112c6caa652",
321
- )
322
-
323
-
324
- # HELPERS
325
-
326
-
327
- async def make_field(field, extracted_text):
328
- await field.set_extracted_text(make_extracted_text(field.id, body=extracted_text))
329
- await field.set_field_metadata(make_field_metadata(field.id))
330
- await field.set_large_field_metadata(make_field_large_metadata(field.id))
331
- await field.set_vectors(make_extracted_vectors(field.id))
332
-
333
-
334
- def make_extracted_text(field_id, body: str):
335
- ex1 = rpb.ExtractedTextWrapper()
336
- ex1.field.CopyFrom(rpb.FieldID(field_type=rpb.FieldType.TEXT, field=field_id))
337
- ex1.body.text = body
338
- return ex1
339
-
340
-
341
- def make_field_metadata(field_id):
342
- ex1 = rpb.FieldComputedMetadataWrapper()
343
- ex1.field.CopyFrom(rpb.FieldID(field_type=rpb.FieldType.TEXT, field=field_id))
344
- ex1.metadata.metadata.links.append("https://nuclia.com")
345
-
346
- p1 = rpb.Paragraph(start=0, end=20)
347
- p1.sentences.append(rpb.Sentence(start=0, end=20, key=""))
348
- cl1 = rpb.Classification(labelset="labelset1", label="label1")
349
- cl2 = rpb.Classification(labelset="paragraph-labelset", label="label1")
350
- p1.classifications.append(cl2)
351
- ex1.metadata.metadata.paragraphs.append(p1)
352
- ex1.metadata.metadata.classifications.append(cl1)
353
- # ex1.metadata.metadata.ner["Ramon"] = "PEOPLE"
354
- ex1.metadata.metadata.last_index.FromDatetime(datetime.now())
355
- ex1.metadata.metadata.last_understanding.FromDatetime(datetime.now())
356
- ex1.metadata.metadata.last_extract.FromDatetime(datetime.now())
357
- ex1.metadata.metadata.last_summary.FromDatetime(datetime.now())
358
- ex1.metadata.metadata.thumbnail.CopyFrom(THUMBNAIL)
359
- ex1.metadata.metadata.positions["ENTITY/document"].entity = "document"
360
- ex1.metadata.metadata.positions["ENTITY/document"].position.extend(
361
- [rpb.Position(start=0, end=5), rpb.Position(start=13, end=18)]
362
- )
363
- return ex1
364
-
365
-
366
- def make_field_large_metadata(field_id):
367
- ex1 = rpb.LargeComputedMetadataWrapper()
368
- ex1.field.CopyFrom(rpb.FieldID(field_type=rpb.FieldType.TEXT, field=field_id))
369
- en1 = rpb.Entity(token="tok1", root="tok", type="NAME")
370
- en2 = rpb.Entity(token="tok2", root="tok2", type="NAME")
371
- ex1.real.metadata.entities.append(en1)
372
- ex1.real.metadata.entities.append(en2)
373
- ex1.real.metadata.tokens["tok"] = 3
374
- return ex1
375
-
376
-
377
- def make_extracted_vectors(field_id):
378
- ex1 = rpb.ExtractedVectorsWrapper()
379
- ex1.field.CopyFrom(rpb.FieldID(field_type=rpb.FieldType.TEXT, field=field_id))
380
- v1 = rpb.Vector(start=0, end=20, vector=b"ansjkdn")
381
- ex1.vectors.vectors.vectors.append(v1)
382
- return ex1
383
-
384
-
385
- @pytest.fixture(scope="function")
386
- async def test_resource(storage, maindb_driver, knowledgebox_ingest, fake_node):
387
- """
388
- Create a resource that has every possible bit of information
389
- """
390
- resource = await create_resource(
391
- storage=storage,
392
- driver=maindb_driver,
393
- knowledgebox_ingest=knowledgebox_ingest,
394
- )
395
- yield resource
396
- resource.clean()
397
-
398
-
399
- @pytest.fixture(scope="function")
400
- def partition_settings():
401
- settings.replica_number = 1
402
- settings.total_replicas = 4
403
-
404
- yield settings
405
-
406
-
407
- def broker_resource(
408
- knowledgebox: str, rid: Optional[str] = None, slug: Optional[str] = None
409
- ) -> BrokerMessage:
410
- if rid is None:
411
- rid = str(uuid.uuid4())
412
- if slug is None:
413
- slug = f"{rid}slug1"
414
-
415
- message1: BrokerMessage = BrokerMessage(
416
- kbid=knowledgebox,
417
- uuid=rid,
418
- slug=slug,
419
- type=BrokerMessage.AUTOCOMMIT,
420
- )
421
-
422
- message1.basic.icon = "text/plain"
423
- message1.basic.title = "Title Resource"
424
- message1.basic.summary = "Summary of document"
425
- message1.basic.thumbnail = "doc"
426
- message1.basic.layout = "default"
427
- message1.basic.metadata.useful = True
428
- message1.basic.metadata.language = "es"
429
- message1.basic.created.FromDatetime(datetime.now())
430
- message1.basic.modified.FromDatetime(datetime.now())
431
- message1.origin.source = rpb.Origin.Source.WEB
432
-
433
- message1.files["file"].file.uri = "http://nofile"
434
- message1.files["file"].file.size = 0
435
- message1.files["file"].file.source = rpb.CloudFile.Source.LOCAL
436
-
437
- etw = rpb.ExtractedTextWrapper()
438
- etw.body.text = "My own text Ramon. This is great to be here. \n Where is my beer?"
439
- etw.field.field = "file"
440
- etw.field.field_type = rpb.FieldType.FILE
441
- message1.extracted_text.append(etw)
442
-
443
- etw = rpb.ExtractedTextWrapper()
444
- etw.body.text = "Summary of document"
445
- etw.field.field = "summary"
446
- etw.field.field_type = rpb.FieldType.GENERIC
447
- message1.extracted_text.append(etw)
448
-
449
- etw = rpb.ExtractedTextWrapper()
450
- etw.body.text = "Title Resource"
451
- etw.field.field = "title"
452
- etw.field.field_type = rpb.FieldType.GENERIC
453
- message1.extracted_text.append(etw)
454
-
455
- fcm = rpb.FieldComputedMetadataWrapper()
456
- fcm.field.field = "file"
457
- fcm.field.field_type = rpb.FieldType.FILE
458
- p1 = rpb.Paragraph(
459
- start=0,
460
- end=45,
461
- )
462
- p1.start_seconds.append(0)
463
- p1.end_seconds.append(10)
464
- p2 = rpb.Paragraph(
465
- start=47,
466
- end=64,
467
- )
468
- p2.start_seconds.append(10)
469
- p2.end_seconds.append(20)
470
- p2.start_seconds.append(20)
471
- p2.end_seconds.append(30)
472
-
473
- fcm.metadata.metadata.paragraphs.append(p1)
474
- fcm.metadata.metadata.paragraphs.append(p2)
475
- fcm.metadata.metadata.last_index.FromDatetime(datetime.now())
476
- fcm.metadata.metadata.last_understanding.FromDatetime(datetime.now())
477
- fcm.metadata.metadata.last_extract.FromDatetime(datetime.now())
478
- fcm.metadata.metadata.ner["Ramon"] = "PERSON"
479
-
480
- c1 = rpb.Classification()
481
- c1.label = "label1"
482
- c1.labelset = "labelset1"
483
- fcm.metadata.metadata.classifications.append(c1)
484
- message1.field_metadata.append(fcm)
485
-
486
- ev = rpb.ExtractedVectorsWrapper()
487
- ev.field.field = "file"
488
- ev.field.field_type = rpb.FieldType.FILE
489
-
490
- v1 = rpb.Vector()
491
- v1.start = 0
492
- v1.end = 19
493
- v1.start_paragraph = 0
494
- v1.end_paragraph = 45
495
- v1.vector.extend(V1)
496
- ev.vectors.vectors.vectors.append(v1)
497
-
498
- v2 = rpb.Vector()
499
- v2.start = 20
500
- v2.end = 45
501
- v2.start_paragraph = 0
502
- v2.end_paragraph = 45
503
- v2.vector.extend(V2)
504
- ev.vectors.vectors.vectors.append(v2)
505
-
506
- v3 = rpb.Vector()
507
- v3.start = 48
508
- v3.end = 65
509
- v3.start_paragraph = 47
510
- v3.end_paragraph = 64
511
- v3.vector.extend(V3)
512
- ev.vectors.vectors.vectors.append(v3)
513
-
514
- message1.field_vectors.append(ev)
515
- message1.source = BrokerMessage.MessageSource.WRITER
516
- return message1
517
-
518
-
519
- async def create_resource(
520
- storage: Storage, driver: Driver, knowledgebox_ingest: str
521
- ) -> Resource:
522
- txn = await driver.begin()
523
-
524
- rid = str(uuid.uuid4())
525
- kb_obj = KnowledgeBox(txn, storage, kbid=knowledgebox_ingest)
526
- test_resource = await kb_obj.add_resource(uuid=rid, slug="slug")
527
- await test_resource.set_slug()
528
-
529
- # 1. ROOT ELEMENTS
530
- # 1.1 BASIC
531
-
532
- basic = rpb.Basic(
533
- title="My title",
534
- summary="My summary",
535
- icon="text/plain",
536
- layout="basic",
537
- thumbnail="/file",
538
- last_seqid=1,
539
- last_account_seq=2,
540
- )
541
- basic.metadata.metadata["key"] = "value"
542
- basic.metadata.language = "ca"
543
- basic.metadata.useful = True
544
- basic.metadata.status = rpb.Metadata.Status.PROCESSED
545
-
546
- cl1 = rpb.Classification(labelset="labelset1", label="label1")
547
- basic.usermetadata.classifications.append(cl1)
548
-
549
- r1 = upb.Relation(
550
- relation=upb.Relation.CHILD,
551
- source=upb.RelationNode(value=rid, ntype=upb.RelationNode.NodeType.RESOURCE),
552
- to=upb.RelationNode(value="000001", ntype=upb.RelationNode.NodeType.RESOURCE),
553
- )
554
-
555
- basic.usermetadata.relations.append(r1)
556
-
557
- ufm1 = rpb.UserFieldMetadata(
558
- token=[rpb.TokenSplit(token="My home", klass="Location")],
559
- field=rpb.FieldID(field_type=rpb.FieldType.TEXT, field="text1"),
560
- )
561
-
562
- basic.fieldmetadata.append(ufm1)
563
- basic.created.FromDatetime(datetime.utcnow())
564
- basic.modified.FromDatetime(datetime.utcnow())
565
-
566
- await test_resource.set_basic(basic)
567
-
568
- # 1.2 RELATIONS
569
-
570
- rels = []
571
- r1 = upb.Relation(
572
- relation=upb.Relation.CHILD,
573
- source=upb.RelationNode(value=rid, ntype=upb.RelationNode.NodeType.RESOURCE),
574
- to=upb.RelationNode(value="000001", ntype=upb.RelationNode.NodeType.RESOURCE),
575
- )
576
-
577
- rels.append(r1)
578
- await test_resource.set_relations(rels)
579
-
580
- # 1.3 ORIGIN
581
-
582
- o2 = rpb.Origin()
583
- o2.source = rpb.Origin.Source.API
584
- o2.source_id = "My Source"
585
- o2.created.FromDatetime(datetime.now())
586
- o2.modified.FromDatetime(datetime.now())
587
-
588
- await test_resource.set_origin(o2)
589
-
590
- # 2. FIELDS
591
- #
592
- # Add an example of each of the files, containing all possible metadata
593
-
594
- # Title
595
- title_field = await test_resource.get_field(
596
- "title", rpb.FieldType.GENERIC, load=False
597
- )
598
- await make_field(title_field, "MyText")
599
-
600
- # Summary
601
- summary_field = await test_resource.get_field(
602
- "summary", rpb.FieldType.GENERIC, load=False
603
- )
604
- await make_field(summary_field, "MyText")
605
-
606
- # 2.1 FILE FIELD
607
-
608
- t2 = rpb.FieldFile(
609
- language="es",
610
- )
611
- t2.added.FromDatetime(datetime.now())
612
- t2.file.CopyFrom(TEST_CLOUDFILE)
613
-
614
- file_field = await test_resource.set_field(rpb.FieldType.FILE, "file1", t2)
615
- await add_field_id(test_resource, file_field)
616
- await make_field(file_field, "MyText")
617
-
618
- # 2.2 LINK FIELD
619
- li2 = rpb.FieldLink(
620
- uri="htts://nuclia.cloud",
621
- language="ca",
622
- )
623
- li2.added.FromDatetime(datetime.now())
624
- li2.headers["AUTHORIZATION"] = "Bearer xxxxx"
625
- linkfield = await test_resource.set_field(rpb.FieldType.LINK, "link1", li2)
626
-
627
- ex1 = rpb.LinkExtractedData()
628
- ex1.date.FromDatetime(datetime.now())
629
- ex1.language = "ca"
630
- ex1.title = "My Title"
631
- ex1.field = "link1"
632
-
633
- ex1.link_preview.CopyFrom(THUMBNAIL)
634
- ex1.link_thumbnail.CopyFrom(THUMBNAIL)
635
-
636
- await linkfield.set_link_extracted_data(ex1)
637
- await add_field_id(test_resource, linkfield)
638
- await make_field(linkfield, "MyText")
639
-
640
- # 2.3 TEXT FIELDS
641
-
642
- t23 = rpb.FieldText(body="This is my text field", format=rpb.FieldText.Format.PLAIN)
643
- textfield = await test_resource.set_field(rpb.FieldType.TEXT, "text1", t23)
644
- await add_field_id(test_resource, textfield)
645
- await make_field(textfield, "MyText")
646
-
647
- # 2.4 LAYOUT FIELD
648
-
649
- l2 = rpb.FieldLayout(format=rpb.FieldLayout.Format.NUCLIAv1)
650
- l2.body.blocks["field1"].x = 0
651
- l2.body.blocks["field1"].y = 0
652
- l2.body.blocks["field1"].cols = 1
653
- l2.body.blocks["field1"].rows = 1
654
- l2.body.blocks["field1"].type = rpb.Block.TypeBlock.TITLE
655
- l2.body.blocks["field1"].payload = "{}"
656
- l2.body.blocks["field1"].file.CopyFrom(TEST_CLOUDFILE)
657
-
658
- layoutfield = await test_resource.set_field(rpb.FieldType.LAYOUT, "layout1", l2)
659
- await add_field_id(test_resource, layoutfield)
660
-
661
- await layoutfield.set_extracted_text(
662
- make_extracted_text(layoutfield.id, body="MyText")
663
- )
664
- await layoutfield.set_field_metadata(make_field_metadata(layoutfield.id))
665
- await layoutfield.set_large_field_metadata(
666
- make_field_large_metadata(layoutfield.id)
667
- )
668
- await layoutfield.set_vectors(make_extracted_vectors(layoutfield.id))
669
-
670
- # 2.5 CONVERSATION FIELD
671
-
672
- def make_message(
673
- text: str, files: Optional[list[rpb.CloudFile]] = None
674
- ) -> rpb.Message:
675
- msg = rpb.Message(
676
- who="myself",
677
- )
678
- msg.timestamp.FromDatetime(datetime.now())
679
- msg.content.text = text
680
- msg.content.format = rpb.MessageContent.Format.PLAIN
681
-
682
- if files:
683
- for file in files:
684
- msg.content.attachments.append(file)
685
- return msg
686
-
687
- c2 = rpb.Conversation()
688
-
689
- for i in range(300):
690
- new_message = make_message(f"{i} hello")
691
- if i == 33:
692
- new_message = make_message(f"{i} hello", files=[TEST_CLOUDFILE, THUMBNAIL])
693
- c2.messages.append(new_message)
694
-
695
- convfield = await test_resource.set_field(rpb.FieldType.CONVERSATION, "conv1", c2)
696
- await add_field_id(test_resource, convfield)
697
- await make_field(convfield, extracted_text="MyText")
698
-
699
- # 2.6 KEYWORDSET FIELD
700
-
701
- k2 = rpb.FieldKeywordset(
702
- keywords=[rpb.Keyword(value="kw1"), rpb.Keyword(value="kw2")]
703
- )
704
- kws_field = await test_resource.set_field(
705
- rpb.FieldType.KEYWORDSET, "keywordset1", k2
706
- )
707
- await add_field_id(test_resource, kws_field)
708
- await make_field(kws_field, "MyText")
709
-
710
- # 2.7 DATETIMES FIELD
711
-
712
- d2 = rpb.FieldDatetime()
713
- d2.value.FromDatetime(datetime.now())
714
- datetime_field = await test_resource.set_field(
715
- rpb.FieldType.DATETIME, "datetime1", d2
716
- )
717
- await add_field_id(test_resource, datetime_field)
718
- await make_field(datetime_field, "MyText")
719
-
720
- field_obj = await test_resource.get_field("datetime1", type=rpb.FieldType.DATETIME)
721
-
722
- # Q/A
723
- question_answers = rpb.FieldQuestionAnswerWrapper()
724
- for i in range(10):
725
- qa = rpb.QuestionAnswer()
726
-
727
- qa.question.text = f"My question {i}"
728
- qa.question.language = "catalan"
729
- qa.question.ids_paragraphs.extend([f"id1/{i}", f"id2/{i}"])
730
-
731
- answer = rpb.Answers()
732
- answer.text = f"My answer {i}"
733
- answer.language = "catalan"
734
- answer.ids_paragraphs.extend([f"id1/{i}", f"id2/{i}"])
735
- qa.answers.append(answer)
736
- question_answers.question_answers.question_answer.append(qa)
737
-
738
- await field_obj.set_question_answers(question_answers)
739
-
740
- await txn.commit()
741
- return test_resource
742
-
743
-
744
- async def add_field_id(resource: Resource, field: Field):
745
- field_type = KB_REVERSE[field.type]
746
- field_id = rpb.FieldID(field_type=field_type, field=field.id)
747
- await resource.update_all_field_ids(updated=[field_id])
748
-
749
-
750
- @pytest.fixture(scope="function")
751
- async def entities_manager_mock():
752
- """EntitiesManager mock for ingest gRPC API disabling indexed entities
753
- functionality. As tests doesn't startup a node, with this mock we allow
754
- testing ingest's gRPC API while the whole entities functionality is properly
755
- tested in tests nos using this fixture.
756
-
757
- """
758
- klass = "nucliadb.ingest.service.writer.EntitiesManager"
759
- with patch(f"{klass}.get_indexed_entities_group", AsyncMock(return_value=None)):
760
- with patch(
761
- "nucliadb.common.cluster.manager.KBShardManager.apply_for_all_shards",
762
- AsyncMock(return_value=[]),
763
- ):
764
- yield