nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,771 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import logging
21
- import uuid
22
- from dataclasses import dataclass
23
- from datetime import datetime
24
- from os.path import dirname, getsize
25
- from typing import Optional
26
- from unittest.mock import AsyncMock, patch
27
-
28
- import nats
29
- import pytest
30
- from grpc import aio
31
- from nucliadb_protos.knowledgebox_pb2 import SemanticModelMetadata
32
- from nucliadb_protos.writer_pb2 import BrokerMessage
33
-
34
- from nucliadb.common.cluster import manager
35
- from nucliadb.common.cluster.settings import settings as cluster_settings
36
- from nucliadb.common.maindb.driver import Driver
37
- from nucliadb.ingest.consumer import service as consumer_service
38
- from nucliadb.ingest.fields.base import Field
39
- from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
40
- from nucliadb.ingest.orm.processor import Processor
41
- from nucliadb.ingest.orm.resource import KB_REVERSE, Resource
42
- from nucliadb.ingest.service.writer import WriterServicer
43
- from nucliadb.ingest.settings import settings
44
- from nucliadb.ingest.tests.vectors import V1, V2, V3
45
- from nucliadb.learning_proxy import LearningConfiguration
46
- from nucliadb_protos import resources_pb2 as rpb
47
- from nucliadb_protos import utils_pb2 as upb
48
- from nucliadb_protos import writer_pb2_grpc
49
- from nucliadb_utils import const
50
- from nucliadb_utils.audit.basic import BasicAuditStorage
51
- from nucliadb_utils.audit.stream import StreamAuditStorage
52
- from nucliadb_utils.cache.nats import NatsPubsub
53
- from nucliadb_utils.indexing import IndexingUtility
54
- from nucliadb_utils.settings import indexing_settings, transaction_settings
55
- from nucliadb_utils.storages.settings import settings as storage_settings
56
- from nucliadb_utils.storages.storage import Storage
57
- from nucliadb_utils.utilities import (
58
- Utility,
59
- clean_utility,
60
- clear_global_cache,
61
- get_utility,
62
- set_utility,
63
- start_nats_manager,
64
- start_transaction_utility,
65
- stop_nats_manager,
66
- stop_transaction_utility,
67
- )
68
-
69
- logger = logging.getLogger(__name__)
70
-
71
-
72
- @pytest.fixture(scope="function")
73
- async def processor(maindb_driver, storage, pubsub):
74
- proc = Processor(maindb_driver, storage, pubsub, partition="1")
75
- yield proc
76
-
77
-
78
- @pytest.fixture(scope="function")
79
- async def stream_processor(maindb_driver, storage, pubsub):
80
- proc = Processor(maindb_driver, storage, pubsub, partition="1")
81
- yield proc
82
-
83
-
84
- @pytest.fixture(scope="function")
85
- async def local_files():
86
- storage_settings.local_testing_files = f"{dirname(__file__)}"
87
-
88
-
89
- @dataclass
90
- class IngestFixture:
91
- servicer: WriterServicer
92
- channel: aio.Channel
93
- host: str
94
- serv: aio.Server
95
-
96
-
97
- @pytest.fixture(scope="function")
98
- async def ingest_consumers(
99
- redis_config, transaction_utility, storage, fake_node, nats_manager
100
- ):
101
- ingest_consumers_finalizer = await consumer_service.start_ingest_consumers()
102
-
103
- yield
104
-
105
- await ingest_consumers_finalizer()
106
- clear_global_cache()
107
-
108
-
109
- @pytest.fixture(scope="function")
110
- async def ingest_processed_consumer(
111
- redis_config, transaction_utility, storage, fake_node, nats_manager
112
- ):
113
- ingest_consumer_finalizer = await consumer_service.start_ingest_processed_consumer()
114
-
115
- yield
116
-
117
- await ingest_consumer_finalizer()
118
- clear_global_cache()
119
-
120
-
121
- @pytest.fixture(scope="function")
122
- async def grpc_servicer(
123
- maindb_driver, ingest_consumers, ingest_processed_consumer, learning_config
124
- ):
125
- servicer = WriterServicer()
126
- await servicer.initialize()
127
-
128
- server = aio.server()
129
- port = server.add_insecure_port("[::]:0")
130
- writer_pb2_grpc.add_WriterServicer_to_server(servicer, server)
131
- await server.start()
132
- _channel = aio.insecure_channel(f"127.0.0.1:{port}")
133
- yield IngestFixture(
134
- channel=_channel,
135
- serv=server,
136
- servicer=servicer,
137
- host=f"127.0.0.1:{port}",
138
- )
139
- await servicer.finalize()
140
- await _channel.close()
141
- await server.stop(None)
142
-
143
-
144
- @pytest.fixture(scope="function")
145
- async def pubsub(natsd):
146
- pubsub = get_utility(Utility.PUBSUB)
147
- if pubsub is None:
148
- pubsub = NatsPubsub(hosts=[natsd])
149
- await pubsub.initialize()
150
- set_utility(Utility.PUBSUB, pubsub)
151
-
152
- yield pubsub
153
- clean_utility(Utility.PUBSUB)
154
- await pubsub.finalize()
155
-
156
-
157
- @pytest.fixture(scope="function")
158
- async def fake_node(indexing_utility, shard_manager):
159
- manager.INDEX_NODES.clear()
160
- manager.add_index_node(
161
- id=str(uuid.uuid4()),
162
- address="nohost",
163
- shard_count=0,
164
- available_disk=100,
165
- dummy=True,
166
- )
167
- manager.add_index_node(
168
- id=str(uuid.uuid4()),
169
- address="nohost",
170
- shard_count=0,
171
- available_disk=100,
172
- dummy=True,
173
- )
174
-
175
- with patch.object(cluster_settings, "standalone_mode", False):
176
- yield
177
-
178
- manager.INDEX_NODES.clear()
179
-
180
-
181
- @pytest.fixture()
182
- def learning_config():
183
- lconfig = LearningConfiguration(
184
- semantic_model="multilingual",
185
- semantic_threshold=None,
186
- semantic_vector_size=None,
187
- semantic_vector_similarity="cosine",
188
- )
189
- with patch("nucliadb.ingest.service.writer.learning_proxy") as mocked:
190
- mocked.set_configuration = AsyncMock(return_value=None)
191
- mocked.get_configuration = AsyncMock(return_value=lconfig)
192
- mocked.delete_configuration = AsyncMock(return_value=None)
193
- yield mocked
194
-
195
-
196
- @pytest.fixture(scope="function")
197
- async def knowledgebox_ingest(
198
- storage, maindb_driver: Driver, shard_manager, learning_config
199
- ):
200
- kbid = str(uuid.uuid4())
201
- kbslug = str(uuid.uuid4())
202
- async with maindb_driver.transaction() as txn:
203
- model = SemanticModelMetadata(similarity_function=upb.VectorSimilarity.COSINE)
204
- await KnowledgeBox.create(txn, kbslug, model, uuid=kbid)
205
- await txn.commit()
206
-
207
- yield kbid
208
-
209
- async with maindb_driver.transaction() as txn:
210
- await KnowledgeBox.delete_kb(txn, kbslug, kbid)
211
- await txn.commit()
212
-
213
-
214
- @pytest.fixture(scope="function")
215
- async def audit():
216
- return BasicAuditStorage()
217
-
218
-
219
- @pytest.fixture(scope="function")
220
- async def stream_audit(natsd: str):
221
- from nucliadb_utils.settings import audit_settings
222
-
223
- audit = StreamAuditStorage(
224
- [natsd],
225
- audit_settings.audit_jetstream_target, # type: ignore
226
- audit_settings.audit_partitions,
227
- audit_settings.audit_hash_seed,
228
- )
229
- await audit.initialize()
230
- yield audit
231
- await audit.finalize()
232
-
233
-
234
@pytest.fixture(scope="function")
async def indexing_utility(natsd, _clean_natsd):
    """Register a dummy ``IndexingUtility`` for the duration of a test.

    The utility is built from ``indexing_settings``, initialized, and stored
    under ``Utility.INDEXING``; on teardown it is unregistered and finalized.
    Nothing is yielded to the test itself.
    """
    options = {
        "nats_creds": indexing_settings.index_jetstream_auth,
        "nats_servers": indexing_settings.index_jetstream_servers,
        "dummy": True,
    }
    utility = IndexingUtility(**options)
    await utility.initialize()
    set_utility(Utility.INDEXING, utility)

    yield

    clean_utility(Utility.INDEXING)
    await utility.finalize()
248
-
249
-
250
@pytest.fixture(scope="function")
async def _clean_natsd(natsd):
    """Reset the JetStream consumers and streams used by the tests.

    Connects to the NATS server at ``natsd``, deletes the known consumers,
    then deletes and re-creates the ingest and index streams. Finally points
    ``indexing_settings.index_jetstream_servers`` at the same server.

    The connection is drained and closed in a ``finally`` block so it is not
    leaked when one of the JetStream calls raises.
    """
    consumers = [
        (const.Streams.INGEST.name, const.Streams.INGEST.group.format(partition="1")),
        (const.Streams.INGEST_PROCESSED.name, const.Streams.INGEST_PROCESSED.group),
        (const.Streams.INDEX.name, const.Streams.INDEX.group.format(node="1")),
    ]
    streams = [
        (const.Streams.INGEST.name, const.Streams.INGEST.subject.format(partition=">")),
        (const.Streams.INDEX.name, const.Streams.INDEX.subject.format(node="*")),
    ]

    nc = await nats.connect(servers=[natsd])
    try:
        js = nc.jetstream()

        for stream, consumer in consumers:
            try:
                await js.delete_consumer(stream, consumer)
            except nats.js.errors.NotFoundError:
                # Best effort: the consumer may simply not exist yet.
                pass

        for stream, subject in streams:
            try:
                await js.delete_stream(stream)
            except nats.js.errors.NotFoundError:
                # Best effort: the stream may simply not exist yet.
                pass

            # Always re-create the stream so every test starts from an empty one.
            await js.add_stream(name=stream, subjects=[subject])
    finally:
        # Release the connection even when the reset above failed.
        await nc.drain()
        await nc.close()

    indexing_settings.index_jetstream_servers = [natsd]

    yield
284
-
285
-
286
@pytest.fixture(scope="function")
async def nats_manager(natsd):
    """Start the NATS manager against the test server and stop it afterwards."""
    servers = [natsd]
    manager = await start_nats_manager("service_name", servers, None)
    yield manager
    await stop_nats_manager()
291
-
292
-
293
@pytest.fixture(scope="function")
async def transaction_utility(natsd, pubsub):
    """Yield a started transaction utility bound to the test NATS server.

    ``transaction_settings.transaction_jetstream_servers`` is overwritten with
    the test server before the utility is started; the utility is stopped on
    teardown.
    """
    transaction_settings.transaction_jetstream_servers = [natsd]
    started_utility = await start_transaction_utility()
    yield started_utility
    await stop_transaction_utility()
299
-
300
-
301
# Directory, next to this module, that holds the binary assets referenced below.
_ORM_ASSETS_DIR = f"{dirname(__file__)}/integration/orm/assets"

# Cloud file describing the PNG thumbnail asset; its size is read from disk.
THUMBNAIL = rpb.CloudFile(
    uri="thumbnail.png",
    source=rpb.CloudFile.Source.LOCAL,
    bucket_name="/integration/orm/assets",
    size=getsize(f"{_ORM_ASSETS_DIR}/thumbnail.png"),
    content_type="image/png",
    filename="thumbnail.png",
)

# Cloud file describing the ``text.pb`` asset, including its expected md5.
TEST_CLOUDFILE_FILENAME = "text.pb"
TEST_CLOUDFILE = rpb.CloudFile(
    uri=TEST_CLOUDFILE_FILENAME,
    source=rpb.CloudFile.Source.LOCAL,
    bucket_name="/integration/orm/assets",
    size=getsize(f"{_ORM_ASSETS_DIR}/{TEST_CLOUDFILE_FILENAME}"),
    content_type="application/octet-stream",
    filename=TEST_CLOUDFILE_FILENAME,
    md5="01cca3f53edb934a445a3112c6caa652",
)
322
-
323
-
324
- # HELPERS
325
-
326
-
327
async def make_field(field, extracted_text):
    """Attach extracted text, metadata, large metadata and vectors to ``field``.

    Each payload is built by the matching ``make_*`` helper using the field's
    id, and stored through the field's corresponding setter, in that order.
    """
    text_payload = make_extracted_text(field.id, body=extracted_text)
    await field.set_extracted_text(text_payload)

    metadata_payload = make_field_metadata(field.id)
    await field.set_field_metadata(metadata_payload)

    large_metadata_payload = make_field_large_metadata(field.id)
    await field.set_large_field_metadata(large_metadata_payload)

    vectors_payload = make_extracted_vectors(field.id)
    await field.set_vectors(vectors_payload)
332
-
333
-
334
def make_extracted_text(field_id, body: str):
    """Build an ``ExtractedTextWrapper`` for a TEXT field with the given body."""
    target_field = rpb.FieldID(field_type=rpb.FieldType.TEXT, field=field_id)

    wrapper = rpb.ExtractedTextWrapper()
    wrapper.field.CopyFrom(target_field)
    wrapper.body.text = body
    return wrapper
339
-
340
-
341
def make_field_metadata(field_id):
    """Build a ``FieldComputedMetadataWrapper`` for a TEXT field.

    The metadata carries one link, one paragraph (with a single sentence and a
    paragraph-level classification), one field-level classification, four
    "last_*" timestamps, the shared thumbnail and two positions for the
    ``ENTITY/document`` entity.
    """
    wrapper = rpb.FieldComputedMetadataWrapper()
    wrapper.field.CopyFrom(rpb.FieldID(field_type=rpb.FieldType.TEXT, field=field_id))

    metadata = wrapper.metadata.metadata
    metadata.links.append("https://nuclia.com")

    # The paragraph must be fully populated before it is appended.
    paragraph = rpb.Paragraph(start=0, end=20)
    paragraph.sentences.append(rpb.Sentence(start=0, end=20, key=""))
    paragraph.classifications.append(
        rpb.Classification(labelset="paragraph-labelset", label="label1")
    )
    metadata.paragraphs.append(paragraph)
    metadata.classifications.append(
        rpb.Classification(labelset="labelset1", label="label1")
    )

    # NOTE(review): datetime.now() is naive local time; confirm whether UTC is
    # what these timestamps are expected to hold.
    for timestamp in (
        metadata.last_index,
        metadata.last_understanding,
        metadata.last_extract,
        metadata.last_summary,
    ):
        timestamp.FromDatetime(datetime.now())

    metadata.thumbnail.CopyFrom(THUMBNAIL)

    document_positions = metadata.positions["ENTITY/document"]
    document_positions.entity = "document"
    document_positions.position.extend(
        [rpb.Position(start=0, end=5), rpb.Position(start=13, end=18)]
    )
    return wrapper
364
-
365
-
366
def make_field_large_metadata(field_id):
    """Build a ``LargeComputedMetadataWrapper`` with two entities and one token count."""
    wrapper = rpb.LargeComputedMetadataWrapper()
    wrapper.field.CopyFrom(rpb.FieldID(field_type=rpb.FieldType.TEXT, field=field_id))

    large_metadata = wrapper.real.metadata
    large_metadata.entities.extend(
        [
            rpb.Entity(token="tok1", root="tok", type="NAME"),
            rpb.Entity(token="tok2", root="tok2", type="NAME"),
        ]
    )
    large_metadata.tokens["tok"] = 3
    return wrapper
375
-
376
-
377
def make_extracted_vectors(field_id):
    """Build an ``ExtractedVectorsWrapper`` holding a single vector for a TEXT field."""
    wrapper = rpb.ExtractedVectorsWrapper()
    wrapper.field.CopyFrom(rpb.FieldID(field_type=rpb.FieldType.TEXT, field=field_id))

    only_vector = rpb.Vector(start=0, end=20, vector=b"ansjkdn")
    wrapper.vectors.vectors.vectors.append(only_vector)
    return wrapper
383
-
384
-
385
@pytest.fixture(scope="function")
async def test_resource(storage, maindb_driver, knowledgebox_ingest, fake_node):
    """
    Yield a resource populated by ``create_resource`` and clean it afterwards.
    """
    populated = await create_resource(
        storage=storage,
        driver=maindb_driver,
        knowledgebox_ingest=knowledgebox_ingest,
    )
    yield populated
    populated.clean()
397
-
398
-
399
@pytest.fixture(scope="function")
def partition_settings():
    """Configure ``settings`` as replica 1 out of 4 and yield the settings object."""
    # NOTE(review): the previous values are not restored after the test.
    settings.replica_number, settings.total_replicas = 1, 4

    yield settings
405
-
406
-
407
def broker_resource(
    knowledgebox: str, rid: Optional[str] = None, slug: Optional[str] = None
) -> BrokerMessage:
    """Build an AUTOCOMMIT ``BrokerMessage`` describing a small sample resource.

    The message contains basic metadata, a ``file`` field, extracted text for
    the file, summary and title, computed metadata with two paragraphs, and
    three vectors (``V1``, ``V2``, ``V3``) for the file field.

    ``rid`` defaults to a random uuid and ``slug`` to ``"<rid>slug1"``.
    """

    def extracted_text(field_name, field_type, text):
        # One extracted-text wrapper for the given field.
        wrapper = rpb.ExtractedTextWrapper()
        wrapper.body.text = text
        wrapper.field.field = field_name
        wrapper.field.field_type = field_type
        return wrapper

    def vector(start, end, start_paragraph, end_paragraph, values):
        # One vector covering [start, end) inside the given paragraph span.
        vec = rpb.Vector()
        vec.start = start
        vec.end = end
        vec.start_paragraph = start_paragraph
        vec.end_paragraph = end_paragraph
        vec.vector.extend(values)
        return vec

    if rid is None:
        rid = str(uuid.uuid4())
    if slug is None:
        slug = f"{rid}slug1"

    bm: BrokerMessage = BrokerMessage(
        kbid=knowledgebox,
        uuid=rid,
        slug=slug,
        type=BrokerMessage.AUTOCOMMIT,
    )

    # Basic metadata and origin.
    basic = bm.basic
    basic.icon = "text/plain"
    basic.title = "Title Resource"
    basic.summary = "Summary of document"
    basic.thumbnail = "doc"
    basic.layout = "default"
    basic.metadata.useful = True
    basic.metadata.language = "es"
    basic.created.FromDatetime(datetime.now())
    basic.modified.FromDatetime(datetime.now())
    bm.origin.source = rpb.Origin.Source.WEB

    # File field.
    cloud_file = bm.files["file"].file
    cloud_file.uri = "http://nofile"
    cloud_file.size = 0
    cloud_file.source = rpb.CloudFile.Source.LOCAL

    # Extracted texts: file, summary, title (in that order).
    bm.extracted_text.append(
        extracted_text(
            "file",
            rpb.FieldType.FILE,
            "My own text Ramon. This is great to be here. \n Where is my beer?",
        )
    )
    bm.extracted_text.append(
        extracted_text("summary", rpb.FieldType.GENERIC, "Summary of document")
    )
    bm.extracted_text.append(
        extracted_text("title", rpb.FieldType.GENERIC, "Title Resource")
    )

    # Computed metadata for the file field.
    computed = rpb.FieldComputedMetadataWrapper()
    computed.field.field = "file"
    computed.field.field_type = rpb.FieldType.FILE

    first_paragraph = rpb.Paragraph(start=0, end=45)
    first_paragraph.start_seconds.append(0)
    first_paragraph.end_seconds.append(10)

    second_paragraph = rpb.Paragraph(start=47, end=64)
    second_paragraph.start_seconds.extend([10, 20])
    second_paragraph.end_seconds.extend([20, 30])

    field_metadata = computed.metadata.metadata
    field_metadata.paragraphs.append(first_paragraph)
    field_metadata.paragraphs.append(second_paragraph)
    field_metadata.last_index.FromDatetime(datetime.now())
    field_metadata.last_understanding.FromDatetime(datetime.now())
    field_metadata.last_extract.FromDatetime(datetime.now())
    field_metadata.ner["Ramon"] = "PERSON"

    classification = rpb.Classification()
    classification.label = "label1"
    classification.labelset = "labelset1"
    field_metadata.classifications.append(classification)
    bm.field_metadata.append(computed)

    # Vectors for the file field.
    vectors_wrapper = rpb.ExtractedVectorsWrapper()
    vectors_wrapper.field.field = "file"
    vectors_wrapper.field.field_type = rpb.FieldType.FILE

    for vec in (
        vector(0, 19, 0, 45, V1),
        vector(20, 45, 0, 45, V2),
        vector(48, 65, 47, 64, V3),
    ):
        vectors_wrapper.vectors.vectors.vectors.append(vec)

    bm.field_vectors.append(vectors_wrapper)
    bm.source = BrokerMessage.MessageSource.WRITER
    return bm
517
-
518
-
519
async def create_resource(
    storage: Storage, driver: Driver, knowledgebox_ingest: str
) -> Resource:
    """Create, populate and commit a resource inside ``knowledgebox_ingest``.

    A transaction is opened on ``driver``, a resource with a random uuid and
    slug ``"slug"`` is added, and then basic metadata, relations, origin, one
    field of each kind (file, link, text, layout, conversation, keywordset,
    datetime), user vectors and question/answers are stored. The transaction
    is committed and the resource object is returned.
    """
    txn = await driver.begin()

    rid = str(uuid.uuid4())
    kb = KnowledgeBox(txn, storage, kbid=knowledgebox_ingest)
    resource = await kb.add_resource(uuid=rid, slug="slug")
    await resource.set_slug()

    def child_relation() -> upb.Relation:
        # CHILD relation from this resource to the resource "000001".
        resource_node = upb.RelationNode.NodeType.RESOURCE
        return upb.Relation(
            relation=upb.Relation.CHILD,
            source=upb.RelationNode(value=rid, ntype=resource_node),
            to=upb.RelationNode(value="000001", ntype=resource_node),
        )

    async def register(field) -> None:
        # Record the field id on the resource, then attach the sample payloads.
        await add_field_id(resource, field)
        await make_field(field, "MyText")

    # 1. ROOT ELEMENTS
    # 1.1 BASIC

    basic = rpb.Basic(
        title="My title",
        summary="My summary",
        icon="text/plain",
        layout="basic",
        thumbnail="/file",
        last_seqid=1,
        last_account_seq=2,
    )
    basic.metadata.metadata["key"] = "value"
    basic.metadata.language = "ca"
    basic.metadata.useful = True
    basic.metadata.status = rpb.Metadata.Status.PROCESSED

    basic.usermetadata.classifications.append(
        rpb.Classification(labelset="labelset1", label="label1")
    )
    basic.usermetadata.relations.append(child_relation())
    basic.fieldmetadata.append(
        rpb.UserFieldMetadata(
            token=[rpb.TokenSplit(token="My home", klass="Location")],
            field=rpb.FieldID(field_type=rpb.FieldType.TEXT, field="text1"),
        )
    )
    basic.created.FromDatetime(datetime.utcnow())
    basic.modified.FromDatetime(datetime.utcnow())

    await resource.set_basic(basic)

    # 1.2 RELATIONS

    await resource.set_relations([child_relation()])

    # 1.3 ORIGIN

    origin = rpb.Origin()
    origin.source = rpb.Origin.Source.API
    origin.source_id = "My Source"
    origin.created.FromDatetime(datetime.now())
    origin.modified.FromDatetime(datetime.now())

    await resource.set_origin(origin)

    # 2. FIELDS
    #
    # Add an example of each of the fields, containing all possible metadata

    # Title and summary (generic fields, fetched without loading)
    for generic_name in ("title", "summary"):
        generic_field = await resource.get_field(
            generic_name, rpb.FieldType.GENERIC, load=False
        )
        await make_field(generic_field, "MyText")

    # 2.1 FILE FIELD

    file_payload = rpb.FieldFile(language="es")
    file_payload.added.FromDatetime(datetime.now())
    file_payload.file.CopyFrom(TEST_CLOUDFILE)

    await register(await resource.set_field(rpb.FieldType.FILE, "file1", file_payload))

    # 2.2 LINK FIELD

    # NOTE(review): "htts" looks like a typo, but it is runtime data and is
    # reproduced as-is.
    link_payload = rpb.FieldLink(
        uri="htts://nuclia.cloud",
        language="ca",
    )
    link_payload.added.FromDatetime(datetime.now())
    link_payload.headers["AUTHORIZATION"] = "Bearer xxxxx"
    link_field = await resource.set_field(rpb.FieldType.LINK, "link1", link_payload)

    link_extracted = rpb.LinkExtractedData()
    link_extracted.date.FromDatetime(datetime.now())
    link_extracted.language = "ca"
    link_extracted.title = "My Title"
    link_extracted.field = "link1"
    link_extracted.link_preview.CopyFrom(THUMBNAIL)
    link_extracted.link_thumbnail.CopyFrom(THUMBNAIL)

    await link_field.set_link_extracted_data(link_extracted)
    await register(link_field)

    # 2.3 TEXT FIELDS

    text_payload = rpb.FieldText(
        body="This is my text field", format=rpb.FieldText.Format.PLAIN
    )
    await register(await resource.set_field(rpb.FieldType.TEXT, "text1", text_payload))

    # 2.4 LAYOUT FIELD

    layout_payload = rpb.FieldLayout(format=rpb.FieldLayout.Format.NUCLIAv1)
    block = layout_payload.body.blocks["field1"]
    block.x = 0
    block.y = 0
    block.cols = 1
    block.rows = 1
    block.type = rpb.Block.TypeBlock.TITLE
    block.payload = "{}"
    block.file.CopyFrom(TEST_CLOUDFILE)

    await register(
        await resource.set_field(rpb.FieldType.LAYOUT, "layout1", layout_payload)
    )

    # 2.5 CONVERSATION FIELD

    def make_message(
        text: str, files: Optional[list[rpb.CloudFile]] = None
    ) -> rpb.Message:
        message = rpb.Message(who="myself")
        message.timestamp.FromDatetime(datetime.now())
        message.content.text = text
        message.content.format = rpb.MessageContent.Format.PLAIN

        for attachment in files or []:
            message.content.attachments.append(attachment)
        return message

    conversation = rpb.Conversation()
    for index in range(300):
        # Only message 33 carries attachments.
        attachments = [TEST_CLOUDFILE, THUMBNAIL] if index == 33 else None
        conversation.messages.append(make_message(f"{index} hello", files=attachments))

    await register(
        await resource.set_field(rpb.FieldType.CONVERSATION, "conv1", conversation)
    )

    # 2.6 KEYWORDSET FIELD

    keywordset_payload = rpb.FieldKeywordset(
        keywords=[rpb.Keyword(value="kw1"), rpb.Keyword(value="kw2")]
    )
    await register(
        await resource.set_field(
            rpb.FieldType.KEYWORDSET, "keywordset1", keywordset_payload
        )
    )

    # 2.7 DATETIMES FIELD

    datetime_payload = rpb.FieldDatetime()
    datetime_payload.value.FromDatetime(datetime.now())
    await register(
        await resource.set_field(rpb.FieldType.DATETIME, "datetime1", datetime_payload)
    )

    # 3 USER VECTORS

    field_obj = await resource.get_field("datetime1", type=rpb.FieldType.DATETIME)
    user_vectors = rpb.UserVectorsWrapper()
    user_vectors.vectors.vectors["vectorset1"].vectors["vector1"].vector.extend(
        (0.1, 0.2, 0.3)
    )
    await field_obj.set_user_vectors(user_vectors)

    # Q/A
    question_answers = rpb.FieldQuestionAnswerWrapper()
    for index in range(10):
        paragraph_ids = [f"id1/{index}", f"id2/{index}"]

        qa = rpb.QuestionAnswer()
        qa.question.text = f"My question {index}"
        qa.question.language = "catalan"
        qa.question.ids_paragraphs.extend(paragraph_ids)

        answer = rpb.Answers()
        answer.text = f"My answer {index}"
        answer.language = "catalan"
        answer.ids_paragraphs.extend(paragraph_ids)
        qa.answers.append(answer)

        question_answers.question_answers.question_answer.append(qa)

    await field_obj.set_question_answers(question_answers)

    await txn.commit()
    return resource
749
-
750
-
751
async def add_field_id(resource: Resource, field: Field):
    """Register ``field`` in the resource's list of all field ids.

    The field's type is translated through ``KB_REVERSE`` before building the
    ``FieldID`` that is passed to ``update_all_field_ids``.
    """
    updated_id = rpb.FieldID(field_type=KB_REVERSE[field.type], field=field.id)
    await resource.update_all_field_ids(updated=[updated_id])
755
-
756
-
757
@pytest.fixture(scope="function")
async def entities_manager_mock():
    """EntitiesManager mock for the ingest gRPC API that disables the indexed
    entities functionality. As these tests don't start up a node, this mock
    allows testing ingest's gRPC API, while the whole entities functionality is
    properly tested in tests not using this fixture.

    """
    klass = "nucliadb.ingest.service.writer.EntitiesManager"
    indexed_entities_patch = patch(
        f"{klass}.get_indexed_entities_group", AsyncMock(return_value=None)
    )
    all_shards_patch = patch(
        "nucliadb.common.cluster.manager.KBShardManager.apply_for_all_shards",
        AsyncMock(return_value=[]),
    )
    with indexed_entities_patch, all_shards_patch:
        yield