nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -0,0 +1,181 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from typing import cast
22
+
23
+ from nucliadb.ingest.fields.base import Field
24
+ from nucliadb.ingest.fields.conversation import Conversation
25
+ from nucliadb.ingest.fields.file import File
26
+ from nucliadb.ingest.fields.link import Link
27
+ from nucliadb.ingest.orm.resource import Resource
28
+ from nucliadb_protos.resources_pb2 import (
29
+ ExtractedTextWrapper,
30
+ ExtractedVectorsWrapper,
31
+ FieldComputedMetadataWrapper,
32
+ FieldType,
33
+ LargeComputedMetadataWrapper,
34
+ )
35
+ from nucliadb_protos.writer_pb2 import BrokerMessage
36
+
37
+
38
async def generate_broker_message(resource: Resource) -> BrokerMessage:
    """Build a full BrokerMessage describing ``resource``.

    Every field of the resource is visited and its stored data is copied
    into the message. The intent is to download all the pointers except
    those to external files that are not protobuf.

    A dedicated builder is created for each call, so the returned message
    is not shared with any other invocation.
    """
    return await _BrokerMessageBuilder().build_from(resource)
46
+
47
+
48
class _BrokerMessageBuilder:
    """Assemble a BrokerMessage from the data stored for a resource.

    The builder owns a single ``BrokerMessage`` instance (``self.bm``) which
    is cleared at the start of every ``build_from`` call and returned at the
    end of it. Calling ``build_from`` again on the same builder therefore
    overwrites the message handed out by the previous call.
    """

    def __init__(self) -> None:
        self.bm = BrokerMessage()

    async def build_from(self, resource: Resource) -> BrokerMessage:
        """Reset the internal message and fill it from ``resource``.

        Copies the resource identifiers, basic, origin and relations, then
        walks every field and exports its value, extracted text, computed
        metadata, file/link extracted data, vectors and large metadata.

        Returns the builder's own ``BrokerMessage`` instance.
        """
        # Clear the state and generate a new broker message
        self.bm.Clear()

        self.bm.kbid = resource.kb.kbid
        self.bm.uuid = resource.uuid

        basic = await resource.get_basic()
        if basic is not None:
            self.bm.basic.CopyFrom(basic)
        # The slug is read back from the message's own basic, so it takes
        # whatever value that sub-message holds when basic is missing.
        self.bm.slug = self.bm.basic.slug

        origin = await resource.get_origin()
        if origin is not None:
            self.bm.origin.CopyFrom(origin)

        relations = await resource.get_relations()
        if relations is not None:
            self.bm.relations.extend(relations.relations)

        fields = await resource.get_fields(force=True)
        for (type_id, field_id), field in fields.items():
            # Value
            await self.generate_field(type_id, field_id, field)

            # Extracted text
            await self.generate_extracted_text(type_id, field_id, field)

            # Field computed metadata
            await self.generate_field_computed_metadata(type_id, field_id, field)

            # Type-specific extracted data (only files and links carry it)
            if type_id == FieldType.FILE and isinstance(field, File):
                file_extracted_data = await field.get_file_extracted_data()
                if file_extracted_data is not None:
                    self.bm.file_extracted_data.append(file_extracted_data)
            elif type_id == FieldType.LINK and isinstance(field, Link):
                link_extracted_data = await field.get_link_extracted_data()
                if link_extracted_data is not None:
                    self.bm.link_extracted_data.append(link_extracted_data)

            # Field vectors
            await self.generate_field_vectors(type_id, field_id, field)

            # Large metadata
            await self.generate_field_large_computed_metadata(type_id, field_id, field)

        return self.bm

    async def generate_field(
        self,
        type_id: FieldType.ValueType,
        field_id: str,
        field: Field,
    ):
        """Copy the field value into the message map matching ``type_id``.

        Used for exporting a field. Text, link and file fields use
        ``field.get_value()``; conversation fields use
        ``get_full_conversation()``. Any other field type is ignored.

        A field whose value is ``None`` is skipped: ``CopyFrom(None)`` would
        raise and indexing the map would leave an empty entry behind.
        """
        if type_id == FieldType.TEXT:
            value = await field.get_value()
            if value is not None:
                self.bm.texts[field_id].CopyFrom(value)
        elif type_id == FieldType.LINK:
            value = await field.get_value()
            if value is not None:
                self.bm.links[field_id].CopyFrom(value)
        elif type_id == FieldType.FILE:
            value = await field.get_value()
            if value is not None:
                self.bm.files[field_id].CopyFrom(value)
        elif type_id == FieldType.CONVERSATION:
            conversation = cast(Conversation, field)
            value = await conversation.get_full_conversation()
            if value is not None:
                self.bm.conversations[field_id].CopyFrom(value)

    async def generate_extracted_text(
        self,
        type_id: FieldType.ValueType,
        field_id: str,
        field: Field,
    ):
        """Append the field's extracted text to the message, if there is any."""
        extracted_text = await field.get_extracted_text()
        if extracted_text is None:
            return
        etw = ExtractedTextWrapper()
        etw.field.field = field_id
        etw.field.field_type = type_id
        etw.body.CopyFrom(extracted_text)
        self.bm.extracted_text.append(etw)

    async def generate_field_computed_metadata(
        self,
        type_id: FieldType.ValueType,
        field_id: str,
        field: Field,
    ):
        """Append the field's computed metadata to the message, if there is any."""
        field_metadata = await field.get_field_metadata()
        if field_metadata is None:
            return
        fcmw = FieldComputedMetadataWrapper()
        fcmw.field.field = field_id
        fcmw.field.field_type = type_id
        fcmw.metadata.CopyFrom(field_metadata)
        self.bm.field_metadata.append(fcmw)
        # NOTE(review): the previous version ended with the comment "Make sure
        # cloud files are removed for exporting" but no code did so -- confirm
        # whether that step is still required.

    async def generate_field_vectors(
        self,
        type_id: FieldType.ValueType,
        field_id: str,
        field: Field,
    ):
        """Append the field's vectors to the message, if there are any."""
        vo = await field.get_vectors()
        if vo is None:
            return
        evw = ExtractedVectorsWrapper()
        evw.field.field = field_id
        evw.field.field_type = type_id
        evw.vectors.CopyFrom(vo)
        self.bm.field_vectors.append(evw)

    async def generate_field_large_computed_metadata(
        self,
        type_id: FieldType.ValueType,
        field_id: str,
        field: Field,
    ):
        """Append the field's large computed metadata to the message, if any."""
        lcm = await field.get_large_field_metadata()
        if lcm is None:
            return
        lcmw = LargeComputedMetadataWrapper()
        lcmw.field.field = field_id
        lcmw.field.field_type = type_id
        lcmw.real.CopyFrom(lcm)
        self.bm.field_large_metadata.append(lcmw)
@@ -21,24 +21,6 @@
21
21
  import asyncio
22
22
  from typing import AsyncGenerator, Optional
23
23
 
24
- from nucliadb_protos.knowledgebox_pb2 import (
25
- DeletedEntitiesGroups,
26
- EntitiesGroup,
27
- EntitiesGroupSummary,
28
- Entity,
29
- )
30
- from nucliadb_protos.nodereader_pb2 import (
31
- Faceted,
32
- RelationNodeFilter,
33
- RelationPrefixSearchRequest,
34
- RelationSearchRequest,
35
- RelationSearchResponse,
36
- SearchRequest,
37
- SearchResponse,
38
- )
39
- from nucliadb_protos.utils_pb2 import RelationNode
40
- from nucliadb_protos.writer_pb2 import GetEntitiesResponse
41
-
42
24
  from nucliadb.common import datamanagers
43
25
  from nucliadb.common.cluster.base import AbstractIndexNode
44
26
  from nucliadb.common.cluster.exceptions import (
@@ -55,7 +37,25 @@ from nucliadb.common.datamanagers.entities import (
55
37
  from nucliadb.common.maindb.driver import Transaction
56
38
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
57
39
  from nucliadb.ingest.settings import settings
40
+ from nucliadb_protos.knowledgebox_pb2 import (
41
+ DeletedEntitiesGroups,
42
+ EntitiesGroup,
43
+ EntitiesGroupSummary,
44
+ Entity,
45
+ )
46
+ from nucliadb_protos.nodereader_pb2 import (
47
+ Faceted,
48
+ RelationNodeFilter,
49
+ RelationPrefixSearchRequest,
50
+ RelationSearchResponse,
51
+ SearchRequest,
52
+ SearchResponse,
53
+ )
54
+ from nucliadb_protos.utils_pb2 import RelationNode
55
+ from nucliadb_protos.writer_pb2 import GetEntitiesResponse
58
56
  from nucliadb_telemetry import errors
57
+ from nucliadb_utils import const
58
+ from nucliadb_utils.utilities import has_feature
59
59
 
60
60
  from .exceptions import EntityManagementException
61
61
 
@@ -199,37 +199,33 @@ class EntitiesManager:
199
199
  elif stored is not None and indexed is not None:
200
200
  entities_group = self.merge_entities_groups(indexed, stored)
201
201
  else:
202
- entities_group = stored or indexed # type: ignore
202
+ entities_group = stored or indexed
203
203
  return entities_group
204
204
 
205
205
  async def get_stored_entities_group(self, group: str) -> Optional[EntitiesGroup]:
206
- return await datamanagers.entities.get_entities_group(
207
- self.txn, kbid=self.kbid, group=group
208
- )
206
+ return await datamanagers.entities.get_entities_group(self.txn, kbid=self.kbid, group=group)
209
207
 
210
208
  async def get_indexed_entities_group(self, group: str) -> Optional[EntitiesGroup]:
211
209
  shard_manager = get_shard_manager()
212
210
 
213
- async def do_entities_search(
214
- node: AbstractIndexNode, shard_id: str
215
- ) -> RelationSearchResponse:
216
- request = RelationSearchRequest(
217
- shard_id=shard_id,
218
- prefix=RelationPrefixSearchRequest(
211
+ async def do_entities_search(node: AbstractIndexNode, shard_id: str) -> RelationSearchResponse:
212
+ request = SearchRequest(
213
+ shard=shard_id,
214
+ relation_prefix=RelationPrefixSearchRequest(
219
215
  prefix="",
220
216
  node_filters=[
221
- RelationNodeFilter(
222
- node_type=RelationNode.NodeType.ENTITY, node_subtype=group
223
- )
217
+ RelationNodeFilter(node_type=RelationNode.NodeType.ENTITY, node_subtype=group)
224
218
  ],
225
219
  ),
226
220
  )
227
- return await node.reader.RelationSearch(request) # type: ignore
221
+ response = await node.reader.Search(request) # type: ignore
222
+ return response.relation
228
223
 
229
224
  results = await shard_manager.apply_for_all_shards(
230
225
  self.kbid,
231
226
  do_entities_search,
232
227
  settings.relation_search_timeout,
228
+ use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": self.kbid}),
233
229
  use_read_replica_nodes=self.use_read_replica_nodes,
234
230
  )
235
231
  for result in results:
@@ -239,9 +235,7 @@ class EntitiesManager:
239
235
 
240
236
  entities = {}
241
237
  for result in results:
242
- entities.update(
243
- {node.value: Entity(value=node.value) for node in result.prefix.nodes}
244
- )
238
+ entities.update({node.value: Entity(value=node.value) for node in result.prefix.nodes})
245
239
 
246
240
  if not entities:
247
241
  return None
@@ -292,7 +286,7 @@ class EntitiesManager:
292
286
 
293
287
  # stored groups
294
288
  entities_key = KB_ENTITIES.format(kbid=self.kbid)
295
- async for key in self.txn.keys(entities_key, count=-1):
289
+ async for key in self.txn.keys(entities_key):
296
290
  group = key.split("/")[-1]
297
291
  if exclude_deleted and group in deleted_groups:
298
292
  continue
@@ -312,9 +306,7 @@ class EntitiesManager:
312
306
  ) -> set[str]:
313
307
  shard_manager = get_shard_manager()
314
308
 
315
- async def query_indexed_entities_group_names(
316
- node: AbstractIndexNode, shard_id: str
317
- ) -> set[str]:
309
+ async def query_indexed_entities_group_names(node: AbstractIndexNode, shard_id: str) -> set[str]:
318
310
  request = SearchRequest(
319
311
  shard=shard_id,
320
312
  result_per_page=0,
@@ -335,6 +327,7 @@ class EntitiesManager:
335
327
  self.kbid,
336
328
  query_indexed_entities_group_names,
337
329
  settings.relation_types_timeout,
330
+ use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": self.kbid}),
338
331
  use_read_replica_nodes=self.use_read_replica_nodes,
339
332
  )
340
333
  for result in results:
@@ -347,9 +340,7 @@ class EntitiesManager:
347
340
  return set.union(*results)
348
341
 
349
342
  async def store_entities_group(self, group: str, eg: EntitiesGroup):
350
- meta_cache = await datamanagers.entities.get_entities_meta_cache(
351
- self.txn, kbid=self.kbid
352
- )
343
+ meta_cache = await datamanagers.entities.get_entities_meta_cache(self.txn, kbid=self.kbid)
353
344
  duplicates = {}
354
345
  deleted = []
355
346
  duplicate_count = 0
@@ -373,9 +364,7 @@ class EntitiesManager:
373
364
 
374
365
  meta_cache.set_duplicates(group, duplicates)
375
366
  meta_cache.set_deleted(group, deleted)
376
- await datamanagers.entities.set_entities_meta_cache(
377
- self.txn, kbid=self.kbid, cache=meta_cache
378
- )
367
+ await datamanagers.entities.set_entities_meta_cache(self.txn, kbid=self.kbid, cache=meta_cache)
379
368
 
380
369
  await datamanagers.entities.set_entities_group(
381
370
  self.txn, kbid=self.kbid, group_id=group, entities=eg
@@ -392,14 +381,10 @@ class EntitiesManager:
392
381
  await self.txn.delete(entities_key)
393
382
 
394
383
  async def mark_entities_group_as_deleted(self, group: str):
395
- await datamanagers.entities.mark_group_as_deleted(
396
- self.txn, kbid=self.kbid, group=group
397
- )
384
+ await datamanagers.entities.mark_group_as_deleted(self.txn, kbid=self.kbid, group=group)
398
385
 
399
386
  async def unmark_entities_group_as_deleted(self, group: str):
400
- await datamanagers.entities.unmark_group_as_deleted(
401
- self.txn, kbid=self.kbid, group=group
402
- )
387
+ await datamanagers.entities.unmark_group_as_deleted(self.txn, kbid=self.kbid, group=group)
403
388
 
404
389
  @staticmethod
405
390
  def merge_entities_groups(indexed: EntitiesGroup, stored: EntitiesGroup):
@@ -23,6 +23,10 @@ class NotFound(Exception):
23
23
  pass
24
24
 
25
25
 
26
+ class KnowledgeBoxCreationError(Exception):
27
+ pass
28
+
29
+
26
30
  class KnowledgeBoxConflict(Exception):
27
31
  pass
28
32
 
@@ -48,3 +52,11 @@ class ResourceNotIndexable(Exception):
48
52
 
49
53
  class EntityManagementException(Exception):
50
54
  pass
55
+
56
+
57
+ class VectorSetConflict(Exception):
58
+ pass
59
+
60
+
61
+ class InvalidBrokerMessage(ValueError):
62
+ pass