nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -20,6 +20,14 @@
20
20
  from typing import Optional
21
21
 
22
22
  import aiohttp
23
+
24
+ from nucliadb.common import datamanagers
25
+ from nucliadb.common.maindb.utils import setup_driver
26
+ from nucliadb.ingest.orm.entities import EntitiesManager
27
+ from nucliadb.ingest.orm.processor import Processor
28
+ from nucliadb.train import SERVICE_NAME
29
+ from nucliadb.train.models import RequestData
30
+ from nucliadb.train.settings import settings
23
31
  from nucliadb_protos.knowledgebox_pb2 import Labels
24
32
  from nucliadb_protos.train_pb2 import (
25
33
  EnabledMetadata,
@@ -34,13 +42,6 @@ from nucliadb_protos.writer_pb2 import (
34
42
  GetLabelsRequest,
35
43
  GetLabelsResponse,
36
44
  )
37
-
38
- from nucliadb.common.maindb.utils import setup_driver
39
- from nucliadb.ingest.orm.entities import EntitiesManager
40
- from nucliadb.ingest.orm.processor import Processor
41
- from nucliadb.train import SERVICE_NAME
42
- from nucliadb.train.models import RequestData
43
- from nucliadb.train.settings import settings
44
45
  from nucliadb_utils.utilities import get_pubsub, get_storage
45
46
 
46
47
 
@@ -51,8 +52,7 @@ class UploadServicer:
51
52
  pubsub = await get_pubsub()
52
53
  self.proc = Processor(driver=driver, storage=storage, pubsub=pubsub)
53
54
 
54
- async def finalize(self):
55
- ...
55
+ async def finalize(self): ...
56
56
 
57
57
  async def GetSentences(self, request: GetSentencesRequest, context=None):
58
58
  async for sentence in self.proc.kb_sentences(request):
@@ -75,9 +75,8 @@ class UploadServicer:
75
75
  ) -> GetEntitiesResponse:
76
76
  kbid = request.kb.uuid
77
77
  response = GetEntitiesResponse()
78
- async with self.proc.driver.transaction() as txn:
78
+ async with self.proc.driver.transaction(read_only=True) as txn:
79
79
  kbobj = await self.proc.get_kb_obj(txn, request.kb)
80
-
81
80
  if kbobj is None:
82
81
  response.status = GetEntitiesResponse.Status.NOTFOUND
83
82
  return response
@@ -91,20 +90,16 @@ class UploadServicer:
91
90
  async def GetOntology( # type: ignore
92
91
  self, request: GetLabelsRequest, context=None
93
92
  ) -> GetLabelsResponse:
94
- async with self.proc.driver.transaction() as txn:
95
- kbobj = await self.proc.get_kb_obj(txn, request.kb)
96
- labels: Optional[Labels] = None
97
- if kbobj is not None:
98
- labels = await kbobj.get_labels()
99
-
93
+ kbid = request.kb.uuid
100
94
  response = GetLabelsResponse()
101
- if kbobj is None:
95
+ kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
96
+ if not kb_exists:
102
97
  response.status = GetLabelsResponse.Status.NOTFOUND
103
- else:
104
- response.kb.uuid = kbobj.kbid
105
- if labels is not None:
106
- response.labels.CopyFrom(labels)
107
-
98
+ return response
99
+ response.kb.uuid = kbid
100
+ labels: Optional[Labels] = await datamanagers.atomic.labelset.get_all(kbid=kbid)
101
+ if labels is not None:
102
+ response.labels.CopyFrom(labels)
108
103
  return response
109
104
 
110
105
 
@@ -124,9 +119,9 @@ async def start_upload(request: str, kb: str):
124
119
  }
125
120
  ) as sess:
126
121
  req = await sess.get(f"{url}/request")
127
- request_data = RequestData.parse_raw(await req.read())
122
+ request_data = RequestData.model_validate_json(await req.read())
128
123
 
129
- metadata = EnabledMetadata(**request_data.metadata.dict())
124
+ metadata = EnabledMetadata(**request_data.metadata.model_dump())
130
125
 
131
126
  if request_data.sentences:
132
127
  pbsr = GetSentencesRequest()
nucliadb/train/utils.py CHANGED
@@ -23,7 +23,7 @@ from grpc import aio
23
23
  from grpc_health.v1 import health, health_pb2_grpc
24
24
 
25
25
  from nucliadb.common.maindb.utils import setup_driver, teardown_driver
26
- from nucliadb.train.nodes import TrainShardManager # type: ignore
26
+ from nucliadb.train.nodes import TrainShardManager
27
27
  from nucliadb.train.settings import settings
28
28
  from nucliadb_protos import train_pb2_grpc
29
29
  from nucliadb_telemetry.utils import setup_telemetry
@@ -29,9 +29,7 @@ logger = logging.getLogger(SERVICE_NAME)
29
29
  class EndpointFilter(logging.Filter):
30
30
  def filter(self, record: logging.LogRecord) -> bool:
31
31
  return (
32
- record.args is not None
33
- and len(record.args) >= 3
34
- and record.args[2] not in ("/", "/metrics") # type: ignore
32
+ record.args is not None and len(record.args) >= 3 and record.args[2] not in ("/", "/metrics") # type: ignore
35
33
  )
36
34
 
37
35
 
@@ -23,7 +23,6 @@ from fastapi.params import Header
23
23
 
24
24
  if TYPE_CHECKING: # pragma: no cover
25
25
  SKIP_STORE_DEFAULT = False
26
- SYNC_CALL = False
27
26
  X_NUCLIADB_USER = ""
28
27
  X_FILE_PASSWORD = None
29
28
  else:
@@ -31,10 +30,6 @@ else:
31
30
  False,
32
31
  description="If set to true, file fields will not be saved in the blob storage. They will only be sent to process.", # noqa
33
32
  )
34
- SYNC_CALL = Header(
35
- False,
36
- description="If set to true, the request will return when the changes to be commited to the database.",
37
- )
38
33
  X_NUCLIADB_USER = Header("")
39
34
  X_FILE_PASSWORD = Header(
40
35
  None,
@@ -17,18 +17,21 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from nucliadb_protos.resources_pb2 import FieldKeywordset
20
+ from functools import wraps
21
21
 
22
- from nucliadb.ingest.fields.base import Field
22
+ from fastapi import HTTPException
23
23
 
24
+ from nucliadb_utils.settings import is_onprem_nucliadb
24
25
 
25
- class Keywordset(Field):
26
- pbklass = FieldKeywordset
27
- value: FieldKeywordset
28
- type: str = "k"
29
26
 
30
- async def set_value(self, payload: FieldKeywordset):
31
- await self.db_set_value(payload)
27
+ def only_for_onprem(fun):
28
+ @wraps(fun)
29
+ async def endpoint_wrapper(*args, **kwargs):
30
+ if not is_onprem_nucliadb():
31
+ raise HTTPException(
32
+ status_code=403,
33
+ detail="This endpoint is only available for onprem NucliaDB",
34
+ )
35
+ return await fun(*args, **kwargs)
32
36
 
33
- async def get_value(self) -> FieldKeywordset:
34
- return await self.db_get_value()
37
+ return endpoint_wrapper
@@ -17,6 +17,8 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ from datetime import datetime
21
+ from typing import AsyncGenerator
20
22
  from uuid import uuid4
21
23
 
22
24
  from fastapi_versioning import version
@@ -28,23 +30,32 @@ from nucliadb.common.context import ApplicationContext
28
30
  from nucliadb.common.context.fastapi import get_app_context
29
31
  from nucliadb.export_import import importer
30
32
  from nucliadb.export_import.datamanager import ExportImportDataManager
33
+ from nucliadb.export_import.exceptions import (
34
+ IncompatibleExport,
35
+ )
31
36
  from nucliadb.export_import.models import (
32
37
  ExportMetadata,
33
38
  ImportMetadata,
34
39
  NatsTaskMessage,
35
40
  )
36
41
  from nucliadb.export_import.tasks import get_exports_producer, get_imports_producer
37
- from nucliadb.export_import.utils import IteratorExportStream
42
+ from nucliadb.export_import.utils import ExportStreamReader, stream_compatible_with_kb
38
43
  from nucliadb.models.responses import HTTPClientError
39
44
  from nucliadb.writer import logger
40
- from nucliadb.writer.api.v1.router import KB_PREFIX, api
45
+ from nucliadb.writer.api.utils import only_for_onprem
46
+ from nucliadb.writer.api.v1.knowledgebox import create_kb
47
+ from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX, api
41
48
  from nucliadb.writer.back_pressure import maybe_back_pressure
42
49
  from nucliadb_models.export_import import (
43
50
  CreateExportResponse,
44
51
  CreateImportResponse,
52
+ NewImportedKbResponse,
45
53
  Status,
46
54
  )
47
- from nucliadb_models.resource import NucliaDBRoles
55
+ from nucliadb_models.resource import (
56
+ KnowledgeBoxConfig,
57
+ NucliaDBRoles,
58
+ )
48
59
  from nucliadb_telemetry import errors
49
60
  from nucliadb_utils.authentication import requires_one
50
61
 
@@ -52,7 +63,7 @@ from nucliadb_utils.authentication import requires_one
52
63
  @api.post(
53
64
  f"/{KB_PREFIX}/{{kbid}}/export",
54
65
  status_code=200,
55
- name="Start an export of a Knowledge Box",
66
+ summary="Start an export of a Knowledge Box",
56
67
  tags=["Knowledge Boxes"],
57
68
  response_model=CreateExportResponse,
58
69
  )
@@ -60,9 +71,8 @@ from nucliadb_utils.authentication import requires_one
60
71
  @version(1)
61
72
  async def start_kb_export_endpoint(request: Request, kbid: str):
62
73
  context = get_app_context(request.app)
63
- async with datamanagers.with_transaction(read_only=True) as txn:
64
- if not await datamanagers.kb.exists_kb(txn, kbid=kbid):
65
- return HTTPClientError(status_code=404, detail="Knowledge Box not found")
74
+ if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
75
+ return HTTPClientError(status_code=404, detail="Knowledge Box not found")
66
76
 
67
77
  export_id = uuid4().hex
68
78
  if in_standalone_mode():
@@ -74,10 +84,60 @@ async def start_kb_export_endpoint(request: Request, kbid: str):
74
84
  return CreateExportResponse(export_id=export_id)
75
85
 
76
86
 
87
+ @only_for_onprem
88
+ @api.post(
89
+ f"/{KBS_PREFIX}/import",
90
+ summary="Create a KB from an export and import its content",
91
+ tags=["Knowledge Boxes"],
92
+ response_model=NewImportedKbResponse,
93
+ openapi_extra={"x-hidden-operation": True},
94
+ )
95
+ @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
96
+ @version(1)
97
+ async def kb_create_and_import_endpoint(request: Request):
98
+ context = get_app_context(request.app)
99
+
100
+ # Read stream and parse learning configuration
101
+ stream = request.stream()
102
+ stream_reader = ExportStreamReader(stream)
103
+ learning_config, leftover_bytes = await stream_reader.maybe_read_learning_config()
104
+ if learning_config is None:
105
+ return HTTPClientError(
106
+ status_code=400,
107
+ detail="Trying to import an export missing learning config. Try using import on an existing KB or use a newer export",
108
+ )
109
+
110
+ # Create a KB with the import learning config
111
+
112
+ now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
113
+ import_kb_config = KnowledgeBoxConfig(
114
+ title=f"Imported KB - {now}",
115
+ learning_configuration=learning_config.dict(),
116
+ )
117
+ kbid, slug = await create_kb(import_kb_config)
118
+
119
+ # Import contents to the new KB
120
+
121
+ async def stream_with_leftovers(leftovers: bytes, stream: AsyncGenerator[bytes, None]):
122
+ if len(leftovers) > 0:
123
+ yield leftovers
124
+ async for chunk in stream:
125
+ yield chunk
126
+
127
+ await importer.import_kb(
128
+ context=context, kbid=kbid, stream=stream_with_leftovers(leftover_bytes, stream)
129
+ )
130
+
131
+ return NewImportedKbResponse(
132
+ kbid=kbid,
133
+ slug=slug,
134
+ )
135
+
136
+
77
137
  @api.post(
78
138
  f"/{KB_PREFIX}/{{kbid}}/import",
79
139
  status_code=200,
80
- name="Start an import to a Knowledge Box",
140
+ summary="Start an import to a Knowledge Box",
81
141
  tags=["Knowledge Boxes"],
82
142
  response_model=CreateImportResponse,
83
143
  )
@@ -85,40 +145,45 @@ async def start_kb_export_endpoint(request: Request, kbid: str):
85
145
  @version(1)
86
146
  async def start_kb_import_endpoint(request: Request, kbid: str):
87
147
  context = get_app_context(request.app)
88
- async with datamanagers.with_transaction(read_only=True) as txn:
89
- if not await datamanagers.kb.exists_kb(txn, kbid=kbid):
90
- return HTTPClientError(status_code=404, detail="Knowledge Box not found")
148
+ if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
149
+ return HTTPClientError(status_code=404, detail="Knowledge Box not found")
91
150
 
92
151
  await maybe_back_pressure(request, kbid)
93
152
 
94
- import_id = uuid4().hex
95
- if in_standalone_mode():
96
- # In standalone mode, we import directly from the request content stream.
97
- # Note that we return an import_id simply to keep the API consistent with hosted nucliadb.
98
- stream = FastAPIExportStream(request)
99
- await importer.import_kb(
100
- context=context,
101
- kbid=kbid,
102
- stream=stream,
103
- )
104
- return CreateImportResponse(import_id=import_id)
105
- else:
106
- import_size = await upload_import_to_blob_storage(
107
- context=context,
108
- request=request,
109
- kbid=kbid,
110
- import_id=import_id,
111
- )
112
- await start_import_task(context, kbid, import_id, import_size)
113
- return CreateImportResponse(import_id=import_id)
153
+ stream = stream_compatible_with_kb(kbid, request.stream())
154
+ try:
155
+ import_id = uuid4().hex
156
+ if in_standalone_mode():
157
+ # In standalone mode, we import directly from the request content stream.
158
+ # Note that we return an import_id simply to keep the API consistent with hosted nucliadb.
159
+ await importer.import_kb(
160
+ context=context,
161
+ kbid=kbid,
162
+ stream=stream,
163
+ )
164
+ return CreateImportResponse(import_id=import_id)
165
+ else:
166
+ import_size = await upload_import_to_blob_storage(
167
+ context=context,
168
+ stream=stream,
169
+ kbid=kbid,
170
+ import_id=import_id,
171
+ )
172
+ await start_import_task(context, kbid, import_id, import_size)
173
+ return CreateImportResponse(import_id=import_id)
174
+ except IncompatibleExport as exc:
175
+ return HTTPClientError(status_code=400, detail=str(exc))
114
176
 
115
177
 
116
178
  async def upload_import_to_blob_storage(
117
- context: ApplicationContext, request: Request, kbid: str, import_id: str
179
+ context: ApplicationContext,
180
+ stream: AsyncGenerator[bytes, None],
181
+ kbid: str,
182
+ import_id: str,
118
183
  ) -> int:
119
184
  dm = ExportImportDataManager(context.kv_driver, context.blob_storage)
120
185
  return await dm.upload_import(
121
- import_bytes=request.stream(),
186
+ import_bytes=stream,
122
187
  kbid=kbid,
123
188
  import_id=import_id,
124
189
  )
@@ -133,37 +198,25 @@ async def start_export_task(context: ApplicationContext, kbid: str, export_id: s
133
198
  producer = await get_exports_producer(context)
134
199
  msg = NatsTaskMessage(kbid=kbid, id=export_id)
135
200
  seqid = await producer(msg) # type: ignore
136
- logger.info(
137
- f"Export task produced. seqid={seqid} kbid={kbid} export_id={export_id}"
138
- )
201
+ logger.info(f"Export task produced. seqid={seqid} kbid={kbid} export_id={export_id}")
139
202
  except Exception as e:
140
203
  errors.capture_exception(e)
141
204
  await dm.delete_metadata("export", metadata)
142
205
  raise
143
206
 
144
207
 
145
- async def start_import_task(
146
- context: ApplicationContext, kbid: str, import_id: str, import_size: int
147
- ):
208
+ async def start_import_task(context: ApplicationContext, kbid: str, import_id: str, import_size: int):
148
209
  dm = ExportImportDataManager(context.kv_driver, context.blob_storage)
149
210
  metadata = ImportMetadata(kbid=kbid, id=import_id)
150
211
  metadata.task.status = Status.SCHEDULED
151
- metadata.total = import_size
212
+ metadata.total = import_size or 0
152
213
  await dm.set_metadata("import", metadata)
153
214
  try:
154
215
  producer = await get_imports_producer(context)
155
216
  msg = NatsTaskMessage(kbid=kbid, id=import_id)
156
217
  seqid = await producer(msg) # type: ignore
157
- logger.info(
158
- f"Import task produced. seqid={seqid} kbid={kbid} import_id={import_id}"
159
- )
218
+ logger.info(f"Import task produced. seqid={seqid} kbid={kbid} import_id={import_id}")
160
219
  except Exception as e:
161
220
  errors.capture_exception(e)
162
221
  await dm.delete_metadata("import", metadata)
163
222
  raise
164
-
165
-
166
- class FastAPIExportStream(IteratorExportStream):
167
- def __init__(self, request: Request):
168
- iterator = request.stream().__aiter__()
169
- super().__init__(iterator)