nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -18,127 +18,277 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import asyncio
21
- import json
21
+ from functools import partial
22
22
 
23
23
  from fastapi import HTTPException, Response
24
24
  from fastapi_versioning import version
25
- from nucliadb_protos.knowledgebox_pb2 import (
26
- DeleteKnowledgeBoxResponse,
27
- KnowledgeBoxID,
28
- KnowledgeBoxNew,
29
- KnowledgeBoxResponseStatus,
30
- KnowledgeBoxUpdate,
31
- NewKnowledgeBoxResponse,
32
- UpdateKnowledgeBoxResponse,
33
- )
34
25
  from starlette.requests import Request
35
26
 
27
+ from nucliadb import learning_proxy
28
+ from nucliadb.common import datamanagers
29
+ from nucliadb.common.external_index_providers.exceptions import (
30
+ ExternalIndexCreationError,
31
+ )
32
+ from nucliadb.common.maindb.utils import get_driver
33
+ from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict
34
+ from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
35
+ from nucliadb.writer import logger, vectorsets
36
+ from nucliadb.writer.api.utils import only_for_onprem
36
37
  from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX, api
37
38
  from nucliadb.writer.utilities import get_processing
39
+ from nucliadb_models.external_index_providers import (
40
+ ExternalIndexProviderType,
41
+ PineconeServerlessCloud,
42
+ )
38
43
  from nucliadb_models.resource import (
39
44
  KnowledgeBoxConfig,
40
45
  KnowledgeBoxObj,
41
46
  KnowledgeBoxObjID,
42
47
  NucliaDBRoles,
43
48
  )
49
+ from nucliadb_protos import knowledgebox_pb2
44
50
  from nucliadb_utils.authentication import requires
45
- from nucliadb_utils.utilities import get_ingest
46
51
 
47
52
 
53
+ @only_for_onprem
48
54
  @api.post(
49
55
  f"/{KBS_PREFIX}",
50
56
  status_code=201,
51
- name="Create Knowledge Box",
52
- response_model=KnowledgeBoxObj,
57
+ summary="Create Knowledge Box",
53
58
  tags=["Knowledge Boxes"],
54
59
  openapi_extra={"x-hidden-operation": True},
55
60
  )
56
61
  @requires(NucliaDBRoles.MANAGER)
57
62
  @version(1)
58
- async def create_kb(request: Request, item: KnowledgeBoxConfig):
59
- ingest = get_ingest()
60
- requestpb = KnowledgeBoxNew()
61
- requestpb = parse_create_kb_request(item)
62
- kbobj: NewKnowledgeBoxResponse = await ingest.NewKnowledgeBox(requestpb) # type: ignore
63
- if item.slug != "":
64
- slug = item.slug
65
- else:
66
- slug = kbobj.uuid # type: ignore
67
- if kbobj.status == KnowledgeBoxResponseStatus.OK:
68
- return KnowledgeBoxObj(uuid=kbobj.uuid, slug=slug)
69
- elif kbobj.status == KnowledgeBoxResponseStatus.CONFLICT:
63
+ async def create_kb_endpoint(request: Request, item: KnowledgeBoxConfig) -> KnowledgeBoxObj:
64
+ try:
65
+ kbid, slug = await create_kb(item)
66
+ except KnowledgeBoxConflict:
70
67
  raise HTTPException(status_code=419, detail="Knowledge box already exists")
71
- elif kbobj.status == KnowledgeBoxResponseStatus.ERROR:
72
- raise HTTPException(status_code=500, detail="Error on creating knowledge box")
73
-
74
-
75
- def parse_create_kb_request(item: KnowledgeBoxConfig) -> KnowledgeBoxNew:
76
- requestpb = KnowledgeBoxNew()
77
- if item.slug:
78
- requestpb.slug = item.slug
79
- if item.title:
80
- requestpb.config.title = item.title
81
- if item.description:
82
- requestpb.config.description = item.description
83
- if item.release_channel:
84
- requestpb.release_channel = item.release_channel.to_pb()
68
+ except ExternalIndexCreationError as exc:
69
+ raise HTTPException(status_code=502, detail=str(exc))
70
+ except Exception:
71
+ logger.exception("Could not create KB")
72
+ raise HTTPException(status_code=500, detail="Error creating knowledge box")
73
+ else:
74
+ return KnowledgeBoxObj(uuid=kbid, slug=slug)
75
+
76
+
77
+ async def create_kb(item: KnowledgeBoxConfig) -> tuple[str, str]:
78
+ driver = get_driver()
79
+ rollback_learning_config = None
80
+
81
+ kbid = KnowledgeBox.new_unique_kbid()
82
+
83
+ # Onprem KB creation doesn't have an existing learning configuration yet, so
84
+ # we need to call learning proxy to create it
85
85
  if item.learning_configuration:
86
- requestpb.learning_config = json.dumps(item.learning_configuration)
87
- return requestpb
86
+ user_learning_config = item.learning_configuration
87
+ else:
88
+ logger.warning(
89
+ "No learning configuration provided. Default will be used.",
90
+ extra={"kbid": kbid},
91
+ )
92
+ # learning will choose the default values
93
+ user_learning_config = {}
94
+
95
+ # We need to be backward compatible with the old "semantic_model" field where
96
+ # only one semantic model was allowed.
97
+ if "semantic_model" in user_learning_config:
98
+ user_learning_config["semantic_models"] = [user_learning_config.pop("semantic_model")]
88
99
 
100
+ # we rely on learning to return the updated configuration with defaults and
101
+ # any other needed values (e.g. matryoshka settings if available)
102
+ learning_config = await learning_proxy.set_configuration(kbid, config=user_learning_config)
89
103
 
104
+ # if KB creation fails, we'll have to delete its learning config
105
+ async def _rollback_learning_config(kbid: str):
106
+ try:
107
+ await learning_proxy.delete_configuration(kbid)
108
+ except Exception:
109
+ logger.warning(
110
+ "Could not rollback learning configuration",
111
+ exc_info=True,
112
+ extra={"kbid": kbid},
113
+ )
114
+
115
+ rollback_learning_config = partial(_rollback_learning_config, kbid)
116
+ semantic_models = learning_config.into_semantic_models_metadata()
117
+
118
+ external_index_provider = knowledgebox_pb2.CreateExternalIndexProviderMetadata(
119
+ type=knowledgebox_pb2.ExternalIndexProviderType.UNSET,
120
+ )
121
+ if (
122
+ item.external_index_provider
123
+ and item.external_index_provider.type == ExternalIndexProviderType.PINECONE
124
+ ):
125
+ pinecone_api_key = item.external_index_provider.api_key
126
+ serverless_pb = to_pinecone_serverless_cloud_pb(item.external_index_provider.serverless_cloud)
127
+ external_index_provider = knowledgebox_pb2.CreateExternalIndexProviderMetadata(
128
+ type=knowledgebox_pb2.ExternalIndexProviderType.PINECONE,
129
+ pinecone_config=knowledgebox_pb2.CreatePineconeConfig(
130
+ api_key=pinecone_api_key,
131
+ serverless_cloud=serverless_pb,
132
+ ),
133
+ )
134
+
135
+ try:
136
+ (kbid, slug) = await KnowledgeBox.create(
137
+ driver,
138
+ kbid=kbid,
139
+ slug=item.slug or kbid,
140
+ title=item.title or "",
141
+ description=item.description or "",
142
+ semantic_models=semantic_models,
143
+ external_index_provider=external_index_provider,
144
+ hidden_resources_enabled=item.hidden_resources_enabled,
145
+ hidden_resources_hide_on_creation=item.hidden_resources_hide_on_creation,
146
+ )
147
+
148
+ except Exception as exc:
149
+ logger.error("Unexpected error creating KB", exc_info=exc, extra={"slug": item.slug})
150
+ await rollback_learning_config()
151
+ raise
152
+
153
+ return (kbid, slug)
154
+
155
+
156
+ @only_for_onprem
90
157
  @api.patch(
91
158
  f"/{KB_PREFIX}/{{kbid}}",
92
159
  status_code=200,
93
- name="Update Knowledge Box",
160
+ summary="Update Knowledge Box",
94
161
  response_model=KnowledgeBoxObjID,
95
162
  tags=["Knowledge Boxes"],
96
163
  openapi_extra={"x-hidden-operation": True},
97
164
  )
98
165
  @requires(NucliaDBRoles.MANAGER)
99
166
  @version(1)
100
- async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig):
101
- ingest = get_ingest()
102
- pbrequest = KnowledgeBoxUpdate(uuid=kbid)
103
- if item.slug is not None:
104
- pbrequest.slug = item.slug
105
- if item.title:
106
- pbrequest.config.title = item.title
107
- if item.description:
108
- pbrequest.config.description = item.description
109
- kbobj: UpdateKnowledgeBoxResponse = await ingest.UpdateKnowledgeBox(pbrequest) # type: ignore
110
- if kbobj.status == KnowledgeBoxResponseStatus.OK:
111
- return KnowledgeBoxObjID(uuid=kbobj.uuid)
112
- elif kbobj.status == KnowledgeBoxResponseStatus.NOTFOUND:
167
+ async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig) -> KnowledgeBoxObjID:
168
+ driver = get_driver()
169
+ config = None
170
+ if (
171
+ item.slug
172
+ or item.title
173
+ or item.description
174
+ or item.hidden_resources_enabled
175
+ or item.hidden_resources_hide_on_creation
176
+ ):
177
+ config = knowledgebox_pb2.KnowledgeBoxConfig(
178
+ slug=item.slug or "",
179
+ title=item.title or "",
180
+ description=item.description or "",
181
+ hidden_resources_enabled=item.hidden_resources_enabled,
182
+ hidden_resources_hide_on_creation=item.hidden_resources_hide_on_creation,
183
+ )
184
+ try:
185
+ async with driver.transaction() as txn:
186
+ await KnowledgeBox.update(
187
+ txn,
188
+ uuid=kbid,
189
+ slug=item.slug,
190
+ config=config,
191
+ )
192
+ await txn.commit()
193
+ except datamanagers.exceptions.KnowledgeBoxNotFound:
113
194
  raise HTTPException(status_code=404, detail="Knowledge box does not exist")
114
- elif kbobj.status == KnowledgeBoxResponseStatus.ERROR:
115
- raise HTTPException(status_code=500, detail="Error on creating knowledge box")
195
+ except Exception as exc:
196
+ logger.exception("Could not update KB", exc_info=exc, extra={"kbid": kbid})
197
+ raise HTTPException(status_code=500, detail="Error updating knowledge box")
198
+ else:
199
+ return KnowledgeBoxObjID(uuid=kbid)
116
200
 
117
201
 
202
+ @only_for_onprem
118
203
  @api.delete(
119
204
  f"/{KB_PREFIX}/{{kbid}}",
120
205
  status_code=200,
121
- name="Delete Knowledge Box",
122
- response_model=KnowledgeBoxObj,
206
+ summary="Delete Knowledge Box",
123
207
  tags=["Knowledge Boxes"],
124
208
  openapi_extra={"x-hidden-operation": True},
125
209
  )
126
210
  @requires(NucliaDBRoles.MANAGER)
127
211
  @version(1)
128
- async def delete_kb(request: Request, kbid: str):
129
- ingest = get_ingest()
130
-
131
- kbobj: DeleteKnowledgeBoxResponse = await ingest.DeleteKnowledgeBox( # type: ignore
132
- KnowledgeBoxID(uuid=kbid)
133
- )
134
- if kbobj.status == KnowledgeBoxResponseStatus.OK:
135
- return KnowledgeBoxObj(uuid=kbid)
136
- elif kbobj.status == KnowledgeBoxResponseStatus.NOTFOUND:
212
+ async def delete_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
213
+ driver = get_driver()
214
+ try:
215
+ await KnowledgeBox.delete(driver, kbid=kbid)
216
+ except datamanagers.exceptions.KnowledgeBoxNotFound:
137
217
  raise HTTPException(status_code=404, detail="Knowledge Box does not exists")
138
- elif kbobj.status == KnowledgeBoxResponseStatus.ERROR:
139
- raise HTTPException(status_code=500, detail="Error on deleting knowledge box")
218
+ except Exception as exc:
219
+ logger.exception("Could not delete KB", exc_info=exc, extra={"kbid": kbid})
220
+ raise HTTPException(status_code=500, detail="Error deleting knowledge box")
221
+
222
+ # onprem nucliadb must delete its learning configuration
223
+ try:
224
+ await learning_proxy.delete_configuration(kbid)
225
+ logger.info("Learning configuration deleted", extra={"kbid": kbid})
226
+ except Exception as exc:
227
+ logger.exception(
228
+ "Unexpected error deleting learning configuration",
229
+ exc_info=exc,
230
+ extra={"kbid": kbid},
231
+ )
140
232
 
233
+ # be nice and notify processing this KB is being deleted so we don't waste
234
+ # resources
141
235
  processing = get_processing()
142
236
  asyncio.create_task(processing.delete_from_processing(kbid=kbid))
143
237
 
144
- return Response(status_code=204)
238
+ return KnowledgeBoxObj(uuid=kbid)
239
+
240
+
241
+ def to_pinecone_serverless_cloud_pb(
242
+ serverless: PineconeServerlessCloud,
243
+ ) -> knowledgebox_pb2.PineconeServerlessCloud.ValueType:
244
+ return {
245
+ PineconeServerlessCloud.AWS_EU_WEST_1: knowledgebox_pb2.PineconeServerlessCloud.AWS_EU_WEST_1,
246
+ PineconeServerlessCloud.AWS_US_EAST_1: knowledgebox_pb2.PineconeServerlessCloud.AWS_US_EAST_1,
247
+ PineconeServerlessCloud.AWS_US_WEST_2: knowledgebox_pb2.PineconeServerlessCloud.AWS_US_WEST_2,
248
+ PineconeServerlessCloud.AZURE_EASTUS2: knowledgebox_pb2.PineconeServerlessCloud.AZURE_EASTUS2,
249
+ PineconeServerlessCloud.GCP_US_CENTRAL1: knowledgebox_pb2.PineconeServerlessCloud.GCP_US_CENTRAL1,
250
+ }[serverless]
251
+
252
+
253
+ @api.post(
254
+ f"/{KB_PREFIX}/{{kbid}}/vectorsets/{{vectorset_id}}",
255
+ status_code=200,
256
+ summary="Add a vectorset to Knowledge Box",
257
+ tags=["Knowledge Boxes"],
258
+ # TODO: remove when the feature is mature
259
+ include_in_schema=False,
260
+ )
261
+ @requires(NucliaDBRoles.MANAGER)
262
+ @version(1)
263
+ async def add_vectorset(request: Request, kbid: str, vectorset_id: str) -> Response:
264
+ try:
265
+ await vectorsets.add(kbid, vectorset_id)
266
+ except learning_proxy.ProxiedLearningConfigError as err:
267
+ return Response(
268
+ status_code=err.status_code,
269
+ content=err.content,
270
+ media_type=err.content_type,
271
+ )
272
+ return Response(status_code=200)
273
+
274
+
275
+ @api.delete(
276
+ f"/{KB_PREFIX}/{{kbid}}/vectorsets/{{vectorset_id}}",
277
+ status_code=200,
278
+ summary="Delete vectorset from Knowledge Box",
279
+ tags=["Knowledge Boxes"],
280
+ # TODO: remove when the feature is mature
281
+ include_in_schema=False,
282
+ )
283
+ @requires(NucliaDBRoles.MANAGER)
284
+ @version(1)
285
+ async def delete_vectorset(request: Request, kbid: str, vectorset_id: str) -> Response:
286
+ try:
287
+ await vectorsets.delete(kbid, vectorset_id)
288
+ except learning_proxy.ProxiedLearningConfigError as err:
289
+ return Response(
290
+ status_code=err.status_code,
291
+ content=err.content,
292
+ media_type=err.content_type,
293
+ )
294
+ return Response(status_code=200)
@@ -29,7 +29,7 @@ from nucliadb_utils.authentication import requires
29
29
  @api.post(
30
30
  path=f"/{KB_PREFIX}/{{kbid}}/configuration",
31
31
  status_code=204,
32
- name="Create Knowledge Box models configuration",
32
+ summary="Create Knowledge Box models configuration",
33
33
  description="Create configuration of models assigned to a Knowledge Box",
34
34
  response_model=None,
35
35
  tags=["Knowledge Boxes"],
@@ -46,7 +46,7 @@ async def set_configuration(
46
46
  @api.patch(
47
47
  path=f"/{KB_PREFIX}/{{kbid}}/configuration",
48
48
  status_code=204,
49
- name="Update Knowledge Box models configuration",
49
+ summary="Update Knowledge Box models configuration",
50
50
  description="Update current configuration of models assigned to a Knowledge Box",
51
51
  response_model=None,
52
52
  tags=["Knowledge Boxes"],