nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -22,22 +22,21 @@ from typing import Optional, Union
22
22
  from fastapi import Header, HTTPException, Query, Request, Response
23
23
  from fastapi_versioning import version
24
24
 
25
- import nucliadb_models as models
25
+ from nucliadb.common.datamanagers.resources import KB_RESOURCE_SLUG_BASE
26
26
  from nucliadb.common.maindb.utils import get_driver
27
+ from nucliadb.common.models_utils import from_proto, to_proto
27
28
  from nucliadb.ingest.fields.conversation import Conversation
28
29
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as ORMKnowledgeBox
29
- from nucliadb.ingest.orm.resource import KB_RESOURCE_SLUG_BASE
30
30
  from nucliadb.ingest.orm.resource import Resource as ORMResource
31
31
  from nucliadb.ingest.serialize import (
32
32
  managed_serialize,
33
33
  serialize,
34
34
  set_resource_field_extracted_data,
35
35
  )
36
- from nucliadb.reader import SERVICE_NAME # type: ignore
36
+ from nucliadb.reader import SERVICE_NAME
37
37
  from nucliadb.reader.api import DEFAULT_RESOURCE_LIST_PAGE_SIZE
38
38
  from nucliadb.reader.api.models import (
39
39
  FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP,
40
- FIELD_NAMES_TO_PB_TYPE_MAP,
41
40
  ResourceField,
42
41
  )
43
42
  from nucliadb.reader.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
@@ -77,60 +76,55 @@ async def list_resources(
77
76
 
78
77
  # Get counters from maindb
79
78
  driver = get_driver()
80
- txn = await driver.begin()
81
-
82
- # Filter parameters for serializer
83
- show: list[ResourceProperties] = [ResourceProperties.BASIC]
84
- field_types: list[FieldTypeName] = []
85
- extracted: list[ExtractedDataTypeName] = []
86
-
87
- try:
88
- resources: list[Resource] = []
89
- max_items_to_iterate = (page + 1) * size
90
- first_wanted_item_index = (page * size) + 1 # 1-based index
91
- current_key_index = 0
92
-
93
- # ask for one item more than we need, in order to know if it's the last page
94
- keys_generator = txn.keys(
95
- match=KB_RESOURCE_SLUG_BASE.format(kbid=kbid),
96
- count=max_items_to_iterate + 1,
97
- )
98
- async for key in keys_generator:
99
- current_key_index += 1
100
-
101
- # First of all, we need to skip keys, in case we are on a +1 page
102
- if page > 0 and current_key_index < first_wanted_item_index:
103
- continue
104
-
105
- # Don't fetch keys once we got all items for this
106
- if len(resources) == size:
107
- await keys_generator.aclose()
108
- break
109
-
110
- # Fetch and Add wanted item
111
- rid = await txn.get(key)
112
- if rid:
113
- result = await managed_serialize(
114
- txn,
115
- kbid,
116
- rid.decode(),
117
- show,
118
- field_types,
119
- extracted,
120
- service_name=SERVICE_NAME,
121
- )
122
- if result is not None:
123
- resources.append(result)
124
-
125
- is_last_page = current_key_index <= max_items_to_iterate
126
-
127
- except Exception as exc:
128
- errors.capture_exception(exc)
129
- raise HTTPException(
130
- status_code=500, detail="Couldn't retrieve list of resources right now"
131
- )
132
- finally:
133
- await txn.abort()
79
+ async with driver.transaction(read_only=True) as txn:
80
+ # Filter parameters for serializer
81
+ show: list[ResourceProperties] = [ResourceProperties.BASIC]
82
+ field_types: list[FieldTypeName] = []
83
+ extracted: list[ExtractedDataTypeName] = []
84
+
85
+ try:
86
+ resources: list[Resource] = []
87
+ max_items_to_iterate = (page + 1) * size
88
+ first_wanted_item_index = (page * size) + 1 # 1-based index
89
+ current_key_index = 0
90
+
91
+ # ask for one item more than we need, in order to know if it's the last page
92
+ keys_generator = txn.keys(
93
+ match=KB_RESOURCE_SLUG_BASE.format(kbid=kbid),
94
+ count=max_items_to_iterate + 1,
95
+ )
96
+ async for key in keys_generator:
97
+ current_key_index += 1
98
+
99
+ # First of all, we need to skip keys, in case we are on a +1 page
100
+ if page > 0 and current_key_index < first_wanted_item_index:
101
+ continue
102
+
103
+ # Don't fetch keys once we got all items for this
104
+ if len(resources) == size:
105
+ await keys_generator.aclose()
106
+ break
107
+
108
+ # Fetch and Add wanted item
109
+ rid = await txn.get(key, for_update=False)
110
+ if rid:
111
+ result = await managed_serialize(
112
+ txn,
113
+ kbid,
114
+ rid.decode(),
115
+ show,
116
+ field_types,
117
+ extracted,
118
+ service_name=SERVICE_NAME,
119
+ )
120
+ if result is not None:
121
+ resources.append(result)
122
+
123
+ is_last_page = current_key_index <= max_items_to_iterate
124
+
125
+ except Exception as exc:
126
+ errors.capture_exception(exc)
127
+ raise HTTPException(status_code=500, detail="Couldn't retrieve list of resources right now")
134
128
 
135
129
  return ResourceList(
136
130
  resources=resources,
@@ -141,7 +135,7 @@ async def list_resources(
141
135
  @api.get(
142
136
  f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}",
143
137
  status_code=200,
144
- name="Get Resource (by id)",
138
+ summary="Get Resource (by id)",
145
139
  response_model=Resource,
146
140
  response_model_exclude_unset=True,
147
141
  tags=["Resources"],
@@ -153,9 +147,7 @@ async def get_resource_by_uuid(
153
147
  kbid: str,
154
148
  rid: str,
155
149
  show: list[ResourceProperties] = Query([ResourceProperties.BASIC]),
156
- field_type_filter: list[FieldTypeName] = Query(
157
- list(FieldTypeName), alias="field_type"
158
- ),
150
+ field_type_filter: list[FieldTypeName] = Query(list(FieldTypeName), alias="field_type"),
159
151
  extracted: list[ExtractedDataTypeName] = Query(
160
152
  [
161
153
  ExtractedDataTypeName.TEXT,
@@ -181,7 +173,7 @@ async def get_resource_by_uuid(
181
173
  @api.get(
182
174
  f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}",
183
175
  status_code=200,
184
- name="Get Resource (by slug)",
176
+ summary="Get Resource (by slug)",
185
177
  response_model=Resource,
186
178
  response_model_exclude_unset=True,
187
179
  tags=["Resources"],
@@ -193,9 +185,7 @@ async def get_resource_by_slug(
193
185
  kbid: str,
194
186
  rslug: str,
195
187
  show: list[ResourceProperties] = Query([ResourceProperties.BASIC]),
196
- field_type_filter: list[FieldTypeName] = Query(
197
- list(FieldTypeName), alias="field_type"
198
- ),
188
+ field_type_filter: list[FieldTypeName] = Query(list(FieldTypeName), alias="field_type"),
199
189
  extracted: list[ExtractedDataTypeName] = Query(
200
190
  [
201
191
  ExtractedDataTypeName.TEXT,
@@ -235,7 +225,7 @@ async def _get_resource(
235
225
  audit = get_audit()
236
226
  if audit is not None:
237
227
  audit_id = rid if rid else rslug
238
- await audit.visited(kbid, audit_id, x_nucliadb_user, x_forwarded_for) # type: ignore
228
+ audit.visited(kbid, audit_id, x_nucliadb_user, x_forwarded_for) # type: ignore
239
229
 
240
230
  result = await serialize(
241
231
  kbid,
@@ -254,7 +244,7 @@ async def _get_resource(
254
244
  @api.get(
255
245
  f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/{{field_type}}/{{field_id}}",
256
246
  status_code=200,
257
- name="Get Resource field (by slug)",
247
+ summary="Get Resource field (by slug)",
258
248
  response_model=ResourceField,
259
249
  response_model_exclude_unset=True,
260
250
  tags=["Resource fields"],
@@ -294,7 +284,7 @@ async def get_resource_field_rslug_prefix(
294
284
  @api.get(
295
285
  f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/{{field_type}}/{{field_id}}",
296
286
  status_code=200,
297
- name="Get Resource field (by id)",
287
+ summary="Get Resource field (by id)",
298
288
  response_model=ResourceField,
299
289
  response_model_exclude_unset=True,
300
290
  tags=["Resource fields"],
@@ -343,9 +333,7 @@ async def _get_resource_field(
343
333
  ) -> Response:
344
334
  storage = await get_storage(service_name=SERVICE_NAME)
345
335
  driver = get_driver()
346
-
347
- pb_field_id = FIELD_NAMES_TO_PB_TYPE_MAP[field_type]
348
-
336
+ pb_field_id = to_proto.field_type_name(field_type)
349
337
  async with driver.transaction() as txn:
350
338
  kb = ORMKnowledgeBox(txn, storage, kbid)
351
339
 
@@ -360,34 +348,22 @@ async def _get_resource_field(
360
348
  if field is None:
361
349
  raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
362
350
 
363
- resource_field = ResourceField(field_id=field_id, field_type=field_type) # type: ignore
351
+ resource_field = ResourceField(field_id=field_id, field_type=field_type)
364
352
 
365
353
  if ResourceFieldProperties.VALUE in show:
366
354
  value = await field.get_value()
367
355
 
368
356
  if isinstance(value, resources_pb2.FieldText):
369
357
  value = await field.get_value()
370
- resource_field.value = models.FieldText.from_message(value)
358
+ resource_field.value = from_proto.field_text(value)
371
359
 
372
360
  if isinstance(value, resources_pb2.FieldFile):
373
361
  value = await field.get_value()
374
- resource_field.value = models.FieldFile.from_message(value)
362
+ resource_field.value = from_proto.field_file(value)
375
363
 
376
364
  if isinstance(value, resources_pb2.FieldLink):
377
365
  value = await field.get_value()
378
- resource_field.value = models.FieldLink.from_message(value)
379
-
380
- if isinstance(value, resources_pb2.FieldLayout):
381
- value = await field.get_value()
382
- resource_field.value = models.FieldLayout.from_message(value)
383
-
384
- if isinstance(value, resources_pb2.FieldDatetime):
385
- value = await field.get_value()
386
- resource_field.value = models.FieldDatetime.from_message(value)
387
-
388
- if isinstance(value, resources_pb2.FieldKeywordset):
389
- value = await field.get_value()
390
- resource_field.value = models.FieldKeywordset.from_message(value)
366
+ resource_field.value = from_proto.field_link(value)
391
367
 
392
368
  if isinstance(field, Conversation):
393
369
  if page == "first":
@@ -400,12 +376,10 @@ async def _get_resource_field(
400
376
 
401
377
  value = await field.get_value(page=page_to_fetch)
402
378
  if value is not None:
403
- resource_field.value = models.Conversation.from_message(value)
379
+ resource_field.value = from_proto.conversation(value)
404
380
 
405
381
  if ResourceFieldProperties.EXTRACTED in show and extracted:
406
- resource_field.extracted = FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP[
407
- field_type
408
- ]()
382
+ resource_field.extracted = FIELD_NAME_TO_EXTRACTED_DATA_FIELD_MAP[field_type]()
409
383
  await set_resource_field_extracted_data(
410
384
  field,
411
385
  resource_field.extracted,
@@ -419,6 +393,6 @@ async def _get_resource_field(
419
393
  resource_field.error = Error(body=error.error, code=error.code)
420
394
 
421
395
  return Response(
422
- content=resource_field.json(exclude_unset=True, by_alias=True),
396
+ content=resource_field.model_dump_json(exclude_unset=True, by_alias=True),
423
397
  media_type="application/json",
424
398
  )
@@ -22,30 +22,17 @@ from typing import Optional, Union
22
22
 
23
23
  from fastapi import HTTPException
24
24
  from fastapi.responses import StreamingResponse
25
- from fastapi_versioning import version # type: ignore
25
+ from fastapi_versioning import version
26
26
  from google.protobuf.json_format import MessageToDict
27
- from nucliadb_protos.knowledgebox_pb2 import KnowledgeBoxID
28
- from nucliadb_protos.writer_pb2 import (
29
- GetEntitiesGroupRequest,
30
- GetEntitiesGroupResponse,
31
- GetLabelSetRequest,
32
- GetLabelSetResponse,
33
- GetLabelsRequest,
34
- GetLabelsResponse,
35
- GetSynonymsResponse,
36
- GetVectorSetsRequest,
37
- GetVectorSetsResponse,
38
- ListEntitiesGroupsRequest,
39
- ListEntitiesGroupsResponse,
40
- OpStatusWriter,
41
- )
42
27
  from starlette.requests import Request
43
28
 
44
29
  from nucliadb.common import datamanagers
45
30
  from nucliadb.common.cluster.settings import in_standalone_mode
46
31
  from nucliadb.common.context.fastapi import get_app_context
32
+ from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
47
33
  from nucliadb.common.http_clients import processing
48
34
  from nucliadb.common.maindb.utils import get_driver
35
+ from nucliadb.common.models_utils import from_proto
49
36
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
50
37
  from nucliadb.models.responses import HTTPClientError
51
38
  from nucliadb.reader import SERVICE_NAME
@@ -53,13 +40,19 @@ from nucliadb.reader.api.v1.router import KB_PREFIX, api
53
40
  from nucliadb.reader.reader.notifications import kb_notifications_stream
54
41
  from nucliadb_models.entities import (
55
42
  EntitiesGroup,
56
- EntitiesGroupSummary,
57
43
  KnowledgeBoxEntities,
58
44
  )
59
45
  from nucliadb_models.labels import KnowledgeBoxLabels, LabelSet
60
46
  from nucliadb_models.resource import NucliaDBRoles
61
47
  from nucliadb_models.synonyms import KnowledgeBoxSynonyms
62
- from nucliadb_models.vectors import VectorSet, VectorSets
48
+ from nucliadb_protos import writer_pb2
49
+ from nucliadb_protos.knowledgebox_pb2 import Synonyms
50
+ from nucliadb_protos.writer_pb2 import (
51
+ GetEntitiesGroupRequest,
52
+ GetEntitiesGroupResponse,
53
+ ListEntitiesGroupsRequest,
54
+ ListEntitiesGroupsResponse,
55
+ )
63
56
  from nucliadb_utils.authentication import requires
64
57
  from nucliadb_utils.utilities import get_ingest, get_storage
65
58
 
@@ -67,7 +60,7 @@ from nucliadb_utils.utilities import get_ingest, get_storage
67
60
  @api.get(
68
61
  f"/{KB_PREFIX}/{{kbid}}/entitiesgroups",
69
62
  status_code=200,
70
- name="Get Knowledge Box Entities",
63
+ summary="Get Knowledge Box Entities",
71
64
  response_model=KnowledgeBoxEntities,
72
65
  tags=["Knowledge Box Services"],
73
66
  )
@@ -93,25 +86,21 @@ async def list_entities_groups(kbid: str):
93
86
  if entities_groups.status == ListEntitiesGroupsResponse.Status.OK:
94
87
  response = KnowledgeBoxEntities(uuid=kbid)
95
88
  for key, eg_summary in entities_groups.groups.items():
96
- entities_group = EntitiesGroupSummary.from_message(eg_summary)
89
+ entities_group = from_proto.entities_group_summary(eg_summary)
97
90
  response.groups[key] = entities_group
98
91
  return response
99
92
  elif entities_groups.status == ListEntitiesGroupsResponse.Status.NOTFOUND:
100
93
  raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
101
94
  elif entities_groups.status == ListEntitiesGroupsResponse.Status.ERROR:
102
- raise HTTPException(
103
- status_code=500, detail="Error while listing entities groups"
104
- )
95
+ raise HTTPException(status_code=500, detail="Error while listing entities groups")
105
96
  else:
106
- raise HTTPException(
107
- status_code=500, detail="Error on listing Knowledge box entities"
108
- )
97
+ raise HTTPException(status_code=500, detail="Error on listing Knowledge box entities")
109
98
 
110
99
 
111
100
  @api.get(
112
101
  f"/{KB_PREFIX}/{{kbid}}/entitiesgroup/{{group}}",
113
102
  status_code=200,
114
- name="Get a Knowledge Box Entities Group",
103
+ summary="Get a Knowledge Box Entities Group",
115
104
  response_model=EntitiesGroup,
116
105
  tags=["Knowledge Box Services"],
117
106
  )
@@ -125,123 +114,90 @@ async def get_entity(request: Request, kbid: str, group: str) -> EntitiesGroup:
125
114
 
126
115
  kbobj: GetEntitiesGroupResponse = await ingest.GetEntitiesGroup(l_request) # type: ignore
127
116
  if kbobj.status == GetEntitiesGroupResponse.Status.OK:
128
- response = EntitiesGroup.from_message(kbobj.group)
117
+ response = from_proto.entities_group(kbobj.group)
129
118
  return response
130
119
  elif kbobj.status == GetEntitiesGroupResponse.Status.KB_NOT_FOUND:
131
- raise HTTPException(
132
- status_code=404, detail=f"Knowledge Box '{kbid}' does not exist"
133
- )
120
+ raise HTTPException(status_code=404, detail=f"Knowledge Box '{kbid}' does not exist")
134
121
  elif kbobj.status == GetEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND:
135
- raise HTTPException(
136
- status_code=404, detail=f"Entities group '{group}' does not exist"
137
- )
122
+ raise HTTPException(status_code=404, detail=f"Entities group '{group}' does not exist")
138
123
  else:
139
- raise HTTPException(
140
- status_code=500, detail="Error on getting entities group on a Knowledge box"
141
- )
124
+ raise HTTPException(status_code=500, detail="Error on getting entities group on a Knowledge box")
142
125
 
143
126
 
144
127
  @api.get(
145
128
  f"/{KB_PREFIX}/{{kbid}}/labelsets",
146
129
  status_code=200,
147
- name="Get Knowledge Box Label Sets",
130
+ summary="Get Knowledge Box Label Sets",
148
131
  response_model=KnowledgeBoxLabels,
149
132
  tags=["Knowledge Box Services"],
150
133
  )
151
134
  @requires(NucliaDBRoles.READER)
152
135
  @version(1)
153
- async def get_labelsets(request: Request, kbid: str) -> KnowledgeBoxLabels:
154
- ingest = get_ingest()
155
- l_request: GetLabelsRequest = GetLabelsRequest()
156
- l_request.kb.uuid = kbid
136
+ async def get_labelsets_endoint(request: Request, kbid: str) -> KnowledgeBoxLabels:
137
+ try:
138
+ return await get_labelsets(kbid)
139
+ except KnowledgeBoxNotFound:
140
+ raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
141
+
157
142
 
158
- kbobj: GetLabelsResponse = await ingest.GetLabels(l_request) # type: ignore
159
- if kbobj.status == GetLabelsResponse.Status.OK:
160
- response = KnowledgeBoxLabels(uuid=kbid)
161
- for labelset, labelset_data in kbobj.labels.labelset.items():
162
- labelset_response = LabelSet(
163
- **MessageToDict(
164
- labelset_data,
165
- preserving_proto_field_name=True,
166
- including_default_value_fields=True,
167
- )
143
+ async def get_labelsets(kbid: str) -> KnowledgeBoxLabels:
144
+ kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
145
+ if not kb_exists:
146
+ raise KnowledgeBoxNotFound()
147
+ labelsets: writer_pb2.Labels = await datamanagers.atomic.labelset.get_all(kbid=kbid)
148
+ response = KnowledgeBoxLabels(uuid=kbid)
149
+ for labelset, labelset_data in labelsets.labelset.items():
150
+ labelset_response = LabelSet(
151
+ **MessageToDict(
152
+ labelset_data,
153
+ preserving_proto_field_name=True,
154
+ including_default_value_fields=True,
168
155
  )
169
- response.labelsets[labelset] = labelset_response
170
- return response
171
- elif kbobj.status == GetLabelsResponse.Status.NOTFOUND:
172
- raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
173
- else:
174
- raise HTTPException(
175
- status_code=500, detail="Error on getting Knowledge box labels"
176
156
  )
157
+ response.labelsets[labelset] = labelset_response
158
+ return response
177
159
 
178
160
 
179
161
  @api.get(
180
162
  f"/{KB_PREFIX}/{{kbid}}/labelset/{{labelset}}",
181
163
  status_code=200,
182
- name="Get a Knowledge Box Label Set",
164
+ summary="Get a Knowledge Box Label Set",
183
165
  response_model=LabelSet,
184
166
  tags=["Knowledge Box Services"],
185
167
  )
186
168
  @requires(NucliaDBRoles.READER)
187
169
  @version(1)
188
- async def get_labelset(request: Request, kbid: str, labelset: str) -> LabelSet:
189
- ingest = get_ingest()
190
- l_request: GetLabelSetRequest = GetLabelSetRequest()
191
- l_request.kb.uuid = kbid
192
- l_request.labelset = labelset
170
+ async def get_labelset_endpoint(request: Request, kbid: str, labelset: str) -> LabelSet:
171
+ try:
172
+ return await get_labelset(kbid, labelset)
173
+ except KnowledgeBoxNotFound:
174
+ raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
193
175
 
194
- kbobj: GetLabelSetResponse = await ingest.GetLabelSet(l_request) # type: ignore
195
- if kbobj.status == GetLabelSetResponse.Status.OK:
176
+
177
+ async def get_labelset(kbid: str, labelset_id: str) -> LabelSet:
178
+ kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
179
+ if not kb_exists:
180
+ raise KnowledgeBoxNotFound()
181
+ labelset: Optional[writer_pb2.LabelSet] = await datamanagers.atomic.labelset.get(
182
+ kbid=kbid, labelset_id=labelset_id
183
+ )
184
+ if labelset is None:
185
+ response = LabelSet()
186
+ else:
196
187
  response = LabelSet(
197
188
  **MessageToDict(
198
- kbobj.labelset,
189
+ labelset,
199
190
  preserving_proto_field_name=True,
200
191
  including_default_value_fields=True,
201
192
  )
202
193
  )
203
- return response
204
- elif kbobj.status == GetLabelSetResponse.Status.NOTFOUND:
205
- raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
206
- else:
207
- raise HTTPException(
208
- status_code=500, detail="Error on getting labelset on a Knowledge box"
209
- )
210
-
211
-
212
- @api.get(
213
- f"/{KB_PREFIX}/{{kbid}}/vectorsets",
214
- status_code=200,
215
- name="Get Knowledge Box Vector Sets",
216
- tags=["Knowledge Box Services"],
217
- response_model=VectorSets,
218
- openapi_extra={"x-operation_order": 1},
219
- )
220
- @requires(NucliaDBRoles.READER)
221
- @version(1)
222
- async def get_vectorsets(request: Request, kbid: str):
223
- ingest = get_ingest()
224
- pbrequest: GetVectorSetsRequest = GetVectorSetsRequest()
225
- pbrequest.kb.uuid = kbid
226
-
227
- vectorsets: GetVectorSetsResponse = await ingest.GetVectorSets(pbrequest) # type: ignore
228
- if vectorsets.status == GetVectorSetsResponse.Status.OK:
229
- result = VectorSets(vectorsets={})
230
- for key, vector in vectorsets.vectorsets.vectorsets.items():
231
- result.vectorsets[key] = VectorSet.from_message(vector)
232
- return result
233
- elif vectorsets.status == GetVectorSetsResponse.Status.NOTFOUND:
234
- raise HTTPException(status_code=404, detail="VectorSet does not exist")
235
- elif vectorsets.status == GetVectorSetsResponse.Status.ERROR:
236
- raise HTTPException(
237
- status_code=500, detail="Error on getting vectorset on a Knowledge box"
238
- )
194
+ return response
239
195
 
240
196
 
241
197
  @api.get(
242
198
  f"/{KB_PREFIX}/{{kbid}}/custom-synonyms",
243
199
  status_code=200,
244
- name="Get Knowledge Box Custom Synonyms",
200
+ summary="Get Knowledge Box Custom Synonyms",
245
201
  tags=["Knowledge Box Services"],
246
202
  response_model=KnowledgeBoxSynonyms,
247
203
  openapi_extra={"x-operation_order": 2},
@@ -249,23 +205,16 @@ async def get_vectorsets(request: Request, kbid: str):
249
205
  @requires(NucliaDBRoles.READER)
250
206
  @version(1)
251
207
  async def get_custom_synonyms(request: Request, kbid: str):
252
- ingest = get_ingest()
253
- pbrequest = KnowledgeBoxID(uuid=kbid)
254
- pbresponse: GetSynonymsResponse = await ingest.GetSynonyms(pbrequest) # type: ignore
255
- if pbresponse.status.status == OpStatusWriter.Status.OK:
256
- return KnowledgeBoxSynonyms.from_message(pbresponse.synonyms)
257
- elif pbresponse.status.status == OpStatusWriter.Status.NOTFOUND:
208
+ if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
258
209
  raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
259
- elif pbresponse.status.status == OpStatusWriter.Status.ERROR:
260
- raise HTTPException(
261
- status_code=500, detail="Error getting synonyms of a Knowledge box"
262
- )
210
+ synonyms = await datamanagers.atomic.synonyms.get(kbid=kbid) or Synonyms()
211
+ return from_proto.kb_synonyms(synonyms)
263
212
 
264
213
 
265
214
  @api.get(
266
215
  f"/{KB_PREFIX}/{{kbid}}/notifications",
267
216
  status_code=200,
268
- name="Knowledge Box Notifications Stream",
217
+ summary="Knowledge Box Notifications Stream",
269
218
  description="Provides a stream of activity notifications for the given Knowledge Box. The stream will be automatically closed after 2 minutes.", # noqa: E501
270
219
  tags=["Knowledge Box Services"],
271
220
  response_description="Each line of the response is a Base64-encoded JSON object representing a notification. Refer to [the internal documentation](https://github.com/nuclia/nucliadb/blob/main/docs/tutorials/KB_NOTIFICATIONS.md) for a more detailed explanation of each notification type.", # noqa: E501
@@ -298,14 +247,14 @@ async def notifications_endpoint(
298
247
 
299
248
 
300
249
  async def exists_kb(kbid: str) -> bool:
301
- async with datamanagers.with_transaction(read_only=True) as txn:
250
+ async with datamanagers.with_ro_transaction() as txn:
302
251
  return await datamanagers.kb.exists_kb(txn, kbid=kbid)
303
252
 
304
253
 
305
254
  @api.get(
306
255
  f"/{KB_PREFIX}/{{kbid}}/processing-status",
307
256
  status_code=200,
308
- name="Knowledge Box Processing Status",
257
+ summary="Knowledge Box Processing Status",
309
258
  description="Provides the status of the processing of the given Knowledge Box.",
310
259
  tags=["Knowledge Box Services"],
311
260
  response_model=processing.RequestsResults,
@@ -326,14 +275,12 @@ async def processing_status(
326
275
  return HTTPClientError(status_code=404, detail="Knowledge Box not found")
327
276
 
328
277
  async with processing.ProcessingHTTPClient() as client:
329
- results = await client.requests(
330
- cursor=cursor, scheduled=scheduled, kbid=kbid, limit=limit
331
- )
278
+ results = await client.requests(cursor=cursor, scheduled=scheduled, kbid=kbid, limit=limit)
332
279
 
333
280
  storage = await get_storage(service_name=SERVICE_NAME)
334
281
  driver = get_driver()
335
282
 
336
- async with driver.transaction(wait_for_abort=False, read_only=True) as txn:
283
+ async with driver.transaction(read_only=True) as txn:
337
284
  kb = KnowledgeBox(txn, storage, kbid)
338
285
 
339
286
  max_simultaneous = asyncio.Semaphore(10)
@@ -355,9 +302,7 @@ async def processing_status(
355
302
 
356
303
  result_items = [
357
304
  item
358
- for item in await asyncio.gather(
359
- *[_composition(result) for result in results.results]
360
- )
305
+ for item in await asyncio.gather(*[_composition(result) for result in results.results])
361
306
  if item is not None
362
307
  ]
363
308