nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -17,20 +17,16 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
 
20
- import asyncio
21
20
  import logging
22
21
  import re
23
22
  import string
24
23
  from typing import Optional
25
24
 
26
- from nucliadb_protos.utils_pb2 import ExtractedText
27
-
25
+ from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB, ParagraphId
28
26
  from nucliadb.ingest.fields.base import Field
29
- from nucliadb.ingest.orm.resource import KB_REVERSE
30
27
  from nucliadb.ingest.orm.resource import Resource as ResourceORM
31
- from nucliadb_telemetry import metrics
32
-
33
- from .cache import get_resource_from_cache
28
+ from nucliadb.search.search import cache
29
+ from nucliadb_telemetry import errors, metrics
34
30
 
35
31
  logger = logging.getLogger(__name__)
36
32
  PRE_WORD = string.punctuation + " "
@@ -56,62 +52,6 @@ GET_PARAGRAPH_LATENCY = metrics.Observer(
56
52
  )
57
53
 
58
54
 
59
- EXTRACTED_CACHE_OPS = metrics.Counter(
60
- "nucliadb_extracted_text_cache_ops", labels={"type": ""}
61
- )
62
-
63
-
64
- class ExtractedTextCache:
65
- """
66
- Used to cache extracted text from a resource in memory during
67
- the process of search results serialization.
68
- """
69
-
70
- def __init__(self):
71
- self.locks = {}
72
- self.values = {}
73
-
74
- def get_value(self, key: str) -> Optional[ExtractedText]:
75
- return self.values.get(key)
76
-
77
- def get_lock(self, key: str) -> asyncio.Lock:
78
- return self.locks.setdefault(key, asyncio.Lock())
79
-
80
- def set_value(self, key: str, value: ExtractedText) -> None:
81
- self.values[key] = value
82
-
83
- def clear(self):
84
- self.values.clear()
85
- self.locks.clear()
86
-
87
-
88
- async def get_field_extracted_text(
89
- field: Field, cache: Optional[ExtractedTextCache] = None
90
- ) -> Optional[ExtractedText]:
91
- if cache is None:
92
- return await field.get_extracted_text()
93
-
94
- key = f"{field.kbid}/{field.uuid}/{field.id}"
95
- extracted_text = cache.get_value(key)
96
- if extracted_text is not None:
97
- EXTRACTED_CACHE_OPS.inc({"type": "hit"})
98
- return extracted_text
99
-
100
- async with cache.get_lock(key):
101
- # Check again in case another task already fetched it
102
- extracted_text = cache.get_value(key)
103
- if extracted_text is not None:
104
- EXTRACTED_CACHE_OPS.inc({"type": "hit"})
105
- return extracted_text
106
-
107
- EXTRACTED_CACHE_OPS.inc({"type": "miss"})
108
- extracted_text = await field.get_extracted_text()
109
- if extracted_text is not None:
110
- # Only cache if we actually have extracted text
111
- cache.set_value(key, extracted_text)
112
- return extracted_text
113
-
114
-
115
55
  @GET_PARAGRAPH_LATENCY.wrap({"type": "full"})
116
56
  async def get_paragraph_from_full_text(
117
57
  *,
@@ -119,16 +59,23 @@ async def get_paragraph_from_full_text(
119
59
  start: int,
120
60
  end: int,
121
61
  split: Optional[str] = None,
122
- extracted_text_cache: Optional[ExtractedTextCache] = None,
62
+ log_on_missing_field: bool = True,
123
63
  ) -> str:
124
64
  """
125
65
  Pull paragraph from full text stored in database.
126
66
 
127
67
  This requires downloading the full text and then slicing it.
128
68
  """
129
- extracted_text = await get_field_extracted_text(field, cache=extracted_text_cache)
69
+ extracted_text = await cache.get_field_extracted_text(field)
130
70
  if extracted_text is None:
131
- logger.warning(f"{field} extracted_text does not exist on DB yet")
71
+ if log_on_missing_field:
72
+ logger.warning(
73
+ "Extracted_text for field does not exist on DB. This should not happen.",
74
+ extra={
75
+ "field_id": field.resource_unique_id,
76
+ "kbid": field.kbid,
77
+ },
78
+ )
132
79
  return ""
133
80
 
134
81
  if split not in (None, ""):
@@ -141,39 +88,46 @@ async def get_paragraph_from_full_text(
141
88
  async def get_paragraph_text(
142
89
  *,
143
90
  kbid: str,
144
- rid: str,
145
- field: str,
146
- start: int,
147
- end: int,
148
- split: Optional[str] = None,
91
+ paragraph_id: ParagraphId,
149
92
  highlight: bool = False,
150
93
  ematches: Optional[list[str]] = None,
151
94
  matches: Optional[list[str]] = None,
152
95
  orm_resource: Optional[
153
96
  ResourceORM
154
97
  ] = None, # allow passing in orm_resource to avoid extra DB calls or txn issues
155
- extracted_text_cache: Optional[ExtractedTextCache] = None,
98
+ log_on_missing_field: bool = True,
156
99
  ) -> str:
100
+ rid = paragraph_id.rid
101
+ field_type = paragraph_id.field_id.type
102
+ field_key = paragraph_id.field_id.key
103
+
157
104
  if orm_resource is None:
158
- orm_resource = await get_resource_from_cache(kbid, rid)
105
+ orm_resource = await cache.get_resource(kbid, rid)
159
106
  if orm_resource is None:
160
- logger.error(f"{kbid}/{rid}:{field} does not exist on DB")
107
+ if log_on_missing_field:
108
+ logger.warning(
109
+ "Resource does not exist on DB. This should not happen.",
110
+ extra={"resource_id": rid, "kbid": kbid, "field": f"{field_type}/{field_key}"},
111
+ )
161
112
  return ""
162
113
 
163
- _, field_type, field = field.split("/")
164
- field_type_int = KB_REVERSE[field_type]
165
- field_obj = await orm_resource.get_field(field, field_type_int, load=False)
114
+ field_type_int = FIELD_TYPE_STR_TO_PB[field_type]
115
+ field_obj = await orm_resource.get_field(field_key, field_type_int, load=False)
166
116
 
167
117
  text = await get_paragraph_from_full_text(
168
118
  field=field_obj,
169
- start=start,
170
- end=end,
171
- split=split,
172
- extracted_text_cache=extracted_text_cache,
119
+ start=paragraph_id.paragraph_start,
120
+ end=paragraph_id.paragraph_end,
121
+ split=paragraph_id.field_id.subfield_id,
122
+ log_on_missing_field=log_on_missing_field,
173
123
  )
174
124
 
175
125
  if highlight:
176
- text = highlight_paragraph(text, words=matches, ematches=ematches)
126
+ try:
127
+ text = highlight_paragraph(text, words=matches, ematches=ematches)
128
+ except Exception as ex:
129
+ errors.capture_exception(ex)
130
+ logger.exception("Error highlighting paragraph", extra={"kbid": kbid})
177
131
  return text
178
132
 
179
133
 
@@ -191,19 +145,17 @@ async def get_text_sentence(
191
145
  Leave separated from get paragraph for now until we understand the differences
192
146
  better.
193
147
  """
194
- orm_resource = await get_resource_from_cache(kbid, rid)
148
+ orm_resource = await cache.get_resource(kbid, rid)
195
149
 
196
150
  if orm_resource is None:
197
151
  logger.warning(f"{rid} does not exist on DB")
198
152
  return ""
199
153
 
200
- field_type_int = KB_REVERSE[field_type]
154
+ field_type_int = FIELD_TYPE_STR_TO_PB[field_type]
201
155
  field_obj = await orm_resource.get_field(field, field_type_int, load=False)
202
156
  extracted_text = await field_obj.get_extracted_text()
203
157
  if extracted_text is None:
204
- logger.info(
205
- f"{rid} {field} {field_type_int} extracted_text does not exist on DB"
206
- )
158
+ logger.info(f"{rid} {field} {field_type_int} extracted_text does not exist on DB")
207
159
  return ""
208
160
  start = start - 1
209
161
  if start < 0:
@@ -216,36 +168,42 @@ async def get_text_sentence(
216
168
  return splitted_text
217
169
 
218
170
 
219
- def get_regex(some_string: str) -> str:
220
- return r"\b" + some_string.lower() + r"\b"
221
-
222
-
223
171
  def highlight_paragraph(
224
172
  text: str, words: Optional[list[str]] = None, ematches: Optional[list[str]] = None
225
173
  ) -> str:
174
+ """
175
+ Highlight `text` with <mark></mark> tags around the words in `words` and `ematches`.
176
+
177
+ Parameters:
178
+ - text: The text to highlight.
179
+ - words: A list of words to highlight.
180
+ - ematches: A list of exact matches to highlight.
181
+
182
+ Returns:
183
+ - The highlighted text.
184
+ """
185
+ REGEX_TEMPLATE = r"(^|\s)({text})(\s|$)"
226
186
  text_lower = text.lower()
227
187
 
228
188
  marks = [0] * (len(text_lower) + 1)
229
- if ematches is not None:
230
- for quote in ematches:
231
- quote_regex = get_regex(quote.lower())
232
- try:
233
- for match in re.finditer(quote_regex, text_lower):
234
- start, end = match.span()
235
- marks[start] = 1
236
- marks[end] = 2
237
- except re.error:
238
- logger.warning(
239
- f"Regex errors while highlighting text. Regex: {quote_regex}"
240
- )
241
- continue
189
+ ematches = ematches or []
190
+ for quote in ematches:
191
+ quote_regex = REGEX_TEMPLATE.format(text=re.escape(quote.lower()))
192
+ try:
193
+ for match in re.finditer(quote_regex, text_lower):
194
+ start, end = match.span(2)
195
+ marks[start] = 1
196
+ marks[end] = 2
197
+ except re.error:
198
+ logger.warning(f"Regex errors while highlighting text. Regex: {quote_regex}")
199
+ continue
242
200
 
243
201
  words = words or []
244
202
  for word in words:
245
- word_regex = get_regex(word.lower())
203
+ word_regex = REGEX_TEMPLATE.format(text=re.escape(word.lower()))
246
204
  try:
247
205
  for match in re.finditer(word_regex, text_lower):
248
- start, end = match.span()
206
+ start, end = match.span(2)
249
207
  if marks[start] == 0 and marks[end] == 0:
250
208
  marks[start] = 1
251
209
  marks[end] = 2
@@ -0,0 +1,233 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ import logging
22
+ from collections import defaultdict
23
+ from typing import Any, cast
24
+
25
+ from psycopg.rows import dict_row
26
+
27
+ from nucliadb.common.maindb.pg import PGDriver
28
+ from nucliadb.common.maindb.utils import get_driver
29
+ from nucliadb.search.search.query_parser.models import CatalogQuery
30
+ from nucliadb_models.labels import translate_system_to_alias_label
31
+ from nucliadb_models.metadata import ResourceProcessingStatus
32
+ from nucliadb_models.search import (
33
+ ResourceResult,
34
+ Resources,
35
+ SortField,
36
+ SortOrder,
37
+ )
38
+ from nucliadb_telemetry import metrics
39
+
40
+ from .filters import translate_label
41
+
42
+ observer = metrics.Observer("pg_catalog_search", labels={"op": ""})
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
+ def _filter_operands(operands):
47
+ literals = []
48
+ nonliterals = []
49
+ for operand in operands:
50
+ op, params = next(iter(operand.items()))
51
+ if op == "literal":
52
+ literals.append(params)
53
+ else:
54
+ nonliterals.append(operand)
55
+
56
+ return literals, nonliterals
57
+
58
+
59
+ def _convert_filter(filter, filter_params):
60
+ op, operands = next(iter(filter.items()))
61
+ if op == "literal":
62
+ param_name = f"param{len(filter_params)}"
63
+ filter_params[param_name] = [operands]
64
+ return f"labels @> %({param_name})s"
65
+ elif op in ("and", "or"):
66
+ array_op = "@>" if op == "and" else "&&"
67
+ sql = []
68
+ literals, nonliterals = _filter_operands(operands)
69
+ if literals:
70
+ param_name = f"param{len(filter_params)}"
71
+ filter_params[param_name] = literals
72
+ sql.append(f"labels {array_op} %({param_name})s")
73
+ for nonlit in nonliterals:
74
+ sql.append(_convert_filter(nonlit, filter_params))
75
+ return "(" + f" {op.upper()} ".join(sql) + ")"
76
+ elif op == "not":
77
+ return f"(NOT {_convert_filter(operands, filter_params)})"
78
+ else:
79
+ raise ValueError(f"Invalid operator {op}")
80
+
81
+
82
+ def _prepare_query(catalog_query: CatalogQuery):
83
+ filter_sql = ["kbid = %(kbid)s"]
84
+ filter_params: dict[str, Any] = {"kbid": catalog_query.kbid}
85
+
86
+ if catalog_query.query:
87
+ # This is doing tokenization inside the SQL server (to keep the index updated). We could move it to
88
+ # the python code at update/query time if it ever becomes a problem but for now, a single regex
89
+ # executed per query is not a problem.
90
+ filter_sql.append(
91
+ "regexp_split_to_array(lower(title), '\\W') @> regexp_split_to_array(lower(%(query)s), '\\W')"
92
+ )
93
+ filter_params["query"] = catalog_query.query
94
+
95
+ if catalog_query.filters.creation.after:
96
+ filter_sql.append("created_at > %(created_at_start)s")
97
+ filter_params["created_at_start"] = catalog_query.filters.creation.after
98
+
99
+ if catalog_query.filters.creation.before:
100
+ filter_sql.append("created_at < %(created_at_end)s")
101
+ filter_params["created_at_end"] = catalog_query.filters.creation.before
102
+
103
+ if catalog_query.filters.modification.after:
104
+ filter_sql.append("modified_at > %(modified_at_start)s")
105
+ filter_params["modified_at_start"] = catalog_query.filters.modification.after
106
+
107
+ if catalog_query.filters.modification.before:
108
+ filter_sql.append("modified_at < %(modified_at_end)s")
109
+ filter_params["modified_at_end"] = catalog_query.filters.modification.before
110
+
111
+ if catalog_query.filters.labels:
112
+ filter_sql.append(_convert_filter(catalog_query.filters.labels, filter_params))
113
+
114
+ order_sql = ""
115
+ if catalog_query.sort:
116
+ if catalog_query.sort.field == SortField.CREATED:
117
+ order_field = "created_at"
118
+ elif catalog_query.sort.field == SortField.MODIFIED:
119
+ order_field = "modified_at"
120
+ elif catalog_query.sort.field == SortField.TITLE:
121
+ order_field = "title"
122
+ else:
123
+ # Deprecated order by score, use created_at instead
124
+ order_field = "created_at"
125
+
126
+ if catalog_query.sort.order == SortOrder.ASC:
127
+ order_dir = "ASC"
128
+ else:
129
+ order_dir = "DESC"
130
+
131
+ order_sql = f" ORDER BY {order_field} {order_dir}"
132
+
133
+ if catalog_query.filters.with_status:
134
+ filter_sql.append("labels && %(status)s")
135
+ if catalog_query.filters.with_status == ResourceProcessingStatus.PROCESSED:
136
+ filter_params["status"] = ["/n/s/PROCESSED", "/n/s/ERROR"]
137
+ else:
138
+ filter_params["status"] = ["/n/s/PENDING"]
139
+
140
+ return (
141
+ f"SELECT * FROM catalog WHERE {' AND '.join(filter_sql)}{order_sql}",
142
+ filter_params,
143
+ )
144
+
145
+
146
def _pg_driver() -> PGDriver:
    """Return the driver from `get_driver()`, typed as a `PGDriver`.

    `cast` only informs the type checker; no runtime type check is done here.
    """
    driver = get_driver()
    return cast(PGDriver, driver)
148
+
149
+
150
@observer.wrap({"op": "search"})
async def pgcatalog_search(catalog_query: CatalogQuery) -> Resources:
    """Run a catalog search against the PostgreSQL ``catalog`` table.

    Up to three statements are executed on a single connection, all built on
    top of the filtered query returned by `_prepare_query`:

    1. facet counts (only when ``catalog_query.faceted`` is set),
    2. the total number of matching rows,
    3. the requested page of rows (``LIMIT``/``OFFSET``).

    Parameters:
        catalog_query: The parsed catalog query (filters, sort, pagination
            and requested facets).

    Returns:
        A `Resources` object with the page results, the facet counts, the
        total and the pagination information. Only labels starting with
        ``/l/`` are exposed on each result.
    """
    # Prepare SQL query
    query, query_params = _prepare_query(catalog_query)

    async with _pg_driver()._get_connection() as conn, conn.cursor(row_factory=dict_row) as cur:
        facets = {}

        # Faceted search
        if catalog_query.faceted:
            with observer({"op": "facets"}):
                tmp_facets: dict[str, dict[str, int]] = {
                    translate_label(f): defaultdict(int) for f in catalog_query.faceted
                }

                # Facet prefixes are bound as query parameters rather than
                # interpolated into the SQL text, so a facet containing quotes
                # cannot break or alter the statement. The "facet<N>" names do
                # not clash with the names produced by `_prepare_query`.
                facet_params: dict[str, Any] = {}
                facet_clauses = []
                for index, facet in enumerate(tmp_facets):
                    if not facet.startswith(("/n/s", "/n/i", "/l")):
                        logger.warning(
                            "Unexpected facet used at catalog: %s, kbid=%s",
                            facet,
                            catalog_query.kbid,
                        )
                    param_name = f"facet{index}"
                    # Match every label below this facet
                    facet_params[param_name] = f"{facet}/%"
                    facet_clauses.append(f"label LIKE %({param_name})s")
                facet_filters = " OR ".join(facet_clauses)

                await cur.execute(
                    f"SELECT label, COUNT(*) FROM (SELECT unnest(labels) AS label FROM ({query}) fc) nl WHERE ({facet_filters}) GROUP BY 1 ORDER BY 1",
                    {**query_params, **facet_params},
                )

                for row in await cur.fetchall():
                    label = row["label"]
                    label_parts = label.split("/")
                    parent = "/".join(label_parts[:-1])
                    count = row["count"]
                    if parent in tmp_facets:
                        tmp_facets[parent][translate_system_to_alias_label(label)] = count

                    # No need to get recursive because our facets are at most 3 levels deep (e.g: /l/set/label)
                    if len(label_parts) >= 3:
                        grandparent = "/".join(label_parts[:-2])
                        if grandparent in tmp_facets:
                            # Roll the count of each label up into its parent's bucket
                            tmp_facets[grandparent][translate_system_to_alias_label(parent)] += count

                facets = {translate_system_to_alias_label(k): v for k, v in tmp_facets.items()}

        # Totals
        with observer({"op": "totals"}):
            await cur.execute(
                f"SELECT COUNT(*) FROM ({query}) fc",
                query_params,
            )
            total = (await cur.fetchone())["count"]  # type: ignore

        # Query
        with observer({"op": "query"}):
            offset = catalog_query.page_size * catalog_query.page_number
            await cur.execute(
                f"{query} LIMIT %(page_size)s OFFSET %(offset)s",
                {
                    **query_params,
                    "page_size": catalog_query.page_size,
                    "offset": offset,
                },
            )
            data = await cur.fetchall()

    return Resources(
        facets=facets,
        results=[
            ResourceResult(
                # The rid is returned without dashes
                rid=str(r["rid"]).replace("-", ""),
                field="title",
                field_type="a",
                labels=[label for label in r["labels"] if label.startswith("/l/")],
                score=0,
            )
            for r in data
        ],
        query=catalog_query.query,
        total=total,
        page_number=catalog_query.page_number,
        page_size=catalog_query.page_size,
        # There is a next page while rows remain beyond the ones just fetched
        next_page=(offset + len(data) < total),
        min_score=0,
    )
@@ -84,5 +84,5 @@ async def predict_proxy(
84
84
 
85
85
 
86
86
async def exists_kb(kbid: str) -> bool:
    """Return the result of `datamanagers.kb.exists_kb` for `kbid`.

    The lookup runs inside a transaction obtained from
    `datamanagers.with_ro_transaction()`.
    """
    async with datamanagers.with_ro_transaction() as txn:
        found = await datamanagers.kb.exists_kb(txn, kbid=kbid)
    return found