nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,110 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ from typing import AsyncIterator, Optional
21
+
22
+ from nucliadb.common.datamanagers.utils import get_kv_pb
23
+ from nucliadb.common.maindb.driver import Transaction
24
+ from nucliadb_protos import knowledgebox_pb2
25
+
26
# Key template under which the functions of this module read and write the
# serialized KnowledgeBoxVectorSetsConfig of a knowledge box; `{kbid}` is
# filled in through `str.format(kbid=...)`.
+ KB_VECTORSETS = "/kbs/{kbid}/vectorsets"
27
+
28
+
29
class BrokenInvariant(Exception):
    """Error type signalling that an expected invariant does not hold.

    Nothing in this module raises it. NOTE(review): presumably raised by
    callers when the stored vectorsets configuration is inconsistent --
    confirm against the call sites.
    """
31
+
32
+
33
async def initialize(txn: Transaction, *, kbid: str):
    """Store an empty vectorsets configuration for a knowledge box.

    A default-constructed ``KnowledgeBoxVectorSetsConfig`` is serialized and
    set under the KB's vectorsets key. The key is not read beforehand, so no
    check is made for a previously stored value.

    Args:
        txn: transaction used to perform the write.
        kbid: knowledge box id interpolated into the key.
    """
    empty_config = knowledgebox_pb2.KnowledgeBoxVectorSetsConfig()
    await txn.set(KB_VECTORSETS.format(kbid=kbid), empty_config.SerializeToString())
36
+
37
+
38
async def get(
    txn: Transaction, *, kbid: str, vectorset_id: str
) -> Optional[knowledgebox_pb2.VectorSetConfig]:
    """Look up the configuration of a single vectorset of a knowledge box.

    The KB vectorsets configuration is loaded with ``for_update=False``.

    Args:
        txn: transaction used to read the configuration.
        kbid: knowledge box id.
        vectorset_id: id of the vectorset to look for.

    Returns:
        The stored ``VectorSetConfig`` whose ``vectorset_id`` matches, or
        ``None`` when the knowledge box has no such vectorset.
    """
    stored_config = await _get_or_default(txn, kbid=kbid, for_update=False)
    position = _find_vectorset(stored_config, vectorset_id)
    return None if position is None else stored_config.vectorsets[position]
46
+
47
+
48
async def exists(txn: Transaction, *, kbid: str, vectorset_id: str) -> bool:
    """Tell whether a knowledge box has a vectorset with the given id.

    The KB vectorsets configuration is loaded with ``for_update=False``.

    Args:
        txn: transaction used to read the configuration.
        kbid: knowledge box id.
        vectorset_id: id of the vectorset to look for.

    Returns:
        ``True`` when an entry with that ``vectorset_id`` is stored,
        ``False`` otherwise.
    """
    kb_vectorsets = await _get_or_default(txn, kbid=kbid, for_update=False)
    return _find_vectorset(kb_vectorsets, vectorset_id) is not None
51
+
52
+
53
async def iter(
    txn: Transaction, *, kbid: str
) -> AsyncIterator[tuple[str, knowledgebox_pb2.VectorSetConfig]]:
    """Iterate over every vectorset configured for a knowledge box.

    The KB vectorsets configuration is loaded once, with ``for_update=False``,
    before the first item is produced.

    Args:
        txn: transaction used to read the configuration.
        kbid: knowledge box id.

    Yields:
        ``(vectorset_id, config)`` pairs, in the order the entries are stored.
    """
    stored_config = await _get_or_default(txn, kbid=kbid, for_update=False)
    for vectorset in stored_config.vectorsets:
        vectorset_id = vectorset.vectorset_id
        yield vectorset_id, vectorset
59
+
60
+
61
async def set(txn: Transaction, *, kbid: str, config: knowledgebox_pb2.VectorSetConfig):
    """Create or update a vectorset configuration.

    The KB vectorsets configuration is loaded with ``for_update=True``,
    modified in memory and written back under the same key.

    Args:
        txn: transaction used for the read and the write.
        kbid: knowledge box id.
        config: vectorset configuration to store; its ``vectorset_id`` decides
            whether an existing entry is replaced or a new one is added.
    """
    stored_config = await _get_or_default(txn, kbid=kbid, for_update=True)
    position = _find_vectorset(stored_config, config.vectorset_id)
    if position is not None:
        # known vectorset id: overwrite the stored entry in place
        stored_config.vectorsets[position].CopyFrom(config)
    else:
        # unknown vectorset id: add it at the end of the list
        stored_config.vectorsets.append(config)

    await txn.set(KB_VECTORSETS.format(kbid=kbid), stored_config.SerializeToString())
74
+
75
+
76
async def delete(txn: Transaction, *, kbid: str, vectorset_id: str):
    """Remove a vectorset configuration from a knowledge box, if present.

    The KB vectorsets configuration is loaded with ``for_update=True``. When no
    entry matches ``vectorset_id`` the function returns without writing.

    Args:
        txn: transaction used for the read and the write.
        kbid: knowledge box id.
        vectorset_id: id of the vectorset to remove.
    """
    stored_config = await _get_or_default(txn, kbid=kbid, for_update=True)
    position = _find_vectorset(stored_config, vectorset_id)
    # a missing entry means it is already deleted: nothing to write back
    if position is not None:
        del stored_config.vectorsets[position]
        await txn.set(KB_VECTORSETS.format(kbid=kbid), stored_config.SerializeToString())
86
+
87
+
88
# XXX Later in the vectorset epic this key should become mandatory, and a
# missing value should be an error rather than silently replaced by a default
async def _get_or_default(
    txn: Transaction,
    *,
    kbid: str,
    for_update: bool = True,
) -> knowledgebox_pb2.KnowledgeBoxVectorSetsConfig:
    """Load the vectorsets configuration of a knowledge box.

    Args:
        txn: transaction used to read the key.
        kbid: knowledge box id interpolated into the key.
        for_update: forwarded unchanged to ``get_kv_pb``.

    Returns:
        The value returned by ``get_kv_pb`` when it is truthy; otherwise a
        fresh, empty ``KnowledgeBoxVectorSetsConfig`` (presumably the case of
        a key that has never been written -- verify against ``get_kv_pb``).
    """
    stored = await get_kv_pb(
        txn,
        KB_VECTORSETS.format(kbid=kbid),
        knowledgebox_pb2.KnowledgeBoxVectorSetsConfig,
        for_update=for_update,
    )
    if stored:
        return stored
    return knowledgebox_pb2.KnowledgeBoxVectorSetsConfig()
101
+
102
+
103
def _find_vectorset(
    kb_vectorsets: knowledgebox_pb2.KnowledgeBoxVectorSetsConfig, vectorset_id: str
) -> Optional[int]:
    """Locate a vectorset by id inside a KB vectorsets configuration.

    Args:
        kb_vectorsets: configuration whose `vectorsets` entries are scanned.
        vectorset_id: id to compare against each entry's `vectorset_id`.

    Returns:
        The index of the first matching entry in `vectorsets`, or `None` when
        no entry matches.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(kb_vectorsets.vectorsets)
        if candidate.vectorset_id == vectorset_id
    )
    return next(matching_positions, None)
@@ -0,0 +1,257 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ import abc
21
+ import logging
22
+ from dataclasses import dataclass
23
+ from typing import Any, Iterator, Optional
24
+
25
+ from pydantic import BaseModel
26
+
27
+ from nucliadb.common.counters import IndexCounts
28
+ from nucliadb.common.external_index_providers.exceptions import ExternalIndexingError
29
+ from nucliadb.common.ids import ParagraphId
30
+ from nucliadb_models.external_index_providers import ExternalIndexProviderType
31
+ from nucliadb_models.search import SCORE_TYPE, TextPosition
32
+ from nucliadb_protos.knowledgebox_pb2 import (
33
+ CreateExternalIndexProviderMetadata,
34
+ StoredExternalIndexProviderMetadata,
35
+ )
36
+ from nucliadb_protos.nodereader_pb2 import SearchRequest
37
+ from nucliadb_protos.noderesources_pb2 import Resource
38
+ from nucliadb_protos.utils_pb2 import VectorSimilarity
39
+ from nucliadb_telemetry.metrics import Observer
40
+
41
+ logger = logging.getLogger(__name__)
42
+
43
+ manager_observer = Observer("external_index_manager", labels={"operation": "", "provider": ""})
44
+
45
+
46
+ @dataclass
47
+ class VectorsetExternalIndex:
48
+ """
49
+ Used to indicate to external index managers the required metadata
50
+ in order to create an external index for each vectorset
51
+ """
52
+
53
+ vectorset_id: str
54
+ dimension: int
55
+ similarity: VectorSimilarity.ValueType
56
+
57
+
58
+ class TextBlockMatch(BaseModel):
59
+ """
60
+ Model a text block/paragraph retrieved from an external index with all the information
61
+ needed in order to later hydrate retrieval results.
62
+ """
63
+
64
+ paragraph_id: ParagraphId
65
+ position: TextPosition
66
+ score: float
67
+ score_type: SCORE_TYPE
68
+ order: int
69
+ page_with_visual: bool = False
70
+ fuzzy_search: bool
71
+ is_a_table: bool = False
72
+ representation_file: Optional[str] = None
73
+ paragraph_labels: list[str] = []
74
+ field_labels: list[str] = []
75
+ text: Optional[str] = None
76
+
77
+
78
+ class QueryResults(BaseModel):
79
+ """
80
+ Model for the results of a query to an external index provider.
81
+ Must be subclassed by the specific external index provider.
82
+ """
83
+
84
+ type: ExternalIndexProviderType
85
+ results: Any
86
+
87
+ def iter_matching_text_blocks(self) -> Iterator[TextBlockMatch]:
88
+ """
89
+ Iterates over the paragraphs in the results, by decreasing score.
90
+ This should be implemented by the specific external index provider.
91
+ """
92
+ raise NotImplementedError()
93
+
94
+
95
+ class ExternalIndexManager(abc.ABC, metaclass=abc.ABCMeta):
96
+ """
97
+ Base class for the external index providers. Must be subclassed by the specific external index provider.
98
+ """
99
+
100
+ type: ExternalIndexProviderType
101
+ supports_rollover: bool = False
102
+
103
+ def __init__(self, kbid: str):
104
+ self.kbid = kbid
105
+
106
+ @classmethod
107
+ @abc.abstractmethod
108
+ async def create_indexes(
109
+ cls,
110
+ kbid: str,
111
+ create_request: CreateExternalIndexProviderMetadata,
112
+ indexes: list[VectorsetExternalIndex],
113
+ ) -> StoredExternalIndexProviderMetadata: ...
114
+
115
+ @classmethod
116
+ @abc.abstractmethod
117
+ async def delete_indexes(
118
+ cls,
119
+ kbid: str,
120
+ stored: StoredExternalIndexProviderMetadata,
121
+ ) -> None: ...
122
+
123
+ @abc.abstractmethod
124
+ async def rollover_create_indexes(
125
+ self, stored: StoredExternalIndexProviderMetadata
126
+ ) -> StoredExternalIndexProviderMetadata: # pragma: no cover
127
+ """
128
+ Creates the indexes for the rollover process.
129
+ In the event of an error, it should rollback any left over indexes.
130
+ Returns a modified version of the stored external index provider metadata with the new indexes for the rollover.
131
+ """
132
+ ...
133
+
134
+ @abc.abstractmethod
135
+ async def rollover_cutover_indexes(self) -> None: # pragma: no cover
136
+ """
137
+ Cutover the indexes for the rollover process.
138
+ After this operation, the new indexes should be used for queries and the old ones should be deleted.
139
+ """
140
+ ...
141
+
142
+ @classmethod
143
+ def get_index_name(cls) -> str: # pragma: no cover
144
+ """
145
+ Returns the name of the index in the external index provider.
146
+ """
147
+ raise NotImplementedError()
148
+
149
+ async def delete_resource(self, resource_uuid: str) -> None:
150
+ """
151
+ Deletes a resource from the external index provider.
152
+ """
153
+ logger.info(
154
+ "Deleting resource to external index",
155
+ extra={
156
+ "kbid": self.kbid,
157
+ "rid": resource_uuid,
158
+ "provider": self.type.value,
159
+ },
160
+ )
161
+ with manager_observer({"operation": "delete_resource", "provider": self.type.value}):
162
+ await self._delete_resource(resource_uuid)
163
+
164
+ async def index_resource(
165
+ self, resource_uuid: str, resource_data: Resource, to_rollover_indexes: bool = False
166
+ ) -> None:
167
+ """
168
+ Indexes a resource to the external index provider.
169
+ """
170
+ if not self.supports_rollover and to_rollover_indexes:
171
+ logger.info(
172
+ "Indexing to rollover indexes not supported",
173
+ extra={
174
+ "kbid": self.kbid,
175
+ "rid": resource_uuid,
176
+ "provider": self.type.value,
177
+ },
178
+ )
179
+ return
180
+ logger.info(
181
+ "Indexing resource to external index",
182
+ extra={
183
+ "kbid": self.kbid,
184
+ "rid": resource_uuid,
185
+ "provider": self.type.value,
186
+ "rollover": to_rollover_indexes,
187
+ },
188
+ )
189
+ with manager_observer({"operation": "index_resource", "provider": self.type.value}):
190
+ try:
191
+ await self._index_resource(
192
+ resource_uuid, resource_data, to_rollover_indexes=to_rollover_indexes
193
+ )
194
+ except Exception as ex:
195
+ raise ExternalIndexingError() from ex
196
+
197
+ async def get_index_counts(self) -> IndexCounts:
198
+ """
199
+ Returns the index counts for the external index provider.
200
+ """
201
+ logger.debug(
202
+ "Getting index counts from external index",
203
+ extra={
204
+ "kbid": self.kbid,
205
+ "provider": self.type.value,
206
+ },
207
+ )
208
+ with manager_observer({"operation": "get_index_counts", "provider": self.type.value}):
209
+ return await self._get_index_counts()
210
+
211
+ async def query(self, request: SearchRequest) -> QueryResults:
212
+ """
213
+ Queries the external index provider and returns the results.
214
+ """
215
+ logger.info(
216
+ "Querying external index",
217
+ extra={
218
+ "kbid": self.kbid,
219
+ "provider": self.type.value,
220
+ },
221
+ )
222
+ with manager_observer({"operation": "query", "provider": self.type.value}):
223
+ return await self._query(request)
224
+
225
+ @abc.abstractmethod
226
+ async def _delete_resource(self, resource_uuid: str) -> None: # pragma: no cover
227
+ """
228
+ Makes sure that all vectors associated with the resource are deleted from the external index provider.
229
+ """
230
+ ...
231
+
232
+ @abc.abstractmethod
233
+ async def _index_resource(
234
+ self, resource_uuid: str, resource_data: Resource, to_rollover_indexes: bool = False
235
+ ) -> None: # pragma: no cover
236
+ """
237
+ Adapts the Resource (aka brain) to the external index provider's index format and indexes it.
238
+ Params:
239
+ - resource_uuid: the resource's UUID
240
+ - resource_data: the resource index data
241
+ - to_rollover_indexes: whether to index to the rollover indexes or the main indexes
242
+ """
243
+ ...
244
+
245
+ @abc.abstractmethod
246
+ async def _query(self, request: SearchRequest) -> QueryResults: # pragma: no cover
247
+ """
248
+ Adapts the Nucliadb's search request to the external index provider's query format and returns the results.
249
+ """
250
+ ...
251
+
252
+ @abc.abstractmethod
253
+ async def _get_index_counts(self) -> IndexCounts: # pragma: no cover
254
+ """
255
+ Returns the index counts for the external index provider.
256
+ """
257
+ ...
@@ -17,15 +17,16 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from nucliadb.common.cluster.index_node import READ_CONNECTIONS, WRITE_CONNECTIONS
21
- from nucliadb.ingest.cache import clear_ingest_cache
22
20
 
23
21
 
24
- def test_clear_ingest_cache():
25
- READ_CONNECTIONS["addr1"] = "conn1"
26
- WRITE_CONNECTIONS["addr2"] = "conn2"
22
class ExternalIndexCreationError(Exception):
    """
    Error reported for a failed index creation on an external provider.

    The provider name and the raw message are kept as attributes, and the
    exception text combines both.
    """

    def __init__(self, provider: str, message: str):
        description = f"{provider} index creation error: {message}"
        super().__init__(description)
        self.provider = provider
        self.message = message
27
27
 
28
- clear_ingest_cache()
29
28
 
30
- assert len(READ_CONNECTIONS) == 0
31
- assert len(WRITE_CONNECTIONS) == 0
29
class ExternalIndexingError(Exception):
    """
    Signals that indexing a resource in an external index failed.
    """
@@ -0,0 +1,101 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ from typing import Optional
21
+
22
+ import async_lru
23
+
24
+ from nucliadb.common import datamanagers
25
+ from nucliadb.common.external_index_providers.base import ExternalIndexManager
26
+ from nucliadb.common.external_index_providers.pinecone import PineconeIndexManager
27
+ from nucliadb.common.external_index_providers.settings import settings
28
+ from nucliadb_protos.knowledgebox_pb2 import (
29
+ ExternalIndexProviderType,
30
+ StoredExternalIndexProviderMetadata,
31
+ )
32
+ from nucliadb_utils.utilities import get_endecryptor
33
+
34
+
35
async def get_external_index_manager(
    kbid: str, for_rollover: bool = False
) -> Optional[ExternalIndexManager]:
    """
    Build the ExternalIndexManager configured for `kbid`.

    Returns None when the KB has no external index provider metadata or when the
    provider is not Pinecone (the only one supported for now).
    With `for_rollover=True`, the returned manager also carries the rollover
    indexes, provided rollover metadata exists for the KB.
    """
    stored = await get_external_index_metadata(kbid)
    if stored is None:
        return None
    if stored.type != ExternalIndexProviderType.PINECONE:
        # Only Pinecone is supported for now
        return None

    decrypted_api_key = get_endecryptor().decrypt(stored.pinecone_config.encrypted_api_key)
    default_vectorset_id = await get_default_vectorset_id(kbid)

    rollover_stored = None
    if for_rollover:
        rollover_stored = await get_rollover_external_index_metadata(kbid)
    if rollover_stored is not None:
        rollover_indexes = dict(rollover_stored.pinecone_config.indexes)
    else:
        rollover_indexes = None

    return PineconeIndexManager(
        kbid=kbid,
        api_key=decrypted_api_key,
        indexes=dict(stored.pinecone_config.indexes),
        upsert_parallelism=settings.pinecone_upsert_parallelism,
        delete_parallelism=settings.pinecone_delete_parallelism,
        upsert_timeout=settings.pinecone_upsert_timeout,
        delete_timeout=settings.pinecone_delete_timeout,
        default_vectorset=default_vectorset_id,
        rollover_indexes=rollover_indexes,
    )
67
+
68
+
69
@async_lru.alru_cache(maxsize=None)
async def get_external_index_metadata(kbid: str) -> Optional[StoredExternalIndexProviderMetadata]:
    """
    Fetch the external index provider metadata stored for `kbid`.

    Results are memoized per `kbid` by `alru_cache`, with no size limit.
    """
    stored = await datamanagers.atomic.kb.get_external_index_provider_metadata(kbid=kbid)
    return stored
72
+
73
+
74
@async_lru.alru_cache(maxsize=None)
async def get_default_vectorset_id(kbid: str) -> Optional[str]:
    """
    Resolve the vectorset id to fall back to for `kbid`.

    While we are transitioning to the new vectorset system, KBs that have only one
    semantic model will have the `vectorset_id` field on BrokerMessage.field_vectors
    set to empty string -- that is the `default` vectorset concept.

    Returns:
    - "__default__" when no vectorsets are found for the KB
    - the id of the vectorset when there is exactly one
    - None when there are several: there is no default and index messages are
      assumed to be explicit about the vectorset

    Results are memoized per `kbid` by `alru_cache`, with no size limit.
    """
    async with datamanagers.with_ro_transaction() as txn:
        vectorset_ids = [
            vectorset_id
            async for vectorset_id, _ in datamanagers.vectorsets.iter(txn, kbid=kbid)
        ]

    if not vectorset_ids:
        # Nothing in the vectorsets key on maindb: use the "__default__" vectorset as id
        return "__default__"
    if len(vectorset_ids) > 1:
        # Multiple vectorsets: no default can be picked
        return None
    # A single vectorset is the default one
    return vectorset_ids[0]
95
+
96
+
97
async def get_rollover_external_index_metadata(
    kbid: str,
) -> Optional[StoredExternalIndexProviderMetadata]:
    """
    Read the rollover external index metadata of `kbid` inside a read-only transaction.

    Returns whatever `datamanagers.rollover.get_kb_rollover_external_index_metadata`
    yields for the KB, which may be None.
    """
    async with datamanagers.with_ro_transaction() as txn:
        rollover_stored = await datamanagers.rollover.get_kb_rollover_external_index_metadata(
            txn, kbid=kbid
        )
        return rollover_stored