nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -17,40 +17,44 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- import json
21
20
  import uuid
22
- from io import BytesIO
23
- from typing import AsyncIterator, Optional
21
+ from typing import AsyncIterator
24
22
 
23
+ from nucliadb.common import datamanagers
24
+ from nucliadb.common.cluster.exceptions import AlreadyExists, EntitiesGroupNotFound
25
+ from nucliadb.common.cluster.manager import get_index_nodes
26
+ from nucliadb.common.cluster.utils import get_shard_manager
27
+ from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
28
+ from nucliadb.common.external_index_providers.exceptions import ExternalIndexCreationError
29
+ from nucliadb.common.external_index_providers.manager import get_external_index_manager
30
+ from nucliadb.common.maindb.utils import setup_driver
31
+ from nucliadb.ingest import SERVICE_NAME, logger
32
+ from nucliadb.ingest.orm.broker_message import generate_broker_message
33
+ from nucliadb.ingest.orm.entities import EntitiesManager
34
+ from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict, VectorSetConflict
35
+ from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
36
+ from nucliadb.ingest.orm.processor import Processor, sequence_manager
37
+ from nucliadb.ingest.orm.resource import Resource as ResourceORM
38
+ from nucliadb.ingest.settings import settings
39
+ from nucliadb_protos import nodewriter_pb2, writer_pb2, writer_pb2_grpc
25
40
  from nucliadb_protos.knowledgebox_pb2 import (
26
41
  DeleteKnowledgeBoxResponse,
27
- GCKnowledgeBoxResponse,
28
42
  KnowledgeBoxID,
29
- KnowledgeBoxNew,
30
43
  KnowledgeBoxResponseStatus,
31
44
  KnowledgeBoxUpdate,
32
- Labels,
33
- NewKnowledgeBoxResponse,
34
45
  SemanticModelMetadata,
35
46
  UpdateKnowledgeBoxResponse,
47
+ VectorSetConfig,
36
48
  )
37
- from nucliadb_protos.resources_pb2 import CloudFile
38
49
  from nucliadb_protos.writer_pb2 import (
39
- BinaryData,
40
50
  BrokerMessage,
41
51
  DelEntitiesRequest,
42
- DelLabelsRequest,
43
- ExtractedVectorsWrapper,
44
- FileRequest,
45
- FileUploaded,
52
+ DelVectorSetRequest,
53
+ DelVectorSetResponse,
46
54
  GetEntitiesGroupRequest,
47
55
  GetEntitiesGroupResponse,
48
56
  GetEntitiesRequest,
49
57
  GetEntitiesResponse,
50
- GetLabelSetRequest,
51
- GetLabelSetResponse,
52
- GetLabelsRequest,
53
- GetLabelsResponse,
54
58
  IndexResource,
55
59
  IndexStatus,
56
60
  ListEntitiesGroupsRequest,
@@ -59,43 +63,22 @@ from nucliadb_protos.writer_pb2 import (
59
63
  ListMembersResponse,
60
64
  NewEntitiesGroupRequest,
61
65
  NewEntitiesGroupResponse,
66
+ NewVectorSetRequest,
67
+ NewVectorSetResponse,
62
68
  OpStatusWriter,
63
69
  SetEntitiesRequest,
64
- SetLabelsRequest,
65
- SetVectorsRequest,
66
- SetVectorsResponse,
67
70
  UpdateEntitiesGroupRequest,
68
71
  UpdateEntitiesGroupResponse,
69
- UploadBinaryData,
70
72
  WriterStatusRequest,
71
73
  WriterStatusResponse,
72
74
  )
73
-
74
- from nucliadb import learning_proxy
75
- from nucliadb.common import datamanagers
76
- from nucliadb.common.cluster.exceptions import AlreadyExists, EntitiesGroupNotFound
77
- from nucliadb.common.cluster.manager import get_index_nodes
78
- from nucliadb.common.cluster.utils import get_shard_manager
79
- from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
80
- from nucliadb.common.maindb.utils import setup_driver
81
- from nucliadb.ingest import SERVICE_NAME, logger
82
- from nucliadb.ingest.orm.entities import EntitiesManager
83
- from nucliadb.ingest.orm.exceptions import KnowledgeBoxConflict
84
- from nucliadb.ingest.orm.knowledgebox import KnowledgeBox as KnowledgeBoxORM
85
- from nucliadb.ingest.orm.processor import Processor, sequence_manager
86
- from nucliadb.ingest.orm.resource import Resource as ResourceORM
87
- from nucliadb.ingest.settings import settings
88
- from nucliadb_protos import utils_pb2, writer_pb2, writer_pb2_grpc
89
75
  from nucliadb_telemetry import errors
90
- from nucliadb_utils import const
91
- from nucliadb_utils.settings import is_onprem_nucliadb, running_settings
92
- from nucliadb_utils.storages.storage import Storage, StorageField
76
+ from nucliadb_utils.settings import is_onprem_nucliadb
93
77
  from nucliadb_utils.utilities import (
94
78
  get_partitioning,
95
79
  get_pubsub,
96
80
  get_storage,
97
81
  get_transaction_utility,
98
- has_feature,
99
82
  )
100
83
 
101
84
 
@@ -106,53 +89,64 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
106
89
  async def initialize(self):
107
90
  self.storage = await get_storage(service_name=SERVICE_NAME)
108
91
  self.driver = await setup_driver()
109
- self.proc = Processor(
110
- driver=self.driver, storage=self.storage, pubsub=await get_pubsub()
111
- )
92
+ self.proc = Processor(driver=self.driver, storage=self.storage, pubsub=await get_pubsub())
112
93
  self.shards_manager = get_shard_manager()
113
94
 
114
95
  async def finalize(self): ...
115
96
 
116
- async def SetVectors( # type: ignore
117
- self, request: SetVectorsRequest, context=None
118
- ) -> SetVectorsResponse:
119
- response = SetVectorsResponse()
120
- response.found = True
121
-
122
- async with self.driver.transaction() as txn:
123
- kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
124
- resobj = ResourceORM(txn, self.storage, kbobj, request.rid)
125
-
126
- field = await resobj.get_field(
127
- request.field.field, request.field.field_type, load=True
97
+ async def NewKnowledgeBoxV2(
98
+ self, request: writer_pb2.NewKnowledgeBoxV2Request, context=None
99
+ ) -> writer_pb2.NewKnowledgeBoxV2Response:
100
+ """v2 of KB creation endpoint. Payload has been refactored and cleaned
101
+ up to include only necessary fields. It has also been extended to
102
+ support KB creation with multiple vectorsets
103
+ """
104
+ if is_onprem_nucliadb():
105
+ logger.error(
106
+ "Sorry, this endpoint is only available for hosted. Onprem must use the REST API"
128
107
  )
129
- if field.value is None:
130
- response.found = False
131
- return response
132
-
133
- evw = ExtractedVectorsWrapper()
134
- evw.field.CopyFrom(request.field)
135
- evw.vectors.CopyFrom(request.vectors)
136
- logger.debug(f"Setting {len(request.vectors.vectors.vectors)} vectors")
137
-
138
- try:
139
- await field.set_vectors(evw)
140
- await txn.commit()
141
- except Exception as e:
142
- errors.capture_exception(e)
143
- logger.error("Error in ingest gRPC servicer", exc_info=True)
144
-
145
- return response
108
+ return writer_pb2.NewKnowledgeBoxV2Response(
109
+ status=KnowledgeBoxResponseStatus.ERROR,
110
+ error_message="This endpoint is only available for hosted. Onprem must use the REST API",
111
+ )
112
+ # Hosted KBs are created through backend endpoints. We assume learning
113
+ # configuration has been already created for it and we are given the
114
+ # model metadata in the request
146
115
 
147
- async def NewKnowledgeBox( # type: ignore
148
- self, request: KnowledgeBoxNew, context=None
149
- ) -> NewKnowledgeBoxResponse:
150
116
  try:
151
- kbid = await self.create_kb(request)
152
- logger.info("KB created successfully", extra={"kbid": kbid})
117
+ kbid, _ = await KnowledgeBoxORM.create(
118
+ self.driver,
119
+ kbid=request.kbid,
120
+ slug=request.slug,
121
+ title=request.title,
122
+ description=request.description,
123
+ semantic_models={
124
+ vs.vectorset_id: SemanticModelMetadata(
125
+ similarity_function=vs.similarity,
126
+ vector_dimension=vs.vector_dimension,
127
+ matryoshka_dimensions=vs.matryoshka_dimensions,
128
+ )
129
+ for vs in request.vectorsets
130
+ },
131
+ external_index_provider=request.external_index_provider,
132
+ hidden_resources_enabled=request.hidden_resources_enabled,
133
+ hidden_resources_hide_on_creation=request.hidden_resources_hide_on_creation,
134
+ )
135
+
153
136
  except KnowledgeBoxConflict:
154
137
  logger.info("KB already exists", extra={"slug": request.slug})
155
- return NewKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.CONFLICT)
138
+ return writer_pb2.NewKnowledgeBoxV2Response(status=KnowledgeBoxResponseStatus.CONFLICT)
139
+
140
+ except ExternalIndexCreationError as exc:
141
+ logger.exception(
142
+ "Error creating external index",
143
+ extra={"slug": request.slug, "error": str(exc)},
144
+ )
145
+ return writer_pb2.NewKnowledgeBoxV2Response(
146
+ status=KnowledgeBoxResponseStatus.EXTERNAL_INDEX_PROVIDER_ERROR,
147
+ error_message=exc.message,
148
+ )
149
+
156
150
  except Exception as exc:
157
151
  errors.capture_exception(exc)
158
152
  logger.exception(
@@ -160,101 +154,50 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
160
154
  exc_info=True,
161
155
  extra={"slug": request.slug},
162
156
  )
163
- return NewKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.ERROR)
164
- return NewKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.OK, uuid=kbid)
157
+ return writer_pb2.NewKnowledgeBoxV2Response(status=KnowledgeBoxResponseStatus.ERROR)
165
158
 
166
- async def create_kb(self, request: KnowledgeBoxNew) -> str:
167
- if is_onprem_nucliadb():
168
- return await self._create_kb_onprem(request)
169
159
  else:
170
- return await self._create_kb_hosted(request)
171
-
172
- async def _create_kb_onprem(self, request: KnowledgeBoxNew) -> str:
173
- """
174
- First, try to get the learning configuration for the new knowledge box.
175
- From there we need to extract the semantic model metadata and pass it to the create_kb method.
176
- If the kb creation fails, rollback the learning configuration for the kbid that was just created.
177
- """
178
- kbid = request.forceuuid or str(uuid.uuid4())
179
- release_channel = get_release_channel(request)
180
- lconfig = await learning_proxy.get_configuration(kbid)
181
- lconfig_created = False
182
- if lconfig is None:
183
- if request.learning_config:
184
- # We parse the desired configuration from the request and set it
185
- config = json.loads(request.learning_config)
186
- else:
187
- # We set an empty configuration so that learning chooses the default values.
188
- config = {}
189
- logger.warning(
190
- "No learning configuration provided. Default will be used.",
191
- extra={"kbid": kbid},
192
- )
193
- # NOTE: we rely on learning to return an updated configuration with
194
- # matryoshka settings if they're available
195
- lconfig = await learning_proxy.set_configuration(kbid, config=config)
196
- lconfig_created = True
197
- else:
198
- logger.info("Learning configuration already exists", extra={"kbid": kbid})
199
- try:
200
- await self.proc.create_kb(
201
- request.slug,
202
- request.config,
203
- parse_model_metadata_from_learning_config(lconfig),
204
- forceuuid=kbid,
205
- release_channel=release_channel,
206
- )
207
- return kbid
208
- except Exception:
209
- # Rollback learning config for the kbid that was just created
210
- try:
211
- if lconfig_created:
212
- await learning_proxy.delete_configuration(kbid)
213
- except Exception:
214
- logger.warning(
215
- "Could not rollback learning configuration",
216
- exc_info=True,
217
- extra={"kbid": kbid},
218
- )
219
- raise
220
-
221
- async def _create_kb_hosted(self, request: KnowledgeBoxNew) -> str:
222
- """
223
- For the hosted case, we assume that the learning configuration
224
- is already set and we are given the model metadata in the request.
225
- """
226
- kbid = request.forceuuid or str(uuid.uuid4())
227
- release_channel = get_release_channel(request)
228
- await self.proc.create_kb(
229
- request.slug,
230
- request.config,
231
- parse_model_metadata_from_request(request),
232
- forceuuid=kbid,
233
- release_channel=release_channel,
234
- )
235
- return kbid
160
+ logger.info("KB created successfully", extra={"kbid": kbid})
161
+ return writer_pb2.NewKnowledgeBoxV2Response(status=KnowledgeBoxResponseStatus.OK)
236
162
 
237
163
  async def UpdateKnowledgeBox( # type: ignore
238
164
  self, request: KnowledgeBoxUpdate, context=None
239
165
  ) -> UpdateKnowledgeBoxResponse:
240
- try:
241
- kbid = await self.proc.update_kb(request.uuid, request.slug, request.config)
242
- except KnowledgeBoxNotFound:
166
+ if is_onprem_nucliadb():
167
+ logger.error(
168
+ "Sorry, this endpoint is only available for hosted. Onprem must use the REST API"
169
+ )
243
170
  return UpdateKnowledgeBoxResponse(
244
- status=KnowledgeBoxResponseStatus.NOTFOUND
171
+ status=KnowledgeBoxResponseStatus.ERROR,
245
172
  )
173
+
174
+ try:
175
+ async with self.driver.transaction() as txn:
176
+ kbid = await KnowledgeBoxORM.update(
177
+ txn, uuid=request.uuid, slug=request.slug, config=request.config
178
+ )
179
+ await txn.commit()
180
+ except KnowledgeBoxNotFound:
181
+ return UpdateKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.NOTFOUND)
246
182
  except Exception:
247
- logger.exception("Could not create KB", exc_info=True)
183
+ logger.exception("Could not update KB", exc_info=True)
248
184
  return UpdateKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.ERROR)
249
- return UpdateKnowledgeBoxResponse(
250
- status=KnowledgeBoxResponseStatus.OK, uuid=kbid
251
- )
185
+ return UpdateKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.OK, uuid=kbid)
252
186
 
253
187
  async def DeleteKnowledgeBox( # type: ignore
254
188
  self, request: KnowledgeBoxID, context=None
255
189
  ) -> DeleteKnowledgeBoxResponse:
190
+ if is_onprem_nucliadb():
191
+ logger.error(
192
+ "Sorry, this endpoint is only available for hosted. Onprem must use the REST API"
193
+ )
194
+ return DeleteKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.ERROR)
195
+
256
196
  try:
257
- await self.delete_kb(request)
197
+ kbid = request.uuid
198
+ # learning configuration is automatically removed in nuclia backend for
199
+ # hosted users, we don't need to do it
200
+ await KnowledgeBoxORM.delete(self.driver, kbid=kbid)
258
201
  except KnowledgeBoxNotFound:
259
202
  logger.warning(f"KB not found: kbid={request.uuid}, slug={request.slug}")
260
203
  except Exception:
@@ -262,28 +205,6 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
262
205
  return DeleteKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.ERROR)
263
206
  return DeleteKnowledgeBoxResponse(status=KnowledgeBoxResponseStatus.OK)
264
207
 
265
- async def delete_kb(self, request: KnowledgeBoxID) -> None:
266
- kbid = request.uuid
267
- await self.proc.delete_kb(kbid)
268
- # learning configuration is automatically removed in nuclia backend for
269
- # hosted users, we only need to remove it for onprem
270
- if is_onprem_nucliadb():
271
- try:
272
- await learning_proxy.delete_configuration(kbid)
273
- logger.info("Learning configuration deleted", extra={"kbid": kbid})
274
- except Exception:
275
- logger.exception(
276
- "Unexpected error deleting learning configuration",
277
- exc_info=True,
278
- extra={"kbid": kbid},
279
- )
280
-
281
- async def GCKnowledgeBox( # type: ignore
282
- self, request: KnowledgeBoxID, context=None
283
- ) -> GCKnowledgeBoxResponse:
284
- response = GCKnowledgeBoxResponse()
285
- return response
286
-
287
208
  async def ProcessMessage( # type: ignore
288
209
  self, request_stream: AsyncIterator[BrokerMessage], context=None
289
210
  ):
@@ -301,86 +222,21 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
301
222
  logger.info(f"Processed {message.uuid}")
302
223
  return response
303
224
 
304
- async def SetLabels(self, request: SetLabelsRequest, context=None) -> OpStatusWriter: # type: ignore
305
- async with self.driver.transaction() as txn:
306
- kbobj = await self.proc.get_kb_obj(txn, request.kb)
307
- response = OpStatusWriter()
308
- if kbobj is not None:
309
- try:
310
- await kbobj.set_labelset(request.id, request.labelset)
311
- await txn.commit()
312
- response.status = OpStatusWriter.Status.OK
313
- except Exception as e:
314
- errors.capture_exception(e)
315
- logger.error("Error in ingest gRPC servicer", exc_info=True)
316
- response.status = OpStatusWriter.Status.ERROR
317
- else:
318
- response.status = OpStatusWriter.Status.NOTFOUND
319
- return response
320
-
321
- async def DelLabels(self, request: DelLabelsRequest, context=None) -> OpStatusWriter: # type: ignore
322
- async with self.driver.transaction() as txn:
323
- kbobj = await self.proc.get_kb_obj(txn, request.kb)
324
- response = OpStatusWriter()
325
- if kbobj is not None:
326
- try:
327
- await kbobj.del_labelset(request.id)
328
- await txn.commit()
329
- response.status = OpStatusWriter.Status.OK
330
- except Exception as e:
331
- errors.capture_exception(e)
332
- logger.error("Error in ingest gRPC servicer", exc_info=True)
333
- response.status = OpStatusWriter.Status.ERROR
334
- else:
335
- response.status = OpStatusWriter.Status.NOTFOUND
336
-
337
- return response
338
-
339
- async def GetLabels(self, request: GetLabelsRequest, context=None) -> GetLabelsResponse: # type: ignore
340
- async with self.driver.transaction() as txn:
341
- kbobj = await self.proc.get_kb_obj(txn, request.kb)
342
- labels: Optional[Labels] = None
343
- if kbobj is not None:
344
- labels = await kbobj.get_labels()
345
- response = GetLabelsResponse()
346
- if kbobj is None:
347
- response.status = GetLabelsResponse.Status.NOTFOUND
348
- else:
349
- response.kb.uuid = kbobj.kbid
350
- if labels is not None:
351
- response.labels.CopyFrom(labels)
352
-
353
- return response
354
-
355
- async def GetLabelSet( # type: ignore
356
- self, request: GetLabelSetRequest, context=None
357
- ) -> GetLabelSetResponse:
358
- async with self.driver.transaction() as txn:
359
- kbobj = await self.proc.get_kb_obj(txn, request.kb)
360
- response = GetLabelSetResponse()
361
- if kbobj is not None:
362
- await kbobj.get_labelset(request.labelset, response)
363
- response.kb.uuid = kbobj.kbid
364
- response.status = GetLabelSetResponse.Status.OK
365
- else:
366
- response.status = GetLabelSetResponse.Status.NOTFOUND
367
- return response
368
-
369
225
  async def NewEntitiesGroup( # type: ignore
370
226
  self, request: NewEntitiesGroupRequest, context=None
371
227
  ) -> NewEntitiesGroupResponse:
372
228
  response = NewEntitiesGroupResponse()
373
- async with self.driver.transaction() as txn:
374
- kbobj = await self.proc.get_kb_obj(txn, request.kb)
229
+ async with self.driver.transaction(read_only=True) as ro_txn:
230
+ kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
375
231
  if kbobj is None:
376
232
  response.status = NewEntitiesGroupResponse.Status.KB_NOT_FOUND
377
233
  return response
378
234
 
235
+ async with self.driver.transaction() as txn:
236
+ kbobj.txn = txn
379
237
  entities_manager = EntitiesManager(kbobj, txn)
380
238
  try:
381
- await entities_manager.create_entities_group(
382
- request.group, request.entities
383
- )
239
+ await entities_manager.create_entities_group(request.group, request.entities)
384
240
  except AlreadyExists:
385
241
  response.status = NewEntitiesGroupResponse.Status.ALREADY_EXISTS
386
242
  return response
@@ -393,9 +249,8 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
393
249
  self, request: GetEntitiesRequest, context=None
394
250
  ) -> GetEntitiesResponse:
395
251
  response = GetEntitiesResponse()
396
- async with self.driver.transaction() as txn:
252
+ async with self.driver.transaction(read_only=True) as txn:
397
253
  kbobj = await self.proc.get_kb_obj(txn, request.kb)
398
-
399
254
  if kbobj is None:
400
255
  response.status = GetEntitiesResponse.Status.NOTFOUND
401
256
  return response
@@ -416,9 +271,8 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
416
271
  self, request: ListEntitiesGroupsRequest, context=None
417
272
  ) -> ListEntitiesGroupsResponse:
418
273
  response = ListEntitiesGroupsResponse()
419
- async with self.driver.transaction() as txn:
274
+ async with self.driver.transaction(read_only=True) as txn:
420
275
  kbobj = await self.proc.get_kb_obj(txn, request.kb)
421
-
422
276
  if kbobj is None:
423
277
  response.status = ListEntitiesGroupsResponse.Status.NOTFOUND
424
278
  return response
@@ -441,7 +295,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
441
295
  self, request: GetEntitiesGroupRequest, context=None
442
296
  ) -> GetEntitiesGroupResponse:
443
297
  response = GetEntitiesGroupResponse()
444
- async with self.driver.transaction() as txn:
298
+ async with self.driver.transaction(read_only=True) as txn:
445
299
  kbobj = await self.proc.get_kb_obj(txn, request.kb)
446
300
  if kbobj is None:
447
301
  response.status = GetEntitiesGroupResponse.Status.KB_NOT_FOUND
@@ -449,9 +303,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
449
303
 
450
304
  entities_manager = EntitiesManager(kbobj, txn)
451
305
  try:
452
- entities_group = await entities_manager.get_entities_group(
453
- request.group
454
- )
306
+ entities_group = await entities_manager.get_entities_group(request.group)
455
307
  except Exception as e:
456
308
  errors.capture_exception(e)
457
309
  logger.error("Error in ingest gRPC servicer", exc_info=True)
@@ -459,9 +311,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
459
311
  else:
460
312
  response.kb.uuid = kbobj.kbid
461
313
  if entities_group is None:
462
- response.status = (
463
- GetEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
464
- )
314
+ response.status = GetEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
465
315
  else:
466
316
  response.status = GetEntitiesGroupResponse.Status.OK
467
317
  response.group.CopyFrom(entities_group)
@@ -470,17 +320,17 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
470
320
 
471
321
  async def SetEntities(self, request: SetEntitiesRequest, context=None) -> OpStatusWriter: # type: ignore
472
322
  response = OpStatusWriter()
473
- async with self.driver.transaction() as txn:
474
- kbobj = await self.proc.get_kb_obj(txn, request.kb)
323
+ async with self.driver.transaction(read_only=True) as ro_txn:
324
+ kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
475
325
  if kbobj is None:
476
326
  response.status = OpStatusWriter.Status.NOTFOUND
477
327
  return response
478
328
 
329
+ async with self.driver.transaction() as txn:
330
+ kbobj.txn = txn
479
331
  entities_manager = EntitiesManager(kbobj, txn)
480
332
  try:
481
- await entities_manager.set_entities_group(
482
- request.group, request.entities
483
- )
333
+ await entities_manager.set_entities_group(request.group, request.entities)
484
334
  except Exception as e:
485
335
  errors.capture_exception(e)
486
336
  logger.error("Error in ingest gRPC servicer", exc_info=True)
@@ -494,14 +344,15 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
494
344
  self, request: UpdateEntitiesGroupRequest, context=None
495
345
  ) -> UpdateEntitiesGroupResponse:
496
346
  response = UpdateEntitiesGroupResponse()
497
- async with self.driver.transaction() as txn:
498
- kbobj = await self.proc.get_kb_obj(txn, request.kb)
347
+ async with self.driver.transaction(read_only=True) as ro_txn:
348
+ kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
499
349
  if kbobj is None:
500
350
  response.status = UpdateEntitiesGroupResponse.Status.KB_NOT_FOUND
501
351
  return response
502
352
 
353
+ async with self.driver.transaction() as txn:
354
+ kbobj.txn = txn
503
355
  entities_manager = EntitiesManager(kbobj, txn)
504
-
505
356
  try:
506
357
  await entities_manager.set_entities_group_metadata(
507
358
  request.group,
@@ -512,9 +363,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
512
363
  await entities_manager.update_entities(request.group, updates)
513
364
  await entities_manager.delete_entities(request.group, request.delete) # type: ignore
514
365
  except EntitiesGroupNotFound:
515
- response.status = (
516
- UpdateEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
517
- )
366
+ response.status = UpdateEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND
518
367
  return response
519
368
 
520
369
  await txn.commit()
@@ -523,12 +372,15 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
523
372
 
524
373
  async def DelEntities(self, request: DelEntitiesRequest, context=None) -> OpStatusWriter: # type: ignore
525
374
  response = OpStatusWriter()
526
- async with self.driver.transaction() as txn:
527
- kbobj = await self.proc.get_kb_obj(txn, request.kb)
375
+
376
+ async with self.driver.transaction(read_only=True) as ro_txn:
377
+ kbobj = await self.proc.get_kb_obj(ro_txn, request.kb)
528
378
  if kbobj is None:
529
379
  response.status = OpStatusWriter.Status.NOTFOUND
530
380
  return response
531
381
 
382
+ async with self.driver.transaction() as txn:
383
+ kbobj.txn = txn
532
384
  entities_manager = EntitiesManager(kbobj, txn)
533
385
  try:
534
386
  await entities_manager.delete_entities_group(request.group)
@@ -546,7 +398,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
546
398
  ) -> WriterStatusResponse:
547
399
  logger.info("Status Call")
548
400
  response = WriterStatusResponse()
549
- async with self.driver.transaction() as txn:
401
+ async with self.driver.transaction(read_only=True) as txn:
550
402
  async for _, slug in datamanagers.kb.get_kbs(txn):
551
403
  response.knowledgeboxes.append(slug)
552
404
 
@@ -580,7 +432,7 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
580
432
  async with self.driver.transaction() as txn:
581
433
  kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
582
434
  resobj = ResourceORM(txn, self.storage, kbobj, request.rid)
583
- bm = await resobj.generate_broker_message()
435
+ bm = await generate_broker_message(resobj)
584
436
  transaction = get_transaction_utility()
585
437
  partitioning = get_partitioning()
586
438
  partition = partitioning.generate_partition(request.kbid, request.rid)
@@ -595,39 +447,25 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
595
447
  kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
596
448
  resobj = ResourceORM(txn, self.storage, kbobj, request.rid)
597
449
  resobj.disable_vectors = not request.reindex_vectors
598
-
599
- brain = await resobj.generate_index_message()
600
- shard_id = await datamanagers.resources.get_resource_shard_id(
601
- txn, kbid=request.kbid, rid=request.rid
602
- )
603
- shard: Optional[writer_pb2.ShardObject] = None
604
- if shard_id is not None:
605
- shard = await kbobj.get_resource_shard(shard_id)
606
-
607
- if shard is None:
608
- shard = await self.shards_manager.get_current_active_shard(
609
- txn, request.kbid
450
+ brain = await resobj.generate_index_message(reindex=True)
451
+ shard = await self.proc.get_or_assign_resource_shard(txn, kbobj, request.rid)
452
+ index_message = brain.brain
453
+ external_index_manager = await get_external_index_manager(kbid=request.kbid)
454
+ if external_index_manager is not None:
455
+ await self.proc.external_index_add_resource(
456
+ request.kbid,
457
+ request.rid,
458
+ index_message,
610
459
  )
611
- if shard is None:
612
- # no shard currently exists, create one
613
- shard = await self.shards_manager.create_shard_by_kbid(
614
- txn, request.kbid
615
- )
616
-
617
- await datamanagers.resources.set_resource_shard_id(
618
- txn, kbid=request.kbid, rid=request.rid, shard=shard.shard
619
- )
620
-
621
- if shard is not None:
460
+ else:
622
461
  await self.shards_manager.add_resource(
623
462
  shard,
624
- brain.brain,
463
+ index_message,
625
464
  0,
626
465
  partition=self.partitions[0],
627
466
  kb=request.kbid,
628
467
  reindex_id=uuid.uuid4().hex,
629
468
  )
630
-
631
469
  response = IndexStatus()
632
470
  return response
633
471
  except Exception as e:
@@ -635,116 +473,51 @@ class WriterServicer(writer_pb2_grpc.WriterServicer):
635
473
  logger.error("Error in ingest gRPC servicer", exc_info=True)
636
474
  raise
637
475
 
638
- async def DownloadFile(self, request: FileRequest, context=None):
639
- async for data in self.storage.download(request.bucket, request.key):
640
- yield BinaryData(data=data)
641
-
642
- async def UploadFile(self, request: AsyncIterator[UploadBinaryData], context=None) -> FileUploaded: # type: ignore
643
- data: UploadBinaryData
644
-
645
- destination: Optional[StorageField] = None
646
- cf = CloudFile()
647
- data = await request.__anext__()
648
- if data.HasField("metadata"):
649
- bucket = self.storage.get_bucket_name(data.metadata.kbid)
650
- destination = self.storage.field_klass(
651
- storage=self.storage, bucket=bucket, fullkey=data.metadata.key
652
- )
653
- cf.content_type = data.metadata.content_type
654
- cf.filename = data.metadata.filename
655
- cf.size = data.metadata.size
656
- else:
657
- raise AttributeError("Metadata not found")
658
-
659
- async def generate_buffer(
660
- storage: Storage, request: AsyncIterator[UploadBinaryData] # type: ignore
661
- ):
662
- # Storage requires uploading chunks of a specified size, this is
663
- # why we need to have an intermediate buffer
664
- buf = BytesIO()
665
- async for chunk in request:
666
- if not chunk.HasField("payload"):
667
- raise AttributeError("Payload not found")
668
- buf.write(chunk.payload)
669
- while buf.tell() > storage.chunk_size:
670
- buf.seek(0)
671
- data = buf.read(storage.chunk_size)
672
- if len(data):
673
- yield data
674
- old_data = buf.read()
675
- buf = BytesIO()
676
- buf.write(old_data)
677
- buf.seek(0)
678
- data = buf.read()
679
- if len(data):
680
- yield data
681
-
682
- if destination is None:
683
- raise AttributeError("No destination file")
684
- await self.storage.uploaditerator(
685
- generate_buffer(self.storage, request), destination, cf
476
+ async def NewVectorSet( # type: ignore
477
+ self, request: NewVectorSetRequest, context=None
478
+ ) -> NewVectorSetResponse:
479
+ config = VectorSetConfig(
480
+ vectorset_id=request.vectorset_id,
481
+ vectorset_index_config=nodewriter_pb2.VectorIndexConfig(
482
+ similarity=request.similarity,
483
+ normalize_vectors=request.normalize_vectors,
484
+ vector_type=request.vector_type,
485
+ vector_dimension=request.vector_dimension,
486
+ ),
487
+ matryoshka_dimensions=request.matryoshka_dimensions,
686
488
  )
687
- result = FileUploaded()
688
- return result
689
-
690
-
691
- LEARNING_SIMILARITY_FUNCTION_TO_PROTO = {
692
- "cosine": utils_pb2.VectorSimilarity.COSINE,
693
- "dot": utils_pb2.VectorSimilarity.DOT,
694
- }
695
-
696
-
697
- def parse_model_metadata_from_learning_config(
698
- lconfig: learning_proxy.LearningConfiguration,
699
- ) -> SemanticModelMetadata:
700
- model = SemanticModelMetadata()
701
- model.similarity_function = LEARNING_SIMILARITY_FUNCTION_TO_PROTO[
702
- lconfig.semantic_vector_similarity
703
- ]
704
- if lconfig.semantic_vector_size is not None:
705
- model.vector_dimension = lconfig.semantic_vector_size
706
- else:
707
- logger.warning("Vector dimension not set!")
708
- if lconfig.semantic_matryoshka_dimensions is not None:
709
- model.matryoshka_dimensions.extend(lconfig.semantic_matryoshka_dimensions)
710
- return model
711
-
712
-
713
- def parse_model_metadata_from_request(
714
- request: KnowledgeBoxNew,
715
- ) -> SemanticModelMetadata:
716
- model = SemanticModelMetadata()
717
- model.similarity_function = request.similarity
718
- if request.HasField("vector_dimension"):
719
- model.vector_dimension = request.vector_dimension
720
- else:
721
- logger.warning(
722
- "Vector dimension not set. Will be detected automatically on the first vector set."
723
- )
724
-
725
- if len(request.matryoshka_dimensions) > 0:
726
- if model.vector_dimension not in request.matryoshka_dimensions:
727
- logger.warning(
728
- "Vector dimensions is inconsistent with matryoshka dimensions! Ignoring them",
729
- extra={
730
- "kbid": request.forceuuid,
731
- "kbslug": request.slug,
732
- },
733
- )
489
+ response = NewVectorSetResponse()
490
+ try:
491
+ async with self.driver.transaction() as txn:
492
+ kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
493
+ await kbobj.create_vectorset(config)
494
+ await txn.commit()
495
+ except VectorSetConflict as exc:
496
+ response.status = NewVectorSetResponse.Status.ERROR
497
+ response.details = str(exc)
498
+ except Exception as exc:
499
+ errors.capture_exception(exc)
500
+ logger.error("Error in ingest gRPC while creating a vectorset", exc_info=True)
501
+ response.status = NewVectorSetResponse.Status.ERROR
502
+ response.details = str(exc)
734
503
  else:
735
- model.matryoshka_dimensions.extend(request.matryoshka_dimensions)
736
-
737
- return model
738
-
504
+ response.status = NewVectorSetResponse.Status.OK
505
+ return response
739
506
 
740
- def get_release_channel(request: KnowledgeBoxNew) -> utils_pb2.ReleaseChannel.ValueType:
741
- """
742
- Set channel to Experimental if specified in the grpc request or if the requested
743
- slug has the experimental_kb feature enabled in stage environment.
744
- """
745
- release_channel = request.release_channel
746
- if running_settings.running_environment == "stage" and has_feature(
747
- const.Features.EXPERIMENTAL_KB, context={"slug": request.slug}
748
- ):
749
- release_channel = utils_pb2.ReleaseChannel.EXPERIMENTAL
750
- return release_channel
507
+ async def DelVectorSet( # type: ignore
508
+ self, request: DelVectorSetRequest, context=None
509
+ ) -> DelVectorSetResponse:
510
+ response = DelVectorSetResponse()
511
+ try:
512
+ async with self.driver.transaction() as txn:
513
+ kbobj = KnowledgeBoxORM(txn, self.storage, request.kbid)
514
+ await kbobj.delete_vectorset(request.vectorset_id)
515
+ await txn.commit()
516
+ except Exception as exc:
517
+ errors.capture_exception(exc)
518
+ logger.error("Error in ingest gRPC while deleting a vectorset", exc_info=True)
519
+ response.status = DelVectorSetResponse.Status.ERROR
520
+ response.details = str(exc)
521
+ else:
522
+ response.status = DelVectorSetResponse.Status.OK
523
+ return response