nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (418)
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -18,6 +18,7 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import enum
21
+ from typing import Optional
21
22
 
22
23
  from pydantic import Field
23
24
  from pydantic_settings import BaseSettings
@@ -85,6 +86,11 @@ class Settings(BaseSettings):
85
86
  cluster_discovery_kubernetes_selector: str = "appType=node"
86
87
  cluster_discovery_manual_addresses: list[str] = []
87
88
 
89
+ nidx_api_address: Optional[str] = Field(default=None, description="NIDX gRPC API address")
90
+ nidx_searcher_address: Optional[str] = Field(
91
+ default=None, description="NIDX gRPC searcher API address"
92
+ )
93
+
88
94
 
89
95
  settings = Settings()
90
96
 
@@ -30,6 +30,7 @@ from nucliadb_protos.nodereader_pb2 import (
30
30
  DocumentItem,
31
31
  EdgeList,
32
32
  GetShardRequest,
33
+ IdCollection,
33
34
  ParagraphItem,
34
35
  ParagraphSearchRequest,
35
36
  ParagraphSearchResponse,
@@ -46,15 +47,13 @@ from nucliadb_protos.noderesources_pb2 import (
46
47
  EmptyResponse,
47
48
  Resource,
48
49
  ResourceID,
49
- )
50
- from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
51
- from nucliadb_protos.noderesources_pb2 import (
52
50
  ShardCreated,
53
51
  ShardId,
54
52
  ShardIds,
55
53
  VectorSetID,
56
54
  VectorSetList,
57
55
  )
56
+ from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
58
57
  from nucliadb_protos.nodewriter_pb2 import NewShardRequest, OpStatus
59
58
 
60
59
  from ..settings import settings
@@ -68,8 +67,7 @@ except ImportError: # pragma: no cover
68
67
  IndexNodeException = Exception
69
68
 
70
69
  try:
71
- from nucliadb_node_binding import NodeReader # type: ignore
72
- from nucliadb_node_binding import NodeWriter # type: ignore
70
+ from nucliadb_node_binding import NodeReader, NodeWriter
73
71
  except ImportError: # pragma: no cover
74
72
  NodeReader = None
75
73
  NodeWriter = None
@@ -80,15 +78,11 @@ class StandaloneReaderWrapper:
80
78
 
81
79
  def __init__(self):
82
80
  if NodeReader is None:
83
- raise ImportError(
84
- "NucliaDB index node bindings are not installed (reader not found)"
85
- )
81
+ raise ImportError("NucliaDB index node bindings are not installed (reader not found)")
86
82
  self.reader = NodeReader()
87
83
  self.executor = ThreadPoolExecutor(settings.local_reader_threads)
88
84
 
89
- async def Search(
90
- self, request: SearchRequest, retry: bool = False
91
- ) -> SearchResponse:
85
+ async def Search(self, request: SearchRequest, retry: bool = False) -> SearchResponse:
92
86
  try:
93
87
  loop = asyncio.get_running_loop()
94
88
  result = await loop.run_in_executor(
@@ -112,30 +106,6 @@ class StandaloneReaderWrapper:
112
106
  else:
113
107
  raise
114
108
 
115
- async def ParagraphSearch(
116
- self, request: ParagraphSearchRequest
117
- ) -> ParagraphSearchResponse:
118
- loop = asyncio.get_running_loop()
119
- result = await loop.run_in_executor(
120
- self.executor, self.reader.paragraph_search, request.SerializeToString()
121
- )
122
- pb_bytes = bytes(result)
123
- pb = ParagraphSearchResponse()
124
- pb.ParseFromString(pb_bytes)
125
- return pb
126
-
127
- async def RelationSearch(
128
- self, request: RelationSearchRequest
129
- ) -> RelationSearchResponse:
130
- loop = asyncio.get_running_loop()
131
- result = await loop.run_in_executor(
132
- self.executor, self.reader.relation_search, request.SerializeToString()
133
- )
134
- pb_bytes = bytes(result)
135
- pb = RelationSearchResponse()
136
- pb.ParseFromString(pb_bytes)
137
- return pb
138
-
139
109
  async def GetShard(self, request: GetShardRequest) -> NodeResourcesShard:
140
110
  loop = asyncio.get_running_loop()
141
111
  result = await loop.run_in_executor(
@@ -200,9 +170,7 @@ class StandaloneReaderWrapper:
200
170
  raise exception
201
171
  await loop.run_in_executor(self.executor, t1.join)
202
172
 
203
- async def Paragraphs(
204
- self, stream_request: StreamRequest
205
- ) -> AsyncIterator[ParagraphItem]:
173
+ async def Paragraphs(self, stream_request: StreamRequest) -> AsyncIterator[ParagraphItem]:
206
174
  loop = asyncio.get_running_loop()
207
175
  q: asyncio.Queue[ParagraphItem] = asyncio.Queue(1)
208
176
  exception = None
@@ -248,30 +216,15 @@ class StandaloneReaderWrapper:
248
216
  edge_list.ParseFromString(pb_bytes)
249
217
  return edge_list
250
218
 
251
-
252
- async def Search(self, request: SearchRequest, retry: bool = False) -> SearchResponse:
253
- try:
219
+ async def VectorIds(self, request: VectorSetID) -> IdCollection:
254
220
  loop = asyncio.get_running_loop()
255
221
  result = await loop.run_in_executor(
256
- self.executor, self.reader.search, request.SerializeToString()
222
+ self.executor, self.reader.vector_ids, request.SerializeToString()
257
223
  )
258
224
  pb_bytes = bytes(result)
259
- pb = SearchResponse()
260
- pb.ParseFromString(pb_bytes)
261
- return pb
262
- except IndexNodeException as exc:
263
- if "IO error" not in str(exc):
264
- # ignore any other error
265
- raise
266
-
267
- # try some mitigations...
268
- logger.error(f"IndexNodeException in Search: {request}", exc_info=True)
269
- if not retry:
270
- # reinit?
271
- self.reader = NodeReader()
272
- return await self.Search(request, retry=True)
273
- else:
274
- raise
225
+ ids = IdCollection()
226
+ ids.ParseFromString(pb_bytes)
227
+ return ids
275
228
 
276
229
 
277
230
  class StandaloneWriterWrapper:
@@ -280,9 +233,7 @@ class StandaloneWriterWrapper:
280
233
  def __init__(self):
281
234
  os.makedirs(settings.data_path, exist_ok=True)
282
235
  if NodeWriter is None:
283
- raise ImportError(
284
- "NucliaDB index node bindings are not installed (writer not found)"
285
- )
236
+ raise ImportError("NucliaDB index node bindings are not installed (writer not found)")
286
237
  self.writer = NodeWriter()
287
238
  self.executor = ThreadPoolExecutor(settings.local_writer_threads)
288
239
 
@@ -369,9 +320,7 @@ class StandaloneWriterWrapper:
369
320
 
370
321
  async def GC(self, request: ShardId) -> EmptyResponse:
371
322
  loop = asyncio.get_running_loop()
372
- resp = await loop.run_in_executor(
373
- self.executor, self.writer.gc, request.SerializeToString()
374
- )
323
+ resp = await loop.run_in_executor(self.executor, self.writer.gc, request.SerializeToString())
375
324
  pb_bytes = bytes(resp)
376
325
  op_status = EmptyResponse()
377
326
  op_status.ParseFromString(pb_bytes)
@@ -20,10 +20,7 @@
20
20
  from typing import Any, Optional
21
21
 
22
22
  from nucliadb.common.cluster.base import AbstractIndexNode
23
- from nucliadb.common.cluster.grpc_node_dummy import ( # type: ignore
24
- DummyReaderStub,
25
- DummyWriterStub,
26
- )
23
+ from nucliadb.common.cluster.grpc_node_dummy import DummyReaderStub, DummyWriterStub
27
24
  from nucliadb.common.cluster.settings import settings as cluster_settings
28
25
  from nucliadb.common.cluster.standalone import grpc_node_binding
29
26
  from nucliadb_protos import standalone_pb2, standalone_pb2_grpc
@@ -79,7 +76,7 @@ class ProxyCallerWrapper:
79
76
  else:
80
77
  grpc_address = address
81
78
  self._channel = get_traced_grpc_channel(grpc_address, "standalone_proxy")
82
- self._stub = standalone_pb2_grpc.StandaloneClusterServiceStub(self._channel) # type: ignore
79
+ self._stub = standalone_pb2_grpc.StandaloneClusterServiceStub(self._channel)
83
80
 
84
81
  def __getattr__(self, name):
85
82
  async def call(request):
@@ -95,9 +92,7 @@ class ProxyCallerWrapper:
95
92
  else:
96
93
  raise NotImplementedError(f"Unknown type {self._type}")
97
94
  except KeyError:
98
- raise NotImplementedError(
99
- f"Unknown method for type {self._type}: {name}"
100
- )
95
+ raise NotImplementedError(f"Unknown method for type {self._type}: {name}")
101
96
  return_value = return_type()
102
97
  return_value.ParseFromString(resp.payload)
103
98
  return return_value
@@ -116,9 +111,7 @@ class ProxyStandaloneIndexNode(StandaloneIndexNode):
116
111
  available_disk: int,
117
112
  dummy: bool = False,
118
113
  ):
119
- super().__init__(
120
- id, address, shard_count, available_disk=available_disk, dummy=dummy
121
- )
114
+ super().__init__(id, address, shard_count, available_disk=available_disk, dummy=dummy)
122
115
  if dummy:
123
116
  return
124
117
 
@@ -32,9 +32,7 @@ from nucliadb_protos import standalone_pb2, standalone_pb2_grpc
32
32
  from nucliadb_utils.grpc import get_traced_grpc_server
33
33
 
34
34
 
35
- class StandaloneClusterServiceServicer(
36
- standalone_pb2_grpc.StandaloneClusterServiceServicer
37
- ):
35
+ class StandaloneClusterServiceServicer(standalone_pb2_grpc.StandaloneClusterServiceServicer):
38
36
  @backoff.on_exception(backoff.expo, (AioRpcError,), max_time=60)
39
37
  async def NodeAction( # type: ignore
40
38
  self, request: standalone_pb2.NodeActionRequest, context
@@ -61,9 +59,7 @@ class StandaloneClusterServiceServicer(
61
59
  self, request: standalone_pb2.NodeInfoRequest, context
62
60
  ) -> standalone_pb2.NodeInfoResponse:
63
61
  index_node = get_self()
64
- index_node.shard_count = len(
65
- os.listdir(os.path.join(cluster_settings.data_path, "shards"))
66
- )
62
+ index_node.shard_count = len(os.listdir(os.path.join(cluster_settings.data_path, "shards")))
67
63
  total_disk, _, available_disk = shutil.disk_usage(cluster_settings.data_path)
68
64
  return standalone_pb2.NodeInfoResponse(
69
65
  id=index_node.id,
@@ -56,9 +56,7 @@ def get_self() -> StandaloneIndexNode:
56
56
  make another grpc request since this node can service it directly.
57
57
  """
58
58
  if not is_index_node():
59
- raise Exception(
60
- "This node is not an Index Node. You should not reach this code path."
61
- )
59
+ raise Exception("This node is not an Index Node. You should not reach this code path.")
62
60
  global _SELF_INDEX_NODE
63
61
  node_id = get_standalone_node_id()
64
62
  if _SELF_INDEX_NODE is None or node_id != _SELF_INDEX_NODE.id:
@@ -68,9 +66,7 @@ def get_self() -> StandaloneIndexNode:
68
66
  host = f"{hn}.{ns}"
69
67
  else:
70
68
  host = gethostname()
71
- _SELF_INDEX_NODE = StandaloneIndexNode(
72
- id=node_id, address=host, shard_count=0, available_disk=0
73
- )
69
+ _SELF_INDEX_NODE = StandaloneIndexNode(id=node_id, address=host, shard_count=0, available_disk=0)
74
70
  try:
75
71
  _, _, available_disk = shutil.disk_usage(cluster_settings.data_path)
76
72
  _SELF_INDEX_NODE.available_disk = available_disk
@@ -37,7 +37,8 @@ from nucliadb.common.cluster.standalone.service import (
37
37
  start_grpc as start_standalone_grpc,
38
38
  )
39
39
  from nucliadb.common.cluster.standalone.utils import is_index_node
40
- from nucliadb_protos import noderesources_pb2, writer_pb2
40
+ from nucliadb.ingest.orm.resource import Resource
41
+ from nucliadb_protos import nodereader_pb2, writer_pb2
41
42
  from nucliadb_utils import const
42
43
  from nucliadb_utils.settings import is_onprem_nucliadb
43
44
  from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_utility
@@ -119,38 +120,44 @@ async def wait_for_node(app_context: ApplicationContext, node_id: str) -> None:
119
120
  await asyncio.sleep(sleep)
120
121
 
121
122
 
122
- @backoff.on_exception(
123
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=8
124
- )
123
+ async def get_resource(kbid: str, resource_id: str) -> Optional[Resource]:
124
+ async with datamanagers.with_ro_transaction() as txn:
125
+ return await datamanagers.resources.get_resource(txn, kbid=kbid, rid=resource_id)
126
+
127
+
128
+ @backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=8)
129
+ async def get_resource_index_message(kbid: str, resource_id: str) -> Optional[nodereader_pb2.Resource]:
130
+ async with datamanagers.with_ro_transaction() as txn:
131
+ resource = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=resource_id)
132
+ if resource is None:
133
+ logger.warning(
134
+ "Resource not found while indexing, skipping",
135
+ extra={"kbid": kbid, "resource_id": resource_id},
136
+ )
137
+ return None
138
+ resource_index_message = (await resource.generate_index_message(reindex=False)).brain
139
+ return resource_index_message
140
+
141
+
142
+ @backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=8)
125
143
  async def index_resource_to_shard(
126
144
  app_context: ApplicationContext,
127
145
  kbid: str,
128
146
  resource_id: str,
129
147
  shard: writer_pb2.ShardObject,
130
- ) -> Optional[noderesources_pb2.Resource]:
148
+ resource_index_message: Optional[nodereader_pb2.Resource] = None,
149
+ ) -> None:
131
150
  logger.info("Indexing resource", extra={"kbid": kbid, "resource_id": resource_id})
132
-
133
151
  sm = app_context.shard_manager
134
152
  partitioning = app_context.partitioning
135
153
 
136
- async with datamanagers.with_transaction() as txn:
137
- resource_index_message = (
138
- await datamanagers.resources.get_resource_index_message(
139
- txn, kbid=kbid, rid=resource_id
140
- )
141
- )
142
-
143
154
  if resource_index_message is None:
144
- logger.warning(
145
- "Resource index message not found while indexing, skipping",
146
- extra={"kbid": kbid, "resource_id": resource_id},
147
- )
148
- return None
155
+ resource_index_message = await get_resource_index_message(kbid, resource_id)
156
+ if resource_index_message is None:
157
+ return
158
+
149
159
  partition = partitioning.generate_partition(kbid, resource_id)
150
- await sm.add_resource(
151
- shard, resource_index_message, txid=-1, partition=str(partition), kb=kbid
152
- )
153
- return resource_index_message
160
+ await sm.add_resource(shard, resource_index_message, txid=-1, partition=str(partition), kb=kbid)
154
161
 
155
162
 
156
163
  async def delete_resource_from_shard(
@@ -0,0 +1,20 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ AVG_PARAGRAPH_SIZE_BYTES = 10_000
@@ -24,6 +24,7 @@ from nucliadb.common.cluster.settings import in_standalone_mode
24
24
  from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
25
25
  from nucliadb.common.maindb.driver import Driver
26
26
  from nucliadb.common.maindb.utils import setup_driver, teardown_driver
27
+ from nucliadb.common.nidx import start_nidx_utility, stop_nidx_utility
27
28
  from nucliadb_utils.indexing import IndexingUtility
28
29
  from nucliadb_utils.nats import NatsConnectionManager
29
30
  from nucliadb_utils.partition import PartitionUtility
@@ -78,11 +79,13 @@ class ApplicationContext:
78
79
  )
79
80
  self.indexing = await start_indexing_utility()
80
81
  self.transaction = await start_transaction_utility(self.service_name)
82
+ self.nidx = await start_nidx_utility()
81
83
 
82
84
  async def finalize(self) -> None:
83
85
  if not self._initialized:
84
86
  return
85
87
 
88
+ await stop_nidx_utility()
86
89
  await stop_transaction_utility()
87
90
  if not in_standalone_mode():
88
91
  await stop_indexing_utility()
@@ -18,25 +18,28 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
+ from contextlib import asynccontextmanager
22
+
21
23
  from fastapi import FastAPI
22
24
  from starlette.routing import Mount
23
25
 
24
26
  from nucliadb.common.context import ApplicationContext
25
27
 
26
28
 
27
- def set_app_context(app: FastAPI):
29
+ @asynccontextmanager
30
+ async def inject_app_context(app: FastAPI):
28
31
  context = ApplicationContext()
29
32
 
30
33
  app.state.context = context
31
- app.add_event_handler("startup", context.initialize)
32
- app.add_event_handler("shutdown", context.finalize)
33
34
 
34
35
  # Need to add app context in all sub-applications
35
36
  for route in app.router.routes:
36
37
  if isinstance(route, Mount) and isinstance(route.app, FastAPI):
37
38
  route.app.state.context = context
38
- route.app.add_event_handler("startup", context.initialize)
39
- route.app.add_event_handler("shutdown", context.finalize)
39
+
40
+ await context.initialize()
41
+ yield context
42
+ await context.finalize()
40
43
 
41
44
 
42
45
  def get_app_context(application: FastAPI) -> ApplicationContext:
@@ -18,5 +18,11 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
- from .philosophy_books import * # noqa
22
- from .ten_dummy_resources import * # noqa
21
+ from dataclasses import dataclass
22
+
23
+
24
+ @dataclass
25
+ class IndexCounts:
26
+ fields: int
27
+ paragraphs: int
28
+ sentences: int
@@ -33,25 +33,31 @@ from . import (
33
33
  cluster,
34
34
  entities,
35
35
  exceptions,
36
+ fields,
36
37
  kb,
37
38
  labels,
38
39
  processing,
39
40
  resources,
40
41
  rollover,
41
42
  synonyms,
43
+ vectorsets,
42
44
  )
43
- from .utils import with_transaction
45
+ from .utils import with_ro_transaction, with_rw_transaction, with_transaction
44
46
 
45
47
  __all__ = (
46
48
  "atomic",
47
49
  "cluster",
48
50
  "entities",
49
51
  "exceptions",
52
+ "fields",
50
53
  "kb",
51
54
  "labels",
52
55
  "processing",
53
56
  "resources",
54
57
  "rollover",
55
58
  "synonyms",
59
+ "vectorsets",
56
60
  "with_transaction",
61
+ "with_rw_transaction",
62
+ "with_ro_transaction",
57
63
  )
@@ -39,8 +39,10 @@ import sys
39
39
  from functools import wraps
40
40
 
41
41
  from . import kb as kb_dm
42
+ from . import labels as labels_dm
42
43
  from . import resources as resources_dm
43
- from .utils import with_transaction
44
+ from . import synonyms as synonyms_dm
45
+ from .utils import with_ro_transaction, with_transaction
44
46
 
45
47
  # XXX: we are using the not exported _ParamSpec to support 3.9. Whenever we
46
48
  # upgrade to >= 3.10 we'll be able to use ParamSpecKwargs and improve the
@@ -49,7 +51,7 @@ from .utils import with_transaction
49
51
 
50
52
  __python_version = (sys.version_info.major, sys.version_info.minor)
51
53
  if __python_version == (3, 9):
52
- from typing import _ParamSpec as ParamSpec # type: ignore
54
+ from typing_extensions import ParamSpec
53
55
  else:
54
56
  from typing import ParamSpec # type: ignore
55
57
 
@@ -59,7 +61,7 @@ P = ParamSpec("P")
59
61
  def ro_txn_wrap(fun: P) -> P: # type: ignore
60
62
  @wraps(fun)
61
63
  async def wrapper(**kwargs: P.kwargs):
62
- async with with_transaction(read_only=True) as txn:
64
+ async with with_ro_transaction() as txn:
63
65
  return await fun(txn, **kwargs)
64
66
 
65
67
  return wrapper
@@ -69,16 +71,32 @@ def rw_txn_wrap(fun: P) -> P: # type: ignore
69
71
  @wraps(fun)
70
72
  async def wrapper(**kwargs: P.kwargs):
71
73
  async with with_transaction() as txn:
72
- return await fun(txn, **kwargs)
74
+ result = await fun(txn, **kwargs)
75
+ await txn.commit()
76
+ return result
73
77
 
74
78
  return wrapper
75
79
 
76
80
 
77
81
  class kb:
78
82
  exists_kb = ro_txn_wrap(kb_dm.exists_kb)
83
+ get_config = ro_txn_wrap(kb_dm.get_config)
84
+ get_external_index_provider_metadata = ro_txn_wrap(kb_dm.get_external_index_provider_metadata)
79
85
 
80
86
 
81
87
  class resources:
82
88
  get_resource_uuid_from_slug = ro_txn_wrap(resources_dm.get_resource_uuid_from_slug)
83
89
  resource_exists = ro_txn_wrap(resources_dm.resource_exists)
84
90
  slug_exists = ro_txn_wrap(resources_dm.slug_exists)
91
+
92
+
93
+ class labelset:
94
+ get = ro_txn_wrap(labels_dm.get_labelset)
95
+ set = rw_txn_wrap(labels_dm.set_labelset)
96
+ delete = rw_txn_wrap(labels_dm.delete_labelset)
97
+ get_all = ro_txn_wrap(labels_dm.get_labels)
98
+
99
+
100
+ class synonyms:
101
+ get = ro_txn_wrap(synonyms_dm.get)
102
+ set = rw_txn_wrap(synonyms_dm.set)
@@ -31,13 +31,13 @@ logger = logging.getLogger(__name__)
31
31
  KB_SHARDS = "/kbs/{kbid}/shards"
32
32
 
33
33
 
34
- async def get_kb_shards(txn: Transaction, *, kbid: str) -> Optional[writer_pb2.Shards]:
34
+ async def get_kb_shards(
35
+ txn: Transaction, *, kbid: str, for_update: bool = False
36
+ ) -> Optional[writer_pb2.Shards]:
35
37
  key = KB_SHARDS.format(kbid=kbid)
36
- return await get_kv_pb(txn, key, writer_pb2.Shards)
38
+ return await get_kv_pb(txn, key, writer_pb2.Shards, for_update=for_update)
37
39
 
38
40
 
39
- async def update_kb_shards(
40
- txn: Transaction, *, kbid: str, shards: writer_pb2.Shards
41
- ) -> None:
41
+ async def update_kb_shards(txn: Transaction, *, kbid: str, shards: writer_pb2.Shards) -> None:
42
42
  key = KB_SHARDS.format(kbid=kbid)
43
43
  await txn.set(key, shards.SerializeToString())
@@ -85,11 +85,9 @@ async def set_entities_group(
85
85
  await txn.set(key, entities.SerializeToString())
86
86
 
87
87
 
88
- async def iterate_entities_groups(
89
- txn: Transaction, *, kbid: str
90
- ) -> AsyncGenerator[str, None]:
88
+ async def iterate_entities_groups(txn: Transaction, *, kbid: str) -> AsyncGenerator[str, None]:
91
89
  entities_key = KB_ENTITIES.format(kbid=kbid)
92
- async for key in txn.keys(entities_key, count=-1):
90
+ async for key in txn.keys(entities_key):
93
91
  group = key.split("/")[-1]
94
92
  yield group
95
93
 
@@ -106,9 +104,7 @@ async def get_entities_group(
106
104
  return eg
107
105
 
108
106
 
109
- async def get_deleted_groups(
110
- txn: Transaction, *, kbid: str
111
- ) -> kb_pb2.DeletedEntitiesGroups:
107
+ async def get_deleted_groups(txn: Transaction, *, kbid: str) -> kb_pb2.DeletedEntitiesGroups:
112
108
  deleted_groups_key = KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid)
113
109
  payload = await txn.get(deleted_groups_key)
114
110
  deg = kb_pb2.DeletedEntitiesGroups()
@@ -122,18 +118,14 @@ async def mark_group_as_deleted(txn: Transaction, *, kbid: str, group: str) -> N
122
118
  deg = await get_deleted_groups(txn, kbid=kbid)
123
119
  if group not in deg.entities_groups:
124
120
  deg.entities_groups.append(group)
125
- await txn.set(
126
- KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString()
127
- )
121
+ await txn.set(KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString())
128
122
 
129
123
 
130
124
  async def unmark_group_as_deleted(txn: Transaction, *, kbid: str, group: str) -> None:
131
125
  deg = await get_deleted_groups(txn, kbid=kbid)
132
126
  if group in deg.entities_groups:
133
127
  deg.entities_groups.remove(group)
134
- await txn.set(
135
- KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString()
136
- )
128
+ await txn.set(KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString())
137
129
 
138
130
 
139
131
  async def get_entities_meta_cache(txn: Transaction, *, kbid: str) -> EntitiesMetaCache:
@@ -143,7 +135,5 @@ async def get_entities_meta_cache(txn: Transaction, *, kbid: str) -> EntitiesMet
143
135
  return pickle.loads(value)
144
136
 
145
137
 
146
- async def set_entities_meta_cache(
147
- txn: Transaction, kbid: str, cache: EntitiesMetaCache
148
- ) -> None:
138
+ async def set_entities_meta_cache(txn: Transaction, kbid: str, cache: EntitiesMetaCache) -> None:
149
139
  await txn.set(KB_ENTITIES_CACHE.format(kbid=kbid), pickle.dumps(cache, protocol=5))