nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -0,0 +1,113 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """Migration #25 (Fixed migration 24)
22
+
23
+ Vectorsets are coming and we need to be ready at nucliadb. Vector index config
24
+ shouldn't be stored anymore in the `Shards` protobuffer, we need to migrate to
25
+ the new vectorsets config.
26
+
27
+ This migration asks learning_config for each KB configuration and saves the
28
+ model name as the vectorset_id. Creates a vectorset configuration for each model
29
+ and deprecates the vectors index config from the `Shards` protobuffer.
30
+
31
+ This migration should work for onprem and hosted deployments, as
32
+ learning_proxy handles which API is used (internal or external)
33
+
34
+ """
35
+
36
+ import logging
37
+
38
+ from nucliadb import learning_proxy
39
+ from nucliadb.common import datamanagers
40
+ from nucliadb.migrator.context import ExecutionContext
41
+ from nucliadb_protos import (
42
+ knowledgebox_pb2,
43
+ nodewriter_pb2,
44
+ )
45
+
46
+ logger = logging.getLogger(__name__)
47
+
48
+
49
async def migrate(context: ExecutionContext) -> None:
    """Global migration hook; intentionally does nothing.

    This migration only operates per knowledge box, see ``migrate_kb``.
    """
    return None
50
+
51
+
52
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
    """Create the default vectorset config of a KB from its semantic model.

    The vectorset id is the semantic model name reported by learning_config.
    The index parameters come from the KB model metadata read through
    ``datamanagers.kb.get_model_metadata`` and are cross-checked against
    learning_config before anything is written.

    Nothing is written when:
      - the KB already has at least one vectorset,
      - learning_config has no configuration for the KB, or
      - the stored metadata and learning_config disagree (logged as an
        error so it can be reviewed manually).

    Args:
        context: migration execution context; only ``kv_driver`` is used.
        kbid: id of the knowledge box to migrate.
    """
    async with context.kv_driver.transaction(read_only=True) as txn:
        vectorsets_count = len([vs async for vs in datamanagers.vectorsets.iter(txn, kbid=kbid)])
    if vectorsets_count > 0:
        logger.info("Skipping KB with vectorsets already populated", extra={"kbid": kbid})
        return

    learning_config = await learning_proxy.get_configuration(kbid)
    if learning_config is None:
        logger.warning("KB has no learning config", extra={"kbid": kbid})
        return None

    vectorset_id = learning_config.semantic_model
    learning_model_metadata = learning_config.into_semantic_model_metadata()
    learning_similarity = learning_model_metadata.similarity_function
    learning_vector_dimension = learning_model_metadata.vector_dimension
    learning_matryoshka_dimensions = learning_model_metadata.matryoshka_dimensions
    learning_normalize_vectors = len(learning_matryoshka_dimensions) > 0

    async with context.kv_driver.transaction(read_only=True) as txn:
        semantic_model = await datamanagers.kb.get_model_metadata(txn, kbid=kbid)

    maindb_similarity = semantic_model.similarity_function

    # A falsy dimension (0 / unset) is treated as "not stored".
    maindb_vector_dimension = semantic_model.vector_dimension or None

    maindb_matryoshka_dimensions: list[int] = list(semantic_model.matryoshka_dimensions)
    maindb_normalize_vectors = len(maindb_matryoshka_dimensions) > 0

    if (
        maindb_similarity != learning_similarity
        or (maindb_vector_dimension is not None and maindb_vector_dimension != learning_vector_dimension)
        or set(maindb_matryoshka_dimensions) != set(learning_matryoshka_dimensions)
        or maindb_normalize_vectors != learning_normalize_vectors
    ):
        logger.error(
            "KB has mismatched data between nucliadb and learning_config! Please, review manually",
            extra={"kbid": kbid},
        )
        return None

    # When no dimension is stored, passing None would leave the vectorset with
    # vector_dimension == 0. Fall back to the learning_config value, which the
    # check above guarantees to match the stored one whenever both exist.
    vector_dimension = (
        maindb_vector_dimension if maindb_vector_dimension is not None else learning_vector_dimension
    )

    default_vectorset = knowledgebox_pb2.VectorSetConfig(
        vectorset_id=vectorset_id,
        vectorset_index_config=nodewriter_pb2.VectorIndexConfig(
            vector_dimension=vector_dimension,
            similarity=maindb_similarity,
            vector_type=nodewriter_pb2.VectorType.DENSE_F32,  # we only support this for now
            normalize_vectors=maindb_normalize_vectors,
        ),
        matryoshka_dimensions=maindb_matryoshka_dimensions,
    )

    async with context.kv_driver.transaction() as txn:
        # Populate KB vectorsets with data from learning. We are skipping KBs
        # with this key already set, so we can set here safely
        await datamanagers.vectorsets.set(txn, kbid=kbid, config=default_vectorset)
        await txn.commit()
@@ -0,0 +1,61 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """Migration #26
22
+
23
+ Previously, there was no validation on content types added by users on upload. As a result, some KBs
24
+ ended up with content types that included random UUIDs, which caused high cardinality in the content type field.
25
+
26
+ This migration will fix those invalid content types.
27
+ """
28
+
29
+ import logging
30
+
31
+ from nucliadb.common import datamanagers
32
+ from nucliadb.migrator.context import ExecutionContext
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ AFFECTED_KBS = [
38
+ "78d289e0-dd4d-448c-84b5-8ef0b01a5aba",
39
+ ]
40
+
41
+
42
async def migrate(context: ExecutionContext) -> None:
    """Global migration hook; intentionally does nothing.

    This migration only operates per knowledge box, see ``migrate_kb``.
    """
    return None
43
+
44
+
45
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
    """Normalize multipart content types of the resources of an affected KB.

    KBs not listed in ``AFFECTED_KBS`` are left untouched. For the others,
    every resource whose ``basic.icon`` contains both ``multipart/form-data``
    and ``boundary=`` gets its icon replaced by plain ``multipart/form-data``.
    Each resource is handled in its own read-write transaction, committed
    only when the resource was actually changed.

    Args:
        context: migration execution context (not used by this migration).
        kbid: id of the knowledge box being migrated.
    """
    if kbid not in AFFECTED_KBS:
        return

    async for rid in datamanagers.resources.iterate_resource_ids(kbid=kbid):
        async with datamanagers.with_rw_transaction() as txn:
            basic = await datamanagers.resources.get_basic(txn, kbid=kbid, rid=rid)
            # We're aiming to fix content types like "multipart/form-data; boundary={uuid}"
            has_boundary_content_type = (
                basic
                and basic.icon
                and "multipart/form-data" in basic.icon
                and "boundary=" in basic.icon
            )
            if has_boundary_content_type:
                logger.info("Fixing content type for resource", extra={"kbid": kbid, "rid": rid})
                basic.icon = "multipart/form-data"
                await datamanagers.resources.set_basic(txn, kbid=kbid, rid=rid, basic=basic)
                await txn.commit()
@@ -0,0 +1,73 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ """Migration #27
22
+
23
+ Rollover for nucliadb_texts3
24
+ """
25
+
26
+ import logging
27
+
28
+ from nucliadb import learning_proxy
29
+ from nucliadb.common import datamanagers
30
+ from nucliadb.common.cluster.rollover import rollover_kb_index
31
+ from nucliadb.migrator.context import ExecutionContext
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
async def migrate(context: ExecutionContext) -> None:
    """Global migration hook; intentionally does nothing.

    This migration only operates per knowledge box, see ``migrate_kb``.
    """
    return None
37
+
38
+
39
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
    """Roll over the index of a KB, repairing its vector dimension first.

    Args:
        context: migration execution context, forwarded to both steps.
        kbid: id of the knowledge box to roll over.
    """
    # Step 1: make sure the vectorset config carries a usable dimension.
    await maybe_fix_vector_dimensions(context, kbid)

    # Step 2: rebuild the KB index.
    await rollover_kb_index(context, kbid)
42
+
43
+
44
async def maybe_fix_vector_dimensions(context: ExecutionContext, kbid: str) -> None:
    """Repair a KB vectorset that was stored with ``vector_dimension == 0``.

    The dimension is replaced by the one reported by the learning_config
    semantic model metadata. The function does nothing when the KB has no
    learning config, when it does not have exactly one vectorset, or when
    the stored dimension is already non-zero.

    Args:
        context: migration execution context; only ``kv_driver`` is used.
        kbid: id of the knowledge box to check.
    """
    learning_config = await learning_proxy.get_configuration(kbid)
    if learning_config is None:
        logger.warning("KB has no learning config", extra={"kbid": kbid})
        return

    async with context.kv_driver.transaction() as txn:
        vectorsets = [vs async for vs in datamanagers.vectorsets.iter(txn, kbid=kbid)]
        if len(vectorsets) != 1:
            # If multiple vectorsets, they are new shards created correctly, we can safely skip it
            logger.warning(f"KB has {len(vectorsets)} vectorsets, skipping...", extra={"kbid": kbid})
            return
        # Each item is indexed as a pair; the vectorset config is its second element.
        vectorset = vectorsets[0][1]

        # Correct value, skip
        if vectorset.vectorset_index_config.vector_dimension != 0:
            return

        learning_model_metadata = learning_config.into_semantic_model_metadata()
        logger.info(
            "Fixing KB vectorset dimension",
            extra={
                "kbid": kbid,
                "from": vectorset.vectorset_index_config.vector_dimension,
                "to": learning_model_metadata.vector_dimension,
            },
        )
        vectorset.vectorset_index_config.vector_dimension = learning_model_metadata.vector_dimension

        await datamanagers.vectorsets.set(txn, kbid=kbid, config=vectorset)
        # Commit explicitly, as the other migrations do after writing;
        # otherwise the fix is presumably dropped when the transaction closes.
        await txn.commit()
@@ -17,18 +17,16 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from nucliadb_protos.resources_pb2 import FieldDatetime
21
20
 
22
- from nucliadb.ingest.fields.base import Field
21
+ from nucliadb.common.maindb.pg import PGTransaction
23
22
 
24
23
 
25
- class Datetime(Field):
26
- pbklass = FieldDatetime
27
- value: FieldDatetime
28
- type: str = "d"
29
-
30
- async def set_value(self, payload: FieldDatetime):
31
- await self.db_set_value(payload)
32
-
33
- async def get_value(self) -> FieldDatetime:
34
- return await self.db_get_value()
24
async def migrate(txn: PGTransaction) -> None:
    """Create the ``resources`` table (``key`` TEXT primary key, ``value`` BYTEA).

    Args:
        txn: Postgres transaction whose connection runs the statement.
    """
    # IF NOT EXISTS just for compatibility with older install predating the migration system
    create_resources_table = """
            CREATE TABLE IF NOT EXISTS resources (
                key TEXT PRIMARY KEY,
                value BYTEA
            );
        """
    async with txn.connection.cursor() as cur:
        await cur.execute(create_resources_table)
@@ -0,0 +1,42 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from nucliadb.common.maindb.pg import PGTransaction
22
+
23
+
24
async def migrate(txn: PGTransaction) -> None:
    """Create the ``catalog`` table together with its extensions and indexes.

    A single ``execute`` call sends all statements: it enables the
    ``uuid-ossp`` and ``btree_gin`` extensions, creates ``catalog`` keyed by
    ``(kbid, rid)``, and adds GIN indexes (labels, split lower-cased title)
    plus indexes on ``(kbid, created_at)`` and ``(kbid, modified_at)``.

    Args:
        txn: Postgres transaction whose connection runs the statements.
    """
    # Raw string: the SQL contains a regex backslash escape that must reach
    # the database untouched.
    catalog_ddl = r"""
            CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
            CREATE EXTENSION IF NOT EXISTS btree_gin;
            CREATE TABLE catalog (
                kbid UUID,
                rid UUID,
                title TEXT,
                created_at TIMESTAMP,
                modified_at TIMESTAMP,
                labels TEXT[],
                PRIMARY KEY(kbid, rid)
            );
            CREATE INDEX ON catalog USING GIN(kbid, labels);
            CREATE INDEX ON catalog USING GIN(kbid, regexp_split_to_array(lower(title), '\W'::text));
            CREATE INDEX ON catalog(kbid, created_at);
            CREATE INDEX ON catalog(kbid, modified_at);
        """
    async with txn.connection.cursor() as cur:
        await cur.execute(catalog_ddl)
@@ -17,8 +17,10 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from nucliadb.ingest.settings import DriverConfig
21
20
 
21
+ from nucliadb.common.maindb.pg import PGTransaction
22
22
 
23
- def test_case_insenstive_driver_config():
24
- assert DriverConfig("PG") == DriverConfig.PG
23
+
24
async def migrate(txn: PGTransaction) -> None:
    """Add an index on ``catalog(kbid)``.

    Args:
        txn: Postgres transaction whose connection runs the statement.
    """
    create_kbid_index = "CREATE INDEX ON catalog(kbid);"
    async with txn.connection.cursor() as cur:
        await cur.execute(create_kbid_index)
@@ -20,16 +20,16 @@
20
20
  from abc import ABCMeta, abstractmethod
21
21
  from typing import AsyncIterator, Optional
22
22
 
23
+ from nucliadb_protos import nodereader_pb2, noderesources_pb2, utils_pb2
23
24
  from nucliadb_protos.nodereader_pb2_grpc import NodeReaderStub
24
25
  from nucliadb_protos.nodewriter_pb2 import (
25
26
  NewShardRequest,
26
27
  NewVectorSetRequest,
27
28
  OpStatus,
29
+ VectorIndexConfig,
28
30
  )
29
31
  from nucliadb_protos.nodewriter_pb2_grpc import NodeWriterStub
30
32
 
31
- from nucliadb_protos import nodereader_pb2, noderesources_pb2, utils_pb2
32
-
33
33
 
34
34
  class AbstractIndexNode(metaclass=ABCMeta):
35
35
  label: str = "index-node"
@@ -93,15 +93,29 @@ class AbstractIndexNode(metaclass=ABCMeta):
93
93
  async def new_shard(
94
94
  self,
95
95
  kbid: str,
96
- similarity: utils_pb2.VectorSimilarity.ValueType,
97
- release_channel: utils_pb2.ReleaseChannel.ValueType,
98
- normalize_vectors: bool,
96
+ vector_index_config: VectorIndexConfig,
97
+ ) -> noderesources_pb2.ShardCreated:
98
+ req = NewShardRequest(
99
+ kbid=kbid,
100
+ release_channel=utils_pb2.ReleaseChannel.STABLE,
101
+ config=vector_index_config,
102
+ # Deprecated fields, only for backwards compatibility with older nodes
103
+ similarity=vector_index_config.similarity,
104
+ normalize_vectors=vector_index_config.normalize_vectors,
105
+ )
106
+
107
+ resp = await self.writer.NewShard(req) # type: ignore
108
+ return resp
109
+
110
+ async def new_shard_with_vectorsets(
111
+ self,
112
+ kbid: str,
113
+ vectorsets_configs: dict[str, VectorIndexConfig],
99
114
  ) -> noderesources_pb2.ShardCreated:
100
115
  req = NewShardRequest(
101
116
  kbid=kbid,
102
- similarity=similarity,
103
- release_channel=release_channel,
104
- normalize_vectors=normalize_vectors,
117
+ release_channel=utils_pb2.ReleaseChannel.STABLE,
118
+ vectorsets_configs=vectorsets_configs,
105
119
  )
106
120
 
107
121
  resp = await self.writer.NewShard(req) # type: ignore
@@ -120,15 +134,15 @@ class AbstractIndexNode(metaclass=ABCMeta):
120
134
  self,
121
135
  shard_id: str,
122
136
  vectorset: str,
123
- *,
124
- similarity: utils_pb2.VectorSimilarity.ValueType = utils_pb2.VectorSimilarity.COSINE,
125
- normalize_vectors: bool = False,
137
+ config: VectorIndexConfig,
126
138
  ) -> OpStatus:
127
- req = NewVectorSetRequest()
128
- req.id.shard.id = shard_id
129
- req.id.vectorset = vectorset
130
- req.similarity = similarity
131
- req.normalize_vectors = normalize_vectors
139
+ req = NewVectorSetRequest(
140
+ id=noderesources_pb2.VectorSetID(
141
+ shard=noderesources_pb2.ShardId(id=shard_id), vectorset=vectorset
142
+ ),
143
+ config=config,
144
+ )
145
+
132
146
  resp = await self.writer.AddVectorSet(req) # type: ignore
133
147
  return resp
134
148
 
@@ -113,7 +113,7 @@ async def _get_index_node_metadata(
113
113
  channel = get_traced_grpc_channel(grpc_address, "discovery", variant="_writer")
114
114
  if read_replica:
115
115
  # on a read replica, we need to use the replication service
116
- stub = replication_pb2_grpc.ReplicationServiceStub(channel) # type: ignore
116
+ stub = replication_pb2_grpc.ReplicationServiceStub(channel)
117
117
  else:
118
118
  stub = nodewriter_pb2_grpc.NodeWriterStub(channel) # type: ignore
119
119
  try:
@@ -127,9 +127,7 @@ async def _get_index_node_metadata(
127
127
  or None
128
128
  )
129
129
  if read_replica and primary_id is None:
130
- raise Exception(
131
- "Primary node id not found when it is expected to be a read replica"
132
- )
130
+ raise Exception("Primary node id not found when it is expected to be a read replica")
133
131
 
134
132
  return IndexNodeMetadata(
135
133
  node_id=metadata.node_id,
@@ -141,18 +139,14 @@ async def _get_index_node_metadata(
141
139
  )
142
140
 
143
141
 
144
- @backoff.on_exception(
145
- backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=4
146
- )
147
- async def _get_standalone_index_node_metadata(
148
- settings: Settings, address: str
149
- ) -> IndexNodeMetadata:
142
+ @backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=4)
143
+ async def _get_standalone_index_node_metadata(settings: Settings, address: str) -> IndexNodeMetadata:
150
144
  if ":" not in address:
151
145
  grpc_address = f"{address}:{settings.standalone_node_port}"
152
146
  else:
153
147
  grpc_address = address
154
148
  channel = get_traced_grpc_channel(grpc_address, "standalone_proxy")
155
- stub = standalone_pb2_grpc.StandaloneClusterServiceStub(channel) # type: ignore
149
+ stub = standalone_pb2_grpc.StandaloneClusterServiceStub(channel)
156
150
  resp: standalone_pb2.NodeInfoResponse = await stub.NodeInfo(standalone_pb2.NodeInfoRequest()) # type: ignore
157
151
  return IndexNodeMetadata(
158
152
  node_id=resp.id,
@@ -177,9 +171,7 @@ class AbstractClusterDiscovery(abc.ABC):
177
171
  async def finalize(self) -> None:
178
172
  """ """
179
173
 
180
- async def _query_node_metadata(
181
- self, address: str, read_replica: bool = False
182
- ) -> IndexNodeMetadata:
174
+ async def _query_node_metadata(self, address: str, read_replica: bool = False) -> IndexNodeMetadata:
183
175
  if self.settings.standalone_mode:
184
176
  return await _get_standalone_index_node_metadata(self.settings, address)
185
177
  else:
@@ -69,9 +69,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
69
69
  ) -> IndexNodeMetadata:
70
70
  async with self.update_lock:
71
71
  if pod_name not in self.node_id_cache:
72
- self.node_id_cache[pod_name] = await self._query_node_metadata(
73
- node_ip, read_replica
74
- )
72
+ self.node_id_cache[pod_name] = await self._query_node_metadata(node_ip, read_replica)
75
73
  else:
76
74
  self.node_id_cache[pod_name].address = node_ip
77
75
  self.node_id_cache[pod_name].updated_at = time.time()
@@ -84,12 +82,10 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
84
82
  This method will update global node state by utilizing the cluster manager
85
83
  to add or remove nodes.
86
84
  """
87
- status: kubernetes_asyncio.client.models.v1_pod_status.V1PodStatus = event[
85
+ status: kubernetes_asyncio.client.models.v1_pod_status.V1PodStatus = event["object"].status
86
+ event_metadata: kubernetes_asyncio.client.models.v1_object_meta.V1ObjectMeta = event[
88
87
  "object"
89
- ].status
90
- event_metadata: kubernetes_asyncio.client.models.v1_object_meta.V1ObjectMeta = (
91
- event["object"].metadata
92
- )
88
+ ].metadata
93
89
 
94
90
  ready = status.container_statuses is not None
95
91
  if event["type"] == "DELETED":
@@ -199,9 +195,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
199
195
  except NodeConnectionError: # pragma: no cover
200
196
  pass
201
197
  except Exception: # pragma: no cover
202
- logger.exception(
203
- "Error while updating node", exc_info=True
204
- )
198
+ logger.exception("Error while updating node", exc_info=True)
205
199
  except (
206
200
  asyncio.CancelledError,
207
201
  KeyboardInterrupt,
@@ -259,11 +253,9 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
259
253
  continue
260
254
  existing = self.node_id_cache[pod_name]
261
255
  try:
262
- self.node_id_cache[pod_name] = (
263
- await self._query_node_metadata(
264
- existing.address,
265
- read_replica=existing.primary_id is not None,
266
- )
256
+ self.node_id_cache[pod_name] = await self._query_node_metadata(
257
+ existing.address,
258
+ read_replica=existing.primary_id is not None,
267
259
  )
268
260
  except NodeConnectionError: # pragma: no cover
269
261
  self._maybe_remove_stale_node(pod_name)
@@ -301,9 +293,7 @@ class KubernetesDiscovery(AbstractClusterDiscovery):
301
293
 
302
294
  async def initialize(self) -> None:
303
295
  self.cluster_task = asyncio.create_task(self.watch_k8s_for_updates())
304
- self.update_node_data_cache_task = asyncio.create_task(
305
- self.update_node_data_cache()
306
- )
296
+ self.update_node_data_cache_task = asyncio.create_task(self.update_node_data_cache())
307
297
  await self._wait_ready()
308
298
 
309
299
  async def finalize(self) -> None:
@@ -46,9 +46,7 @@ class ManualDiscovery(AbstractClusterDiscovery):
46
46
  except asyncio.CancelledError:
47
47
  return
48
48
  except Exception:
49
- logger.exception(
50
- "Error while watching cluster members. Will retry at started interval"
51
- )
49
+ logger.exception("Error while watching cluster members. Will retry at started interval")
52
50
  finally:
53
51
  await asyncio.sleep(15)
54
52
 
@@ -40,9 +40,7 @@ async def setup_cluster_discovery() -> None:
40
40
  # already loaded
41
41
  return util
42
42
 
43
- klass: Union[
44
- Type[ManualDiscovery], Type[KubernetesDiscovery], Type[SingleNodeDiscovery]
45
- ]
43
+ klass: Union[Type[ManualDiscovery], Type[KubernetesDiscovery], Type[SingleNodeDiscovery]]
46
44
  if settings.cluster_discovery_mode == ClusterDiscoveryMode.MANUAL:
47
45
  klass = ManualDiscovery
48
46
  elif settings.cluster_discovery_mode == ClusterDiscoveryMode.KUBERNETES:
@@ -22,16 +22,15 @@ from typing import Any
22
22
  from nucliadb_protos.nodereader_pb2 import (
23
23
  EdgeList,
24
24
  RelationEdge,
25
- RelationSearchResponse,
26
25
  )
27
- from nucliadb_protos.noderesources_pb2 import EmptyResponse
28
- from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
29
26
  from nucliadb_protos.noderesources_pb2 import (
27
+ EmptyResponse,
30
28
  ShardCreated,
31
29
  ShardId,
32
30
  ShardIds,
33
31
  VectorSetList,
34
32
  )
33
+ from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
35
34
  from nucliadb_protos.nodewriter_pb2 import OpStatus
36
35
  from nucliadb_protos.utils_pb2 import Relation
37
36
 
@@ -90,15 +89,8 @@ class DummyReaderStub: # pragma: no cover
90
89
  self.calls.setdefault("GetShard", []).append(data)
91
90
  return NodeResourcesShard(shard_id="shard", fields=2, paragraphs=2, sentences=2)
92
91
 
93
- async def RelationSearch(self, data): # pragma: no cover
94
- self.calls.setdefault("RelationSearch", []).append(data)
95
- result = RelationSearchResponse()
96
- return result
97
-
98
92
  async def RelationEdges(self, data): # pragma: no cover
99
93
  self.calls.setdefault("RelationEdges", []).append(data)
100
94
  result = EdgeList()
101
- result.list.append(
102
- RelationEdge(edge_type=Relation.RelationType.ENTITY, property="dummy")
103
- )
95
+ result.list.append(RelationEdge(edge_type=Relation.RelationType.ENTITY, property="dummy"))
104
96
  return result
@@ -19,16 +19,13 @@
19
19
  #
20
20
  from typing import Optional
21
21
 
22
- from lru import LRU # type: ignore
23
- from nucliadb_protos.nodereader_pb2_grpc import NodeReaderStub
24
- from nucliadb_protos.nodewriter_pb2_grpc import NodeWriterStub
22
+ from lru import LRU
25
23
 
26
- from nucliadb.common.cluster.base import AbstractIndexNode # type: ignore
27
- from nucliadb.common.cluster.grpc_node_dummy import ( # type: ignore
28
- DummyReaderStub,
29
- DummyWriterStub,
30
- )
24
+ from nucliadb.common.cluster.base import AbstractIndexNode
25
+ from nucliadb.common.cluster.grpc_node_dummy import DummyReaderStub, DummyWriterStub
31
26
  from nucliadb.ingest import SERVICE_NAME
27
+ from nucliadb_protos.nodereader_pb2_grpc import NodeReaderStub
28
+ from nucliadb_protos.nodewriter_pb2_grpc import NodeWriterStub
32
29
  from nucliadb_utils.grpc import get_traced_grpc_channel
33
30
 
34
31
  from .settings import settings
@@ -41,9 +38,7 @@ class IndexNode(AbstractIndexNode):
41
38
  _writer: Optional[NodeWriterStub] = None
42
39
  _reader: Optional[NodeReaderStub] = None
43
40
 
44
- def _get_service_address(
45
- self, port_map: dict[str, int], port: Optional[int]
46
- ) -> str:
41
+ def _get_service_address(self, port_map: dict[str, int], port: Optional[int]) -> str:
47
42
  hostname = self.address.split(":")[0]
48
43
  if port is None:
49
44
  # For testing purposes we need to be able to have a writing port
@@ -60,10 +55,8 @@ class IndexNode(AbstractIndexNode):
60
55
  grpc_address = self._get_service_address(
61
56
  settings.writer_port_map, settings.node_writer_port
62
57
  )
63
- channel = get_traced_grpc_channel(
64
- grpc_address, SERVICE_NAME, variant="_writer"
65
- )
66
- WRITE_CONNECTIONS[self.address] = NodeWriterStub(channel) # type: ignore
58
+ channel = get_traced_grpc_channel(grpc_address, SERVICE_NAME, variant="_writer")
59
+ WRITE_CONNECTIONS[self.address] = NodeWriterStub(channel)
67
60
  else:
68
61
  WRITE_CONNECTIONS[self.address] = DummyWriterStub()
69
62
  self._writer = WRITE_CONNECTIONS[self.address]
@@ -76,10 +69,8 @@ class IndexNode(AbstractIndexNode):
76
69
  grpc_address = self._get_service_address(
77
70
  settings.reader_port_map, settings.node_reader_port
78
71
  )
79
- channel = get_traced_grpc_channel(
80
- grpc_address, SERVICE_NAME, variant="_reader"
81
- )
82
- READ_CONNECTIONS[self.address] = NodeReaderStub(channel) # type: ignore
72
+ channel = get_traced_grpc_channel(grpc_address, SERVICE_NAME, variant="_reader")
73
+ READ_CONNECTIONS[self.address] = NodeReaderStub(channel)
83
74
  else:
84
75
  READ_CONNECTIONS[self.address] = DummyReaderStub()
85
76
  self._reader = READ_CONNECTIONS[self.address]