nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -1,137 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import pytest
21
-
22
- from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX
23
- from nucliadb_models.entities import CreateEntitiesGroupPayload, Entity
24
- from nucliadb_models.labels import Label, LabelSet
25
- from nucliadb_models.resource import NucliaDBRoles
26
- from nucliadb_protos import knowledgebox_pb2, writer_pb2
27
- from nucliadb_utils.utilities import get_ingest
28
-
29
-
30
- @pytest.mark.asyncio
31
- async def test_service_lifecycle_entities(writer_api, entities_manager_mock):
32
- async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
33
- resp = await client.post(
34
- f"/{KBS_PREFIX}",
35
- json={
36
- "slug": "kbid1",
37
- "title": "My Knowledge Box",
38
- },
39
- )
40
- assert resp.status_code == 201
41
- data = resp.json()
42
- assert data["slug"] == "kbid1"
43
- kbid = data["uuid"]
44
-
45
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
46
- eg = CreateEntitiesGroupPayload(
47
- group="0",
48
- title="My group",
49
- color="#0000000",
50
- entities={
51
- "ent1": Entity(value="asd", merged=False),
52
- "ent2": Entity(value="asd", merged=False),
53
- "ent3": Entity(value="asd", merged=False),
54
- },
55
- )
56
-
57
- resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
58
- assert resp.status_code == 200
59
-
60
- ingest = get_ingest()
61
- result = await ingest.GetEntities(
62
- writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
63
- )
64
- assert set(result.groups.keys()) == {"0"}
65
- assert result.groups["0"].title == eg.title
66
- assert result.groups["0"].color == eg.color
67
- assert set(result.groups["0"].entities.keys()) == {"ent1", "ent2", "ent3"}
68
- assert result.groups["0"].entities["ent1"].value == "asd"
69
-
70
- eg.group = "1"
71
- resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
72
- assert resp.status_code == 200
73
- result = await ingest.GetEntities(
74
- writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
75
- )
76
- assert set(result.groups.keys()) == {"0", "1"}
77
-
78
-
79
- @pytest.mark.asyncio
80
- async def test_entities_custom_field_for_user_defined_groups(
81
- writer_api, entities_manager_mock
82
- ):
83
- """
84
- Test description:
85
-
86
- - Create an entity group and check that the default value for the `custom`
87
- field is True
88
- """
89
- async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
90
- resp = await client.post(
91
- f"/{KBS_PREFIX}",
92
- json={
93
- "slug": "kbid1",
94
- "title": "My Knowledge Box",
95
- },
96
- )
97
- assert resp.status_code == 201
98
- data = resp.json()
99
- kbid = data["uuid"]
100
-
101
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
102
- eg = CreateEntitiesGroupPayload(group="0")
103
- resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
104
- assert resp.status_code == 200
105
-
106
- ingest = get_ingest()
107
- result = await ingest.GetEntities(
108
- writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
109
- )
110
- assert result.groups["0"].custom is True
111
-
112
-
113
- @pytest.mark.asyncio
114
- async def test_service_lifecycle_labels(writer_api):
115
- async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
116
- resp = await client.post(
117
- f"/{KBS_PREFIX}",
118
- json={
119
- "slug": "kbid1",
120
- "title": "My Knowledge Box",
121
- },
122
- )
123
- assert resp.status_code == 201
124
- data = resp.json()
125
- assert data["slug"] == "kbid1"
126
- kbid = data["uuid"]
127
-
128
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
129
- ls = LabelSet(
130
- title="My labelset", color="#0000000", multiple=False, kind=["RESOURCES"]
131
- )
132
- ls.labels.append(Label(title="asd"))
133
- ls.labels.append(Label(title="asd"))
134
- resp = await client.post(f"/{KB_PREFIX}/{kbid}/labelset/ls1", json=ls.dict())
135
- assert resp.status_code == 200
136
- resp = await client.post(f"/{KB_PREFIX}/{kbid}/labelset/ls2", json=ls.dict())
137
- assert resp.status_code == 200
@@ -1,203 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import tempfile
21
- import uuid
22
-
23
- import asyncpg
24
- import pytest
25
-
26
- from nucliadb.writer.settings import settings
27
- from nucliadb.writer.tus import get_dm
28
- from nucliadb.writer.tus.exceptions import CloudFileNotFound
29
- from nucliadb.writer.tus.gcs import GCloudBlobStore, GCloudFileStorageManager
30
- from nucliadb.writer.tus.local import LocalBlobStore, LocalFileStorageManager
31
- from nucliadb.writer.tus.pg import PGBlobStore, PGFileStorageManager
32
- from nucliadb.writer.tus.s3 import S3BlobStore, S3FileStorageManager
33
- from nucliadb.writer.tus.storage import BlobStore, FileStorageManager
34
- from nucliadb_utils.storages.pg import PostgresStorage
35
- from nucliadb_utils.storages.storage import KB_RESOURCE_FIELD
36
-
37
-
38
- @pytest.fixture(scope="function")
39
- async def s3_storage_tus(s3):
40
- storage = S3BlobStore()
41
- await storage.initialize(
42
- client_id="",
43
- client_secret="",
44
- max_pool_connections=2,
45
- endpoint_url=s3,
46
- verify_ssl=False,
47
- ssl=False,
48
- region_name=None,
49
- bucket="test_{kbid}",
50
- bucket_tags={"testTag": "test"},
51
- )
52
- yield storage
53
- await storage.finalize()
54
-
55
-
56
- @pytest.fixture(scope="function")
57
- async def gcs_storage_tus(gcs):
58
- storage = GCloudBlobStore()
59
- await storage.initialize(
60
- json_credentials=None,
61
- bucket="test_{kbid}",
62
- location="location",
63
- project="project",
64
- bucket_labels={},
65
- object_base_url=gcs,
66
- )
67
- yield storage
68
- await storage.finalize()
69
-
70
-
71
- @pytest.fixture(scope="function")
72
- async def local_storage_tus():
73
- folder = tempfile.TemporaryDirectory()
74
- storage = LocalBlobStore(local_testing_files=folder.name)
75
- await storage.initialize()
76
- yield storage
77
- await storage.finalize()
78
- folder.cleanup()
79
-
80
-
81
- @pytest.fixture(scope="function")
82
- async def pg_storage_tus(pg):
83
- dsn = f"postgresql://postgres:postgres@{pg[0]}:{pg[1]}/postgres"
84
- conn = await asyncpg.connect(dsn)
85
- await conn.execute(
86
- """
87
- DROP table IF EXISTS kb_files;
88
- DROP table IF EXISTS kb_files_fileparts;
89
- """
90
- )
91
- await conn.close()
92
- fstorage = PostgresStorage(dsn) # set everything up
93
- await fstorage.initialize()
94
- await fstorage.finalize()
95
-
96
- storage = PGBlobStore(dsn)
97
- await storage.initialize()
98
- yield storage
99
- await storage.finalize()
100
-
101
-
102
- async def clean_dm():
103
- from nucliadb.writer.tus import REDIS_FILE_DATA_MANAGER_FACTORY
104
-
105
- if REDIS_FILE_DATA_MANAGER_FACTORY is not None:
106
- await REDIS_FILE_DATA_MANAGER_FACTORY.finalize()
107
- REDIS_FILE_DATA_MANAGER_FACTORY = None
108
-
109
-
110
- @pytest.fixture(scope="function")
111
- async def redis_dm(redis):
112
- prev = settings.dm_enabled
113
-
114
- settings.dm_enabled = True
115
- settings.dm_redis_host = redis[0]
116
- settings.dm_redis_port = redis[1]
117
-
118
- dm = get_dm()
119
-
120
- yield dm
121
-
122
- await clean_dm()
123
-
124
- settings.dm_enabled = prev
125
-
126
-
127
- @pytest.mark.asyncio
128
- async def test_pg_driver(redis_dm, pg_storage_tus: PGBlobStore):
129
- await storage_test(pg_storage_tus, PGFileStorageManager(pg_storage_tus))
130
-
131
-
132
- @pytest.mark.asyncio
133
- async def test_s3_driver(redis_dm, s3_storage_tus: S3BlobStore):
134
- await storage_test(s3_storage_tus, S3FileStorageManager(s3_storage_tus))
135
-
136
-
137
- @pytest.mark.asyncio
138
- async def test_gcs_driver(redis_dm, gcs_storage_tus: GCloudBlobStore):
139
- await storage_test(gcs_storage_tus, GCloudFileStorageManager(gcs_storage_tus))
140
-
141
-
142
- @pytest.mark.asyncio
143
- async def test_local_driver(local_storage_tus: LocalBlobStore):
144
- settings.dm_enabled = False
145
- await storage_test(local_storage_tus, LocalFileStorageManager(local_storage_tus))
146
- settings.dm_enabled = True
147
-
148
-
149
- async def storage_test(storage: BlobStore, file_storage_manager: FileStorageManager):
150
- example = b"mytestinfo"
151
- field = "myfield"
152
- rid = "myrid"
153
- kbid = "mykb_tus_test"
154
-
155
- metadata: dict[str, str] = {}
156
- bucket_name = storage.get_bucket_name(kbid)
157
- assert bucket_name in [
158
- "test_mykb_tus_test",
159
- "test-mykb-tus-test",
160
- "ndb_mykb_tus_test",
161
- "mykb_tus_test",
162
- ]
163
-
164
- if not isinstance(storage, PGBlobStore):
165
- # this is silly, but we don't need this for pg
166
- assert await storage.check_exists(bucket_name) is False
167
-
168
- exists = await storage.create_bucket(bucket_name)
169
- assert exists is False
170
-
171
- upload_id = uuid.uuid4().hex
172
- dm = get_dm()
173
- await dm.load(upload_id)
174
- await dm.start({})
175
- await dm.update(
176
- upload_file_id=f"{upload_id}",
177
- rid=rid,
178
- field=field,
179
- metadata=metadata,
180
- deferred_length=True,
181
- offset=0,
182
- item=None,
183
- )
184
-
185
- path = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, field=field)
186
- await file_storage_manager.start(dm, path=path, kbid=kbid)
187
-
188
- async def generate():
189
- yield example
190
-
191
- size = await file_storage_manager.append(dm, generate(), 0)
192
- await dm.update(offset=size)
193
- assert size == len(example)
194
- await file_storage_manager.finish(dm)
195
-
196
- async for data in file_storage_manager.read_range(path, kbid, 1, size):
197
- assert data == example[1:]
198
-
199
- await file_storage_manager.delete_upload(path, kbid)
200
-
201
- with pytest.raises(CloudFileNotFound):
202
- async for data in file_storage_manager.read_range(path, kbid, 1, size):
203
- assert data == example[1:]
@@ -1,35 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- import hashlib
21
- from base64 import b64encode
22
- from os.path import dirname
23
-
24
-
25
- def load_file_as_FileB64_payload(f: str, content_type: str) -> dict:
26
- file_location = f"{dirname(__file__)}/{f}"
27
- filename = f.split("/")[-1]
28
- data = b64encode(open(file_location, "rb").read())
29
-
30
- return {
31
- "filename": filename,
32
- "content_type": content_type,
33
- "payload": data.decode("utf-8"),
34
- "md5": hashlib.md5(data).hexdigest(),
35
- }
nucliadb/writer/tus/pg.py DELETED
@@ -1,125 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
- from __future__ import annotations
21
-
22
- from typing import AsyncIterator
23
-
24
- import asyncpg
25
- from nucliadb_protos.resources_pb2 import CloudFile
26
-
27
- from nucliadb.writer.tus.dm import FileDataManager
28
- from nucliadb.writer.tus.exceptions import CloudFileNotFound
29
- from nucliadb.writer.tus.storage import BlobStore, FileStorageManager
30
- from nucliadb_utils.storages import CHUNK_SIZE
31
- from nucliadb_utils.storages.pg import PostgresFileDataLayer
32
-
33
-
34
- class PGFileStorageManager(FileStorageManager):
35
- _handler = None
36
- storage: PGBlobStore
37
- chunk_size = min_upload_size = CHUNK_SIZE
38
-
39
- async def start(self, dm: FileDataManager, path: str, kbid: str):
40
- bucket = self.storage.get_bucket_name(kbid)
41
-
42
- async with self.storage.pool.acquire() as conn:
43
- async with conn.transaction():
44
- dl = PostgresFileDataLayer(conn)
45
- if path is not None:
46
- await dl.delete_file(kbid, path)
47
-
48
- await dl.create_file(
49
- kb_id=bucket,
50
- file_id=path,
51
- filename=dm.filename,
52
- size=dm.size,
53
- content_type=dm.content_type,
54
- )
55
-
56
- await dm.update(upload_file_id=path, path=path, bucket=bucket)
57
-
58
- async def iter_data(self, uri, kbid: str, headers=None):
59
- bucket = self.storage.get_bucket_name(kbid)
60
-
61
- async with self.storage.pool.acquire() as conn:
62
- dl = PostgresFileDataLayer(conn)
63
- async for chunk in dl.iterate_chunks(bucket, uri):
64
- yield chunk["data"]
65
-
66
- async def read_range(
67
- self, uri: str, kbid: str, start: int, end: int
68
- ) -> AsyncIterator[bytes]:
69
- """
70
- Iterate through ranges of data
71
- """
72
- bucket = self.storage.get_bucket_name(kbid)
73
-
74
- async with self.storage.pool.acquire() as conn:
75
- dl = PostgresFileDataLayer(conn)
76
- file_info = await dl.get_file_info(kbid, uri)
77
- if file_info is None:
78
- raise CloudFileNotFound()
79
- async for data in dl.iterate_range(
80
- kb_id=bucket, file_id=uri, start=start, end=end
81
- ):
82
- yield data
83
-
84
- async def append(self, dm: FileDataManager, iterable, offset) -> int:
85
- bucket = dm.get("bucket")
86
- path = dm.get("path")
87
- count = 0
88
- async with self.storage.pool.acquire() as conn:
89
- dl = PostgresFileDataLayer(conn)
90
- async for chunk in iterable:
91
- await dl.append_chunk(kb_id=bucket, file_id=path, data=chunk)
92
- size = len(chunk)
93
- count += size
94
- offset += len(chunk)
95
- return count
96
-
97
- async def finish(self, dm: FileDataManager):
98
- path = dm.get("path")
99
- await dm.finish()
100
- return path
101
-
102
- async def delete_upload(self, uri: str, kbid: str):
103
- async with self.storage.pool.acquire() as conn:
104
- async with conn.transaction():
105
- dl = PostgresFileDataLayer(conn)
106
- await dl.delete_file(kbid, uri)
107
-
108
-
109
- class PGBlobStore(BlobStore):
110
- def __init__(self, dsn: str):
111
- self.dsn = dsn
112
- self.source = CloudFile.POSTGRES
113
-
114
- async def initialize(self):
115
- self.pool = await asyncpg.create_pool(self.dsn)
116
-
117
- async def finalize(self):
118
- await self.pool.close()
119
- self.initialized = False
120
-
121
- async def check_exists(self, bucket_name: str) -> bool:
122
- return True
123
-
124
- def get_bucket_name(self, kbid: str) -> str:
125
- return kbid
@@ -1,135 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: nucliadb
3
- Version: 4.0.0.post542
4
- Home-page: https://docs.nuclia.dev/docs/guides/nucliadb/intro
5
- Author: NucliaDB Community
6
- Author-email: nucliadb@nuclia.com
7
- License: BSD
8
- Project-URL: Nuclia, https://nuclia.com
9
- Project-URL: Github, https://github.com/nuclia/nucliadb
10
- Project-URL: Discord, https://discord.gg/8EvQwmsbzf
11
- Project-URL: API Reference, https://docs.nuclia.dev/docs/api
12
- Keywords: search,semantic,AI
13
- Classifier: Development Status :: 4 - Beta
14
- Classifier: Intended Audience :: Developers
15
- Classifier: Intended Audience :: Information Technology
16
- Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
17
- Classifier: Programming Language :: Python
18
- Classifier: Programming Language :: Python :: 3.9
19
- Classifier: Programming Language :: Python :: 3.10
20
- Classifier: Programming Language :: Python :: 3.11
21
- Classifier: Programming Language :: Python :: 3 :: Only
22
- Requires-Python: >=3.9, <4
23
- Description-Content-Type: text/markdown
24
- Requires-Dist: nucliadb-telemetry[all] >=4.0.0.post542
25
- Requires-Dist: nucliadb-utils[cache,fastapi,storages] >=4.0.0.post542
26
- Requires-Dist: nucliadb-protos >=4.0.0.post542
27
- Requires-Dist: nucliadb-models >=4.0.0.post542
28
- Requires-Dist: nucliadb-admin-assets >=1.0.0.post1224
29
- Requires-Dist: nucliadb-node-binding >=2.26.0
30
- Requires-Dist: uvicorn <0.19.0
31
- Requires-Dist: argdantic
32
- Requires-Dist: aiohttp >=3.9.4
33
- Requires-Dist: lru-dict >=1.1.7
34
- Requires-Dist: backoff
35
- Requires-Dist: aiofiles >=0.8.0
36
- Requires-Dist: psutil >=5.9.7
37
- Requires-Dist: types-psutil >=5.9.5.17
38
- Requires-Dist: types-aiofiles >=0.8.3
39
- Requires-Dist: protobuf >=4.22.3
40
- Requires-Dist: types-protobuf <5,>=4.24
41
- Requires-Dist: grpcio <1.63.0,>=1.44.0
42
- Requires-Dist: grpcio-health-checking <1.63.0,>=1.44.0
43
- Requires-Dist: grpcio-channelz <1.63.0,>=1.44.0
44
- Requires-Dist: grpcio-status <1.63.0,>=1.44.0
45
- Requires-Dist: grpcio-tools <1.63.0,>=1.44.0
46
- Requires-Dist: grpcio-testing <1.63.0,>=1.44.0
47
- Requires-Dist: grpcio-reflection <1.63.0,>=1.44.0
48
- Requires-Dist: orjson >=3.6.7
49
- Requires-Dist: types-setuptools
50
- Requires-Dist: pydantic >=2.7
51
- Requires-Dist: pydantic-settings >=2.2
52
- Requires-Dist: aiobotocore >=2.9.0
53
- Requires-Dist: botocore >=1.34.0
54
- Requires-Dist: google-cloud-storage
55
- Requires-Dist: gcloud
56
- Requires-Dist: oauth2client
57
- Requires-Dist: jwcrypto >=1.5.6
58
- Requires-Dist: fastapi-versioning >=0.10.0
59
- Requires-Dist: fastapi >=0.95.2
60
- Requires-Dist: sentry-sdk >=1.5.12
61
- Requires-Dist: pyjwt >=2.4.0
62
- Requires-Dist: mmh3 >=3.0.0
63
- Requires-Dist: httpx >=0.23.0
64
- Requires-Dist: types-pkg-resources >=0.1.3
65
- Requires-Dist: grpc-stubs >=1.44.0
66
- Requires-Dist: aiodns >=3.0.0
67
- Requires-Dist: types-orjson
68
- Requires-Dist: asyncpg >=0.27.0
69
- Requires-Dist: tikv-client ==0.0.3
70
- Requires-Dist: multidict >=6.0.4
71
- Requires-Dist: deprecated >=1.2.12
72
- Requires-Dist: asgiref >=3.3.2
73
- Requires-Dist: jmespath >=1.0.0
74
- Requires-Dist: idna >=3.3
75
- Requires-Dist: sniffio >=1.2.0
76
- Requires-Dist: async-lru >=2.0.4
77
- Requires-Dist: async-timeout >=4.0.3
78
- Requires-Dist: cachetools >=5.3.2
79
- Requires-Dist: types-cachetools >=5.3.0.5
80
- Requires-Dist: kubernetes-asyncio
81
- Provides-Extra: redis
82
- Requires-Dist: redis >=4.3.4 ; extra == 'redis'
83
-
84
- # nucliadb
85
-
86
- This module contains most of the Python components for NucliaDB:
87
-
88
- - ingest
89
- - reader
90
- - writer
91
- - search
92
- - train
93
-
94
- # NucliaDB Migrations
95
-
96
- This module is used to manage NucliaDB Migrations.
97
-
98
- All migrations will be provided in the `migrations` folder and have a filename
99
- that follows the structure: `[sequence]_[migration name].py`.
100
- Here, `sequence` is a zero-padded number that sets the order in which the migrations are run.
101
- Example: `0001_migrate_data.py`.
102
-
103
- Each migration should have the following:
104
-
105
- ```python
106
- from nucliadb.migrator.context import ExecutionContext
107
-
108
-
109
- async def migrate(context: ExecutionContext) -> None:
110
- """
111
- Non-kb type of migration. Migrate global data.
112
- """
113
-
114
-
115
- async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
116
- """
117
- Migrate kb.
118
-
119
- Must have both types of migrations.
120
- """
121
- ```
122
-
123
-
124
- ## How migrations are managed
125
-
126
- - All migrations utilize a distributed lock to prevent jobs from running simultaneously
127
- - Global migration state:
128
- - current version
129
- - target version
130
- - KBs to migrate
131
- - KB Migration State:
132
- - current version
133
-
134
- - Migrations are currently run with a deployment and will be continuously retried on failure.
135
- - Migrations are run in a deployment to make sure that a migration does not prevent code deployment.