nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431)
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -23,28 +23,23 @@ from typing import Optional
23
23
  import nucliadb_models as models
24
24
  from nucliadb.common.maindb.driver import Transaction
25
25
  from nucliadb.common.maindb.utils import get_driver
26
+ from nucliadb.common.models_utils import from_proto
26
27
  from nucliadb.ingest.fields.base import Field
27
28
  from nucliadb.ingest.fields.conversation import Conversation
28
29
  from nucliadb.ingest.fields.file import File
29
30
  from nucliadb.ingest.fields.link import Link
30
31
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
31
32
  from nucliadb.ingest.orm.resource import Resource as ORMResource
32
- from nucliadb_models.common import FIELD_TYPES_MAP, FieldTypeName
33
+ from nucliadb_models.common import FieldTypeName
33
34
  from nucliadb_models.resource import (
34
35
  ConversationFieldData,
35
36
  ConversationFieldExtractedData,
36
- DatetimeFieldData,
37
- DatetimeFieldExtractedData,
38
37
  Error,
39
38
  ExtractedDataType,
40
39
  ExtractedDataTypeName,
41
40
  FileFieldData,
42
41
  FileFieldExtractedData,
43
42
  GenericFieldData,
44
- KeywordsetFieldData,
45
- KeywordsetFieldExtractedData,
46
- LayoutFieldData,
47
- LayoutFieldExtractedData,
48
43
  LinkFieldData,
49
44
  LinkFieldExtractedData,
50
45
  QueueType,
@@ -55,7 +50,6 @@ from nucliadb_models.resource import (
55
50
  )
56
51
  from nucliadb_models.search import ResourceProperties
57
52
  from nucliadb_models.security import ResourceSecurity
58
- from nucliadb_models.vectors import UserVectorSet
59
53
  from nucliadb_utils.utilities import get_storage
60
54
 
61
55
 
@@ -71,41 +65,32 @@ async def set_resource_field_extracted_data(
71
65
  if ExtractedDataTypeName.TEXT in wanted_extracted_data:
72
66
  data_et = await field.get_extracted_text()
73
67
  if data_et is not None:
74
- field_data.text = models.ExtractedText.from_message(data_et)
68
+ field_data.text = from_proto.extracted_text(data_et)
75
69
 
76
70
  metadata_wanted = ExtractedDataTypeName.METADATA in wanted_extracted_data
77
- shortened_metadata_wanted = (
78
- ExtractedDataTypeName.SHORTENED_METADATA in wanted_extracted_data
79
- )
71
+ shortened_metadata_wanted = ExtractedDataTypeName.SHORTENED_METADATA in wanted_extracted_data
80
72
  if metadata_wanted or shortened_metadata_wanted:
81
73
  data_fcm = await field.get_field_metadata()
82
74
 
83
75
  if data_fcm is not None:
84
- field_data.metadata = models.FieldComputedMetadata.from_message(
76
+ field_data.metadata = from_proto.field_computed_metadata(
85
77
  data_fcm, shortened=shortened_metadata_wanted and not metadata_wanted
86
78
  )
87
79
 
88
80
  if ExtractedDataTypeName.LARGE_METADATA in wanted_extracted_data:
89
81
  data_lcm = await field.get_large_field_metadata()
90
82
  if data_lcm is not None:
91
- field_data.large_metadata = models.LargeComputedMetadata.from_message(
92
- data_lcm
93
- )
83
+ field_data.large_metadata = from_proto.large_computed_metadata(data_lcm)
94
84
 
95
85
  if ExtractedDataTypeName.VECTOR in wanted_extracted_data:
96
86
  data_vec = await field.get_vectors()
97
87
  if data_vec is not None:
98
- field_data.vectors = models.VectorObject.from_message(data_vec)
99
-
100
- if ExtractedDataTypeName.USERVECTORS in wanted_extracted_data:
101
- user_data_vec = await field.get_user_vectors()
102
- if user_data_vec is not None:
103
- field_data.uservectors = UserVectorSet.from_message(user_data_vec)
88
+ field_data.vectors = from_proto.vector_object(data_vec)
104
89
 
105
90
  if ExtractedDataTypeName.QA in wanted_extracted_data:
106
91
  qa = await field.get_question_answers()
107
92
  if qa is not None:
108
- field_data.question_answers = models.QuestionAnswers.from_message(qa)
93
+ field_data.question_answers = from_proto.field_question_answers(qa)
109
94
 
110
95
  if (
111
96
  isinstance(field, File)
@@ -114,7 +99,7 @@ async def set_resource_field_extracted_data(
114
99
  ):
115
100
  data_fed = await field.get_file_extracted_data()
116
101
  if data_fed is not None:
117
- field_data.file = models.FileExtractedData.from_message(data_fed)
102
+ field_data.file = from_proto.file_extracted_data(data_fed)
118
103
 
119
104
  if (
120
105
  isinstance(field, Link)
@@ -123,7 +108,7 @@ async def set_resource_field_extracted_data(
123
108
  ):
124
109
  data_led = await field.get_link_extracted_data()
125
110
  if data_led is not None:
126
- field_data.link = models.LinkExtractedData.from_message(data_led)
111
+ field_data.link = from_proto.link_extracted_data(data_led)
127
112
 
128
113
 
129
114
  async def serialize(
@@ -136,7 +121,7 @@ async def serialize(
136
121
  slug: Optional[str] = None,
137
122
  ) -> Optional[Resource]:
138
123
  driver = get_driver()
139
- async with driver.transaction(wait_for_abort=False, read_only=True) as txn:
124
+ async with driver.transaction(read_only=True) as txn:
140
125
  return await managed_serialize(
141
126
  txn,
142
127
  kbid,
@@ -159,9 +144,7 @@ async def managed_serialize(
159
144
  service_name: Optional[str] = None,
160
145
  slug: Optional[str] = None,
161
146
  ) -> Optional[Resource]:
162
- orm_resource = await get_orm_resource(
163
- txn, kbid, rid=rid, slug=slug, service_name=service_name
164
- )
147
+ orm_resource = await get_orm_resource(txn, kbid, rid=rid, slug=slug, service_name=service_name)
165
148
  if orm_resource is None:
166
149
  return None
167
150
 
@@ -169,9 +152,7 @@ async def managed_serialize(
169
152
 
170
153
  include_values = ResourceProperties.VALUES in show
171
154
 
172
- include_extracted_data = (
173
- ResourceProperties.EXTRACTED in show and extracted is not []
174
- )
155
+ include_extracted_data = ResourceProperties.EXTRACTED in show and extracted is not []
175
156
 
176
157
  if ResourceProperties.BASIC in show:
177
158
  await orm_resource.get_basic()
@@ -181,8 +162,8 @@ async def managed_serialize(
181
162
  resource.title = orm_resource.basic.title
182
163
  resource.summary = orm_resource.basic.summary
183
164
  resource.icon = orm_resource.basic.icon
184
- resource.layout = orm_resource.basic.layout
185
165
  resource.thumbnail = orm_resource.basic.thumbnail
166
+ resource.hidden = orm_resource.basic.hidden
186
167
  resource.created = (
187
168
  orm_resource.basic.created.ToDatetime()
188
169
  if orm_resource.basic.HasField("created")
@@ -194,49 +175,37 @@ async def managed_serialize(
194
175
  else None
195
176
  )
196
177
 
197
- resource.metadata = models.Metadata.from_message(
198
- orm_resource.basic.metadata
199
- )
200
- resource.usermetadata = models.UserMetadata.from_message(
201
- orm_resource.basic.usermetadata
202
- )
178
+ resource.metadata = from_proto.metadata(orm_resource.basic.metadata)
179
+ resource.usermetadata = from_proto.user_metadata(orm_resource.basic.usermetadata)
203
180
  resource.fieldmetadata = [
204
- models.UserFieldMetadata.from_message(fm)
205
- for fm in orm_resource.basic.fieldmetadata
181
+ from_proto.user_field_metadata(fm) for fm in orm_resource.basic.fieldmetadata
206
182
  ]
207
- resource.computedmetadata = models.ComputedMetadata.from_message(
208
- orm_resource.basic.computedmetadata
209
- )
183
+ resource.computedmetadata = from_proto.computed_metadata(orm_resource.basic.computedmetadata)
210
184
 
211
185
  resource.last_seqid = orm_resource.basic.last_seqid
212
186
 
213
187
  # 0 on the proto means it was never set, as the first valid value for this field will always be 1
214
188
  resource.last_account_seq = (
215
- orm_resource.basic.last_account_seq
216
- if orm_resource.basic.last_account_seq != 0
217
- else None
189
+ orm_resource.basic.last_account_seq if orm_resource.basic.last_account_seq != 0 else None
218
190
  )
219
- resource.queue = QueueType[
220
- orm_resource.basic.QueueType.Name(orm_resource.basic.queue)
221
- ]
191
+ resource.queue = QueueType[orm_resource.basic.QueueType.Name(orm_resource.basic.queue)]
222
192
 
223
193
  if ResourceProperties.RELATIONS in show:
224
194
  await orm_resource.get_relations()
225
195
  if orm_resource.relations is not None:
226
196
  resource.relations = [
227
- models.Relation.from_message(relation)
228
- for relation in orm_resource.relations.relations
197
+ from_proto.relation(relation) for relation in orm_resource.relations.relations
229
198
  ]
230
199
 
231
200
  if ResourceProperties.ORIGIN in show:
232
201
  await orm_resource.get_origin()
233
202
  if orm_resource.origin is not None:
234
- resource.origin = models.Origin.from_message(orm_resource.origin)
203
+ resource.origin = from_proto.origin(orm_resource.origin)
235
204
 
236
205
  if ResourceProperties.EXTRA in show:
237
206
  await orm_resource.get_extra()
238
207
  if orm_resource.extra is not None:
239
- resource.extra = models.Extra.from_message(orm_resource.extra)
208
+ resource.extra = from_proto.extra(orm_resource.extra)
240
209
 
241
210
  include_errors = ResourceProperties.ERRORS in show
242
211
 
@@ -247,11 +216,11 @@ async def managed_serialize(
247
216
  for gid in orm_resource.security.access_groups:
248
217
  resource.security.access_groups.append(gid)
249
218
 
250
- if field_type_filter and (include_values or include_extracted_data):
219
+ if (field_type_filter and (include_values or include_extracted_data)) or include_errors:
251
220
  await orm_resource.get_fields()
252
221
  resource.data = ResourceData()
253
- for (field_type, field_id), field in orm_resource.fields.items():
254
- field_type_name = FIELD_TYPES_MAP[field_type]
222
+ for (field_type, _), field in orm_resource.fields.items():
223
+ field_type_name = from_proto.field_type_name(field_type)
255
224
  if field_type_name not in field_type_filter:
256
225
  continue
257
226
 
@@ -266,18 +235,12 @@ async def managed_serialize(
266
235
  if field.id not in resource.data.texts:
267
236
  resource.data.texts[field.id] = TextFieldData()
268
237
  if include_value:
269
- serialized_value = (
270
- models.FieldText.from_message(value)
271
- if value is not None
272
- else None
273
- )
238
+ serialized_value = from_proto.field_text(value) if value is not None else None
274
239
  resource.data.texts[field.id].value = serialized_value
275
240
  if include_errors:
276
241
  error = await field.get_error()
277
242
  if error is not None:
278
- resource.data.texts[field.id].error = Error(
279
- body=error.error, code=error.code
280
- )
243
+ resource.data.texts[field.id].error = Error(body=error.error, code=error.code)
281
244
  if include_extracted_data:
282
245
  resource.data.texts[field.id].extracted = TextFieldExtractedData()
283
246
  await set_resource_field_extracted_data(
@@ -293,20 +256,14 @@ async def managed_serialize(
293
256
  resource.data.files[field.id] = FileFieldData()
294
257
  if include_value:
295
258
  if value is not None:
296
- resource.data.files[
297
- field.id
298
- ].value = models.FieldFile.from_message(
299
- value # type: ignore
300
- )
259
+ resource.data.files[field.id].value = from_proto.field_file(value)
301
260
  else:
302
261
  resource.data.files[field.id].value = None
303
262
 
304
263
  if include_errors:
305
264
  error = await field.get_error()
306
265
  if error is not None:
307
- resource.data.files[field.id].error = Error(
308
- body=error.error, code=error.code
309
- )
266
+ resource.data.files[field.id].error = Error(body=error.error, code=error.code)
310
267
 
311
268
  if include_extracted_data:
312
269
  resource.data.files[field.id].extracted = FileFieldExtractedData()
@@ -322,16 +279,12 @@ async def managed_serialize(
322
279
  if field.id not in resource.data.links:
323
280
  resource.data.links[field.id] = LinkFieldData()
324
281
  if include_value and value is not None:
325
- resource.data.links[field.id].value = models.FieldLink.from_message(
326
- value
327
- )
282
+ resource.data.links[field.id].value = from_proto.field_link(value)
328
283
 
329
284
  if include_errors:
330
285
  error = await field.get_error()
331
286
  if error is not None:
332
- resource.data.links[field.id].error = Error(
333
- body=error.error, code=error.code
334
- )
287
+ resource.data.links[field.id].error = Error(body=error.error, code=error.code)
335
288
 
336
289
  if include_extracted_data:
337
290
  resource.data.links[field.id].extracted = LinkFieldExtractedData()
@@ -341,33 +294,6 @@ async def managed_serialize(
341
294
  field_type_name,
342
295
  extracted,
343
296
  )
344
- elif field_type_name is FieldTypeName.LAYOUT:
345
- if resource.data.layouts is None:
346
- resource.data.layouts = {}
347
- if field.id not in resource.data.layouts:
348
- resource.data.layouts[field.id] = LayoutFieldData()
349
- if include_value:
350
- resource.data.layouts[
351
- field.id
352
- ].value = models.FieldLayout.from_message(
353
- value # type: ignore
354
- )
355
- if include_errors:
356
- error = await field.get_error()
357
- if error is not None:
358
- resource.data.layouts[field.id].error = Error(
359
- body=error.error, code=error.code
360
- )
361
- if include_extracted_data:
362
- resource.data.layouts[
363
- field.id
364
- ].extracted = LayoutFieldExtractedData()
365
- await set_resource_field_extracted_data(
366
- field,
367
- resource.data.layouts[field.id].extracted,
368
- field_type_name,
369
- extracted,
370
- )
371
297
  elif field_type_name is FieldTypeName.CONVERSATION:
372
298
  if resource.data.conversations is None:
373
299
  resource.data.conversations = {}
@@ -381,73 +307,15 @@ async def managed_serialize(
381
307
  )
382
308
  if include_value and isinstance(field, Conversation):
383
309
  value = await field.get_metadata()
384
- resource.data.conversations[
385
- field.id
386
- ].value = models.FieldConversation.from_message(value)
310
+ resource.data.conversations[field.id].value = from_proto.field_conversation(value)
387
311
  if include_extracted_data:
388
- resource.data.conversations[
389
- field.id
390
- ].extracted = ConversationFieldExtractedData()
312
+ resource.data.conversations[field.id].extracted = ConversationFieldExtractedData()
391
313
  await set_resource_field_extracted_data(
392
314
  field,
393
315
  resource.data.conversations[field.id].extracted,
394
316
  field_type_name,
395
317
  extracted,
396
318
  )
397
- elif field_type_name is FieldTypeName.DATETIME:
398
- if resource.data.datetimes is None:
399
- resource.data.datetimes = {}
400
- if field.id not in resource.data.datetimes:
401
- resource.data.datetimes[field.id] = DatetimeFieldData()
402
- if include_errors:
403
- error = await field.get_error()
404
- if error is not None:
405
- resource.data.datetimes[field.id].error = Error(
406
- body=error.error, code=error.code
407
- )
408
- if include_value:
409
- resource.data.datetimes[
410
- field.id
411
- ].value = models.FieldDatetime.from_message(
412
- value # type: ignore
413
- )
414
- if include_extracted_data:
415
- resource.data.datetimes[
416
- field.id
417
- ].extracted = DatetimeFieldExtractedData()
418
- await set_resource_field_extracted_data(
419
- field,
420
- resource.data.datetimes[field.id].extracted,
421
- field_type_name,
422
- extracted,
423
- )
424
- elif field_type_name is FieldTypeName.KEYWORDSET:
425
- if resource.data.keywordsets is None:
426
- resource.data.keywordsets = {field.id: KeywordsetFieldData()}
427
- if field.id not in resource.data.keywordsets:
428
- resource.data.keywordsets[field.id] = KeywordsetFieldData()
429
- if include_errors:
430
- error = await field.get_error()
431
- if error is not None:
432
- resource.data.keywordsets[field.id].error = Error(
433
- body=error.error, code=error.code
434
- )
435
- if include_value:
436
- resource.data.keywordsets[
437
- field.id
438
- ].value = models.FieldKeywordset.from_message(
439
- value # type: ignore
440
- )
441
- if include_extracted_data:
442
- resource.data.keywordsets[
443
- field.id
444
- ].extracted = KeywordsetFieldExtractedData()
445
- await set_resource_field_extracted_data(
446
- field,
447
- resource.data.keywordsets[field.id].extracted,
448
- field_type_name,
449
- extracted,
450
- )
451
319
  elif field_type_name is FieldTypeName.GENERIC:
452
320
  if resource.data.generics is None:
453
321
  resource.data.generics = {}
@@ -458,14 +326,10 @@ async def managed_serialize(
458
326
  if include_errors:
459
327
  error = await field.get_error()
460
328
  if error is not None:
461
- resource.data.generics[field.id].error = Error(
462
- body=error.error, code=error.code
463
- )
329
+ resource.data.generics[field.id].error = Error(body=error.error, code=error.code)
464
330
  if include_extracted_data:
465
331
  resource.data.generics[field.id].extracted = TextFieldExtractedData(
466
- text=models.ExtractedText(
467
- text=resource.data.generics[field.id].value
468
- )
332
+ text=models.ExtractedText(text=resource.data.generics[field.id].value)
469
333
  )
470
334
  return resource
471
335
 
@@ -502,6 +366,6 @@ async def get_resource_uuid_by_slug(
502
366
  ) -> Optional[str]:
503
367
  storage = await get_storage(service_name=service_name)
504
368
  driver = get_driver()
505
- async with driver.transaction() as txn:
369
+ async with driver.transaction(read_only=True) as txn:
506
370
  kb = KnowledgeBox(txn, storage, kbid)
507
371
  return await kb.get_resource_uuid_by_slug(slug)
@@ -48,9 +48,7 @@ async def start_grpc(service_name: Optional[str] = None):
48
48
 
49
49
  await server.start()
50
50
 
51
- logger.info(
52
- f"======= Ingest GRPC running on http://0.0.0.0:{settings.grpc_port}/ ======"
53
- )
51
+ logger.info(f"======= Ingest GRPC running on http://0.0.0.0:{settings.grpc_port}/ ======")
54
52
 
55
53
  async def finalizer():
56
54
  await health_check_finalizer()