nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418)
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -23,28 +23,23 @@ from typing import Optional
23
23
  import nucliadb_models as models
24
24
  from nucliadb.common.maindb.driver import Transaction
25
25
  from nucliadb.common.maindb.utils import get_driver
26
+ from nucliadb.common.models_utils import from_proto
26
27
  from nucliadb.ingest.fields.base import Field
27
28
  from nucliadb.ingest.fields.conversation import Conversation
28
29
  from nucliadb.ingest.fields.file import File
29
30
  from nucliadb.ingest.fields.link import Link
30
31
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
31
32
  from nucliadb.ingest.orm.resource import Resource as ORMResource
32
- from nucliadb_models.common import FIELD_TYPES_MAP, FieldTypeName
33
+ from nucliadb_models.common import FieldTypeName
33
34
  from nucliadb_models.resource import (
34
35
  ConversationFieldData,
35
36
  ConversationFieldExtractedData,
36
- DatetimeFieldData,
37
- DatetimeFieldExtractedData,
38
37
  Error,
39
38
  ExtractedDataType,
40
39
  ExtractedDataTypeName,
41
40
  FileFieldData,
42
41
  FileFieldExtractedData,
43
42
  GenericFieldData,
44
- KeywordsetFieldData,
45
- KeywordsetFieldExtractedData,
46
- LayoutFieldData,
47
- LayoutFieldExtractedData,
48
43
  LinkFieldData,
49
44
  LinkFieldExtractedData,
50
45
  QueueType,
@@ -70,36 +65,32 @@ async def set_resource_field_extracted_data(
70
65
  if ExtractedDataTypeName.TEXT in wanted_extracted_data:
71
66
  data_et = await field.get_extracted_text()
72
67
  if data_et is not None:
73
- field_data.text = models.ExtractedText.from_message(data_et)
68
+ field_data.text = from_proto.extracted_text(data_et)
74
69
 
75
70
  metadata_wanted = ExtractedDataTypeName.METADATA in wanted_extracted_data
76
- shortened_metadata_wanted = (
77
- ExtractedDataTypeName.SHORTENED_METADATA in wanted_extracted_data
78
- )
71
+ shortened_metadata_wanted = ExtractedDataTypeName.SHORTENED_METADATA in wanted_extracted_data
79
72
  if metadata_wanted or shortened_metadata_wanted:
80
73
  data_fcm = await field.get_field_metadata()
81
74
 
82
75
  if data_fcm is not None:
83
- field_data.metadata = models.FieldComputedMetadata.from_message(
76
+ field_data.metadata = from_proto.field_computed_metadata(
84
77
  data_fcm, shortened=shortened_metadata_wanted and not metadata_wanted
85
78
  )
86
79
 
87
80
  if ExtractedDataTypeName.LARGE_METADATA in wanted_extracted_data:
88
81
  data_lcm = await field.get_large_field_metadata()
89
82
  if data_lcm is not None:
90
- field_data.large_metadata = models.LargeComputedMetadata.from_message(
91
- data_lcm
92
- )
83
+ field_data.large_metadata = from_proto.large_computed_metadata(data_lcm)
93
84
 
94
85
  if ExtractedDataTypeName.VECTOR in wanted_extracted_data:
95
86
  data_vec = await field.get_vectors()
96
87
  if data_vec is not None:
97
- field_data.vectors = models.VectorObject.from_message(data_vec)
88
+ field_data.vectors = from_proto.vector_object(data_vec)
98
89
 
99
90
  if ExtractedDataTypeName.QA in wanted_extracted_data:
100
91
  qa = await field.get_question_answers()
101
92
  if qa is not None:
102
- field_data.question_answers = models.QuestionAnswers.from_message(qa)
93
+ field_data.question_answers = from_proto.field_question_answers(qa)
103
94
 
104
95
  if (
105
96
  isinstance(field, File)
@@ -108,7 +99,7 @@ async def set_resource_field_extracted_data(
108
99
  ):
109
100
  data_fed = await field.get_file_extracted_data()
110
101
  if data_fed is not None:
111
- field_data.file = models.FileExtractedData.from_message(data_fed)
102
+ field_data.file = from_proto.file_extracted_data(data_fed)
112
103
 
113
104
  if (
114
105
  isinstance(field, Link)
@@ -117,7 +108,7 @@ async def set_resource_field_extracted_data(
117
108
  ):
118
109
  data_led = await field.get_link_extracted_data()
119
110
  if data_led is not None:
120
- field_data.link = models.LinkExtractedData.from_message(data_led)
111
+ field_data.link = from_proto.link_extracted_data(data_led)
121
112
 
122
113
 
123
114
  async def serialize(
@@ -130,7 +121,7 @@ async def serialize(
130
121
  slug: Optional[str] = None,
131
122
  ) -> Optional[Resource]:
132
123
  driver = get_driver()
133
- async with driver.transaction(wait_for_abort=False, read_only=True) as txn:
124
+ async with driver.transaction(read_only=True) as txn:
134
125
  return await managed_serialize(
135
126
  txn,
136
127
  kbid,
@@ -153,9 +144,7 @@ async def managed_serialize(
153
144
  service_name: Optional[str] = None,
154
145
  slug: Optional[str] = None,
155
146
  ) -> Optional[Resource]:
156
- orm_resource = await get_orm_resource(
157
- txn, kbid, rid=rid, slug=slug, service_name=service_name
158
- )
147
+ orm_resource = await get_orm_resource(txn, kbid, rid=rid, slug=slug, service_name=service_name)
159
148
  if orm_resource is None:
160
149
  return None
161
150
 
@@ -163,9 +152,7 @@ async def managed_serialize(
163
152
 
164
153
  include_values = ResourceProperties.VALUES in show
165
154
 
166
- include_extracted_data = (
167
- ResourceProperties.EXTRACTED in show and extracted is not []
168
- )
155
+ include_extracted_data = ResourceProperties.EXTRACTED in show and extracted is not []
169
156
 
170
157
  if ResourceProperties.BASIC in show:
171
158
  await orm_resource.get_basic()
@@ -175,8 +162,8 @@ async def managed_serialize(
175
162
  resource.title = orm_resource.basic.title
176
163
  resource.summary = orm_resource.basic.summary
177
164
  resource.icon = orm_resource.basic.icon
178
- resource.layout = orm_resource.basic.layout
179
165
  resource.thumbnail = orm_resource.basic.thumbnail
166
+ resource.hidden = orm_resource.basic.hidden
180
167
  resource.created = (
181
168
  orm_resource.basic.created.ToDatetime()
182
169
  if orm_resource.basic.HasField("created")
@@ -188,49 +175,37 @@ async def managed_serialize(
188
175
  else None
189
176
  )
190
177
 
191
- resource.metadata = models.Metadata.from_message(
192
- orm_resource.basic.metadata
193
- )
194
- resource.usermetadata = models.UserMetadata.from_message(
195
- orm_resource.basic.usermetadata
196
- )
178
+ resource.metadata = from_proto.metadata(orm_resource.basic.metadata)
179
+ resource.usermetadata = from_proto.user_metadata(orm_resource.basic.usermetadata)
197
180
  resource.fieldmetadata = [
198
- models.UserFieldMetadata.from_message(fm)
199
- for fm in orm_resource.basic.fieldmetadata
181
+ from_proto.user_field_metadata(fm) for fm in orm_resource.basic.fieldmetadata
200
182
  ]
201
- resource.computedmetadata = models.ComputedMetadata.from_message(
202
- orm_resource.basic.computedmetadata
203
- )
183
+ resource.computedmetadata = from_proto.computed_metadata(orm_resource.basic.computedmetadata)
204
184
 
205
185
  resource.last_seqid = orm_resource.basic.last_seqid
206
186
 
207
187
  # 0 on the proto means it was not ever set, as first valid value for this field will allways be 1
208
188
  resource.last_account_seq = (
209
- orm_resource.basic.last_account_seq
210
- if orm_resource.basic.last_account_seq != 0
211
- else None
189
+ orm_resource.basic.last_account_seq if orm_resource.basic.last_account_seq != 0 else None
212
190
  )
213
- resource.queue = QueueType[
214
- orm_resource.basic.QueueType.Name(orm_resource.basic.queue)
215
- ]
191
+ resource.queue = QueueType[orm_resource.basic.QueueType.Name(orm_resource.basic.queue)]
216
192
 
217
193
  if ResourceProperties.RELATIONS in show:
218
194
  await orm_resource.get_relations()
219
195
  if orm_resource.relations is not None:
220
196
  resource.relations = [
221
- models.Relation.from_message(relation)
222
- for relation in orm_resource.relations.relations
197
+ from_proto.relation(relation) for relation in orm_resource.relations.relations
223
198
  ]
224
199
 
225
200
  if ResourceProperties.ORIGIN in show:
226
201
  await orm_resource.get_origin()
227
202
  if orm_resource.origin is not None:
228
- resource.origin = models.Origin.from_message(orm_resource.origin)
203
+ resource.origin = from_proto.origin(orm_resource.origin)
229
204
 
230
205
  if ResourceProperties.EXTRA in show:
231
206
  await orm_resource.get_extra()
232
207
  if orm_resource.extra is not None:
233
- resource.extra = models.Extra.from_message(orm_resource.extra)
208
+ resource.extra = from_proto.extra(orm_resource.extra)
234
209
 
235
210
  include_errors = ResourceProperties.ERRORS in show
236
211
 
@@ -241,11 +216,11 @@ async def managed_serialize(
241
216
  for gid in orm_resource.security.access_groups:
242
217
  resource.security.access_groups.append(gid)
243
218
 
244
- if field_type_filter and (include_values or include_extracted_data):
219
+ if (field_type_filter and (include_values or include_extracted_data)) or include_errors:
245
220
  await orm_resource.get_fields()
246
221
  resource.data = ResourceData()
247
- for (field_type, field_id), field in orm_resource.fields.items():
248
- field_type_name = FIELD_TYPES_MAP[field_type]
222
+ for (field_type, _), field in orm_resource.fields.items():
223
+ field_type_name = from_proto.field_type_name(field_type)
249
224
  if field_type_name not in field_type_filter:
250
225
  continue
251
226
 
@@ -260,18 +235,12 @@ async def managed_serialize(
260
235
  if field.id not in resource.data.texts:
261
236
  resource.data.texts[field.id] = TextFieldData()
262
237
  if include_value:
263
- serialized_value = (
264
- models.FieldText.from_message(value)
265
- if value is not None
266
- else None
267
- )
238
+ serialized_value = from_proto.field_text(value) if value is not None else None
268
239
  resource.data.texts[field.id].value = serialized_value
269
240
  if include_errors:
270
241
  error = await field.get_error()
271
242
  if error is not None:
272
- resource.data.texts[field.id].error = Error(
273
- body=error.error, code=error.code
274
- )
243
+ resource.data.texts[field.id].error = Error(body=error.error, code=error.code)
275
244
  if include_extracted_data:
276
245
  resource.data.texts[field.id].extracted = TextFieldExtractedData()
277
246
  await set_resource_field_extracted_data(
@@ -287,20 +256,14 @@ async def managed_serialize(
287
256
  resource.data.files[field.id] = FileFieldData()
288
257
  if include_value:
289
258
  if value is not None:
290
- resource.data.files[
291
- field.id
292
- ].value = models.FieldFile.from_message(
293
- value # type: ignore
294
- )
259
+ resource.data.files[field.id].value = from_proto.field_file(value)
295
260
  else:
296
261
  resource.data.files[field.id].value = None
297
262
 
298
263
  if include_errors:
299
264
  error = await field.get_error()
300
265
  if error is not None:
301
- resource.data.files[field.id].error = Error(
302
- body=error.error, code=error.code
303
- )
266
+ resource.data.files[field.id].error = Error(body=error.error, code=error.code)
304
267
 
305
268
  if include_extracted_data:
306
269
  resource.data.files[field.id].extracted = FileFieldExtractedData()
@@ -316,16 +279,12 @@ async def managed_serialize(
316
279
  if field.id not in resource.data.links:
317
280
  resource.data.links[field.id] = LinkFieldData()
318
281
  if include_value and value is not None:
319
- resource.data.links[field.id].value = models.FieldLink.from_message(
320
- value
321
- )
282
+ resource.data.links[field.id].value = from_proto.field_link(value)
322
283
 
323
284
  if include_errors:
324
285
  error = await field.get_error()
325
286
  if error is not None:
326
- resource.data.links[field.id].error = Error(
327
- body=error.error, code=error.code
328
- )
287
+ resource.data.links[field.id].error = Error(body=error.error, code=error.code)
329
288
 
330
289
  if include_extracted_data:
331
290
  resource.data.links[field.id].extracted = LinkFieldExtractedData()
@@ -335,33 +294,6 @@ async def managed_serialize(
335
294
  field_type_name,
336
295
  extracted,
337
296
  )
338
- elif field_type_name is FieldTypeName.LAYOUT:
339
- if resource.data.layouts is None:
340
- resource.data.layouts = {}
341
- if field.id not in resource.data.layouts:
342
- resource.data.layouts[field.id] = LayoutFieldData()
343
- if include_value:
344
- resource.data.layouts[
345
- field.id
346
- ].value = models.FieldLayout.from_message(
347
- value # type: ignore
348
- )
349
- if include_errors:
350
- error = await field.get_error()
351
- if error is not None:
352
- resource.data.layouts[field.id].error = Error(
353
- body=error.error, code=error.code
354
- )
355
- if include_extracted_data:
356
- resource.data.layouts[field.id].extracted = (
357
- LayoutFieldExtractedData()
358
- )
359
- await set_resource_field_extracted_data(
360
- field,
361
- resource.data.layouts[field.id].extracted,
362
- field_type_name,
363
- extracted,
364
- )
365
297
  elif field_type_name is FieldTypeName.CONVERSATION:
366
298
  if resource.data.conversations is None:
367
299
  resource.data.conversations = {}
@@ -375,73 +307,15 @@ async def managed_serialize(
375
307
  )
376
308
  if include_value and isinstance(field, Conversation):
377
309
  value = await field.get_metadata()
378
- resource.data.conversations[field.id].value = (
379
- models.FieldConversation.from_message(value)
380
- )
310
+ resource.data.conversations[field.id].value = from_proto.field_conversation(value)
381
311
  if include_extracted_data:
382
- resource.data.conversations[field.id].extracted = (
383
- ConversationFieldExtractedData()
384
- )
312
+ resource.data.conversations[field.id].extracted = ConversationFieldExtractedData()
385
313
  await set_resource_field_extracted_data(
386
314
  field,
387
315
  resource.data.conversations[field.id].extracted,
388
316
  field_type_name,
389
317
  extracted,
390
318
  )
391
- elif field_type_name is FieldTypeName.DATETIME:
392
- if resource.data.datetimes is None:
393
- resource.data.datetimes = {}
394
- if field.id not in resource.data.datetimes:
395
- resource.data.datetimes[field.id] = DatetimeFieldData()
396
- if include_errors:
397
- error = await field.get_error()
398
- if error is not None:
399
- resource.data.datetimes[field.id].error = Error(
400
- body=error.error, code=error.code
401
- )
402
- if include_value:
403
- resource.data.datetimes[
404
- field.id
405
- ].value = models.FieldDatetime.from_message(
406
- value # type: ignore
407
- )
408
- if include_extracted_data:
409
- resource.data.datetimes[field.id].extracted = (
410
- DatetimeFieldExtractedData()
411
- )
412
- await set_resource_field_extracted_data(
413
- field,
414
- resource.data.datetimes[field.id].extracted,
415
- field_type_name,
416
- extracted,
417
- )
418
- elif field_type_name is FieldTypeName.KEYWORDSET:
419
- if resource.data.keywordsets is None:
420
- resource.data.keywordsets = {field.id: KeywordsetFieldData()}
421
- if field.id not in resource.data.keywordsets:
422
- resource.data.keywordsets[field.id] = KeywordsetFieldData()
423
- if include_errors:
424
- error = await field.get_error()
425
- if error is not None:
426
- resource.data.keywordsets[field.id].error = Error(
427
- body=error.error, code=error.code
428
- )
429
- if include_value:
430
- resource.data.keywordsets[
431
- field.id
432
- ].value = models.FieldKeywordset.from_message(
433
- value # type: ignore
434
- )
435
- if include_extracted_data:
436
- resource.data.keywordsets[field.id].extracted = (
437
- KeywordsetFieldExtractedData()
438
- )
439
- await set_resource_field_extracted_data(
440
- field,
441
- resource.data.keywordsets[field.id].extracted,
442
- field_type_name,
443
- extracted,
444
- )
445
319
  elif field_type_name is FieldTypeName.GENERIC:
446
320
  if resource.data.generics is None:
447
321
  resource.data.generics = {}
@@ -452,14 +326,10 @@ async def managed_serialize(
452
326
  if include_errors:
453
327
  error = await field.get_error()
454
328
  if error is not None:
455
- resource.data.generics[field.id].error = Error(
456
- body=error.error, code=error.code
457
- )
329
+ resource.data.generics[field.id].error = Error(body=error.error, code=error.code)
458
330
  if include_extracted_data:
459
331
  resource.data.generics[field.id].extracted = TextFieldExtractedData(
460
- text=models.ExtractedText(
461
- text=resource.data.generics[field.id].value
462
- )
332
+ text=models.ExtractedText(text=resource.data.generics[field.id].value)
463
333
  )
464
334
  return resource
465
335
 
@@ -496,6 +366,6 @@ async def get_resource_uuid_by_slug(
496
366
  ) -> Optional[str]:
497
367
  storage = await get_storage(service_name=service_name)
498
368
  driver = get_driver()
499
- async with driver.transaction() as txn:
369
+ async with driver.transaction(read_only=True) as txn:
500
370
  kb = KnowledgeBox(txn, storage, kbid)
501
371
  return await kb.get_resource_uuid_by_slug(slug)
@@ -48,9 +48,7 @@ async def start_grpc(service_name: Optional[str] = None):
48
48
 
49
49
  await server.start()
50
50
 
51
- logger.info(
52
- f"======= Ingest GRPC running on http://0.0.0.0:{settings.grpc_port}/ ======"
53
- )
51
+ logger.info(f"======= Ingest GRPC running on http://0.0.0.0:{settings.grpc_port}/ ======")
54
52
 
55
53
  async def finalizer():
56
54
  await health_check_finalizer()