nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (418)
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,475 +0,0 @@
1
- # Copyright (C) 2021 Bosutech XXI S.L.
2
- #
3
- # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
- # For commercial licensing, contact us at info@nuclia.com.
5
- #
6
- # AGPL:
7
- # This program is free software: you can redistribute it and/or modify
8
- # it under the terms of the GNU Affero General Public License as
9
- # published by the Free Software Foundation, either version 3 of the
10
- # License, or (at your option) any later version.
11
- #
12
- # This program is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
- # GNU Affero General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Affero General Public License
18
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
- #
20
-
21
- from copy import deepcopy
22
- from datetime import datetime
23
- from os.path import dirname
24
-
25
- import pytest
26
-
27
- from nucliadb.writer.api.v1.router import (
28
- KB_PREFIX,
29
- RESOURCE_PREFIX,
30
- RESOURCES_PREFIX,
31
- RSLUG_PREFIX,
32
- )
33
- from nucliadb.writer.tests.utils import load_file_as_FileB64_payload
34
- from nucliadb_models.resource import NucliaDBRoles
35
-
36
- TEST_FILE = {f"{dirname(__file__)}/orm/"}
37
- TEST_TEXT_PAYLOAD = {"body": "test1", "format": "PLAIN"}
38
- TEST_LINK_PAYLOAD = {
39
- "added": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
40
- "headers": {},
41
- "cookies": {},
42
- "uri": "http://some-link.com",
43
- "language": "en",
44
- "localstorage": {},
45
- "css_selector": "main",
46
- "xpath": "my_xpath",
47
- }
48
- TEST_KEYWORDSETS_PAYLOAD = {"keywords": [{"value": "kw1"}, {"value": "kw2"}]}
49
- TEST_DATETIMES_PAYLOAD = {"value": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")}
50
- TEST_CONVERSATION_PAYLOAD = {
51
- "messages": [
52
- {
53
- "timestamp": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
54
- "who": "Bob",
55
- "to": ["Alice", "Charlie"],
56
- "content": {
57
- "text": "Hi people!",
58
- "format": "PLAIN",
59
- "files": [
60
- load_file_as_FileB64_payload("/assets/image001.jpg", "image/jpg")
61
- ],
62
- },
63
- "ident": "message_id_001",
64
- }
65
- ]
66
- }
67
- TEST_LAYOUT_PAYLOAD = {
68
- "body": {
69
- "blocks": {
70
- "block1": {
71
- "x": 0,
72
- "y": 0,
73
- "cols": 1,
74
- "rows": 2,
75
- "type": "TITLE",
76
- "ident": "main_title",
77
- "payload": "This is a Test Title",
78
- "file": load_file_as_FileB64_payload(
79
- "/assets/image001.jpg", "image/jpg"
80
- ),
81
- }
82
- }
83
- },
84
- "format": "NUCLIAv1",
85
- }
86
-
87
- TEST_FILE_PAYLOAD = {
88
- "language": "en",
89
- "password": "xxxxxx",
90
- "file": load_file_as_FileB64_payload("/assets/image001.jpg", "image/jpg"),
91
- }
92
-
93
- TEST_EXTERNAL_FILE_PAYLOAD = {
94
- "file": {
95
- "uri": "https://mysite.com/files/myfile.pdf",
96
- "extra_headers": {"foo": "bar"},
97
- }
98
- }
99
-
100
- TEST_CONVERSATION_APPEND_MESSAGES_PAYLOAD = [
101
- {
102
- "timestamp": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
103
- "who": "Bob",
104
- "to": ["Alice", "Charlie"],
105
- "content": {
106
- "text": "Hi people!",
107
- "format": "PLAIN",
108
- "attachments": [
109
- load_file_as_FileB64_payload("/assets/image001.jpg", "image/jpg")
110
- ],
111
- },
112
- "ident": "message_id_001",
113
- }
114
- ]
115
-
116
- TEST_LAYOUT_APPEND_BLOCKS_PAYLOAD = {
117
- "block1": {
118
- "x": 0,
119
- "y": 0,
120
- "cols": 1,
121
- "rows": 2,
122
- "type": "TITLE",
123
- "ident": "main_title",
124
- "payload": "This is a Test Title",
125
- "file": load_file_as_FileB64_payload("/assets/image001.jpg", "image/jpg"),
126
- }
127
- }
128
-
129
-
130
- @pytest.mark.asyncio
131
- async def test_resource_field_add(writer_api, knowledgebox_writer):
132
- knowledgebox_id = knowledgebox_writer
133
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
134
- resp = await client.post(
135
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
136
- json={"slug": "resource1", "title": "My resource"},
137
- )
138
- assert resp.status_code == 201
139
- data = resp.json()
140
- assert "uuid" in data
141
- assert "seqid" in data
142
- rid = data["uuid"]
143
-
144
- # Text
145
- resp = await client.put(
146
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/text/text1",
147
- json=TEST_TEXT_PAYLOAD,
148
- )
149
- assert resp.status_code == 201
150
- data = resp.json()
151
- assert "seqid" in data
152
-
153
- # Link
154
- resp = await client.put(
155
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/link/link1",
156
- json=TEST_LINK_PAYLOAD,
157
- )
158
- assert resp.status_code == 201
159
- data = resp.json()
160
- assert "seqid" in data
161
-
162
- # Keywordset
163
- resp = await client.put(
164
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/keywordset/kws1",
165
- json=TEST_KEYWORDSETS_PAYLOAD,
166
- )
167
- assert resp.status_code == 201
168
- data = resp.json()
169
- assert "seqid" in data
170
-
171
- # Datetimes
172
-
173
- resp = await client.put(
174
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/datetime/date1",
175
- json=TEST_DATETIMES_PAYLOAD,
176
- )
177
- assert resp.status_code == 201
178
- data = resp.json()
179
- assert "seqid" in data
180
-
181
- # Conversation
182
- resp = await client.put(
183
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/conversation/conv1",
184
- json=TEST_CONVERSATION_PAYLOAD,
185
- )
186
-
187
- assert resp.status_code == 201
188
- data = resp.json()
189
- assert "seqid" in data
190
-
191
- # Layout
192
- resp = await client.put(
193
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/layout/layout1",
194
- json=TEST_LAYOUT_PAYLOAD,
195
- )
196
-
197
- assert resp.status_code == 201
198
- data = resp.json()
199
- assert "seqid" in data
200
-
201
- # File
202
- resp = await client.put(
203
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/file/file1",
204
- json=TEST_FILE_PAYLOAD,
205
- )
206
- assert resp.status_code == 201
207
- data = resp.json()
208
- assert "seqid" in data
209
-
210
- # File without storing it in the internal BrokerMessage, only send to process
211
- resp = await client.put(
212
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/file/file1",
213
- json=TEST_FILE_PAYLOAD,
214
- headers={"x_skip_store": "1"},
215
- )
216
- assert resp.status_code == 201
217
- data = resp.json()
218
- assert "seqid" in data
219
-
220
- # File field pointing to an externally hosted file
221
- resp = await client.put(
222
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/file/externalfile",
223
- json=TEST_EXTERNAL_FILE_PAYLOAD,
224
- )
225
- assert resp.status_code == 201
226
- data = resp.json()
227
- assert "seqid" in data
228
-
229
-
230
- @pytest.mark.asyncio
231
- async def test_resource_field_append_extra(writer_api, knowledgebox_writer):
232
- knowledgebox_id = knowledgebox_writer
233
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
234
- resp = await client.post(
235
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
236
- json={
237
- "slug": "resource1",
238
- "title": "My resource",
239
- "layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
240
- "conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
241
- },
242
- )
243
- assert resp.status_code == 201
244
- data = resp.json()
245
- assert "uuid" in data
246
- assert "seqid" in data
247
- rid = data["uuid"]
248
-
249
- # Conversation
250
- resp = await client.put(
251
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/conversation/conv1/messages",
252
- json=TEST_CONVERSATION_APPEND_MESSAGES_PAYLOAD,
253
- )
254
- assert resp.status_code == 200
255
- data = resp.json()
256
- assert "seqid" in data
257
-
258
- # Layout
259
- resp = await client.put(
260
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/layout/layout1/blocks",
261
- json=TEST_LAYOUT_APPEND_BLOCKS_PAYLOAD,
262
- )
263
-
264
- assert resp.status_code == 200
265
- data = resp.json()
266
- assert "seqid" in data
267
-
268
-
269
- @pytest.mark.asyncio
270
- async def test_resource_field_delete(writer_api, knowledgebox_writer):
271
- knowledgebox_id = knowledgebox_writer
272
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
273
- resp = await client.post(
274
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
275
- json={
276
- "slug": "resource1",
277
- "title": "My resource",
278
- "texts": {"text1": TEST_TEXT_PAYLOAD},
279
- "links": {"link1": TEST_LINK_PAYLOAD},
280
- "files": {"file1": TEST_FILE_PAYLOAD},
281
- "layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
282
- "conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
283
- "keywordsets": {"keywordset1": TEST_KEYWORDSETS_PAYLOAD},
284
- "datetimes": {"datetime1": TEST_DATETIMES_PAYLOAD},
285
- },
286
- )
287
-
288
- assert resp.status_code == 201
289
- data = resp.json()
290
- rid = data["uuid"]
291
-
292
- # Text
293
- resp = await client.delete(
294
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/text/text1"
295
- )
296
- assert resp.status_code == 204
297
-
298
- # Link
299
- resp = await client.delete(
300
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/link/link1"
301
- )
302
- assert resp.status_code == 204
303
-
304
- # Keywords
305
- resp = await client.delete(
306
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/keywordset/kws1"
307
- )
308
- assert resp.status_code == 204
309
-
310
- # Datetimes
311
-
312
- resp = await client.delete(
313
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/datetime/date1"
314
- )
315
- assert resp.status_code == 204
316
-
317
- # Conversation
318
- resp = await client.delete(
319
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/conversation/conv1"
320
- )
321
-
322
- # Layout
323
- resp = await client.delete(
324
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/layout/layout1"
325
- )
326
- assert resp.status_code == 204
327
-
328
- # File
329
- resp = await client.delete(
330
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/file/file1"
331
- )
332
- assert resp.status_code == 204
333
-
334
-
335
- @pytest.mark.asyncio
336
- @pytest.mark.parametrize(
337
- "endpoint,payload",
338
- [
339
- ("text/text1", TEST_TEXT_PAYLOAD),
340
- ("link/link1", TEST_LINK_PAYLOAD),
341
- ("keywordset/kws1", TEST_KEYWORDSETS_PAYLOAD),
342
- ("datetime/date1", TEST_DATETIMES_PAYLOAD),
343
- ("conversation/conv1", TEST_CONVERSATION_PAYLOAD),
344
- ("conversation/conv1/messages", TEST_CONVERSATION_APPEND_MESSAGES_PAYLOAD),
345
- ("layout/layout1", TEST_LAYOUT_PAYLOAD),
346
- ("layout/layout1/blocks", TEST_LAYOUT_APPEND_BLOCKS_PAYLOAD),
347
- ("file/file1", TEST_FILE_PAYLOAD),
348
- ],
349
- )
350
- async def test_sync_ops(writer_api, knowledgebox_writer, endpoint, payload):
351
- knowledgebox_id = knowledgebox_writer
352
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
353
- # Create a resource
354
- resp = await client.post(
355
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
356
- json={
357
- "slug": "resource1",
358
- "title": "My resource",
359
- "layouts": {"layout1": TEST_LAYOUT_PAYLOAD},
360
- "conversations": {"conv1": TEST_CONVERSATION_PAYLOAD},
361
- },
362
- )
363
- assert resp.status_code == 201
364
- data = resp.json()
365
- rid = data["uuid"]
366
-
367
- resource_path = f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}"
368
- resp = await client.put(
369
- f"{resource_path}/{endpoint}",
370
- json=payload,
371
- )
372
- assert resp.status_code in (201, 200)
373
-
374
-
375
- @pytest.mark.asyncio
376
- async def test_external_file_field(writer_api, knowledgebox_writer):
377
- knowledgebox_id = knowledgebox_writer
378
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
379
- resp = await client.post(
380
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
381
- json={"slug": "resource1", "title": "My resource"},
382
- )
383
- assert resp.status_code == 201
384
- rid = resp.json()["uuid"]
385
-
386
- # File field pointing to an externally hosted file
387
- resp = await client.put(
388
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/file/externalfile",
389
- json=TEST_EXTERNAL_FILE_PAYLOAD,
390
- )
391
- assert resp.status_code == 201
392
-
393
-
394
- @pytest.mark.asyncio
395
- async def test_file_field_validation(writer_api, knowledgebox_writer):
396
- knowledgebox_id = knowledgebox_writer
397
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
398
- resp = await client.post(
399
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCES_PREFIX}",
400
- json={"slug": "resource1", "title": "My resource"},
401
- )
402
- assert resp.status_code == 201
403
- rid = resp.json()["uuid"]
404
-
405
- # Remove a required key from the payload
406
- payload = deepcopy(TEST_FILE_PAYLOAD)
407
- payload["file"].pop("md5")
408
-
409
- resp = await client.put(
410
- f"/{KB_PREFIX}/{knowledgebox_id}/{RESOURCE_PREFIX}/{rid}/file/file1",
411
- json=payload,
412
- )
413
- assert resp.status_code == 201
414
-
415
-
416
- @pytest.mark.parametrize(
417
- "method,endpoint,payload",
418
- [
419
- ["put", "/text/{field_id}", TEST_TEXT_PAYLOAD],
420
- ["put", "/link/{field_id}", TEST_LINK_PAYLOAD],
421
- ["put", "/keywordset/{field_id}", TEST_KEYWORDSETS_PAYLOAD],
422
- ["put", "/datetime/{field_id}", TEST_DATETIMES_PAYLOAD],
423
- ["put", "/layout/{field_id}", TEST_LAYOUT_PAYLOAD],
424
- ["put", "/conversation/{field_id}", TEST_CONVERSATION_PAYLOAD],
425
- ["put", "/file/{field_id}", TEST_FILE_PAYLOAD],
426
- ["delete", "", None],
427
- ],
428
- )
429
- @pytest.mark.asyncio()
430
- async def test_field_endpoints_by_slug(
431
- writer_api,
432
- knowledgebox_ingest,
433
- method,
434
- endpoint,
435
- payload,
436
- ):
437
- async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
438
- slug = "my-resource"
439
- field_id = "myfield"
440
- field_type = "text"
441
-
442
- resp = await client.post(
443
- f"/{KB_PREFIX}/{knowledgebox_ingest}/{RESOURCES_PREFIX}",
444
- json={"slug": slug},
445
- )
446
- assert resp.status_code == 201
447
-
448
- extra_params = {}
449
- if payload is not None:
450
- extra_params["json"] = payload
451
- op = getattr(client, method)
452
-
453
- # Try first a non-existing slug should return 404
454
- url = endpoint.format(
455
- field_id=field_id,
456
- field_type=field_type,
457
- )
458
-
459
- resp = await op(
460
- f"/{KB_PREFIX}/{knowledgebox_ingest}/{RSLUG_PREFIX}/idonotexist" + url,
461
- **extra_params,
462
- )
463
- assert resp.status_code == 404
464
- assert resp.json()["detail"] == "Resource does not exist"
465
-
466
- # Try the happy path now
467
- url = endpoint.format(
468
- field_id=field_id,
469
- field_type=field_type,
470
- )
471
- resp = await op(
472
- f"/{KB_PREFIX}/{knowledgebox_ingest}/{RSLUG_PREFIX}/{slug}" + url,
473
- **extra_params,
474
- )
475
- assert str(resp.status_code).startswith("2")