nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -402
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +64 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +114 -113
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +25 -127
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -22,26 +22,17 @@ from typing import Optional, Union
22
22
 
23
23
  from fastapi import HTTPException
24
24
  from fastapi.responses import StreamingResponse
25
- from fastapi_versioning import version # type: ignore
25
+ from fastapi_versioning import version
26
26
  from google.protobuf.json_format import MessageToDict
27
- from nucliadb_protos.knowledgebox_pb2 import Synonyms
28
- from nucliadb_protos.writer_pb2 import (
29
- GetEntitiesGroupRequest,
30
- GetEntitiesGroupResponse,
31
- GetLabelSetRequest,
32
- GetLabelSetResponse,
33
- GetLabelsRequest,
34
- GetLabelsResponse,
35
- ListEntitiesGroupsRequest,
36
- ListEntitiesGroupsResponse,
37
- )
38
27
  from starlette.requests import Request
39
28
 
40
29
  from nucliadb.common import datamanagers
41
30
  from nucliadb.common.cluster.settings import in_standalone_mode
42
31
  from nucliadb.common.context.fastapi import get_app_context
32
+ from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
43
33
  from nucliadb.common.http_clients import processing
44
34
  from nucliadb.common.maindb.utils import get_driver
35
+ from nucliadb.common.models_utils import from_proto
45
36
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
46
37
  from nucliadb.models.responses import HTTPClientError
47
38
  from nucliadb.reader import SERVICE_NAME
@@ -49,12 +40,19 @@ from nucliadb.reader.api.v1.router import KB_PREFIX, api
49
40
  from nucliadb.reader.reader.notifications import kb_notifications_stream
50
41
  from nucliadb_models.entities import (
51
42
  EntitiesGroup,
52
- EntitiesGroupSummary,
53
43
  KnowledgeBoxEntities,
54
44
  )
55
45
  from nucliadb_models.labels import KnowledgeBoxLabels, LabelSet
56
46
  from nucliadb_models.resource import NucliaDBRoles
57
47
  from nucliadb_models.synonyms import KnowledgeBoxSynonyms
48
+ from nucliadb_protos import writer_pb2
49
+ from nucliadb_protos.knowledgebox_pb2 import Synonyms
50
+ from nucliadb_protos.writer_pb2 import (
51
+ GetEntitiesGroupRequest,
52
+ GetEntitiesGroupResponse,
53
+ ListEntitiesGroupsRequest,
54
+ ListEntitiesGroupsResponse,
55
+ )
58
56
  from nucliadb_utils.authentication import requires
59
57
  from nucliadb_utils.utilities import get_ingest, get_storage
60
58
 
@@ -88,19 +86,15 @@ async def list_entities_groups(kbid: str):
88
86
  if entities_groups.status == ListEntitiesGroupsResponse.Status.OK:
89
87
  response = KnowledgeBoxEntities(uuid=kbid)
90
88
  for key, eg_summary in entities_groups.groups.items():
91
- entities_group = EntitiesGroupSummary.from_message(eg_summary)
89
+ entities_group = from_proto.entities_group_summary(eg_summary)
92
90
  response.groups[key] = entities_group
93
91
  return response
94
92
  elif entities_groups.status == ListEntitiesGroupsResponse.Status.NOTFOUND:
95
93
  raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
96
94
  elif entities_groups.status == ListEntitiesGroupsResponse.Status.ERROR:
97
- raise HTTPException(
98
- status_code=500, detail="Error while listing entities groups"
99
- )
95
+ raise HTTPException(status_code=500, detail="Error while listing entities groups")
100
96
  else:
101
- raise HTTPException(
102
- status_code=500, detail="Error on listing Knowledge box entities"
103
- )
97
+ raise HTTPException(status_code=500, detail="Error on listing Knowledge box entities")
104
98
 
105
99
 
106
100
  @api.get(
@@ -120,20 +114,14 @@ async def get_entity(request: Request, kbid: str, group: str) -> EntitiesGroup:
120
114
 
121
115
  kbobj: GetEntitiesGroupResponse = await ingest.GetEntitiesGroup(l_request) # type: ignore
122
116
  if kbobj.status == GetEntitiesGroupResponse.Status.OK:
123
- response = EntitiesGroup.from_message(kbobj.group)
117
+ response = from_proto.entities_group(kbobj.group)
124
118
  return response
125
119
  elif kbobj.status == GetEntitiesGroupResponse.Status.KB_NOT_FOUND:
126
- raise HTTPException(
127
- status_code=404, detail=f"Knowledge Box '{kbid}' does not exist"
128
- )
120
+ raise HTTPException(status_code=404, detail=f"Knowledge Box '{kbid}' does not exist")
129
121
  elif kbobj.status == GetEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND:
130
- raise HTTPException(
131
- status_code=404, detail=f"Entities group '{group}' does not exist"
132
- )
122
+ raise HTTPException(status_code=404, detail=f"Entities group '{group}' does not exist")
133
123
  else:
134
- raise HTTPException(
135
- status_code=500, detail="Error on getting entities group on a Knowledge box"
136
- )
124
+ raise HTTPException(status_code=500, detail="Error on getting entities group on a Knowledge box")
137
125
 
138
126
 
139
127
  @api.get(
@@ -145,30 +133,29 @@ async def get_entity(request: Request, kbid: str, group: str) -> EntitiesGroup:
145
133
  )
146
134
  @requires(NucliaDBRoles.READER)
147
135
  @version(1)
148
- async def get_labelsets(request: Request, kbid: str) -> KnowledgeBoxLabels:
149
- ingest = get_ingest()
150
- l_request: GetLabelsRequest = GetLabelsRequest()
151
- l_request.kb.uuid = kbid
136
+ async def get_labelsets_endoint(request: Request, kbid: str) -> KnowledgeBoxLabels:
137
+ try:
138
+ return await get_labelsets(kbid)
139
+ except KnowledgeBoxNotFound:
140
+ raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
141
+
152
142
 
153
- kbobj: GetLabelsResponse = await ingest.GetLabels(l_request) # type: ignore
154
- if kbobj.status == GetLabelsResponse.Status.OK:
155
- response = KnowledgeBoxLabels(uuid=kbid)
156
- for labelset, labelset_data in kbobj.labels.labelset.items():
157
- labelset_response = LabelSet(
158
- **MessageToDict(
159
- labelset_data,
160
- preserving_proto_field_name=True,
161
- including_default_value_fields=True,
162
- )
143
+ async def get_labelsets(kbid: str) -> KnowledgeBoxLabels:
144
+ kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
145
+ if not kb_exists:
146
+ raise KnowledgeBoxNotFound()
147
+ labelsets: writer_pb2.Labels = await datamanagers.atomic.labelset.get_all(kbid=kbid)
148
+ response = KnowledgeBoxLabels(uuid=kbid)
149
+ for labelset, labelset_data in labelsets.labelset.items():
150
+ labelset_response = LabelSet(
151
+ **MessageToDict(
152
+ labelset_data,
153
+ preserving_proto_field_name=True,
154
+ including_default_value_fields=True,
163
155
  )
164
- response.labelsets[labelset] = labelset_response
165
- return response
166
- elif kbobj.status == GetLabelsResponse.Status.NOTFOUND:
167
- raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
168
- else:
169
- raise HTTPException(
170
- status_code=500, detail="Error on getting Knowledge box labels"
171
156
  )
157
+ response.labelsets[labelset] = labelset_response
158
+ return response
172
159
 
173
160
 
174
161
  @api.get(
@@ -180,28 +167,31 @@ async def get_labelsets(request: Request, kbid: str) -> KnowledgeBoxLabels:
180
167
  )
181
168
  @requires(NucliaDBRoles.READER)
182
169
  @version(1)
183
- async def get_labelset(request: Request, kbid: str, labelset: str) -> LabelSet:
184
- ingest = get_ingest()
185
- l_request: GetLabelSetRequest = GetLabelSetRequest()
186
- l_request.kb.uuid = kbid
187
- l_request.labelset = labelset
170
+ async def get_labelset_endpoint(request: Request, kbid: str, labelset: str) -> LabelSet:
171
+ try:
172
+ return await get_labelset(kbid, labelset)
173
+ except KnowledgeBoxNotFound:
174
+ raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
175
+
188
176
 
189
- kbobj: GetLabelSetResponse = await ingest.GetLabelSet(l_request) # type: ignore
190
- if kbobj.status == GetLabelSetResponse.Status.OK:
177
+ async def get_labelset(kbid: str, labelset_id: str) -> LabelSet:
178
+ kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
179
+ if not kb_exists:
180
+ raise KnowledgeBoxNotFound()
181
+ labelset: Optional[writer_pb2.LabelSet] = await datamanagers.atomic.labelset.get(
182
+ kbid=kbid, labelset_id=labelset_id
183
+ )
184
+ if labelset is None:
185
+ response = LabelSet()
186
+ else:
191
187
  response = LabelSet(
192
188
  **MessageToDict(
193
- kbobj.labelset,
189
+ labelset,
194
190
  preserving_proto_field_name=True,
195
191
  including_default_value_fields=True,
196
192
  )
197
193
  )
198
- return response
199
- elif kbobj.status == GetLabelSetResponse.Status.NOTFOUND:
200
- raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
201
- else:
202
- raise HTTPException(
203
- status_code=500, detail="Error on getting labelset on a Knowledge box"
204
- )
194
+ return response
205
195
 
206
196
 
207
197
  @api.get(
@@ -215,15 +205,10 @@ async def get_labelset(request: Request, kbid: str, labelset: str) -> LabelSet:
215
205
  @requires(NucliaDBRoles.READER)
216
206
  @version(1)
217
207
  async def get_custom_synonyms(request: Request, kbid: str):
218
- async with datamanagers.with_transaction(read_only=True) as txn:
219
- if not datamanagers.kb.exists_kb(txn, kbid=kbid):
220
- raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
221
- synonyms = await datamanagers.synonyms.get(txn, kbid=kbid)
222
-
223
- if synonyms is None:
224
- synonyms = Synonyms()
225
-
226
- return KnowledgeBoxSynonyms.from_message(synonyms)
208
+ if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
209
+ raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
210
+ synonyms = await datamanagers.atomic.synonyms.get(kbid=kbid) or Synonyms()
211
+ return from_proto.kb_synonyms(synonyms)
227
212
 
228
213
 
229
214
  @api.get(
@@ -262,7 +247,7 @@ async def notifications_endpoint(
262
247
 
263
248
 
264
249
  async def exists_kb(kbid: str) -> bool:
265
- async with datamanagers.with_transaction(read_only=True) as txn:
250
+ async with datamanagers.with_ro_transaction() as txn:
266
251
  return await datamanagers.kb.exists_kb(txn, kbid=kbid)
267
252
 
268
253
 
@@ -290,14 +275,12 @@ async def processing_status(
290
275
  return HTTPClientError(status_code=404, detail="Knowledge Box not found")
291
276
 
292
277
  async with processing.ProcessingHTTPClient() as client:
293
- results = await client.requests(
294
- cursor=cursor, scheduled=scheduled, kbid=kbid, limit=limit
295
- )
278
+ results = await client.requests(cursor=cursor, scheduled=scheduled, kbid=kbid, limit=limit)
296
279
 
297
280
  storage = await get_storage(service_name=SERVICE_NAME)
298
281
  driver = get_driver()
299
282
 
300
- async with driver.transaction(wait_for_abort=False, read_only=True) as txn:
283
+ async with driver.transaction(read_only=True) as txn:
301
284
  kb = KnowledgeBox(txn, storage, kbid)
302
285
 
303
286
  max_simultaneous = asyncio.Semaphore(10)
@@ -319,9 +302,7 @@ async def processing_status(
319
302
 
320
303
  result_items = [
321
304
  item
322
- for item in await asyncio.gather(
323
- *[_composition(result) for result in results.results]
324
- )
305
+ for item in await asyncio.gather(*[_composition(result) for result in results.results])
325
306
  if item is not None
326
307
  ]
327
308
 
nucliadb/reader/app.py CHANGED
@@ -18,63 +18,46 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
- import pkg_resources
21
+ import importlib.metadata
22
+
22
23
  from fastapi import FastAPI
23
24
  from starlette.middleware import Middleware
24
25
  from starlette.middleware.authentication import AuthenticationMiddleware
25
- from starlette.middleware.cors import CORSMiddleware
26
26
  from starlette.requests import ClientDisconnect, Request
27
27
  from starlette.responses import HTMLResponse
28
28
 
29
- from nucliadb.common.context.fastapi import set_app_context
30
29
  from nucliadb.reader import API_PREFIX
31
30
  from nucliadb.reader.api.v1.router import api as api_v1
32
- from nucliadb.reader.lifecycle import finalize, initialize
31
+ from nucliadb.reader.lifecycle import lifespan
33
32
  from nucliadb_telemetry import errors
34
33
  from nucliadb_telemetry.fastapi.utils import (
35
34
  client_disconnect_handler,
36
35
  global_exception_handler,
37
36
  )
38
- from nucliadb_utils import const
37
+ from nucliadb_utils.audit.stream import AuditMiddleware
39
38
  from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
40
39
  from nucliadb_utils.fastapi.openapi import extend_openapi
41
40
  from nucliadb_utils.fastapi.versioning import VersionedFastAPI
42
- from nucliadb_utils.settings import http_settings, running_settings
43
- from nucliadb_utils.utilities import has_feature
41
+ from nucliadb_utils.settings import running_settings
42
+ from nucliadb_utils.utilities import get_audit
44
43
 
45
44
  middleware = []
46
-
47
- if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
48
- middleware.append(
49
- Middleware(
50
- CORSMiddleware,
51
- allow_origins=http_settings.cors_origins,
52
- allow_methods=["*"],
53
- # Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
54
- # Browsers already showing deprecation notices, so it needs to be specified explicitly
55
- allow_headers=["*", "Authorization"],
56
- )
57
- )
58
-
59
45
  middleware.extend(
60
46
  [
61
47
  Middleware(
62
48
  AuthenticationMiddleware,
63
49
  backend=NucliaCloudAuthenticationBackend(),
64
- )
50
+ ),
51
+ Middleware(AuditMiddleware, audit_utility_getter=get_audit),
65
52
  ]
66
53
  )
67
54
 
68
- errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
69
-
70
- on_startup = [initialize]
71
- on_shutdown = [finalize]
55
+ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
72
56
 
73
57
  fastapi_settings = dict(
74
58
  debug=running_settings.debug,
75
59
  middleware=middleware,
76
- on_startup=on_startup,
77
- on_shutdown=on_shutdown,
60
+ lifespan=lifespan,
78
61
  exception_handlers={
79
62
  Exception: global_exception_handler,
80
63
  ClientDisconnect: client_disconnect_handler,
@@ -104,7 +87,7 @@ def create_application() -> FastAPI:
104
87
  # Use raw starlette routes to avoid unnecessary overhead
105
88
  application.add_route("/", homepage)
106
89
 
107
- # Inject application context into the fastapi app's state
108
- set_app_context(application)
90
+ # # Inject application context into the fastapi app's state
91
+ # set_app_context(application)
109
92
 
110
93
  return application
@@ -17,20 +17,34 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ from contextlib import asynccontextmanager
21
+
22
+ from fastapi import FastAPI
23
+
24
+ from nucliadb.common.context.fastapi import inject_app_context
20
25
  from nucliadb.ingest.utils import start_ingest, stop_ingest
21
26
  from nucliadb.reader import SERVICE_NAME
22
27
  from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry
23
- from nucliadb_utils.utilities import start_audit_utility, stop_audit_utility
28
+ from nucliadb_utils.utilities import (
29
+ get_storage,
30
+ start_audit_utility,
31
+ stop_audit_utility,
32
+ teardown_storage,
33
+ )
24
34
 
25
35
 
26
- async def initialize() -> None:
36
+ @asynccontextmanager
37
+ async def lifespan(app: FastAPI):
27
38
  await setup_telemetry(SERVICE_NAME)
28
-
39
+ await get_storage(service_name=SERVICE_NAME)
29
40
  await start_ingest(SERVICE_NAME)
30
41
  await start_audit_utility(SERVICE_NAME)
31
42
 
43
+ # Inject application context into the fastapi app's state
44
+ async with inject_app_context(app):
45
+ yield
32
46
 
33
- async def finalize() -> None:
34
47
  await stop_ingest()
35
48
  await stop_audit_utility()
49
+ await teardown_storage()
36
50
  await clean_telemetry(SERVICE_NAME)
File without changes
@@ -59,9 +59,7 @@ RESOURCE_OP_PB_TO_MODEL = {
59
59
  }
60
60
 
61
61
 
62
- async def kb_notifications_stream(
63
- context: ApplicationContext, kbid: str
64
- ) -> AsyncGenerator[bytes, None]:
62
+ async def kb_notifications_stream(context: ApplicationContext, kbid: str) -> AsyncGenerator[bytes, None]:
65
63
  """
66
64
  Returns an async generator that yields pubsub notifications for the given kbid.
67
65
  The generator will return after NOTIFICATIONS_TIMEOUT_S seconds.
@@ -70,13 +68,9 @@ async def kb_notifications_stream(
70
68
  resource_cache: dict[str, str] = {}
71
69
  async with async_timeout.timeout(NOTIFICATIONS_TIMEOUT_S):
72
70
  async for pb_notification in kb_notifications(kbid):
73
- notification = await serialize_notification(
74
- context, pb_notification, resource_cache
75
- )
71
+ notification = await serialize_notification(context, pb_notification, resource_cache)
76
72
  line = encode_streamed_notification(notification) + b"\n"
77
- logger.debug(
78
- f"Sending notification: {notification.type}", extra={"kbid": kbid}
79
- )
73
+ logger.debug(f"Sending notification: {notification.type}", extra={"kbid": kbid})
80
74
  yield line
81
75
  except asyncio.TimeoutError:
82
76
  return
@@ -106,9 +100,7 @@ async def kb_notifications(kbid: str) -> AsyncGenerator[writer_pb2.Notification,
106
100
  except asyncio.QueueFull: # pragma: no cover
107
101
  logger.warning("Queue is full, dropping notification", extra={"kbid": kbid})
108
102
 
109
- async with managed_subscription(
110
- pubsub, key=subscription_key, handler=subscription_handler
111
- ):
103
+ async with managed_subscription(pubsub, key=subscription_key, handler=subscription_handler):
112
104
  try:
113
105
  while True:
114
106
  notification: writer_pb2.Notification = await queue.get()
@@ -121,9 +113,7 @@ async def kb_notifications(kbid: str) -> AsyncGenerator[writer_pb2.Notification,
121
113
  )
122
114
  except Exception as ex:
123
115
  capture_exception(ex)
124
- logger.error(
125
- "Error while streaming activity", exc_info=True, extra={"kbid": kbid}
126
- )
116
+ logger.error("Error while streaming activity", exc_info=True, extra={"kbid": kbid})
127
117
  return
128
118
 
129
119
 
@@ -144,9 +134,7 @@ async def managed_subscription(pubsub: PubSubDriver, key: str, handler: Callback
144
134
  try:
145
135
  await pubsub.unsubscribe(key=key, subscription_id=subscription_id)
146
136
  except Exception: # pragma: no cover
147
- logger.warning(
148
- "Error while unsubscribing from activity stream", exc_info=True
149
- )
137
+ logger.warning("Error while unsubscribing from activity stream", exc_info=True)
150
138
 
151
139
 
152
140
  async def serialize_notification(
@@ -156,9 +144,7 @@ async def serialize_notification(
156
144
  resource_uuid = pb.uuid
157
145
  seqid = pb.seqid
158
146
 
159
- resource_title = await get_resource_title_cached(
160
- context.kv_driver, kbid, resource_uuid, cache
161
- )
147
+ resource_title = await get_resource_title_cached(context.kv_driver, kbid, resource_uuid, cache)
162
148
  if pb.action == writer_pb2.Notification.Action.INDEXED:
163
149
  return ResourceIndexedNotification(
164
150
  data=ResourceIndexed(
@@ -213,18 +199,14 @@ async def get_resource_title_cached(
213
199
  return resource_title
214
200
 
215
201
 
216
- async def get_resource_title(
217
- kv_driver: Driver, kbid: str, resource_uuid: str
218
- ) -> Optional[str]:
202
+ async def get_resource_title(kv_driver: Driver, kbid: str, resource_uuid: str) -> Optional[str]:
219
203
  async with kv_driver.transaction(read_only=True) as txn:
220
- basic = await datamanagers.resources.get_basic(
221
- txn, kbid=kbid, rid=resource_uuid
222
- )
204
+ basic = await datamanagers.resources.get_basic(txn, kbid=kbid, rid=resource_uuid)
223
205
  if basic is None:
224
206
  return None
225
207
  return basic.title
226
208
 
227
209
 
228
210
  def encode_streamed_notification(notification: Notification) -> bytes:
229
- encoded_nofication = notification.json().encode("utf-8")
211
+ encoded_nofication = notification.model_dump_json().encode("utf-8")
230
212
  return encoded_nofication
@@ -34,9 +34,7 @@ API_PREFIX = "api"
34
34
  class EndpointFilter(logging.Filter):
35
35
  def filter(self, record: logging.LogRecord) -> bool:
36
36
  return (
37
- record.args is not None
38
- and len(record.args) >= 3
39
- and record.args[2] not in ("/", "/metrics") # type: ignore
37
+ record.args is not None and len(record.args) >= 3 and record.args[2] not in ("/", "/metrics") # type: ignore
40
38
  )
41
39
 
42
40
 
@@ -18,7 +18,7 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  from . import ask # noqa
21
- from . import chat # noqa
21
+ from . import catalog # noqa
22
22
  from . import feedback # noqa
23
23
  from . import find # noqa
24
24
  from . import knowledgebox # noqa
@@ -27,6 +27,5 @@ from . import search # noqa
27
27
  from . import suggest # noqa
28
28
  from . import summarize # noqa
29
29
  from .resource import ask as ask_resource # noqa
30
- from .resource import chat as chat_resource # noqa
31
30
  from .resource import search as search_resource # noqa
32
31
  from .router import api # noqa
@@ -25,7 +25,10 @@ from starlette.responses import StreamingResponse
25
25
 
26
26
  from nucliadb.models.responses import HTTPClientError
27
27
  from nucliadb.search.api.v1.router import KB_PREFIX, api
28
+ from nucliadb.search.search import cache
28
29
  from nucliadb.search.search.chat.ask import AskResult, ask, handled_ask_exceptions
30
+ from nucliadb.search.search.chat.exceptions import AnswerJsonSchemaTooLong
31
+ from nucliadb.search.search.utils import maybe_log_request_payload
29
32
  from nucliadb_models.resource import NucliaDBRoles
30
33
  from nucliadb_models.search import (
31
34
  AskRequest,
@@ -43,8 +46,6 @@ from nucliadb_utils.authentication import requires
43
46
  description="Ask questions on a Knowledge Box",
44
47
  tags=["Search"],
45
48
  response_model=SyncAskResponse,
46
- # Add this to OpenAPI schema when endpoint is not in beta anymore
47
- include_in_schema=False,
48
49
  )
49
50
  @requires(NucliaDBRoles.READER)
50
51
  @version(1)
@@ -76,15 +77,21 @@ async def create_ask_response(
76
77
  x_synchronous: bool,
77
78
  resource: Optional[str] = None,
78
79
  ) -> Response:
80
+ maybe_log_request_payload(kbid, "/ask", ask_request)
79
81
  ask_request.max_tokens = parse_max_tokens(ask_request.max_tokens)
80
- ask_result: AskResult = await ask(
81
- kbid=kbid,
82
- ask_request=ask_request,
83
- user_id=user_id,
84
- client_type=client_type,
85
- origin=origin,
86
- resource=resource,
87
- )
82
+ with cache.request_caches():
83
+ try:
84
+ ask_result: AskResult = await ask(
85
+ kbid=kbid,
86
+ ask_request=ask_request,
87
+ user_id=user_id,
88
+ client_type=client_type,
89
+ origin=origin,
90
+ resource=resource,
91
+ )
92
+ except AnswerJsonSchemaTooLong as err:
93
+ return HTTPClientError(status_code=400, detail=str(err))
94
+
88
95
  headers = {
89
96
  "NUCLIA-LEARNING-ID": ask_result.nuclia_learning_id or "unknown",
90
97
  "Access-Control-Expose-Headers": "NUCLIA-LEARNING-ID",