nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/reader/app.py CHANGED
@@ -18,76 +18,46 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
- import pkg_resources
21
+ import importlib.metadata
22
+
22
23
  from fastapi import FastAPI
23
- from fastapi.responses import JSONResponse
24
24
  from starlette.middleware import Middleware
25
25
  from starlette.middleware.authentication import AuthenticationMiddleware
26
- from starlette.middleware.cors import CORSMiddleware
27
26
  from starlette.requests import ClientDisconnect, Request
28
27
  from starlette.responses import HTMLResponse
29
28
 
30
- from nucliadb.common.context.fastapi import set_app_context
31
29
  from nucliadb.reader import API_PREFIX
32
30
  from nucliadb.reader.api.v1.router import api as api_v1
33
- from nucliadb.reader.lifecycle import finalize, initialize
31
+ from nucliadb.reader.lifecycle import lifespan
34
32
  from nucliadb_telemetry import errors
35
- from nucliadb_utils import const
33
+ from nucliadb_telemetry.fastapi.utils import (
34
+ client_disconnect_handler,
35
+ global_exception_handler,
36
+ )
37
+ from nucliadb_utils.audit.stream import AuditMiddleware
36
38
  from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
37
39
  from nucliadb_utils.fastapi.openapi import extend_openapi
38
40
  from nucliadb_utils.fastapi.versioning import VersionedFastAPI
39
- from nucliadb_utils.settings import http_settings, running_settings
40
- from nucliadb_utils.utilities import has_feature
41
+ from nucliadb_utils.settings import running_settings
42
+ from nucliadb_utils.utilities import get_audit
41
43
 
42
44
  middleware = []
43
-
44
- if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
45
- middleware.append(
46
- Middleware(
47
- CORSMiddleware,
48
- allow_origins=http_settings.cors_origins,
49
- allow_methods=["*"],
50
- # Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
51
- # Browsers already showing deprecation notices, so it needs to be specified explicitly
52
- allow_headers=["*", "Authorization"],
53
- )
54
- )
55
-
56
45
  middleware.extend(
57
46
  [
58
47
  Middleware(
59
48
  AuthenticationMiddleware,
60
49
  backend=NucliaCloudAuthenticationBackend(),
61
- )
50
+ ),
51
+ Middleware(AuditMiddleware, audit_utility_getter=get_audit),
62
52
  ]
63
53
  )
64
54
 
65
- errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
66
-
67
- on_startup = [initialize]
68
- on_shutdown = [finalize]
69
-
70
-
71
- async def global_exception_handler(request: Request, exc: Exception):
72
- errors.capture_exception(exc)
73
- return JSONResponse(
74
- status_code=500,
75
- content={"detail": "Something went wrong, please contact your administrator"},
76
- )
77
-
78
-
79
- async def client_disconnect_handler(request: Request, exc: ClientDisconnect):
80
- return JSONResponse(
81
- status_code=200,
82
- content={"detail": "Client disconnected while an operation was in course"},
83
- )
84
-
55
+ errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
85
56
 
86
57
  fastapi_settings = dict(
87
58
  debug=running_settings.debug,
88
59
  middleware=middleware,
89
- on_startup=on_startup,
90
- on_shutdown=on_shutdown,
60
+ lifespan=lifespan,
91
61
  exception_handlers={
92
62
  Exception: global_exception_handler,
93
63
  ClientDisconnect: client_disconnect_handler,
@@ -117,7 +87,7 @@ def create_application() -> FastAPI:
117
87
  # Use raw starlette routes to avoid unnecessary overhead
118
88
  application.add_route("/", homepage)
119
89
 
120
- # Inject application context into the fastapi app's state
121
- set_app_context(application)
90
+ # # Inject application context into the fastapi app's state
91
+ # set_app_context(application)
122
92
 
123
93
  return application
@@ -17,20 +17,34 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ from contextlib import asynccontextmanager
21
+
22
+ from fastapi import FastAPI
23
+
24
+ from nucliadb.common.context.fastapi import inject_app_context
20
25
  from nucliadb.ingest.utils import start_ingest, stop_ingest
21
26
  from nucliadb.reader import SERVICE_NAME
22
27
  from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry
23
- from nucliadb_utils.utilities import start_audit_utility, stop_audit_utility
28
+ from nucliadb_utils.utilities import (
29
+ get_storage,
30
+ start_audit_utility,
31
+ stop_audit_utility,
32
+ teardown_storage,
33
+ )
24
34
 
25
35
 
26
- async def initialize() -> None:
36
+ @asynccontextmanager
37
+ async def lifespan(app: FastAPI):
27
38
  await setup_telemetry(SERVICE_NAME)
28
-
39
+ await get_storage(service_name=SERVICE_NAME)
29
40
  await start_ingest(SERVICE_NAME)
30
41
  await start_audit_utility(SERVICE_NAME)
31
42
 
43
+ # Inject application context into the fastapi app's state
44
+ async with inject_app_context(app):
45
+ yield
32
46
 
33
- async def finalize() -> None:
34
47
  await stop_ingest()
35
48
  await stop_audit_utility()
49
+ await teardown_storage()
36
50
  await clean_telemetry(SERVICE_NAME)
File without changes
@@ -59,9 +59,7 @@ RESOURCE_OP_PB_TO_MODEL = {
59
59
  }
60
60
 
61
61
 
62
- async def kb_notifications_stream(
63
- context: ApplicationContext, kbid: str
64
- ) -> AsyncGenerator[bytes, None]:
62
+ async def kb_notifications_stream(context: ApplicationContext, kbid: str) -> AsyncGenerator[bytes, None]:
65
63
  """
66
64
  Returns an async generator that yields pubsub notifications for the given kbid.
67
65
  The generator will return after NOTIFICATIONS_TIMEOUT_S seconds.
@@ -70,13 +68,9 @@ async def kb_notifications_stream(
70
68
  resource_cache: dict[str, str] = {}
71
69
  async with async_timeout.timeout(NOTIFICATIONS_TIMEOUT_S):
72
70
  async for pb_notification in kb_notifications(kbid):
73
- notification = await serialize_notification(
74
- context, pb_notification, resource_cache
75
- )
71
+ notification = await serialize_notification(context, pb_notification, resource_cache)
76
72
  line = encode_streamed_notification(notification) + b"\n"
77
- logger.debug(
78
- f"Sending notification: {notification.type}", extra={"kbid": kbid}
79
- )
73
+ logger.debug(f"Sending notification: {notification.type}", extra={"kbid": kbid})
80
74
  yield line
81
75
  except asyncio.TimeoutError:
82
76
  return
@@ -101,17 +95,12 @@ async def kb_notifications(kbid: str) -> AsyncGenerator[writer_pb2.Notification,
101
95
  data = pubsub.parse(raw_data)
102
96
  notification = writer_pb2.Notification()
103
97
  notification.ParseFromString(data)
104
- # We don't need the whole broker message, so we clear it to
105
- # save space, as it can potentially be very big
106
- notification.ClearField("message")
107
98
  try:
108
99
  queue.put_nowait(notification)
109
100
  except asyncio.QueueFull: # pragma: no cover
110
101
  logger.warning("Queue is full, dropping notification", extra={"kbid": kbid})
111
102
 
112
- async with managed_subscription(
113
- pubsub, key=subscription_key, handler=subscription_handler
114
- ):
103
+ async with managed_subscription(pubsub, key=subscription_key, handler=subscription_handler):
115
104
  try:
116
105
  while True:
117
106
  notification: writer_pb2.Notification = await queue.get()
@@ -124,9 +113,7 @@ async def kb_notifications(kbid: str) -> AsyncGenerator[writer_pb2.Notification,
124
113
  )
125
114
  except Exception as ex:
126
115
  capture_exception(ex)
127
- logger.error(
128
- "Error while streaming activity", exc_info=True, extra={"kbid": kbid}
129
- )
116
+ logger.error("Error while streaming activity", exc_info=True, extra={"kbid": kbid})
130
117
  return
131
118
 
132
119
 
@@ -147,9 +134,7 @@ async def managed_subscription(pubsub: PubSubDriver, key: str, handler: Callback
147
134
  try:
148
135
  await pubsub.unsubscribe(key=key, subscription_id=subscription_id)
149
136
  except Exception: # pragma: no cover
150
- logger.warning(
151
- "Error while unsubscribing from activity stream", exc_info=True
152
- )
137
+ logger.warning("Error while unsubscribing from activity stream", exc_info=True)
153
138
 
154
139
 
155
140
  async def serialize_notification(
@@ -159,9 +144,7 @@ async def serialize_notification(
159
144
  resource_uuid = pb.uuid
160
145
  seqid = pb.seqid
161
146
 
162
- resource_title = await get_resource_title_cached(
163
- context.kv_driver, kbid, resource_uuid, cache
164
- )
147
+ resource_title = await get_resource_title_cached(context.kv_driver, kbid, resource_uuid, cache)
165
148
  if pb.action == writer_pb2.Notification.Action.INDEXED:
166
149
  return ResourceIndexedNotification(
167
150
  data=ResourceIndexed(
@@ -216,18 +199,14 @@ async def get_resource_title_cached(
216
199
  return resource_title
217
200
 
218
201
 
219
- async def get_resource_title(
220
- kv_driver: Driver, kbid: str, resource_uuid: str
221
- ) -> Optional[str]:
202
+ async def get_resource_title(kv_driver: Driver, kbid: str, resource_uuid: str) -> Optional[str]:
222
203
  async with kv_driver.transaction(read_only=True) as txn:
223
- basic = await datamanagers.resources.get_resource_basic(
224
- txn, kbid=kbid, rid=resource_uuid
225
- )
204
+ basic = await datamanagers.resources.get_basic(txn, kbid=kbid, rid=resource_uuid)
226
205
  if basic is None:
227
206
  return None
228
207
  return basic.title
229
208
 
230
209
 
231
210
  def encode_streamed_notification(notification: Notification) -> bytes:
232
- encoded_nofication = notification.json().encode("utf-8")
211
+ encoded_nofication = notification.model_dump_json().encode("utf-8")
233
212
  return encoded_nofication
@@ -34,9 +34,7 @@ API_PREFIX = "api"
34
34
  class EndpointFilter(logging.Filter):
35
35
  def filter(self, record: logging.LogRecord) -> bool:
36
36
  return (
37
- record.args is not None
38
- and len(record.args) >= 3
39
- and record.args[2] not in ("/", "/metrics") # type: ignore
37
+ record.args is not None and len(record.args) >= 3 and record.args[2] not in ("/", "/metrics") # type: ignore
40
38
  )
41
39
 
42
40
 
@@ -17,7 +17,8 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from . import chat # noqa
20
+ from . import ask # noqa
21
+ from . import catalog # noqa
21
22
  from . import feedback # noqa
22
23
  from . import find # noqa
23
24
  from . import knowledgebox # noqa
@@ -26,6 +27,5 @@ from . import search # noqa
26
27
  from . import suggest # noqa
27
28
  from . import summarize # noqa
28
29
  from .resource import ask as ask_resource # noqa
29
- from .resource import chat as chat_resource # noqa
30
30
  from .resource import search as search_resource # noqa
31
31
  from .router import api # noqa
@@ -0,0 +1,112 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ from typing import Optional, Union
21
+
22
+ from fastapi import Header, Request, Response
23
+ from fastapi_versioning import version
24
+ from starlette.responses import StreamingResponse
25
+
26
+ from nucliadb.models.responses import HTTPClientError
27
+ from nucliadb.search.api.v1.router import KB_PREFIX, api
28
+ from nucliadb.search.search import cache
29
+ from nucliadb.search.search.chat.ask import AskResult, ask, handled_ask_exceptions
30
+ from nucliadb.search.search.chat.exceptions import AnswerJsonSchemaTooLong
31
+ from nucliadb.search.search.utils import maybe_log_request_payload
32
+ from nucliadb_models.resource import NucliaDBRoles
33
+ from nucliadb_models.search import (
34
+ AskRequest,
35
+ NucliaDBClientType,
36
+ SyncAskResponse,
37
+ parse_max_tokens,
38
+ )
39
+ from nucliadb_utils.authentication import requires
40
+
41
+
42
@api.post(
    f"/{KB_PREFIX}/{{kbid}}/ask",
    status_code=200,
    summary="Ask Knowledge Box",
    description="Ask questions on a Knowledge Box",
    tags=["Search"],
    response_model=SyncAskResponse,
)
@requires(NucliaDBRoles.READER)
@version(1)
async def ask_knowledgebox_endpoint(
    request: Request,
    kbid: str,
    item: AskRequest,
    x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
    x_nucliadb_user: str = Header(""),
    x_forwarded_for: str = Header(""),
    x_synchronous: bool = Header(
        default=False,
        description="When set to true, outputs response as JSON in a non-streaming way. "
        "This is slower and requires waiting for entire answer to be ready.",
    ),
) -> Union[StreamingResponse, HTTPClientError, Response]:
    """HTTP entry point for asking a question on a Knowledge Box.

    The route is registered on the v1 API and requires the READER role. The
    handler itself holds no logic: it forwards the path parameter, the request
    body and the header values to ``create_ask_response``.

    Args:
        request: Incoming request object (not read by this handler).
        kbid: Knowledge Box identifier taken from the URL path.
        item: Parsed ask request body.
        x_ndb_client: Client type header; defaults to ``NucliaDBClientType.API``.
        x_nucliadb_user: User header; defaults to an empty string.
        x_forwarded_for: Origin header; defaults to an empty string.
        x_synchronous: When true, the answer is returned as a single JSON
            document instead of a stream.

    Returns:
        Whatever ``create_ask_response`` produces for these arguments.
    """
    # Arguments are passed positionally, in the order create_ask_response
    # declares them: kbid, request body, user, client type, origin, sync flag.
    ask_response = await create_ask_response(
        kbid,
        item,
        x_nucliadb_user,
        x_ndb_client,
        x_forwarded_for,
        x_synchronous,
    )
    return ask_response
68
+
69
+
70
@handled_ask_exceptions
async def create_ask_response(
    kbid: str,
    ask_request: AskRequest,
    user_id: str,
    client_type: NucliaDBClientType,
    origin: str,
    x_synchronous: bool,
    resource: Optional[str] = None,
) -> Response:
    """Run an ask operation and wrap its result in an HTTP response.

    The request payload is passed to ``maybe_log_request_payload`` and its
    ``max_tokens`` value is replaced by the output of ``parse_max_tokens``
    before ``ask`` is awaited.

    Args:
        kbid: Knowledge Box identifier.
        ask_request: Ask request body; its ``max_tokens`` attribute is
            overwritten in place.
        user_id: Forwarded to ``ask`` as ``user_id``.
        client_type: Forwarded to ``ask`` as ``client_type``.
        origin: Forwarded to ``ask`` as ``origin``.
        x_synchronous: Selects a single JSON response (true) or an NDJSON
            streaming response (false).
        resource: Optional value forwarded to ``ask`` as ``resource``.

    Returns:
        A 400 ``HTTPClientError`` when ``ask`` raises
        ``AnswerJsonSchemaTooLong``; otherwise a 200 ``Response`` or
        ``StreamingResponse`` carrying the learning id header.
    """
    maybe_log_request_payload(kbid, "/ask", ask_request)
    ask_request.max_tokens = parse_max_tokens(ask_request.max_tokens)

    # NOTE(review): the diff view this block was recovered from carries no
    # indentation; building the response is assumed to happen inside the
    # request-caches context. Confirm against the packaged module.
    with cache.request_caches():
        try:
            result: AskResult = await ask(
                kbid=kbid,
                ask_request=ask_request,
                user_id=user_id,
                client_type=client_type,
                origin=origin,
                resource=resource,
            )
        except AnswerJsonSchemaTooLong as exc:
            return HTTPClientError(status_code=400, detail=str(exc))

        # Falls back to "unknown" when the result carries no learning id.
        learning_id = result.nuclia_learning_id or "unknown"
        response_headers = {
            "NUCLIA-LEARNING-ID": learning_id,
            "Access-Control-Expose-Headers": "NUCLIA-LEARNING-ID",
        }

        if not x_synchronous:
            return StreamingResponse(
                content=result.ndjson_stream(),
                status_code=200,
                headers=response_headers,
                media_type="application/x-ndjson",
            )

        return Response(
            content=await result.json(),
            status_code=200,
            headers=response_headers,
            media_type="application/json",
        )
@@ -0,0 +1,184 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+ from time import time
21
+ from typing import Optional, Union
22
+
23
+ from fastapi import Request, Response
24
+ from fastapi_versioning import version
25
+
26
+ from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
27
+ from nucliadb.common.maindb.pg import PGDriver
28
+ from nucliadb.common.maindb.utils import get_driver
29
+ from nucliadb.models.responses import HTTPClientError
30
+ from nucliadb.search import logger
31
+ from nucliadb.search.api.v1.router import KB_PREFIX, api
32
+ from nucliadb.search.api.v1.utils import fastapi_query
33
+ from nucliadb.search.search import cache
34
+ from nucliadb.search.search.exceptions import InvalidQueryError
35
+ from nucliadb.search.search.merge import fetch_resources
36
+ from nucliadb.search.search.pgcatalog import pgcatalog_search
37
+ from nucliadb.search.search.query_parser.parser import parse_catalog
38
+ from nucliadb.search.search.utils import (
39
+ maybe_log_request_payload,
40
+ )
41
+ from nucliadb_models.common import FieldTypeName
42
+ from nucliadb_models.metadata import ResourceProcessingStatus
43
+ from nucliadb_models.resource import NucliaDBRoles
44
+ from nucliadb_models.search import (
45
+ CatalogRequest,
46
+ CatalogResponse,
47
+ KnowledgeboxSearchResults,
48
+ ResourceProperties,
49
+ SearchParamDefaults,
50
+ SortField,
51
+ SortOptions,
52
+ SortOrder,
53
+ )
54
+ from nucliadb_models.utils import DateTime
55
+ from nucliadb_utils.authentication import requires
56
+ from nucliadb_utils.exceptions import LimitsExceededError
57
+
58
+
59
@api.get(
    f"/{KB_PREFIX}/{{kbid}}/catalog",
    status_code=200,
    summary="List resources of a Knowledge Box",
    description="List resources of a Knowledge Box",
    response_model=KnowledgeboxSearchResults,
    response_model_exclude_unset=True,
    tags=["Search"],
)
@requires(NucliaDBRoles.READER)
@version(1)
async def catalog_get(
    request: Request,
    response: Response,
    kbid: str,
    query: str = fastapi_query(SearchParamDefaults.query),
    filters: list[str] = fastapi_query(SearchParamDefaults.filters),
    faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
    sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
    sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
    sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
    page_number: int = fastapi_query(SearchParamDefaults.catalog_page_number),
    page_size: int = fastapi_query(SearchParamDefaults.catalog_page_size),
    shards: list[str] = fastapi_query(SearchParamDefaults.shards, deprecated=True),
    with_status: Optional[ResourceProcessingStatus] = fastapi_query(
        SearchParamDefaults.with_status, deprecated="Use filters instead"
    ),
    debug: bool = fastapi_query(SearchParamDefaults.debug, include_in_schema=False),
    range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
    range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
    range_modification_start: Optional[DateTime] = fastapi_query(
        SearchParamDefaults.range_modification_start
    ),
    range_modification_end: Optional[DateTime] = fastapi_query(
        SearchParamDefaults.range_modification_end
    ),
    hidden: Optional[bool] = fastapi_query(SearchParamDefaults.hidden),
) -> Union[CatalogResponse, HTTPClientError]:
    """GET variant of the catalog listing, driven by query-string parameters.

    The query parameters are gathered into a ``CatalogRequest`` and handed to
    ``catalog``, the same helper the POST variant uses. The route requires the
    READER role and is registered on the v1 API.

    The return annotation names ``CatalogResponse`` because that is what
    ``catalog`` builds; the ``response_model`` declared on the route is left
    unchanged.

    Args:
        request: Incoming request object (not read by this handler).
        response: Outgoing response object (not read by this handler).
        kbid: Knowledge Box identifier taken from the URL path.
        query, filters, faceted, page_number, page_size, shards, debug,
        with_status, range_creation_start, range_creation_end,
        range_modification_start, range_modification_end, hidden:
            Copied one-to-one into the ``CatalogRequest`` fields of the same
            name.
        sort_field, sort_limit, sort_order: Combined into a ``SortOptions``
            assigned to ``item.sort`` when ``sort_field`` is truthy.

    Returns:
        The value returned by ``catalog`` for the assembled request.
    """
    item = CatalogRequest(
        query=query,
        filters=filters,
        faceted=faceted,
        page_number=page_number,
        page_size=page_size,
        shards=shards,
        debug=debug,
        with_status=with_status,
        range_creation_start=range_creation_start,
        range_creation_end=range_creation_end,
        range_modification_start=range_modification_start,
        range_modification_end=range_modification_end,
        hidden=hidden,
    )
    # Sorting is attached after construction, and only when a sort field is
    # present, so `sort` stays unset on the request otherwise.
    if sort_field:
        item.sort = SortOptions(field=sort_field, limit=sort_limit, order=sort_order)
    return await catalog(kbid, item)
115
+
116
+
117
@api.post(
    f"/{KB_PREFIX}/{{kbid}}/catalog",
    status_code=200,
    summary="List resources of a Knowledge Box",
    description="List resources of a Knowledge Box",
    response_model=KnowledgeboxSearchResults,
    response_model_exclude_unset=True,
    tags=["Search"],
)
@requires(NucliaDBRoles.READER)
@version(1)
async def catalog_post(
    request: Request,
    kbid: str,
    item: CatalogRequest,
) -> Union[CatalogResponse, HTTPClientError]:
    """POST variant of the catalog listing, driven by a JSON request body.

    The already-parsed ``CatalogRequest`` is forwarded unchanged to
    ``catalog``. The route requires the READER role and is registered on the
    v1 API.

    Args:
        request: Incoming request object (not read by this handler).
        kbid: Knowledge Box identifier taken from the URL path.
        item: Catalog request parsed from the body.

    Returns:
        The value returned by ``catalog`` for this Knowledge Box and request.
    """
    listing = await catalog(kbid, item)
    return listing
134
+
135
+
136
async def catalog(
    kbid: str,
    item: CatalogRequest,
) -> Union[CatalogResponse, HTTPClientError]:
    """
    Catalog endpoint is a simplified version of the search endpoint, it only
    returns bm25 results on titles and it does not support vector search.
    It is useful for listing resources in a knowledge box.

    Args:
        kbid: Knowledge Box identifier.
        item: Catalog request describing the listing to run.

    Returns:
        A ``CatalogResponse`` whose ``fulltext`` comes from
        ``pgcatalog_search`` and whose ``resources`` come from
        ``fetch_resources``, or an ``HTTPClientError`` with status:

        * 501 when ``pgcatalog_enabled()`` is false,
        * 412 on ``InvalidQueryError``,
        * 404 on ``KnowledgeBoxNotFound``,
        * the exception's own status code on ``LimitsExceededError``.

    A warning is logged when handling takes longer than two seconds.
    """
    # Function-scope import keeps this block self-contained. A monotonic clock
    # is used for the elapsed-time measurement because the wall clock can be
    # adjusted while a request is in flight, which would distort the duration.
    from time import monotonic

    if not pgcatalog_enabled():  # pragma: no cover
        return HTTPClientError(status_code=501, detail="PG driver is needed for catalog search")

    maybe_log_request_payload(kbid, "/catalog", item)
    started_at = monotonic()
    try:
        # NOTE(review): the diff view this block was recovered from carries no
        # indentation; parsing, searching and resource fetching are all assumed
        # to run inside the request-caches context. Confirm against the
        # packaged module.
        with cache.request_caches():
            query_parser = parse_catalog(kbid, item)

            catalog_results = CatalogResponse()
            catalog_results.fulltext = await pgcatalog_search(query_parser)
            catalog_results.resources = await fetch_resources(
                resources=[r.rid for r in catalog_results.fulltext.results],
                kbid=kbid,
                show=[ResourceProperties.BASIC, ResourceProperties.ERRORS],
                field_type_filter=list(FieldTypeName),
                extracted=[],
            )
            return catalog_results
    except InvalidQueryError as exc:
        return HTTPClientError(status_code=412, detail=str(exc))
    except KnowledgeBoxNotFound:
        return HTTPClientError(status_code=404, detail="Knowledge Box not found")
    except LimitsExceededError as exc:
        return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
    finally:
        # Runs on every exit path after timing starts, including error returns.
        duration = monotonic() - started_at
        if duration > 2:  # pragma: no cover
            logger.warning(
                "Slow catalog request",
                extra={
                    "kbid": kbid,
                    "duration": duration,
                    "query": item.model_dump_json(),
                },
            )
181
+
182
+
183
def pgcatalog_enabled() -> bool:
    """Tell whether the driver returned by ``get_driver()`` is a ``PGDriver``."""
    driver = get_driver()
    return isinstance(driver, PGDriver)
@@ -18,24 +18,24 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
-
22
21
  from fastapi import Header, Request, Response
23
22
  from fastapi_versioning import version
24
23
 
24
+ from nucliadb.common.models_utils import to_proto
25
25
  from nucliadb.models.responses import HTTPClientError
26
- from nucliadb.search import logger, predict
26
+ from nucliadb.search import logger
27
27
  from nucliadb.search.api.v1.router import KB_PREFIX, api
28
- from nucliadb.search.utilities import get_predict
29
28
  from nucliadb_models.resource import NucliaDBRoles
30
29
  from nucliadb_models.search import FeedbackRequest, NucliaDBClientType
31
30
  from nucliadb_telemetry import errors
32
31
  from nucliadb_utils.authentication import requires
32
+ from nucliadb_utils.utilities import get_audit
33
33
 
34
34
 
35
35
  @api.post(
36
36
  f"/{KB_PREFIX}/{{kbid}}/feedback",
37
37
  status_code=200,
38
- name="Send Feedback",
38
+ summary="Send Feedback",
39
39
  description="Send feedback for a search operation in a Knowledge Box",
40
40
  tags=["Search"],
41
41
  )
@@ -51,28 +51,20 @@ async def send_feedback_endpoint(
51
51
  x_forwarded_for: str = Header(""),
52
52
  ):
53
53
  try:
54
- return await send_feedback(
55
- kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for
56
- )
57
- except predict.ProxiedPredictAPIError as err:
58
- return HTTPClientError(
59
- status_code=503,
60
- detail=f"Feedback service unavailable. {err.status}: {err.detail}",
61
- )
54
+ audit = get_audit()
55
+ if audit is not None:
56
+ audit.feedback(
57
+ kbid=kbid,
58
+ user=x_nucliadb_user,
59
+ client_type=to_proto.client_type(x_ndb_client),
60
+ origin=x_forwarded_for,
61
+ learning_id=item.ident,
62
+ good=item.good,
63
+ task=to_proto.feedback_task(item.task),
64
+ feedback=item.feedback,
65
+ text_block_id=item.text_block_id,
66
+ )
62
67
  except Exception as ex:
63
68
  errors.capture_exception(ex)
64
69
  logger.exception("Unexpected error sending feedback", extra={"kbid": kbid})
65
70
  return HTTPClientError(status_code=500, detail=f"Internal server error")
66
-
67
-
68
- async def send_feedback(
69
- kbid: str,
70
- item: FeedbackRequest,
71
- x_nucliadb_user: str,
72
- x_ndb_client: NucliaDBClientType,
73
- x_forwarded_for: str,
74
- ):
75
- predict = get_predict()
76
- await predict.send_feedback(
77
- kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for
78
- )