nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (431)
  1. migrations/0002_rollover_shards.py +1 -2
  2. migrations/0003_allfields_key.py +2 -37
  3. migrations/0004_rollover_shards.py +1 -2
  4. migrations/0005_rollover_shards.py +1 -2
  5. migrations/0006_rollover_shards.py +2 -4
  6. migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
  7. migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
  8. migrations/0010_fix_corrupt_indexes.py +11 -12
  9. migrations/0011_materialize_labelset_ids.py +2 -18
  10. migrations/0012_rollover_shards.py +6 -12
  11. migrations/0013_rollover_shards.py +2 -4
  12. migrations/0014_rollover_shards.py +5 -7
  13. migrations/0015_targeted_rollover.py +6 -12
  14. migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
  15. migrations/0017_multiple_writable_shards.py +3 -6
  16. migrations/0018_purge_orphan_kbslugs.py +59 -0
  17. migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
  18. migrations/0020_drain_nodes_from_cluster.py +83 -0
  19. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
  20. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  21. migrations/0023_backfill_pg_catalog.py +80 -0
  22. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  23. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  24. migrations/0027_rollover_texts3.py +73 -0
  25. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  26. migrations/pg/0002_catalog.py +42 -0
  27. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  28. nucliadb/common/cluster/base.py +41 -24
  29. nucliadb/common/cluster/discovery/base.py +6 -14
  30. nucliadb/common/cluster/discovery/k8s.py +9 -19
  31. nucliadb/common/cluster/discovery/manual.py +1 -3
  32. nucliadb/common/cluster/discovery/single.py +1 -2
  33. nucliadb/common/cluster/discovery/utils.py +1 -3
  34. nucliadb/common/cluster/grpc_node_dummy.py +11 -16
  35. nucliadb/common/cluster/index_node.py +10 -19
  36. nucliadb/common/cluster/manager.py +223 -102
  37. nucliadb/common/cluster/rebalance.py +42 -37
  38. nucliadb/common/cluster/rollover.py +377 -204
  39. nucliadb/common/cluster/settings.py +16 -9
  40. nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
  41. nucliadb/common/cluster/standalone/index_node.py +4 -11
  42. nucliadb/common/cluster/standalone/service.py +2 -6
  43. nucliadb/common/cluster/standalone/utils.py +9 -6
  44. nucliadb/common/cluster/utils.py +43 -29
  45. nucliadb/common/constants.py +20 -0
  46. nucliadb/common/context/__init__.py +6 -4
  47. nucliadb/common/context/fastapi.py +8 -5
  48. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  49. nucliadb/common/datamanagers/__init__.py +24 -5
  50. nucliadb/common/datamanagers/atomic.py +102 -0
  51. nucliadb/common/datamanagers/cluster.py +5 -5
  52. nucliadb/common/datamanagers/entities.py +6 -16
  53. nucliadb/common/datamanagers/fields.py +84 -0
  54. nucliadb/common/datamanagers/kb.py +101 -24
  55. nucliadb/common/datamanagers/labels.py +26 -56
  56. nucliadb/common/datamanagers/processing.py +2 -6
  57. nucliadb/common/datamanagers/resources.py +214 -117
  58. nucliadb/common/datamanagers/rollover.py +77 -16
  59. nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
  60. nucliadb/common/datamanagers/utils.py +19 -11
  61. nucliadb/common/datamanagers/vectorsets.py +110 -0
  62. nucliadb/common/external_index_providers/base.py +257 -0
  63. nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
  64. nucliadb/common/external_index_providers/manager.py +101 -0
  65. nucliadb/common/external_index_providers/pinecone.py +933 -0
  66. nucliadb/common/external_index_providers/settings.py +52 -0
  67. nucliadb/common/http_clients/auth.py +3 -6
  68. nucliadb/common/http_clients/processing.py +6 -11
  69. nucliadb/common/http_clients/utils.py +1 -3
  70. nucliadb/common/ids.py +240 -0
  71. nucliadb/common/locking.py +43 -13
  72. nucliadb/common/maindb/driver.py +11 -35
  73. nucliadb/common/maindb/exceptions.py +6 -6
  74. nucliadb/common/maindb/local.py +22 -9
  75. nucliadb/common/maindb/pg.py +206 -111
  76. nucliadb/common/maindb/utils.py +13 -44
  77. nucliadb/common/models_utils/from_proto.py +479 -0
  78. nucliadb/common/models_utils/to_proto.py +60 -0
  79. nucliadb/common/nidx.py +260 -0
  80. nucliadb/export_import/datamanager.py +25 -19
  81. nucliadb/export_import/exceptions.py +8 -0
  82. nucliadb/export_import/exporter.py +20 -7
  83. nucliadb/export_import/importer.py +6 -11
  84. nucliadb/export_import/models.py +5 -5
  85. nucliadb/export_import/tasks.py +4 -4
  86. nucliadb/export_import/utils.py +94 -54
  87. nucliadb/health.py +1 -3
  88. nucliadb/ingest/app.py +15 -11
  89. nucliadb/ingest/consumer/auditing.py +30 -147
  90. nucliadb/ingest/consumer/consumer.py +96 -52
  91. nucliadb/ingest/consumer/materializer.py +10 -12
  92. nucliadb/ingest/consumer/pull.py +12 -27
  93. nucliadb/ingest/consumer/service.py +20 -19
  94. nucliadb/ingest/consumer/shard_creator.py +7 -14
  95. nucliadb/ingest/consumer/utils.py +1 -3
  96. nucliadb/ingest/fields/base.py +139 -188
  97. nucliadb/ingest/fields/conversation.py +18 -5
  98. nucliadb/ingest/fields/exceptions.py +1 -4
  99. nucliadb/ingest/fields/file.py +7 -25
  100. nucliadb/ingest/fields/link.py +11 -16
  101. nucliadb/ingest/fields/text.py +9 -4
  102. nucliadb/ingest/orm/brain.py +255 -262
  103. nucliadb/ingest/orm/broker_message.py +181 -0
  104. nucliadb/ingest/orm/entities.py +36 -51
  105. nucliadb/ingest/orm/exceptions.py +12 -0
  106. nucliadb/ingest/orm/knowledgebox.py +334 -278
  107. nucliadb/ingest/orm/processor/__init__.py +2 -697
  108. nucliadb/ingest/orm/processor/auditing.py +117 -0
  109. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  110. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  111. nucliadb/ingest/orm/processor/processor.py +752 -0
  112. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  113. nucliadb/ingest/orm/resource.py +280 -520
  114. nucliadb/ingest/orm/utils.py +25 -31
  115. nucliadb/ingest/partitions.py +3 -9
  116. nucliadb/ingest/processing.py +76 -81
  117. nucliadb/ingest/py.typed +0 -0
  118. nucliadb/ingest/serialize.py +37 -173
  119. nucliadb/ingest/service/__init__.py +1 -3
  120. nucliadb/ingest/service/writer.py +186 -577
  121. nucliadb/ingest/settings.py +13 -22
  122. nucliadb/ingest/utils.py +3 -6
  123. nucliadb/learning_proxy.py +264 -51
  124. nucliadb/metrics_exporter.py +30 -19
  125. nucliadb/middleware/__init__.py +1 -3
  126. nucliadb/migrator/command.py +1 -3
  127. nucliadb/migrator/datamanager.py +13 -13
  128. nucliadb/migrator/migrator.py +57 -37
  129. nucliadb/migrator/settings.py +2 -1
  130. nucliadb/migrator/utils.py +18 -10
  131. nucliadb/purge/__init__.py +139 -33
  132. nucliadb/purge/orphan_shards.py +7 -13
  133. nucliadb/reader/__init__.py +1 -3
  134. nucliadb/reader/api/models.py +3 -14
  135. nucliadb/reader/api/v1/__init__.py +0 -1
  136. nucliadb/reader/api/v1/download.py +27 -94
  137. nucliadb/reader/api/v1/export_import.py +4 -4
  138. nucliadb/reader/api/v1/knowledgebox.py +13 -13
  139. nucliadb/reader/api/v1/learning_config.py +8 -12
  140. nucliadb/reader/api/v1/resource.py +67 -93
  141. nucliadb/reader/api/v1/services.py +70 -125
  142. nucliadb/reader/app.py +16 -46
  143. nucliadb/reader/lifecycle.py +18 -4
  144. nucliadb/reader/py.typed +0 -0
  145. nucliadb/reader/reader/notifications.py +10 -31
  146. nucliadb/search/__init__.py +1 -3
  147. nucliadb/search/api/v1/__init__.py +2 -2
  148. nucliadb/search/api/v1/ask.py +112 -0
  149. nucliadb/search/api/v1/catalog.py +184 -0
  150. nucliadb/search/api/v1/feedback.py +17 -25
  151. nucliadb/search/api/v1/find.py +41 -41
  152. nucliadb/search/api/v1/knowledgebox.py +90 -62
  153. nucliadb/search/api/v1/predict_proxy.py +2 -2
  154. nucliadb/search/api/v1/resource/ask.py +66 -117
  155. nucliadb/search/api/v1/resource/search.py +51 -72
  156. nucliadb/search/api/v1/router.py +1 -0
  157. nucliadb/search/api/v1/search.py +50 -197
  158. nucliadb/search/api/v1/suggest.py +40 -54
  159. nucliadb/search/api/v1/summarize.py +9 -5
  160. nucliadb/search/api/v1/utils.py +2 -1
  161. nucliadb/search/app.py +16 -48
  162. nucliadb/search/lifecycle.py +10 -3
  163. nucliadb/search/predict.py +176 -188
  164. nucliadb/search/py.typed +0 -0
  165. nucliadb/search/requesters/utils.py +41 -63
  166. nucliadb/search/search/cache.py +149 -20
  167. nucliadb/search/search/chat/ask.py +918 -0
  168. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
  169. nucliadb/search/search/chat/images.py +41 -17
  170. nucliadb/search/search/chat/prompt.py +851 -282
  171. nucliadb/search/search/chat/query.py +274 -267
  172. nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
  173. nucliadb/search/search/fetch.py +43 -36
  174. nucliadb/search/search/filters.py +9 -15
  175. nucliadb/search/search/find.py +214 -54
  176. nucliadb/search/search/find_merge.py +408 -391
  177. nucliadb/search/search/hydrator.py +191 -0
  178. nucliadb/search/search/merge.py +198 -234
  179. nucliadb/search/search/metrics.py +73 -2
  180. nucliadb/search/search/paragraphs.py +64 -106
  181. nucliadb/search/search/pgcatalog.py +233 -0
  182. nucliadb/search/search/predict_proxy.py +1 -1
  183. nucliadb/search/search/query.py +386 -257
  184. nucliadb/search/search/query_parser/exceptions.py +22 -0
  185. nucliadb/search/search/query_parser/models.py +101 -0
  186. nucliadb/search/search/query_parser/parser.py +183 -0
  187. nucliadb/search/search/rank_fusion.py +204 -0
  188. nucliadb/search/search/rerankers.py +270 -0
  189. nucliadb/search/search/shards.py +4 -38
  190. nucliadb/search/search/summarize.py +14 -18
  191. nucliadb/search/search/utils.py +27 -4
  192. nucliadb/search/settings.py +15 -1
  193. nucliadb/standalone/api_router.py +4 -10
  194. nucliadb/standalone/app.py +17 -14
  195. nucliadb/standalone/auth.py +7 -21
  196. nucliadb/standalone/config.py +9 -12
  197. nucliadb/standalone/introspect.py +5 -5
  198. nucliadb/standalone/lifecycle.py +26 -25
  199. nucliadb/standalone/migrations.py +58 -0
  200. nucliadb/standalone/purge.py +9 -8
  201. nucliadb/standalone/py.typed +0 -0
  202. nucliadb/standalone/run.py +25 -18
  203. nucliadb/standalone/settings.py +10 -14
  204. nucliadb/standalone/versions.py +15 -5
  205. nucliadb/tasks/consumer.py +8 -12
  206. nucliadb/tasks/producer.py +7 -6
  207. nucliadb/tests/config.py +53 -0
  208. nucliadb/train/__init__.py +1 -3
  209. nucliadb/train/api/utils.py +1 -2
  210. nucliadb/train/api/v1/shards.py +2 -2
  211. nucliadb/train/api/v1/trainset.py +4 -6
  212. nucliadb/train/app.py +14 -47
  213. nucliadb/train/generator.py +10 -19
  214. nucliadb/train/generators/field_classifier.py +7 -19
  215. nucliadb/train/generators/field_streaming.py +156 -0
  216. nucliadb/train/generators/image_classifier.py +12 -18
  217. nucliadb/train/generators/paragraph_classifier.py +5 -9
  218. nucliadb/train/generators/paragraph_streaming.py +6 -9
  219. nucliadb/train/generators/question_answer_streaming.py +19 -20
  220. nucliadb/train/generators/sentence_classifier.py +9 -15
  221. nucliadb/train/generators/token_classifier.py +45 -36
  222. nucliadb/train/generators/utils.py +14 -18
  223. nucliadb/train/lifecycle.py +7 -3
  224. nucliadb/train/nodes.py +23 -32
  225. nucliadb/train/py.typed +0 -0
  226. nucliadb/train/servicer.py +13 -21
  227. nucliadb/train/settings.py +2 -6
  228. nucliadb/train/types.py +13 -10
  229. nucliadb/train/upload.py +3 -6
  230. nucliadb/train/uploader.py +20 -25
  231. nucliadb/train/utils.py +1 -1
  232. nucliadb/writer/__init__.py +1 -3
  233. nucliadb/writer/api/constants.py +0 -5
  234. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  235. nucliadb/writer/api/v1/export_import.py +102 -49
  236. nucliadb/writer/api/v1/field.py +196 -620
  237. nucliadb/writer/api/v1/knowledgebox.py +221 -71
  238. nucliadb/writer/api/v1/learning_config.py +2 -2
  239. nucliadb/writer/api/v1/resource.py +114 -216
  240. nucliadb/writer/api/v1/services.py +64 -132
  241. nucliadb/writer/api/v1/slug.py +61 -0
  242. nucliadb/writer/api/v1/transaction.py +67 -0
  243. nucliadb/writer/api/v1/upload.py +184 -215
  244. nucliadb/writer/app.py +11 -61
  245. nucliadb/writer/back_pressure.py +62 -43
  246. nucliadb/writer/exceptions.py +0 -4
  247. nucliadb/writer/lifecycle.py +21 -15
  248. nucliadb/writer/py.typed +0 -0
  249. nucliadb/writer/resource/audit.py +2 -1
  250. nucliadb/writer/resource/basic.py +48 -62
  251. nucliadb/writer/resource/field.py +45 -135
  252. nucliadb/writer/resource/origin.py +1 -2
  253. nucliadb/writer/settings.py +14 -5
  254. nucliadb/writer/tus/__init__.py +17 -15
  255. nucliadb/writer/tus/azure.py +111 -0
  256. nucliadb/writer/tus/dm.py +17 -5
  257. nucliadb/writer/tus/exceptions.py +1 -3
  258. nucliadb/writer/tus/gcs.py +56 -84
  259. nucliadb/writer/tus/local.py +21 -37
  260. nucliadb/writer/tus/s3.py +28 -68
  261. nucliadb/writer/tus/storage.py +5 -56
  262. nucliadb/writer/vectorsets.py +125 -0
  263. nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
  264. nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
  265. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
  266. nucliadb/common/maindb/redis.py +0 -194
  267. nucliadb/common/maindb/tikv.py +0 -412
  268. nucliadb/ingest/fields/layout.py +0 -58
  269. nucliadb/ingest/tests/conftest.py +0 -30
  270. nucliadb/ingest/tests/fixtures.py +0 -771
  271. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  272. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
  273. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
  274. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  275. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  276. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  277. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
  278. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  279. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  280. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  281. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
  282. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  283. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  284. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
  285. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  286. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  287. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  288. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  289. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
  290. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  291. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  292. nucliadb/middleware/transaction.py +0 -117
  293. nucliadb/reader/api/v1/learning_collector.py +0 -63
  294. nucliadb/reader/tests/__init__.py +0 -19
  295. nucliadb/reader/tests/conftest.py +0 -31
  296. nucliadb/reader/tests/fixtures.py +0 -136
  297. nucliadb/reader/tests/test_list_resources.py +0 -75
  298. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  299. nucliadb/reader/tests/test_reader_resource.py +0 -379
  300. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  301. nucliadb/search/api/v1/chat.py +0 -258
  302. nucliadb/search/api/v1/resource/chat.py +0 -94
  303. nucliadb/search/tests/__init__.py +0 -19
  304. nucliadb/search/tests/conftest.py +0 -33
  305. nucliadb/search/tests/fixtures.py +0 -199
  306. nucliadb/search/tests/node.py +0 -465
  307. nucliadb/search/tests/unit/__init__.py +0 -18
  308. nucliadb/search/tests/unit/api/__init__.py +0 -19
  309. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  310. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  311. nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
  312. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
  313. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  314. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  315. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
  316. nucliadb/search/tests/unit/search/__init__.py +0 -18
  317. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  318. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
  319. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  320. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  321. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  322. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
  323. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  324. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  325. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  326. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  327. nucliadb/search/tests/unit/search/test_query.py +0 -201
  328. nucliadb/search/tests/unit/test_app.py +0 -79
  329. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  330. nucliadb/search/tests/unit/test_merge.py +0 -34
  331. nucliadb/search/tests/unit/test_predict.py +0 -584
  332. nucliadb/standalone/tests/__init__.py +0 -19
  333. nucliadb/standalone/tests/conftest.py +0 -33
  334. nucliadb/standalone/tests/fixtures.py +0 -38
  335. nucliadb/standalone/tests/unit/__init__.py +0 -18
  336. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  337. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  338. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  339. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  340. nucliadb/tests/benchmarks/__init__.py +0 -19
  341. nucliadb/tests/benchmarks/test_search.py +0 -99
  342. nucliadb/tests/conftest.py +0 -32
  343. nucliadb/tests/fixtures.py +0 -736
  344. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
  345. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
  346. nucliadb/tests/migrations/__init__.py +0 -19
  347. nucliadb/tests/migrations/test_migration_0017.py +0 -80
  348. nucliadb/tests/tikv.py +0 -240
  349. nucliadb/tests/unit/__init__.py +0 -19
  350. nucliadb/tests/unit/common/__init__.py +0 -19
  351. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  352. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  353. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
  354. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  355. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
  356. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
  357. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
  358. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
  359. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
  360. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  361. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  362. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  363. nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
  364. nucliadb/tests/unit/common/test_context.py +0 -36
  365. nucliadb/tests/unit/export_import/__init__.py +0 -19
  366. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  367. nucliadb/tests/unit/export_import/test_utils.py +0 -294
  368. nucliadb/tests/unit/migrator/__init__.py +0 -19
  369. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  370. nucliadb/tests/unit/tasks/__init__.py +0 -19
  371. nucliadb/tests/unit/tasks/conftest.py +0 -42
  372. nucliadb/tests/unit/tasks/test_consumer.py +0 -93
  373. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  374. nucliadb/tests/unit/tasks/test_tasks.py +0 -60
  375. nucliadb/tests/unit/test_field_ids.py +0 -49
  376. nucliadb/tests/unit/test_health.py +0 -84
  377. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  378. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  379. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  380. nucliadb/tests/unit/test_purge.py +0 -138
  381. nucliadb/tests/utils/__init__.py +0 -74
  382. nucliadb/tests/utils/aiohttp_session.py +0 -44
  383. nucliadb/tests/utils/broker_messages/__init__.py +0 -167
  384. nucliadb/tests/utils/broker_messages/fields.py +0 -181
  385. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  386. nucliadb/tests/utils/entities.py +0 -78
  387. nucliadb/train/api/v1/check.py +0 -60
  388. nucliadb/train/tests/__init__.py +0 -19
  389. nucliadb/train/tests/conftest.py +0 -29
  390. nucliadb/train/tests/fixtures.py +0 -342
  391. nucliadb/train/tests/test_field_classification.py +0 -122
  392. nucliadb/train/tests/test_get_entities.py +0 -80
  393. nucliadb/train/tests/test_get_info.py +0 -51
  394. nucliadb/train/tests/test_get_ontology.py +0 -34
  395. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  396. nucliadb/train/tests/test_image_classification.py +0 -222
  397. nucliadb/train/tests/test_list_fields.py +0 -39
  398. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  399. nucliadb/train/tests/test_list_resources.py +0 -39
  400. nucliadb/train/tests/test_list_sentences.py +0 -71
  401. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  402. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  403. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  404. nucliadb/train/tests/test_sentence_classification.py +0 -143
  405. nucliadb/train/tests/test_token_classification.py +0 -136
  406. nucliadb/train/tests/utils.py +0 -108
  407. nucliadb/writer/layouts/__init__.py +0 -51
  408. nucliadb/writer/layouts/v1.py +0 -59
  409. nucliadb/writer/resource/vectors.py +0 -120
  410. nucliadb/writer/tests/__init__.py +0 -19
  411. nucliadb/writer/tests/conftest.py +0 -31
  412. nucliadb/writer/tests/fixtures.py +0 -192
  413. nucliadb/writer/tests/test_fields.py +0 -486
  414. nucliadb/writer/tests/test_files.py +0 -743
  415. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  416. nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
  417. nucliadb/writer/tests/test_resources.py +0 -546
  418. nucliadb/writer/tests/test_service.py +0 -137
  419. nucliadb/writer/tests/test_tus.py +0 -203
  420. nucliadb/writer/tests/utils.py +0 -35
  421. nucliadb/writer/tus/pg.py +0 -125
  422. nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
  423. nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
  424. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  425. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  426. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  427. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  428. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  429. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
  430. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
  431. {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -17,8 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from datetime import datetime
21
- from typing import Optional, Union
20
+ from typing import Optional, Union, cast
22
21
 
23
22
  from fastapi import Header, Request, Response
24
23
  from fastapi_versioning import version
@@ -27,28 +26,27 @@ from nucliadb.models.responses import HTTPClientError
27
26
  from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
28
27
  from nucliadb.search.api.v1.utils import fastapi_query
29
28
  from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
29
+ from nucliadb.search.search import cache
30
30
  from nucliadb.search.search.exceptions import InvalidQueryError
31
31
  from nucliadb.search.search.merge import merge_paragraphs_results
32
32
  from nucliadb.search.search.query import paragraph_query_to_pb
33
- from nucliadb_models.common import FieldTypeName
34
- from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
33
+ from nucliadb_models.resource import NucliaDBRoles
35
34
  from nucliadb_models.search import (
36
35
  NucliaDBClientType,
37
- ResourceProperties,
38
36
  ResourceSearchResults,
39
- SearchOptions,
40
37
  SearchParamDefaults,
41
38
  SortField,
42
39
  SortOrder,
43
40
  )
41
+ from nucliadb_models.utils import DateTime
44
42
  from nucliadb_utils.authentication import requires_one
45
43
 
46
44
 
47
45
  @api.get(
48
46
  f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/search",
49
47
  status_code=200,
50
- name="Search on Resource",
51
- description="Search on a Resource",
48
+ summary="Search on Resource",
49
+ description="Search on a single resource",
52
50
  tags=["Search"],
53
51
  response_model_exclude_unset=True,
54
52
  response_model=ResourceSearchResults,
@@ -64,81 +62,62 @@ async def resource_search(
64
62
  fields: list[str] = fastapi_query(SearchParamDefaults.fields),
65
63
  filters: list[str] = fastapi_query(SearchParamDefaults.filters),
66
64
  faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
67
- sort: Optional[SortField] = fastapi_query(
68
- SearchParamDefaults.sort_field, alias="sort_field"
69
- ),
65
+ sort: Optional[SortField] = fastapi_query(SearchParamDefaults.sort_field, alias="sort_field"),
70
66
  sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
71
- page_number: int = fastapi_query(SearchParamDefaults.page_number),
72
- page_size: int = fastapi_query(SearchParamDefaults.page_size),
73
- range_creation_start: Optional[datetime] = fastapi_query(
74
- SearchParamDefaults.range_creation_start
75
- ),
76
- range_creation_end: Optional[datetime] = fastapi_query(
77
- SearchParamDefaults.range_creation_end
78
- ),
79
- range_modification_start: Optional[datetime] = fastapi_query(
67
+ top_k: Optional[int] = fastapi_query(SearchParamDefaults.top_k),
68
+ range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
69
+ range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
70
+ range_modification_start: Optional[DateTime] = fastapi_query(
80
71
  SearchParamDefaults.range_modification_start
81
72
  ),
82
- range_modification_end: Optional[datetime] = fastapi_query(
73
+ range_modification_end: Optional[DateTime] = fastapi_query(
83
74
  SearchParamDefaults.range_modification_end
84
75
  ),
85
76
  highlight: bool = fastapi_query(SearchParamDefaults.highlight),
86
- show: list[ResourceProperties] = fastapi_query(
87
- SearchParamDefaults.show, default=list(ResourceProperties)
88
- ),
89
- field_type_filter: list[FieldTypeName] = fastapi_query(
90
- SearchParamDefaults.field_type_filter, alias="field_type"
91
- ),
92
- extracted: list[ExtractedDataTypeName] = fastapi_query(
93
- SearchParamDefaults.extracted
94
- ),
95
77
  x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
96
78
  debug: bool = fastapi_query(SearchParamDefaults.debug),
97
79
  shards: list[str] = fastapi_query(SearchParamDefaults.shards),
98
80
  ) -> Union[ResourceSearchResults, HTTPClientError]:
99
- # We need to query all nodes
100
- try:
101
- pb_query = await paragraph_query_to_pb(
102
- kbid,
103
- [SearchOptions.PARAGRAPH],
104
- rid,
105
- query,
106
- fields,
107
- filters,
108
- faceted,
109
- page_number,
110
- page_size,
111
- range_creation_start,
112
- range_creation_end,
113
- range_modification_start,
114
- range_modification_end,
115
- sort=sort.value if sort else None,
116
- sort_ord=sort_order.value,
117
- )
118
- except InvalidQueryError as exc:
119
- return HTTPClientError(status_code=412, detail=str(exc))
81
+ top_k = top_k or SearchParamDefaults.top_k # type: ignore
82
+ top_k = cast(int, top_k)
120
83
 
121
- results, incomplete_results, queried_nodes = await node_query(
122
- kbid, Method.PARAGRAPH, pb_query, shards
123
- )
84
+ with cache.request_caches():
85
+ try:
86
+ pb_query = await paragraph_query_to_pb(
87
+ kbid,
88
+ rid,
89
+ query,
90
+ fields,
91
+ filters,
92
+ faceted,
93
+ top_k,
94
+ range_creation_start,
95
+ range_creation_end,
96
+ range_modification_start,
97
+ range_modification_end,
98
+ sort=sort.value if sort else None,
99
+ sort_ord=sort_order.value,
100
+ )
101
+ except InvalidQueryError as exc:
102
+ return HTTPClientError(status_code=412, detail=str(exc))
124
103
 
125
- # We need to merge
126
- search_results = await merge_paragraphs_results(
127
- results,
128
- count=page_size,
129
- page=page_number,
130
- kbid=kbid,
131
- show=show,
132
- field_type_filter=field_type_filter,
133
- extracted=extracted,
134
- highlight_split=highlight,
135
- min_score=0.0,
136
- )
104
+ results, incomplete_results, queried_nodes = await node_query(
105
+ kbid, Method.SEARCH, pb_query, shards
106
+ )
107
+
108
+ # We need to merge
109
+ search_results = await merge_paragraphs_results(
110
+ results,
111
+ top_k=top_k,
112
+ kbid=kbid,
113
+ highlight_split=highlight,
114
+ min_score=0.0,
115
+ )
137
116
 
138
- response.status_code = 206 if incomplete_results else 200
139
- if debug:
140
- search_results.nodes = debug_nodes_info(queried_nodes)
117
+ response.status_code = 206 if incomplete_results else 200
118
+ if debug:
119
+ search_results.nodes = debug_nodes_info(queried_nodes)
141
120
 
142
- queried_shards = [shard_id for _, shard_id in queried_nodes]
143
- search_results.shards = queried_shards
144
- return search_results
121
+ queried_shards = [shard_id for _, shard_id in queried_nodes]
122
+ search_results.shards = queried_shards
123
+ return search_results
@@ -24,3 +24,4 @@ api = APIRouter()
24
24
  KB_PREFIX = "kb"
25
25
  KBS_PREFIX = "kbs"
26
26
  RESOURCE_PREFIX = "resource"
27
+ RESOURCE_SLUG_PREFIX = "slug"
@@ -18,25 +18,27 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import json
21
- from datetime import datetime
22
21
  from time import time
23
22
  from typing import Optional, Union
24
23
 
25
24
  from fastapi import Body, Header, Query, Request, Response
26
25
  from fastapi.openapi.models import Example
27
26
  from fastapi_versioning import version
28
- from pydantic.error_wrappers import ValidationError
27
+ from pydantic import ValidationError
29
28
 
30
29
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
30
+ from nucliadb.common.models_utils import to_proto
31
31
  from nucliadb.models.responses import HTTPClientError
32
32
  from nucliadb.search import predict
33
33
  from nucliadb.search.api.v1.router import KB_PREFIX, api
34
34
  from nucliadb.search.api.v1.utils import fastapi_query
35
35
  from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
36
+ from nucliadb.search.search import cache
36
37
  from nucliadb.search.search.exceptions import InvalidQueryError
37
38
  from nucliadb.search.search.merge import merge_results
38
39
  from nucliadb.search.search.query import QueryParser
39
40
  from nucliadb.search.search.utils import (
41
+ filter_hidden_resources,
40
42
  min_score_from_payload,
41
43
  min_score_from_query_params,
42
44
  should_disable_vector_search,
@@ -45,9 +47,7 @@ from nucliadb_models.common import FieldTypeName
45
47
  from nucliadb_models.metadata import ResourceProcessingStatus
46
48
  from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
47
49
  from nucliadb_models.search import (
48
- CatalogRequest,
49
50
  KnowledgeboxSearchResults,
50
- MinScore,
51
51
  NucliaDBClientType,
52
52
  ResourceProperties,
53
53
  SearchOptions,
@@ -58,6 +58,7 @@ from nucliadb_models.search import (
58
58
  SortOrder,
59
59
  )
60
60
  from nucliadb_models.security import RequestSecurity
61
+ from nucliadb_models.utils import DateTime
61
62
  from nucliadb_utils.authentication import requires
62
63
  from nucliadb_utils.exceptions import LimitsExceededError
63
64
  from nucliadb_utils.utilities import get_audit
@@ -69,7 +70,7 @@ SEARCH_EXAMPLES = {
69
70
  value={
70
71
  "query": "Noam Chomsky",
71
72
  "filters": ["/icon/application/pdf"],
72
- "features": [SearchOptions.DOCUMENT],
73
+ "features": [SearchOptions.FULLTEXT],
73
74
  },
74
75
  ),
75
76
  "get_language_counts": Example(
@@ -78,7 +79,7 @@ SEARCH_EXAMPLES = {
78
79
  value={
79
80
  "page_size": 0,
80
81
  "faceted": ["/s/p"],
81
- "features": [SearchOptions.DOCUMENT],
82
+ "features": [SearchOptions.FULLTEXT],
82
83
  },
83
84
  ),
84
85
  }
@@ -87,8 +88,8 @@ SEARCH_EXAMPLES = {
87
88
  @api.get(
88
89
  f"/{KB_PREFIX}/{{kbid}}/search",
89
90
  status_code=200,
90
- name="Search Knowledge Box",
91
- description="Search on a Knowledge Box",
91
+ summary="Search Knowledge Box",
92
+ description="Search on a Knowledge Box and retrieve separate results for documents, paragraphs, and sentences. Usually, it is better to use `find`", # noqa: E501
92
93
  response_model=KnowledgeboxSearchResults,
93
94
  response_model_exclude_unset=True,
94
95
  tags=["Search"],
@@ -106,40 +107,36 @@ async def search_knowledgebox(
106
107
  sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
107
108
  sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
108
109
  sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
109
- page_number: int = fastapi_query(SearchParamDefaults.page_number),
110
- page_size: int = fastapi_query(SearchParamDefaults.page_size),
110
+ top_k: int = fastapi_query(SearchParamDefaults.top_k),
111
111
  min_score: Optional[float] = Query(
112
112
  default=None,
113
- description="Minimum similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/docs/using/search/#minimum-score", # noqa: E501
113
+ description="Minimum similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
114
114
  deprecated=True,
115
115
  ),
116
116
  min_score_semantic: Optional[float] = Query(
117
117
  default=None,
118
- description="Minimum semantic similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/docs/using/search/#minimum-score", # noqa: E501
118
+ description="Minimum semantic similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
119
119
  ),
120
120
  min_score_bm25: float = Query(
121
121
  default=0,
122
122
  description="Minimum bm25 score to filter paragraph and document index results",
123
123
  ge=0,
124
124
  ),
125
- range_creation_start: Optional[datetime] = fastapi_query(
126
- SearchParamDefaults.range_creation_start
127
- ),
128
- range_creation_end: Optional[datetime] = fastapi_query(
129
- SearchParamDefaults.range_creation_end
130
- ),
131
- range_modification_start: Optional[datetime] = fastapi_query(
125
+ vectorset: Optional[str] = fastapi_query(SearchParamDefaults.vectorset),
126
+ range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
127
+ range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
128
+ range_modification_start: Optional[DateTime] = fastapi_query(
132
129
  SearchParamDefaults.range_modification_start
133
130
  ),
134
- range_modification_end: Optional[datetime] = fastapi_query(
131
+ range_modification_end: Optional[DateTime] = fastapi_query(
135
132
  SearchParamDefaults.range_modification_end
136
133
  ),
137
134
  features: list[SearchOptions] = fastapi_query(
138
135
  SearchParamDefaults.search_features,
139
136
  default=[
140
- SearchOptions.PARAGRAPH,
141
- SearchOptions.DOCUMENT,
142
- SearchOptions.VECTOR,
137
+ SearchOptions.KEYWORD,
138
+ SearchOptions.FULLTEXT,
139
+ SearchOptions.SEMANTIC,
143
140
  ],
144
141
  ),
145
142
  debug: bool = fastapi_query(SearchParamDefaults.debug),
@@ -148,14 +145,13 @@ async def search_knowledgebox(
148
145
  field_type_filter: list[FieldTypeName] = fastapi_query(
149
146
  SearchParamDefaults.field_type_filter, alias="field_type"
150
147
  ),
151
- extracted: list[ExtractedDataTypeName] = fastapi_query(
152
- SearchParamDefaults.extracted
153
- ),
148
+ extracted: list[ExtractedDataTypeName] = fastapi_query(SearchParamDefaults.extracted),
154
149
  shards: list[str] = fastapi_query(SearchParamDefaults.shards),
155
150
  with_duplicates: bool = fastapi_query(SearchParamDefaults.with_duplicates),
156
151
  with_synonyms: bool = fastapi_query(SearchParamDefaults.with_synonyms),
157
152
  autofilter: bool = fastapi_query(SearchParamDefaults.autofilter),
158
153
  security_groups: list[str] = fastapi_query(SearchParamDefaults.security_groups),
154
+ show_hidden: bool = fastapi_query(SearchParamDefaults.show_hidden),
159
155
  x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
160
156
  x_nucliadb_user: str = Header(""),
161
157
  x_forwarded_for: str = Header(""),
@@ -174,11 +170,9 @@ async def search_knowledgebox(
174
170
  if sort_field is not None
175
171
  else None
176
172
  ),
177
- page_number=page_number,
178
- page_size=page_size,
179
- min_score=min_score_from_query_params(
180
- min_score_bm25, min_score_semantic, min_score
181
- ),
173
+ top_k=top_k,
174
+ min_score=min_score_from_query_params(min_score_bm25, min_score_semantic, min_score),
175
+ vectorset=vectorset,
182
176
  range_creation_end=range_creation_end,
183
177
  range_creation_start=range_creation_start,
184
178
  range_modification_end=range_modification_end,
@@ -194,160 +188,19 @@ async def search_knowledgebox(
194
188
  with_synonyms=with_synonyms,
195
189
  autofilter=autofilter,
196
190
  security=security,
191
+ show_hidden=show_hidden,
197
192
  )
198
193
  except ValidationError as exc:
199
194
  detail = json.loads(exc.json())
200
195
  return HTTPClientError(status_code=422, detail=detail)
201
- return await _search_endpoint(
202
- response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for
203
- )
204
-
205
-
206
- @api.get(
207
- f"/{KB_PREFIX}/{{kbid}}/catalog",
208
- status_code=200,
209
- name="List resources of a Knowledge Box",
210
- description="List resources of a Knowledge Box",
211
- response_model=KnowledgeboxSearchResults,
212
- response_model_exclude_unset=True,
213
- tags=["Search"],
214
- )
215
- @requires(NucliaDBRoles.READER)
216
- @version(1)
217
- async def catalog_get(
218
- request: Request,
219
- response: Response,
220
- kbid: str,
221
- query: str = fastapi_query(SearchParamDefaults.query),
222
- filters: list[str] = fastapi_query(SearchParamDefaults.filters),
223
- faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
224
- sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
225
- sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
226
- sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
227
- page_number: int = fastapi_query(SearchParamDefaults.page_number),
228
- page_size: int = fastapi_query(SearchParamDefaults.page_size),
229
- shards: list[str] = fastapi_query(SearchParamDefaults.shards),
230
- with_status: Optional[ResourceProcessingStatus] = fastapi_query(
231
- SearchParamDefaults.with_status
232
- ),
233
- debug: bool = fastapi_query(SearchParamDefaults.debug),
234
- ) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
235
- item = CatalogRequest(
236
- query=query,
237
- filters=filters,
238
- faceted=faceted,
239
- page_number=page_number,
240
- page_size=page_size,
241
- shards=shards,
242
- debug=debug,
243
- with_status=with_status,
244
- )
245
- if sort_field:
246
- item.sort = SortOptions(field=sort_field, limit=sort_limit, order=sort_order)
247
- return await catalog(kbid, item)
248
-
249
-
250
- @api.post(
251
- f"/{KB_PREFIX}/{{kbid}}/catalog",
252
- status_code=200,
253
- name="List resources of a Knowledge Box",
254
- description="List resources of a Knowledge Box",
255
- response_model=KnowledgeboxSearchResults,
256
- response_model_exclude_unset=True,
257
- tags=["Search"],
258
- )
259
- @requires(NucliaDBRoles.READER)
260
- @version(1)
261
- async def catalog_post(
262
- request: Request,
263
- kbid: str,
264
- item: CatalogRequest,
265
- ) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
266
- return await catalog(kbid, item)
267
-
268
-
269
- async def catalog(
270
- kbid: str,
271
- item: CatalogRequest,
272
- ):
273
- """
274
- Catalog endpoint is a simplified version of the search endpoint, it only
275
- returns bm25 results on titles and it does not support vector search.
276
- It is useful for listing resources in a knowledge box.
277
- """
278
- try:
279
- sort = item.sort
280
- if item.sort is None:
281
- # By default we sort by creation date (most recent first)
282
- sort = SortOptions(
283
- field=SortField.CREATED,
284
- order=SortOrder.DESC,
285
- limit=None,
286
- )
287
-
288
- query_parser = QueryParser(
289
- kbid=kbid,
290
- features=[SearchOptions.DOCUMENT],
291
- query=item.query,
292
- filters=item.filters,
293
- faceted=item.faceted,
294
- sort=sort,
295
- page_number=item.page_number,
296
- page_size=item.page_size,
297
- min_score=MinScore(bm25=0, semantic=0),
298
- fields=["a/title"],
299
- with_status=item.with_status,
300
- )
301
- pb_query, _, _ = await query_parser.parse()
302
-
303
- (results, _, queried_nodes) = await node_query(
304
- kbid,
305
- Method.SEARCH,
306
- pb_query,
307
- target_shard_replicas=item.shards,
308
- # Catalog should not go to read replicas because we want it to be
309
- # consistent and most up to date results
310
- use_read_replica_nodes=False,
311
- )
312
-
313
- # We need to merge
314
- search_results = await merge_results(
315
- results,
316
- count=item.page_size,
317
- page=item.page_number,
318
- kbid=kbid,
319
- show=[ResourceProperties.BASIC],
320
- field_type_filter=[],
321
- extracted=[],
322
- sort=sort,
323
- requested_relations=pb_query.relation_subgraph,
324
- min_score=query_parser.min_score,
325
- highlight=False,
326
- )
327
- # We don't need sentences, paragraphs or relations on the catalog
328
- # response, so we set to None so that fastapi doesn't include them
329
- # in the response payload
330
- search_results.sentences = None
331
- search_results.paragraphs = None
332
- search_results.relations = None
333
- if item.debug:
334
- search_results.nodes = debug_nodes_info(queried_nodes)
335
- queried_shards = [shard_id for _, shard_id in queried_nodes]
336
- search_results.shards = queried_shards
337
- return search_results
338
- except InvalidQueryError as exc:
339
- return HTTPClientError(status_code=412, detail=str(exc))
340
- except KnowledgeBoxNotFound:
341
- return HTTPClientError(status_code=404, detail="Knowledge Box not found")
342
- except LimitsExceededError as exc:
343
- return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
196
+ return await _search_endpoint(response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for)
344
197
 
345
198
 
346
199
  @api.post(
347
200
  f"/{KB_PREFIX}/{{kbid}}/search",
348
201
  status_code=200,
349
- name="Search Knowledge Box",
350
- description="Search on a Knowledge Box",
202
+ summary="Search Knowledge Box",
203
+ description="Search on a Knowledge Box and retrieve separate results for documents, paragraphs, and sentences. Usually, it is better to use `find`", # noqa: E501
351
204
  response_model=KnowledgeboxSearchResults,
352
205
  response_model_exclude_unset=True,
353
206
  tags=["Search"],
@@ -363,9 +216,7 @@ async def search_post_knowledgebox(
363
216
  x_nucliadb_user: str = Header(""),
364
217
  x_forwarded_for: str = Header(""),
365
218
  ) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
366
- return await _search_endpoint(
367
- response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for
368
- )
219
+ return await _search_endpoint(response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for)
369
220
 
370
221
 
371
222
  async def _search_endpoint(
@@ -377,13 +228,13 @@ async def _search_endpoint(
377
228
  x_forwarded_for: str,
378
229
  **kwargs,
379
230
  ) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
380
- # All endpoint logic should be here
381
231
  try:
382
- results, incomplete = await search(
383
- kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, **kwargs
384
- )
385
- response.status_code = 206 if incomplete else 200
386
- return results
232
+ with cache.request_caches():
233
+ results, incomplete = await search(
234
+ kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, **kwargs
235
+ )
236
+ response.status_code = 206 if incomplete else 200
237
+ return results
387
238
  except KnowledgeBoxNotFound:
388
239
  return HTTPClientError(status_code=404, detail="Knowledge Box not found")
389
240
  except LimitsExceededError as exc:
@@ -392,8 +243,8 @@ async def _search_endpoint(
392
243
  return HTTPClientError(status_code=412, detail=str(exc))
393
244
  except predict.ProxiedPredictAPIError as err:
394
245
  return HTTPClientError(
395
- status_code=503,
396
- detail=f"Inference service unavailable. {err.status}: {err.detail}",
246
+ status_code=err.status,
247
+ detail=err.detail,
397
248
  )
398
249
 
399
250
 
@@ -411,20 +262,20 @@ async def search(
411
262
 
412
263
  item.min_score = min_score_from_payload(item.min_score)
413
264
 
414
- if SearchOptions.VECTOR in item.features:
265
+ if SearchOptions.SEMANTIC in item.features:
415
266
  if should_disable_vector_search(item):
416
- item.features.remove(SearchOptions.VECTOR)
267
+ item.features.remove(SearchOptions.SEMANTIC)
417
268
 
418
269
  # We need to query all nodes
419
270
  query_parser = QueryParser(
420
271
  kbid=kbid,
421
272
  features=item.features,
422
273
  query=item.query,
423
- filters=item.filters,
274
+ label_filters=item.filters,
275
+ keyword_filters=[],
424
276
  faceted=item.faceted,
425
277
  sort=item.sort,
426
- page_number=item.page_number,
427
- page_size=item.page_size,
278
+ top_k=item.top_k,
428
279
  min_score=item.min_score,
429
280
  range_creation_start=item.range_creation_start,
430
281
  range_creation_end=item.range_creation_end,
@@ -439,6 +290,8 @@ async def search(
439
290
  autofilter=item.autofilter,
440
291
  security=item.security,
441
292
  rephrase=item.rephrase,
293
+ hidden=await filter_hidden_resources(kbid, item.show_hidden),
294
+ rephrase_prompt=item.rephrase_prompt,
442
295
  )
443
296
  pb_query, incomplete_results, autofilters = await query_parser.parse()
444
297
 
@@ -451,28 +304,28 @@ async def search(
451
304
  # We need to merge
452
305
  search_results = await merge_results(
453
306
  results,
454
- count=item.page_size,
455
- page=item.page_number,
307
+ top_k=item.top_k,
456
308
  kbid=kbid,
457
309
  show=item.show,
458
310
  field_type_filter=item.field_type_filter,
459
311
  extracted=item.extracted,
460
- sort=query_parser.sort,
312
+ sort=query_parser.sort, # type: ignore
461
313
  requested_relations=pb_query.relation_subgraph,
462
314
  min_score=query_parser.min_score,
463
315
  highlight=item.highlight,
464
316
  )
465
317
 
466
318
  if audit is not None and do_audit:
467
- await audit.search(
319
+ audit.search(
468
320
  kbid,
469
321
  x_nucliadb_user,
470
- x_ndb_client.to_proto(),
322
+ to_proto.client_type(x_ndb_client),
471
323
  x_forwarded_for,
472
324
  pb_query,
473
325
  time() - start_time,
474
326
  len(search_results.resources),
475
327
  )
328
+
476
329
  if item.debug:
477
330
  search_results.nodes = debug_nodes_info(queried_nodes)
478
331