nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418)
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -17,8 +17,7 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
- from datetime import datetime
21
- from typing import Optional, Union
20
+ from typing import Optional, Union, cast
22
21
 
23
22
  from fastapi import Header, Request, Response
24
23
  from fastapi_versioning import version
@@ -27,20 +26,19 @@ from nucliadb.models.responses import HTTPClientError
27
26
  from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
28
27
  from nucliadb.search.api.v1.utils import fastapi_query
29
28
  from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
29
+ from nucliadb.search.search import cache
30
30
  from nucliadb.search.search.exceptions import InvalidQueryError
31
31
  from nucliadb.search.search.merge import merge_paragraphs_results
32
32
  from nucliadb.search.search.query import paragraph_query_to_pb
33
- from nucliadb_models.common import FieldTypeName
34
- from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
33
+ from nucliadb_models.resource import NucliaDBRoles
35
34
  from nucliadb_models.search import (
36
35
  NucliaDBClientType,
37
- ResourceProperties,
38
36
  ResourceSearchResults,
39
- SearchOptions,
40
37
  SearchParamDefaults,
41
38
  SortField,
42
39
  SortOrder,
43
40
  )
41
+ from nucliadb_models.utils import DateTime
44
42
  from nucliadb_utils.authentication import requires_one
45
43
 
46
44
 
@@ -64,81 +62,62 @@ async def resource_search(
64
62
  fields: list[str] = fastapi_query(SearchParamDefaults.fields),
65
63
  filters: list[str] = fastapi_query(SearchParamDefaults.filters),
66
64
  faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
67
- sort: Optional[SortField] = fastapi_query(
68
- SearchParamDefaults.sort_field, alias="sort_field"
69
- ),
65
+ sort: Optional[SortField] = fastapi_query(SearchParamDefaults.sort_field, alias="sort_field"),
70
66
  sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
71
- page_number: int = fastapi_query(SearchParamDefaults.page_number),
72
- page_size: int = fastapi_query(SearchParamDefaults.page_size),
73
- range_creation_start: Optional[datetime] = fastapi_query(
74
- SearchParamDefaults.range_creation_start
75
- ),
76
- range_creation_end: Optional[datetime] = fastapi_query(
77
- SearchParamDefaults.range_creation_end
78
- ),
79
- range_modification_start: Optional[datetime] = fastapi_query(
67
+ top_k: Optional[int] = fastapi_query(SearchParamDefaults.top_k),
68
+ range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
69
+ range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
70
+ range_modification_start: Optional[DateTime] = fastapi_query(
80
71
  SearchParamDefaults.range_modification_start
81
72
  ),
82
- range_modification_end: Optional[datetime] = fastapi_query(
73
+ range_modification_end: Optional[DateTime] = fastapi_query(
83
74
  SearchParamDefaults.range_modification_end
84
75
  ),
85
76
  highlight: bool = fastapi_query(SearchParamDefaults.highlight),
86
- show: list[ResourceProperties] = fastapi_query(
87
- SearchParamDefaults.show, default=list(ResourceProperties)
88
- ),
89
- field_type_filter: list[FieldTypeName] = fastapi_query(
90
- SearchParamDefaults.field_type_filter, alias="field_type"
91
- ),
92
- extracted: list[ExtractedDataTypeName] = fastapi_query(
93
- SearchParamDefaults.extracted
94
- ),
95
77
  x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
96
78
  debug: bool = fastapi_query(SearchParamDefaults.debug),
97
79
  shards: list[str] = fastapi_query(SearchParamDefaults.shards),
98
80
  ) -> Union[ResourceSearchResults, HTTPClientError]:
99
- # We need to query all nodes
100
- try:
101
- pb_query = await paragraph_query_to_pb(
102
- kbid,
103
- [SearchOptions.PARAGRAPH],
104
- rid,
105
- query,
106
- fields,
107
- filters,
108
- faceted,
109
- page_number,
110
- page_size,
111
- range_creation_start,
112
- range_creation_end,
113
- range_modification_start,
114
- range_modification_end,
115
- sort=sort.value if sort else None,
116
- sort_ord=sort_order.value,
117
- )
118
- except InvalidQueryError as exc:
119
- return HTTPClientError(status_code=412, detail=str(exc))
81
+ top_k = top_k or SearchParamDefaults.top_k # type: ignore
82
+ top_k = cast(int, top_k)
120
83
 
121
- results, incomplete_results, queried_nodes = await node_query(
122
- kbid, Method.PARAGRAPH, pb_query, shards
123
- )
84
+ with cache.request_caches():
85
+ try:
86
+ pb_query = await paragraph_query_to_pb(
87
+ kbid,
88
+ rid,
89
+ query,
90
+ fields,
91
+ filters,
92
+ faceted,
93
+ top_k,
94
+ range_creation_start,
95
+ range_creation_end,
96
+ range_modification_start,
97
+ range_modification_end,
98
+ sort=sort.value if sort else None,
99
+ sort_ord=sort_order.value,
100
+ )
101
+ except InvalidQueryError as exc:
102
+ return HTTPClientError(status_code=412, detail=str(exc))
124
103
 
125
- # We need to merge
126
- search_results = await merge_paragraphs_results(
127
- results,
128
- count=page_size,
129
- page=page_number,
130
- kbid=kbid,
131
- show=show,
132
- field_type_filter=field_type_filter,
133
- extracted=extracted,
134
- highlight_split=highlight,
135
- min_score=0.0,
136
- )
104
+ results, incomplete_results, queried_nodes = await node_query(
105
+ kbid, Method.SEARCH, pb_query, shards
106
+ )
107
+
108
+ # We need to merge
109
+ search_results = await merge_paragraphs_results(
110
+ results,
111
+ top_k=top_k,
112
+ kbid=kbid,
113
+ highlight_split=highlight,
114
+ min_score=0.0,
115
+ )
137
116
 
138
- response.status_code = 206 if incomplete_results else 200
139
- if debug:
140
- search_results.nodes = debug_nodes_info(queried_nodes)
117
+ response.status_code = 206 if incomplete_results else 200
118
+ if debug:
119
+ search_results.nodes = debug_nodes_info(queried_nodes)
141
120
 
142
- queried_shards = [shard_id for _, shard_id in queried_nodes]
143
- search_results.shards = queried_shards
144
- return search_results
121
+ queried_shards = [shard_id for _, shard_id in queried_nodes]
122
+ search_results.shards = queried_shards
123
+ return search_results
@@ -18,7 +18,6 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import json
21
- from datetime import datetime
22
21
  from time import time
23
22
  from typing import Optional, Union
24
23
 
@@ -28,15 +27,18 @@ from fastapi_versioning import version
28
27
  from pydantic import ValidationError
29
28
 
30
29
  from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
30
+ from nucliadb.common.models_utils import to_proto
31
31
  from nucliadb.models.responses import HTTPClientError
32
32
  from nucliadb.search import predict
33
33
  from nucliadb.search.api.v1.router import KB_PREFIX, api
34
34
  from nucliadb.search.api.v1.utils import fastapi_query
35
35
  from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
36
+ from nucliadb.search.search import cache
36
37
  from nucliadb.search.search.exceptions import InvalidQueryError
37
38
  from nucliadb.search.search.merge import merge_results
38
39
  from nucliadb.search.search.query import QueryParser
39
40
  from nucliadb.search.search.utils import (
41
+ filter_hidden_resources,
40
42
  min_score_from_payload,
41
43
  min_score_from_query_params,
42
44
  should_disable_vector_search,
@@ -45,9 +47,7 @@ from nucliadb_models.common import FieldTypeName
45
47
  from nucliadb_models.metadata import ResourceProcessingStatus
46
48
  from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
47
49
  from nucliadb_models.search import (
48
- CatalogRequest,
49
50
  KnowledgeboxSearchResults,
50
- MinScore,
51
51
  NucliaDBClientType,
52
52
  ResourceProperties,
53
53
  SearchOptions,
@@ -58,6 +58,7 @@ from nucliadb_models.search import (
58
58
  SortOrder,
59
59
  )
60
60
  from nucliadb_models.security import RequestSecurity
61
+ from nucliadb_models.utils import DateTime
61
62
  from nucliadb_utils.authentication import requires
62
63
  from nucliadb_utils.exceptions import LimitsExceededError
63
64
  from nucliadb_utils.utilities import get_audit
@@ -69,7 +70,7 @@ SEARCH_EXAMPLES = {
69
70
  value={
70
71
  "query": "Noam Chomsky",
71
72
  "filters": ["/icon/application/pdf"],
72
- "features": [SearchOptions.DOCUMENT],
73
+ "features": [SearchOptions.FULLTEXT],
73
74
  },
74
75
  ),
75
76
  "get_language_counts": Example(
@@ -78,7 +79,7 @@ SEARCH_EXAMPLES = {
78
79
  value={
79
80
  "page_size": 0,
80
81
  "faceted": ["/s/p"],
81
- "features": [SearchOptions.DOCUMENT],
82
+ "features": [SearchOptions.FULLTEXT],
82
83
  },
83
84
  ),
84
85
  }
@@ -106,40 +107,36 @@ async def search_knowledgebox(
106
107
  sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
107
108
  sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
108
109
  sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
109
- page_number: int = fastapi_query(SearchParamDefaults.page_number),
110
- page_size: int = fastapi_query(SearchParamDefaults.page_size),
110
+ top_k: int = fastapi_query(SearchParamDefaults.top_k),
111
111
  min_score: Optional[float] = Query(
112
112
  default=None,
113
- description="Minimum similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/docs/using/search/#minimum-score", # noqa: E501
113
+ description="Minimum similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
114
114
  deprecated=True,
115
115
  ),
116
116
  min_score_semantic: Optional[float] = Query(
117
117
  default=None,
118
- description="Minimum semantic similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/docs/using/search/#minimum-score", # noqa: E501
118
+ description="Minimum semantic similarity score to filter vector index results. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
119
119
  ),
120
120
  min_score_bm25: float = Query(
121
121
  default=0,
122
122
  description="Minimum bm25 score to filter paragraph and document index results",
123
123
  ge=0,
124
124
  ),
125
- range_creation_start: Optional[datetime] = fastapi_query(
126
- SearchParamDefaults.range_creation_start
127
- ),
128
- range_creation_end: Optional[datetime] = fastapi_query(
129
- SearchParamDefaults.range_creation_end
130
- ),
131
- range_modification_start: Optional[datetime] = fastapi_query(
125
+ vectorset: Optional[str] = fastapi_query(SearchParamDefaults.vectorset),
126
+ range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
127
+ range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
128
+ range_modification_start: Optional[DateTime] = fastapi_query(
132
129
  SearchParamDefaults.range_modification_start
133
130
  ),
134
- range_modification_end: Optional[datetime] = fastapi_query(
131
+ range_modification_end: Optional[DateTime] = fastapi_query(
135
132
  SearchParamDefaults.range_modification_end
136
133
  ),
137
134
  features: list[SearchOptions] = fastapi_query(
138
135
  SearchParamDefaults.search_features,
139
136
  default=[
140
- SearchOptions.PARAGRAPH,
141
- SearchOptions.DOCUMENT,
142
- SearchOptions.VECTOR,
137
+ SearchOptions.KEYWORD,
138
+ SearchOptions.FULLTEXT,
139
+ SearchOptions.SEMANTIC,
143
140
  ],
144
141
  ),
145
142
  debug: bool = fastapi_query(SearchParamDefaults.debug),
@@ -148,14 +145,13 @@ async def search_knowledgebox(
148
145
  field_type_filter: list[FieldTypeName] = fastapi_query(
149
146
  SearchParamDefaults.field_type_filter, alias="field_type"
150
147
  ),
151
- extracted: list[ExtractedDataTypeName] = fastapi_query(
152
- SearchParamDefaults.extracted
153
- ),
148
+ extracted: list[ExtractedDataTypeName] = fastapi_query(SearchParamDefaults.extracted),
154
149
  shards: list[str] = fastapi_query(SearchParamDefaults.shards),
155
150
  with_duplicates: bool = fastapi_query(SearchParamDefaults.with_duplicates),
156
151
  with_synonyms: bool = fastapi_query(SearchParamDefaults.with_synonyms),
157
152
  autofilter: bool = fastapi_query(SearchParamDefaults.autofilter),
158
153
  security_groups: list[str] = fastapi_query(SearchParamDefaults.security_groups),
154
+ show_hidden: bool = fastapi_query(SearchParamDefaults.show_hidden),
159
155
  x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
160
156
  x_nucliadb_user: str = Header(""),
161
157
  x_forwarded_for: str = Header(""),
@@ -174,11 +170,9 @@ async def search_knowledgebox(
174
170
  if sort_field is not None
175
171
  else None
176
172
  ),
177
- page_number=page_number,
178
- page_size=page_size,
179
- min_score=min_score_from_query_params(
180
- min_score_bm25, min_score_semantic, min_score
181
- ),
173
+ top_k=top_k,
174
+ min_score=min_score_from_query_params(min_score_bm25, min_score_semantic, min_score),
175
+ vectorset=vectorset,
182
176
  range_creation_end=range_creation_end,
183
177
  range_creation_start=range_creation_start,
184
178
  range_modification_end=range_modification_end,
@@ -194,173 +188,12 @@ async def search_knowledgebox(
194
188
  with_synonyms=with_synonyms,
195
189
  autofilter=autofilter,
196
190
  security=security,
191
+ show_hidden=show_hidden,
197
192
  )
198
193
  except ValidationError as exc:
199
194
  detail = json.loads(exc.json())
200
195
  return HTTPClientError(status_code=422, detail=detail)
201
- return await _search_endpoint(
202
- response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for
203
- )
204
-
205
-
206
- @api.get(
207
- f"/{KB_PREFIX}/{{kbid}}/catalog",
208
- status_code=200,
209
- summary="List resources of a Knowledge Box",
210
- description="List resources of a Knowledge Box",
211
- response_model=KnowledgeboxSearchResults,
212
- response_model_exclude_unset=True,
213
- tags=["Search"],
214
- )
215
- @requires(NucliaDBRoles.READER)
216
- @version(1)
217
- async def catalog_get(
218
- request: Request,
219
- response: Response,
220
- kbid: str,
221
- query: str = fastapi_query(SearchParamDefaults.query),
222
- filters: list[str] = fastapi_query(SearchParamDefaults.filters),
223
- faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
224
- sort_field: SortField = fastapi_query(SearchParamDefaults.sort_field),
225
- sort_limit: Optional[int] = fastapi_query(SearchParamDefaults.sort_limit),
226
- sort_order: SortOrder = fastapi_query(SearchParamDefaults.sort_order),
227
- page_number: int = fastapi_query(SearchParamDefaults.page_number),
228
- page_size: int = fastapi_query(SearchParamDefaults.page_size),
229
- shards: list[str] = fastapi_query(SearchParamDefaults.shards),
230
- with_status: Optional[ResourceProcessingStatus] = fastapi_query(
231
- SearchParamDefaults.with_status
232
- ),
233
- debug: bool = fastapi_query(SearchParamDefaults.debug),
234
- range_creation_start: Optional[datetime] = fastapi_query(
235
- SearchParamDefaults.range_creation_start
236
- ),
237
- range_creation_end: Optional[datetime] = fastapi_query(
238
- SearchParamDefaults.range_creation_end
239
- ),
240
- range_modification_start: Optional[datetime] = fastapi_query(
241
- SearchParamDefaults.range_modification_start
242
- ),
243
- range_modification_end: Optional[datetime] = fastapi_query(
244
- SearchParamDefaults.range_modification_end
245
- ),
246
- ) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
247
- item = CatalogRequest(
248
- query=query,
249
- filters=filters,
250
- faceted=faceted,
251
- page_number=page_number,
252
- page_size=page_size,
253
- shards=shards,
254
- debug=debug,
255
- with_status=with_status,
256
- range_creation_start=range_creation_start,
257
- range_creation_end=range_creation_end,
258
- range_modification_start=range_modification_start,
259
- range_modification_end=range_modification_end,
260
- )
261
- if sort_field:
262
- item.sort = SortOptions(field=sort_field, limit=sort_limit, order=sort_order)
263
- return await catalog(kbid, item)
264
-
265
-
266
- @api.post(
267
- f"/{KB_PREFIX}/{{kbid}}/catalog",
268
- status_code=200,
269
- summary="List resources of a Knowledge Box",
270
- description="List resources of a Knowledge Box",
271
- response_model=KnowledgeboxSearchResults,
272
- response_model_exclude_unset=True,
273
- tags=["Search"],
274
- )
275
- @requires(NucliaDBRoles.READER)
276
- @version(1)
277
- async def catalog_post(
278
- request: Request,
279
- kbid: str,
280
- item: CatalogRequest,
281
- ) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
282
- return await catalog(kbid, item)
283
-
284
-
285
- async def catalog(
286
- kbid: str,
287
- item: CatalogRequest,
288
- ):
289
- """
290
- Catalog endpoint is a simplified version of the search endpoint, it only
291
- returns bm25 results on titles and it does not support vector search.
292
- It is useful for listing resources in a knowledge box.
293
- """
294
- try:
295
- sort = item.sort
296
- if item.sort is None:
297
- # By default we sort by creation date (most recent first)
298
- sort = SortOptions(
299
- field=SortField.CREATED,
300
- order=SortOrder.DESC,
301
- limit=None,
302
- )
303
-
304
- query_parser = QueryParser(
305
- kbid=kbid,
306
- features=[SearchOptions.DOCUMENT],
307
- query=item.query,
308
- filters=item.filters,
309
- faceted=item.faceted,
310
- sort=sort,
311
- page_number=item.page_number,
312
- page_size=item.page_size,
313
- min_score=MinScore(bm25=0, semantic=0),
314
- fields=["a/title"],
315
- with_status=item.with_status,
316
- range_creation_start=item.range_creation_start,
317
- range_creation_end=item.range_creation_end,
318
- range_modification_start=item.range_modification_start,
319
- range_modification_end=item.range_modification_end,
320
- )
321
- pb_query, _, _ = await query_parser.parse()
322
-
323
- (results, _, queried_nodes) = await node_query(
324
- kbid,
325
- Method.SEARCH,
326
- pb_query,
327
- target_shard_replicas=item.shards,
328
- # Catalog should not go to read replicas because we want it to be
329
- # consistent and most up to date results
330
- use_read_replica_nodes=False,
331
- )
332
-
333
- # We need to merge
334
- search_results = await merge_results(
335
- results,
336
- count=item.page_size,
337
- page=item.page_number,
338
- kbid=kbid,
339
- show=[ResourceProperties.BASIC],
340
- field_type_filter=[],
341
- extracted=[],
342
- sort=sort,
343
- requested_relations=pb_query.relation_subgraph,
344
- min_score=query_parser.min_score,
345
- highlight=False,
346
- )
347
- # We don't need sentences, paragraphs or relations on the catalog
348
- # response, so we set to None so that fastapi doesn't include them
349
- # in the response payload
350
- search_results.sentences = None
351
- search_results.paragraphs = None
352
- search_results.relations = None
353
- if item.debug:
354
- search_results.nodes = debug_nodes_info(queried_nodes)
355
- queried_shards = [shard_id for _, shard_id in queried_nodes]
356
- search_results.shards = queried_shards
357
- return search_results
358
- except InvalidQueryError as exc:
359
- return HTTPClientError(status_code=412, detail=str(exc))
360
- except KnowledgeBoxNotFound:
361
- return HTTPClientError(status_code=404, detail="Knowledge Box not found")
362
- except LimitsExceededError as exc:
363
- return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
196
+ return await _search_endpoint(response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for)
364
197
 
365
198
 
366
199
  @api.post(
@@ -383,9 +216,7 @@ async def search_post_knowledgebox(
383
216
  x_nucliadb_user: str = Header(""),
384
217
  x_forwarded_for: str = Header(""),
385
218
  ) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
386
- return await _search_endpoint(
387
- response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for
388
- )
219
+ return await _search_endpoint(response, kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for)
389
220
 
390
221
 
391
222
  async def _search_endpoint(
@@ -397,13 +228,13 @@ async def _search_endpoint(
397
228
  x_forwarded_for: str,
398
229
  **kwargs,
399
230
  ) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
400
- # All endpoint logic should be here
401
231
  try:
402
- results, incomplete = await search(
403
- kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, **kwargs
404
- )
405
- response.status_code = 206 if incomplete else 200
406
- return results
232
+ with cache.request_caches():
233
+ results, incomplete = await search(
234
+ kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, **kwargs
235
+ )
236
+ response.status_code = 206 if incomplete else 200
237
+ return results
407
238
  except KnowledgeBoxNotFound:
408
239
  return HTTPClientError(status_code=404, detail="Knowledge Box not found")
409
240
  except LimitsExceededError as exc:
@@ -431,20 +262,20 @@ async def search(
431
262
 
432
263
  item.min_score = min_score_from_payload(item.min_score)
433
264
 
434
- if SearchOptions.VECTOR in item.features:
265
+ if SearchOptions.SEMANTIC in item.features:
435
266
  if should_disable_vector_search(item):
436
- item.features.remove(SearchOptions.VECTOR)
267
+ item.features.remove(SearchOptions.SEMANTIC)
437
268
 
438
269
  # We need to query all nodes
439
270
  query_parser = QueryParser(
440
271
  kbid=kbid,
441
272
  features=item.features,
442
273
  query=item.query,
443
- filters=item.filters,
274
+ label_filters=item.filters,
275
+ keyword_filters=[],
444
276
  faceted=item.faceted,
445
277
  sort=item.sort,
446
- page_number=item.page_number,
447
- page_size=item.page_size,
278
+ top_k=item.top_k,
448
279
  min_score=item.min_score,
449
280
  range_creation_start=item.range_creation_start,
450
281
  range_creation_end=item.range_creation_end,
@@ -452,12 +283,15 @@ async def search(
452
283
  range_modification_end=item.range_modification_end,
453
284
  fields=item.fields,
454
285
  user_vector=item.vector,
286
+ vectorset=item.vectorset,
455
287
  with_duplicates=item.with_duplicates,
456
288
  with_status=with_status,
457
289
  with_synonyms=item.with_synonyms,
458
290
  autofilter=item.autofilter,
459
291
  security=item.security,
460
292
  rephrase=item.rephrase,
293
+ hidden=await filter_hidden_resources(kbid, item.show_hidden),
294
+ rephrase_prompt=item.rephrase_prompt,
461
295
  )
462
296
  pb_query, incomplete_results, autofilters = await query_parser.parse()
463
297
 
@@ -470,28 +304,28 @@ async def search(
470
304
  # We need to merge
471
305
  search_results = await merge_results(
472
306
  results,
473
- count=item.page_size,
474
- page=item.page_number,
307
+ top_k=item.top_k,
475
308
  kbid=kbid,
476
309
  show=item.show,
477
310
  field_type_filter=item.field_type_filter,
478
311
  extracted=item.extracted,
479
- sort=query_parser.sort,
312
+ sort=query_parser.sort, # type: ignore
480
313
  requested_relations=pb_query.relation_subgraph,
481
314
  min_score=query_parser.min_score,
482
315
  highlight=item.highlight,
483
316
  )
484
317
 
485
318
  if audit is not None and do_audit:
486
- await audit.search(
319
+ audit.search(
487
320
  kbid,
488
321
  x_nucliadb_user,
489
- x_ndb_client.to_proto(),
322
+ to_proto.client_type(x_ndb_client),
490
323
  x_forwarded_for,
491
324
  pb_query,
492
325
  time() - start_time,
493
326
  len(search_results.resources),
494
327
  )
328
+
495
329
  if item.debug:
496
330
  search_results.nodes = debug_nodes_info(queried_nodes)
497
331