nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. migrations/0003_allfields_key.py +1 -35
  2. migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
  3. migrations/0010_fix_corrupt_indexes.py +10 -10
  4. migrations/0011_materialize_labelset_ids.py +1 -16
  5. migrations/0012_rollover_shards.py +5 -10
  6. migrations/0014_rollover_shards.py +4 -5
  7. migrations/0015_targeted_rollover.py +5 -10
  8. migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
  9. migrations/0017_multiple_writable_shards.py +2 -4
  10. migrations/0018_purge_orphan_kbslugs.py +5 -7
  11. migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
  12. migrations/0020_drain_nodes_from_cluster.py +3 -3
  13. nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
  14. nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
  15. migrations/0023_backfill_pg_catalog.py +80 -0
  16. migrations/0025_assign_models_to_kbs_v2.py +113 -0
  17. migrations/0026_fix_high_cardinality_content_types.py +61 -0
  18. migrations/0027_rollover_texts3.py +73 -0
  19. nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
  20. migrations/pg/0002_catalog.py +42 -0
  21. nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
  22. nucliadb/common/cluster/base.py +30 -16
  23. nucliadb/common/cluster/discovery/base.py +6 -14
  24. nucliadb/common/cluster/discovery/k8s.py +9 -19
  25. nucliadb/common/cluster/discovery/manual.py +1 -3
  26. nucliadb/common/cluster/discovery/utils.py +1 -3
  27. nucliadb/common/cluster/grpc_node_dummy.py +3 -11
  28. nucliadb/common/cluster/index_node.py +10 -19
  29. nucliadb/common/cluster/manager.py +174 -59
  30. nucliadb/common/cluster/rebalance.py +27 -29
  31. nucliadb/common/cluster/rollover.py +353 -194
  32. nucliadb/common/cluster/settings.py +6 -0
  33. nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
  34. nucliadb/common/cluster/standalone/index_node.py +4 -11
  35. nucliadb/common/cluster/standalone/service.py +2 -6
  36. nucliadb/common/cluster/standalone/utils.py +2 -6
  37. nucliadb/common/cluster/utils.py +29 -22
  38. nucliadb/common/constants.py +20 -0
  39. nucliadb/common/context/__init__.py +3 -0
  40. nucliadb/common/context/fastapi.py +8 -5
  41. nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
  42. nucliadb/common/datamanagers/__init__.py +7 -1
  43. nucliadb/common/datamanagers/atomic.py +22 -4
  44. nucliadb/common/datamanagers/cluster.py +5 -5
  45. nucliadb/common/datamanagers/entities.py +6 -16
  46. nucliadb/common/datamanagers/fields.py +84 -0
  47. nucliadb/common/datamanagers/kb.py +83 -37
  48. nucliadb/common/datamanagers/labels.py +26 -56
  49. nucliadb/common/datamanagers/processing.py +2 -6
  50. nucliadb/common/datamanagers/resources.py +41 -103
  51. nucliadb/common/datamanagers/rollover.py +76 -15
  52. nucliadb/common/datamanagers/synonyms.py +1 -1
  53. nucliadb/common/datamanagers/utils.py +15 -6
  54. nucliadb/common/datamanagers/vectorsets.py +110 -0
  55. nucliadb/common/external_index_providers/base.py +257 -0
  56. nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
  57. nucliadb/common/external_index_providers/manager.py +101 -0
  58. nucliadb/common/external_index_providers/pinecone.py +933 -0
  59. nucliadb/common/external_index_providers/settings.py +52 -0
  60. nucliadb/common/http_clients/auth.py +3 -6
  61. nucliadb/common/http_clients/processing.py +6 -11
  62. nucliadb/common/http_clients/utils.py +1 -3
  63. nucliadb/common/ids.py +240 -0
  64. nucliadb/common/locking.py +29 -7
  65. nucliadb/common/maindb/driver.py +11 -35
  66. nucliadb/common/maindb/exceptions.py +3 -0
  67. nucliadb/common/maindb/local.py +22 -9
  68. nucliadb/common/maindb/pg.py +206 -111
  69. nucliadb/common/maindb/utils.py +11 -42
  70. nucliadb/common/models_utils/from_proto.py +479 -0
  71. nucliadb/common/models_utils/to_proto.py +60 -0
  72. nucliadb/common/nidx.py +260 -0
  73. nucliadb/export_import/datamanager.py +25 -19
  74. nucliadb/export_import/exporter.py +5 -11
  75. nucliadb/export_import/importer.py +5 -7
  76. nucliadb/export_import/models.py +3 -3
  77. nucliadb/export_import/tasks.py +4 -4
  78. nucliadb/export_import/utils.py +25 -37
  79. nucliadb/health.py +1 -3
  80. nucliadb/ingest/app.py +15 -11
  81. nucliadb/ingest/consumer/auditing.py +21 -19
  82. nucliadb/ingest/consumer/consumer.py +82 -47
  83. nucliadb/ingest/consumer/materializer.py +5 -12
  84. nucliadb/ingest/consumer/pull.py +12 -27
  85. nucliadb/ingest/consumer/service.py +19 -17
  86. nucliadb/ingest/consumer/shard_creator.py +2 -4
  87. nucliadb/ingest/consumer/utils.py +1 -3
  88. nucliadb/ingest/fields/base.py +137 -105
  89. nucliadb/ingest/fields/conversation.py +18 -5
  90. nucliadb/ingest/fields/exceptions.py +1 -4
  91. nucliadb/ingest/fields/file.py +7 -16
  92. nucliadb/ingest/fields/link.py +5 -10
  93. nucliadb/ingest/fields/text.py +9 -4
  94. nucliadb/ingest/orm/brain.py +200 -213
  95. nucliadb/ingest/orm/broker_message.py +181 -0
  96. nucliadb/ingest/orm/entities.py +36 -51
  97. nucliadb/ingest/orm/exceptions.py +12 -0
  98. nucliadb/ingest/orm/knowledgebox.py +322 -197
  99. nucliadb/ingest/orm/processor/__init__.py +2 -700
  100. nucliadb/ingest/orm/processor/auditing.py +4 -23
  101. nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
  102. nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
  103. nucliadb/ingest/orm/processor/processor.py +752 -0
  104. nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
  105. nucliadb/ingest/orm/resource.py +249 -403
  106. nucliadb/ingest/orm/utils.py +4 -4
  107. nucliadb/ingest/partitions.py +3 -9
  108. nucliadb/ingest/processing.py +70 -73
  109. nucliadb/ingest/py.typed +0 -0
  110. nucliadb/ingest/serialize.py +37 -167
  111. nucliadb/ingest/service/__init__.py +1 -3
  112. nucliadb/ingest/service/writer.py +185 -412
  113. nucliadb/ingest/settings.py +10 -20
  114. nucliadb/ingest/utils.py +3 -6
  115. nucliadb/learning_proxy.py +242 -55
  116. nucliadb/metrics_exporter.py +30 -19
  117. nucliadb/middleware/__init__.py +1 -3
  118. nucliadb/migrator/command.py +1 -3
  119. nucliadb/migrator/datamanager.py +13 -13
  120. nucliadb/migrator/migrator.py +47 -30
  121. nucliadb/migrator/utils.py +18 -10
  122. nucliadb/purge/__init__.py +139 -33
  123. nucliadb/purge/orphan_shards.py +7 -13
  124. nucliadb/reader/__init__.py +1 -3
  125. nucliadb/reader/api/models.py +1 -12
  126. nucliadb/reader/api/v1/__init__.py +0 -1
  127. nucliadb/reader/api/v1/download.py +21 -88
  128. nucliadb/reader/api/v1/export_import.py +1 -1
  129. nucliadb/reader/api/v1/knowledgebox.py +10 -10
  130. nucliadb/reader/api/v1/learning_config.py +2 -6
  131. nucliadb/reader/api/v1/resource.py +62 -88
  132. nucliadb/reader/api/v1/services.py +64 -83
  133. nucliadb/reader/app.py +12 -29
  134. nucliadb/reader/lifecycle.py +18 -4
  135. nucliadb/reader/py.typed +0 -0
  136. nucliadb/reader/reader/notifications.py +10 -28
  137. nucliadb/search/__init__.py +1 -3
  138. nucliadb/search/api/v1/__init__.py +1 -2
  139. nucliadb/search/api/v1/ask.py +17 -10
  140. nucliadb/search/api/v1/catalog.py +184 -0
  141. nucliadb/search/api/v1/feedback.py +16 -24
  142. nucliadb/search/api/v1/find.py +36 -36
  143. nucliadb/search/api/v1/knowledgebox.py +89 -60
  144. nucliadb/search/api/v1/resource/ask.py +2 -8
  145. nucliadb/search/api/v1/resource/search.py +49 -70
  146. nucliadb/search/api/v1/search.py +44 -210
  147. nucliadb/search/api/v1/suggest.py +39 -54
  148. nucliadb/search/app.py +12 -32
  149. nucliadb/search/lifecycle.py +10 -3
  150. nucliadb/search/predict.py +136 -187
  151. nucliadb/search/py.typed +0 -0
  152. nucliadb/search/requesters/utils.py +25 -58
  153. nucliadb/search/search/cache.py +149 -20
  154. nucliadb/search/search/chat/ask.py +571 -123
  155. nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
  156. nucliadb/search/search/chat/images.py +41 -17
  157. nucliadb/search/search/chat/prompt.py +817 -266
  158. nucliadb/search/search/chat/query.py +213 -309
  159. nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
  160. nucliadb/search/search/fetch.py +43 -36
  161. nucliadb/search/search/filters.py +9 -15
  162. nucliadb/search/search/find.py +214 -53
  163. nucliadb/search/search/find_merge.py +408 -391
  164. nucliadb/search/search/hydrator.py +191 -0
  165. nucliadb/search/search/merge.py +187 -223
  166. nucliadb/search/search/metrics.py +73 -2
  167. nucliadb/search/search/paragraphs.py +64 -106
  168. nucliadb/search/search/pgcatalog.py +233 -0
  169. nucliadb/search/search/predict_proxy.py +1 -1
  170. nucliadb/search/search/query.py +305 -150
  171. nucliadb/search/search/query_parser/exceptions.py +22 -0
  172. nucliadb/search/search/query_parser/models.py +101 -0
  173. nucliadb/search/search/query_parser/parser.py +183 -0
  174. nucliadb/search/search/rank_fusion.py +204 -0
  175. nucliadb/search/search/rerankers.py +270 -0
  176. nucliadb/search/search/shards.py +3 -32
  177. nucliadb/search/search/summarize.py +7 -18
  178. nucliadb/search/search/utils.py +27 -4
  179. nucliadb/search/settings.py +15 -1
  180. nucliadb/standalone/api_router.py +4 -10
  181. nucliadb/standalone/app.py +8 -14
  182. nucliadb/standalone/auth.py +7 -21
  183. nucliadb/standalone/config.py +7 -10
  184. nucliadb/standalone/lifecycle.py +26 -25
  185. nucliadb/standalone/migrations.py +1 -3
  186. nucliadb/standalone/purge.py +1 -1
  187. nucliadb/standalone/py.typed +0 -0
  188. nucliadb/standalone/run.py +3 -6
  189. nucliadb/standalone/settings.py +9 -16
  190. nucliadb/standalone/versions.py +15 -5
  191. nucliadb/tasks/consumer.py +8 -12
  192. nucliadb/tasks/producer.py +7 -6
  193. nucliadb/tests/config.py +53 -0
  194. nucliadb/train/__init__.py +1 -3
  195. nucliadb/train/api/utils.py +1 -2
  196. nucliadb/train/api/v1/shards.py +1 -1
  197. nucliadb/train/api/v1/trainset.py +2 -4
  198. nucliadb/train/app.py +10 -31
  199. nucliadb/train/generator.py +10 -19
  200. nucliadb/train/generators/field_classifier.py +7 -19
  201. nucliadb/train/generators/field_streaming.py +156 -0
  202. nucliadb/train/generators/image_classifier.py +12 -18
  203. nucliadb/train/generators/paragraph_classifier.py +5 -9
  204. nucliadb/train/generators/paragraph_streaming.py +6 -9
  205. nucliadb/train/generators/question_answer_streaming.py +19 -20
  206. nucliadb/train/generators/sentence_classifier.py +9 -15
  207. nucliadb/train/generators/token_classifier.py +48 -39
  208. nucliadb/train/generators/utils.py +14 -18
  209. nucliadb/train/lifecycle.py +7 -3
  210. nucliadb/train/nodes.py +23 -32
  211. nucliadb/train/py.typed +0 -0
  212. nucliadb/train/servicer.py +13 -21
  213. nucliadb/train/settings.py +2 -6
  214. nucliadb/train/types.py +13 -10
  215. nucliadb/train/upload.py +3 -6
  216. nucliadb/train/uploader.py +19 -23
  217. nucliadb/train/utils.py +1 -1
  218. nucliadb/writer/__init__.py +1 -3
  219. nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
  220. nucliadb/writer/api/v1/export_import.py +67 -14
  221. nucliadb/writer/api/v1/field.py +16 -269
  222. nucliadb/writer/api/v1/knowledgebox.py +218 -68
  223. nucliadb/writer/api/v1/resource.py +68 -88
  224. nucliadb/writer/api/v1/services.py +51 -70
  225. nucliadb/writer/api/v1/slug.py +61 -0
  226. nucliadb/writer/api/v1/transaction.py +67 -0
  227. nucliadb/writer/api/v1/upload.py +143 -117
  228. nucliadb/writer/app.py +6 -43
  229. nucliadb/writer/back_pressure.py +16 -38
  230. nucliadb/writer/exceptions.py +0 -4
  231. nucliadb/writer/lifecycle.py +21 -15
  232. nucliadb/writer/py.typed +0 -0
  233. nucliadb/writer/resource/audit.py +2 -1
  234. nucliadb/writer/resource/basic.py +48 -46
  235. nucliadb/writer/resource/field.py +37 -128
  236. nucliadb/writer/resource/origin.py +1 -2
  237. nucliadb/writer/settings.py +6 -2
  238. nucliadb/writer/tus/__init__.py +17 -15
  239. nucliadb/writer/tus/azure.py +111 -0
  240. nucliadb/writer/tus/dm.py +17 -5
  241. nucliadb/writer/tus/exceptions.py +1 -3
  242. nucliadb/writer/tus/gcs.py +49 -84
  243. nucliadb/writer/tus/local.py +21 -37
  244. nucliadb/writer/tus/s3.py +28 -68
  245. nucliadb/writer/tus/storage.py +5 -56
  246. nucliadb/writer/vectorsets.py +125 -0
  247. nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
  248. nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
  249. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
  250. nucliadb/common/maindb/redis.py +0 -194
  251. nucliadb/common/maindb/tikv.py +0 -433
  252. nucliadb/ingest/fields/layout.py +0 -58
  253. nucliadb/ingest/tests/conftest.py +0 -30
  254. nucliadb/ingest/tests/fixtures.py +0 -764
  255. nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
  256. nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
  257. nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
  258. nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
  259. nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
  260. nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
  261. nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
  262. nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
  263. nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
  264. nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
  265. nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
  266. nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
  267. nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
  268. nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
  269. nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
  270. nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
  271. nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
  272. nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
  273. nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
  274. nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
  275. nucliadb/ingest/tests/unit/test_cache.py +0 -31
  276. nucliadb/ingest/tests/unit/test_partitions.py +0 -40
  277. nucliadb/ingest/tests/unit/test_processing.py +0 -171
  278. nucliadb/middleware/transaction.py +0 -117
  279. nucliadb/reader/api/v1/learning_collector.py +0 -63
  280. nucliadb/reader/tests/__init__.py +0 -19
  281. nucliadb/reader/tests/conftest.py +0 -31
  282. nucliadb/reader/tests/fixtures.py +0 -136
  283. nucliadb/reader/tests/test_list_resources.py +0 -75
  284. nucliadb/reader/tests/test_reader_file_download.py +0 -273
  285. nucliadb/reader/tests/test_reader_resource.py +0 -353
  286. nucliadb/reader/tests/test_reader_resource_field.py +0 -219
  287. nucliadb/search/api/v1/chat.py +0 -263
  288. nucliadb/search/api/v1/resource/chat.py +0 -174
  289. nucliadb/search/tests/__init__.py +0 -19
  290. nucliadb/search/tests/conftest.py +0 -33
  291. nucliadb/search/tests/fixtures.py +0 -199
  292. nucliadb/search/tests/node.py +0 -466
  293. nucliadb/search/tests/unit/__init__.py +0 -18
  294. nucliadb/search/tests/unit/api/__init__.py +0 -19
  295. nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
  296. nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
  297. nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
  298. nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
  299. nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
  300. nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
  301. nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
  302. nucliadb/search/tests/unit/search/__init__.py +0 -18
  303. nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
  304. nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
  305. nucliadb/search/tests/unit/search/search/__init__.py +0 -19
  306. nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
  307. nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
  308. nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
  309. nucliadb/search/tests/unit/search/test_fetch.py +0 -108
  310. nucliadb/search/tests/unit/search/test_filters.py +0 -125
  311. nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
  312. nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
  313. nucliadb/search/tests/unit/search/test_query.py +0 -153
  314. nucliadb/search/tests/unit/test_app.py +0 -79
  315. nucliadb/search/tests/unit/test_find_merge.py +0 -112
  316. nucliadb/search/tests/unit/test_merge.py +0 -34
  317. nucliadb/search/tests/unit/test_predict.py +0 -525
  318. nucliadb/standalone/tests/__init__.py +0 -19
  319. nucliadb/standalone/tests/conftest.py +0 -33
  320. nucliadb/standalone/tests/fixtures.py +0 -38
  321. nucliadb/standalone/tests/unit/__init__.py +0 -18
  322. nucliadb/standalone/tests/unit/test_api_router.py +0 -61
  323. nucliadb/standalone/tests/unit/test_auth.py +0 -169
  324. nucliadb/standalone/tests/unit/test_introspect.py +0 -35
  325. nucliadb/standalone/tests/unit/test_migrations.py +0 -63
  326. nucliadb/standalone/tests/unit/test_versions.py +0 -68
  327. nucliadb/tests/benchmarks/__init__.py +0 -19
  328. nucliadb/tests/benchmarks/test_search.py +0 -99
  329. nucliadb/tests/conftest.py +0 -32
  330. nucliadb/tests/fixtures.py +0 -735
  331. nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
  332. nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
  333. nucliadb/tests/migrations/test_migration_0017.py +0 -76
  334. nucliadb/tests/migrations/test_migration_0018.py +0 -95
  335. nucliadb/tests/tikv.py +0 -240
  336. nucliadb/tests/unit/__init__.py +0 -19
  337. nucliadb/tests/unit/common/__init__.py +0 -19
  338. nucliadb/tests/unit/common/cluster/__init__.py +0 -19
  339. nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
  340. nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
  341. nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
  342. nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
  343. nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
  344. nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
  345. nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
  346. nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
  347. nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
  348. nucliadb/tests/unit/common/maindb/__init__.py +0 -18
  349. nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
  350. nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
  351. nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
  352. nucliadb/tests/unit/common/test_context.py +0 -36
  353. nucliadb/tests/unit/export_import/__init__.py +0 -19
  354. nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
  355. nucliadb/tests/unit/export_import/test_utils.py +0 -301
  356. nucliadb/tests/unit/migrator/__init__.py +0 -19
  357. nucliadb/tests/unit/migrator/test_migrator.py +0 -87
  358. nucliadb/tests/unit/tasks/__init__.py +0 -19
  359. nucliadb/tests/unit/tasks/conftest.py +0 -42
  360. nucliadb/tests/unit/tasks/test_consumer.py +0 -92
  361. nucliadb/tests/unit/tasks/test_producer.py +0 -95
  362. nucliadb/tests/unit/tasks/test_tasks.py +0 -58
  363. nucliadb/tests/unit/test_field_ids.py +0 -49
  364. nucliadb/tests/unit/test_health.py +0 -86
  365. nucliadb/tests/unit/test_kb_slugs.py +0 -54
  366. nucliadb/tests/unit/test_learning_proxy.py +0 -252
  367. nucliadb/tests/unit/test_metrics_exporter.py +0 -77
  368. nucliadb/tests/unit/test_purge.py +0 -136
  369. nucliadb/tests/utils/__init__.py +0 -74
  370. nucliadb/tests/utils/aiohttp_session.py +0 -44
  371. nucliadb/tests/utils/broker_messages/__init__.py +0 -171
  372. nucliadb/tests/utils/broker_messages/fields.py +0 -197
  373. nucliadb/tests/utils/broker_messages/helpers.py +0 -33
  374. nucliadb/tests/utils/entities.py +0 -78
  375. nucliadb/train/api/v1/check.py +0 -60
  376. nucliadb/train/tests/__init__.py +0 -19
  377. nucliadb/train/tests/conftest.py +0 -29
  378. nucliadb/train/tests/fixtures.py +0 -342
  379. nucliadb/train/tests/test_field_classification.py +0 -122
  380. nucliadb/train/tests/test_get_entities.py +0 -80
  381. nucliadb/train/tests/test_get_info.py +0 -51
  382. nucliadb/train/tests/test_get_ontology.py +0 -34
  383. nucliadb/train/tests/test_get_ontology_count.py +0 -63
  384. nucliadb/train/tests/test_image_classification.py +0 -221
  385. nucliadb/train/tests/test_list_fields.py +0 -39
  386. nucliadb/train/tests/test_list_paragraphs.py +0 -73
  387. nucliadb/train/tests/test_list_resources.py +0 -39
  388. nucliadb/train/tests/test_list_sentences.py +0 -71
  389. nucliadb/train/tests/test_paragraph_classification.py +0 -123
  390. nucliadb/train/tests/test_paragraph_streaming.py +0 -118
  391. nucliadb/train/tests/test_question_answer_streaming.py +0 -239
  392. nucliadb/train/tests/test_sentence_classification.py +0 -143
  393. nucliadb/train/tests/test_token_classification.py +0 -136
  394. nucliadb/train/tests/utils.py +0 -101
  395. nucliadb/writer/layouts/__init__.py +0 -51
  396. nucliadb/writer/layouts/v1.py +0 -59
  397. nucliadb/writer/tests/__init__.py +0 -19
  398. nucliadb/writer/tests/conftest.py +0 -31
  399. nucliadb/writer/tests/fixtures.py +0 -191
  400. nucliadb/writer/tests/test_fields.py +0 -475
  401. nucliadb/writer/tests/test_files.py +0 -740
  402. nucliadb/writer/tests/test_knowledgebox.py +0 -49
  403. nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
  404. nucliadb/writer/tests/test_resources.py +0 -476
  405. nucliadb/writer/tests/test_service.py +0 -137
  406. nucliadb/writer/tests/test_tus.py +0 -203
  407. nucliadb/writer/tests/utils.py +0 -35
  408. nucliadb/writer/tus/pg.py +0 -125
  409. nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
  410. nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
  411. {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
  412. /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
  413. /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
  414. /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
  415. /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
  416. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
  417. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
  418. {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -18,13 +18,13 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
 
21
- from nucliadb.migrator.models import Migration
22
- from nucliadb.migrator.utils import get_migrations
21
+ from typing import TypeVar
23
22
 
23
+ T = TypeVar("T")
24
24
 
25
- def get_migration(version: int) -> Migration:
26
- migration: Migration = get_migrations(from_version=version - 1, to_version=version)[
27
- 0
28
- ]
29
- assert migration.version == version
30
- return migration
25
+
26
+ def cut_page(items: list[T], top_k: int) -> tuple[list[T], bool]:
27
+ """Return a slice of `items` representing the specified page and a boolean
28
+ indicating whether there is a next page or not"""
29
+ next_page = len(items) > top_k
30
+ return items[:top_k], next_page
@@ -17,26 +17,25 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ import asyncio
20
21
  from contextvars import ContextVar
21
22
  from typing import Optional
22
23
 
23
- from nucliadb_protos.nodereader_pb2 import DocumentResult, ParagraphResult
24
- from nucliadb_protos.resources_pb2 import Paragraph
25
-
26
- from nucliadb.ingest.orm.resource import KB_REVERSE
24
+ from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
25
+ from nucliadb.common.maindb.utils import get_driver
27
26
  from nucliadb.ingest.orm.resource import Resource as ResourceORM
28
27
  from nucliadb.ingest.serialize import managed_serialize
29
- from nucliadb.middleware.transaction import get_read_only_transaction
30
28
  from nucliadb.search import SERVICE_NAME, logger
29
+ from nucliadb.search.search import cache
31
30
  from nucliadb_models.common import FieldTypeName
32
31
  from nucliadb_models.resource import ExtractedDataTypeName, Resource
33
32
  from nucliadb_models.search import ResourceProperties
33
+ from nucliadb_protos.nodereader_pb2 import DocumentResult, ParagraphResult
34
+ from nucliadb_protos.resources_pb2 import Paragraph
35
+ from nucliadb_utils import const
36
+ from nucliadb_utils.utilities import has_feature
34
37
 
35
- from .cache import get_resource_from_cache
36
-
37
- rcache: ContextVar[Optional[dict[str, ResourceORM]]] = ContextVar(
38
- "rcache", default=None
39
- )
38
+ rcache: ContextVar[Optional[dict[str, ResourceORM]]] = ContextVar("rcache", default=None)
40
39
 
41
40
 
42
41
  async def fetch_resources(
@@ -46,20 +45,34 @@ async def fetch_resources(
46
45
  field_type_filter: list[FieldTypeName],
47
46
  extracted: list[ExtractedDataTypeName],
48
47
  ) -> dict[str, Resource]:
48
+ if ResourceProperties.EXTRACTED in show and has_feature(
49
+ const.Features.IGNORE_EXTRACTED_IN_SEARCH, context={"kbid": kbid}, default=False
50
+ ):
51
+ # Returning extracted metadata in search results is deprecated and this flag
52
+ # will be set to True for all KBs in the future.
53
+ show.remove(ResourceProperties.EXTRACTED)
54
+ extracted = []
55
+
49
56
  result = {}
50
- txn = await get_read_only_transaction()
51
- for resource in resources:
52
- serialization = await managed_serialize(
53
- txn,
54
- kbid,
55
- resource,
56
- show,
57
- field_type_filter=field_type_filter,
58
- extracted=extracted,
59
- service_name=SERVICE_NAME,
60
- )
61
- if serialization is not None:
62
- result[resource] = serialization
57
+ async with get_driver().transaction(read_only=True) as txn:
58
+ tasks = []
59
+ for resource in resources:
60
+ tasks.append(
61
+ asyncio.create_task(
62
+ managed_serialize(
63
+ txn,
64
+ kbid,
65
+ resource,
66
+ show,
67
+ field_type_filter=field_type_filter,
68
+ extracted=extracted,
69
+ service_name=SERVICE_NAME,
70
+ )
71
+ )
72
+ )
73
+ for resource, serialization in zip(resources, await asyncio.gather(*tasks)):
74
+ if serialization is not None:
75
+ result[resource] = serialization
63
76
  return result
64
77
 
65
78
 
@@ -67,7 +80,7 @@ async def get_paragraph_from_resource(
67
80
  orm_resource: ResourceORM, result: ParagraphResult
68
81
  ) -> Optional[Paragraph]:
69
82
  _, field_type, field = result.field.split("/")
70
- field_type_int = KB_REVERSE[field_type]
83
+ field_type_int = FIELD_TYPE_STR_TO_PB[field_type]
71
84
  field_obj = await orm_resource.get_field(field, field_type_int, load=False)
72
85
  field_metadata = await field_obj.get_field_metadata()
73
86
  paragraph = None
@@ -81,7 +94,7 @@ async def get_paragraph_from_resource(
81
94
 
82
95
 
83
96
  async def get_labels_resource(result: DocumentResult, kbid: str) -> list[str]:
84
- orm_resource = await get_resource_from_cache(kbid, result.uuid)
97
+ orm_resource = await cache.get_resource(kbid, result.uuid)
85
98
 
86
99
  if orm_resource is None:
87
100
  logger.error(f"{result.uuid} does not exist on DB")
@@ -97,7 +110,7 @@ async def get_labels_resource(result: DocumentResult, kbid: str) -> list[str]:
97
110
 
98
111
 
99
112
  async def get_labels_paragraph(result: ParagraphResult, kbid: str) -> list[str]:
100
- orm_resource = await get_resource_from_cache(kbid, result.uuid)
113
+ orm_resource = await cache.get_resource(kbid, result.uuid)
101
114
 
102
115
  if orm_resource is None:
103
116
  logger.error(f"{result.uuid} does not exist on DB")
@@ -110,7 +123,7 @@ async def get_labels_paragraph(result: ParagraphResult, kbid: str) -> list[str]:
110
123
  labels.append(f"{classification.labelset}/{classification.label}")
111
124
 
112
125
  _, field_type, field = result.field.split("/")
113
- field_type_int = KB_REVERSE[field_type]
126
+ field_type_int = FIELD_TYPE_STR_TO_PB[field_type]
114
127
  field_obj = await orm_resource.get_field(field, field_type_int, load=False)
115
128
  field_metadata = await field_obj.get_field_metadata()
116
129
  if field_metadata:
@@ -131,21 +144,15 @@ async def get_labels_paragraph(result: ParagraphResult, kbid: str) -> list[str]:
131
144
  async def get_seconds_paragraph(
132
145
  result: ParagraphResult, kbid: str
133
146
  ) -> Optional[tuple[list[int], list[int]]]:
134
- orm_resource = await get_resource_from_cache(kbid, result.uuid)
147
+ orm_resource = await cache.get_resource(kbid, result.uuid)
135
148
 
136
149
  if orm_resource is None:
137
150
  logger.error(f"{result.uuid} does not exist on DB")
138
151
  return None
139
152
 
140
- paragraph = await get_paragraph_from_resource(
141
- orm_resource=orm_resource, result=result
142
- )
153
+ paragraph = await get_paragraph_from_resource(orm_resource=orm_resource, result=result)
143
154
 
144
- if (
145
- paragraph is not None
146
- and len(paragraph.end_seconds) > 0
147
- and paragraph.end_seconds[0] > 0
148
- ):
155
+ if paragraph is not None and len(paragraph.end_seconds) > 0 and paragraph.end_seconds[0] > 0:
149
156
  return (list(paragraph.start_seconds), list(paragraph.end_seconds))
150
157
 
151
158
  return None
@@ -59,9 +59,7 @@ def translate_label(literal: str) -> str:
59
59
  if len(literal) == 0:
60
60
  raise InvalidQueryError("filters", "Invalid empty label")
61
61
  if literal[0] != "/":
62
- raise InvalidQueryError(
63
- "filters", f"Invalid label. It must start with a `/`: {literal}"
64
- )
62
+ raise InvalidQueryError("filters", f"Invalid label. It must start with a `/`: {literal}")
65
63
  return translate_alias_to_system_label(literal)
66
64
 
67
65
 
@@ -109,13 +107,9 @@ def split_labels_by_type(
109
107
  return field_labels, paragraph_labels
110
108
 
111
109
 
112
- def is_paragraph_labelset_kind(
113
- labelset_id: str, classification_labels: knowledgebox_pb2.Labels
114
- ) -> bool:
110
+ def is_paragraph_labelset_kind(labelset_id: str, classification_labels: knowledgebox_pb2.Labels) -> bool:
115
111
  try:
116
- labelset: Optional[knowledgebox_pb2.LabelSet] = (
117
- classification_labels.labelset.get(labelset_id)
118
- )
112
+ labelset: Optional[knowledgebox_pb2.LabelSet] = classification_labels.labelset.get(labelset_id)
119
113
  if labelset is None:
120
114
  return False
121
115
  return knowledgebox_pb2.LabelSet.LabelSetKind.PARAGRAPHS in labelset.kind
@@ -124,32 +118,32 @@ def is_paragraph_labelset_kind(
124
118
  return False
125
119
 
126
120
 
127
- def flat_filter_labels(filters: Union[list[str], dict[str, Any]]) -> list[str]:
121
+ def flatten_filter_literals(filters: Union[list[str], dict[str, Any]]) -> list[str]:
128
122
  if isinstance(filters, list):
129
123
  return filters
130
124
  else:
131
- return list(iter_filter_labels_expression(filters))
125
+ return list(iter_filter_expression_literals(filters))
132
126
 
133
127
 
134
- def iter_filter_labels_expression(expression: dict[str, Any]) -> Iterator[str]:
128
+ def iter_filter_expression_literals(expression: dict[str, Any]) -> Iterator[str]:
135
129
  if "literal" in expression:
136
130
  yield expression["literal"]
137
131
  return
138
132
 
139
133
  if "not" in expression:
140
- for label in iter_filter_labels_expression(expression["not"]):
134
+ for label in iter_filter_expression_literals(expression["not"]):
141
135
  yield label
142
136
  return
143
137
 
144
138
  if "and" in expression:
145
139
  for and_term in expression["and"]:
146
- for label in iter_filter_labels_expression(and_term):
140
+ for label in iter_filter_expression_literals(and_term):
147
141
  yield label
148
142
  return
149
143
 
150
144
  if "or" in expression:
151
145
  for or_term in expression["or"]:
152
- for label in iter_filter_labels_expression(or_term):
146
+ for label in iter_filter_expression_literals(or_term):
153
147
  yield label
154
148
  return
155
149
 
@@ -18,13 +18,39 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import logging
21
+ from dataclasses import dataclass
21
22
  from time import time
22
23
  from typing import Optional
23
24
 
25
+ from nucliadb.common.external_index_providers.base import ExternalIndexManager
26
+ from nucliadb.common.external_index_providers.manager import get_external_index_manager
27
+ from nucliadb.common.models_utils import to_proto
24
28
  from nucliadb.search.requesters.utils import Method, debug_nodes_info, node_query
25
- from nucliadb.search.search.find_merge import find_merge_results
29
+ from nucliadb.search.search.find_merge import (
30
+ build_find_response,
31
+ compose_find_resources,
32
+ hydrate_and_rerank,
33
+ )
34
+ from nucliadb.search.search.hydrator import (
35
+ ResourceHydrationOptions,
36
+ TextBlockHydrationOptions,
37
+ )
38
+ from nucliadb.search.search.metrics import (
39
+ RAGMetrics,
40
+ )
26
41
  from nucliadb.search.search.query import QueryParser
42
+ from nucliadb.search.search.query_parser.parser import parse_find
43
+ from nucliadb.search.search.rank_fusion import (
44
+ RankFusionAlgorithm,
45
+ get_rank_fusion,
46
+ )
47
+ from nucliadb.search.search.rerankers import (
48
+ Reranker,
49
+ RerankingOptions,
50
+ get_reranker,
51
+ )
27
52
  from nucliadb.search.search.utils import (
53
+ filter_hidden_resources,
28
54
  min_score_from_payload,
29
55
  should_disable_vector_search,
30
56
  )
@@ -32,6 +58,7 @@ from nucliadb.search.settings import settings
32
58
  from nucliadb_models.search import (
33
59
  FindRequest,
34
60
  KnowledgeboxFindResults,
61
+ MinScore,
35
62
  NucliaDBClientType,
36
63
  SearchOptions,
37
64
  )
@@ -47,72 +74,76 @@ async def find(
47
74
  x_nucliadb_user: str,
48
75
  x_forwarded_for: str,
49
76
  generative_model: Optional[str] = None,
77
+ metrics: RAGMetrics = RAGMetrics(),
50
78
  ) -> tuple[KnowledgeboxFindResults, bool, QueryParser]:
51
- audit = get_audit()
52
- start_time = time()
79
+ external_index_manager = await get_external_index_manager(kbid=kbid)
80
+ if external_index_manager is not None:
81
+ return await _external_index_retrieval(
82
+ kbid,
83
+ item,
84
+ external_index_manager,
85
+ generative_model,
86
+ )
87
+ else:
88
+ return await _index_node_retrieval(
89
+ kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, generative_model, metrics
90
+ )
53
91
 
54
- item.min_score = min_score_from_payload(item.min_score)
55
92
 
56
- if SearchOptions.VECTOR in item.features:
57
- if should_disable_vector_search(item):
58
- item.features.remove(SearchOptions.VECTOR)
93
+ async def _index_node_retrieval(
94
+ kbid: str,
95
+ item: FindRequest,
96
+ x_ndb_client: NucliaDBClientType,
97
+ x_nucliadb_user: str,
98
+ x_forwarded_for: str,
99
+ generative_model: Optional[str] = None,
100
+ metrics: RAGMetrics = RAGMetrics(),
101
+ ) -> tuple[KnowledgeboxFindResults, bool, QueryParser]:
102
+ audit = get_audit()
103
+ start_time = time()
59
104
 
60
- query_parser = QueryParser(
61
- kbid=kbid,
62
- features=item.features,
63
- query=item.query,
64
- filters=item.filters,
65
- faceted=None,
66
- sort=None,
67
- page_number=item.page_number,
68
- page_size=item.page_size,
69
- min_score=item.min_score,
70
- range_creation_start=item.range_creation_start,
71
- range_creation_end=item.range_creation_end,
72
- range_modification_start=item.range_modification_start,
73
- range_modification_end=item.range_modification_end,
74
- fields=item.fields,
75
- user_vector=item.vector,
76
- with_duplicates=item.with_duplicates,
77
- with_synonyms=item.with_synonyms,
78
- autofilter=item.autofilter,
79
- key_filters=item.resource_filters,
80
- security=item.security,
81
- generative_model=generative_model,
82
- rephrase=item.rephrase,
83
- )
84
- pb_query, incomplete_results, autofilters = await query_parser.parse()
85
- results, query_incomplete_results, queried_nodes = await node_query(
86
- kbid, Method.SEARCH, pb_query, target_shard_replicas=item.shards
105
+ query_parser, rank_fusion, reranker = await query_parser_from_find_request(
106
+ kbid, item, generative_model=generative_model
87
107
  )
108
+ with metrics.time("query_parse"):
109
+ pb_query, incomplete_results, autofilters = await query_parser.parse()
110
+
111
+ with metrics.time("node_query"):
112
+ results, query_incomplete_results, queried_nodes = await node_query(
113
+ kbid, Method.SEARCH, pb_query, target_shard_replicas=item.shards
114
+ )
88
115
  incomplete_results = incomplete_results or query_incomplete_results
89
116
 
90
- # We need to merge
91
- search_results = await find_merge_results(
92
- results,
93
- count=item.page_size,
94
- page=item.page_number,
95
- kbid=kbid,
96
- show=item.show,
97
- field_type_filter=item.field_type_filter,
98
- extracted=item.extracted,
99
- requested_relations=pb_query.relation_subgraph,
100
- min_score_bm25=query_parser.min_score.bm25,
101
- min_score_semantic=query_parser.min_score.semantic,
102
- highlight=item.highlight,
103
- )
117
+ # Rank fusion merge, cut, hydrate and rerank
118
+ with metrics.time("results_merge"):
119
+ search_results = await build_find_response(
120
+ results,
121
+ kbid=kbid,
122
+ query=pb_query.body,
123
+ relation_subgraph_query=pb_query.relations.subgraph,
124
+ min_score_bm25=pb_query.min_score_bm25,
125
+ min_score_semantic=pb_query.min_score_semantic,
126
+ top_k=item.top_k,
127
+ show=item.show,
128
+ extracted=item.extracted,
129
+ field_type_filter=item.field_type_filter,
130
+ highlight=item.highlight,
131
+ rank_fusion_algorithm=rank_fusion,
132
+ reranker=reranker,
133
+ )
104
134
 
105
135
  search_time = time() - start_time
106
136
  if audit is not None:
107
- await audit.search(
137
+ audit.search(
108
138
  kbid,
109
139
  x_nucliadb_user,
110
- x_ndb_client.to_proto(),
140
+ to_proto.client_type(x_ndb_client),
111
141
  x_forwarded_for,
112
142
  pb_query,
113
143
  search_time,
114
144
  len(search_results.resources),
115
145
  )
146
+
116
147
  if item.debug:
117
148
  search_results.nodes = debug_nodes_info(queried_nodes)
118
149
 
@@ -120,17 +151,147 @@ async def find(
120
151
  search_results.shards = queried_shards
121
152
  search_results.autofilters = autofilters
122
153
 
123
- if search_time > settings.slow_find_log_threshold:
154
+ if metrics.elapsed("node_query") > settings.slow_node_query_log_threshold:
124
155
  logger.warning(
125
- "Slow query",
156
+ "Slow node query",
157
+ extra={
158
+ "kbid": kbid,
159
+ "user": x_nucliadb_user,
160
+ "client": x_ndb_client,
161
+ "query": item.model_dump_json(),
162
+ "time": search_time,
163
+ "nodes": debug_nodes_info(queried_nodes),
164
+ "durations": metrics.steps(),
165
+ },
166
+ )
167
+ elif search_time > settings.slow_find_log_threshold:
168
+ logger.info(
169
+ "Slow find query",
126
170
  extra={
127
171
  "kbid": kbid,
128
172
  "user": x_nucliadb_user,
129
173
  "client": x_ndb_client,
130
- "query": item.json(),
174
+ "query": item.model_dump_json(),
131
175
  "time": search_time,
132
176
  "nodes": debug_nodes_info(queried_nodes),
177
+ "durations": metrics.steps(),
133
178
  },
134
179
  )
135
180
 
136
181
  return search_results, incomplete_results, query_parser
182
+
183
+
184
+ async def _external_index_retrieval(
185
+ kbid: str,
186
+ item: FindRequest,
187
+ external_index_manager: ExternalIndexManager,
188
+ generative_model: Optional[str] = None,
189
+ ) -> tuple[KnowledgeboxFindResults, bool, QueryParser]:
190
+ """
191
+ Parse the query, query the external index, and hydrate the results.
192
+ """
193
+ # Parse query
194
+ query_parser, _, reranker = await query_parser_from_find_request(
195
+ kbid, item, generative_model=generative_model
196
+ )
197
+ search_request, incomplete_results, _ = await query_parser.parse()
198
+
199
+ # Query index
200
+ query_results = await external_index_manager.query(search_request) # noqa
201
+
202
+ # Hydrate and rerank results
203
+ text_blocks, resources, best_matches = await hydrate_and_rerank(
204
+ query_results.iter_matching_text_blocks(),
205
+ kbid,
206
+ resource_hydration_options=ResourceHydrationOptions(
207
+ show=item.show,
208
+ extracted=item.extracted,
209
+ field_type_filter=item.field_type_filter,
210
+ ),
211
+ text_block_hydration_options=TextBlockHydrationOptions(),
212
+ reranker=reranker,
213
+ reranking_options=RerankingOptions(
214
+ kbid=kbid,
215
+ query=search_request.body,
216
+ ),
217
+ top_k=query_parser.top_k,
218
+ )
219
+ find_resources = compose_find_resources(text_blocks, resources)
220
+
221
+ results_min_score = MinScore(
222
+ bm25=0,
223
+ semantic=query_parser.min_score.semantic,
224
+ )
225
+ retrieval_results = KnowledgeboxFindResults(
226
+ resources=find_resources,
227
+ query=item.query,
228
+ total=0,
229
+ page_number=0,
230
+ page_size=item.top_k,
231
+ relations=None, # Not implemented for external indexes yet
232
+ autofilters=[], # Not implemented for external indexes yet
233
+ min_score=results_min_score,
234
+ best_matches=best_matches,
235
+ # These are not used for external indexes
236
+ shards=None,
237
+ nodes=None,
238
+ )
239
+
240
+ return retrieval_results, incomplete_results, query_parser
241
+
242
+
243
+ @dataclass
244
+ class ScoredParagraph:
245
+ id: str
246
+ score: float
247
+
248
+
249
+ async def query_parser_from_find_request(
250
+ kbid: str, item: FindRequest, *, generative_model: Optional[str] = None
251
+ ) -> tuple[QueryParser, RankFusionAlgorithm, Reranker]:
252
+ item.min_score = min_score_from_payload(item.min_score)
253
+
254
+ if SearchOptions.SEMANTIC in item.features:
255
+ if should_disable_vector_search(item):
256
+ item.features.remove(SearchOptions.SEMANTIC)
257
+
258
+ hidden = await filter_hidden_resources(kbid, item.show_hidden)
259
+
260
+ # XXX this is becoming the new /find query parsing, this should be moved to
261
+ # a cleaner abstraction
262
+
263
+ parsed = parse_find(item)
264
+
265
+ rank_fusion = get_rank_fusion(parsed.rank_fusion)
266
+ reranker = get_reranker(parsed.reranker)
267
+
268
+ query_parser = QueryParser(
269
+ kbid=kbid,
270
+ features=item.features,
271
+ query=item.query,
272
+ label_filters=item.filters,
273
+ keyword_filters=item.keyword_filters,
274
+ faceted=None,
275
+ sort=None,
276
+ top_k=item.top_k,
277
+ min_score=item.min_score,
278
+ range_creation_start=item.range_creation_start,
279
+ range_creation_end=item.range_creation_end,
280
+ range_modification_start=item.range_modification_start,
281
+ range_modification_end=item.range_modification_end,
282
+ fields=item.fields,
283
+ user_vector=item.vector,
284
+ vectorset=item.vectorset,
285
+ with_duplicates=item.with_duplicates,
286
+ with_synonyms=item.with_synonyms,
287
+ autofilter=item.autofilter,
288
+ key_filters=item.resource_filters,
289
+ security=item.security,
290
+ generative_model=generative_model,
291
+ rephrase=item.rephrase,
292
+ rephrase_prompt=item.rephrase_prompt,
293
+ hidden=hidden,
294
+ rank_fusion=rank_fusion,
295
+ reranker=reranker,
296
+ )
297
+ return (query_parser, rank_fusion, reranker)