nucliadb 6.9.1.post5192__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. migrations/0023_backfill_pg_catalog.py +2 -2
  2. migrations/0029_backfill_field_status.py +3 -4
  3. migrations/0032_remove_old_relations.py +2 -3
  4. migrations/0038_backfill_catalog_field_labels.py +2 -2
  5. migrations/0039_backfill_converation_splits_metadata.py +2 -2
  6. migrations/0041_reindex_conversations.py +137 -0
  7. migrations/pg/0010_shards_index.py +34 -0
  8. nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
  9. migrations/pg/0012_catalog_statistics_undo.py +26 -0
  10. nucliadb/backups/create.py +2 -15
  11. nucliadb/backups/restore.py +4 -15
  12. nucliadb/backups/tasks.py +4 -1
  13. nucliadb/common/back_pressure/cache.py +2 -3
  14. nucliadb/common/back_pressure/materializer.py +7 -13
  15. nucliadb/common/back_pressure/settings.py +6 -6
  16. nucliadb/common/back_pressure/utils.py +1 -0
  17. nucliadb/common/cache.py +9 -9
  18. nucliadb/common/catalog/interface.py +12 -12
  19. nucliadb/common/catalog/pg.py +41 -29
  20. nucliadb/common/catalog/utils.py +3 -3
  21. nucliadb/common/cluster/manager.py +5 -4
  22. nucliadb/common/cluster/rebalance.py +483 -114
  23. nucliadb/common/cluster/rollover.py +25 -9
  24. nucliadb/common/cluster/settings.py +3 -8
  25. nucliadb/common/cluster/utils.py +34 -8
  26. nucliadb/common/context/__init__.py +7 -8
  27. nucliadb/common/context/fastapi.py +1 -2
  28. nucliadb/common/datamanagers/__init__.py +2 -4
  29. nucliadb/common/datamanagers/atomic.py +4 -2
  30. nucliadb/common/datamanagers/cluster.py +1 -2
  31. nucliadb/common/datamanagers/fields.py +3 -4
  32. nucliadb/common/datamanagers/kb.py +6 -6
  33. nucliadb/common/datamanagers/labels.py +2 -3
  34. nucliadb/common/datamanagers/resources.py +10 -33
  35. nucliadb/common/datamanagers/rollover.py +5 -7
  36. nucliadb/common/datamanagers/search_configurations.py +1 -2
  37. nucliadb/common/datamanagers/synonyms.py +1 -2
  38. nucliadb/common/datamanagers/utils.py +4 -4
  39. nucliadb/common/datamanagers/vectorsets.py +4 -4
  40. nucliadb/common/external_index_providers/base.py +32 -5
  41. nucliadb/common/external_index_providers/manager.py +4 -5
  42. nucliadb/common/filter_expression.py +128 -40
  43. nucliadb/common/http_clients/processing.py +12 -23
  44. nucliadb/common/ids.py +6 -4
  45. nucliadb/common/locking.py +1 -2
  46. nucliadb/common/maindb/driver.py +9 -8
  47. nucliadb/common/maindb/local.py +5 -5
  48. nucliadb/common/maindb/pg.py +9 -8
  49. nucliadb/common/nidx.py +3 -4
  50. nucliadb/export_import/datamanager.py +4 -3
  51. nucliadb/export_import/exporter.py +11 -19
  52. nucliadb/export_import/importer.py +13 -6
  53. nucliadb/export_import/tasks.py +2 -0
  54. nucliadb/export_import/utils.py +6 -18
  55. nucliadb/health.py +2 -2
  56. nucliadb/ingest/app.py +8 -8
  57. nucliadb/ingest/consumer/consumer.py +8 -10
  58. nucliadb/ingest/consumer/pull.py +3 -8
  59. nucliadb/ingest/consumer/service.py +3 -3
  60. nucliadb/ingest/consumer/utils.py +1 -1
  61. nucliadb/ingest/fields/base.py +28 -49
  62. nucliadb/ingest/fields/conversation.py +12 -12
  63. nucliadb/ingest/fields/exceptions.py +1 -2
  64. nucliadb/ingest/fields/file.py +22 -8
  65. nucliadb/ingest/fields/link.py +7 -7
  66. nucliadb/ingest/fields/text.py +2 -3
  67. nucliadb/ingest/orm/brain_v2.py +78 -64
  68. nucliadb/ingest/orm/broker_message.py +2 -4
  69. nucliadb/ingest/orm/entities.py +10 -209
  70. nucliadb/ingest/orm/index_message.py +4 -4
  71. nucliadb/ingest/orm/knowledgebox.py +18 -27
  72. nucliadb/ingest/orm/processor/auditing.py +1 -3
  73. nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
  74. nucliadb/ingest/orm/processor/processor.py +27 -27
  75. nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
  76. nucliadb/ingest/orm/resource.py +72 -70
  77. nucliadb/ingest/orm/utils.py +1 -1
  78. nucliadb/ingest/processing.py +17 -17
  79. nucliadb/ingest/serialize.py +202 -145
  80. nucliadb/ingest/service/writer.py +3 -109
  81. nucliadb/ingest/settings.py +3 -4
  82. nucliadb/ingest/utils.py +1 -2
  83. nucliadb/learning_proxy.py +11 -11
  84. nucliadb/metrics_exporter.py +5 -4
  85. nucliadb/middleware/__init__.py +82 -1
  86. nucliadb/migrator/datamanager.py +3 -4
  87. nucliadb/migrator/migrator.py +1 -2
  88. nucliadb/migrator/models.py +1 -2
  89. nucliadb/migrator/settings.py +1 -2
  90. nucliadb/models/internal/augment.py +614 -0
  91. nucliadb/models/internal/processing.py +19 -19
  92. nucliadb/openapi.py +2 -2
  93. nucliadb/purge/__init__.py +3 -8
  94. nucliadb/purge/orphan_shards.py +1 -2
  95. nucliadb/reader/__init__.py +5 -0
  96. nucliadb/reader/api/models.py +6 -13
  97. nucliadb/reader/api/v1/download.py +59 -38
  98. nucliadb/reader/api/v1/export_import.py +4 -4
  99. nucliadb/reader/api/v1/learning_config.py +24 -4
  100. nucliadb/reader/api/v1/resource.py +61 -9
  101. nucliadb/reader/api/v1/services.py +18 -14
  102. nucliadb/reader/app.py +3 -1
  103. nucliadb/reader/reader/notifications.py +1 -2
  104. nucliadb/search/api/v1/__init__.py +2 -0
  105. nucliadb/search/api/v1/ask.py +3 -4
  106. nucliadb/search/api/v1/augment.py +585 -0
  107. nucliadb/search/api/v1/catalog.py +11 -15
  108. nucliadb/search/api/v1/find.py +16 -22
  109. nucliadb/search/api/v1/hydrate.py +25 -25
  110. nucliadb/search/api/v1/knowledgebox.py +1 -2
  111. nucliadb/search/api/v1/predict_proxy.py +1 -2
  112. nucliadb/search/api/v1/resource/ask.py +7 -7
  113. nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
  114. nucliadb/search/api/v1/resource/search.py +9 -11
  115. nucliadb/search/api/v1/retrieve.py +130 -0
  116. nucliadb/search/api/v1/search.py +28 -32
  117. nucliadb/search/api/v1/suggest.py +11 -14
  118. nucliadb/search/api/v1/summarize.py +1 -2
  119. nucliadb/search/api/v1/utils.py +2 -2
  120. nucliadb/search/app.py +3 -2
  121. nucliadb/search/augmentor/__init__.py +21 -0
  122. nucliadb/search/augmentor/augmentor.py +232 -0
  123. nucliadb/search/augmentor/fields.py +704 -0
  124. nucliadb/search/augmentor/metrics.py +24 -0
  125. nucliadb/search/augmentor/paragraphs.py +334 -0
  126. nucliadb/search/augmentor/resources.py +238 -0
  127. nucliadb/search/augmentor/utils.py +33 -0
  128. nucliadb/search/lifecycle.py +3 -1
  129. nucliadb/search/predict.py +24 -17
  130. nucliadb/search/predict_models.py +8 -9
  131. nucliadb/search/requesters/utils.py +11 -10
  132. nucliadb/search/search/cache.py +19 -23
  133. nucliadb/search/search/chat/ask.py +88 -59
  134. nucliadb/search/search/chat/exceptions.py +3 -5
  135. nucliadb/search/search/chat/fetcher.py +201 -0
  136. nucliadb/search/search/chat/images.py +6 -4
  137. nucliadb/search/search/chat/old_prompt.py +1375 -0
  138. nucliadb/search/search/chat/parser.py +510 -0
  139. nucliadb/search/search/chat/prompt.py +563 -615
  140. nucliadb/search/search/chat/query.py +449 -36
  141. nucliadb/search/search/chat/rpc.py +85 -0
  142. nucliadb/search/search/fetch.py +3 -4
  143. nucliadb/search/search/filters.py +8 -11
  144. nucliadb/search/search/find.py +33 -31
  145. nucliadb/search/search/find_merge.py +124 -331
  146. nucliadb/search/search/graph_strategy.py +14 -12
  147. nucliadb/search/search/hydrator/__init__.py +3 -152
  148. nucliadb/search/search/hydrator/fields.py +92 -50
  149. nucliadb/search/search/hydrator/images.py +7 -7
  150. nucliadb/search/search/hydrator/paragraphs.py +42 -26
  151. nucliadb/search/search/hydrator/resources.py +20 -16
  152. nucliadb/search/search/ingestion_agents.py +5 -5
  153. nucliadb/search/search/merge.py +90 -94
  154. nucliadb/search/search/metrics.py +10 -9
  155. nucliadb/search/search/paragraphs.py +7 -9
  156. nucliadb/search/search/predict_proxy.py +13 -9
  157. nucliadb/search/search/query.py +14 -86
  158. nucliadb/search/search/query_parser/fetcher.py +51 -82
  159. nucliadb/search/search/query_parser/models.py +19 -20
  160. nucliadb/search/search/query_parser/old_filters.py +20 -19
  161. nucliadb/search/search/query_parser/parsers/ask.py +4 -5
  162. nucliadb/search/search/query_parser/parsers/catalog.py +5 -6
  163. nucliadb/search/search/query_parser/parsers/common.py +5 -6
  164. nucliadb/search/search/query_parser/parsers/find.py +6 -26
  165. nucliadb/search/search/query_parser/parsers/graph.py +13 -23
  166. nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
  167. nucliadb/search/search/query_parser/parsers/search.py +15 -53
  168. nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
  169. nucliadb/search/search/rank_fusion.py +18 -13
  170. nucliadb/search/search/rerankers.py +5 -6
  171. nucliadb/search/search/retrieval.py +300 -0
  172. nucliadb/search/search/summarize.py +5 -6
  173. nucliadb/search/search/utils.py +3 -4
  174. nucliadb/search/settings.py +1 -2
  175. nucliadb/standalone/api_router.py +1 -1
  176. nucliadb/standalone/app.py +4 -3
  177. nucliadb/standalone/auth.py +5 -6
  178. nucliadb/standalone/lifecycle.py +2 -2
  179. nucliadb/standalone/run.py +2 -4
  180. nucliadb/standalone/settings.py +5 -6
  181. nucliadb/standalone/versions.py +3 -4
  182. nucliadb/tasks/consumer.py +13 -8
  183. nucliadb/tasks/models.py +2 -1
  184. nucliadb/tasks/producer.py +3 -3
  185. nucliadb/tasks/retries.py +8 -7
  186. nucliadb/train/api/utils.py +1 -3
  187. nucliadb/train/api/v1/shards.py +1 -2
  188. nucliadb/train/api/v1/trainset.py +1 -2
  189. nucliadb/train/app.py +1 -1
  190. nucliadb/train/generator.py +4 -4
  191. nucliadb/train/generators/field_classifier.py +2 -2
  192. nucliadb/train/generators/field_streaming.py +6 -6
  193. nucliadb/train/generators/image_classifier.py +2 -2
  194. nucliadb/train/generators/paragraph_classifier.py +2 -2
  195. nucliadb/train/generators/paragraph_streaming.py +2 -2
  196. nucliadb/train/generators/question_answer_streaming.py +2 -2
  197. nucliadb/train/generators/sentence_classifier.py +2 -2
  198. nucliadb/train/generators/token_classifier.py +3 -2
  199. nucliadb/train/generators/utils.py +6 -5
  200. nucliadb/train/nodes.py +3 -3
  201. nucliadb/train/resource.py +6 -8
  202. nucliadb/train/settings.py +3 -4
  203. nucliadb/train/types.py +11 -11
  204. nucliadb/train/upload.py +3 -2
  205. nucliadb/train/uploader.py +1 -2
  206. nucliadb/train/utils.py +1 -2
  207. nucliadb/writer/api/v1/export_import.py +4 -1
  208. nucliadb/writer/api/v1/field.py +7 -11
  209. nucliadb/writer/api/v1/knowledgebox.py +3 -4
  210. nucliadb/writer/api/v1/resource.py +9 -20
  211. nucliadb/writer/api/v1/services.py +10 -132
  212. nucliadb/writer/api/v1/upload.py +73 -72
  213. nucliadb/writer/app.py +8 -2
  214. nucliadb/writer/resource/basic.py +12 -15
  215. nucliadb/writer/resource/field.py +7 -5
  216. nucliadb/writer/resource/origin.py +7 -0
  217. nucliadb/writer/settings.py +2 -3
  218. nucliadb/writer/tus/__init__.py +2 -3
  219. nucliadb/writer/tus/azure.py +1 -3
  220. nucliadb/writer/tus/dm.py +3 -3
  221. nucliadb/writer/tus/exceptions.py +3 -4
  222. nucliadb/writer/tus/gcs.py +5 -6
  223. nucliadb/writer/tus/s3.py +2 -3
  224. nucliadb/writer/tus/storage.py +3 -3
  225. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +9 -10
  226. nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
  227. nucliadb/common/datamanagers/entities.py +0 -139
  228. nucliadb-6.9.1.post5192.dist-info/RECORD +0 -392
  229. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
  230. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
  231. {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,85 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from nucliadb.common.ids import FieldId
22
+ from nucliadb.search.search.metrics import Metrics
23
+ from nucliadb_models.augment import AugmentRequest, AugmentResponse
24
+ from nucliadb_models.labels import KnowledgeBoxLabels
25
+ from nucliadb_models.retrieval import RetrievalRequest, RetrievalResponse
26
+ from nucliadb_models.search import FindRequest, Image, KnowledgeboxFindResults, NucliaDBClientType
27
+
28
+
29
+ # TODO(decoupled-ask): replace this for a sdk.find call when moving /ask to RAO
30
+ async def find(
31
+ kbid: str,
32
+ item: FindRequest,
33
+ x_ndb_client: NucliaDBClientType,
34
+ x_nucliadb_user: str,
35
+ x_forwarded_for: str,
36
+ # REVIEW(decoupled-ask): once in an SDK metrics, we'll lose track of metrics
37
+ metrics: Metrics,
38
+ ) -> tuple[KnowledgeboxFindResults, bool]:
39
+ from nucliadb.search.search.find import find
40
+
41
+ results, incomplete, _ = await find(
42
+ kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, metrics
43
+ )
44
+ return results, incomplete
45
+
46
+
47
+ # TODO(decoupled-ask): replace this for a sdk.retrieve call when moving /ask to RAO
48
+ async def retrieve(
49
+ kbid: str,
50
+ item: RetrievalRequest,
51
+ *,
52
+ x_ndb_client: NucliaDBClientType,
53
+ x_nucliadb_user: str,
54
+ x_forwarded_for: str,
55
+ ) -> RetrievalResponse:
56
+ from nucliadb.search.api.v1.retrieve import retrieve_endpoint
57
+
58
+ return await retrieve_endpoint(
59
+ kbid,
60
+ item,
61
+ x_ndb_client=x_ndb_client,
62
+ x_nucliadb_user=x_nucliadb_user,
63
+ x_forwarded_for=x_forwarded_for,
64
+ )
65
+
66
+
67
+ # TODO(decoupled-ask): replace this for a sdk.augment call when moving /ask to RAO
68
+ async def augment(kbid: str, item: AugmentRequest) -> AugmentResponse:
69
+ from nucliadb.search.api.v1.augment import augment_endpoint
70
+
71
+ return await augment_endpoint(kbid, item)
72
+
73
+
74
+ # TODO(decoupled-ask): replace this for a sdk.labelsets call when moving /ask to RAO
75
+ async def labelsets(kbid: str) -> KnowledgeBoxLabels:
76
+ from nucliadb.reader.api.v1.services import get_labelsets
77
+
78
+ return await get_labelsets(kbid)
79
+
80
+
81
+ # TODO(decoupled-ask): replace this for a sdk.download call when moving /ask to RAO
82
+ async def download_image(kbid: str, field_id: FieldId, path: str, *, mime_type: str) -> Image | None:
83
+ from nucliadb.search.search.hydrator.images import download_image
84
+
85
+ return await download_image(kbid, field_id, path, mime_type=mime_type)
@@ -19,7 +19,6 @@
19
19
  #
20
20
  import asyncio
21
21
  from contextvars import ContextVar
22
- from typing import Optional
23
22
 
24
23
  from nidx_protos.nodereader_pb2 import DocumentResult, ParagraphResult
25
24
 
@@ -36,7 +35,7 @@ from nucliadb_protos.resources_pb2 import Paragraph
36
35
  from nucliadb_utils import const
37
36
  from nucliadb_utils.utilities import has_feature
38
37
 
39
- rcache: ContextVar[Optional[dict[str, ResourceORM]]] = ContextVar("rcache", default=None)
38
+ rcache: ContextVar[dict[str, ResourceORM] | None] = ContextVar("rcache", default=None)
40
39
 
41
40
 
42
41
  async def fetch_resources(
@@ -79,7 +78,7 @@ async def fetch_resources(
79
78
 
80
79
  async def get_paragraph_from_resource(
81
80
  orm_resource: ResourceORM, result: ParagraphResult
82
- ) -> Optional[Paragraph]:
81
+ ) -> Paragraph | None:
83
82
  _, field_type, field = result.field.split("/")
84
83
  field_type_int = FIELD_TYPE_STR_TO_PB[field_type]
85
84
  field_obj = await orm_resource.get_field(field, field_type_int, load=False)
@@ -144,7 +143,7 @@ async def get_labels_paragraph(result: ParagraphResult, kbid: str) -> list[str]:
144
143
 
145
144
  async def get_seconds_paragraph(
146
145
  result: ParagraphResult, kbid: str
147
- ) -> Optional[tuple[list[int], list[int]]]:
146
+ ) -> tuple[list[int], list[int]] | None:
148
147
  orm_resource = await cache.get_resource(kbid, result.uuid)
149
148
 
150
149
  if orm_resource is None:
@@ -18,7 +18,7 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  from collections.abc import Iterator
21
- from typing import Any, Optional, Union
21
+ from typing import Any
22
22
 
23
23
  from nucliadb.common.exceptions import InvalidQueryError
24
24
  from nucliadb_models.labels import translate_alias_to_system_label
@@ -108,7 +108,7 @@ def split_labels_by_type(
108
108
 
109
109
  def is_paragraph_labelset_kind(labelset_id: str, classification_labels: knowledgebox_pb2.Labels) -> bool:
110
110
  try:
111
- labelset: Optional[knowledgebox_pb2.LabelSet] = classification_labels.labelset.get(labelset_id)
111
+ labelset: knowledgebox_pb2.LabelSet | None = classification_labels.labelset.get(labelset_id)
112
112
  if labelset is None:
113
113
  return False
114
114
  return knowledgebox_pb2.LabelSet.LabelSetKind.PARAGRAPHS in labelset.kind
@@ -117,7 +117,7 @@ def is_paragraph_labelset_kind(labelset_id: str, classification_labels: knowledg
117
117
  return False
118
118
 
119
119
 
120
- def flatten_filter_literals(filters: Union[list[str], dict[str, Any]]) -> list[str]:
120
+ def flatten_filter_literals(filters: list[str] | dict[str, Any]) -> list[str]:
121
121
  if isinstance(filters, list):
122
122
  return filters
123
123
  else:
@@ -130,20 +130,17 @@ def iter_filter_expression_literals(expression: dict[str, Any]) -> Iterator[str]
130
130
  return
131
131
 
132
132
  if "not" in expression:
133
- for label in iter_filter_expression_literals(expression["not"]):
134
- yield label
133
+ yield from iter_filter_expression_literals(expression["not"])
135
134
  return
136
135
 
137
136
  if "and" in expression:
138
137
  for and_term in expression["and"]:
139
- for label in iter_filter_expression_literals(and_term):
140
- yield label
138
+ yield from iter_filter_expression_literals(and_term)
141
139
  return
142
140
 
143
141
  if "or" in expression:
144
142
  for or_term in expression["or"]:
145
- for label in iter_filter_expression_literals(or_term):
146
- yield label
143
+ yield from iter_filter_expression_literals(or_term)
147
144
  return
148
145
 
149
146
 
@@ -151,7 +148,7 @@ def has_classification_label_filters(filters: list[str]) -> bool:
151
148
  return any(label.startswith(CLASSIFICATION_LABEL_PREFIX) for label in filters)
152
149
 
153
150
 
154
- def convert_to_node_filters(filters: Union[list[str], list[Filter]]) -> dict[str, Any]:
151
+ def convert_to_node_filters(filters: list[str] | list[Filter]) -> dict[str, Any]:
155
152
  if len(filters) == 0:
156
153
  return {}
157
154
 
@@ -161,7 +158,7 @@ def convert_to_node_filters(filters: Union[list[str], list[Filter]]) -> dict[str
161
158
  return {"and": [convert_filter_to_node_schema(fltr) for fltr in filters]}
162
159
 
163
160
 
164
- def convert_filter_to_node_schema(fltr: Union[str, Filter]) -> dict[str, Any]:
161
+ def convert_filter_to_node_schema(fltr: str | Filter) -> dict[str, Any]:
165
162
  if isinstance(fltr, str):
166
163
  return {"literal": fltr}
167
164
 
@@ -23,7 +23,6 @@ from time import time
23
23
  from nucliadb.common.external_index_providers.base import ExternalIndexManager
24
24
  from nucliadb.common.external_index_providers.manager import get_external_index_manager
25
25
  from nucliadb.common.models_utils import to_proto
26
- from nucliadb.search.requesters.utils import Method, nidx_query
27
26
  from nucliadb.search.search.find_merge import (
28
27
  build_find_response,
29
28
  compose_find_resources,
@@ -38,14 +37,16 @@ from nucliadb.search.search.metrics import (
38
37
  )
39
38
  from nucliadb.search.search.query_parser.models import ParsedQuery
40
39
  from nucliadb.search.search.query_parser.parsers import parse_find
41
- from nucliadb.search.search.query_parser.parsers.unit_retrieval import legacy_convert_retrieval_to_proto
42
- from nucliadb.search.search.rank_fusion import (
43
- get_rank_fusion,
40
+ from nucliadb.search.search.query_parser.parsers.unit_retrieval import (
41
+ convert_retrieval_to_proto,
42
+ get_rephrased_query,
43
+ is_incomplete,
44
44
  )
45
45
  from nucliadb.search.search.rerankers import (
46
46
  RerankingOptions,
47
47
  get_reranker,
48
48
  )
49
+ from nucliadb.search.search.retrieval import text_block_search
49
50
  from nucliadb.search.settings import settings
50
51
  from nucliadb_models.search import (
51
52
  FindRequest,
@@ -68,18 +69,16 @@ async def find(
68
69
  ) -> tuple[KnowledgeboxFindResults, bool, ParsedQuery]:
69
70
  external_index_manager = await get_external_index_manager(kbid=kbid)
70
71
  if external_index_manager is not None:
71
- return await _external_index_retrieval(
72
+ return await _external_index_find(
72
73
  kbid,
73
74
  item,
74
75
  external_index_manager,
75
76
  )
76
77
  else:
77
- return await _index_node_retrieval(
78
- kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, metrics
79
- )
78
+ return await _ndb_index_find(kbid, item, x_ndb_client, x_nucliadb_user, x_forwarded_for, metrics)
80
79
 
81
80
 
82
- async def _index_node_retrieval(
81
+ async def _ndb_index_find(
83
82
  kbid: str,
84
83
  item: FindRequest,
85
84
  x_ndb_client: NucliaDBClientType,
@@ -95,32 +94,37 @@ async def _index_node_retrieval(
95
94
  assert parsed.retrieval.rank_fusion is not None and parsed.retrieval.reranker is not None, (
96
95
  "find parser must provide rank fusion and reranker algorithms"
97
96
  )
98
- rank_fusion = get_rank_fusion(parsed.retrieval.rank_fusion)
99
97
  reranker = get_reranker(parsed.retrieval.reranker)
100
- (
101
- pb_query,
102
- incomplete_results,
103
- autofilters,
104
- rephrased_query,
105
- ) = await legacy_convert_retrieval_to_proto(parsed)
98
+ incomplete_results = is_incomplete(parsed.retrieval)
99
+ rephrased_query = get_rephrased_query(parsed)
106
100
 
107
101
  with metrics.time("index_search"):
108
- results, queried_shards = await nidx_query(kbid, Method.SEARCH, pb_query)
102
+ text_blocks, pb_query, pb_response, queried_shards = await text_block_search(
103
+ kbid, parsed.retrieval
104
+ )
109
105
 
110
106
  # Rank fusion merge, cut, hydrate and rerank
111
107
  with metrics.time("results_merge"):
112
- search_results = await build_find_response(
113
- results,
114
- retrieval=parsed.retrieval,
115
- kbid=kbid,
116
- query=pb_query.body,
117
- rephrased_query=rephrased_query,
108
+ resource_hydration_options = ResourceHydrationOptions(
118
109
  show=item.show,
119
110
  extracted=item.extracted,
120
111
  field_type_filter=item.field_type_filter,
112
+ )
113
+ text_block_hydration_options = TextBlockHydrationOptions(
121
114
  highlight=item.highlight,
122
- rank_fusion_algorithm=rank_fusion,
115
+ ematches=pb_response.paragraph.ematches, # type: ignore
116
+ )
117
+ search_results = await build_find_response(
118
+ pb_response,
119
+ text_blocks,
120
+ pb_response.graph,
121
+ retrieval=parsed.retrieval,
122
+ kbid=kbid,
123
+ query=item.query,
124
+ rephrased_query=rephrased_query,
123
125
  reranker=reranker,
126
+ resource_hydration_options=resource_hydration_options,
127
+ text_block_hydration_options=text_block_hydration_options,
124
128
  )
125
129
 
126
130
  search_time = time() - start_time
@@ -137,7 +141,6 @@ async def _index_node_retrieval(
137
141
  )
138
142
 
139
143
  search_results.shards = queried_shards
140
- search_results.autofilters = autofilters
141
144
 
142
145
  ndb_time = metrics["index_search"] + metrics["results_merge"]
143
146
  if metrics["index_search"] > settings.slow_node_query_log_threshold:
@@ -168,7 +171,7 @@ async def _index_node_retrieval(
168
171
  return search_results, incomplete_results, parsed
169
172
 
170
173
 
171
- async def _external_index_retrieval(
174
+ async def _external_index_find(
172
175
  kbid: str,
173
176
  item: FindRequest,
174
177
  external_index_manager: ExternalIndexManager,
@@ -180,12 +183,12 @@ async def _external_index_retrieval(
180
183
  parsed = await parse_find(kbid, item)
181
184
  assert parsed.retrieval.reranker is not None, "find parser must provide a reranking algorithm"
182
185
  reranker = get_reranker(parsed.retrieval.reranker)
183
- search_request, incomplete_results, _, rephrased_query = await legacy_convert_retrieval_to_proto(
184
- parsed
185
- )
186
+ incomplete_results = is_incomplete(parsed.retrieval)
187
+ rephrased_query = get_rephrased_query(parsed)
188
+ search_request = convert_retrieval_to_proto(parsed.retrieval)
186
189
 
187
190
  # Query index
188
- query_results = await external_index_manager.query(search_request) # noqa
191
+ query_results = await external_index_manager.query(search_request)
189
192
 
190
193
  # Hydrate and rerank results
191
194
  text_blocks, resources, best_matches = await hydrate_and_rerank(
@@ -220,7 +223,6 @@ async def _external_index_retrieval(
220
223
  page_number=0,
221
224
  page_size=item.top_k,
222
225
  relations=None, # Not implemented for external indexes yet
223
- autofilters=[], # Not implemented for external indexes yet
224
226
  min_score=results_min_score,
225
227
  best_matches=best_matches,
226
228
  # These are not used for external indexes