nucliadb 6.2.1.post3201__py3-none-any.whl → 6.2.1.post3209__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -293,7 +293,7 @@ async def search(
293
293
  hidden=await filter_hidden_resources(kbid, item.show_hidden),
294
294
  rephrase_prompt=item.rephrase_prompt,
295
295
  )
296
- pb_query, incomplete_results, autofilters = await query_parser.parse()
296
+ pb_query, incomplete_results, autofilters, _ = await query_parser.parse()
297
297
 
298
298
  results, query_incomplete_results, queried_nodes = await node_query(
299
299
  kbid, Method.SEARCH, pb_query, target_shard_replicas=item.shards
@@ -106,7 +106,7 @@ async def _index_node_retrieval(
106
106
  kbid, item, generative_model=generative_model
107
107
  )
108
108
  with metrics.time("query_parse"):
109
- pb_query, incomplete_results, autofilters = await query_parser.parse()
109
+ pb_query, incomplete_results, autofilters, rephrased_query = await query_parser.parse()
110
110
 
111
111
  with metrics.time("node_query"):
112
112
  results, query_incomplete_results, queried_nodes = await node_query(
@@ -120,6 +120,7 @@ async def _index_node_retrieval(
120
120
  results,
121
121
  kbid=kbid,
122
122
  query=pb_query.body,
123
+ rephrased_query=rephrased_query,
123
124
  relation_subgraph_query=pb_query.relation_subgraph,
124
125
  min_score_bm25=pb_query.min_score_bm25,
125
126
  min_score_semantic=pb_query.min_score_semantic,
@@ -194,7 +195,7 @@ async def _external_index_retrieval(
194
195
  query_parser, _, reranker = await query_parser_from_find_request(
195
196
  kbid, item, generative_model=generative_model
196
197
  )
197
- search_request, incomplete_results, _ = await query_parser.parse()
198
+ search_request, incomplete_results, _, rephrased_query = await query_parser.parse()
198
199
 
199
200
  # Query index
200
201
  query_results = await external_index_manager.query(search_request) # noqa
@@ -225,6 +226,7 @@ async def _external_index_retrieval(
225
226
  retrieval_results = KnowledgeboxFindResults(
226
227
  resources=find_resources,
227
228
  query=item.query,
229
+ rephrased_query=rephrased_query,
228
230
  total=0,
229
231
  page_number=0,
230
232
  page_size=item.top_k,
@@ -269,6 +271,7 @@ async def query_parser_from_find_request(
269
271
  kbid=kbid,
270
272
  features=item.features,
271
273
  query=item.query,
274
+ query_entities=item.query_entities,
272
275
  label_filters=item.filters,
273
276
  keyword_filters=item.keyword_filters,
274
277
  faceted=None,
@@ -18,7 +18,7 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import asyncio
21
- from typing import Iterable, Union
21
+ from typing import Iterable, Optional, Union
22
22
 
23
23
  from nucliadb.common.external_index_providers.base import TextBlockMatch
24
24
  from nucliadb.common.ids import ParagraphId, VectorId
@@ -74,6 +74,7 @@ async def build_find_response(
74
74
  *,
75
75
  kbid: str,
76
76
  query: str,
77
+ rephrased_query: Optional[str],
77
78
  relation_subgraph_query: EntitiesSubgraphRequest,
78
79
  top_k: int,
79
80
  min_score_bm25: float,
@@ -143,6 +144,7 @@ async def build_find_response(
143
144
 
144
145
  find_results = KnowledgeboxFindResults(
145
146
  query=query,
147
+ rephrased_query=rephrased_query,
146
148
  resources=find_resources,
147
149
  best_matches=best_matches,
148
150
  relations=relations,
@@ -24,6 +24,7 @@ from datetime import datetime
24
24
  from typing import Any, Awaitable, Optional, Union
25
25
 
26
26
  from nucliadb.common import datamanagers
27
+ from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap
27
28
  from nucliadb.search import logger
28
29
  from nucliadb.search.predict import SendToPredictError
29
30
  from nucliadb.search.search.filters import (
@@ -49,6 +50,7 @@ from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
49
50
  from nucliadb_models.metadata import ResourceProcessingStatus
50
51
  from nucliadb_models.search import (
51
52
  Filter,
53
+ KnowledgeGraphEntity,
52
54
  MaxTokens,
53
55
  MinScore,
54
56
  SearchOptions,
@@ -94,6 +96,7 @@ class QueryParser:
94
96
  keyword_filters: Union[list[str], list[Filter]],
95
97
  top_k: int,
96
98
  min_score: MinScore,
99
+ query_entities: Optional[list[KnowledgeGraphEntity]] = None,
97
100
  faceted: Optional[list[str]] = None,
98
101
  sort: Optional[SortOptions] = None,
99
102
  range_creation_start: Optional[datetime] = None,
@@ -120,6 +123,7 @@ class QueryParser:
120
123
  self.kbid = kbid
121
124
  self.features = features
122
125
  self.query = query
126
+ self.query_entities = query_entities
123
127
  self.hidden = hidden
124
128
  if self.hidden is not None:
125
129
  if self.hidden:
@@ -211,7 +215,7 @@ class QueryParser:
211
215
  if self.with_synonyms and self.query:
212
216
  asyncio.ensure_future(self.fetcher.get_synonyms())
213
217
 
214
- async def parse(self) -> tuple[nodereader_pb2.SearchRequest, bool, list[str]]:
218
+ async def parse(self) -> tuple[nodereader_pb2.SearchRequest, bool, list[str], Optional[str]]:
215
219
  """
216
220
  :return: (request, incomplete, autofilters, rephrased_query)
217
221
  where:
@@ -230,12 +234,13 @@ class QueryParser:
230
234
  await self.parse_filters(request)
231
235
  self.parse_document_search(request)
232
236
  self.parse_paragraph_search(request)
233
- incomplete = await self.parse_vector_search(request)
237
+ incomplete, rephrased_query = await self.parse_vector_search(request)
238
+ # BUG: autofilters are returned as if they had been applied, but they are never actually used to filter
234
239
  autofilters = await self.parse_relation_search(request)
235
240
  await self.parse_synonyms(request)
236
241
  await self.parse_min_score(request, incomplete)
237
242
  await self.adjust_page_size(request, self.rank_fusion, self.reranker)
238
- return request, incomplete, autofilters
243
+ return request, incomplete, autofilters, rephrased_query
239
244
 
240
245
  async def parse_filters(self, request: nodereader_pb2.SearchRequest) -> None:
241
246
  if len(self.label_filters) > 0:
@@ -354,26 +359,41 @@ class QueryParser:
354
359
  request.paragraph = True
355
360
  node_features.inc({"type": "paragraphs"})
356
361
 
357
- async def parse_vector_search(self, request: nodereader_pb2.SearchRequest) -> bool:
362
+ async def parse_vector_search(
363
+ self, request: nodereader_pb2.SearchRequest
364
+ ) -> tuple[bool, Optional[str]]:
358
365
  if not self.has_vector_search:
359
- return False
366
+ return False, None
360
367
 
361
368
  node_features.inc({"type": "vectors"})
362
369
 
363
370
  vectorset = await self.fetcher.get_vectorset()
364
371
  query_vector = await self.fetcher.get_query_vector()
372
+ rephrased_query = await self.fetcher.get_rephrased_query()
365
373
  incomplete = query_vector is None
366
374
 
367
375
  request.vectorset = vectorset
368
376
  if query_vector is not None:
369
377
  request.vector.extend(query_vector)
370
378
 
371
- return incomplete
379
+ return incomplete, rephrased_query
372
380
 
373
381
  async def parse_relation_search(self, request: nodereader_pb2.SearchRequest) -> list[str]:
374
382
  autofilters = []
383
+ # BUG: the autofilter flag should only apply autofilters, not enable relation search
375
384
  if self.has_relations_search or self.autofilter:
376
- detected_entities = await self.fetcher.get_detected_entities()
385
+ if self.query_entities:
386
+ detected_entities = []
387
+ for entity in self.query_entities:
388
+ relation_node = utils_pb2.RelationNode()
389
+ relation_node.value = entity.name
390
+ if entity.type is not None:
391
+ relation_node.ntype = RelationNodeTypeMap[entity.type]
392
+ if entity.subtype is not None:
393
+ relation_node.subtype = entity.subtype
394
+ detected_entities.append(relation_node)
395
+ else:
396
+ detected_entities = await self.fetcher.get_detected_entities()
377
397
  meta_cache = await self.fetcher.get_entities_meta_cache()
378
398
  detected_entities = expand_entities(meta_cache, detected_entities)
379
399
  if self.has_relations_search:
@@ -226,6 +226,12 @@ class Fetcher:
226
226
  self.cache.query_vector = query_vector
227
227
  return query_vector
228
228
 
229
+ async def get_rephrased_query(self) -> Optional[str]:
230
+ query_info = await self._predict_query_endpoint()
231
+ if query_info is None:
232
+ return None
233
+ return query_info.rephrased_query
234
+
229
235
  # Labels
230
236
 
231
237
  async def get_classification_labels(self) -> knowledgebox_pb2.Labels:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.2.1.post3201
3
+ Version: 6.2.1.post3209
4
4
  Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
5
5
  Author: NucliaDB Community
6
6
  Author-email: nucliadb@nuclia.com
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
23
  Requires-Python: >=3.9, <4
24
24
  Description-Content-Type: text/markdown
25
- Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3201
26
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3201
27
- Requires-Dist: nucliadb-protos>=6.2.1.post3201
28
- Requires-Dist: nucliadb-models>=6.2.1.post3201
25
+ Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3209
26
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3209
27
+ Requires-Dist: nucliadb-protos>=6.2.1.post3209
28
+ Requires-Dist: nucliadb-models>=6.2.1.post3209
29
29
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
30
30
  Requires-Dist: nuclia-models>=0.24.2
31
31
  Requires-Dist: uvicorn
@@ -195,7 +195,7 @@ nucliadb/search/api/v1/find.py,sha256=DsnWkySu_cFajDWJIxN8DYvLL_Rm2yiCjHD8TsqPfR
195
195
  nucliadb/search/api/v1/knowledgebox.py,sha256=Hrt2h-28DDlwN3AdjMZPTYI6om0RMy9bmJvqVHvw8sE,8620
196
196
  nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
197
197
  nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
198
- nucliadb/search/api/v1/search.py,sha256=vCj5V9kozoti0JrgU_XJhTcBucWzI4SY1B0yCSj9EQw,13638
198
+ nucliadb/search/api/v1/search.py,sha256=aP_Iv9mi6PvmXNDX2v_t8Xhr7orD4peCY9NKo0oEnQg,13641
199
199
  nucliadb/search/api/v1/suggest.py,sha256=S0YUTAWukzZSYZJzN3T5MUgPM3599HQvG76GOCBuAbQ,5907
200
200
  nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
201
201
  nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
@@ -210,8 +210,8 @@ nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298
210
210
  nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
211
211
  nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
212
212
  nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
213
- nucliadb/search/search/find.py,sha256=DaO3CPBQqRAw-iK_DNf_gM-aEipjtuX6oA2TbAplkxs,9901
214
- nucliadb/search/search/find_merge.py,sha256=5Aqz54E5GG8jw666KNncVHIJcs821ug-YwJ46YL6Br8,17363
213
+ nucliadb/search/search/find.py,sha256=EprmlVVPHbPvcJezEJou4Msf9JlM7LD5xaVuKbk4XtY,10065
214
+ nucliadb/search/search/find_merge.py,sha256=3FnzKFEnVemg6FO_6zveulbAU7klvsiPEBvLrpBBMg8,17450
215
215
  nucliadb/search/search/graph_strategy.py,sha256=Egcq_zn895gTUYmyQTsXj8YaUMa3HBKhcSa1GBvgzAM,31877
216
216
  nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
217
217
  nucliadb/search/search/merge.py,sha256=i_PTBFRqC5iTTziOMEltxLIlmokIou5hjjgR4BnoLBE,22635
@@ -219,7 +219,7 @@ nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUH
219
219
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
220
220
  nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
221
221
  nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
222
- nucliadb/search/search/query.py,sha256=doRdBhM928wB64v271RSyJxsRT5qd6oevImEMz4gpvw,29487
222
+ nucliadb/search/search/query.py,sha256=AlhRw4Mick4Oab5HsKHaQpBXsVc_UUY5IpkUIwsFfU8,30577
223
223
  nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
224
224
  nucliadb/search/search/rerankers.py,sha256=0kAHES9X_FKkP7KSN9NRETFmRPKzwrFAo_54MbyvM7Q,9051
225
225
  nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
@@ -233,7 +233,7 @@ nucliadb/search/search/chat/prompt.py,sha256=r2JTiRWH3YHPdeRAG5w6gD0g0fWVxdTjYIR
233
233
  nucliadb/search/search/chat/query.py,sha256=rBssR6MPSx8h2DASRMTLODaz9oGE5tNVVVeDncSrEp4,15684
234
234
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
235
235
  nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
236
- nucliadb/search/search/query_parser/fetcher.py,sha256=4ObVZSRN_dApeA2rP0yQR7l3gdvmhxY478j3pOYMssA,15528
236
+ nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
237
237
  nucliadb/search/search/query_parser/models.py,sha256=-VlCDXUCgOroAZw1Leqhj2VMgRv_CD2w40PXXOBLaUM,2332
238
238
  nucliadb/search/search/query_parser/parser.py,sha256=JC6koS9Np1PzCfEk1Xy6mpP1HmovS_vIxxA9u-kwzos,6498
239
239
  nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -332,9 +332,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
332
332
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
333
333
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
334
334
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
335
- nucliadb-6.2.1.post3201.dist-info/METADATA,sha256=m-4v4chlPV37jUjkMx61-YtplTraxGbeRj3lHOmhz_Y,4603
336
- nucliadb-6.2.1.post3201.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
337
- nucliadb-6.2.1.post3201.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
338
- nucliadb-6.2.1.post3201.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
339
- nucliadb-6.2.1.post3201.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
340
- nucliadb-6.2.1.post3201.dist-info/RECORD,,
335
+ nucliadb-6.2.1.post3209.dist-info/METADATA,sha256=ZumCP4VHqFd-S8EEIVLceil4L-qtVSlUzwr6ao232Oo,4603
336
+ nucliadb-6.2.1.post3209.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
337
+ nucliadb-6.2.1.post3209.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
338
+ nucliadb-6.2.1.post3209.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
339
+ nucliadb-6.2.1.post3209.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
340
+ nucliadb-6.2.1.post3209.dist-info/RECORD,,