nucliadb 6.2.1.post3204__py3-none-any.whl → 6.2.1.post3212__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/search/api/v1/search.py +1 -1
- nucliadb/search/search/find.py +4 -2
- nucliadb/search/search/find_merge.py +3 -1
- nucliadb/search/search/query.py +9 -6
- nucliadb/search/search/query_parser/fetcher.py +6 -0
- {nucliadb-6.2.1.post3204.dist-info → nucliadb-6.2.1.post3212.dist-info}/METADATA +5 -5
- {nucliadb-6.2.1.post3204.dist-info → nucliadb-6.2.1.post3212.dist-info}/RECORD +11 -11
- {nucliadb-6.2.1.post3204.dist-info → nucliadb-6.2.1.post3212.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post3204.dist-info → nucliadb-6.2.1.post3212.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post3204.dist-info → nucliadb-6.2.1.post3212.dist-info}/top_level.txt +0 -0
- {nucliadb-6.2.1.post3204.dist-info → nucliadb-6.2.1.post3212.dist-info}/zip-safe +0 -0
nucliadb/search/api/v1/search.py
CHANGED
@@ -293,7 +293,7 @@ async def search(
|
|
293
293
|
hidden=await filter_hidden_resources(kbid, item.show_hidden),
|
294
294
|
rephrase_prompt=item.rephrase_prompt,
|
295
295
|
)
|
296
|
-
pb_query, incomplete_results, autofilters = await query_parser.parse()
|
296
|
+
pb_query, incomplete_results, autofilters, _ = await query_parser.parse()
|
297
297
|
|
298
298
|
results, query_incomplete_results, queried_nodes = await node_query(
|
299
299
|
kbid, Method.SEARCH, pb_query, target_shard_replicas=item.shards
|
nucliadb/search/search/find.py
CHANGED
@@ -106,7 +106,7 @@ async def _index_node_retrieval(
|
|
106
106
|
kbid, item, generative_model=generative_model
|
107
107
|
)
|
108
108
|
with metrics.time("query_parse"):
|
109
|
-
pb_query, incomplete_results, autofilters = await query_parser.parse()
|
109
|
+
pb_query, incomplete_results, autofilters, rephrased_query = await query_parser.parse()
|
110
110
|
|
111
111
|
with metrics.time("node_query"):
|
112
112
|
results, query_incomplete_results, queried_nodes = await node_query(
|
@@ -120,6 +120,7 @@ async def _index_node_retrieval(
|
|
120
120
|
results,
|
121
121
|
kbid=kbid,
|
122
122
|
query=pb_query.body,
|
123
|
+
rephrased_query=rephrased_query,
|
123
124
|
relation_subgraph_query=pb_query.relation_subgraph,
|
124
125
|
min_score_bm25=pb_query.min_score_bm25,
|
125
126
|
min_score_semantic=pb_query.min_score_semantic,
|
@@ -194,7 +195,7 @@ async def _external_index_retrieval(
|
|
194
195
|
query_parser, _, reranker = await query_parser_from_find_request(
|
195
196
|
kbid, item, generative_model=generative_model
|
196
197
|
)
|
197
|
-
search_request, incomplete_results, _ = await query_parser.parse()
|
198
|
+
search_request, incomplete_results, _, rephrased_query = await query_parser.parse()
|
198
199
|
|
199
200
|
# Query index
|
200
201
|
query_results = await external_index_manager.query(search_request) # noqa
|
@@ -225,6 +226,7 @@ async def _external_index_retrieval(
|
|
225
226
|
retrieval_results = KnowledgeboxFindResults(
|
226
227
|
resources=find_resources,
|
227
228
|
query=item.query,
|
229
|
+
rephrased_query=rephrased_query,
|
228
230
|
total=0,
|
229
231
|
page_number=0,
|
230
232
|
page_size=item.top_k,
|
nucliadb/search/search/find_merge.py
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import asyncio
|
21
|
-
from typing import Iterable, Union
|
21
|
+
from typing import Iterable, Optional, Union
|
22
22
|
|
23
23
|
from nucliadb.common.external_index_providers.base import TextBlockMatch
|
24
24
|
from nucliadb.common.ids import ParagraphId, VectorId
|
@@ -74,6 +74,7 @@ async def build_find_response(
|
|
74
74
|
*,
|
75
75
|
kbid: str,
|
76
76
|
query: str,
|
77
|
+
rephrased_query: Optional[str],
|
77
78
|
relation_subgraph_query: EntitiesSubgraphRequest,
|
78
79
|
top_k: int,
|
79
80
|
min_score_bm25: float,
|
@@ -143,6 +144,7 @@ async def build_find_response(
|
|
143
144
|
|
144
145
|
find_results = KnowledgeboxFindResults(
|
145
146
|
query=query,
|
147
|
+
rephrased_query=rephrased_query,
|
146
148
|
resources=find_resources,
|
147
149
|
best_matches=best_matches,
|
148
150
|
relations=relations,
|
nucliadb/search/search/query.py
CHANGED
@@ -215,7 +215,7 @@ class QueryParser:
|
|
215
215
|
if self.with_synonyms and self.query:
|
216
216
|
asyncio.ensure_future(self.fetcher.get_synonyms())
|
217
217
|
|
218
|
-
async def parse(self) -> tuple[nodereader_pb2.SearchRequest, bool, list[str]]:
|
218
|
+
async def parse(self) -> tuple[nodereader_pb2.SearchRequest, bool, list[str], Optional[str]]:
|
219
219
|
"""
|
220
220
|
:return: (request, incomplete, autofilters)
|
221
221
|
where:
|
@@ -234,13 +234,13 @@ class QueryParser:
|
|
234
234
|
await self.parse_filters(request)
|
235
235
|
self.parse_document_search(request)
|
236
236
|
self.parse_paragraph_search(request)
|
237
|
-
incomplete = await self.parse_vector_search(request)
|
237
|
+
incomplete, rephrased_query = await self.parse_vector_search(request)
|
238
238
|
# BUG: autofilters are not used to filter, but we say we do
|
239
239
|
autofilters = await self.parse_relation_search(request)
|
240
240
|
await self.parse_synonyms(request)
|
241
241
|
await self.parse_min_score(request, incomplete)
|
242
242
|
await self.adjust_page_size(request, self.rank_fusion, self.reranker)
|
243
|
-
return request, incomplete, autofilters
|
243
|
+
return request, incomplete, autofilters, rephrased_query
|
244
244
|
|
245
245
|
async def parse_filters(self, request: nodereader_pb2.SearchRequest) -> None:
|
246
246
|
if len(self.label_filters) > 0:
|
@@ -359,21 +359,24 @@ class QueryParser:
|
|
359
359
|
request.paragraph = True
|
360
360
|
node_features.inc({"type": "paragraphs"})
|
361
361
|
|
362
|
-
async def parse_vector_search(
|
362
|
+
async def parse_vector_search(
|
363
|
+
self, request: nodereader_pb2.SearchRequest
|
364
|
+
) -> tuple[bool, Optional[str]]:
|
363
365
|
if not self.has_vector_search:
|
364
|
-
return False
|
366
|
+
return False, None
|
365
367
|
|
366
368
|
node_features.inc({"type": "vectors"})
|
367
369
|
|
368
370
|
vectorset = await self.fetcher.get_vectorset()
|
369
371
|
query_vector = await self.fetcher.get_query_vector()
|
372
|
+
rephrased_query = await self.fetcher.get_rephrased_query()
|
370
373
|
incomplete = query_vector is None
|
371
374
|
|
372
375
|
request.vectorset = vectorset
|
373
376
|
if query_vector is not None:
|
374
377
|
request.vector.extend(query_vector)
|
375
378
|
|
376
|
-
return incomplete
|
379
|
+
return incomplete, rephrased_query
|
377
380
|
|
378
381
|
async def parse_relation_search(self, request: nodereader_pb2.SearchRequest) -> list[str]:
|
379
382
|
autofilters = []
|
nucliadb/search/search/query_parser/fetcher.py
CHANGED
@@ -226,6 +226,12 @@ class Fetcher:
|
|
226
226
|
self.cache.query_vector = query_vector
|
227
227
|
return query_vector
|
228
228
|
|
229
|
+
async def get_rephrased_query(self) -> Optional[str]:
|
230
|
+
query_info = await self._predict_query_endpoint()
|
231
|
+
if query_info is None:
|
232
|
+
return None
|
233
|
+
return query_info.rephrased_query
|
234
|
+
|
229
235
|
# Labels
|
230
236
|
|
231
237
|
async def get_classification_labels(self) -> knowledgebox_pb2.Labels:
|
{nucliadb-6.2.1.post3204.dist-info → nucliadb-6.2.1.post3212.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post3204
|
3
|
+
Version: 6.2.1.post3212
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.
|
27
|
-
Requires-Dist: nucliadb-protos>=6.2.1.
|
28
|
-
Requires-Dist: nucliadb-models>=6.2.1.
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3212
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3212
|
27
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post3212
|
28
|
+
Requires-Dist: nucliadb-models>=6.2.1.post3212
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nuclia-models>=0.24.2
|
31
31
|
Requires-Dist: uvicorn
|
{nucliadb-6.2.1.post3204.dist-info → nucliadb-6.2.1.post3212.dist-info}/RECORD
CHANGED
@@ -195,7 +195,7 @@ nucliadb/search/api/v1/find.py,sha256=DsnWkySu_cFajDWJIxN8DYvLL_Rm2yiCjHD8TsqPfR
|
|
195
195
|
nucliadb/search/api/v1/knowledgebox.py,sha256=Hrt2h-28DDlwN3AdjMZPTYI6om0RMy9bmJvqVHvw8sE,8620
|
196
196
|
nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
|
197
197
|
nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
|
198
|
-
nucliadb/search/api/v1/search.py,sha256=
|
198
|
+
nucliadb/search/api/v1/search.py,sha256=aP_Iv9mi6PvmXNDX2v_t8Xhr7orD4peCY9NKo0oEnQg,13641
|
199
199
|
nucliadb/search/api/v1/suggest.py,sha256=S0YUTAWukzZSYZJzN3T5MUgPM3599HQvG76GOCBuAbQ,5907
|
200
200
|
nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
|
201
201
|
nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
|
@@ -210,8 +210,8 @@ nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298
|
|
210
210
|
nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
|
211
211
|
nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
|
212
212
|
nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
|
213
|
-
nucliadb/search/search/find.py,sha256=
|
214
|
-
nucliadb/search/search/find_merge.py,sha256=
|
213
|
+
nucliadb/search/search/find.py,sha256=EprmlVVPHbPvcJezEJou4Msf9JlM7LD5xaVuKbk4XtY,10065
|
214
|
+
nucliadb/search/search/find_merge.py,sha256=3FnzKFEnVemg6FO_6zveulbAU7klvsiPEBvLrpBBMg8,17450
|
215
215
|
nucliadb/search/search/graph_strategy.py,sha256=Egcq_zn895gTUYmyQTsXj8YaUMa3HBKhcSa1GBvgzAM,31877
|
216
216
|
nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
|
217
217
|
nucliadb/search/search/merge.py,sha256=i_PTBFRqC5iTTziOMEltxLIlmokIou5hjjgR4BnoLBE,22635
|
@@ -219,7 +219,7 @@ nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUH
|
|
219
219
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
220
220
|
nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
|
221
221
|
nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
|
222
|
-
nucliadb/search/search/query.py,sha256=
|
222
|
+
nucliadb/search/search/query.py,sha256=AlhRw4Mick4Oab5HsKHaQpBXsVc_UUY5IpkUIwsFfU8,30577
|
223
223
|
nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
|
224
224
|
nucliadb/search/search/rerankers.py,sha256=0kAHES9X_FKkP7KSN9NRETFmRPKzwrFAo_54MbyvM7Q,9051
|
225
225
|
nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
|
@@ -233,7 +233,7 @@ nucliadb/search/search/chat/prompt.py,sha256=r2JTiRWH3YHPdeRAG5w6gD0g0fWVxdTjYIR
|
|
233
233
|
nucliadb/search/search/chat/query.py,sha256=rBssR6MPSx8h2DASRMTLODaz9oGE5tNVVVeDncSrEp4,15684
|
234
234
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
235
235
|
nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
|
236
|
-
nucliadb/search/search/query_parser/fetcher.py,sha256=
|
236
|
+
nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
|
237
237
|
nucliadb/search/search/query_parser/models.py,sha256=-VlCDXUCgOroAZw1Leqhj2VMgRv_CD2w40PXXOBLaUM,2332
|
238
238
|
nucliadb/search/search/query_parser/parser.py,sha256=JC6koS9Np1PzCfEk1Xy6mpP1HmovS_vIxxA9u-kwzos,6498
|
239
239
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -332,9 +332,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
332
332
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
333
333
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
334
334
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
335
|
-
nucliadb-6.2.1.
|
336
|
-
nucliadb-6.2.1.
|
337
|
-
nucliadb-6.2.1.
|
338
|
-
nucliadb-6.2.1.
|
339
|
-
nucliadb-6.2.1.
|
340
|
-
nucliadb-6.2.1.
|
335
|
+
nucliadb-6.2.1.post3212.dist-info/METADATA,sha256=Umn96hJsJ1ZBdEhlCdysDxJwBJZfL7GMnUvHKGgS8fs,4603
|
336
|
+
nucliadb-6.2.1.post3212.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
337
|
+
nucliadb-6.2.1.post3212.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
338
|
+
nucliadb-6.2.1.post3212.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
339
|
+
nucliadb-6.2.1.post3212.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
340
|
+
nucliadb-6.2.1.post3212.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|