nucliadb 6.2.1.post3201__py3-none-any.whl → 6.2.1.post3209__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/search/api/v1/search.py +1 -1
- nucliadb/search/search/find.py +5 -2
- nucliadb/search/search/find_merge.py +3 -1
- nucliadb/search/search/query.py +27 -7
- nucliadb/search/search/query_parser/fetcher.py +6 -0
- {nucliadb-6.2.1.post3201.dist-info → nucliadb-6.2.1.post3209.dist-info}/METADATA +5 -5
- {nucliadb-6.2.1.post3201.dist-info → nucliadb-6.2.1.post3209.dist-info}/RECORD +11 -11
- {nucliadb-6.2.1.post3201.dist-info → nucliadb-6.2.1.post3209.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post3201.dist-info → nucliadb-6.2.1.post3209.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post3201.dist-info → nucliadb-6.2.1.post3209.dist-info}/top_level.txt +0 -0
- {nucliadb-6.2.1.post3201.dist-info → nucliadb-6.2.1.post3209.dist-info}/zip-safe +0 -0
nucliadb/search/api/v1/search.py
CHANGED
@@ -293,7 +293,7 @@ async def search(
|
|
293
293
|
hidden=await filter_hidden_resources(kbid, item.show_hidden),
|
294
294
|
rephrase_prompt=item.rephrase_prompt,
|
295
295
|
)
|
296
|
-
pb_query, incomplete_results, autofilters = await query_parser.parse()
|
296
|
+
pb_query, incomplete_results, autofilters, _ = await query_parser.parse()
|
297
297
|
|
298
298
|
results, query_incomplete_results, queried_nodes = await node_query(
|
299
299
|
kbid, Method.SEARCH, pb_query, target_shard_replicas=item.shards
|
nucliadb/search/search/find.py
CHANGED
@@ -106,7 +106,7 @@ async def _index_node_retrieval(
|
|
106
106
|
kbid, item, generative_model=generative_model
|
107
107
|
)
|
108
108
|
with metrics.time("query_parse"):
|
109
|
-
pb_query, incomplete_results, autofilters = await query_parser.parse()
|
109
|
+
pb_query, incomplete_results, autofilters, rephrased_query = await query_parser.parse()
|
110
110
|
|
111
111
|
with metrics.time("node_query"):
|
112
112
|
results, query_incomplete_results, queried_nodes = await node_query(
|
@@ -120,6 +120,7 @@ async def _index_node_retrieval(
|
|
120
120
|
results,
|
121
121
|
kbid=kbid,
|
122
122
|
query=pb_query.body,
|
123
|
+
rephrased_query=rephrased_query,
|
123
124
|
relation_subgraph_query=pb_query.relation_subgraph,
|
124
125
|
min_score_bm25=pb_query.min_score_bm25,
|
125
126
|
min_score_semantic=pb_query.min_score_semantic,
|
@@ -194,7 +195,7 @@ async def _external_index_retrieval(
|
|
194
195
|
query_parser, _, reranker = await query_parser_from_find_request(
|
195
196
|
kbid, item, generative_model=generative_model
|
196
197
|
)
|
197
|
-
search_request, incomplete_results, _ = await query_parser.parse()
|
198
|
+
search_request, incomplete_results, _, rephrased_query = await query_parser.parse()
|
198
199
|
|
199
200
|
# Query index
|
200
201
|
query_results = await external_index_manager.query(search_request) # noqa
|
@@ -225,6 +226,7 @@ async def _external_index_retrieval(
|
|
225
226
|
retrieval_results = KnowledgeboxFindResults(
|
226
227
|
resources=find_resources,
|
227
228
|
query=item.query,
|
229
|
+
rephrased_query=rephrased_query,
|
228
230
|
total=0,
|
229
231
|
page_number=0,
|
230
232
|
page_size=item.top_k,
|
@@ -269,6 +271,7 @@ async def query_parser_from_find_request(
|
|
269
271
|
kbid=kbid,
|
270
272
|
features=item.features,
|
271
273
|
query=item.query,
|
274
|
+
query_entities=item.query_entities,
|
272
275
|
label_filters=item.filters,
|
273
276
|
keyword_filters=item.keyword_filters,
|
274
277
|
faceted=None,
|
nucliadb/search/search/find_merge.py
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import asyncio
|
21
|
-
from typing import Iterable, Union
|
21
|
+
from typing import Iterable, Optional, Union
|
22
22
|
|
23
23
|
from nucliadb.common.external_index_providers.base import TextBlockMatch
|
24
24
|
from nucliadb.common.ids import ParagraphId, VectorId
|
@@ -74,6 +74,7 @@ async def build_find_response(
|
|
74
74
|
*,
|
75
75
|
kbid: str,
|
76
76
|
query: str,
|
77
|
+
rephrased_query: Optional[str],
|
77
78
|
relation_subgraph_query: EntitiesSubgraphRequest,
|
78
79
|
top_k: int,
|
79
80
|
min_score_bm25: float,
|
@@ -143,6 +144,7 @@ async def build_find_response(
|
|
143
144
|
|
144
145
|
find_results = KnowledgeboxFindResults(
|
145
146
|
query=query,
|
147
|
+
rephrased_query=rephrased_query,
|
146
148
|
resources=find_resources,
|
147
149
|
best_matches=best_matches,
|
148
150
|
relations=relations,
|
nucliadb/search/search/query.py
CHANGED
@@ -24,6 +24,7 @@ from datetime import datetime
|
|
24
24
|
from typing import Any, Awaitable, Optional, Union
|
25
25
|
|
26
26
|
from nucliadb.common import datamanagers
|
27
|
+
from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap
|
27
28
|
from nucliadb.search import logger
|
28
29
|
from nucliadb.search.predict import SendToPredictError
|
29
30
|
from nucliadb.search.search.filters import (
|
@@ -49,6 +50,7 @@ from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
|
|
49
50
|
from nucliadb_models.metadata import ResourceProcessingStatus
|
50
51
|
from nucliadb_models.search import (
|
51
52
|
Filter,
|
53
|
+
KnowledgeGraphEntity,
|
52
54
|
MaxTokens,
|
53
55
|
MinScore,
|
54
56
|
SearchOptions,
|
@@ -94,6 +96,7 @@ class QueryParser:
|
|
94
96
|
keyword_filters: Union[list[str], list[Filter]],
|
95
97
|
top_k: int,
|
96
98
|
min_score: MinScore,
|
99
|
+
query_entities: Optional[list[KnowledgeGraphEntity]] = None,
|
97
100
|
faceted: Optional[list[str]] = None,
|
98
101
|
sort: Optional[SortOptions] = None,
|
99
102
|
range_creation_start: Optional[datetime] = None,
|
@@ -120,6 +123,7 @@ class QueryParser:
|
|
120
123
|
self.kbid = kbid
|
121
124
|
self.features = features
|
122
125
|
self.query = query
|
126
|
+
self.query_entities = query_entities
|
123
127
|
self.hidden = hidden
|
124
128
|
if self.hidden is not None:
|
125
129
|
if self.hidden:
|
@@ -211,7 +215,7 @@ class QueryParser:
|
|
211
215
|
if self.with_synonyms and self.query:
|
212
216
|
asyncio.ensure_future(self.fetcher.get_synonyms())
|
213
217
|
|
214
|
-
async def parse(self) -> tuple[nodereader_pb2.SearchRequest, bool, list[str]]:
|
218
|
+
async def parse(self) -> tuple[nodereader_pb2.SearchRequest, bool, list[str], Optional[str]]:
|
215
219
|
"""
|
216
220
|
:return: (request, incomplete, autofilters)
|
217
221
|
where:
|
@@ -230,12 +234,13 @@ class QueryParser:
|
|
230
234
|
await self.parse_filters(request)
|
231
235
|
self.parse_document_search(request)
|
232
236
|
self.parse_paragraph_search(request)
|
233
|
-
incomplete = await self.parse_vector_search(request)
|
237
|
+
incomplete, rephrased_query = await self.parse_vector_search(request)
|
238
|
+
# BUG: autofilters are not used to filter, but we say we do
|
234
239
|
autofilters = await self.parse_relation_search(request)
|
235
240
|
await self.parse_synonyms(request)
|
236
241
|
await self.parse_min_score(request, incomplete)
|
237
242
|
await self.adjust_page_size(request, self.rank_fusion, self.reranker)
|
238
|
-
return request, incomplete, autofilters
|
243
|
+
return request, incomplete, autofilters, rephrased_query
|
239
244
|
|
240
245
|
async def parse_filters(self, request: nodereader_pb2.SearchRequest) -> None:
|
241
246
|
if len(self.label_filters) > 0:
|
@@ -354,26 +359,41 @@ class QueryParser:
|
|
354
359
|
request.paragraph = True
|
355
360
|
node_features.inc({"type": "paragraphs"})
|
356
361
|
|
357
|
-
async def parse_vector_search(
|
362
|
+
async def parse_vector_search(
|
363
|
+
self, request: nodereader_pb2.SearchRequest
|
364
|
+
) -> tuple[bool, Optional[str]]:
|
358
365
|
if not self.has_vector_search:
|
359
|
-
return False
|
366
|
+
return False, None
|
360
367
|
|
361
368
|
node_features.inc({"type": "vectors"})
|
362
369
|
|
363
370
|
vectorset = await self.fetcher.get_vectorset()
|
364
371
|
query_vector = await self.fetcher.get_query_vector()
|
372
|
+
rephrased_query = await self.fetcher.get_rephrased_query()
|
365
373
|
incomplete = query_vector is None
|
366
374
|
|
367
375
|
request.vectorset = vectorset
|
368
376
|
if query_vector is not None:
|
369
377
|
request.vector.extend(query_vector)
|
370
378
|
|
371
|
-
return incomplete
|
379
|
+
return incomplete, rephrased_query
|
372
380
|
|
373
381
|
async def parse_relation_search(self, request: nodereader_pb2.SearchRequest) -> list[str]:
|
374
382
|
autofilters = []
|
383
|
+
# BUG: autofiler should autofilter, not enable relation search
|
375
384
|
if self.has_relations_search or self.autofilter:
|
376
|
-
|
385
|
+
if self.query_entities:
|
386
|
+
detected_entities = []
|
387
|
+
for entity in self.query_entities:
|
388
|
+
relation_node = utils_pb2.RelationNode()
|
389
|
+
relation_node.value = entity.name
|
390
|
+
if entity.type is not None:
|
391
|
+
relation_node.ntype = RelationNodeTypeMap[entity.type]
|
392
|
+
if entity.subtype is not None:
|
393
|
+
relation_node.subtype = entity.subtype
|
394
|
+
detected_entities.append(relation_node)
|
395
|
+
else:
|
396
|
+
detected_entities = await self.fetcher.get_detected_entities()
|
377
397
|
meta_cache = await self.fetcher.get_entities_meta_cache()
|
378
398
|
detected_entities = expand_entities(meta_cache, detected_entities)
|
379
399
|
if self.has_relations_search:
|
nucliadb/search/search/query_parser/fetcher.py
CHANGED
@@ -226,6 +226,12 @@ class Fetcher:
|
|
226
226
|
self.cache.query_vector = query_vector
|
227
227
|
return query_vector
|
228
228
|
|
229
|
+
async def get_rephrased_query(self) -> Optional[str]:
|
230
|
+
query_info = await self._predict_query_endpoint()
|
231
|
+
if query_info is None:
|
232
|
+
return None
|
233
|
+
return query_info.rephrased_query
|
234
|
+
|
229
235
|
# Labels
|
230
236
|
|
231
237
|
async def get_classification_labels(self) -> knowledgebox_pb2.Labels:
|
{nucliadb-6.2.1.post3201.dist-info → nucliadb-6.2.1.post3209.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post3201
|
3
|
+
Version: 6.2.1.post3209
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3201
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3201
|
27
|
-
Requires-Dist: nucliadb-protos>=6.2.1.post3201
|
28
|
-
Requires-Dist: nucliadb-models>=6.2.1.post3201
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3209
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3209
|
27
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post3209
|
28
|
+
Requires-Dist: nucliadb-models>=6.2.1.post3209
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nuclia-models>=0.24.2
|
31
31
|
Requires-Dist: uvicorn
|
{nucliadb-6.2.1.post3201.dist-info → nucliadb-6.2.1.post3209.dist-info}/RECORD
CHANGED
@@ -195,7 +195,7 @@ nucliadb/search/api/v1/find.py,sha256=DsnWkySu_cFajDWJIxN8DYvLL_Rm2yiCjHD8TsqPfR
|
|
195
195
|
nucliadb/search/api/v1/knowledgebox.py,sha256=Hrt2h-28DDlwN3AdjMZPTYI6om0RMy9bmJvqVHvw8sE,8620
|
196
196
|
nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
|
197
197
|
nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
|
198
|
-
nucliadb/search/api/v1/search.py,sha256=
|
198
|
+
nucliadb/search/api/v1/search.py,sha256=aP_Iv9mi6PvmXNDX2v_t8Xhr7orD4peCY9NKo0oEnQg,13641
|
199
199
|
nucliadb/search/api/v1/suggest.py,sha256=S0YUTAWukzZSYZJzN3T5MUgPM3599HQvG76GOCBuAbQ,5907
|
200
200
|
nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
|
201
201
|
nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
|
@@ -210,8 +210,8 @@ nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298
|
|
210
210
|
nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
|
211
211
|
nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
|
212
212
|
nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
|
213
|
-
nucliadb/search/search/find.py,sha256=
|
214
|
-
nucliadb/search/search/find_merge.py,sha256=
|
213
|
+
nucliadb/search/search/find.py,sha256=EprmlVVPHbPvcJezEJou4Msf9JlM7LD5xaVuKbk4XtY,10065
|
214
|
+
nucliadb/search/search/find_merge.py,sha256=3FnzKFEnVemg6FO_6zveulbAU7klvsiPEBvLrpBBMg8,17450
|
215
215
|
nucliadb/search/search/graph_strategy.py,sha256=Egcq_zn895gTUYmyQTsXj8YaUMa3HBKhcSa1GBvgzAM,31877
|
216
216
|
nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
|
217
217
|
nucliadb/search/search/merge.py,sha256=i_PTBFRqC5iTTziOMEltxLIlmokIou5hjjgR4BnoLBE,22635
|
@@ -219,7 +219,7 @@ nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUH
|
|
219
219
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
220
220
|
nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
|
221
221
|
nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
|
222
|
-
nucliadb/search/search/query.py,sha256=
|
222
|
+
nucliadb/search/search/query.py,sha256=AlhRw4Mick4Oab5HsKHaQpBXsVc_UUY5IpkUIwsFfU8,30577
|
223
223
|
nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
|
224
224
|
nucliadb/search/search/rerankers.py,sha256=0kAHES9X_FKkP7KSN9NRETFmRPKzwrFAo_54MbyvM7Q,9051
|
225
225
|
nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
|
@@ -233,7 +233,7 @@ nucliadb/search/search/chat/prompt.py,sha256=r2JTiRWH3YHPdeRAG5w6gD0g0fWVxdTjYIR
|
|
233
233
|
nucliadb/search/search/chat/query.py,sha256=rBssR6MPSx8h2DASRMTLODaz9oGE5tNVVVeDncSrEp4,15684
|
234
234
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
235
235
|
nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
|
236
|
-
nucliadb/search/search/query_parser/fetcher.py,sha256=
|
236
|
+
nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
|
237
237
|
nucliadb/search/search/query_parser/models.py,sha256=-VlCDXUCgOroAZw1Leqhj2VMgRv_CD2w40PXXOBLaUM,2332
|
238
238
|
nucliadb/search/search/query_parser/parser.py,sha256=JC6koS9Np1PzCfEk1Xy6mpP1HmovS_vIxxA9u-kwzos,6498
|
239
239
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -332,9 +332,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
332
332
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
333
333
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
334
334
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
335
|
-
nucliadb-6.2.1.
|
336
|
-
nucliadb-6.2.1.
|
337
|
-
nucliadb-6.2.1.
|
338
|
-
nucliadb-6.2.1.
|
339
|
-
nucliadb-6.2.1.
|
340
|
-
nucliadb-6.2.1.
|
335
|
+
nucliadb-6.2.1.post3209.dist-info/METADATA,sha256=ZumCP4VHqFd-S8EEIVLceil4L-qtVSlUzwr6ao232Oo,4603
|
336
|
+
nucliadb-6.2.1.post3209.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
337
|
+
nucliadb-6.2.1.post3209.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
338
|
+
nucliadb-6.2.1.post3209.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
339
|
+
nucliadb-6.2.1.post3209.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
340
|
+
nucliadb-6.2.1.post3209.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|