nucliadb 6.2.1.post3395__py3-none-any.whl → 6.3.0.post3402__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/search/api/v1/resource/search.py +12 -0
- nucliadb/search/api/v1/suggest.py +5 -0
- nucliadb/search/search/chat/ask.py +3 -2
- nucliadb/search/search/chat/query.py +29 -3
- nucliadb/search/search/query.py +35 -10
- {nucliadb-6.2.1.post3395.dist-info → nucliadb-6.3.0.post3402.dist-info}/METADATA +6 -6
- {nucliadb-6.2.1.post3395.dist-info → nucliadb-6.3.0.post3402.dist-info}/RECORD +10 -10
- {nucliadb-6.2.1.post3395.dist-info → nucliadb-6.3.0.post3402.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post3395.dist-info → nucliadb-6.3.0.post3402.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post3395.dist-info → nucliadb-6.3.0.post3402.dist-info}/top_level.txt +0 -0
@@ -17,10 +17,12 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
+
import json
|
20
21
|
from typing import Optional, Union, cast
|
21
22
|
|
22
23
|
from fastapi import Header, Request, Response
|
23
24
|
from fastapi_versioning import version
|
25
|
+
from pydantic import ValidationError
|
24
26
|
|
25
27
|
from nucliadb.models.responses import HTTPClientError
|
26
28
|
from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
|
@@ -30,6 +32,7 @@ from nucliadb.search.search import cache
|
|
30
32
|
from nucliadb.search.search.exceptions import InvalidQueryError
|
31
33
|
from nucliadb.search.search.merge import merge_paragraphs_results
|
32
34
|
from nucliadb.search.search.query import paragraph_query_to_pb
|
35
|
+
from nucliadb_models.filter import FilterExpression
|
33
36
|
from nucliadb_models.resource import NucliaDBRoles
|
34
37
|
from nucliadb_models.search import (
|
35
38
|
NucliaDBClientType,
|
@@ -59,6 +62,9 @@ async def resource_search(
|
|
59
62
|
kbid: str,
|
60
63
|
query: str,
|
61
64
|
rid: str,
|
65
|
+
filter_expression: Optional[str] = fastapi_query(
|
66
|
+
SearchParamDefaults.filter_expression, include_in_schema=False
|
67
|
+
),
|
62
68
|
fields: list[str] = fastapi_query(SearchParamDefaults.fields),
|
63
69
|
filters: list[str] = fastapi_query(SearchParamDefaults.filters),
|
64
70
|
faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
|
@@ -82,10 +88,13 @@ async def resource_search(
|
|
82
88
|
|
83
89
|
with cache.request_caches():
|
84
90
|
try:
|
91
|
+
expr = FilterExpression.model_validate_json(filter_expression) if filter_expression else None
|
92
|
+
|
85
93
|
pb_query = await paragraph_query_to_pb(
|
86
94
|
kbid,
|
87
95
|
rid,
|
88
96
|
query,
|
97
|
+
expr,
|
89
98
|
fields,
|
90
99
|
filters,
|
91
100
|
faceted,
|
@@ -99,6 +108,9 @@ async def resource_search(
|
|
99
108
|
)
|
100
109
|
except InvalidQueryError as exc:
|
101
110
|
return HTTPClientError(status_code=412, detail=str(exc))
|
111
|
+
except ValidationError as exc:
|
112
|
+
detail = json.loads(exc.json())
|
113
|
+
return HTTPClientError(status_code=422, detail=detail)
|
102
114
|
|
103
115
|
results, incomplete_results, queried_nodes = await node_query(kbid, Method.SEARCH, pb_query)
|
104
116
|
|
@@ -17,11 +17,13 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
+
import json
|
20
21
|
from datetime import datetime
|
21
22
|
from typing import Optional, Union
|
22
23
|
|
23
24
|
from fastapi import Header, Request, Response
|
24
25
|
from fastapi_versioning import version
|
26
|
+
from pydantic import ValidationError
|
25
27
|
|
26
28
|
from nucliadb.models.responses import HTTPClientError
|
27
29
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
@@ -115,6 +117,9 @@ async def suggest_knowledgebox(
|
|
115
117
|
)
|
116
118
|
except InvalidQueryError as exc:
|
117
119
|
return HTTPClientError(status_code=412, detail=str(exc))
|
120
|
+
except ValidationError as exc:
|
121
|
+
detail = json.loads(exc.json())
|
122
|
+
return HTTPClientError(status_code=422, detail=detail)
|
118
123
|
|
119
124
|
|
120
125
|
async def suggest(
|
@@ -47,6 +47,7 @@ from nucliadb.search.search.chat.prompt import PromptContextBuilder
|
|
47
47
|
from nucliadb.search.search.chat.query import (
|
48
48
|
NOT_ENOUGH_CONTEXT_ANSWER,
|
49
49
|
ChatAuditor,
|
50
|
+
add_resource_filter,
|
50
51
|
get_find_results,
|
51
52
|
get_relations_results,
|
52
53
|
maybe_audit_chat,
|
@@ -804,7 +805,7 @@ async def retrieval_in_resource(
|
|
804
805
|
prequeries = calculate_prequeries_for_json_schema(ask_request)
|
805
806
|
|
806
807
|
# Make sure the retrieval is scoped to the resource if provided
|
807
|
-
ask_request.resource_filters = [resource]
|
808
|
+
add_resource_filter(ask_request, [resource])
|
808
809
|
if prequeries is not None:
|
809
810
|
for prequery in prequeries.queries:
|
810
811
|
if prequery.prefilter is True:
|
@@ -812,7 +813,7 @@ async def retrieval_in_resource(
|
|
812
813
|
"rag_strategies",
|
813
814
|
"Prequeries with prefilter are not supported when asking on a resource",
|
814
815
|
)
|
815
|
-
prequery.request.resource_filters = [resource]
|
816
|
+
add_resource_filter(prequery.request, [resource])
|
816
817
|
|
817
818
|
with metrics.time("retrieval"):
|
818
819
|
main_results, prequeries_results, query_parser = await get_find_results(
|
@@ -18,7 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import asyncio
|
21
|
-
from typing import Iterable, Optional
|
21
|
+
from typing import Iterable, Optional, Union
|
22
22
|
|
23
23
|
from nucliadb.common.models_utils import to_proto
|
24
24
|
from nucliadb.search import logger
|
@@ -32,6 +32,7 @@ from nucliadb.search.search.metrics import RAGMetrics
|
|
32
32
|
from nucliadb.search.search.query import QueryParser
|
33
33
|
from nucliadb.search.settings import settings
|
34
34
|
from nucliadb.search.utilities import get_predict
|
35
|
+
from nucliadb_models import filter
|
35
36
|
from nucliadb_models.search import (
|
36
37
|
AskRequest,
|
37
38
|
ChatContextMessage,
|
@@ -119,9 +120,10 @@ async def get_find_results(
|
|
119
120
|
raise NoRetrievalResultsError()
|
120
121
|
# Make sure the main query and prequeries use the same resource filters.
|
121
122
|
# This is important to avoid returning results that don't match the prefilter.
|
122
|
-
|
123
|
+
matching_resources = list(prefilter_matching_resources)
|
124
|
+
add_resource_filter(item, matching_resources)
|
123
125
|
for prequery in prequeries:
|
124
|
-
prequery.request
|
126
|
+
add_resource_filter(prequery.request, matching_resources)
|
125
127
|
prequery.request.show_hidden = item.show_hidden
|
126
128
|
|
127
129
|
if prequeries:
|
@@ -151,6 +153,30 @@ async def get_find_results(
|
|
151
153
|
return main_results, prequeries_results, query_parser
|
152
154
|
|
153
155
|
|
156
|
+
def add_resource_filter(request: Union[FindRequest, AskRequest], resources: list[str]):
|
157
|
+
if len(resources) == 0:
|
158
|
+
return
|
159
|
+
|
160
|
+
if request.filter_expression is not None:
|
161
|
+
if len(resources) > 1:
|
162
|
+
resource_filter: filter.FieldFilterExpression = filter.Or.model_validate(
|
163
|
+
{"or": [filter.Resource(prop="resource", id=rid) for rid in resources]}
|
164
|
+
)
|
165
|
+
else:
|
166
|
+
resource_filter = filter.Resource(prop="resource", id=resources[0])
|
167
|
+
|
168
|
+
# Add to filter expression if set
|
169
|
+
if request.filter_expression.field is None:
|
170
|
+
request.filter_expression.field = resource_filter
|
171
|
+
else:
|
172
|
+
request.filter_expression.field = filter.And.model_validate(
|
173
|
+
{"and": [request.filter_expression.field, resource_filter]}
|
174
|
+
)
|
175
|
+
else:
|
176
|
+
# Add to old key filters instead
|
177
|
+
request.resource_filters = resources
|
178
|
+
|
179
|
+
|
154
180
|
def find_request_from_ask_request(item: AskRequest, query: str) -> FindRequest:
|
155
181
|
find_request = FindRequest()
|
156
182
|
find_request.resource_filters = item.resource_filters
|
nucliadb/search/search/query.py
CHANGED
@@ -252,7 +252,10 @@ class QueryParser:
|
|
252
252
|
if expr:
|
253
253
|
request.paragraph_filter.CopyFrom(expr)
|
254
254
|
|
255
|
-
|
255
|
+
if self.filter_expression.operator == FilterExpression.Operator.OR:
|
256
|
+
request.filter_operator = nodereader_pb2.FilterOperator.OR
|
257
|
+
else:
|
258
|
+
request.filter_operator = nodereader_pb2.FilterOperator.AND
|
256
259
|
|
257
260
|
if self.hidden is not None:
|
258
261
|
expr = nodereader_pb2.FilterExpression()
|
@@ -486,6 +489,7 @@ async def paragraph_query_to_pb(
|
|
486
489
|
kbid: str,
|
487
490
|
rid: str,
|
488
491
|
query: str,
|
492
|
+
filter_expression: Optional[FilterExpression],
|
489
493
|
fields: list[str],
|
490
494
|
filters: list[str],
|
491
495
|
faceted: list[str],
|
@@ -514,7 +518,6 @@ async def paragraph_query_to_pb(
|
|
514
518
|
range_creation_end=range_creation_end,
|
515
519
|
range_modification_start=range_modification_start,
|
516
520
|
range_modification_end=range_modification_end,
|
517
|
-
key_filters=[rid],
|
518
521
|
fields=fields,
|
519
522
|
)
|
520
523
|
fetcher = Fetcher(
|
@@ -532,6 +535,29 @@ async def paragraph_query_to_pb(
|
|
532
535
|
if paragraph_expr is not None:
|
533
536
|
request.paragraph_filter.CopyFrom(paragraph_expr)
|
534
537
|
|
538
|
+
if (field_expr is not None or paragraph_expr is not None) and filter_expression is not None:
|
539
|
+
raise InvalidQueryError("filter_expression", "Cannot mix old filters with filter_expression")
|
540
|
+
|
541
|
+
if filter_expression:
|
542
|
+
if filter_expression.field:
|
543
|
+
expr = await parse_expression(filter_expression.field, kbid)
|
544
|
+
if expr:
|
545
|
+
request.field_filter.CopyFrom(expr)
|
546
|
+
|
547
|
+
if filter_expression.paragraph:
|
548
|
+
expr = await parse_expression(filter_expression.paragraph, kbid)
|
549
|
+
if expr:
|
550
|
+
request.paragraph_filter.CopyFrom(expr)
|
551
|
+
|
552
|
+
if filter_expression.operator == FilterExpression.Operator.OR:
|
553
|
+
request.filter_operator = nodereader_pb2.FilterOperator.OR
|
554
|
+
else:
|
555
|
+
request.filter_operator = nodereader_pb2.FilterOperator.AND
|
556
|
+
|
557
|
+
key_filter = nodereader_pb2.FilterExpression()
|
558
|
+
key_filter.resource.resource_id = rid
|
559
|
+
add_and_expression(request.field_filter, key_filter)
|
560
|
+
|
535
561
|
return request
|
536
562
|
|
537
563
|
|
@@ -657,15 +683,14 @@ async def suggest_query_to_pb(
|
|
657
683
|
request.field_filter.CopyFrom(expr)
|
658
684
|
|
659
685
|
if filter_expression.paragraph:
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
# TODO
|
664
|
-
# expr = await parse_expression(filter_expression.paragraph, kbid)
|
665
|
-
# if expr:
|
666
|
-
# request.paragraph_filter.CopyFrom(expr)
|
686
|
+
expr = await parse_expression(filter_expression.paragraph, kbid)
|
687
|
+
if expr:
|
688
|
+
request.paragraph_filter.CopyFrom(expr)
|
667
689
|
|
668
|
-
|
690
|
+
if filter_expression.operator == FilterExpression.Operator.OR:
|
691
|
+
request.filter_operator = nodereader_pb2.FilterOperator.OR
|
692
|
+
else:
|
693
|
+
request.filter_operator = nodereader_pb2.FilterOperator.AND
|
669
694
|
|
670
695
|
if hidden is not None:
|
671
696
|
expr = nodereader_pb2.FilterExpression()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post3395
|
3
|
+
Version: 6.3.0.post3402
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3395
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3395
|
25
|
-
Requires-Dist: nucliadb-protos>=6.2.1.post3395
|
26
|
-
Requires-Dist: nucliadb-models>=6.2.1.post3395
|
27
|
-
Requires-Dist: nidx-protos>=6.2.1.post3395
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.3.0.post3402
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.0.post3402
|
25
|
+
Requires-Dist: nucliadb-protos>=6.3.0.post3402
|
26
|
+
Requires-Dist: nucliadb-models>=6.3.0.post3402
|
27
|
+
Requires-Dist: nidx-protos>=6.3.0.post3402
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn
|
@@ -198,13 +198,13 @@ nucliadb/search/api/v1/knowledgebox.py,sha256=rWhx3PYWryingu19qwwFDbVvVYynq5Ky23
|
|
198
198
|
nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
|
199
199
|
nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
|
200
200
|
nucliadb/search/api/v1/search.py,sha256=DLXxh2FRXmLnZIIXaSLT7XaNoY2GZJTkpcduLTDyVW4,14023
|
201
|
-
nucliadb/search/api/v1/suggest.py,sha256=
|
201
|
+
nucliadb/search/api/v1/suggest.py,sha256=urnA8rXLTcT2-Yulw2_43Ow8cAPWQEe5bxgDI9gNNZM,6505
|
202
202
|
nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
|
203
203
|
nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
|
204
204
|
nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
205
205
|
nucliadb/search/api/v1/resource/ask.py,sha256=nsVzBSanSSlf0Ody6LSTjdEy75Vg283_YhbkAtWEjh8,3637
|
206
206
|
nucliadb/search/api/v1/resource/ingestion_agents.py,sha256=fqqRCd8Wc9GciS5P98lcnihvTKStsZYYtOU-T1bc-6E,4771
|
207
|
-
nucliadb/search/api/v1/resource/search.py,sha256
|
207
|
+
nucliadb/search/api/v1/resource/search.py,sha256=-GY84XiiGd2vyJtunWvZMdBhZU0O0giO-ISo-HMntBo,5227
|
208
208
|
nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG0BHxGSS9QPw,1141
|
209
209
|
nucliadb/search/requesters/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
|
210
210
|
nucliadb/search/requesters/utils.py,sha256=qL81UVPNgBftUMLpcxIYVr7ILsMqpKCo-9SY2EvAaXw,6681
|
@@ -224,18 +224,18 @@ nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUH
|
|
224
224
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
225
225
|
nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
|
226
226
|
nucliadb/search/search/predict_proxy.py,sha256=IFI3v_ODz2_UU1XZnyaD391fE7-2C0npSmj_HmDvzS4,3123
|
227
|
-
nucliadb/search/search/query.py,sha256=
|
227
|
+
nucliadb/search/search/query.py,sha256=j7NdAXQXSytWvf_tyqpG-Fz4AhVGI-Y5i7NPm3s7-PI,29980
|
228
228
|
nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
|
229
229
|
nucliadb/search/search/rerankers.py,sha256=3vep4EOVNeDJGsMdx-1g6Ar4ZGJG3IHym3HkxnbwtAQ,7321
|
230
230
|
nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
|
231
231
|
nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
|
232
232
|
nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
|
233
233
|
nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
234
|
-
nucliadb/search/search/chat/ask.py,sha256=
|
234
|
+
nucliadb/search/search/chat/ask.py,sha256=HPHM97s1dxbHvugoWZj6aP8vL4gzHjjtwoDvzUMaLfw,36547
|
235
235
|
nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
|
236
236
|
nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
|
237
237
|
nucliadb/search/search/chat/prompt.py,sha256=Jnja-Ss7skgnnDY8BymVfdeYsFPnIQFL8tEvcRXTKUE,47356
|
238
|
-
nucliadb/search/search/chat/query.py,sha256=
|
238
|
+
nucliadb/search/search/chat/query.py,sha256=kMhtUOuVBkWbOaZnpzAUi32oRc5QnXKmlo2LFoozKWc,16382
|
239
239
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
240
240
|
nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
|
241
241
|
nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
|
@@ -338,8 +338,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
338
338
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
339
339
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
340
340
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
341
|
-
nucliadb-6.
|
342
|
-
nucliadb-6.
|
343
|
-
nucliadb-6.
|
344
|
-
nucliadb-6.
|
345
|
-
nucliadb-6.
|
341
|
+
nucliadb-6.3.0.post3402.dist-info/METADATA,sha256=ffldOKngG5BDfwbQ3N430iCHKoBEh8KkL2iM2nAr7_s,4291
|
342
|
+
nucliadb-6.3.0.post3402.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
343
|
+
nucliadb-6.3.0.post3402.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
344
|
+
nucliadb-6.3.0.post3402.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
345
|
+
nucliadb-6.3.0.post3402.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|