nucliadb 6.2.1.post3395__py3-none-any.whl → 6.3.0.post3402__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,10 +17,12 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ import json
20
21
  from typing import Optional, Union, cast
21
22
 
22
23
  from fastapi import Header, Request, Response
23
24
  from fastapi_versioning import version
25
+ from pydantic import ValidationError
24
26
 
25
27
  from nucliadb.models.responses import HTTPClientError
26
28
  from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, api
@@ -30,6 +32,7 @@ from nucliadb.search.search import cache
30
32
  from nucliadb.search.search.exceptions import InvalidQueryError
31
33
  from nucliadb.search.search.merge import merge_paragraphs_results
32
34
  from nucliadb.search.search.query import paragraph_query_to_pb
35
+ from nucliadb_models.filter import FilterExpression
33
36
  from nucliadb_models.resource import NucliaDBRoles
34
37
  from nucliadb_models.search import (
35
38
  NucliaDBClientType,
@@ -59,6 +62,9 @@ async def resource_search(
59
62
  kbid: str,
60
63
  query: str,
61
64
  rid: str,
65
+ filter_expression: Optional[str] = fastapi_query(
66
+ SearchParamDefaults.filter_expression, include_in_schema=False
67
+ ),
62
68
  fields: list[str] = fastapi_query(SearchParamDefaults.fields),
63
69
  filters: list[str] = fastapi_query(SearchParamDefaults.filters),
64
70
  faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
@@ -82,10 +88,13 @@ async def resource_search(
82
88
 
83
89
  with cache.request_caches():
84
90
  try:
91
+ expr = FilterExpression.model_validate_json(filter_expression) if filter_expression else None
92
+
85
93
  pb_query = await paragraph_query_to_pb(
86
94
  kbid,
87
95
  rid,
88
96
  query,
97
+ expr,
89
98
  fields,
90
99
  filters,
91
100
  faceted,
@@ -99,6 +108,9 @@ async def resource_search(
99
108
  )
100
109
  except InvalidQueryError as exc:
101
110
  return HTTPClientError(status_code=412, detail=str(exc))
111
+ except ValidationError as exc:
112
+ detail = json.loads(exc.json())
113
+ return HTTPClientError(status_code=422, detail=detail)
102
114
 
103
115
  results, incomplete_results, queried_nodes = await node_query(kbid, Method.SEARCH, pb_query)
104
116
 
@@ -17,11 +17,13 @@
17
17
  # You should have received a copy of the GNU Affero General Public License
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
+ import json
20
21
  from datetime import datetime
21
22
  from typing import Optional, Union
22
23
 
23
24
  from fastapi import Header, Request, Response
24
25
  from fastapi_versioning import version
26
+ from pydantic import ValidationError
25
27
 
26
28
  from nucliadb.models.responses import HTTPClientError
27
29
  from nucliadb.search.api.v1.router import KB_PREFIX, api
@@ -115,6 +117,9 @@ async def suggest_knowledgebox(
115
117
  )
116
118
  except InvalidQueryError as exc:
117
119
  return HTTPClientError(status_code=412, detail=str(exc))
120
+ except ValidationError as exc:
121
+ detail = json.loads(exc.json())
122
+ return HTTPClientError(status_code=422, detail=detail)
118
123
 
119
124
 
120
125
  async def suggest(
@@ -47,6 +47,7 @@ from nucliadb.search.search.chat.prompt import PromptContextBuilder
47
47
  from nucliadb.search.search.chat.query import (
48
48
  NOT_ENOUGH_CONTEXT_ANSWER,
49
49
  ChatAuditor,
50
+ add_resource_filter,
50
51
  get_find_results,
51
52
  get_relations_results,
52
53
  maybe_audit_chat,
@@ -804,7 +805,7 @@ async def retrieval_in_resource(
804
805
  prequeries = calculate_prequeries_for_json_schema(ask_request)
805
806
 
806
807
  # Make sure the retrieval is scoped to the resource if provided
807
- ask_request.resource_filters = [resource]
808
+ add_resource_filter(ask_request, [resource])
808
809
  if prequeries is not None:
809
810
  for prequery in prequeries.queries:
810
811
  if prequery.prefilter is True:
@@ -812,7 +813,7 @@ async def retrieval_in_resource(
812
813
  "rag_strategies",
813
814
  "Prequeries with prefilter are not supported when asking on a resource",
814
815
  )
815
- prequery.request.resource_filters = [resource]
816
+ add_resource_filter(prequery.request, [resource])
816
817
 
817
818
  with metrics.time("retrieval"):
818
819
  main_results, prequeries_results, query_parser = await get_find_results(
@@ -18,7 +18,7 @@
18
18
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
  #
20
20
  import asyncio
21
- from typing import Iterable, Optional
21
+ from typing import Iterable, Optional, Union
22
22
 
23
23
  from nucliadb.common.models_utils import to_proto
24
24
  from nucliadb.search import logger
@@ -32,6 +32,7 @@ from nucliadb.search.search.metrics import RAGMetrics
32
32
  from nucliadb.search.search.query import QueryParser
33
33
  from nucliadb.search.settings import settings
34
34
  from nucliadb.search.utilities import get_predict
35
+ from nucliadb_models import filter
35
36
  from nucliadb_models.search import (
36
37
  AskRequest,
37
38
  ChatContextMessage,
@@ -119,9 +120,10 @@ async def get_find_results(
119
120
  raise NoRetrievalResultsError()
120
121
  # Make sure the main query and prequeries use the same resource filters.
121
122
  # This is important to avoid returning results that don't match the prefilter.
122
- item.resource_filters = list(prefilter_matching_resources)
123
+ matching_resources = list(prefilter_matching_resources)
124
+ add_resource_filter(item, matching_resources)
123
125
  for prequery in prequeries:
124
- prequery.request.resource_filters = list(prefilter_matching_resources)
126
+ add_resource_filter(prequery.request, matching_resources)
125
127
  prequery.request.show_hidden = item.show_hidden
126
128
 
127
129
  if prequeries:
@@ -151,6 +153,30 @@ async def get_find_results(
151
153
  return main_results, prequeries_results, query_parser
152
154
 
153
155
 
156
+ def add_resource_filter(request: Union[FindRequest, AskRequest], resources: list[str]):
157
+ if len(resources) == 0:
158
+ return
159
+
160
+ if request.filter_expression is not None:
161
+ if len(resources) > 1:
162
+ resource_filter: filter.FieldFilterExpression = filter.Or.model_validate(
163
+ {"or": [filter.Resource(prop="resource", id=rid) for rid in resources]}
164
+ )
165
+ else:
166
+ resource_filter = filter.Resource(prop="resource", id=resources[0])
167
+
168
+ # Add to filter expression if set
169
+ if request.filter_expression.field is None:
170
+ request.filter_expression.field = resource_filter
171
+ else:
172
+ request.filter_expression.field = filter.And.model_validate(
173
+ {"and": [request.filter_expression.field, resource_filter]}
174
+ )
175
+ else:
176
+ # Add to old key filters instead
177
+ request.resource_filters = resources
178
+
179
+
154
180
  def find_request_from_ask_request(item: AskRequest, query: str) -> FindRequest:
155
181
  find_request = FindRequest()
156
182
  find_request.resource_filters = item.resource_filters
@@ -252,7 +252,10 @@ class QueryParser:
252
252
  if expr:
253
253
  request.paragraph_filter.CopyFrom(expr)
254
254
 
255
- # TODO: Pass operator to PB
255
+ if self.filter_expression.operator == FilterExpression.Operator.OR:
256
+ request.filter_operator = nodereader_pb2.FilterOperator.OR
257
+ else:
258
+ request.filter_operator = nodereader_pb2.FilterOperator.AND
256
259
 
257
260
  if self.hidden is not None:
258
261
  expr = nodereader_pb2.FilterExpression()
@@ -486,6 +489,7 @@ async def paragraph_query_to_pb(
486
489
  kbid: str,
487
490
  rid: str,
488
491
  query: str,
492
+ filter_expression: Optional[FilterExpression],
489
493
  fields: list[str],
490
494
  filters: list[str],
491
495
  faceted: list[str],
@@ -514,7 +518,6 @@ async def paragraph_query_to_pb(
514
518
  range_creation_end=range_creation_end,
515
519
  range_modification_start=range_modification_start,
516
520
  range_modification_end=range_modification_end,
517
- key_filters=[rid],
518
521
  fields=fields,
519
522
  )
520
523
  fetcher = Fetcher(
@@ -532,6 +535,29 @@ async def paragraph_query_to_pb(
532
535
  if paragraph_expr is not None:
533
536
  request.paragraph_filter.CopyFrom(paragraph_expr)
534
537
 
538
+ if (field_expr is not None or paragraph_expr is not None) and filter_expression is not None:
539
+ raise InvalidQueryError("filter_expression", "Cannot mix old filters with filter_expression")
540
+
541
+ if filter_expression:
542
+ if filter_expression.field:
543
+ expr = await parse_expression(filter_expression.field, kbid)
544
+ if expr:
545
+ request.field_filter.CopyFrom(expr)
546
+
547
+ if filter_expression.paragraph:
548
+ expr = await parse_expression(filter_expression.paragraph, kbid)
549
+ if expr:
550
+ request.paragraph_filter.CopyFrom(expr)
551
+
552
+ if filter_expression.operator == FilterExpression.Operator.OR:
553
+ request.filter_operator = nodereader_pb2.FilterOperator.OR
554
+ else:
555
+ request.filter_operator = nodereader_pb2.FilterOperator.AND
556
+
557
+ key_filter = nodereader_pb2.FilterExpression()
558
+ key_filter.resource.resource_id = rid
559
+ add_and_expression(request.field_filter, key_filter)
560
+
535
561
  return request
536
562
 
537
563
 
@@ -657,15 +683,14 @@ async def suggest_query_to_pb(
657
683
  request.field_filter.CopyFrom(expr)
658
684
 
659
685
  if filter_expression.paragraph:
660
- raise InvalidQueryError(
661
- "filter_expression", "paragraph filters not yet available in suggest"
662
- )
663
- # TODO
664
- # expr = await parse_expression(filter_expression.paragraph, kbid)
665
- # if expr:
666
- # request.paragraph_filter.CopyFrom(expr)
686
+ expr = await parse_expression(filter_expression.paragraph, kbid)
687
+ if expr:
688
+ request.paragraph_filter.CopyFrom(expr)
667
689
 
668
- # TODO: Pass operator to PB
690
+ if filter_expression.operator == FilterExpression.Operator.OR:
691
+ request.filter_operator = nodereader_pb2.FilterOperator.OR
692
+ else:
693
+ request.filter_operator = nodereader_pb2.FilterOperator.AND
669
694
 
670
695
  if hidden is not None:
671
696
  expr = nodereader_pb2.FilterExpression()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb
3
- Version: 6.2.1.post3395
3
+ Version: 6.3.0.post3402
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3395
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3395
25
- Requires-Dist: nucliadb-protos>=6.2.1.post3395
26
- Requires-Dist: nucliadb-models>=6.2.1.post3395
27
- Requires-Dist: nidx-protos>=6.2.1.post3395
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.0.post3402
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.0.post3402
25
+ Requires-Dist: nucliadb-protos>=6.3.0.post3402
26
+ Requires-Dist: nucliadb-models>=6.3.0.post3402
27
+ Requires-Dist: nidx-protos>=6.3.0.post3402
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn
@@ -198,13 +198,13 @@ nucliadb/search/api/v1/knowledgebox.py,sha256=rWhx3PYWryingu19qwwFDbVvVYynq5Ky23
198
198
  nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
199
199
  nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
200
200
  nucliadb/search/api/v1/search.py,sha256=DLXxh2FRXmLnZIIXaSLT7XaNoY2GZJTkpcduLTDyVW4,14023
201
- nucliadb/search/api/v1/suggest.py,sha256=tQX7rvPRjE_epk6qN8JB_Xue8JL02uUeVUNpbaJECQE,6318
201
+ nucliadb/search/api/v1/suggest.py,sha256=urnA8rXLTcT2-Yulw2_43Ow8cAPWQEe5bxgDI9gNNZM,6505
202
202
  nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
203
203
  nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
204
204
  nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
205
205
  nucliadb/search/api/v1/resource/ask.py,sha256=nsVzBSanSSlf0Ody6LSTjdEy75Vg283_YhbkAtWEjh8,3637
206
206
  nucliadb/search/api/v1/resource/ingestion_agents.py,sha256=fqqRCd8Wc9GciS5P98lcnihvTKStsZYYtOU-T1bc-6E,4771
207
- nucliadb/search/api/v1/resource/search.py,sha256=oSU5lwG7XRnD7oBFct31JaECGjTjX5R8mxNF1mskINc,4715
207
+ nucliadb/search/api/v1/resource/search.py,sha256=-GY84XiiGd2vyJtunWvZMdBhZU0O0giO-ISo-HMntBo,5227
208
208
  nucliadb/search/api/v1/resource/utils.py,sha256=-NjZqAQtFEXKpIh8ui5S26ItnJ5rzmmG0BHxGSS9QPw,1141
209
209
  nucliadb/search/requesters/__init__.py,sha256=itSI7dtTwFP55YMX4iK7JzdMHS5CQVUiB1XzQu4UBh8,833
210
210
  nucliadb/search/requesters/utils.py,sha256=qL81UVPNgBftUMLpcxIYVr7ILsMqpKCo-9SY2EvAaXw,6681
@@ -224,18 +224,18 @@ nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUH
224
224
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
225
225
  nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
226
226
  nucliadb/search/search/predict_proxy.py,sha256=IFI3v_ODz2_UU1XZnyaD391fE7-2C0npSmj_HmDvzS4,3123
227
- nucliadb/search/search/query.py,sha256=A1HnP7FzBtn3G4-oId_x-x0MHYlQPCTzrtP73LxoEwo,28733
227
+ nucliadb/search/search/query.py,sha256=j7NdAXQXSytWvf_tyqpG-Fz4AhVGI-Y5i7NPm3s7-PI,29980
228
228
  nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
229
229
  nucliadb/search/search/rerankers.py,sha256=3vep4EOVNeDJGsMdx-1g6Ar4ZGJG3IHym3HkxnbwtAQ,7321
230
230
  nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
231
231
  nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
232
232
  nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
233
233
  nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
234
- nucliadb/search/search/chat/ask.py,sha256=xmHAO9rmw1hvutGeYc0kxmioxHPpYEjlB-sTC4ArX-k,36516
234
+ nucliadb/search/search/chat/ask.py,sha256=HPHM97s1dxbHvugoWZj6aP8vL4gzHjjtwoDvzUMaLfw,36547
235
235
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
236
236
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
237
237
  nucliadb/search/search/chat/prompt.py,sha256=Jnja-Ss7skgnnDY8BymVfdeYsFPnIQFL8tEvcRXTKUE,47356
238
- nucliadb/search/search/chat/query.py,sha256=sFRJag80ahpXA7q3oP0XfIsUyRMNz0Y6K6nz8q4wP2A,15371
238
+ nucliadb/search/search/chat/query.py,sha256=kMhtUOuVBkWbOaZnpzAUi32oRc5QnXKmlo2LFoozKWc,16382
239
239
  nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
240
240
  nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
241
241
  nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
@@ -338,8 +338,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
338
338
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
339
339
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
340
340
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
341
- nucliadb-6.2.1.post3395.dist-info/METADATA,sha256=dK45Sc6ekVyUZED7nXwb3j74ycmcL-hyQexGDV2hqxA,4291
342
- nucliadb-6.2.1.post3395.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
343
- nucliadb-6.2.1.post3395.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
344
- nucliadb-6.2.1.post3395.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
345
- nucliadb-6.2.1.post3395.dist-info/RECORD,,
341
+ nucliadb-6.3.0.post3402.dist-info/METADATA,sha256=ffldOKngG5BDfwbQ3N430iCHKoBEh8KkL2iM2nAr7_s,4291
342
+ nucliadb-6.3.0.post3402.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
343
+ nucliadb-6.3.0.post3402.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
344
+ nucliadb-6.3.0.post3402.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
345
+ nucliadb-6.3.0.post3402.dist-info/RECORD,,