nucliadb-6.2.1.post3328-py3-none-any.whl → nucliadb-6.2.1.post3331-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,6 +37,7 @@ from nucliadb.search.search import cache
  from nucliadb.search.search.exceptions import InvalidQueryError
  from nucliadb.search.search.merge import merge_results
  from nucliadb.search.search.query import QueryParser
+ from nucliadb.search.search.query_parser.old_filters import OldFilterParams
  from nucliadb.search.search.utils import (
      filter_hidden_resources,
      min_score_from_payload,
@@ -269,17 +270,19 @@ async def search(
  kbid=kbid,
  features=item.features,
  query=item.query,
- label_filters=item.filters,
- keyword_filters=[],
  faceted=item.faceted,
  sort=item.sort,
  top_k=item.top_k,
  min_score=item.min_score,
- range_creation_start=item.range_creation_start,
- range_creation_end=item.range_creation_end,
- range_modification_start=item.range_modification_start,
- range_modification_end=item.range_modification_end,
- fields=item.fields,
+ old_filters=OldFilterParams(
+     label_filters=item.filters,
+     keyword_filters=[],
+     range_creation_start=item.range_creation_start,
+     range_creation_end=item.range_creation_end,
+     range_modification_start=item.range_modification_start,
+     range_modification_end=item.range_modification_end,
+     fields=item.fields,
+ ),
  user_vector=item.vector,
  vectorset=item.vectorset,
  with_duplicates=item.with_duplicates,
@@ -133,7 +133,8 @@ async def suggest(
  ) -> KnowledgeboxSuggestResults:
      with cache.request_caches():
          hidden = await filter_hidden_resources(kbid, show_hidden)
-         pb_query = suggest_query_to_pb(
+         pb_query = await suggest_query_to_pb(
+             kbid,
              features,
              query,
              fields,
@@ -61,6 +61,7 @@ from nucliadb.search.search.exceptions import (
  from nucliadb.search.search.graph_strategy import get_graph_results
  from nucliadb.search.search.metrics import RAGMetrics
  from nucliadb.search.search.query import QueryParser
+ from nucliadb.search.search.query_parser.old_filters import OldFilterParams
  from nucliadb.search.utilities import get_predict
  from nucliadb_models.search import (
      AnswerAskResponseItem,
@@ -787,8 +788,10 @@ async def retrieval_in_resource(
  kbid=kbid,
  features=[],
  query="",
- label_filters=ask_request.filters,
- keyword_filters=ask_request.keyword_filters,
+ old_filters=OldFilterParams(
+     label_filters=ask_request.filters,
+     keyword_filters=ask_request.keyword_filters,
+ ),
  top_k=0,
  min_score=MinScore(),
  ),
@@ -39,6 +39,7 @@ from nucliadb.search.search.metrics import (
      RAGMetrics,
  )
  from nucliadb.search.search.query import QueryParser
+ from nucliadb.search.search.query_parser.old_filters import OldFilterParams
  from nucliadb.search.search.query_parser.parser import parse_find
  from nucliadb.search.search.rank_fusion import (
      RankFusionAlgorithm,
@@ -272,23 +273,25 @@ async def query_parser_from_find_request(
  features=item.features,
  query=item.query,
  query_entities=item.query_entities,
- label_filters=item.filters,
- keyword_filters=item.keyword_filters,
  faceted=None,
  sort=None,
  top_k=item.top_k,
  min_score=item.min_score,
- range_creation_start=item.range_creation_start,
- range_creation_end=item.range_creation_end,
- range_modification_start=item.range_modification_start,
- range_modification_end=item.range_modification_end,
- fields=item.fields,
+ old_filters=OldFilterParams(
+     label_filters=item.filters,
+     keyword_filters=item.keyword_filters,
+     range_creation_start=item.range_creation_start,
+     range_creation_end=item.range_creation_end,
+     range_modification_start=item.range_modification_start,
+     range_modification_end=item.range_modification_end,
+     fields=item.fields,
+     key_filters=item.resource_filters,
+ ),
  user_vector=item.vector,
  vectorset=item.vectorset,
  with_duplicates=item.with_duplicates,
  with_synonyms=item.with_synonyms,
  autofilter=item.autofilter,
- key_filters=item.resource_filters,
  security=item.security,
  generative_model=generative_model,
  rephrase=item.rephrase,
@@ -21,24 +21,19 @@ import asyncio
  import json
  import string
  from datetime import datetime
- from typing import Any, Awaitable, Optional, Union
+ from typing import Any, Awaitable, Optional

  from nucliadb.common import datamanagers
  from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap
  from nucliadb.search import logger
  from nucliadb.search.predict import SendToPredictError
  from nucliadb.search.search.filters import (
-     convert_to_node_filters,
-     flatten_filter_literals,
-     has_classification_label_filters,
-     split_labels_by_type,
      translate_label,
-     translate_label_filters,
  )
  from nucliadb.search.search.metrics import (
      node_features,
  )
- from nucliadb.search.search.query_parser.fetcher import Fetcher, get_classification_labels
+ from nucliadb.search.search.query_parser.fetcher import Fetcher
  from nucliadb.search.search.rank_fusion import (
      RankFusionAlgorithm,
  )
@@ -65,6 +60,7 @@ from nucliadb_protos import nodereader_pb2, utils_pb2
  from nucliadb_protos.noderesources_pb2 import Resource

  from .exceptions import InvalidQueryError
+ from .query_parser.old_filters import OldFilterParams, parse_old_filters

  INDEX_SORTABLE_FIELDS = [
      SortField.CREATED,
@@ -92,25 +88,18 @@ class QueryParser:
          kbid: str,
          features: list[SearchOptions],
          query: str,
-         label_filters: Union[list[str], list[Filter]],
-         keyword_filters: Union[list[str], list[Filter]],
          top_k: int,
          min_score: MinScore,
+         old_filters: OldFilterParams,
          query_entities: Optional[list[KnowledgeGraphEntity]] = None,
          faceted: Optional[list[str]] = None,
          sort: Optional[SortOptions] = None,
-         range_creation_start: Optional[datetime] = None,
-         range_creation_end: Optional[datetime] = None,
-         range_modification_start: Optional[datetime] = None,
-         range_modification_end: Optional[datetime] = None,
-         fields: Optional[list[str]] = None,
          user_vector: Optional[list[float]] = None,
          vectorset: Optional[str] = None,
          with_duplicates: bool = False,
          with_status: Optional[ResourceProcessingStatus] = None,
          with_synonyms: bool = False,
          autofilter: bool = False,
-         key_filters: Optional[list[str]] = None,
          security: Optional[RequestSecurity] = None,
          generative_model: Optional[str] = None,
          rephrase: bool = False,
@@ -127,40 +116,28 @@ class QueryParser:
          self.hidden = hidden
          if self.hidden is not None:
              if self.hidden:
-                 label_filters.append(Filter(all=[LABEL_HIDDEN]))  # type: ignore
+                 old_filters.label_filters.append(Filter(all=[LABEL_HIDDEN]))  # type: ignore
              else:
-                 label_filters.append(Filter(none=[LABEL_HIDDEN]))  # type: ignore
-
-         self.label_filters: dict[str, Any] = convert_to_node_filters(label_filters)
-         self.flat_label_filters: list[str] = []
-         self.keyword_filters: dict[str, Any] = convert_to_node_filters(keyword_filters)
+                 old_filters.label_filters.append(Filter(none=[LABEL_HIDDEN]))  # type: ignore
          self.faceted = faceted or []
          self.top_k = top_k
          self.min_score = min_score
         self.sort = sort
-         self.range_creation_start = range_creation_start
-         self.range_creation_end = range_creation_end
-         self.range_modification_start = range_modification_start
-         self.range_modification_end = range_modification_end
-         self.fields = fields or []
          self.user_vector = user_vector
          self.vectorset = vectorset
          self.with_duplicates = with_duplicates
          self.with_status = with_status
          self.with_synonyms = with_synonyms
          self.autofilter = autofilter
-         self.key_filters = key_filters
          self.security = security
          self.generative_model = generative_model
          self.rephrase = rephrase
          self.rephrase_prompt = rephrase_prompt
          self.query_endpoint_used = False
-         if len(self.label_filters) > 0:
-             self.label_filters = translate_label_filters(self.label_filters)
-             self.flat_label_filters = flatten_filter_literals(self.label_filters)
          self.max_tokens = max_tokens
          self.rank_fusion = rank_fusion
          self.reranker = reranker
+         self.old_filters = old_filters
          self.fetcher = Fetcher(
              kbid=kbid,
              query=query,
@@ -197,7 +174,7 @@ class QueryParser:
          This will schedule concurrent tasks for different data that needs to be pulled
          for the sake of the query being performed
          """
-         if len(self.label_filters) > 0 and has_classification_label_filters(self.flat_label_filters):
+         if len(self.old_filters.label_filters) > 0:
              asyncio.ensure_future(self.fetcher.get_classification_labels())

          if self.has_vector_search and self.user_vector is None:
@@ -243,25 +220,7 @@ class QueryParser:
          return request, incomplete, autofilters, rephrased_query

      async def parse_filters(self, request: nodereader_pb2.SearchRequest) -> None:
-         if len(self.label_filters) > 0:
-             field_labels = self.flat_label_filters
-             paragraph_labels: list[str] = []
-             if has_classification_label_filters(self.flat_label_filters):
-                 classification_labels = await self.fetcher.get_classification_labels()
-                 field_labels, paragraph_labels = split_labels_by_type(
-                     self.flat_label_filters, classification_labels
-                 )
-                 check_supported_filters(self.label_filters, paragraph_labels)
-
-             request.filter.field_labels.extend(field_labels)
-             request.filter.paragraph_labels.extend(paragraph_labels)
-             request.filter.labels_expression = json.dumps(self.label_filters)
-
-         if len(self.keyword_filters) > 0:
-             request.filter.keywords_expression = json.dumps(self.keyword_filters)
-
          request.faceted.labels.extend([translate_label(facet) for facet in self.faceted])
-         request.fields.extend(self.fields)

          if self.security is not None and len(self.security.groups) > 0:
              security_pb = utils_pb2.Security()
@@ -270,24 +229,15 @@ class QueryParser:
                  security_pb.access_groups.append(group_id)
              request.security.CopyFrom(security_pb)

-         if self.key_filters is not None and len(self.key_filters) > 0:
-             request.key_filters.extend(self.key_filters)
-             node_features.inc({"type": "key_filters"})
-
          if self.with_status is not None:
              request.with_status = PROCESSING_STATUS_TO_PB_MAP[self.with_status]

-         if self.range_creation_start is not None:
-             request.timestamps.from_created.FromDatetime(self.range_creation_start)
-
-         if self.range_creation_end is not None:
-             request.timestamps.to_created.FromDatetime(self.range_creation_end)
-
-         if self.range_modification_start is not None:
-             request.timestamps.from_modified.FromDatetime(self.range_modification_start)
-
-         if self.range_modification_end is not None:
-             request.timestamps.to_modified.FromDatetime(self.range_modification_end)
+         if self.old_filters:
+             field_expr, paragraph_expr = await parse_old_filters(self.old_filters, self.fetcher)
+             if field_expr is not None:
+                 request.field_filter.CopyFrom(field_expr)
+             if paragraph_expr is not None:
+                 request.paragraph_filter.CopyFrom(paragraph_expr)

      def parse_sorting(self, request: nodereader_pb2.SearchRequest) -> None:
          if len(self.query) == 0:
@@ -533,39 +483,30 @@ async def paragraph_query_to_pb(

      request.body = query

-     # we don't have a specific filter only for resource_ids but key_filters
-     # parse "rid" and "rid/field" like ids, so it does the job
-     request.key_filters.append(rid)
-
-     if len(filters) > 0:
-         field_labels = filters
-         paragraph_labels: list[str] = []
-         if has_classification_label_filters(filters):
-             classification_labels = await get_classification_labels(kbid)
-             field_labels, paragraph_labels = split_labels_by_type(filters, classification_labels)
-         request.filter.field_labels.extend(field_labels)
-         request.filter.paragraph_labels.extend(paragraph_labels)
-
-     request.faceted.labels.extend([translate_label(facet) for facet in faceted])
-     request.fields.extend(fields)
-
-     if sort:
-         request.order.field = sort
-         request.order.type = sort_ord  # type: ignore
-
-     request.with_duplicates = with_duplicates
-
-     if range_creation_start is not None:
-         request.timestamps.from_created.FromDatetime(range_creation_start)
-
-     if range_creation_end is not None:
-         request.timestamps.to_created.FromDatetime(range_creation_end)
-
-     if range_modification_start is not None:
-         request.timestamps.from_modified.FromDatetime(range_modification_start)
-
-     if range_modification_end is not None:
-         request.timestamps.to_modified.FromDatetime(range_modification_end)
+     old = OldFilterParams(
+         label_filters=filters,
+         keyword_filters=[],
+         range_creation_start=range_creation_start,
+         range_creation_end=range_creation_end,
+         range_modification_start=range_modification_start,
+         range_modification_end=range_modification_end,
+         key_filters=[rid],
+         fields=fields,
+     )
+     fetcher = Fetcher(
+         kbid,
+         query="",
+         user_vector=None,
+         vectorset=None,
+         rephrase=False,
+         rephrase_prompt=None,
+         generative_model=None,
+     )
+     field_expr, paragraph_expr = await parse_old_filters(old, fetcher)
+     if field_expr is not None:
+         request.field_filter.CopyFrom(field_expr)
+     if paragraph_expr is not None:
+         request.paragraph_filter.CopyFrom(paragraph_expr)

      return request

@@ -643,7 +584,8 @@ def parse_entities_to_filters(
      return added_filters


- def suggest_query_to_pb(
+ async def suggest_query_to_pb(
+     kbid: str,
      features: list[SuggestOptions],
      query: str,
      fields: list[str],
@@ -663,29 +605,34 @@ def suggest_query_to_pb(

      if SuggestOptions.PARAGRAPH in features:
          request.features.append(nodereader_pb2.SuggestFeatures.PARAGRAPHS)
-         request.fields.extend(fields)

-     if hidden is not None:
-         if hidden:
-             filters.append(Filter(all=[LABEL_HIDDEN]))  # type: ignore
-         else:
-             filters.append(Filter(none=[LABEL_HIDDEN]))  # type: ignore
-
-     expression = convert_to_node_filters(filters)
-     if expression:
-         expression = translate_label_filters(expression)
-
-         request.filter.field_labels.extend(flatten_filter_literals(expression))
-         request.filter.labels_expression = json.dumps(expression)
-
-     if range_creation_start is not None:
-         request.timestamps.from_created.FromDatetime(range_creation_start)
-     if range_creation_end is not None:
-         request.timestamps.to_created.FromDatetime(range_creation_end)
-     if range_modification_start is not None:
-         request.timestamps.from_modified.FromDatetime(range_modification_start)
-     if range_modification_end is not None:
-         request.timestamps.to_modified.FromDatetime(range_modification_end)
+     if hidden is not None:
+         if hidden:
+             filters.append(Filter(all=[LABEL_HIDDEN]))  # type: ignore
+         else:
+             filters.append(Filter(none=[LABEL_HIDDEN]))  # type: ignore
+
+     old = OldFilterParams(
+         label_filters=filters,
+         keyword_filters=[],
+         range_creation_start=range_creation_start,
+         range_creation_end=range_creation_end,
+         range_modification_start=range_modification_start,
+         range_modification_end=range_modification_end,
+         fields=fields,
+     )
+     fetcher = Fetcher(
+         kbid,
+         query="",
+         user_vector=None,
+         vectorset=None,
+         rephrase=False,
+         rephrase_prompt=None,
+         generative_model=None,
+     )
+     field_expr, _ = await parse_old_filters(old, fetcher)
+     if field_expr is not None:
+         request.field_filter.CopyFrom(field_expr)

      return request

@@ -0,0 +1,260 @@
+ # Copyright (C) 2021 Bosutech XXI S.L.
+ #
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
+ # For commercial licensing, contact us at info@nuclia.com.
+ #
+ # AGPL:
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Affero General Public License as
+ # published by the Free Software Foundation, either version 3 of the
+ # License, or (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU Affero General Public License for more details.
+ #
+ # You should have received a copy of the GNU Affero General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ #
+
+ from dataclasses import dataclass
+ from datetime import datetime
+ from typing import Optional, Union
+
+ from nucliadb.search.search.filters import translate_label
+ from nucliadb_models.search import (
+     Filter,
+ )
+ from nucliadb_protos import knowledgebox_pb2
+ from nucliadb_protos.nodereader_pb2 import FilterExpression
+
+ from .exceptions import InvalidQueryError
+ from .fetcher import Fetcher
+
+
+ @dataclass
+ class OldFilterParams:
+     label_filters: Union[list[str], list[Filter]]
+     keyword_filters: Union[list[str], list[Filter]]
+     range_creation_start: Optional[datetime] = None
+     range_creation_end: Optional[datetime] = None
+     range_modification_start: Optional[datetime] = None
+     range_modification_end: Optional[datetime] = None
+     fields: Optional[list[str]] = None
+     key_filters: Optional[list[str]] = None
+
+
+ async def parse_old_filters(
+     old: OldFilterParams, fetcher: Fetcher
+ ) -> tuple[Optional[FilterExpression], Optional[FilterExpression]]:
+     filters = []
+     paragraph_filter_expression = None
+
+     # Labels
+     if old.label_filters:
+         classification_labels = await fetcher.get_classification_labels()
+
+         paragraph_exprs = []
+         for fltr in old.label_filters:
+             field_expr, paragraph_expr = convert_label_filter_to_expressions(fltr, classification_labels)
+             if field_expr:
+                 filters.append(field_expr)
+             if paragraph_expr:
+                 paragraph_exprs.append(paragraph_expr)
+
+         if len(paragraph_exprs) == 1:
+             paragraph_filter_expression = paragraph_exprs[0]
+         elif len(paragraph_exprs) > 1:
+             paragraph_filter_expression = FilterExpression()
+             paragraph_filter_expression.bool_and.operands.extend(paragraph_exprs)
+
+     # Keywords
+     if old.keyword_filters:
+         for fltr in old.keyword_filters:
+             filters.append(convert_keyword_filter_to_expression(fltr))
+
+     # Timestamps
+     if old.range_creation_start is not None or old.range_creation_end is not None:
+         f = FilterExpression()
+         f.date.field = FilterExpression.DateRangeFilter.DateField.CREATED
+         if old.range_creation_start is not None:
+             f.date.since.FromDatetime(old.range_creation_start)
+         if old.range_creation_end is not None:
+             f.date.until.FromDatetime(old.range_creation_end)
+         filters.append(f)
+
+     if old.range_modification_start is not None or old.range_modification_end is not None:
+         f = FilterExpression()
+         f.date.field = FilterExpression.DateRangeFilter.DateField.MODIFIED
+         if old.range_modification_start is not None:
+             f.date.since.FromDatetime(old.range_modification_start)
+         if old.range_modification_end is not None:
+             f.date.until.FromDatetime(old.range_modification_end)
+         filters.append(f)
+
+     # Fields
+     if old.fields:
+         field_filters = []
+         for field in old.fields:
+             parts = field.split("/")
+             f = FilterExpression()
+             f.field.field_type = parts[0]
+             if len(parts) > 1:
+                 f.field.field_id = parts[1]
+             field_filters.append(f)
+
+         if len(field_filters) > 1:
+             f = FilterExpression()
+             f.bool_or.operands.extend(field_filters)
+             filters.append(f)
+         else:
+             filters.append(field_filters[0])
+
+     # Key filter
+     if old.key_filters:
+         key_exprs = []
+         for key in old.key_filters:
+             expr = FilterExpression()
+             parts = key.split("/")
+             if len(parts) == 1:
+                 expr.resource.resource_id = parts[0]
+             else:
+                 r = FilterExpression()
+                 r.resource.resource_id = parts[0]
+                 expr.bool_and.operands.append(r)
+                 f = FilterExpression()
+                 f.field.field_type = parts[1]
+                 if len(parts) > 2:
+                     f.field.field_id = parts[2]
+             key_exprs.append(expr)
+
+         if len(key_exprs) == 1:
+             filters.append(key_exprs[0])
+         elif len(key_exprs) > 1:
+             f = FilterExpression()
+             f.bool_or.operands.extend(key_exprs)
+             filters.append(f)
+
+     # Build filter
+     if len(filters) == 0:
+         return None, paragraph_filter_expression
+     elif len(filters) == 1:
+         return filters[0], paragraph_filter_expression
+     else:
+         f = FilterExpression()
+         f.bool_and.operands.extend(filters)
+         return f, paragraph_filter_expression
+
+
+ def convert_label_filter_to_expressions(
+     fltr: Union[str, Filter], classification_labels: knowledgebox_pb2.Labels
+ ) -> tuple[Optional[FilterExpression], Optional[FilterExpression]]:
+     if isinstance(fltr, str):
+         fltr = translate_label(fltr)
+         f = FilterExpression()
+         f.facet.facet = fltr
+         if is_paragraph_label(fltr, classification_labels):
+             return None, f
+         else:
+             return f, None
+
+     if fltr.all:
+         return split_labels(fltr.all, classification_labels, "bool_and", negate=False)
+     if fltr.any:
+         return split_labels(fltr.any, classification_labels, "bool_or", negate=False)
+     if fltr.none:
+         return split_labels(fltr.none, classification_labels, "bool_and", negate=True)
+     if fltr.not_all:
+         return split_labels(fltr.not_all, classification_labels, "bool_or", negate=True)
+
+     return None, None
+
+
+ def split_labels(
+     labels: list[str], classification_labels: knowledgebox_pb2.Labels, combinator: str, negate: bool
+ ) -> tuple[Optional[FilterExpression], Optional[FilterExpression]]:
+     field = []
+     paragraph = []
+     for label in labels:
+         label = translate_label(label)
+         expr = FilterExpression()
+         if negate:
+             expr.bool_not.facet.facet = label
+         else:
+             expr.facet.facet = label
+
+         if is_paragraph_label(label, classification_labels):
+             paragraph.append(expr)
+         else:
+             field.append(expr)
+
+     if len(field) == 0:
+         field_expr = None
+     elif len(field) == 1:
+         field_expr = field[0]
+     else:
+         field_expr = FilterExpression()
+         filter_list = getattr(field_expr, combinator)
+         filter_list.operands.extend(field)
+
+     if len(paragraph) > 0 and combinator == "bool_or":
+         raise InvalidQueryError(
+             "filters",
+             "Paragraph labels can only be used with 'all' filter",
+         )
+
+     if len(paragraph) == 0:
+         paragraph_expr = None
+     elif len(paragraph) == 1:
+         paragraph_expr = paragraph[0]
+     else:
+         paragraph_expr = FilterExpression()
+         filter_list = getattr(paragraph_expr, combinator)
+         filter_list.extend(paragraph)
+
+     return field_expr, paragraph_expr
+
+
+ def is_paragraph_label(label: str, classification_labels: knowledgebox_pb2.Labels) -> bool:
+     if len(label) == 0 or label[0] != "/":
+         return False
+     if not label.startswith("/l/"):
+         return False
+     # Classification labels should have the form /l/labelset/label
+     parts = label.split("/")
+     if len(parts) < 4:
+         return False
+     labelset_id = parts[2]
+
+     try:
+         labelset: Optional[knowledgebox_pb2.LabelSet] = classification_labels.labelset.get(labelset_id)
+         if labelset is None:
+             return False
+         return knowledgebox_pb2.LabelSet.LabelSetKind.PARAGRAPHS in labelset.kind
+     except KeyError:
+         # labelset_id not found
+         return False
+
+
+ def convert_keyword_filter_to_expression(fltr: Union[str, Filter]) -> FilterExpression:
+     if isinstance(fltr, str):
+         return convert_keyword_to_expression(fltr)
+
+     f = FilterExpression()
+     if fltr.all:
+         f.bool_and.operands.extend((convert_keyword_to_expression(f) for f in fltr.all))
+     if fltr.any:
+         f.bool_or.operands.extend((convert_keyword_to_expression(f) for f in fltr.any))
+     if fltr.none:
+         f.bool_not.bool_or.operands.extend((convert_keyword_to_expression(f) for f in fltr.none))
+     if fltr.not_all:
+         f.bool_not.bool_and.operands.extend((convert_keyword_to_expression(f) for f in fltr.not_all))
+
+     return f
+
+
+ def convert_keyword_to_expression(keyword: str) -> FilterExpression:
+     f = FilterExpression()
+     f.keyword.keyword = keyword
+     return f
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: nucliadb
- Version: 6.2.1.post3328
+ Version: 6.2.1.post3331
  Summary: NucliaDB
  Author-email: Nuclia <nucliadb@nuclia.com>
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3 :: Only
  Requires-Python: <4,>=3.9
  Description-Content-Type: text/markdown
- Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3328
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3328
- Requires-Dist: nucliadb-protos>=6.2.1.post3328
- Requires-Dist: nucliadb-models>=6.2.1.post3328
- Requires-Dist: nidx-protos>=6.2.1.post3328
+ Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3331
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3331
+ Requires-Dist: nucliadb-protos>=6.2.1.post3331
+ Requires-Dist: nucliadb-models>=6.2.1.post3331
+ Requires-Dist: nidx-protos>=6.2.1.post3331
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
  Requires-Dist: nuclia-models>=0.24.2
  Requires-Dist: uvicorn
@@ -196,8 +196,8 @@ nucliadb/search/api/v1/find.py,sha256=DsnWkySu_cFajDWJIxN8DYvLL_Rm2yiCjHD8TsqPfR
  nucliadb/search/api/v1/knowledgebox.py,sha256=rWhx3PYWryingu19qwwFDbVvVYynq5Ky23FSlzmTutQ,8721
  nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
  nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
- nucliadb/search/api/v1/search.py,sha256=k80su3UaucHiYflQFSilW10QW4ZxRVRp0kT8zrJXbcU,13498
- nucliadb/search/api/v1/suggest.py,sha256=S0YUTAWukzZSYZJzN3T5MUgPM3599HQvG76GOCBuAbQ,5907
+ nucliadb/search/api/v1/search.py,sha256=CBszxidsfoLmrCbVL8cuZboWJlmt_GAfpjix0Tdp_yA,13650
+ nucliadb/search/api/v1/suggest.py,sha256=Cc1vvT5AFEq0j6_IAx5jSVekMJpIi6kY-DWhzi7vkLg,5931
  nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
  nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
  nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -213,7 +213,7 @@ nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298
  nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
  nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
  nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
- nucliadb/search/search/find.py,sha256=AocqiH_mWvF_szUaW0ONqWrZAbX-k_VhM0Lpv7D669M,10030
+ nucliadb/search/search/find.py,sha256=BC_5p3oRKeQBVmuFCDdCd36Qr2TqJZPIjC0XfRuDJWU,10186
  nucliadb/search/search/find_merge.py,sha256=3FnzKFEnVemg6FO_6zveulbAU7klvsiPEBvLrpBBMg8,17450
  nucliadb/search/search/graph_strategy.py,sha256=ahwcUTQZ0Ll-rnS285DO9PmRyiM-1p4BM3UvmOYVwhM,31750
  nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
@@ -223,14 +223,14 @@ nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUH
  nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
  nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
  nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
- nucliadb/search/search/query.py,sha256=AlhRw4Mick4Oab5HsKHaQpBXsVc_UUY5IpkUIwsFfU8,30577
+ nucliadb/search/search/query.py,sha256=vSnnqJPB5iHI7kugaL6boH1l4j5HV6ln4EKbskJnFWw,27346
  nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
  nucliadb/search/search/rerankers.py,sha256=3vep4EOVNeDJGsMdx-1g6Ar4ZGJG3IHym3HkxnbwtAQ,7321
  nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
  nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
  nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
  nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
- nucliadb/search/search/chat/ask.py,sha256=SfnixWdSdwCE8o8zN0_bYpRuqw0VPc62DF4Wbft_GvQ,36303
+ nucliadb/search/search/chat/ask.py,sha256=q0CSXQJs69z52XWleIklcn5_bvHJQT7z94XUqKW506Y,36451
  nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
  nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
  nucliadb/search/search/chat/prompt.py,sha256=Jnja-Ss7skgnnDY8BymVfdeYsFPnIQFL8tEvcRXTKUE,47356
@@ -239,6 +239,7 @@ nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyv
  nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
  nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
  nucliadb/search/search/query_parser/models.py,sha256=2iWuTcH24RDF8xokgXr0j5qbMoURQ1TFyqJIYs16LqU,2283
+ nucliadb/search/search/query_parser/old_filters.py,sha256=-zbfN-RsXoj_DRjh3Lfp-wShwFXgkISawzVptVzja-A,9071
  nucliadb/search/search/query_parser/parser.py,sha256=m6meq5QQO_ofdtbrvEORsZLjxURWfRR0dINrgDXmYRg,6323
  nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
  nucliadb/standalone/api_router.py,sha256=4-g-eEq27nL6vKCLRCoV0Pxf-L273N-eHeEX2vI9qgg,6215
@@ -335,8 +336,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
- nucliadb-6.2.1.post3328.dist-info/METADATA,sha256=BIvljbBrmj9EjvUGSuI1e3xmkgAAUtfIy023SRqCKO8,4291
- nucliadb-6.2.1.post3328.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- nucliadb-6.2.1.post3328.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
- nucliadb-6.2.1.post3328.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
- nucliadb-6.2.1.post3328.dist-info/RECORD,,
+ nucliadb-6.2.1.post3331.dist-info/METADATA,sha256=SRTN3yRk9RMQE2QmoZnEDVQOkAEfcT-YqNUbXYbn_rc,4291
+ nucliadb-6.2.1.post3331.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ nucliadb-6.2.1.post3331.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
+ nucliadb-6.2.1.post3331.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
+ nucliadb-6.2.1.post3331.dist-info/RECORD,,