nucliadb 6.2.1.post3328__py3-none-any.whl → 6.2.1.post3332__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/search/api/v1/search.py +10 -7
- nucliadb/search/api/v1/suggest.py +2 -1
- nucliadb/search/search/chat/ask.py +5 -2
- nucliadb/search/search/find.py +11 -8
- nucliadb/search/search/query.py +67 -120
- nucliadb/search/search/query_parser/old_filters.py +260 -0
- {nucliadb-6.2.1.post3328.dist-info → nucliadb-6.2.1.post3332.dist-info}/METADATA +6 -6
- {nucliadb-6.2.1.post3328.dist-info → nucliadb-6.2.1.post3332.dist-info}/RECORD +11 -10
- {nucliadb-6.2.1.post3328.dist-info → nucliadb-6.2.1.post3332.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post3328.dist-info → nucliadb-6.2.1.post3332.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post3328.dist-info → nucliadb-6.2.1.post3332.dist-info}/top_level.txt +0 -0
nucliadb/search/api/v1/search.py
CHANGED
@@ -37,6 +37,7 @@ from nucliadb.search.search import cache
|
|
37
37
|
from nucliadb.search.search.exceptions import InvalidQueryError
|
38
38
|
from nucliadb.search.search.merge import merge_results
|
39
39
|
from nucliadb.search.search.query import QueryParser
|
40
|
+
from nucliadb.search.search.query_parser.old_filters import OldFilterParams
|
40
41
|
from nucliadb.search.search.utils import (
|
41
42
|
filter_hidden_resources,
|
42
43
|
min_score_from_payload,
|
@@ -269,17 +270,19 @@ async def search(
|
|
269
270
|
kbid=kbid,
|
270
271
|
features=item.features,
|
271
272
|
query=item.query,
|
272
|
-
label_filters=item.filters,
|
273
|
-
keyword_filters=[],
|
274
273
|
faceted=item.faceted,
|
275
274
|
sort=item.sort,
|
276
275
|
top_k=item.top_k,
|
277
276
|
min_score=item.min_score,
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
277
|
+
old_filters=OldFilterParams(
|
278
|
+
label_filters=item.filters,
|
279
|
+
keyword_filters=[],
|
280
|
+
range_creation_start=item.range_creation_start,
|
281
|
+
range_creation_end=item.range_creation_end,
|
282
|
+
range_modification_start=item.range_modification_start,
|
283
|
+
range_modification_end=item.range_modification_end,
|
284
|
+
fields=item.fields,
|
285
|
+
),
|
283
286
|
user_vector=item.vector,
|
284
287
|
vectorset=item.vectorset,
|
285
288
|
with_duplicates=item.with_duplicates,
|
@@ -133,7 +133,8 @@ async def suggest(
|
|
133
133
|
) -> KnowledgeboxSuggestResults:
|
134
134
|
with cache.request_caches():
|
135
135
|
hidden = await filter_hidden_resources(kbid, show_hidden)
|
136
|
-
pb_query = suggest_query_to_pb(
|
136
|
+
pb_query = await suggest_query_to_pb(
|
137
|
+
kbid,
|
137
138
|
features,
|
138
139
|
query,
|
139
140
|
fields,
|
@@ -61,6 +61,7 @@ from nucliadb.search.search.exceptions import (
|
|
61
61
|
from nucliadb.search.search.graph_strategy import get_graph_results
|
62
62
|
from nucliadb.search.search.metrics import RAGMetrics
|
63
63
|
from nucliadb.search.search.query import QueryParser
|
64
|
+
from nucliadb.search.search.query_parser.old_filters import OldFilterParams
|
64
65
|
from nucliadb.search.utilities import get_predict
|
65
66
|
from nucliadb_models.search import (
|
66
67
|
AnswerAskResponseItem,
|
@@ -787,8 +788,10 @@ async def retrieval_in_resource(
|
|
787
788
|
kbid=kbid,
|
788
789
|
features=[],
|
789
790
|
query="",
|
790
|
-
|
791
|
-
|
791
|
+
old_filters=OldFilterParams(
|
792
|
+
label_filters=ask_request.filters,
|
793
|
+
keyword_filters=ask_request.keyword_filters,
|
794
|
+
),
|
792
795
|
top_k=0,
|
793
796
|
min_score=MinScore(),
|
794
797
|
),
|
nucliadb/search/search/find.py
CHANGED
@@ -39,6 +39,7 @@ from nucliadb.search.search.metrics import (
|
|
39
39
|
RAGMetrics,
|
40
40
|
)
|
41
41
|
from nucliadb.search.search.query import QueryParser
|
42
|
+
from nucliadb.search.search.query_parser.old_filters import OldFilterParams
|
42
43
|
from nucliadb.search.search.query_parser.parser import parse_find
|
43
44
|
from nucliadb.search.search.rank_fusion import (
|
44
45
|
RankFusionAlgorithm,
|
@@ -272,23 +273,25 @@ async def query_parser_from_find_request(
|
|
272
273
|
features=item.features,
|
273
274
|
query=item.query,
|
274
275
|
query_entities=item.query_entities,
|
275
|
-
label_filters=item.filters,
|
276
|
-
keyword_filters=item.keyword_filters,
|
277
276
|
faceted=None,
|
278
277
|
sort=None,
|
279
278
|
top_k=item.top_k,
|
280
279
|
min_score=item.min_score,
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
280
|
+
old_filters=OldFilterParams(
|
281
|
+
label_filters=item.filters,
|
282
|
+
keyword_filters=item.keyword_filters,
|
283
|
+
range_creation_start=item.range_creation_start,
|
284
|
+
range_creation_end=item.range_creation_end,
|
285
|
+
range_modification_start=item.range_modification_start,
|
286
|
+
range_modification_end=item.range_modification_end,
|
287
|
+
fields=item.fields,
|
288
|
+
key_filters=item.resource_filters,
|
289
|
+
),
|
286
290
|
user_vector=item.vector,
|
287
291
|
vectorset=item.vectorset,
|
288
292
|
with_duplicates=item.with_duplicates,
|
289
293
|
with_synonyms=item.with_synonyms,
|
290
294
|
autofilter=item.autofilter,
|
291
|
-
key_filters=item.resource_filters,
|
292
295
|
security=item.security,
|
293
296
|
generative_model=generative_model,
|
294
297
|
rephrase=item.rephrase,
|
nucliadb/search/search/query.py
CHANGED
@@ -21,24 +21,19 @@ import asyncio
|
|
21
21
|
import json
|
22
22
|
import string
|
23
23
|
from datetime import datetime
|
24
|
-
from typing import Any, Awaitable, Optional
|
24
|
+
from typing import Any, Awaitable, Optional
|
25
25
|
|
26
26
|
from nucliadb.common import datamanagers
|
27
27
|
from nucliadb.common.models_utils.from_proto import RelationNodeTypeMap
|
28
28
|
from nucliadb.search import logger
|
29
29
|
from nucliadb.search.predict import SendToPredictError
|
30
30
|
from nucliadb.search.search.filters import (
|
31
|
-
convert_to_node_filters,
|
32
|
-
flatten_filter_literals,
|
33
|
-
has_classification_label_filters,
|
34
|
-
split_labels_by_type,
|
35
31
|
translate_label,
|
36
|
-
translate_label_filters,
|
37
32
|
)
|
38
33
|
from nucliadb.search.search.metrics import (
|
39
34
|
node_features,
|
40
35
|
)
|
41
|
-
from nucliadb.search.search.query_parser.fetcher import Fetcher
|
36
|
+
from nucliadb.search.search.query_parser.fetcher import Fetcher
|
42
37
|
from nucliadb.search.search.rank_fusion import (
|
43
38
|
RankFusionAlgorithm,
|
44
39
|
)
|
@@ -65,6 +60,7 @@ from nucliadb_protos import nodereader_pb2, utils_pb2
|
|
65
60
|
from nucliadb_protos.noderesources_pb2 import Resource
|
66
61
|
|
67
62
|
from .exceptions import InvalidQueryError
|
63
|
+
from .query_parser.old_filters import OldFilterParams, parse_old_filters
|
68
64
|
|
69
65
|
INDEX_SORTABLE_FIELDS = [
|
70
66
|
SortField.CREATED,
|
@@ -92,25 +88,18 @@ class QueryParser:
|
|
92
88
|
kbid: str,
|
93
89
|
features: list[SearchOptions],
|
94
90
|
query: str,
|
95
|
-
label_filters: Union[list[str], list[Filter]],
|
96
|
-
keyword_filters: Union[list[str], list[Filter]],
|
97
91
|
top_k: int,
|
98
92
|
min_score: MinScore,
|
93
|
+
old_filters: OldFilterParams,
|
99
94
|
query_entities: Optional[list[KnowledgeGraphEntity]] = None,
|
100
95
|
faceted: Optional[list[str]] = None,
|
101
96
|
sort: Optional[SortOptions] = None,
|
102
|
-
range_creation_start: Optional[datetime] = None,
|
103
|
-
range_creation_end: Optional[datetime] = None,
|
104
|
-
range_modification_start: Optional[datetime] = None,
|
105
|
-
range_modification_end: Optional[datetime] = None,
|
106
|
-
fields: Optional[list[str]] = None,
|
107
97
|
user_vector: Optional[list[float]] = None,
|
108
98
|
vectorset: Optional[str] = None,
|
109
99
|
with_duplicates: bool = False,
|
110
100
|
with_status: Optional[ResourceProcessingStatus] = None,
|
111
101
|
with_synonyms: bool = False,
|
112
102
|
autofilter: bool = False,
|
113
|
-
key_filters: Optional[list[str]] = None,
|
114
103
|
security: Optional[RequestSecurity] = None,
|
115
104
|
generative_model: Optional[str] = None,
|
116
105
|
rephrase: bool = False,
|
@@ -127,40 +116,28 @@ class QueryParser:
|
|
127
116
|
self.hidden = hidden
|
128
117
|
if self.hidden is not None:
|
129
118
|
if self.hidden:
|
130
|
-
label_filters.append(Filter(all=[LABEL_HIDDEN])) # type: ignore
|
119
|
+
old_filters.label_filters.append(Filter(all=[LABEL_HIDDEN])) # type: ignore
|
131
120
|
else:
|
132
|
-
label_filters.append(Filter(none=[LABEL_HIDDEN])) # type: ignore
|
133
|
-
|
134
|
-
self.label_filters: dict[str, Any] = convert_to_node_filters(label_filters)
|
135
|
-
self.flat_label_filters: list[str] = []
|
136
|
-
self.keyword_filters: dict[str, Any] = convert_to_node_filters(keyword_filters)
|
121
|
+
old_filters.label_filters.append(Filter(none=[LABEL_HIDDEN])) # type: ignore
|
137
122
|
self.faceted = faceted or []
|
138
123
|
self.top_k = top_k
|
139
124
|
self.min_score = min_score
|
140
125
|
self.sort = sort
|
141
|
-
self.range_creation_start = range_creation_start
|
142
|
-
self.range_creation_end = range_creation_end
|
143
|
-
self.range_modification_start = range_modification_start
|
144
|
-
self.range_modification_end = range_modification_end
|
145
|
-
self.fields = fields or []
|
146
126
|
self.user_vector = user_vector
|
147
127
|
self.vectorset = vectorset
|
148
128
|
self.with_duplicates = with_duplicates
|
149
129
|
self.with_status = with_status
|
150
130
|
self.with_synonyms = with_synonyms
|
151
131
|
self.autofilter = autofilter
|
152
|
-
self.key_filters = key_filters
|
153
132
|
self.security = security
|
154
133
|
self.generative_model = generative_model
|
155
134
|
self.rephrase = rephrase
|
156
135
|
self.rephrase_prompt = rephrase_prompt
|
157
136
|
self.query_endpoint_used = False
|
158
|
-
if len(self.label_filters) > 0:
|
159
|
-
self.label_filters = translate_label_filters(self.label_filters)
|
160
|
-
self.flat_label_filters = flatten_filter_literals(self.label_filters)
|
161
137
|
self.max_tokens = max_tokens
|
162
138
|
self.rank_fusion = rank_fusion
|
163
139
|
self.reranker = reranker
|
140
|
+
self.old_filters = old_filters
|
164
141
|
self.fetcher = Fetcher(
|
165
142
|
kbid=kbid,
|
166
143
|
query=query,
|
@@ -197,7 +174,7 @@ class QueryParser:
|
|
197
174
|
This will schedule concurrent tasks for different data that needs to be pulled
|
198
175
|
for the sake of the query being performed
|
199
176
|
"""
|
200
|
-
if len(self.label_filters) > 0
|
177
|
+
if len(self.old_filters.label_filters) > 0:
|
201
178
|
asyncio.ensure_future(self.fetcher.get_classification_labels())
|
202
179
|
|
203
180
|
if self.has_vector_search and self.user_vector is None:
|
@@ -243,25 +220,7 @@ class QueryParser:
|
|
243
220
|
return request, incomplete, autofilters, rephrased_query
|
244
221
|
|
245
222
|
async def parse_filters(self, request: nodereader_pb2.SearchRequest) -> None:
|
246
|
-
if len(self.label_filters) > 0:
|
247
|
-
field_labels = self.flat_label_filters
|
248
|
-
paragraph_labels: list[str] = []
|
249
|
-
if has_classification_label_filters(self.flat_label_filters):
|
250
|
-
classification_labels = await self.fetcher.get_classification_labels()
|
251
|
-
field_labels, paragraph_labels = split_labels_by_type(
|
252
|
-
self.flat_label_filters, classification_labels
|
253
|
-
)
|
254
|
-
check_supported_filters(self.label_filters, paragraph_labels)
|
255
|
-
|
256
|
-
request.filter.field_labels.extend(field_labels)
|
257
|
-
request.filter.paragraph_labels.extend(paragraph_labels)
|
258
|
-
request.filter.labels_expression = json.dumps(self.label_filters)
|
259
|
-
|
260
|
-
if len(self.keyword_filters) > 0:
|
261
|
-
request.filter.keywords_expression = json.dumps(self.keyword_filters)
|
262
|
-
|
263
223
|
request.faceted.labels.extend([translate_label(facet) for facet in self.faceted])
|
264
|
-
request.fields.extend(self.fields)
|
265
224
|
|
266
225
|
if self.security is not None and len(self.security.groups) > 0:
|
267
226
|
security_pb = utils_pb2.Security()
|
@@ -270,24 +229,15 @@ class QueryParser:
|
|
270
229
|
security_pb.access_groups.append(group_id)
|
271
230
|
request.security.CopyFrom(security_pb)
|
272
231
|
|
273
|
-
if self.key_filters is not None and len(self.key_filters) > 0:
|
274
|
-
request.key_filters.extend(self.key_filters)
|
275
|
-
node_features.inc({"type": "key_filters"})
|
276
|
-
|
277
232
|
if self.with_status is not None:
|
278
233
|
request.with_status = PROCESSING_STATUS_TO_PB_MAP[self.with_status]
|
279
234
|
|
280
|
-
if self.
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
if self.range_modification_start is not None:
|
287
|
-
request.timestamps.from_modified.FromDatetime(self.range_modification_start)
|
288
|
-
|
289
|
-
if self.range_modification_end is not None:
|
290
|
-
request.timestamps.to_modified.FromDatetime(self.range_modification_end)
|
235
|
+
if self.old_filters:
|
236
|
+
field_expr, paragraph_expr = await parse_old_filters(self.old_filters, self.fetcher)
|
237
|
+
if field_expr is not None:
|
238
|
+
request.field_filter.CopyFrom(field_expr)
|
239
|
+
if paragraph_expr is not None:
|
240
|
+
request.paragraph_filter.CopyFrom(paragraph_expr)
|
291
241
|
|
292
242
|
def parse_sorting(self, request: nodereader_pb2.SearchRequest) -> None:
|
293
243
|
if len(self.query) == 0:
|
@@ -533,39 +483,30 @@ async def paragraph_query_to_pb(
|
|
533
483
|
|
534
484
|
request.body = query
|
535
485
|
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
if
|
559
|
-
request.
|
560
|
-
|
561
|
-
if range_creation_end is not None:
|
562
|
-
request.timestamps.to_created.FromDatetime(range_creation_end)
|
563
|
-
|
564
|
-
if range_modification_start is not None:
|
565
|
-
request.timestamps.from_modified.FromDatetime(range_modification_start)
|
566
|
-
|
567
|
-
if range_modification_end is not None:
|
568
|
-
request.timestamps.to_modified.FromDatetime(range_modification_end)
|
486
|
+
old = OldFilterParams(
|
487
|
+
label_filters=filters,
|
488
|
+
keyword_filters=[],
|
489
|
+
range_creation_start=range_creation_start,
|
490
|
+
range_creation_end=range_creation_end,
|
491
|
+
range_modification_start=range_modification_start,
|
492
|
+
range_modification_end=range_modification_end,
|
493
|
+
key_filters=[rid],
|
494
|
+
fields=fields,
|
495
|
+
)
|
496
|
+
fetcher = Fetcher(
|
497
|
+
kbid,
|
498
|
+
query="",
|
499
|
+
user_vector=None,
|
500
|
+
vectorset=None,
|
501
|
+
rephrase=False,
|
502
|
+
rephrase_prompt=None,
|
503
|
+
generative_model=None,
|
504
|
+
)
|
505
|
+
field_expr, paragraph_expr = await parse_old_filters(old, fetcher)
|
506
|
+
if field_expr is not None:
|
507
|
+
request.field_filter.CopyFrom(field_expr)
|
508
|
+
if paragraph_expr is not None:
|
509
|
+
request.paragraph_filter.CopyFrom(paragraph_expr)
|
569
510
|
|
570
511
|
return request
|
571
512
|
|
@@ -643,7 +584,8 @@ def parse_entities_to_filters(
|
|
643
584
|
return added_filters
|
644
585
|
|
645
586
|
|
646
|
-
def suggest_query_to_pb(
|
587
|
+
async def suggest_query_to_pb(
|
588
|
+
kbid: str,
|
647
589
|
features: list[SuggestOptions],
|
648
590
|
query: str,
|
649
591
|
fields: list[str],
|
@@ -663,29 +605,34 @@ def suggest_query_to_pb(
|
|
663
605
|
|
664
606
|
if SuggestOptions.PARAGRAPH in features:
|
665
607
|
request.features.append(nodereader_pb2.SuggestFeatures.PARAGRAPHS)
|
666
|
-
request.fields.extend(fields)
|
667
608
|
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
609
|
+
if hidden is not None:
|
610
|
+
if hidden:
|
611
|
+
filters.append(Filter(all=[LABEL_HIDDEN])) # type: ignore
|
612
|
+
else:
|
613
|
+
filters.append(Filter(none=[LABEL_HIDDEN])) # type: ignore
|
614
|
+
|
615
|
+
old = OldFilterParams(
|
616
|
+
label_filters=filters,
|
617
|
+
keyword_filters=[],
|
618
|
+
range_creation_start=range_creation_start,
|
619
|
+
range_creation_end=range_creation_end,
|
620
|
+
range_modification_start=range_modification_start,
|
621
|
+
range_modification_end=range_modification_end,
|
622
|
+
fields=fields,
|
623
|
+
)
|
624
|
+
fetcher = Fetcher(
|
625
|
+
kbid,
|
626
|
+
query="",
|
627
|
+
user_vector=None,
|
628
|
+
vectorset=None,
|
629
|
+
rephrase=False,
|
630
|
+
rephrase_prompt=None,
|
631
|
+
generative_model=None,
|
632
|
+
)
|
633
|
+
field_expr, _ = await parse_old_filters(old, fetcher)
|
634
|
+
if field_expr is not None:
|
635
|
+
request.field_filter.CopyFrom(field_expr)
|
689
636
|
|
690
637
|
return request
|
691
638
|
|
@@ -0,0 +1,260 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from dataclasses import dataclass
|
22
|
+
from datetime import datetime
|
23
|
+
from typing import Optional, Union
|
24
|
+
|
25
|
+
from nucliadb.search.search.filters import translate_label
|
26
|
+
from nucliadb_models.search import (
|
27
|
+
Filter,
|
28
|
+
)
|
29
|
+
from nucliadb_protos import knowledgebox_pb2
|
30
|
+
from nucliadb_protos.nodereader_pb2 import FilterExpression
|
31
|
+
|
32
|
+
from .exceptions import InvalidQueryError
|
33
|
+
from .fetcher import Fetcher
|
34
|
+
|
35
|
+
|
36
|
+
@dataclass
class OldFilterParams:
    """Bundle of the legacy request filter parameters.

    Instances are handed to ``parse_old_filters``, which reads every
    attribute below to build the equivalent filter expressions.
    """

    # Label filters: plain label strings or structured ``Filter`` objects.
    label_filters: Union[list[str], list[Filter]]
    # Keyword filters: plain keywords or structured ``Filter`` objects.
    keyword_filters: Union[list[str], list[Filter]]

    # Optional bounds for the creation-date filter.
    range_creation_start: Optional[datetime] = None
    range_creation_end: Optional[datetime] = None

    # Optional bounds for the modification-date filter.
    range_modification_start: Optional[datetime] = None
    range_modification_end: Optional[datetime] = None

    # Field selectors, parsed as "field_type" or "field_type/field_id".
    fields: Optional[list[str]] = None
    # Key selectors, parsed as "resource_id[/field_type[/field_id]]".
    key_filters: Optional[list[str]] = None
|
46
|
+
|
47
|
+
|
48
|
+
async def parse_old_filters(
    old: OldFilterParams, fetcher: Fetcher
) -> tuple[Optional[FilterExpression], Optional[FilterExpression]]:
    """Translate the legacy filter parameters into filter expressions.

    Args:
        old: legacy filter parameters (labels, keywords, date ranges, fields
            and key filters).
        fetcher: source of the classification labels used to tell paragraph
            labels apart from field labels. It is only queried when
            ``old.label_filters`` is not empty.

    Returns:
        A ``(field_expression, paragraph_expression)`` tuple. Each element is
        ``None`` when nothing was requested for it, the single expression when
        there is exactly one, or a ``bool_and`` over all of them otherwise.

    Raises:
        InvalidQueryError: propagated from the label conversion when a
            paragraph label is used in an ``any`` or ``not_all`` filter.
    """
    filters = []
    paragraph_filter_expression = None

    # Labels
    if old.label_filters:
        classification_labels = await fetcher.get_classification_labels()

        paragraph_exprs = []
        for fltr in old.label_filters:
            field_expr, paragraph_expr = convert_label_filter_to_expressions(fltr, classification_labels)
            # Explicit None checks: "no expression" is signalled with None,
            # so do not depend on the truthiness of a protobuf message.
            if field_expr is not None:
                filters.append(field_expr)
            if paragraph_expr is not None:
                paragraph_exprs.append(paragraph_expr)

        if len(paragraph_exprs) == 1:
            paragraph_filter_expression = paragraph_exprs[0]
        elif len(paragraph_exprs) > 1:
            paragraph_filter_expression = FilterExpression()
            paragraph_filter_expression.bool_and.operands.extend(paragraph_exprs)

    # Keywords
    if old.keyword_filters:
        for fltr in old.keyword_filters:
            filters.append(convert_keyword_filter_to_expression(fltr))

    # Timestamps
    if old.range_creation_start is not None or old.range_creation_end is not None:
        created = FilterExpression()
        created.date.field = FilterExpression.DateRangeFilter.DateField.CREATED
        if old.range_creation_start is not None:
            created.date.since.FromDatetime(old.range_creation_start)
        if old.range_creation_end is not None:
            created.date.until.FromDatetime(old.range_creation_end)
        filters.append(created)

    if old.range_modification_start is not None or old.range_modification_end is not None:
        modified = FilterExpression()
        modified.date.field = FilterExpression.DateRangeFilter.DateField.MODIFIED
        if old.range_modification_start is not None:
            modified.date.since.FromDatetime(old.range_modification_start)
        if old.range_modification_end is not None:
            modified.date.until.FromDatetime(old.range_modification_end)
        filters.append(modified)

    # Fields: each entry is "field_type" or "field_type/field_id"; several
    # entries are alternatives (OR).
    if old.fields:
        field_filters = []
        for field in old.fields:
            parts = field.split("/")
            field_filter = FilterExpression()
            field_filter.field.field_type = parts[0]
            if len(parts) > 1:
                field_filter.field.field_id = parts[1]
            field_filters.append(field_filter)

        if len(field_filters) > 1:
            any_field = FilterExpression()
            any_field.bool_or.operands.extend(field_filters)
            filters.append(any_field)
        else:
            filters.append(field_filters[0])

    # Key filter: each entry is "resource_id[/field_type[/field_id]]"; several
    # entries are alternatives (OR).
    if old.key_filters:
        key_exprs = []
        for key in old.key_filters:
            expr = FilterExpression()
            parts = key.split("/")
            if len(parts) == 1:
                expr.resource.resource_id = parts[0]
            else:
                resource_filter = FilterExpression()
                resource_filter.resource.resource_id = parts[0]
                expr.bool_and.operands.append(resource_filter)
                field_filter = FilterExpression()
                field_filter.field.field_type = parts[1]
                if len(parts) > 2:
                    field_filter.field.field_id = parts[2]
                # The field part must be ANDed with the resource part;
                # otherwise the key would match every field of the resource.
                expr.bool_and.operands.append(field_filter)
            key_exprs.append(expr)

        if len(key_exprs) == 1:
            filters.append(key_exprs[0])
        elif len(key_exprs) > 1:
            any_key = FilterExpression()
            any_key.bool_or.operands.extend(key_exprs)
            filters.append(any_key)

    # Build filter
    if len(filters) == 0:
        return None, paragraph_filter_expression
    elif len(filters) == 1:
        return filters[0], paragraph_filter_expression
    else:
        combined = FilterExpression()
        combined.bool_and.operands.extend(filters)
        return combined, paragraph_filter_expression
|
148
|
+
|
149
|
+
|
150
|
+
def convert_label_filter_to_expressions(
    fltr: Union[str, Filter], classification_labels: knowledgebox_pb2.Labels
) -> tuple[Optional[FilterExpression], Optional[FilterExpression]]:
    """Convert one legacy label filter into a (field, paragraph) expression pair.

    A plain string becomes a single facet expression, placed in the paragraph
    slot when ``is_paragraph_label`` says so and in the field slot otherwise.
    A ``Filter`` object is delegated to ``split_labels`` using the first
    non-empty attribute among ``all``, ``any``, ``none`` and ``not_all``.
    Returns ``(None, None)`` when the ``Filter`` has none of them set.
    """
    if not isinstance(fltr, str):
        # (attribute, combinator, negate), checked in this order; the first
        # non-empty attribute decides the result.
        dispatch = (
            ("all", "bool_and", False),
            ("any", "bool_or", False),
            ("none", "bool_and", True),
            ("not_all", "bool_or", True),
        )
        for attribute, combinator, negate in dispatch:
            labels = getattr(fltr, attribute)
            if labels:
                return split_labels(labels, classification_labels, combinator, negate=negate)
        return None, None

    facet = translate_label(fltr)
    expression = FilterExpression()
    expression.facet.facet = facet
    if is_paragraph_label(facet, classification_labels):
        return None, expression
    return expression, None
|
172
|
+
|
173
|
+
|
174
|
+
def split_labels(
    labels: list[str], classification_labels: knowledgebox_pb2.Labels, combinator: str, negate: bool
) -> tuple[Optional[FilterExpression], Optional[FilterExpression]]:
    """Split labels into a field-level and a paragraph-level expression.

    Args:
        labels: labels to convert; each one goes through ``translate_label``.
        classification_labels: labelsets used by ``is_paragraph_label`` to
            decide which side a label belongs to.
        combinator: name of the list attribute of ``FilterExpression`` used to
            join several labels of the same side ("bool_and" or "bool_or").
        negate: when true, every label is wrapped in ``bool_not``.

    Returns:
        ``(field_expr, paragraph_expr)``. Each is ``None`` when no label fell
        on that side, the bare expression when exactly one did, or a
        ``combinator`` list over all of them otherwise.

    Raises:
        InvalidQueryError: if any paragraph label is present and
            ``combinator`` is "bool_or".
    """
    field = []
    paragraph = []
    for label in labels:
        label = translate_label(label)
        expr = FilterExpression()
        if negate:
            expr.bool_not.facet.facet = label
        else:
            expr.facet.facet = label

        if is_paragraph_label(label, classification_labels):
            paragraph.append(expr)
        else:
            field.append(expr)

    if len(field) == 0:
        field_expr = None
    elif len(field) == 1:
        field_expr = field[0]
    else:
        field_expr = FilterExpression()
        filter_list = getattr(field_expr, combinator)
        filter_list.operands.extend(field)

    if len(paragraph) > 0 and combinator == "bool_or":
        raise InvalidQueryError(
            "filters",
            "Paragraph labels can only be used with 'all' filter",
        )

    if len(paragraph) == 0:
        paragraph_expr = None
    elif len(paragraph) == 1:
        paragraph_expr = paragraph[0]
    else:
        paragraph_expr = FilterExpression()
        filter_list = getattr(paragraph_expr, combinator)
        # Extend the repeated ``operands`` field, as the field branch does:
        # the list message itself has no ``extend``.
        filter_list.operands.extend(paragraph)

    return field_expr, paragraph_expr
|
217
|
+
|
218
|
+
|
219
|
+
def is_paragraph_label(label: str, classification_labels: knowledgebox_pb2.Labels) -> bool:
    """Return whether ``label`` belongs to a labelset that applies to paragraphs.

    Only labels of the form ``/l/<labelset>/<label>`` can qualify. The
    labelset is looked up in ``classification_labels.labelset`` and must list
    the ``PARAGRAPHS`` kind. Anything else, including an unknown labelset,
    yields ``False``.
    """
    # Also rejects empty labels and labels that do not start with "/".
    if not label.startswith("/l/"):
        return False
    # Classification labels should have the form /l/labelset/label
    parts = label.split("/")
    if len(parts) < 4:
        return False

    # ``get`` returns None for an unknown labelset instead of raising, so no
    # KeyError handling is needed.
    labelset: Optional[knowledgebox_pb2.LabelSet] = classification_labels.labelset.get(parts[2])
    if labelset is None:
        return False
    return knowledgebox_pb2.LabelSet.LabelSetKind.PARAGRAPHS in labelset.kind
|
238
|
+
|
239
|
+
|
240
|
+
def convert_keyword_filter_to_expression(fltr: Union[str, Filter]) -> FilterExpression:
    """Convert one legacy keyword filter into a filter expression.

    A plain string becomes a single keyword expression. For a ``Filter``
    object, ``all`` maps to ``bool_and``, ``any`` to ``bool_or``, ``none`` to
    ``bool_not.bool_or`` and ``not_all`` to ``bool_not.bool_and``. The
    attributes are applied in that order, each only when non-empty.
    """
    if isinstance(fltr, str):
        return convert_keyword_to_expression(fltr)

    expression = FilterExpression()
    if fltr.all:
        expression.bool_and.operands.extend(
            [convert_keyword_to_expression(keyword) for keyword in fltr.all]
        )
    if fltr.any:
        expression.bool_or.operands.extend(
            [convert_keyword_to_expression(keyword) for keyword in fltr.any]
        )
    if fltr.none:
        expression.bool_not.bool_or.operands.extend(
            [convert_keyword_to_expression(keyword) for keyword in fltr.none]
        )
    if fltr.not_all:
        expression.bool_not.bool_and.operands.extend(
            [convert_keyword_to_expression(keyword) for keyword in fltr.not_all]
        )

    return expression
|
255
|
+
|
256
|
+
|
257
|
+
def convert_keyword_to_expression(keyword: str) -> FilterExpression:
    """Wrap a single keyword in a keyword ``FilterExpression``."""
    expression = FilterExpression()
    expression.keyword.keyword = keyword
    return expression
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post3328
|
3
|
+
Version: 6.2.1.post3332
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.
|
25
|
-
Requires-Dist: nucliadb-protos>=6.2.1.
|
26
|
-
Requires-Dist: nucliadb-models>=6.2.1.
|
27
|
-
Requires-Dist: nidx-protos>=6.2.1.
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3332
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3332
|
25
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post3332
|
26
|
+
Requires-Dist: nucliadb-models>=6.2.1.post3332
|
27
|
+
Requires-Dist: nidx-protos>=6.2.1.post3332
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn
|
@@ -196,8 +196,8 @@ nucliadb/search/api/v1/find.py,sha256=DsnWkySu_cFajDWJIxN8DYvLL_Rm2yiCjHD8TsqPfR
|
|
196
196
|
nucliadb/search/api/v1/knowledgebox.py,sha256=rWhx3PYWryingu19qwwFDbVvVYynq5Ky23FSlzmTutQ,8721
|
197
197
|
nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
|
198
198
|
nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
|
199
|
-
nucliadb/search/api/v1/search.py,sha256=
|
200
|
-
nucliadb/search/api/v1/suggest.py,sha256=
|
199
|
+
nucliadb/search/api/v1/search.py,sha256=CBszxidsfoLmrCbVL8cuZboWJlmt_GAfpjix0Tdp_yA,13650
|
200
|
+
nucliadb/search/api/v1/suggest.py,sha256=Cc1vvT5AFEq0j6_IAx5jSVekMJpIi6kY-DWhzi7vkLg,5931
|
201
201
|
nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
|
202
202
|
nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
|
203
203
|
nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -213,7 +213,7 @@ nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298
|
|
213
213
|
nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
|
214
214
|
nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
|
215
215
|
nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
|
216
|
-
nucliadb/search/search/find.py,sha256=
|
216
|
+
nucliadb/search/search/find.py,sha256=BC_5p3oRKeQBVmuFCDdCd36Qr2TqJZPIjC0XfRuDJWU,10186
|
217
217
|
nucliadb/search/search/find_merge.py,sha256=3FnzKFEnVemg6FO_6zveulbAU7klvsiPEBvLrpBBMg8,17450
|
218
218
|
nucliadb/search/search/graph_strategy.py,sha256=ahwcUTQZ0Ll-rnS285DO9PmRyiM-1p4BM3UvmOYVwhM,31750
|
219
219
|
nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
|
@@ -223,14 +223,14 @@ nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUH
|
|
223
223
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
224
224
|
nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
|
225
225
|
nucliadb/search/search/predict_proxy.py,sha256=xBlh6kjuQpWRq7KsBx4pEl2PtnwljjQIiYMaTWpcCSA,3015
|
226
|
-
nucliadb/search/search/query.py,sha256=
|
226
|
+
nucliadb/search/search/query.py,sha256=vSnnqJPB5iHI7kugaL6boH1l4j5HV6ln4EKbskJnFWw,27346
|
227
227
|
nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
|
228
228
|
nucliadb/search/search/rerankers.py,sha256=3vep4EOVNeDJGsMdx-1g6Ar4ZGJG3IHym3HkxnbwtAQ,7321
|
229
229
|
nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
|
230
230
|
nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
|
231
231
|
nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
|
232
232
|
nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
233
|
-
nucliadb/search/search/chat/ask.py,sha256=
|
233
|
+
nucliadb/search/search/chat/ask.py,sha256=q0CSXQJs69z52XWleIklcn5_bvHJQT7z94XUqKW506Y,36451
|
234
234
|
nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
|
235
235
|
nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
|
236
236
|
nucliadb/search/search/chat/prompt.py,sha256=Jnja-Ss7skgnnDY8BymVfdeYsFPnIQFL8tEvcRXTKUE,47356
|
@@ -239,6 +239,7 @@ nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyv
|
|
239
239
|
nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
|
240
240
|
nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
|
241
241
|
nucliadb/search/search/query_parser/models.py,sha256=2iWuTcH24RDF8xokgXr0j5qbMoURQ1TFyqJIYs16LqU,2283
|
242
|
+
nucliadb/search/search/query_parser/old_filters.py,sha256=-zbfN-RsXoj_DRjh3Lfp-wShwFXgkISawzVptVzja-A,9071
|
242
243
|
nucliadb/search/search/query_parser/parser.py,sha256=m6meq5QQO_ofdtbrvEORsZLjxURWfRR0dINrgDXmYRg,6323
|
243
244
|
nucliadb/standalone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
244
245
|
nucliadb/standalone/api_router.py,sha256=4-g-eEq27nL6vKCLRCoV0Pxf-L273N-eHeEX2vI9qgg,6215
|
@@ -335,8 +336,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
335
336
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
336
337
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
337
338
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
338
|
-
nucliadb-6.2.1.
|
339
|
-
nucliadb-6.2.1.
|
340
|
-
nucliadb-6.2.1.
|
341
|
-
nucliadb-6.2.1.
|
342
|
-
nucliadb-6.2.1.
|
339
|
+
nucliadb-6.2.1.post3332.dist-info/METADATA,sha256=Gj3sfBbSi6LtLdhJofzr_eR6V_AGI7LBFYuxDkNcfac,4291
|
340
|
+
nucliadb-6.2.1.post3332.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
341
|
+
nucliadb-6.2.1.post3332.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
342
|
+
nucliadb-6.2.1.post3332.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
343
|
+
nucliadb-6.2.1.post3332.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|