nucliadb 6.2.1.post3377__py3-none-any.whl → 6.2.1.post3382__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/common/ids.py +9 -0
- nucliadb/reader/api/v1/services.py +14 -8
- nucliadb/search/api/v1/find.py +7 -0
- nucliadb/search/api/v1/search.py +8 -0
- nucliadb/search/api/v1/suggest.py +9 -0
- nucliadb/search/search/chat/ask.py +1 -0
- nucliadb/search/search/find.py +1 -0
- nucliadb/search/search/query.py +69 -27
- nucliadb/search/search/query_parser/filter_expression.py +146 -0
- nucliadb/writer/api/v1/learning_config.py +1 -1
- {nucliadb-6.2.1.post3377.dist-info → nucliadb-6.2.1.post3382.dist-info}/METADATA +6 -6
- {nucliadb-6.2.1.post3377.dist-info → nucliadb-6.2.1.post3382.dist-info}/RECORD +15 -14
- {nucliadb-6.2.1.post3377.dist-info → nucliadb-6.2.1.post3382.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post3377.dist-info → nucliadb-6.2.1.post3382.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post3377.dist-info → nucliadb-6.2.1.post3382.dist-info}/top_level.txt +0 -0
nucliadb/common/ids.py
CHANGED
@@ -26,6 +26,7 @@ paragraphs... Avoiding spread of id construction and parsing everywhere
|
|
26
26
|
from dataclasses import dataclass
|
27
27
|
from typing import Optional
|
28
28
|
|
29
|
+
from nucliadb_models.common import FieldTypeName
|
29
30
|
from nucliadb_protos.resources_pb2 import FieldType
|
30
31
|
|
31
32
|
FIELD_TYPE_STR_TO_PB: dict[str, FieldType.ValueType] = {
|
@@ -38,6 +39,14 @@ FIELD_TYPE_STR_TO_PB: dict[str, FieldType.ValueType] = {
|
|
38
39
|
|
39
40
|
FIELD_TYPE_PB_TO_STR = {v: k for k, v in FIELD_TYPE_STR_TO_PB.items()}
|
40
41
|
|
42
|
+
FIELD_TYPE_NAME_TO_STR = {
|
43
|
+
FieldTypeName.TEXT: "t",
|
44
|
+
FieldTypeName.FILE: "f",
|
45
|
+
FieldTypeName.LINK: "u",
|
46
|
+
FieldTypeName.GENERIC: "a",
|
47
|
+
FieldTypeName.CONVERSATION: "c",
|
48
|
+
}
|
49
|
+
|
41
50
|
|
42
51
|
@dataclass
|
43
52
|
class FieldId:
|
@@ -164,6 +164,7 @@ async def get_labelsets(kbid: str) -> KnowledgeBoxLabels:
|
|
164
164
|
summary="Get a Knowledge Box Label Set",
|
165
165
|
response_model=LabelSet,
|
166
166
|
tags=["Knowledge Box Services"],
|
167
|
+
responses={"404": {"description": "Knowledge Box or Label Set not found"}},
|
167
168
|
)
|
168
169
|
@requires(NucliaDBRoles.READER)
|
169
170
|
@version(1)
|
@@ -172,6 +173,12 @@ async def get_labelset_endpoint(request: Request, kbid: str, labelset: str) -> L
|
|
172
173
|
return await get_labelset(kbid, labelset)
|
173
174
|
except KnowledgeBoxNotFound:
|
174
175
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
176
|
+
except LabelSetNotFound:
|
177
|
+
raise HTTPException(status_code=404, detail="Label Set does not exist")
|
178
|
+
|
179
|
+
|
180
|
+
class LabelSetNotFound(Exception):
|
181
|
+
pass
|
175
182
|
|
176
183
|
|
177
184
|
async def get_labelset(kbid: str, labelset_id: str) -> LabelSet:
|
@@ -182,15 +189,14 @@ async def get_labelset(kbid: str, labelset_id: str) -> LabelSet:
|
|
182
189
|
kbid=kbid, labelset_id=labelset_id
|
183
190
|
)
|
184
191
|
if labelset is None:
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
including_default_value_fields=True,
|
192
|
-
)
|
192
|
+
raise LabelSetNotFound()
|
193
|
+
response = LabelSet(
|
194
|
+
**MessageToDict(
|
195
|
+
labelset,
|
196
|
+
preserving_proto_field_name=True,
|
197
|
+
including_default_value_fields=True,
|
193
198
|
)
|
199
|
+
)
|
194
200
|
return response
|
195
201
|
|
196
202
|
|
nucliadb/search/api/v1/find.py
CHANGED
@@ -35,6 +35,7 @@ from nucliadb.search.search.exceptions import InvalidQueryError
|
|
35
35
|
from nucliadb.search.search.find import find
|
36
36
|
from nucliadb.search.search.utils import maybe_log_request_payload, min_score_from_query_params
|
37
37
|
from nucliadb_models.common import FieldTypeName
|
38
|
+
from nucliadb_models.filter import FilterExpression
|
38
39
|
from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
|
39
40
|
from nucliadb_models.search import (
|
40
41
|
FindRequest,
|
@@ -80,6 +81,9 @@ async def find_knowledgebox(
|
|
80
81
|
response: Response,
|
81
82
|
kbid: str,
|
82
83
|
query: str = fastapi_query(SearchParamDefaults.query),
|
84
|
+
filter_expression: Optional[str] = fastapi_query(
|
85
|
+
SearchParamDefaults.filter_expression, include_in_schema=False
|
86
|
+
),
|
83
87
|
fields: list[str] = fastapi_query(SearchParamDefaults.fields),
|
84
88
|
filters: list[str] = fastapi_query(SearchParamDefaults.filters),
|
85
89
|
top_k: Optional[int] = fastapi_query(SearchParamDefaults.top_k),
|
@@ -132,11 +136,14 @@ async def find_knowledgebox(
|
|
132
136
|
x_forwarded_for: str = Header(""),
|
133
137
|
) -> Union[KnowledgeboxFindResults, HTTPClientError]:
|
134
138
|
try:
|
139
|
+
expr = FilterExpression.model_validate_json(filter_expression) if filter_expression else None
|
140
|
+
|
135
141
|
security = None
|
136
142
|
if len(security_groups) > 0:
|
137
143
|
security = RequestSecurity(groups=security_groups)
|
138
144
|
item = FindRequest(
|
139
145
|
query=query,
|
146
|
+
filter_expression=expr,
|
140
147
|
fields=fields,
|
141
148
|
filters=filters,
|
142
149
|
top_k=top_k, # type: ignore
|
nucliadb/search/api/v1/search.py
CHANGED
@@ -45,6 +45,7 @@ from nucliadb.search.search.utils import (
|
|
45
45
|
should_disable_vector_search,
|
46
46
|
)
|
47
47
|
from nucliadb_models.common import FieldTypeName
|
48
|
+
from nucliadb_models.filter import FilterExpression
|
48
49
|
from nucliadb_models.metadata import ResourceProcessingStatus
|
49
50
|
from nucliadb_models.resource import ExtractedDataTypeName, NucliaDBRoles
|
50
51
|
from nucliadb_models.search import (
|
@@ -102,6 +103,9 @@ async def search_knowledgebox(
|
|
102
103
|
response: Response,
|
103
104
|
kbid: str,
|
104
105
|
query: str = fastapi_query(SearchParamDefaults.query),
|
106
|
+
filter_expression: Optional[str] = fastapi_query(
|
107
|
+
SearchParamDefaults.filter_expression, include_in_schema=False
|
108
|
+
),
|
105
109
|
fields: list[str] = fastapi_query(SearchParamDefaults.fields),
|
106
110
|
filters: list[str] = fastapi_query(SearchParamDefaults.filters),
|
107
111
|
faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
|
@@ -157,11 +161,14 @@ async def search_knowledgebox(
|
|
157
161
|
x_forwarded_for: str = Header(""),
|
158
162
|
) -> Union[KnowledgeboxSearchResults, HTTPClientError]:
|
159
163
|
try:
|
164
|
+
expr = FilterExpression.model_validate_json(filter_expression) if filter_expression else None
|
165
|
+
|
160
166
|
security = None
|
161
167
|
if len(security_groups) > 0:
|
162
168
|
security = RequestSecurity(groups=security_groups)
|
163
169
|
item = SearchRequest(
|
164
170
|
query=query,
|
171
|
+
filter_expression=expr,
|
165
172
|
fields=fields,
|
166
173
|
filters=filters,
|
167
174
|
faceted=faceted,
|
@@ -270,6 +277,7 @@ async def search(
|
|
270
277
|
kbid=kbid,
|
271
278
|
features=item.features,
|
272
279
|
query=item.query,
|
280
|
+
filter_expression=item.filter_expression,
|
273
281
|
faceted=item.faceted,
|
274
282
|
sort=item.sort,
|
275
283
|
top_k=item.top_k,
|
@@ -33,6 +33,7 @@ from nucliadb.search.search.merge import merge_suggest_results
|
|
33
33
|
from nucliadb.search.search.query import suggest_query_to_pb
|
34
34
|
from nucliadb.search.search.utils import filter_hidden_resources
|
35
35
|
from nucliadb_models.common import FieldTypeName
|
36
|
+
from nucliadb_models.filter import FilterExpression
|
36
37
|
from nucliadb_models.resource import NucliaDBRoles
|
37
38
|
from nucliadb_models.search import (
|
38
39
|
KnowledgeboxSuggestResults,
|
@@ -61,6 +62,9 @@ async def suggest_knowledgebox(
|
|
61
62
|
response: Response,
|
62
63
|
kbid: str,
|
63
64
|
query: str = fastapi_query(SearchParamDefaults.suggest_query),
|
65
|
+
filter_expression: Optional[str] = fastapi_query(
|
66
|
+
SearchParamDefaults.filter_expression, include_in_schema=False
|
67
|
+
),
|
64
68
|
fields: list[str] = fastapi_query(SearchParamDefaults.fields),
|
65
69
|
filters: list[str] = fastapi_query(SearchParamDefaults.filters),
|
66
70
|
faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
|
@@ -85,10 +89,13 @@ async def suggest_knowledgebox(
|
|
85
89
|
show_hidden: bool = fastapi_query(SearchParamDefaults.show_hidden),
|
86
90
|
) -> Union[KnowledgeboxSuggestResults, HTTPClientError]:
|
87
91
|
try:
|
92
|
+
expr = FilterExpression.model_validate_json(filter_expression) if filter_expression else None
|
93
|
+
|
88
94
|
return await suggest(
|
89
95
|
response,
|
90
96
|
kbid,
|
91
97
|
query,
|
98
|
+
expr,
|
92
99
|
fields,
|
93
100
|
filters,
|
94
101
|
faceted,
|
@@ -114,6 +121,7 @@ async def suggest(
|
|
114
121
|
response,
|
115
122
|
kbid: str,
|
116
123
|
query: str,
|
124
|
+
filter_expression: Optional[FilterExpression],
|
117
125
|
fields: list[str],
|
118
126
|
filters: list[str],
|
119
127
|
faceted: list[str],
|
@@ -137,6 +145,7 @@ async def suggest(
|
|
137
145
|
kbid,
|
138
146
|
features,
|
139
147
|
query,
|
148
|
+
filter_expression,
|
140
149
|
fields,
|
141
150
|
filters,
|
142
151
|
faceted,
|
nucliadb/search/search/find.py
CHANGED
nucliadb/search/search/query.py
CHANGED
@@ -18,7 +18,6 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import asyncio
|
21
|
-
import json
|
22
21
|
import string
|
23
22
|
from datetime import datetime
|
24
23
|
from typing import Any, Awaitable, Optional
|
@@ -40,11 +39,11 @@ from nucliadb.search.search.rank_fusion import (
|
|
40
39
|
from nucliadb.search.search.rerankers import (
|
41
40
|
Reranker,
|
42
41
|
)
|
42
|
+
from nucliadb_models.filter import FilterExpression
|
43
43
|
from nucliadb_models.internal.predict import QueryInfo
|
44
44
|
from nucliadb_models.labels import LABEL_HIDDEN, translate_system_to_alias_label
|
45
45
|
from nucliadb_models.metadata import ResourceProcessingStatus
|
46
46
|
from nucliadb_models.search import (
|
47
|
-
Filter,
|
48
47
|
KnowledgeGraphEntity,
|
49
48
|
MaxTokens,
|
50
49
|
MinScore,
|
@@ -60,6 +59,7 @@ from nucliadb_protos import nodereader_pb2, utils_pb2
|
|
60
59
|
from nucliadb_protos.noderesources_pb2 import Resource
|
61
60
|
|
62
61
|
from .exceptions import InvalidQueryError
|
62
|
+
from .query_parser.filter_expression import add_and_expression, parse_expression
|
63
63
|
from .query_parser.old_filters import OldFilterParams, parse_old_filters
|
64
64
|
|
65
65
|
INDEX_SORTABLE_FIELDS = [
|
@@ -91,6 +91,7 @@ class QueryParser:
|
|
91
91
|
top_k: int,
|
92
92
|
min_score: MinScore,
|
93
93
|
old_filters: OldFilterParams,
|
94
|
+
filter_expression: Optional[FilterExpression] = None,
|
94
95
|
query_entities: Optional[list[KnowledgeGraphEntity]] = None,
|
95
96
|
faceted: Optional[list[str]] = None,
|
96
97
|
sort: Optional[SortOptions] = None,
|
@@ -114,11 +115,6 @@ class QueryParser:
|
|
114
115
|
self.query = query
|
115
116
|
self.query_entities = query_entities
|
116
117
|
self.hidden = hidden
|
117
|
-
if self.hidden is not None:
|
118
|
-
if self.hidden:
|
119
|
-
old_filters.label_filters.append(Filter(all=[LABEL_HIDDEN])) # type: ignore
|
120
|
-
else:
|
121
|
-
old_filters.label_filters.append(Filter(none=[LABEL_HIDDEN])) # type: ignore
|
122
118
|
self.faceted = faceted or []
|
123
119
|
self.top_k = top_k
|
124
120
|
self.min_score = min_score
|
@@ -137,6 +133,7 @@ class QueryParser:
|
|
137
133
|
self.max_tokens = max_tokens
|
138
134
|
self.rank_fusion = rank_fusion
|
139
135
|
self.reranker = reranker
|
136
|
+
self.filter_expression = filter_expression
|
140
137
|
self.old_filters = old_filters
|
141
138
|
self.fetcher = Fetcher(
|
142
139
|
kbid=kbid,
|
@@ -212,7 +209,6 @@ class QueryParser:
|
|
212
209
|
self.parse_document_search(request)
|
213
210
|
self.parse_paragraph_search(request)
|
214
211
|
incomplete, rephrased_query = await self.parse_vector_search(request)
|
215
|
-
# BUG: autofilters are not used to filter, but we say we do
|
216
212
|
autofilters = await self.parse_relation_search(request)
|
217
213
|
await self.parse_synonyms(request)
|
218
214
|
await self.parse_min_score(request, incomplete)
|
@@ -232,12 +228,40 @@ class QueryParser:
|
|
232
228
|
if self.with_status is not None:
|
233
229
|
request.with_status = PROCESSING_STATUS_TO_PB_MAP[self.with_status]
|
234
230
|
|
231
|
+
has_old_filters = False
|
235
232
|
if self.old_filters:
|
236
233
|
field_expr, paragraph_expr = await parse_old_filters(self.old_filters, self.fetcher)
|
237
234
|
if field_expr is not None:
|
238
235
|
request.field_filter.CopyFrom(field_expr)
|
236
|
+
has_old_filters = True
|
239
237
|
if paragraph_expr is not None:
|
240
238
|
request.paragraph_filter.CopyFrom(paragraph_expr)
|
239
|
+
has_old_filters = True
|
240
|
+
|
241
|
+
if self.filter_expression and has_old_filters:
|
242
|
+
raise InvalidQueryError("filter_expression", "Cannot mix old filters with filter_expression")
|
243
|
+
|
244
|
+
if self.filter_expression:
|
245
|
+
if self.filter_expression.field:
|
246
|
+
expr = await parse_expression(self.filter_expression.field, self.kbid)
|
247
|
+
if expr:
|
248
|
+
request.field_filter.CopyFrom(expr)
|
249
|
+
|
250
|
+
if self.filter_expression.paragraph:
|
251
|
+
expr = await parse_expression(self.filter_expression.paragraph, self.kbid)
|
252
|
+
if expr:
|
253
|
+
request.paragraph_filter.CopyFrom(expr)
|
254
|
+
|
255
|
+
# TODO: Pass operator to PB
|
256
|
+
|
257
|
+
if self.hidden is not None:
|
258
|
+
expr = nodereader_pb2.FilterExpression()
|
259
|
+
if self.hidden:
|
260
|
+
expr.facet.facet = LABEL_HIDDEN
|
261
|
+
else:
|
262
|
+
expr.bool_not.facet.facet = LABEL_HIDDEN
|
263
|
+
|
264
|
+
add_and_expression(request.field_filter, expr)
|
241
265
|
|
242
266
|
def parse_sorting(self, request: nodereader_pb2.SearchRequest) -> None:
|
243
267
|
if len(self.query) == 0:
|
@@ -360,7 +384,7 @@ class QueryParser:
|
|
360
384
|
)
|
361
385
|
node_features.inc({"type": "relations"})
|
362
386
|
if self.autofilter:
|
363
|
-
entity_filters = parse_entities_to_filters(request, detected_entities)
|
387
|
+
entity_filters = apply_entities_filter(request, detected_entities)
|
364
388
|
autofilters.extend([translate_system_to_alias_label(e) for e in entity_filters])
|
365
389
|
return autofilters
|
366
390
|
|
@@ -558,7 +582,7 @@ def expand_entities(
|
|
558
582
|
return list(result_entities.values())
|
559
583
|
|
560
584
|
|
561
|
-
def parse_entities_to_filters(
|
585
|
+
def apply_entities_filter(
|
562
586
|
request: nodereader_pb2.SearchRequest,
|
563
587
|
detected_entities: list[utils_pb2.RelationNode],
|
564
588
|
) -> list[str]:
|
@@ -568,19 +592,13 @@ def parse_entities_to_filters(
|
|
568
592
|
for entity in detected_entities
|
569
593
|
if entity.ntype == utils_pb2.RelationNode.NodeType.ENTITY
|
570
594
|
]:
|
571
|
-
if entity_filter not in request.filter.field_labels:
|
572
|
-
request.filter.field_labels.append(entity_filter)
|
595
|
+
if entity_filter not in added_filters:
|
573
596
|
added_filters.append(entity_filter)
|
597
|
+
# Add the entity to the filter expression (with AND)
|
598
|
+
entity_expr = nodereader_pb2.FilterExpression()
|
599
|
+
entity_expr.facet.facet = translate_label(entity_filter)
|
600
|
+
add_and_expression(request.field_filter, entity_expr)
|
574
601
|
|
575
|
-
# We need to expand the filter expression with the automatically detected entities.
|
576
|
-
if len(added_filters) > 0:
|
577
|
-
# So far, autofilters feature will only yield 'and' expressions with the detected entities.
|
578
|
-
# More complex autofilters can be added here if we leverage the query endpoint.
|
579
|
-
expanded_expression = {"and": [{"literal": entity} for entity in added_filters]}
|
580
|
-
if request.filter.labels_expression:
|
581
|
-
expression = json.loads(request.filter.labels_expression)
|
582
|
-
expanded_expression["and"].append(expression)
|
583
|
-
request.filter.labels_expression = json.dumps(expanded_expression)
|
584
602
|
return added_filters
|
585
603
|
|
586
604
|
|
@@ -588,6 +606,7 @@ async def suggest_query_to_pb(
|
|
588
606
|
kbid: str,
|
589
607
|
features: list[SuggestOptions],
|
590
608
|
query: str,
|
609
|
+
filter_expression: Optional[FilterExpression],
|
591
610
|
fields: list[str],
|
592
611
|
filters: list[str],
|
593
612
|
faceted: list[str],
|
@@ -606,12 +625,6 @@ async def suggest_query_to_pb(
|
|
606
625
|
if SuggestOptions.PARAGRAPH in features:
|
607
626
|
request.features.append(nodereader_pb2.SuggestFeatures.PARAGRAPHS)
|
608
627
|
|
609
|
-
if hidden is not None:
|
610
|
-
if hidden:
|
611
|
-
filters.append(Filter(all=[LABEL_HIDDEN])) # type: ignore
|
612
|
-
else:
|
613
|
-
filters.append(Filter(none=[LABEL_HIDDEN])) # type: ignore
|
614
|
-
|
615
628
|
old = OldFilterParams(
|
616
629
|
label_filters=filters,
|
617
630
|
keyword_filters=[],
|
@@ -631,9 +644,38 @@ async def suggest_query_to_pb(
|
|
631
644
|
generative_model=None,
|
632
645
|
)
|
633
646
|
field_expr, _ = await parse_old_filters(old, fetcher)
|
647
|
+
if field_expr is not None and filter_expression is not None:
|
648
|
+
raise InvalidQueryError("filter_expression", "Cannot mix old filters with filter_expression")
|
649
|
+
|
634
650
|
if field_expr is not None:
|
635
651
|
request.field_filter.CopyFrom(field_expr)
|
636
652
|
|
653
|
+
if filter_expression:
|
654
|
+
if filter_expression.field:
|
655
|
+
expr = await parse_expression(filter_expression.field, kbid)
|
656
|
+
if expr:
|
657
|
+
request.field_filter.CopyFrom(expr)
|
658
|
+
|
659
|
+
if filter_expression.paragraph:
|
660
|
+
raise InvalidQueryError(
|
661
|
+
"filter_expression", "paragraph filters not yet available in suggest"
|
662
|
+
)
|
663
|
+
# TODO
|
664
|
+
# expr = await parse_expression(filter_expression.paragraph, kbid)
|
665
|
+
# if expr:
|
666
|
+
# request.paragraph_filter.CopyFrom(expr)
|
667
|
+
|
668
|
+
# TODO: Pass operator to PB
|
669
|
+
|
670
|
+
if hidden is not None:
|
671
|
+
expr = nodereader_pb2.FilterExpression()
|
672
|
+
if hidden:
|
673
|
+
expr.facet.facet = LABEL_HIDDEN
|
674
|
+
else:
|
675
|
+
expr.bool_not.facet.facet = LABEL_HIDDEN
|
676
|
+
|
677
|
+
add_and_expression(request.field_filter, expr)
|
678
|
+
|
637
679
|
return request
|
638
680
|
|
639
681
|
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from typing import Union
|
22
|
+
|
23
|
+
from nucliadb.common import datamanagers
|
24
|
+
from nucliadb.common.ids import FIELD_TYPE_NAME_TO_STR
|
25
|
+
from nucliadb.search.search.exceptions import InvalidQueryError
|
26
|
+
from nucliadb_models.filter import (
|
27
|
+
And,
|
28
|
+
DateCreated,
|
29
|
+
DateModified,
|
30
|
+
Entity,
|
31
|
+
Field,
|
32
|
+
FieldFilterExpression,
|
33
|
+
FieldMimetype,
|
34
|
+
Generated,
|
35
|
+
Keyword,
|
36
|
+
Kind,
|
37
|
+
Label,
|
38
|
+
Language,
|
39
|
+
Not,
|
40
|
+
Or,
|
41
|
+
OriginMetadata,
|
42
|
+
OriginPath,
|
43
|
+
OriginTag,
|
44
|
+
ParagraphFilterExpression,
|
45
|
+
Resource,
|
46
|
+
ResourceMimetype,
|
47
|
+
)
|
48
|
+
from nucliadb_protos.nodereader_pb2 import FilterExpression as PBFilterExpression
|
49
|
+
|
50
|
+
|
51
|
+
async def parse_expression(
|
52
|
+
expr: Union[FieldFilterExpression, ParagraphFilterExpression],
|
53
|
+
kbid: str,
|
54
|
+
) -> PBFilterExpression:
|
55
|
+
f = PBFilterExpression()
|
56
|
+
|
57
|
+
if isinstance(expr, And):
|
58
|
+
for op in expr.operands:
|
59
|
+
f.bool_and.operands.append(await parse_expression(op, kbid))
|
60
|
+
elif isinstance(expr, Or):
|
61
|
+
for op in expr.operands:
|
62
|
+
f.bool_or.operands.append(await parse_expression(op, kbid))
|
63
|
+
elif isinstance(expr, Not):
|
64
|
+
f.bool_not.CopyFrom(await parse_expression(expr.operand, kbid))
|
65
|
+
elif isinstance(expr, Resource):
|
66
|
+
if expr.id:
|
67
|
+
f.resource.resource_id = expr.id
|
68
|
+
else:
|
69
|
+
rid = await datamanagers.atomic.resources.get_resource_uuid_from_slug(
|
70
|
+
kbid=kbid, slug=expr.slug
|
71
|
+
)
|
72
|
+
if rid is None:
|
73
|
+
raise InvalidQueryError("slug", f"Cannot find slug {expr.slug}")
|
74
|
+
f.resource.resource_id = rid
|
75
|
+
elif isinstance(expr, Field):
|
76
|
+
f.field.field_type = FIELD_TYPE_NAME_TO_STR[expr.type]
|
77
|
+
if expr.name:
|
78
|
+
f.field.field_id = expr.name
|
79
|
+
elif isinstance(expr, Keyword):
|
80
|
+
f.keyword.keyword = expr.word
|
81
|
+
elif isinstance(expr, DateCreated):
|
82
|
+
f.date.field = PBFilterExpression.DateRangeFilter.DateField.CREATED
|
83
|
+
if expr.since:
|
84
|
+
f.date.since.FromDatetime(expr.since)
|
85
|
+
if expr.until:
|
86
|
+
f.date.until.FromDatetime(expr.until)
|
87
|
+
elif isinstance(expr, DateModified):
|
88
|
+
f.date.field = PBFilterExpression.DateRangeFilter.DateField.MODIFIED
|
89
|
+
if expr.since:
|
90
|
+
f.date.since.FromDatetime(expr.since)
|
91
|
+
if expr.until:
|
92
|
+
f.date.until.FromDatetime(expr.until)
|
93
|
+
elif isinstance(expr, OriginTag):
|
94
|
+
f.facet.facet = f"/t/{expr.tag}"
|
95
|
+
elif isinstance(expr, Label):
|
96
|
+
f.facet.facet = f"/l/{expr.labelset}"
|
97
|
+
if expr.label:
|
98
|
+
f.facet.facet += f"/{expr.label}"
|
99
|
+
elif isinstance(expr, ResourceMimetype):
|
100
|
+
f.facet.facet = f"/n/i/{expr.type}"
|
101
|
+
if expr.subtype:
|
102
|
+
f.facet.facet += f"/{expr.subtype}"
|
103
|
+
elif isinstance(expr, FieldMimetype):
|
104
|
+
f.facet.facet = f"/mt/{expr.type}"
|
105
|
+
if expr.subtype:
|
106
|
+
f.facet.facet += f"/{expr.subtype}"
|
107
|
+
elif isinstance(expr, Entity):
|
108
|
+
f.facet.facet = f"/e/{expr.subtype}"
|
109
|
+
if expr.value:
|
110
|
+
f.facet.facet += f"/{expr.value}"
|
111
|
+
elif isinstance(expr, Language):
|
112
|
+
if expr.only_primary:
|
113
|
+
f.facet.facet = f"/s/p/{expr.language}"
|
114
|
+
else:
|
115
|
+
f.facet.facet = f"/s/s/{expr.language}"
|
116
|
+
elif isinstance(expr, OriginMetadata):
|
117
|
+
f.facet.facet = f"/m/{expr.field}"
|
118
|
+
if expr.value:
|
119
|
+
f.facet.facet += f"/{expr.value}"
|
120
|
+
elif isinstance(expr, OriginPath):
|
121
|
+
f.facet.facet = f"/p/{expr.prefix}"
|
122
|
+
elif isinstance(expr, Generated):
|
123
|
+
f.facet.facet = "/g/da"
|
124
|
+
if expr.da_task:
|
125
|
+
f.facet.facet += f"/{expr.da_task}"
|
126
|
+
elif isinstance(expr, Kind):
|
127
|
+
f.facet.facet = f"/k/{expr.kind.lower()}"
|
128
|
+
else:
|
129
|
+
# This is a trick so mypy generates an error if this branch can be reached,
|
130
|
+
# that is, if we are missing some ifs
|
131
|
+
_a: int = "a"
|
132
|
+
|
133
|
+
return f
|
134
|
+
|
135
|
+
|
136
|
+
def add_and_expression(dest: PBFilterExpression, add: PBFilterExpression):
|
137
|
+
dest_expr_type = dest.WhichOneof("expr")
|
138
|
+
if dest_expr_type is None:
|
139
|
+
dest.CopyFrom(add)
|
140
|
+
elif dest_expr_type == "bool_and":
|
141
|
+
dest.bool_and.operands.append(add)
|
142
|
+
else:
|
143
|
+
and_expr = PBFilterExpression()
|
144
|
+
and_expr.bool_and.operands.append(dest)
|
145
|
+
and_expr.bool_and.operands.append(add)
|
146
|
+
dest.CopyFrom(and_expr)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post3377
|
3
|
+
Version: 6.2.1.post3382
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3377
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3377
|
25
|
-
Requires-Dist: nucliadb-protos>=6.2.1.post3377
|
26
|
-
Requires-Dist: nucliadb-models>=6.2.1.post3377
|
27
|
-
Requires-Dist: nidx-protos>=6.2.1.post3377
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post3382
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post3382
|
25
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post3382
|
26
|
+
Requires-Dist: nucliadb-models>=6.2.1.post3382
|
27
|
+
Requires-Dist: nidx-protos>=6.2.1.post3382
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn
|
@@ -40,7 +40,7 @@ nucliadb/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
40
|
nucliadb/common/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
41
41
|
nucliadb/common/constants.py,sha256=QpigxJh_CtD85Evy0PtV5cVq6x0U_f9xfIcXz1ymkUg,869
|
42
42
|
nucliadb/common/counters.py,sha256=8lOi3A2HeLDDlcNaS2QT1SfD3350VPBjiY3FkmHH1V8,977
|
43
|
-
nucliadb/common/ids.py,sha256=
|
43
|
+
nucliadb/common/ids.py,sha256=4QjoIofes_vtKj2HsFWZf8VVIVWXxdkYtLpx1n618Us,8239
|
44
44
|
nucliadb/common/locking.py,sha256=RL0CabZVPzxHZyUjYeUyLvsJTm7W3J9o4fEgsY_ufNc,5896
|
45
45
|
nucliadb/common/nidx.py,sha256=_LoU8D4afEtlW0c3vGUCoatDZvMr0-2l_GtIGap7VxA,10185
|
46
46
|
nucliadb/common/cluster/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -173,7 +173,7 @@ nucliadb/reader/api/v1/knowledgebox.py,sha256=Uu-yPB8KKZt1VaFrFNMMaXOvLsclBJDK9d
|
|
173
173
|
nucliadb/reader/api/v1/learning_config.py,sha256=CZ7pFXzBZkJE2dXbC1wArszJw_ZLpuEb6gnsz2MKEz0,5525
|
174
174
|
nucliadb/reader/api/v1/resource.py,sha256=SFIv_vpgkdJQv7L_UgYZS5FvubipJ0ligpExGDjKHV0,14064
|
175
175
|
nucliadb/reader/api/v1/router.py,sha256=eyNmEGSP9zHkCIG5XlAXl6sukq950B7gFT3X2peMtIE,1011
|
176
|
-
nucliadb/reader/api/v1/services.py,sha256=
|
176
|
+
nucliadb/reader/api/v1/services.py,sha256=hGD7VHOsLYeaA8kwX92iC-BkbGbh4d_v5W4434ezjuY,11916
|
177
177
|
nucliadb/reader/api/v1/vectorsets.py,sha256=insTwaykshz442cMKa2VP74wJwvZrIYi0U7M9EM3aCM,1822
|
178
178
|
nucliadb/reader/reader/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
179
179
|
nucliadb/reader/reader/notifications.py,sha256=HVZNUlfbSuoZ9BsSs8wmzPeYurl0U0O2ooVlR9KSM3U,7792
|
@@ -192,12 +192,12 @@ nucliadb/search/api/v1/__init__.py,sha256=8w6VhZ5rbzX1xLSXr336d2IE-O0dQiv-ba6UYd
|
|
192
192
|
nucliadb/search/api/v1/ask.py,sha256=F2dR3-swb3Xz8MfZPYL3G65KY2R_mgef4YVBbu8kLi4,4352
|
193
193
|
nucliadb/search/api/v1/catalog.py,sha256=TF19WN-qgZZLkqBwVH5xNsMxYTrmdEflPvy7qft_4lE,7010
|
194
194
|
nucliadb/search/api/v1/feedback.py,sha256=kNLc4dHz2SXHzV0PwC1WiRAwY88fDptPcP-kO0q-FrQ,2620
|
195
|
-
nucliadb/search/api/v1/find.py,sha256=
|
195
|
+
nucliadb/search/api/v1/find.py,sha256=2_YX0p6Y8dYVYy99OB0cgPlre13P_JfWmE07MnbVwlE,9627
|
196
196
|
nucliadb/search/api/v1/knowledgebox.py,sha256=rWhx3PYWryingu19qwwFDbVvVYynq5Ky23FSlzmTutQ,8721
|
197
197
|
nucliadb/search/api/v1/predict_proxy.py,sha256=QrGzo0hKjtmyGZ6pjlJHYAh4hxwVUIOTcVcerRCw7eE,3047
|
198
198
|
nucliadb/search/api/v1/router.py,sha256=mtT07rBZcVfpa49doaw9b1tj3sdi3qLH0gn9Io6NYM0,988
|
199
|
-
nucliadb/search/api/v1/search.py,sha256=
|
200
|
-
nucliadb/search/api/v1/suggest.py,sha256=
|
199
|
+
nucliadb/search/api/v1/search.py,sha256=DLXxh2FRXmLnZIIXaSLT7XaNoY2GZJTkpcduLTDyVW4,14023
|
200
|
+
nucliadb/search/api/v1/suggest.py,sha256=tQX7rvPRjE_epk6qN8JB_Xue8JL02uUeVUNpbaJECQE,6318
|
201
201
|
nucliadb/search/api/v1/summarize.py,sha256=VAHJvE6V3xUgEBfqNKhgoxmDqCvh30RnrEIBVhMcNLU,2499
|
202
202
|
nucliadb/search/api/v1/utils.py,sha256=5Ve-frn7LAE2jqAgB85F8RSeqxDlyA08--gS-AdOLS4,1434
|
203
203
|
nucliadb/search/api/v1/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -213,7 +213,7 @@ nucliadb/search/search/cut.py,sha256=ytY0_GY7ocNjfxTb4aosxEp4ZfhQNDP--JkhEMGD298
|
|
213
213
|
nucliadb/search/search/exceptions.py,sha256=klGLgAGGrXcSGix_W6418ZBMqDchAIGjN77ofkOScEI,1039
|
214
214
|
nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_QziX4,6184
|
215
215
|
nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
|
216
|
-
nucliadb/search/search/find.py,sha256=
|
216
|
+
nucliadb/search/search/find.py,sha256=jQZOqu8VeX8k3ELV8bLK4TwUUjGrvmubouxvO1IvJV0,10236
|
217
217
|
nucliadb/search/search/find_merge.py,sha256=3FnzKFEnVemg6FO_6zveulbAU7klvsiPEBvLrpBBMg8,17450
|
218
218
|
nucliadb/search/search/graph_strategy.py,sha256=ahwcUTQZ0Ll-rnS285DO9PmRyiM-1p4BM3UvmOYVwhM,31750
|
219
219
|
nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
|
@@ -223,14 +223,14 @@ nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUH
|
|
223
223
|
nucliadb/search/search/paragraphs.py,sha256=pNAEiYqJGGUVcEf7xf-PFMVqz0PX4Qb-WNG-_zPGN2o,7799
|
224
224
|
nucliadb/search/search/pgcatalog.py,sha256=IaNK4dAxdXs38PoIkTdgqMDuZDjeiOtcXn3LeaT-OMw,8855
|
225
225
|
nucliadb/search/search/predict_proxy.py,sha256=IFI3v_ODz2_UU1XZnyaD391fE7-2C0npSmj_HmDvzS4,3123
|
226
|
-
nucliadb/search/search/query.py,sha256=
|
226
|
+
nucliadb/search/search/query.py,sha256=A1HnP7FzBtn3G4-oId_x-x0MHYlQPCTzrtP73LxoEwo,28733
|
227
227
|
nucliadb/search/search/rank_fusion.py,sha256=tRGo_KlsFsVx1CQEy1iqQ6f0T1Dq1kf0axDXHuuzvvM,6946
|
228
228
|
nucliadb/search/search/rerankers.py,sha256=3vep4EOVNeDJGsMdx-1g6Ar4ZGJG3IHym3HkxnbwtAQ,7321
|
229
229
|
nucliadb/search/search/shards.py,sha256=JSRSrHgHcF4sXyuZZoJdMfK0v_LHpoSRf1lCr5-K5ko,2742
|
230
230
|
nucliadb/search/search/summarize.py,sha256=ksmYPubEQvAQgfPdZHfzB_rR19B2ci4IYZ6jLdHxZo8,4996
|
231
231
|
nucliadb/search/search/utils.py,sha256=iF2tbBA56gRMJH1TlE2hMrqeXqjoeOPt4KgRdp2m9Ek,3313
|
232
232
|
nucliadb/search/search/chat/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
233
|
-
nucliadb/search/search/chat/ask.py,sha256=
|
233
|
+
nucliadb/search/search/chat/ask.py,sha256=xmHAO9rmw1hvutGeYc0kxmioxHPpYEjlB-sTC4ArX-k,36516
|
234
234
|
nucliadb/search/search/chat/exceptions.py,sha256=Siy4GXW2L7oPhIR86H3WHBhE9lkV4A4YaAszuGGUf54,1356
|
235
235
|
nucliadb/search/search/chat/images.py,sha256=PA8VWxT5_HUGfW1ULhKTK46UBsVyINtWWqEM1ulzX1E,3095
|
236
236
|
nucliadb/search/search/chat/prompt.py,sha256=Jnja-Ss7skgnnDY8BymVfdeYsFPnIQFL8tEvcRXTKUE,47356
|
@@ -238,6 +238,7 @@ nucliadb/search/search/chat/query.py,sha256=sFRJag80ahpXA7q3oP0XfIsUyRMNz0Y6K6nz
|
|
238
238
|
nucliadb/search/search/query_parser/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
239
239
|
nucliadb/search/search/query_parser/exceptions.py,sha256=szAOXUZ27oNY-OSa9t2hQ5HHkQQC0EX1FZz_LluJHJE,1224
|
240
240
|
nucliadb/search/search/query_parser/fetcher.py,sha256=jhr__J0KmAzjdsTTadWQmD9qf6lZvqlKAfZdYjZH_UY,15742
|
241
|
+
nucliadb/search/search/query_parser/filter_expression.py,sha256=wdqqQzdMe5nMS4jZh0HT21ppSd5ur50jyZRjzjGI7o4,5123
|
241
242
|
nucliadb/search/search/query_parser/models.py,sha256=2iWuTcH24RDF8xokgXr0j5qbMoURQ1TFyqJIYs16LqU,2283
|
242
243
|
nucliadb/search/search/query_parser/old_filters.py,sha256=-zbfN-RsXoj_DRjh3Lfp-wShwFXgkISawzVptVzja-A,9071
|
243
244
|
nucliadb/search/search/query_parser/parser.py,sha256=m6meq5QQO_ofdtbrvEORsZLjxURWfRR0dINrgDXmYRg,6323
|
@@ -314,7 +315,7 @@ nucliadb/writer/api/v1/__init__.py,sha256=akI9A_jloNLb0dU4T5zjfdyvmSAiDeIdjAlzNx
|
|
314
315
|
nucliadb/writer/api/v1/export_import.py,sha256=Cv4DNtqoR_x2LMNx80C9ehyaCaFIxhq2eutEk2uzcZg,8249
|
315
316
|
nucliadb/writer/api/v1/field.py,sha256=OsWOYA0WQ6onE5Rkl20QIEdtrSi7Jgnu62fUt90Ziy8,17503
|
316
317
|
nucliadb/writer/api/v1/knowledgebox.py,sha256=MLeIuym4jPrJgfy1NTcN9CpUGwuBiqDHMcx0hY9DR7g,9530
|
317
|
-
nucliadb/writer/api/v1/learning_config.py,sha256=
|
318
|
+
nucliadb/writer/api/v1/learning_config.py,sha256=CKBjqcbewkfPwGUPLDWzZSpro6XkmCaVppe5Qtpu5Go,3117
|
318
319
|
nucliadb/writer/api/v1/resource.py,sha256=A8fAHlN5XFsg6XFYKhfWJS8czgNH6yXr-PsnUqz2WUE,18757
|
319
320
|
nucliadb/writer/api/v1/router.py,sha256=RjuoWLpZer6Kl2BW_wznpNo6XL3BOpdTGqXZCn3QrrQ,1034
|
320
321
|
nucliadb/writer/api/v1/services.py,sha256=THnBnRxiHrEZPpBTL-E-vplEUfcD-fZpuslKRonM6xs,10286
|
@@ -336,8 +337,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
336
337
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
337
338
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
338
339
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
339
|
-
nucliadb-6.2.1.
|
340
|
-
nucliadb-6.2.1.
|
341
|
-
nucliadb-6.2.1.
|
342
|
-
nucliadb-6.2.1.
|
343
|
-
nucliadb-6.2.1.
|
340
|
+
nucliadb-6.2.1.post3382.dist-info/METADATA,sha256=44Lp7-8JChfxhjnEjkNJECK8F8rPbezf9KGEeH_UE78,4291
|
341
|
+
nucliadb-6.2.1.post3382.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
342
|
+
nucliadb-6.2.1.post3382.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
343
|
+
nucliadb-6.2.1.post3382.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
344
|
+
nucliadb-6.2.1.post3382.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|