nucliadb-models 6.2.1.post3377__py3-none-any.whl → 6.2.1.post3380__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,323 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+ from typing import Any, Generic, Literal, Optional, TypeVar, Union
22
+
23
+ import pydantic
24
+ from pydantic import BaseModel, Discriminator, Tag, model_validator
25
+ from typing_extensions import Annotated, Self
26
+
27
+ from .common import FieldTypeName, Paragraph
28
+ from .utils import DateTime
29
+
30
+ F = TypeVar("F")
31
+
32
+
33
class And(BaseModel, Generic[F], extra="forbid"):
    """AND of other expressions"""

    # `and` is a reserved word in Python, so the attribute is named `operands`
    # and the input key `and` is mapped onto it through the alias.
    operands: list[F] = pydantic.Field(alias="and")
37
+
38
+
39
class Or(BaseModel, Generic[F], extra="forbid"):
    """OR of other expressions"""

    # `or` is a reserved word in Python, so the attribute is named `operands`
    # and the input key `or` is mapped onto it through the alias.
    operands: list[F] = pydantic.Field(alias="or")
43
+
44
+
45
class Not(BaseModel, Generic[F], extra="forbid"):
    """NOT another expression"""

    # Single negated sub-expression; `not` is a reserved word in Python, so the
    # input key `not` is mapped onto `operand` through the alias.
    operand: F = pydantic.Field(alias="not")
49
+
50
+
51
class Resource(BaseModel, extra="forbid"):
    """Matches all fields of a resource given its id or slug"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["resource"]
    id: Optional[str] = pydantic.Field(default=None, description="ID of the resource to match")
    slug: Optional[str] = pydantic.Field(default=None, description="Slug of the resource to match")

    @model_validator(mode="after")
    def single_field(self) -> Self:
        # Exactly one of `id` / `slug` must be given: reject both-set and
        # neither-set with distinct messages.
        has_id = self.id is not None
        has_slug = self.slug is not None
        if has_id and has_slug:
            raise ValueError("Must set only one of `id` and `slug`")
        if not (has_id or has_slug):
            raise ValueError("Must set `id` or `slug`")
        return self
65
+
66
+
67
class Field(BaseModel, extra="forbid"):
    """Matches a field or set of fields"""

    # NOTE: this class shadows the name `Field`, which is why the module calls
    # `pydantic.Field(...)` fully qualified everywhere.

    # Tag value used to select this model inside the filter unions.
    prop: Literal["field"]
    # The description previously ended with a dangling ", " (a truncated
    # sentence that showed up verbatim in the generated schema).
    type: FieldTypeName = pydantic.Field(description="Type of the field to match")
    name: Optional[str] = pydantic.Field(
        default=None,
        description="Name of the field to match. If blank, matches all fields of the given type",
    )
76
+
77
+
78
class Keyword(BaseModel, extra="forbid"):
    """Matches all fields that contain a keyword"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["keyword"]
    # Required: there is no default, so a keyword filter always carries a word.
    word: str = pydantic.Field(description="Keyword to find")
83
+
84
+
85
class DateCreated(BaseModel, extra="forbid"):
    """Matches all fields created in a date range"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["created"]
    since: Optional[DateTime] = pydantic.Field(
        default=None,
        description="Start of the date range. Leave blank for unbounded",
    )
    until: Optional[DateTime] = pydantic.Field(
        default=None,
        description="End of the date range. Leave blank for unbounded",
    )

    @model_validator(mode="after")
    def some_set(self) -> Self:
        # A range with neither bound would match everything; require at least one.
        bounds = (self.since, self.until)
        if all(bound is None for bound in bounds):
            raise ValueError("Must set `since` or `until` (or both)")
        return self
101
+
102
+
103
class DateModified(BaseModel, extra="forbid"):
    """Matches all fields modified in a date range"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["modified"]
    since: Optional[DateTime] = pydantic.Field(
        default=None,
        description="Start of the date range. Leave blank for unbounded",
    )
    until: Optional[DateTime] = pydantic.Field(
        default=None,
        description="End of the date range. Leave blank for unbounded",
    )

    @model_validator(mode="after")
    def some_set(self) -> Self:
        # A range with neither bound would match everything; require at least one.
        bounds = (self.since, self.until)
        if all(bound is None for bound in bounds):
            raise ValueError("Must set `since` or `until` (or both)")
        return self
119
+
120
+
121
class OriginTag(BaseModel, extra="forbid"):
    """Matches all fields with a given origin tag"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["origin_tag"]
    # Required: there is no default for the tag to look up.
    tag: str = pydantic.Field(description="The tag to match")
126
+
127
+
128
class Label(BaseModel, extra="forbid"):
    """Matches fields/paragraphs with a label (or labelset)"""

    # This model is a member of both the field-level and the paragraph-level
    # filter unions defined in this module.
    prop: Literal["label"]
    labelset: str = pydantic.Field(description="The labelset to match")
    label: Optional[str] = pydantic.Field(
        default=None,
        description="The label to match. If blank, matches all labels in the given labelset",
    )
137
+
138
+
139
class ResourceMimetype(BaseModel, extra="forbid"):
    """Matches resources with a mimetype.

    The mimetype of a resource can be assigned independently of the mimetype of its fields.
    In resources with multiple fields, you may prefer to use `field_mimetype`"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["resource_mimetype"]
    type: str = pydantic.Field(
        description="Type of the mimetype to match. e.g: In image/jpeg, type is image"
    )
    # The description used to say "Type" for the subtype, and the two adjacent
    # string literals were joined without a space ("...jpeg.Leave blank...").
    subtype: Optional[str] = pydantic.Field(
        default=None,
        description=(
            "Subtype of the mimetype to match. e.g: In image/jpeg, subtype is jpeg. "
            "Leave blank to match all mimetypes of the type"
        ),
    )
156
+
157
+
158
class FieldMimetype(BaseModel, extra="forbid"):
    """Matches fields with a mimetype"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["field_mimetype"]
    type: str = pydantic.Field(
        description="Type of the mimetype to match. e.g: In image/jpeg, type is image"
    )
    # The description used to say "Type" for the subtype, and the two adjacent
    # string literals were joined without a space ("...jpeg.Leave blank...").
    subtype: Optional[str] = pydantic.Field(
        default=None,
        description=(
            "Subtype of the mimetype to match. e.g: In image/jpeg, subtype is jpeg. "
            "Leave blank to match all mimetypes of the type"
        ),
    )
172
+
173
+
174
class Entity(BaseModel, extra="forbid"):
    """Matches fields that contains a detected entity"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["entity"]
    # The entity type is mandatory; the concrete value is an optional refinement.
    subtype: str = pydantic.Field(description="Type of the entity. e.g: PERSON")
    value: Optional[str] = pydantic.Field(
        default=None,
        description="Value of the entity. e.g: Anna. If blank, matches any entity of the given type",
    )
183
+
184
+
185
class Language(BaseModel, extra="forbid"):
    """Matches the language of the field"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["language"]
    # Opt-in narrowing: defaults to False.
    only_primary: bool = pydantic.Field(
        default=False,
        description="Match only the primary language of the document. By default, matches any language that appears in the document",
    )
    language: str = pydantic.Field(description="The code of the language to match, e.g: en")
194
+
195
+
196
class OriginMetadata(BaseModel, extra="forbid"):
    """Matches metadata from the origin"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["origin_metadata"]
    # The metadata key is mandatory; the value is an optional refinement.
    field: str = pydantic.Field(description="Metadata field")
    value: Optional[str] = pydantic.Field(
        default=None,
        description="Value of the metadata field. If blank, matches any document with the given metadata field set (to any value)",
    )
205
+
206
+
207
class OriginPath(BaseModel, extra="forbid"):
    """Matches the origin path"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["origin_path"]
    # The two adjacent string literals were previously joined without any
    # separator, rendering as "...under this prefixe.g: ..." in the schema.
    prefix: str = pydantic.Field(
        description=(
            "Prefix of the path, matches all paths under this prefix. "
            "e.g: `prefix=/dir/` matches `/dir` and `/dir/a/b` but not `/dirrrr`"
        )
    )
217
+
218
+
219
class Generated(BaseModel, extra="forbid"):
    """Matches if the field was generated by the given source"""

    # Tag value used to select this model inside the filter unions.
    prop: Literal["generated"]
    # Single-valued literal: "data-augmentation" is the only accepted generator.
    by: Literal["data-augmentation"] = pydantic.Field(
        description="Generator for this field. Currently, only data-augmentation is supported"
    )
    # Was annotated as Optional["str"]: a needless string forward reference to
    # a builtin. The plain annotation resolves to the same type.
    da_task: Optional[str] = pydantic.Field(
        default=None, description="Matches field generated by a specific DA task, given its prefix"
    )
229
+
230
+
231
class Kind(BaseModel, extra="forbid"):
    """Matches paragraphs of a certain kind"""

    # Within this module, this model is a member of the paragraph-level filter
    # union only, not the field-level one.
    prop: Literal["kind"]
    kind: Paragraph.TypeParagraph = pydantic.Field(description="The kind of paragraph to match")
236
+
237
+
238
# The discriminator function is optional, everything works without it.
# We implement it because it makes pydantic produce more user-friendly errors
def filter_discriminator(v: Any) -> Optional[str]:
    """Return the union tag that identifies a filter expression.

    Raw dicts are tagged by the first boolean-operator key they contain
    (checked in the order `and`, `or`, `not`), falling back to the value of
    their `prop` key. Any other object is tagged by its operator model class,
    falling back to its `prop` attribute. The result is None when neither an
    operator nor a `prop` is found.
    """
    if isinstance(v, dict):
        # Operator keys take precedence over `prop`.
        for operator in ("and", "or", "not"):
            if operator in v:
                return operator
        return v.get("prop")

    # Already-built objects: operators are recognised by their class.
    for model, tag in ((And, "and"), (Or, "or"), (Not, "not")):
        if isinstance(v, model):
            return tag
    return getattr(v, "prop", None)
259
+
260
+
261
# Tagged union of every expression usable as a field-level filter. The boolean
# operators refer back to this alias by name, which makes the type recursive.
# Each member's Tag is the string `filter_discriminator` returns for it.
FieldFilterExpression = Annotated[
    Union[
        # Boolean combinators
        Annotated[And["FieldFilterExpression"], Tag("and")],
        Annotated[Or["FieldFilterExpression"], Tag("or")],
        Annotated[Not["FieldFilterExpression"], Tag("not")],
        # Leaf filters, tagged with their `prop` literal
        Annotated[Resource, Tag("resource")],
        Annotated[Field, Tag("field")],
        Annotated[Keyword, Tag("keyword")],
        Annotated[DateCreated, Tag("created")],
        Annotated[DateModified, Tag("modified")],
        Annotated[OriginTag, Tag("origin_tag")],
        Annotated[Label, Tag("label")],
        Annotated[ResourceMimetype, Tag("resource_mimetype")],
        Annotated[FieldMimetype, Tag("field_mimetype")],
        Annotated[Entity, Tag("entity")],
        Annotated[Language, Tag("language")],
        Annotated[OriginMetadata, Tag("origin_metadata")],
        Annotated[OriginPath, Tag("origin_path")],
        Annotated[Generated, Tag("generated")],
    ],
    Discriminator(filter_discriminator),
]
283
+
284
# Tagged union of every expression usable as a paragraph-level filter. The
# boolean operators refer back to this alias by name, which makes the type
# recursive. Each member's Tag is the string `filter_discriminator` returns.
ParagraphFilterExpression = Annotated[
    Union[
        # Boolean combinators
        Annotated[And["ParagraphFilterExpression"], Tag("and")],
        Annotated[Or["ParagraphFilterExpression"], Tag("or")],
        Annotated[Not["ParagraphFilterExpression"], Tag("not")],
        # Leaf filters, tagged with their `prop` literal
        Annotated[Label, Tag("label")],
        Annotated[Kind, Tag("kind")],
    ],
    Discriminator(filter_discriminator),
]
294
+
295
+
296
class FilterExpression(BaseModel, extra="forbid"):
    """Returns only documents that match this filter expression.
    Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search/#filters

    This allows building complex filtering expressions and replaces the following parameters:
    `fields`, `filters`, `range_*`, `resource_filters`, `keyword_filters`.
    """

    # Both filters are optional and default to None.
    field: Optional[FieldFilterExpression] = pydantic.Field(
        default=None,
        description="Filter to apply to fields",
    )
    paragraph: Optional[ParagraphFilterExpression] = pydantic.Field(
        default=None,
        description="Filter to apply to each text block",
    )

    # TODO: Not exposed until implemented in nidx
    # class Operator(str, Enum):
    #     AND = "and"
    #     OR = "or"
    #
    # operator: Operator = pydantic.Field(
    #     default=Operator.AND,
    #     description=(
    #         "How to combine field and paragraph filters (default is AND). "
    #         "AND returns text blocks that match both filters. "
    #         "OR returns text_blocks that match one of the two filters"
    #     ),
    # )
nucliadb_models/search.py CHANGED
@@ -45,7 +45,7 @@ from nucliadb_models.internal.shards import ( # noqa isort: skip
45
45
  ShardReplica,
46
46
  KnowledgeboxShards,
47
47
  )
48
-
48
+ from nucliadb_models.filter import FilterExpression
49
49
 
50
50
  ANSWER_JSON_SCHEMA_EXAMPLE = {
51
51
  "name": "structred_response",
@@ -644,6 +644,16 @@ class SearchParamDefaults:
644
644
  title="Filter resources by hidden",
645
645
  description="Set to filter only hidden or only non-hidden resources. Default is to return everything",
646
646
  )
647
+ filter_expression = ParamDefault(
648
+ default=None,
649
+ title="Filter resource by an expression",
650
+ description=(
651
+ "Returns only documents that match this filter expression."
652
+ "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search/#filters"
653
+ "This allows building complex filtering expressions and replaces the following parameters:"
654
+ "`fields`, `filters`, `range_*`, `resource_filters`, `keyword_filters`."
655
+ ),
656
+ )
647
657
 
648
658
 
649
659
  class Filter(BaseModel):
@@ -737,6 +747,9 @@ class AuditMetadataBase(BaseModel):
737
747
 
738
748
  class BaseSearchRequest(AuditMetadataBase):
739
749
  query: str = SearchParamDefaults.query.to_pydantic_field()
750
+ filter_expression: SkipJsonSchema[Optional[FilterExpression]] = (
751
+ SearchParamDefaults.filter_expression.to_pydantic_field()
752
+ )
740
753
  fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
741
754
  filters: Union[list[str], list[Filter]] = Field(
742
755
  default=[],
@@ -1370,6 +1383,9 @@ class AskRequest(AuditMetadataBase):
1370
1383
  le=200,
1371
1384
  description="The top most relevant results to fetch at the retrieval step. The maximum number of results allowed is 200.",
1372
1385
  )
1386
+ filter_expression: SkipJsonSchema[Optional[FilterExpression]] = (
1387
+ SearchParamDefaults.filter_expression.to_pydantic_field()
1388
+ )
1373
1389
  fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
1374
1390
  filters: Union[list[str], list[Filter]] = Field(
1375
1391
  default=[],
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nucliadb_models
3
- Version: 6.2.1.post3377
3
+ Version: 6.2.1.post3380
4
4
  Author-email: Nuclia <nucliadb@nuclia.com>
5
5
  License: AGPL
6
6
  Project-URL: Homepage, https://nuclia.com
@@ -8,6 +8,7 @@ nucliadb_models/export_import.py,sha256=A1KTjwQCRtyVAWcgabXsdltI78rauXBmZX1ie6Rx
8
8
  nucliadb_models/external_index_providers.py,sha256=aVyj-P4kVqfqPjF13E_lUM0FZsq8-DTbIsh-kHOgt2s,1787
9
9
  nucliadb_models/extracted.py,sha256=wnTjMsSPk1iZFtn4eFrBC9fSOZkNTeHQ_B9CRyaL0cA,6444
10
10
  nucliadb_models/file.py,sha256=4pDfQtXaBNB-ExeXC7NIdt33RbJp_u53_x8ACVkHXCM,2174
11
+ nucliadb_models/filter.py,sha256=c6lgFaN8SATosD9d-fuuxsDrloVcZpGq23i8w656YaA,11034
11
12
  nucliadb_models/labels.py,sha256=OUlX-apmFkibEN9bWThRJlbCD84hzJdddN1YYUV2Y3w,4201
12
13
  nucliadb_models/link.py,sha256=NRfsjLQpjZXndkb5o8qnSVPqb2knqk2kk5_iQB4AkaY,2785
13
14
  nucliadb_models/metadata.py,sha256=fiIJfht0Eg5a65ud2FdmHzElZ8VGdrDQ-F65-VJI4IE,8151
@@ -15,7 +16,7 @@ nucliadb_models/notifications.py,sha256=jr2J3zncs880jYf2oZHYt0VFcnlZevsbkyX69ovT
15
16
  nucliadb_models/processing.py,sha256=UeU-VxbBlOzkNxviOS3a0X_k7Ye-jYu3UOdGuu21M8M,971
16
17
  nucliadb_models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
18
  nucliadb_models/resource.py,sha256=cjYloaRuCJFc3lGIxLZcX959oOq_N1f3V9bpPMYv4WA,9255
18
- nucliadb_models/search.py,sha256=1aehKd2EYlKOpz8c0CG5eaaAcDmRS9zaEAmFkWvDhE0,80139
19
+ nucliadb_models/search.py,sha256=ErfXrZsQe4b2iKrOE5dEzspj_1omHSeL2EpeiAEZEJc,80994
19
20
  nucliadb_models/security.py,sha256=RewdzQ55nPZ9V7B0NX9KHeWg6B4Hg_RkeiFv2TQyrjs,1402
20
21
  nucliadb_models/synonyms.py,sha256=qXTPHfspMgw22hCjAOdFOIoUsRZ7Ju3JW-Lw9Nz4VaI,942
21
22
  nucliadb_models/text.py,sha256=RHN55PzQjyC0ghbf0r5GvVjTbFUTWzEDSCCkHkgnfig,3491
@@ -28,7 +29,7 @@ nucliadb_models/agents/ingestion.py,sha256=mV7gV6VpYg4VNpc59K3275TMUJZbUzeUnp3SZ
28
29
  nucliadb_models/internal/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
29
30
  nucliadb_models/internal/predict.py,sha256=5rgUPrH_98gerySOZ-TR2PX_qzCGF1_8VxyOu3bGhis,2281
30
31
  nucliadb_models/internal/shards.py,sha256=uZLsMkYWrJDHq3xy_w7snSeV2X3aDBuht9GC_MG3sKc,1976
31
- nucliadb_models-6.2.1.post3377.dist-info/METADATA,sha256=EluUoQFpS0Qo912t_F7G0EtWuRYg2xPUaSjai5H0rcA,759
32
- nucliadb_models-6.2.1.post3377.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
33
- nucliadb_models-6.2.1.post3377.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
34
- nucliadb_models-6.2.1.post3377.dist-info/RECORD,,
32
+ nucliadb_models-6.2.1.post3380.dist-info/METADATA,sha256=mBu75c2jiNf0VjnpB2yOdNXEs8OozC1T7kIA8W5Bduk,759
33
+ nucliadb_models-6.2.1.post3380.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
34
+ nucliadb_models-6.2.1.post3380.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
35
+ nucliadb_models-6.2.1.post3380.dist-info/RECORD,,