PyPI - nucliadb-models - Versions diffs - 6.2.1.post3377__py3-none-any.whl → 6.2.1.post3380__py3-none-any.whl - Mend

nucliadb-models 6.2.1.post3377__py3-none-any.whl → 6.2.1.post3380__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

nucliadb_models/filter.py ADDED Viewed

@@ -0,0 +1,323 @@
+# Copyright (C) 2021 Bosutech XXI S.L.
+#
+# nucliadb is offered under the AGPL v3.0 and as commercial software.
+# For commercial licensing, contact us at info@nuclia.com.
+#
+# AGPL:
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+from typing import Any, Generic, Literal, Optional, TypeVar, Union
+import pydantic
+from pydantic import BaseModel, Discriminator, Tag, model_validator
+from typing_extensions import Annotated, Self
+from .common import FieldTypeName, Paragraph
+from .utils import DateTime
+F = TypeVar("F")
+class And(BaseModel, Generic[F], extra="forbid"):
+    """AND of other expressions"""
+    operands: list[F] = pydantic.Field(alias="and")
+class Or(BaseModel, Generic[F], extra="forbid"):
+    """OR of other expressions"""
+    operands: list[F] = pydantic.Field(alias="or")
+class Not(BaseModel, Generic[F], extra="forbid"):
+    """NOT another expression"""
+    operand: F = pydantic.Field(alias="not")
+class Resource(BaseModel, extra="forbid"):
+    """Matches all fields of a resource given its id or slug"""
+    prop: Literal["resource"]
+    id: Optional[str] = pydantic.Field(default=None, description="ID of the resource to match")
+    slug: Optional[str] = pydantic.Field(default=None, description="Slug of the resource to match")
+    @model_validator(mode="after")
+    def single_field(self) -> Self:
+        if self.id is not None and self.slug is not None:
+            raise ValueError("Must set only one of `id` and `slug`")
+        if self.id is None and self.slug is None:
+            raise ValueError("Must set `id` or `slug`")
+        return self
+class Field(BaseModel, extra="forbid"):
+    """Matches a field or set of fields"""
+    prop: Literal["field"]
+    type: FieldTypeName = pydantic.Field(description="Type of the field to match, ")
+    name: Optional[str] = pydantic.Field(
+        default=None,
+        description="Name of the field to match. If blank, matches all fields of the given type",
+    )
+class Keyword(BaseModel, extra="forbid"):
+    """Matches all fields that contain a keyword"""
+    prop: Literal["keyword"]
+    word: str = pydantic.Field(description="Keyword to find")
+class DateCreated(BaseModel, extra="forbid"):
+    """Matches all fields created in a date range"""
+    prop: Literal["created"]
+    since: Optional[DateTime] = pydantic.Field(
+        default=None, description="Start of the date range. Leave blank for unbounded"
+    )
+    until: Optional[DateTime] = pydantic.Field(
+        default=None, description="End of the date range. Leave blank for unbounded"
+    )
+    @model_validator(mode="after")
+    def some_set(self) -> Self:
+        if self.since is None and self.until is None:
+            raise ValueError("Must set `since` or `until` (or both)")
+        return self
+class DateModified(BaseModel, extra="forbid"):
+    """Matches all fields modified in a date range"""
+    prop: Literal["modified"]
+    since: Optional[DateTime] = pydantic.Field(
+        default=None, description="Start of the date range. Leave blank for unbounded"
+    )
+    until: Optional[DateTime] = pydantic.Field(
+        default=None, description="End of the date range. Leave blank for unbounded"
+    )
+    @model_validator(mode="after")
+    def some_set(self) -> Self:
+        if self.since is None and self.until is None:
+            raise ValueError("Must set `since` or `until` (or both)")
+        return self
+class OriginTag(BaseModel, extra="forbid"):
+    """Matches all fields with a given origin tag"""
+    prop: Literal["origin_tag"]
+    tag: str = pydantic.Field(description="The tag to match")
+class Label(BaseModel, extra="forbid"):
+    """Matches fields/paragraphs with a label (or labelset)"""
+    prop: Literal["label"]
+    labelset: str = pydantic.Field(description="The labelset to match")
+    label: Optional[str] = pydantic.Field(
+        default=None,
+        description="The label to match. If blank, matches all labels in the given labelset",
+    )
+class ResourceMimetype(BaseModel, extra="forbid"):
+    """Matches resources with a mimetype.
+    The mimetype of a resource can be assigned independently of the mimetype of its fields.
+    In resources with multiple fields, you may prefer to use `field_mimetype`"""
+    prop: Literal["resource_mimetype"]
+    type: str = pydantic.Field(
+        description="Type of the mimetype to match. e.g: In image/jpeg, type is image"
+    )
+    subtype: Optional[str] = pydantic.Field(
+        default=None,
+        description=(
+            "Type of the mimetype to match. e.g: In image/jpeg, subtype is jpeg."
+            "Leave blank to match all mimetype of the type"
+        ),
+    )
+class FieldMimetype(BaseModel, extra="forbid"):
+    """Matches fields with a mimetype"""
+    prop: Literal["field_mimetype"]
+    type: str = pydantic.Field(
+        description="Type of the mimetype to match. e.g: In image/jpeg, type is image"
+    )
+    subtype: Optional[str] = pydantic.Field(
+        default=None,
+        description=(
+            "Type of the mimetype to match. e.g: In image/jpeg, subtype is jpeg."
+            "Leave blank to match all mimetype of the type"
+        ),
+    )
+class Entity(BaseModel, extra="forbid"):
+    """Matches fields that contains a detected entity"""
+    prop: Literal["entity"]
+    subtype: str = pydantic.Field(description="Type of the entity. e.g: PERSON")
+    value: Optional[str] = pydantic.Field(
+        default=None,
+        description="Value of the entity. e.g: Anna. If blank, matches any entity of the given type",
+    )
+class Language(BaseModel, extra="forbid"):
+    """Matches the language of the field"""
+    prop: Literal["language"]
+    only_primary: bool = pydantic.Field(
+        default=False,
+        description="Match only the primary language of the document. By default, matches any language that appears in the document",
+    )
+    language: str = pydantic.Field(description="The code of the language to match, e.g: en")
+class OriginMetadata(BaseModel, extra="forbid"):
+    """Matches metadata from the origin"""
+    prop: Literal["origin_metadata"]
+    field: str = pydantic.Field(description="Metadata field")
+    value: Optional[str] = pydantic.Field(
+        default=None,
+        description="Value of the metadata field. If blank, matches any document with the given metadata field set (to any value)",
+    )
+class OriginPath(BaseModel, extra="forbid"):
+    """Matches the origin path"""
+    prop: Literal["origin_path"]
+    prefix: str = pydantic.Field(
+        description=(
+            "Prefix of the path, matches all paths under this prefix"
+            "e.g: `prefix=/dir/` matches `/dir` and `/dir/a/b` but not `/dirrrr`"
+        )
+    )
+class Generated(BaseModel, extra="forbid"):
+    """Matches if the field was generated by the given source"""
+    prop: Literal["generated"]
+    by: Literal["data-augmentation"] = pydantic.Field(
+        description="Generator for this field. Currently, only data-augmentation is supported"
+    )
+    da_task: Optional["str"] = pydantic.Field(
+        default=None, description="Matches field generated by an specific DA task, given its prefix"
+    )
+class Kind(BaseModel, extra="forbid"):
+    """Matches paragraphs of a certain kind"""
+    prop: Literal["kind"]
+    kind: Paragraph.TypeParagraph = pydantic.Field(description="The kind of paragraph to match")
+# The discriminator function is optional, everything works without it.
+# We implement it because it makes pydantic produce more user-friendly errors
+def filter_discriminator(v: Any) -> Optional[str]:
+    if isinstance(v, dict):
+        if "and" in v:
+            return "and"
+        elif "or" in v:
+            return "or"
+        elif "not" in v:
+            return "not"
+        else:
+            return v.get("prop")
+    if isinstance(v, And):
+        return "and"
+    elif isinstance(v, Or):
+        return "or"
+    elif isinstance(v, Not):
+        return "not"
+    else:
+        return getattr(v, "prop", None)
+FieldFilterExpression = Annotated[
+    Union[
+        Annotated[And["FieldFilterExpression"], Tag("and")],
+        Annotated[Or["FieldFilterExpression"], Tag("or")],
+        Annotated[Not["FieldFilterExpression"], Tag("not")],
+        Annotated[Resource, Tag("resource")],
+        Annotated[Field, Tag("field")],
+        Annotated[Keyword, Tag("keyword")],
+        Annotated[DateCreated, Tag("created")],
+        Annotated[DateModified, Tag("modified")],
+        Annotated[OriginTag, Tag("origin_tag")],
+        Annotated[Label, Tag("label")],
+        Annotated[ResourceMimetype, Tag("resource_mimetype")],
+        Annotated[FieldMimetype, Tag("field_mimetype")],
+        Annotated[Entity, Tag("entity")],
+        Annotated[Language, Tag("language")],
+        Annotated[OriginMetadata, Tag("origin_metadata")],
+        Annotated[OriginPath, Tag("origin_path")],
+        Annotated[Generated, Tag("generated")],
+    ],
+    Discriminator(filter_discriminator),
+]
+ParagraphFilterExpression = Annotated[
+    Union[
+        Annotated[And["ParagraphFilterExpression"], Tag("and")],
+        Annotated[Or["ParagraphFilterExpression"], Tag("or")],
+        Annotated[Not["ParagraphFilterExpression"], Tag("not")],
+        Annotated[Label, Tag("label")],
+        Annotated[Kind, Tag("kind")],
+    ],
+    Discriminator(filter_discriminator),
+]
+class FilterExpression(BaseModel, extra="forbid"):
+    """Returns only documents that match this filter expression.
+    Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search/#filters
+    This allows building complex filtering expressions and replaces the following parameters:
+    `fields`, `filters`, `range_*`, `resource_filters`, `keyword_filters`.
+    """
+    # class Operator(str, Enum):
+    #     AND = "and"
+    #     OR = "or"
+    field: Optional[FieldFilterExpression] = pydantic.Field(
+        default=None, description="Filter to apply to fields"
+    )
+    paragraph: Optional[ParagraphFilterExpression] = pydantic.Field(
+        default=None, description="Filter to apply to each text block"
+    )
+    # TODO: Not exposed until implemented in nidx
+    # operator: Operator = pydantic.Field(
+    #     default=Operator.AND,
+    #     description=(
+    #         "How to combine field and paragraph filters (default is AND)."
+    #         "AND returns text blocks that match both filters."
+    #         "OR returns text_blocks that match one of the two filters"
+    #     ),
+    # )

nucliadb_models/search.py CHANGED Viewed

@@ -45,7 +45,7 @@ from nucliadb_models.internal.shards import (  # noqa isort: skip
     ShardReplica,
     KnowledgeboxShards,
 )
+from nucliadb_models.filter import FilterExpression
 ANSWER_JSON_SCHEMA_EXAMPLE = {
     "name": "structred_response",
@@ -644,6 +644,16 @@ class SearchParamDefaults:
         title="Filter resources by hidden",
         description="Set to filter only hidden or only non-hidden resources. Default is to return everything",
     )
+    filter_expression = ParamDefault(
+        default=None,
+        title="Filter resource by an expression",
+        description=(
+            "Returns only documents that match this filter expression."
+            "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search/#filters"
+            "This allows building complex filtering expressions and replaces the following parameters:"
+            "`fields`, `filters`, `range_*`, `resource_filters`, `keyword_filters`."
+        ),
+    )
 class Filter(BaseModel):
@@ -737,6 +747,9 @@ class AuditMetadataBase(BaseModel):
 class BaseSearchRequest(AuditMetadataBase):
     query: str = SearchParamDefaults.query.to_pydantic_field()
+    filter_expression: SkipJsonSchema[Optional[FilterExpression]] = (
+        SearchParamDefaults.filter_expression.to_pydantic_field()
+    )
     fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
     filters: Union[list[str], list[Filter]] = Field(
         default=[],
@@ -1370,6 +1383,9 @@ class AskRequest(AuditMetadataBase):
         le=200,
         description="The top most relevant results to fetch at the retrieval step. The maximum number of results allowed is 200.",
     )
+    filter_expression: SkipJsonSchema[Optional[FilterExpression]] = (
+        SearchParamDefaults.filter_expression.to_pydantic_field()
+    )
     fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
     filters: Union[list[str], list[Filter]] = Field(
         default=[],

{nucliadb_models-6.2.1.post3377.dist-info → nucliadb_models-6.2.1.post3380.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nucliadb_models
-Version: 6.2.1.post3377
+Version: 6.2.1.post3380
 Author-email: Nuclia <nucliadb@nuclia.com>
 License: AGPL
 Project-URL: Homepage, https://nuclia.com

{nucliadb_models-6.2.1.post3377.dist-info → nucliadb_models-6.2.1.post3380.dist-info}/RECORD RENAMED Viewed

@@ -8,6 +8,7 @@ nucliadb_models/export_import.py,sha256=A1KTjwQCRtyVAWcgabXsdltI78rauXBmZX1ie6Rx
 nucliadb_models/external_index_providers.py,sha256=aVyj-P4kVqfqPjF13E_lUM0FZsq8-DTbIsh-kHOgt2s,1787
 nucliadb_models/extracted.py,sha256=wnTjMsSPk1iZFtn4eFrBC9fSOZkNTeHQ_B9CRyaL0cA,6444
 nucliadb_models/file.py,sha256=4pDfQtXaBNB-ExeXC7NIdt33RbJp_u53_x8ACVkHXCM,2174
+nucliadb_models/filter.py,sha256=c6lgFaN8SATosD9d-fuuxsDrloVcZpGq23i8w656YaA,11034
 nucliadb_models/labels.py,sha256=OUlX-apmFkibEN9bWThRJlbCD84hzJdddN1YYUV2Y3w,4201
 nucliadb_models/link.py,sha256=NRfsjLQpjZXndkb5o8qnSVPqb2knqk2kk5_iQB4AkaY,2785
 nucliadb_models/metadata.py,sha256=fiIJfht0Eg5a65ud2FdmHzElZ8VGdrDQ-F65-VJI4IE,8151
@@ -15,7 +16,7 @@ nucliadb_models/notifications.py,sha256=jr2J3zncs880jYf2oZHYt0VFcnlZevsbkyX69ovT
 nucliadb_models/processing.py,sha256=UeU-VxbBlOzkNxviOS3a0X_k7Ye-jYu3UOdGuu21M8M,971
 nucliadb_models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb_models/resource.py,sha256=cjYloaRuCJFc3lGIxLZcX959oOq_N1f3V9bpPMYv4WA,9255
-nucliadb_models/search.py,sha256=1aehKd2EYlKOpz8c0CG5eaaAcDmRS9zaEAmFkWvDhE0,80139
+nucliadb_models/search.py,sha256=ErfXrZsQe4b2iKrOE5dEzspj_1omHSeL2EpeiAEZEJc,80994
 nucliadb_models/security.py,sha256=RewdzQ55nPZ9V7B0NX9KHeWg6B4Hg_RkeiFv2TQyrjs,1402
 nucliadb_models/synonyms.py,sha256=qXTPHfspMgw22hCjAOdFOIoUsRZ7Ju3JW-Lw9Nz4VaI,942
 nucliadb_models/text.py,sha256=RHN55PzQjyC0ghbf0r5GvVjTbFUTWzEDSCCkHkgnfig,3491
@@ -28,7 +29,7 @@ nucliadb_models/agents/ingestion.py,sha256=mV7gV6VpYg4VNpc59K3275TMUJZbUzeUnp3SZ
 nucliadb_models/internal/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb_models/internal/predict.py,sha256=5rgUPrH_98gerySOZ-TR2PX_qzCGF1_8VxyOu3bGhis,2281
 nucliadb_models/internal/shards.py,sha256=uZLsMkYWrJDHq3xy_w7snSeV2X3aDBuht9GC_MG3sKc,1976
-nucliadb_models-6.2.1.post3377.dist-info/METADATA,sha256=EluUoQFpS0Qo912t_F7G0EtWuRYg2xPUaSjai5H0rcA,759
-nucliadb_models-6.2.1.post3377.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-nucliadb_models-6.2.1.post3377.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
-nucliadb_models-6.2.1.post3377.dist-info/RECORD,,
+nucliadb_models-6.2.1.post3380.dist-info/METADATA,sha256=mBu75c2jiNf0VjnpB2yOdNXEs8OozC1T7kIA8W5Bduk,759
+nucliadb_models-6.2.1.post3380.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+nucliadb_models-6.2.1.post3380.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
+nucliadb_models-6.2.1.post3380.dist-info/RECORD,,

{nucliadb_models-6.2.1.post3377.dist-info → nucliadb_models-6.2.1.post3380.dist-info}/WHEEL RENAMED Viewed

File without changes

{nucliadb_models-6.2.1.post3377.dist-info → nucliadb_models-6.2.1.post3380.dist-info}/top_level.txt RENAMED Viewed

File without changes

nucliadb-models 6.2.1.post3377__py3-none-any.whl → 6.2.1.post3380__py3-none-any.whl

nucliadb-models 6.2.1.post3377__py3-none-any.whl → 6.2.1.post3380__py3-none-any.whl