PyPI - nucliadb-models - Versions diffs - 6.9.7.post5583__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl - Mend

nucliadb-models 6.9.7.post5583__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

nucliadb_models/agents/ingestion.py +4 -4
nucliadb_models/augment.py +100 -84
nucliadb_models/common.py +56 -56
nucliadb_models/configuration.py +8 -8
nucliadb_models/content_types.py +13 -11
nucliadb_models/conversation.py +25 -26
nucliadb_models/entities.py +17 -18
nucliadb_models/external_index_providers.py +1 -2
nucliadb_models/extracted.py +82 -83
nucliadb_models/file.py +10 -11
nucliadb_models/filters.py +78 -74
nucliadb_models/graph/requests.py +40 -48
nucliadb_models/graph/responses.py +13 -1
nucliadb_models/hydration.py +48 -50
nucliadb_models/internal/predict.py +7 -9
nucliadb_models/internal/shards.py +2 -3
nucliadb_models/labels.py +18 -11
nucliadb_models/link.py +18 -19
nucliadb_models/metadata.py +66 -54
nucliadb_models/notifications.py +3 -3
nucliadb_models/processing.py +1 -2
nucliadb_models/resource.py +85 -93
nucliadb_models/retrieval.py +147 -0
nucliadb_models/search.py +263 -275
nucliadb_models/security.py +2 -3
nucliadb_models/text.py +7 -8
nucliadb_models/trainset.py +1 -2
nucliadb_models/utils.py +2 -3
nucliadb_models/vectors.py +2 -5
nucliadb_models/writer.py +56 -57
{nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/METADATA +1 -1
nucliadb_models-6.11.1.post5822.dist-info/RECORD +41 -0
{nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/WHEEL +1 -1
nucliadb_models-6.9.7.post5583.dist-info/RECORD +0 -40
{nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/top_level.txt +0 -0

nucliadb_models/filters.py CHANGED Viewed

@@ -15,12 +15,13 @@
 from collections.abc import Sequence
 from enum import Enum
-from typing import Any, Generic, Literal, Optional, TypeVar, Union
+from typing import Annotated, Any, Generic, Literal, TypeVar
 from uuid import UUID
 import pydantic
 from pydantic import AliasChoices, BaseModel, Discriminator, Tag, field_validator, model_validator
-from typing_extensions import Annotated, Self
+from pydantic.config import ConfigDict
+from typing_extensions import Self
 from .common import FieldTypeName, Paragraph
 from .metadata import ResourceProcessingStatus
@@ -33,7 +34,10 @@ class And(BaseModel, Generic[F], extra="forbid"):
     """AND of other expressions"""
     operands: Sequence[F] = pydantic.Field(
-        serialization_alias="and", validation_alias=AliasChoices("operands", "and"), min_length=1
+        title="And Operands",
+        serialization_alias="and",
+        validation_alias=AliasChoices("operands", "and"),
+        min_length=1,
     )
     @pydantic.model_serializer
@@ -45,7 +49,10 @@ class Or(BaseModel, Generic[F], extra="forbid"):
     """OR of other expressions"""
     operands: Sequence[F] = pydantic.Field(
-        serialization_alias="or", validation_alias=AliasChoices("operands", "or"), min_length=1
+        title="Or Operands",
+        serialization_alias="or",
+        validation_alias=AliasChoices("operands", "or"),
+        min_length=1,
     )
     @pydantic.model_serializer
@@ -57,7 +64,7 @@ class Not(BaseModel, Generic[F], extra="forbid"):
     """NOT another expression"""
     operand: F = pydantic.Field(
-        serialization_alias="not", validation_alias=AliasChoices("operand", "not")
+        title="Not Operand", serialization_alias="not", validation_alias=AliasChoices("operand", "not")
     )
     @pydantic.model_serializer
@@ -78,11 +85,11 @@ class FilterProp(BaseModel):
 class Resource(FilterProp, extra="forbid"):
     """Matches all fields of a resource given its id or slug"""
+    model_config = ConfigDict(title="Resource Filter")
     prop: Literal["resource"] = "resource"
-    id: Optional[str] = pydantic.Field(default=None, description="UUID of the resource to match")
-    slug: Optional[SlugString] = pydantic.Field(
-        default=None, description="Slug of the resource to match"
-    )
+    id: str | None = pydantic.Field(default=None, description="UUID of the resource to match")
+    slug: SlugString | None = pydantic.Field(default=None, description="Slug of the resource to match")
     @field_validator("id", mode="after")
     def validate_id(cls, v: str) -> str:
@@ -107,8 +114,9 @@ class Field(FilterProp, extra="forbid"):
     prop: Literal["field"] = "field"
     type: FieldTypeName = pydantic.Field(description="Type of the field to match, ")
-    name: Optional[str] = pydantic.Field(
+    name: str | None = pydantic.Field(
         default=None,
+        title="Field Filter",
         description="Name of the field to match. If blank, matches all fields of the given type",
     )
@@ -124,10 +132,10 @@ class DateCreated(FilterProp, extra="forbid"):
     """Matches all fields created in a date range"""
     prop: Literal["created"] = "created"
-    since: Optional[DateTime] = pydantic.Field(
+    since: DateTime | None = pydantic.Field(
         default=None, description="Start of the date range. Leave blank for unbounded"
     )
-    until: Optional[DateTime] = pydantic.Field(
+    until: DateTime | None = pydantic.Field(
         default=None, description="End of the date range. Leave blank for unbounded"
     )
@@ -142,10 +150,10 @@ class DateModified(FilterProp, extra="forbid"):
     """Matches all fields modified in a date range"""
     prop: Literal["modified"] = "modified"
-    since: Optional[DateTime] = pydantic.Field(
+    since: DateTime | None = pydantic.Field(
         default=None, description="Start of the date range. Leave blank for unbounded"
     )
-    until: Optional[DateTime] = pydantic.Field(
+    until: DateTime | None = pydantic.Field(
         default=None, description="End of the date range. Leave blank for unbounded"
     )
@@ -160,8 +168,8 @@ class Label(FilterProp, extra="forbid"):
     """Matches fields/paragraphs with a label (or labelset)"""
     prop: Literal["label"] = "label"
-    labelset: str = pydantic.Field(description="The labelset to match")
-    label: Optional[str] = pydantic.Field(
+    labelset: str = pydantic.Field(description="The ID of the labelset to match")
+    label: str | None = pydantic.Field(
         default=None,
         description="The label to match. If blank, matches all labels in the given labelset",
     )
@@ -177,7 +185,7 @@ class ResourceMimetype(FilterProp, extra="forbid"):
     type: str = pydantic.Field(
         description="Type of the mimetype to match. e.g: In image/jpeg, type is image"
     )
-    subtype: Optional[str] = pydantic.Field(
+    subtype: str | None = pydantic.Field(
         default=None,
         description=(
             "Type of the mimetype to match. e.g: In image/jpeg, subtype is jpeg."
@@ -193,7 +201,7 @@ class FieldMimetype(FilterProp, extra="forbid"):
     type: str = pydantic.Field(
         description="Type of the mimetype to match. e.g: In image/jpeg, type is image"
     )
-    subtype: Optional[str] = pydantic.Field(
+    subtype: str | None = pydantic.Field(
         default=None,
         description=(
             "Type of the mimetype to match. e.g: In image/jpeg, subtype is jpeg."
@@ -207,7 +215,7 @@ class Entity(FilterProp, extra="forbid"):
     prop: Literal["entity"] = "entity"
     subtype: str = pydantic.Field(description="Type of the entity. e.g: PERSON")
-    value: Optional[str] = pydantic.Field(
+    value: str | None = pydantic.Field(
         default=None,
         description="Value of the entity. e.g: Anna. If blank, matches any entity of the given type",
     )
@@ -235,8 +243,8 @@ class OriginMetadata(FilterProp, extra="forbid"):
     """Matches metadata from the origin"""
     prop: Literal["origin_metadata"] = "origin_metadata"
-    field: str = pydantic.Field(description="Metadata field")
-    value: Optional[str] = pydantic.Field(
+    field: str = pydantic.Field(title="Origin Metadata Field", description="Metadata field")
+    value: str | None = pydantic.Field(
         default=None,
         description="Value of the metadata field. If blank, matches any document with the given metadata field set (to any value)",
     )
@@ -246,7 +254,7 @@ class OriginPath(FilterProp, extra="forbid"):
     """Matches the origin path"""
     prop: Literal["origin_path"] = "origin_path"
-    prefix: Optional[str] = pydantic.Field(
+    prefix: str | None = pydantic.Field(
         default=None,
         description=(
             "Prefix of the path, matches all paths under this prefix"
@@ -259,7 +267,7 @@ class OriginSource(FilterProp, extra="forbid"):
     """Matches the origin source id"""
     prop: Literal["origin_source"] = "origin_source"
-    id: Optional[str] = pydantic.Field(default=None, description=("Source ID"))
+    id: str | None = pydantic.Field(default=None, description=("Source ID"))
 class OriginCollaborator(FilterProp, extra="forbid"):
@@ -276,7 +284,7 @@ class Generated(FilterProp, extra="forbid"):
     by: Literal["data-augmentation"] = pydantic.Field(
         description="Generator for this field. Currently, only data-augmentation is supported"
     )
-    da_task: Optional[str] = pydantic.Field(
+    da_task: str | None = pydantic.Field(
         default=None, description="Matches field generated by an specific DA task, given its prefix"
     )
@@ -297,7 +305,7 @@ class Status(FilterProp, extra="forbid"):
 # The discriminator function is optional, everything works without it.
 # We implement it because it makes pydantic produce more user-friendly errors
-def filter_discriminator(v: Any) -> Optional[str]:
+def filter_discriminator(v: Any) -> str | None:
     if isinstance(v, dict):
         if "and" in v:
             return "and"
@@ -319,59 +327,53 @@ def filter_discriminator(v: Any) -> Optional[str]:
 FieldFilterExpression = Annotated[
-    Union[
-        Annotated[And["FieldFilterExpression"], Tag("and")],
-        Annotated[Or["FieldFilterExpression"], Tag("or")],
-        Annotated[Not["FieldFilterExpression"], Tag("not")],
-        Annotated[Resource, Tag("resource")],
-        Annotated[Field, Tag("field")],
-        Annotated[Keyword, Tag("keyword")],
-        Annotated[DateCreated, Tag("created")],
-        Annotated[DateModified, Tag("modified")],
-        Annotated[Label, Tag("label")],
-        Annotated[ResourceMimetype, Tag("resource_mimetype")],
-        Annotated[FieldMimetype, Tag("field_mimetype")],
-        Annotated[Entity, Tag("entity")],
-        Annotated[Language, Tag("language")],
-        Annotated[OriginTag, Tag("origin_tag")],
-        Annotated[OriginMetadata, Tag("origin_metadata")],
-        Annotated[OriginPath, Tag("origin_path")],
-        Annotated[OriginSource, Tag("origin_source")],
-        Annotated[OriginCollaborator, Tag("origin_collaborator")],
-        Annotated[Generated, Tag("generated")],
-    ],
+    Annotated[And["FieldFilterExpression"], Tag("and")]
+    | Annotated[Or["FieldFilterExpression"], Tag("or")]
+    | Annotated[Not["FieldFilterExpression"], Tag("not")]
+    | Annotated[Resource, Tag("resource")]
+    | Annotated[Field, Tag("field")]
+    | Annotated[Keyword, Tag("keyword")]
+    | Annotated[DateCreated, Tag("created")]
+    | Annotated[DateModified, Tag("modified")]
+    | Annotated[Label, Tag("label")]
+    | Annotated[ResourceMimetype, Tag("resource_mimetype")]
+    | Annotated[FieldMimetype, Tag("field_mimetype")]
+    | Annotated[Entity, Tag("entity")]
+    | Annotated[Language, Tag("language")]
+    | Annotated[OriginTag, Tag("origin_tag")]
+    | Annotated[OriginMetadata, Tag("origin_metadata")]
+    | Annotated[OriginPath, Tag("origin_path")]
+    | Annotated[OriginSource, Tag("origin_source")]
+    | Annotated[OriginCollaborator, Tag("origin_collaborator")]
+    | Annotated[Generated, Tag("generated")],
     Discriminator(filter_discriminator),
 ]
 ParagraphFilterExpression = Annotated[
-    Union[
-        Annotated[And["ParagraphFilterExpression"], Tag("and")],
-        Annotated[Or["ParagraphFilterExpression"], Tag("or")],
-        Annotated[Not["ParagraphFilterExpression"], Tag("not")],
-        Annotated[Label, Tag("label")],
-        Annotated[Kind, Tag("kind")],
-    ],
+    Annotated[And["ParagraphFilterExpression"], Tag("and")]
+    | Annotated[Or["ParagraphFilterExpression"], Tag("or")]
+    | Annotated[Not["ParagraphFilterExpression"], Tag("not")]
+    | Annotated[Label, Tag("label")]
+    | Annotated[Kind, Tag("kind")],
     Discriminator(filter_discriminator),
 ]
 ResourceFilterExpression = Annotated[
-    Union[
-        Annotated[And["ResourceFilterExpression"], Tag("and")],
-        Annotated[Or["ResourceFilterExpression"], Tag("or")],
-        Annotated[Not["ResourceFilterExpression"], Tag("not")],
-        Annotated[Resource, Tag("resource")],
-        Annotated[DateCreated, Tag("created")],
-        Annotated[DateModified, Tag("modified")],
-        Annotated[Label, Tag("label")],
-        Annotated[ResourceMimetype, Tag("resource_mimetype")],
-        Annotated[Language, Tag("language")],
-        Annotated[OriginTag, Tag("origin_tag")],
-        Annotated[OriginMetadata, Tag("origin_metadata")],
-        Annotated[OriginPath, Tag("origin_path")],
-        Annotated[OriginSource, Tag("origin_source")],
-        Annotated[OriginCollaborator, Tag("origin_collaborator")],
-        Annotated[Status, Tag("status")],
-    ],
+    Annotated[And["ResourceFilterExpression"], Tag("and")]
+    | Annotated[Or["ResourceFilterExpression"], Tag("or")]
+    | Annotated[Not["ResourceFilterExpression"], Tag("not")]
+    | Annotated[Resource, Tag("resource")]
+    | Annotated[DateCreated, Tag("created")]
+    | Annotated[DateModified, Tag("modified")]
+    | Annotated[Label, Tag("label")]
+    | Annotated[ResourceMimetype, Tag("resource_mimetype")]
+    | Annotated[Language, Tag("language")]
+    | Annotated[OriginTag, Tag("origin_tag")]
+    | Annotated[OriginMetadata, Tag("origin_metadata")]
+    | Annotated[OriginPath, Tag("origin_path")]
+    | Annotated[OriginSource, Tag("origin_source")]
+    | Annotated[OriginCollaborator, Tag("origin_collaborator")]
+    | Annotated[Status, Tag("status")],
     Discriminator(filter_discriminator),
 ]
@@ -388,10 +390,10 @@ class FilterExpression(BaseModel, extra="forbid"):
         AND = "and"
         OR = "or"
-    field: Optional[FieldFilterExpression] = pydantic.Field(
-        default=None, description="Filter to apply to fields"
+    field: FieldFilterExpression | None = pydantic.Field(
+        default=None, title="Field Filters", description="Filter to apply to fields"
     )
-    paragraph: Optional[ParagraphFilterExpression] = pydantic.Field(
+    paragraph: ParagraphFilterExpression | None = pydantic.Field(
         default=None, description="Filter to apply to each text block"
     )
@@ -413,4 +415,6 @@ class CatalogFilterExpression(BaseModel, extra="forbid"):
     `filters`, `range_*`, `with_status`.
     """
-    resource: ResourceFilterExpression = pydantic.Field(description="Filter to apply to resources")
+    resource: ResourceFilterExpression = pydantic.Field(
+        title="Resource filters", description="Filter to apply to resources"
+    )

nucliadb_models/graph/requests.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 #
 from enum import Enum
-from typing import Annotated, Any, Literal, Optional, Union
+from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Discriminator, Field, Tag, model_validator
 from typing_extensions import Self
@@ -38,17 +38,18 @@ class GraphProp(BaseModel):
 class NodeMatchKindName(str, Enum):
     EXACT = "exact"
     FUZZY = "fuzzy"
+    FUZZY_WORDS = "fuzzy_words"
 class GraphNode(BaseModel, extra="forbid"):
-    value: Optional[str] = None
+    value: str | None = None
     match: NodeMatchKindName = NodeMatchKindName.EXACT
-    type: Optional[RelationNodeType] = RelationNodeType.ENTITY
-    group: Optional[str] = None
+    type: RelationNodeType | None = RelationNodeType.ENTITY
+    group: str | None = None
     @model_validator(mode="after")
     def validate_fuzzy_usage(self) -> Self:
-        if self.match == NodeMatchKindName.FUZZY:
+        if self.match in (NodeMatchKindName.FUZZY, NodeMatchKindName.FUZZY_WORDS):
             if self.value is None:
                 raise ValueError("Fuzzy match can only be used if a node value is provided")
             else:
@@ -60,8 +61,8 @@ class GraphNode(BaseModel, extra="forbid"):
 class GraphRelation(BaseModel, extra="forbid"):
-    label: Optional[str] = None
-    type: Optional[RelationType] = None
+    label: str | None = None
+    type: RelationType | None = None
 ## Models for query expressions
@@ -85,9 +86,9 @@ class Relation(GraphRelation, GraphProp):
 class GraphPath(GraphProp, extra="forbid"):
     prop: Literal["path"] = "path"
-    source: Optional[GraphNode] = None
-    relation: Optional[GraphRelation] = None
-    destination: Optional[GraphNode] = None
+    source: GraphNode | None = None
+    relation: GraphRelation | None = None
+    destination: GraphNode | None = None
     undirected: bool = False
@@ -102,7 +103,7 @@ class Generated(GraphProp, extra="forbid"):
     prop: Literal["generated"] = "generated"
     by: Generator = Field(description="Generator for this field.")
-    da_task: Optional["str"] = Field(
+    da_task: str | None = Field(
         default=None, description="Matches relations generated by an specific DA task, given its prefix"
     )
@@ -122,12 +123,14 @@ class GraphFilterExpression(BaseModel, extra="forbid"):
     Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters
     """
-    field: FieldFilterExpression = Field(description="Filter to apply to fields")
+    field: FieldFilterExpression = Field(
+        title="Graph Field Filters", description="Filter to apply to fields"
+    )
 class BaseGraphSearchRequest(BaseModel):
     top_k: int = Field(default=50, le=500, title="Number of results to retrieve")
-    filter_expression: Optional[GraphFilterExpression] = Field(
+    filter_expression: GraphFilterExpression | None = Field(
         default=None,
         title="Filter resource by an expression",
         description=(
@@ -135,10 +138,10 @@ class BaseGraphSearchRequest(BaseModel):
             "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters "
         ),
     )
-    security: Optional[RequestSecurity] = Field(
+    security: RequestSecurity | None = Field(
         default=None,
         title="Security",
-        description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",  # noqa: E501
+        description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
     )
     show_hidden: bool = Field(
         default=False,
@@ -153,64 +156,53 @@ graph_query_discriminator = filter_discriminator
 # Paths search
 GraphPathQuery = Annotated[
-    Union[
-        # bool expressions
-        Annotated[And["GraphPathQuery"], Tag("and")],
-        Annotated[Or["GraphPathQuery"], Tag("or")],
-        Annotated[Not["GraphPathQuery"], Tag("not")],
-        # paths
-        Annotated[GraphPath, Tag("path")],
-        # nodes
-        Annotated[SourceNode, Tag("source_node")],
-        Annotated[DestinationNode, Tag("destination_node")],
-        Annotated[AnyNode, Tag("node")],
-        # relations
-        Annotated[Relation, Tag("relation")],
-        # metadata
-        Annotated[Generated, Tag("generated")],
-    ],
+    Annotated[And["GraphPathQuery"], Tag("and")]
+    | Annotated[Or["GraphPathQuery"], Tag("or")]
+    | Annotated[Not["GraphPathQuery"], Tag("not")]
+    | Annotated[GraphPath, Tag("path")]
+    | Annotated[SourceNode, Tag("source_node")]
+    | Annotated[DestinationNode, Tag("destination_node")]
+    | Annotated[AnyNode, Tag("node")]
+    | Annotated[Relation, Tag("relation")]
+    | Annotated[Generated, Tag("generated")],
     Discriminator(graph_query_discriminator),
 ]
 class GraphSearchRequest(BaseGraphSearchRequest):
-    query: GraphPathQuery
+    query: GraphPathQuery = Field(title="Graph Path Query")
 # Nodes search
 GraphNodesQuery = Annotated[
-    Union[
-        Annotated[And["GraphNodesQuery"], Tag("and")],
-        Annotated[Or["GraphNodesQuery"], Tag("or")],
-        Annotated[Not["GraphNodesQuery"], Tag("not")],
-        Annotated[AnyNode, Tag("node")],
-        Annotated[Generated, Tag("generated")],
-    ],
+    Annotated[And["GraphNodesQuery"], Tag("and")]
+    | Annotated[Or["GraphNodesQuery"], Tag("or")]
+    | Annotated[Not["GraphNodesQuery"], Tag("not")]
+    | Annotated[AnyNode, Tag("node")]
+    | Annotated[Generated, Tag("generated")],
     Discriminator(graph_query_discriminator),
 ]
 class GraphNodesSearchRequest(BaseGraphSearchRequest):
-    query: GraphNodesQuery
+    query: GraphNodesQuery = Field(title="Graph Nodes Query")
 # Relations search
 GraphRelationsQuery = Annotated[
-    Union[
-        Annotated[And["GraphRelationsQuery"], Tag("and")],
-        Annotated[Or["GraphRelationsQuery"], Tag("or")],
-        Annotated[Not["GraphRelationsQuery"], Tag("not")],
-        Annotated[Relation, Tag("relation")],
-        Annotated[Generated, Tag("generated")],
-    ],
+    Annotated[And["GraphRelationsQuery"], Tag("and")]
+    | Annotated[Or["GraphRelationsQuery"], Tag("or")]
+    | Annotated[Not["GraphRelationsQuery"], Tag("not")]
+    | Annotated[Relation, Tag("relation")]
+    | Annotated[Generated, Tag("generated")],
     Discriminator(graph_query_discriminator),
 ]
 class GraphRelationsSearchRequest(BaseGraphSearchRequest):
-    query: GraphRelationsQuery
+    query: GraphRelationsQuery = Field(title="Graph Relations Query")
 # We need this to avoid issues with pydantic and generic types defined in another module

nucliadb_models/graph/responses.py CHANGED Viewed

@@ -14,7 +14,7 @@
 #
 from enum import Enum
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from nucliadb_models.metadata import RelationNodeType, RelationType
@@ -40,10 +40,22 @@ class GraphRelation(BaseModel):
     type: RelationType
+class PathMetadata(BaseModel):
+    # {rid}/{field_type}/{field_id}
+    field_id: str | None = Field(
+        default=None, description="Field id where the relation has been extracted from"
+    )
+    # {rid}/{field_type}/{field_id}/{paragraph_start}-{paragraph_end}
+    paragraph_id: str | None = Field(
+        default=None, description="Paragraph id where the relation has been extracted from"
+    )
 class GraphPath(BaseModel):
     source: GraphNode
     relation: GraphRelation
     destination: GraphNode
+    metadata: PathMetadata | None
 class GraphSearchResponse(BaseModel):

nucliadb-models 6.9.7.post5583__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl

nucliadb-models 6.9.7.post5583__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl