PyPI - nucliadb-models - Versions diffs - 6.9.3.post5346__py3-none-any.whl → 6.10.0.post5788__py3-none-any.whl - Mend

nucliadb-models 6.9.3.post5346__py3-none-any.whl → 6.10.0.post5788__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nucliadb-models might be problematic. Click here for more details.

Files changed (35) hide show

nucliadb_models/agents/ingestion.py +4 -4
nucliadb_models/augment.py +355 -0
nucliadb_models/common.py +57 -57
nucliadb_models/configuration.py +8 -8
nucliadb_models/content_types.py +13 -11
nucliadb_models/conversation.py +25 -26
nucliadb_models/entities.py +17 -18
nucliadb_models/external_index_providers.py +1 -2
nucliadb_models/extracted.py +82 -83
nucliadb_models/file.py +10 -11
nucliadb_models/filters.py +79 -75
nucliadb_models/graph/requests.py +40 -48
nucliadb_models/graph/responses.py +13 -1
nucliadb_models/hydration.py +50 -52
nucliadb_models/internal/predict.py +7 -9
nucliadb_models/internal/shards.py +2 -3
nucliadb_models/labels.py +18 -11
nucliadb_models/link.py +18 -19
nucliadb_models/metadata.py +66 -54
nucliadb_models/notifications.py +3 -3
nucliadb_models/processing.py +1 -2
nucliadb_models/resource.py +85 -102
nucliadb_models/retrieval.py +147 -0
nucliadb_models/search.py +331 -283
nucliadb_models/security.py +2 -3
nucliadb_models/text.py +7 -8
nucliadb_models/trainset.py +1 -2
nucliadb_models/utils.py +2 -3
nucliadb_models/vectors.py +2 -5
nucliadb_models/writer.py +56 -57
{nucliadb_models-6.9.3.post5346.dist-info → nucliadb_models-6.10.0.post5788.dist-info}/METADATA +1 -1
nucliadb_models-6.10.0.post5788.dist-info/RECORD +41 -0
nucliadb_models-6.9.3.post5346.dist-info/RECORD +0 -39
{nucliadb_models-6.9.3.post5346.dist-info → nucliadb_models-6.10.0.post5788.dist-info}/WHEEL +0 -0
{nucliadb_models-6.9.3.post5346.dist-info → nucliadb_models-6.10.0.post5788.dist-info}/top_level.txt +0 -0

nucliadb_models/hydration.py CHANGED Viewed

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from typing import Annotated, Optional, Union
+from typing import Annotated
 from pydantic import BaseModel, Field, StringConstraints
@@ -105,23 +105,23 @@ class GenericFieldHydration(BaseModel, extra="forbid"):
 class FieldHydration(BaseModel, extra="forbid"):
-    text: Optional[TextFieldHydration] = Field(
+    text: TextFieldHydration | None = Field(
         default_factory=TextFieldHydration,
         description="Text fields hydration options",
     )
-    file: Optional[FileFieldHydration] = Field(
+    file: FileFieldHydration | None = Field(
         default_factory=FileFieldHydration,
         description="File fields hydration options",
     )
-    link: Optional[LinkFieldHydration] = Field(
+    link: LinkFieldHydration | None = Field(
         default_factory=LinkFieldHydration,
         description="Link fields hydration options",
     )
-    conversation: Optional[ConversationFieldHydration] = Field(
+    conversation: ConversationFieldHydration | None = Field(
         default_factory=ConversationFieldHydration,
         description="Conversation fields hydration options",
     )
-    generic: Optional[GenericFieldHydration] = Field(
+    generic: GenericFieldHydration | None = Field(
         default_factory=GenericFieldHydration,
         description="Generic fields hydration options",
     )
@@ -141,7 +141,7 @@ class NeighbourParagraphHydration(BaseModel, extra="forbid"):
 class RelatedParagraphHydration(BaseModel, extra="forbid"):
-    neighbours: Optional[NeighbourParagraphHydration] = Field(
+    neighbours: NeighbourParagraphHydration | None = Field(
         default=None,
         description="Hydrate extra paragraphs that surround the original one",
     )
@@ -205,11 +205,11 @@ class ParagraphHydration(BaseModel, extra="forbid"):
         default=True,
         description="Hydrate paragraph text",
     )
-    image: Optional[ImageParagraphHydration] = Field(
+    image: ImageParagraphHydration | None = Field(
         default=None,
         description="Hydrate options for paragraphs extracted from images (using OCR, inception...)",
     )
-    table: Optional[TableParagraphHydration] = Field(
+    table: TableParagraphHydration | None = Field(
         default=None,
         description="Hydrate options for paragraphs extracted from tables",
     )
@@ -217,19 +217,19 @@ class ParagraphHydration(BaseModel, extra="forbid"):
     # TODO: at some point, we should add hydration options for paragraphs from
     # audio and video
-    page: Optional[ParagraphPageHydration] = Field(
+    page: ParagraphPageHydration | None = Field(
         default=None,
         description="Hydrte options for paragraphs within a page. This applies to paragraphs in fields with pages",
     )
-    related: Optional[RelatedParagraphHydration] = Field(
+    related: RelatedParagraphHydration | None = Field(
         default=None,
         description="Hydration options for related paragraphs. For example, neighbours or sibling paragraphs",
     )
 class Hydration(BaseModel, extra="forbid"):
-    resource: Optional[ResourceHydration] = Field(
+    resource: ResourceHydration | None = Field(
         default_factory=ResourceHydration,
         description="Resource hydration options",
     )
@@ -247,8 +247,8 @@ ParagraphId = Annotated[
     str,
     StringConstraints(
         # resource-uuid/field-type/field-id/[split-id/]paragraph-id
-        pattern=r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+/([^/]{1,128}/)?[0-9]+-[0-9]+$",
-        min_length=32 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 3,
+        pattern=r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+(/[^/]{1,128})?/[0-9]+-[0-9]+$",
+        min_length=32 + 1 + 1 + 1 + 1 + 0 + 0 + 1 + 3,
         # max field id of 250 and 10 digit paragraphs. More than enough
         max_length=32 + 1 + 1 + 1 + 250 + 1 + 128 + 1 + 21,
     ),
@@ -270,22 +270,22 @@ class HydratedResource(BaseModel, extra="forbid"):
     id: str = Field(description="Unique resource id")
     slug: str = Field(description="Resource slug")
-    title: Optional[str] = None
-    summary: Optional[str] = None
+    title: str | None = None
+    summary: str | None = None
-    origin: Optional[Origin] = None
+    origin: Origin | None = None
-    security: Optional[ResourceSecurity] = None
+    security: ResourceSecurity | None = None
     # TODO: add resource labels to hydrated resources
 class FieldExtractedData(BaseModel, extra="forbid"):
-    text: Optional[str] = None
+    text: str | None = None
 class SplitFieldExtractedData(BaseModel, extra="forbid"):
-    texts: Optional[dict[str, str]] = None
+    texts: dict[str, str] | None = None
 class HydratedTextField(BaseModel, extra="forbid"):
@@ -293,8 +293,8 @@ class HydratedTextField(BaseModel, extra="forbid"):
     resource: str = Field("Field resource id")
     field_type: FieldTypeName = FieldTypeName.TEXT
-    value: Optional[FieldText] = None
-    extracted: Optional[FieldExtractedData] = None
+    value: FieldText | None = None
+    extracted: FieldExtractedData | None = None
 class HydratedFileField(BaseModel, extra="forbid"):
@@ -302,10 +302,10 @@ class HydratedFileField(BaseModel, extra="forbid"):
     resource: str = Field("Field resource id")
     field_type: FieldTypeName = FieldTypeName.FILE
-    value: Optional[FieldFile] = None
-    extracted: Optional[FieldExtractedData] = None
+    value: FieldFile | None = None
+    extracted: FieldExtractedData | None = None
-    previews: Optional[dict[str, Image]] = Field(
+    previews: dict[str, Image] | None = Field(
         default=None,
         title="Previews of specific parts of the field",
         description=(
@@ -323,8 +323,8 @@ class HydratedLinkField(BaseModel, extra="forbid"):
     resource: str = Field("Field resource id")
     field_type: FieldTypeName = FieldTypeName.LINK
-    value: Optional[FieldLink] = None
-    extracted: Optional[FieldExtractedData] = None
+    value: FieldLink | None = None
+    extracted: FieldExtractedData | None = None
 class HydratedConversationField(BaseModel, extra="forbid"):
@@ -332,8 +332,8 @@ class HydratedConversationField(BaseModel, extra="forbid"):
     resource: str = Field("Field resource id")
     field_type: FieldTypeName = FieldTypeName.CONVERSATION
-    value: Optional[FieldConversation] = None
-    extracted: Optional[FieldExtractedData] = None
+    value: FieldConversation | None = None
+    extracted: FieldExtractedData | None = None
 class HydratedGenericField(BaseModel, extra="forbid"):
@@ -341,24 +341,24 @@ class HydratedGenericField(BaseModel, extra="forbid"):
     resource: str = Field("Field resource id")
     field_type: FieldTypeName = FieldTypeName.TEXT
-    value: Optional[str] = None
-    extracted: Optional[FieldExtractedData] = None
+    value: str | None = None
+    extracted: FieldExtractedData | None = None
 class RelatedNeighbourParagraphRefs(BaseModel, extra="forbid"):
-    before: Optional[list[str]] = None
-    after: Optional[list[str]] = None
+    before: list[str] | None = None
+    after: list[str] | None = None
 class RelatedParagraphRefs(BaseModel, extra="forbid"):
-    neighbours: Optional[RelatedNeighbourParagraphRefs] = None
-    parents: Optional[list[str]] = None
-    siblings: Optional[list[str]] = None
-    replacements: Optional[list[str]] = None
+    neighbours: RelatedNeighbourParagraphRefs | None = None
+    parents: list[str] | None = None
+    siblings: list[str] | None = None
+    replacements: list[str] | None = None
 class HydratedParagraphImage(BaseModel, extra="forbid"):
-    source_image: Optional[Image] = Field(
+    source_image: Image | None = Field(
         default=None,
         description=(
             "Source image for this paragraph. This only applies to paragraphs "
@@ -369,7 +369,7 @@ class HydratedParagraphImage(BaseModel, extra="forbid"):
 class HydratedParagraphTable(BaseModel, extra="forbid"):
-    page_preview_ref: Optional[str] = Field(
+    page_preview_ref: str | None = Field(
         default=None,
         description=(
             "Referento to the page preview for this paragraph. The actual "
@@ -381,7 +381,7 @@ class HydratedParagraphTable(BaseModel, extra="forbid"):
 class HydratedParagraphPage(BaseModel, extra="forbid"):
-    page_preview_ref: Optional[str] = Field(
+    page_preview_ref: str | None = Field(
         default=None,
         description=(
             "Reference to the page preview for this paragraph. The actual "
@@ -398,28 +398,26 @@ class HydratedParagraph(BaseModel, extra="forbid"):
     field: str = Field(description="Paragraph field id")
     resource: str = Field(description="Paragraph resource id")
-    text: Optional[str] = None
+    text: str | None = None
     # TODO: add labels to hydrated paragraphs
     # labels: Optional[list[str]] = None
-    related: Optional[RelatedParagraphRefs] = None
+    related: RelatedParagraphRefs | None = None
-    image: Optional[HydratedParagraphImage] = None
-    table: Optional[HydratedParagraphTable] = None
-    page: Optional[HydratedParagraphPage] = None
+    image: HydratedParagraphImage | None = None
+    table: HydratedParagraphTable | None = None
+    page: HydratedParagraphPage | None = None
 class Hydrated(BaseModel, extra="forbid"):
     resources: dict[str, HydratedResource]
     fields: dict[
         str,
-        Union[
-            HydratedTextField,
-            HydratedFileField,
-            HydratedLinkField,
-            HydratedConversationField,
-            HydratedGenericField,
-        ],
+        HydratedTextField
+        | HydratedFileField
+        | HydratedLinkField
+        | HydratedConversationField
+        | HydratedGenericField,
     ]
     paragraphs: dict[str, HydratedParagraph]

nucliadb_models/internal/predict.py CHANGED Viewed

@@ -19,13 +19,11 @@ Models for Predict API v1.
 ATENTION! Keep these models in sync with models on Predict API
 """
-from typing import List, Optional
 from pydantic import BaseModel, Field
 class SentenceSearch(BaseModel):
-    vectors: dict[str, List[float]] = Field(
+    vectors: dict[str, list[float]] = Field(
         default_factory=dict,
         description="Sentence vectors for each semantic model",
         min_length=1,
@@ -45,14 +43,14 @@ class Ner(BaseModel):
 class TokenSearch(BaseModel):
-    tokens: List[Ner] = []
+    tokens: list[Ner] = []
     time: float
     input_tokens: int = 0
 class QueryInfo(BaseModel):
-    language: Optional[str]
-    stop_words: List[str] = Field(default_factory=list)
+    language: str | None
+    stop_words: list[str] = Field(default_factory=list)
     semantic_thresholds: dict[str, float] = Field(
         default_factory=dict,
         description="Semantic threshold for each semantic model",
@@ -60,10 +58,10 @@ class QueryInfo(BaseModel):
     )
     visual_llm: bool
     max_context: int
-    entities: Optional[TokenSearch]
-    sentence: Optional[SentenceSearch]
+    entities: TokenSearch | None
+    sentence: SentenceSearch | None
     query: str
-    rephrased_query: Optional[str] = None
+    rephrased_query: str | None = None
 class RerankModel(BaseModel):

nucliadb_models/internal/shards.py CHANGED Viewed

@@ -13,7 +13,6 @@
 # limitations under the License.
 #
 from enum import Enum
-from typing import List, Optional
 from pydantic import BaseModel
@@ -58,9 +57,9 @@ class ShardReplica(BaseModel):
 class ShardObject(BaseModel):
     shard: str
-    nidx_shard_id: Optional[str]
+    nidx_shard_id: str | None
 class KnowledgeboxShards(BaseModel):
     kbid: str
-    shards: List[ShardObject]
+    shards: list[ShardObject]

nucliadb_models/labels.py CHANGED Viewed

@@ -14,9 +14,8 @@
 #
 from enum import Enum
-from typing import Dict, List, Optional
-from pydantic import BaseModel, model_validator
+from pydantic import BaseModel, Field, model_validator
 from typing_extensions import Self
 BASE_LABELS: dict[str, set[str]] = {
@@ -96,18 +95,26 @@ class LabelSetKind(str, Enum):
 class Label(BaseModel):
-    title: str
-    related: Optional[str] = None
-    text: Optional[str] = None
-    uri: Optional[str] = None
+    title: str = Field(
+        description="Title of the label. This is the display name for the label shown in the UI and also used for searching."
+    )
+    related: str | None = None
+    text: str | None = None
+    uri: str | None = None
 class LabelSet(BaseModel):
-    title: Optional[str] = None
-    color: Optional[str] = "blue"
+    title: str | None = Field(
+        default=None,
+        description="Title of the labelset. It is a prettier display name for the labelset shown in the UI but it is not intended to be used for searching.",
+    )
+    color: str | None = "blue"
     multiple: bool = True
-    kind: List[LabelSetKind] = []
-    labels: List[Label] = []
+    kind: list[LabelSetKind] = []
+    labels: list[Label] = Field(
+        default_factory=list,
+        description="List of labels in the labelset. The titles of the labels must be unique within the labelset.",
+    )
     @model_validator(mode="after")
     def check_unique_labels(self) -> Self:
@@ -123,4 +130,4 @@ class LabelSet(BaseModel):
 class KnowledgeBoxLabels(BaseModel):
     uuid: str
-    labelsets: Dict[str, LabelSet] = {}
+    labelsets: dict[str, LabelSet] = {}

nucliadb_models/link.py CHANGED Viewed

@@ -13,7 +13,6 @@
 # limitations under the License.
 #
 from datetime import datetime
-from typing import Dict, Optional
 from pydantic import BaseModel, Field
@@ -25,19 +24,19 @@ from pydantic import BaseModel, Field
 class FieldLink(BaseModel):
-    added: Optional[datetime] = None
-    headers: Optional[Dict[str, str]] = None
-    cookies: Optional[Dict[str, str]] = None
-    uri: Optional[str] = None
-    language: Optional[str] = None
-    localstorage: Optional[Dict[str, str]] = None
-    css_selector: Optional[str] = None
-    xpath: Optional[str] = None
-    extract_strategy: Optional[str] = Field(
+    added: datetime | None = None
+    headers: dict[str, str] | None = None
+    cookies: dict[str, str] | None = None
+    uri: str | None = None
+    language: str | None = None
+    localstorage: dict[str, str] | None = None
+    css_selector: str | None = None
+    xpath: str | None = None
+    extract_strategy: str | None = Field(
         default=None,
         description="Id of the Nuclia extract strategy used at processing time. If not set, the default strategy was used. Extract strategies are defined at the learning configuration api.",
     )
-    split_strategy: Optional[str] = Field(
+    split_strategy: str | None = Field(
         default=None,
         description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
     )
@@ -47,18 +46,18 @@ class FieldLink(BaseModel):
 class LinkField(BaseModel):
-    headers: Optional[Dict[str, str]] = {}
-    cookies: Optional[Dict[str, str]] = {}
+    headers: dict[str, str] | None = {}
+    cookies: dict[str, str] | None = {}
     uri: str
-    language: Optional[str] = None
-    localstorage: Optional[Dict[str, str]] = {}
-    css_selector: Optional[str] = None
-    xpath: Optional[str] = None
-    extract_strategy: Optional[str] = Field(
+    language: str | None = None
+    localstorage: dict[str, str] | None = {}
+    css_selector: str | None = None
+    xpath: str | None = None
+    extract_strategy: str | None = Field(
         default=None,
         description="Id of the Nuclia extract strategy to use at processing time. If not set, the default strategy will be used. Extract strategies are defined at the learning configuration api.",
     )
-    split_strategy: Optional[str] = Field(
+    split_strategy: str | None = Field(
         default=None,
         description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
     )

nucliadb-models 6.9.3.post5346__py3-none-any.whl → 6.10.0.post5788__py3-none-any.whl

Potentially problematic release.

nucliadb-models 6.9.3.post5346__py3-none-any.whl → 6.10.0.post5788__py3-none-any.whl