PyPI - nucliadb-models - Versions diffs - 6.9.2.post5276__py3-none-any.whl → 6.9.6.post5453__py3-none-any.whl - Mend

nucliadb-models 6.9.2.post5276__py3-none-any.whl → 6.9.6.post5453__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

nucliadb_models/augment.py ADDED Viewed

@@ -0,0 +1,85 @@
+# Copyright 2025 Bosutech XXI S.L.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from pydantic import BaseModel
+from nucliadb_models.common import FieldTypeName
+from nucliadb_models.resource import ExtractedDataTypeName, Resource
+from nucliadb_models.search import Image, ResourceProperties, SearchParamDefaults
+ParagraphId = str
+class AugmentedParagraph(BaseModel):
+    text: str | None = None
+    neighbours_before: dict[ParagraphId, str] | None = None
+    neighbours_after: dict[ParagraphId, str] | None = None
+    image: Image | None = None
+class AugmentedField(BaseModel):
+    page_preview_image: Image | None = None
+class AugmentedResource(Resource):
+    def updated_from(self, origin: Resource):
+        for key in origin.model_fields.keys():
+            self.__setattr__(key, getattr(origin, key))
+class AugmentResources(BaseModel):
+    given: list[str]
+    show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
+    extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
+    field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
+    # TODO: field name filter, da field prefix filter
+class AugmentParagraph(BaseModel):
+    id: ParagraphId
+class AugmentParagraphs(BaseModel):
+    given: list[AugmentParagraph]
+    text: bool = True
+    neighbours_before: int = 0
+    neighbours_after: int = 0
+    # paragraph extracted from an image, return an image
+    source_image: bool = False
+    # paragraph extracted from a table, return table image
+    table_image: bool = False
+    # return page_preview instead of table image if table image enabled
+    table_prefers_page_preview: bool = False
+    # paragraph from a page, return page preview image
+    page_preview_image: bool = False
+class AugmentRequest(BaseModel):
+    resources: AugmentResources
+    paragraphs: AugmentParagraphs
+class AugmentResponse(BaseModel):
+    resources: dict[str, AugmentedResource]
+    paragraphs: dict[str, AugmentedParagraph]

nucliadb_models/conversation.py CHANGED Viewed

@@ -86,7 +86,7 @@ class FieldConversation(BaseModel):
 class InputMessageContent(BaseModel):
-    text: str = Field(max_length=10 * 1024)
+    text: str = Field()
     format: MessageFormat = MessageFormat.PLAIN
     attachments: List[FileB64] = Field(default=[], max_length=50)
     attachments_fields: List[FieldRef] = Field(default=[], max_length=50)
@@ -129,7 +129,6 @@ class InputConversationField(BaseModel):
     messages: List[InputMessage] = Field(
         default_factory=list,
         description="List of messages in the conversation field. Each message must have a unique ident. A single conversation can contain up to 51,200 messages. You can add up to 2,048 messages per request.",
-        max_length=2048,
     )
     extract_strategy: Optional[str] = Field(
         default=None,

nucliadb_models/hydration.py CHANGED Viewed

@@ -246,10 +246,11 @@ class Hydration(BaseModel, extra="forbid"):
 ParagraphId = Annotated[
     str,
     StringConstraints(
-        pattern=r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+/[0-9]+-[0-9]+$",
-        min_length=32 + 1 + 1 + 1 + 1 + 1 + 3,
+        # resource-uuid/field-type/field-id/[split-id/]paragraph-id
+        pattern=r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+(/[^/]{1,128})?/[0-9]+-[0-9]+$",
+        min_length=32 + 1 + 1 + 1 + 1 + 0 + 0 + 1 + 3,
         # max field id of 250 and 10 digit paragraphs. More than enough
-        max_length=32 + 1 + 1 + 1 + 250 + 1 + 21,
+        max_length=32 + 1 + 1 + 1 + 250 + 1 + 128 + 1 + 21,
     ),
 ]

nucliadb_models/search.py CHANGED Viewed

@@ -347,10 +347,12 @@ SortOrderMap = {
 class SortOptions(BaseModel):
     field: SortField
-    limit: Optional[int] = Field(None, gt=0)
     order: SortOrder = SortOrder.DESC
+MAX_RANK_FUSION_WINDOW = 500
 class RankFusionName(str, Enum):
     RECIPROCAL_RANK_FUSION = "rrf"
@@ -380,7 +382,7 @@ class ReciprocalRankFusion(_BaseRankFusion):
     )
     window: Optional[int] = Field(
         default=None,
-        le=500,
+        le=MAX_RANK_FUSION_WINDOW,
         title="RRF window",
         description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",  # noqa: E501
     )
@@ -503,10 +505,18 @@ class SearchParamDefaults:
     )
     top_k = ParamDefault(
         default=20,
+        gt=-1,
         le=200,
         title="Top k",
         description="The number of results search should return. The maximum number of results allowed is 200.",
     )
+    offset = ParamDefault(
+        default=0,
+        gt=-1,
+        le=1000,
+        title="Results offset",
+        description="The number of results to skip, starting from the beginning in sort order. Used for pagination. It can only be used with the keyword and fulltext indexes.",
+    )
     highlight = ParamDefault(
         default=False,
         title="Highlight",
@@ -532,12 +542,6 @@ class SearchParamDefaults:
         title="Sort order",
         description="Order to sort results with",
     )
-    sort_limit = ParamDefault(
-        default=None,
-        title="Sort limit",
-        description="",
-        gt=0,
-    )
     sort_field = ParamDefault(
         default=None,
         title="Sort field",
@@ -938,12 +942,32 @@ class SearchRequest(BaseSearchRequest):
     )
     faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
     sort: Optional[SortOptions] = SearchParamDefaults.sort.to_pydantic_field()
+    offset: int = SearchParamDefaults.offset.to_pydantic_field()
     @field_validator("faceted")
     @classmethod
     def nested_facets_not_supported(cls, facets):
         return validate_facets(facets)
+    @model_validator(mode="after")
+    def offset_sort_only_on_keyword_indexes(self):
+        has_non_keyword_indexes = set(self.features) & {SearchOptions.SEMANTIC, SearchOptions.RELATIONS}
+        if has_non_keyword_indexes:
+            if self.offset > 0:
+                raise ValueError("offset cannot be used with the semantic or relations index")
+            if self.sort and self.sort.field != SortField.SCORE:
+                raise ValueError("sort by date cannot be used with the semantic or relations index")
+        return self
+    @field_validator("sort", mode="after")
+    @classmethod
+    def sorting_by_title_not_supported(cls, value: Optional[SortOptions]) -> Optional[SortOptions]:
+        if value and value.field == SortField.TITLE:
+            raise ValueError("sorting by title not supported in /search")
+        return value
 class Author(str, Enum):
     NUCLIA = "NUCLIA"

{nucliadb_models-6.9.2.post5276.dist-info → nucliadb_models-6.9.6.post5453.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb_models
-Version: 6.9.2.post5276
+Version: 6.9.6.post5453
 Author-email: Nuclia <nucliadb@nuclia.com>
 License-Expression: Apache-2.0
 Project-URL: Homepage, https://nuclia.com

{nucliadb_models-6.9.2.post5276.dist-info → nucliadb_models-6.9.6.post5453.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,16 @@
 nucliadb_models/__init__.py,sha256=3y8-htogKuCZcbhaUZdSjTeEjUSeec9aRWyL8AlKCyM,1077
+nucliadb_models/augment.py,sha256=vAtFh4D4eC4nvfwaRTlfeuAMOL9Z9TFZnUNiRAMasss,2543
 nucliadb_models/common.py,sha256=2dtKG4ZNi9p-yoNY76Uvyu1SlMeNYpH-MnuU3Q6w9Js,8169
 nucliadb_models/configuration.py,sha256=BBrJsNjP324Cw_5J3dBrGwvpkHQYbXEo3TUaI9IqAOg,2449
 nucliadb_models/content_types.py,sha256=36Ga-iGf4ivCqgtXC7imFgegrwHB117s9eqP62JtGv0,3456
-nucliadb_models/conversation.py,sha256=Ts-h7gMf18rw9tbjF2X__vNtMshHRDqbLua_VXb1qOE,5126
+nucliadb_models/conversation.py,sha256=k9bKhkDiqhqmdrDfDPNoUfG7-2H_-KAyuOnETd8zV0E,5081
 nucliadb_models/entities.py,sha256=i-7Y8qmFRRTih5zw0ajv1U_iiXexe66M3TK8hUikQZk,2356
 nucliadb_models/export_import.py,sha256=mNm9IArOLnC6TLupkwqVFhxD5d08mpIVOVFneECv8UA,1073
 nucliadb_models/external_index_providers.py,sha256=pL3leo4MkuJOnKlU1Sg6GT_mnK_VUBxGui-RPmDYVWU,1126
 nucliadb_models/extracted.py,sha256=Owz7LC3le3Dvau3TtRiO8NY84meOf6IxN-RrOqqpMPs,5593
 nucliadb_models/file.py,sha256=tXtgB9c7i2ADsnJ7HdbXyroAmXadGvOeA49htBh7BZo,2263
 nucliadb_models/filters.py,sha256=NQI2-4AFzzJuZy8NeY3jXlTbbU5wxiwMCP-5DrD-7lE,14759
-nucliadb_models/hydration.py,sha256=7SFnAcTQRE9etVccpph6aA1AUqsHVwkzT4YF6Uzl0Gs,14262
+nucliadb_models/hydration.py,sha256=SlAzraJE6DX0uOpZWxu2k_9-ikYorsj0t8xwsWSBQZY,14363
 nucliadb_models/labels.py,sha256=9zqRgkpZuX3kUPwsTTgCH7JyOWK7dM5pwyuHJR86YdU,3949
 nucliadb_models/link.py,sha256=PF5hHLwdOed5TMBTxtokkgWtMh1bFnORZjybh0NwVCw,2526
 nucliadb_models/metadata.py,sha256=OOKGy_83NtlG1QKQZEwMuwu4wbVEe7P30Y2QvnGSDto,8933
@@ -17,7 +18,7 @@ nucliadb_models/notifications.py,sha256=mna8-AoD_29Wds0Thl0AF0zpERnJmYGLZX1w1fUo
 nucliadb_models/processing.py,sha256=nhKuHQjqCdb9zJVkYGPTLub23tK9e_lwL5OCDVymZjY,719
 nucliadb_models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nucliadb_models/resource.py,sha256=RzCos0QRgSMkaV-p7EoceSmt7UTzt9G9be5BKF-iGrQ,9021
-nucliadb_models/search.py,sha256=gQEXJ9bXXcxswr7aOzvBeGIQlrq5TgRWIqTxKEbSoCE,96409
+nucliadb_models/search.py,sha256=_vn3pDXcK4iwiCfim3BtlD5EaQAeXoxl2IfNDsrKesA,97514
 nucliadb_models/security.py,sha256=opxaDLfvk3aU0sjesK0jGrYLx5h4YCwlKKN0moYs_ig,1150
 nucliadb_models/synonyms.py,sha256=afbaVqSQSxGLwi2PusVaLSRpkOtA5AZmWOKd1f4nl2E,690
 nucliadb_models/text.py,sha256=60bxZnOjRHnDdezR8VfR3AZsXTOwePFPs2BKB8wxBak,3414
@@ -33,7 +34,7 @@ nucliadb_models/graph/responses.py,sha256=Sdq8OgFAL1YT-1lJyLLrkqcScvj7YTEqAUwQ-k
 nucliadb_models/internal/__init__.py,sha256=zG33bUz1rHFPtvqQPWn4rDwBJt3FJodGuQYD45quiQg,583
 nucliadb_models/internal/predict.py,sha256=Pnx6MmLfK65eExe1XnVxqmSlvMwdowewwks9BOEoqMw,2029
 nucliadb_models/internal/shards.py,sha256=__y1OZtWGiNcPQEWfSFOj8yw458WGi7mM4vZe0K-L1Y,1691
-nucliadb_models-6.9.2.post5276.dist-info/METADATA,sha256=-kzs6LaR18FXQmyL87mIkJuOTwmGhctfqrU9Rn1AGuY,745
-nucliadb_models-6.9.2.post5276.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nucliadb_models-6.9.2.post5276.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
-nucliadb_models-6.9.2.post5276.dist-info/RECORD,,
+nucliadb_models-6.9.6.post5453.dist-info/METADATA,sha256=x-3GFIapOwe9kpigwte5HkdfaNqPk-iL7CuE_NM3PAE,745
+nucliadb_models-6.9.6.post5453.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nucliadb_models-6.9.6.post5453.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
+nucliadb_models-6.9.6.post5453.dist-info/RECORD,,

{nucliadb_models-6.9.2.post5276.dist-info → nucliadb_models-6.9.6.post5453.dist-info}/WHEEL RENAMED Viewed

File without changes

{nucliadb_models-6.9.2.post5276.dist-info → nucliadb_models-6.9.6.post5453.dist-info}/top_level.txt RENAMED Viewed

File without changes

nucliadb-models 6.9.2.post5276__py3-none-any.whl → 6.9.6.post5453__py3-none-any.whl

nucliadb-models 6.9.2.post5276__py3-none-any.whl → 6.9.6.post5453__py3-none-any.whl