PyPI - nucliadb-models - Versions diffs - 6.4.2.post4379__py3-none-any.whl → 6.4.2.post4403__py3-none-any.whl - Mend

nucliadb-models 6.4.2.post4379__py3-none-any.whl → 6.4.2.post4403__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

nucliadb_models/common.py CHANGED Viewed

@@ -193,6 +193,16 @@ class FieldTypeName(str, Enum):
     CONVERSATION = "conversation"
     GENERIC = "generic"
+    @classmethod
+    def from_abbreviation(cls, abbr: str) -> "FieldTypeName":
+        return {
+            "t": FieldTypeName.TEXT,
+            "f": FieldTypeName.FILE,
+            "u": FieldTypeName.LINK,
+            "c": FieldTypeName.CONVERSATION,
+            "a": FieldTypeName.GENERIC,
+        }[abbr]
 class FieldRef(BaseModel):
     field_type: FieldTypeName

nucliadb_models/trainset.py CHANGED Viewed

@@ -14,8 +14,52 @@
 #
-from pydantic import BaseModel
+from enum import Enum
+from typing import Optional
+from pydantic import BaseModel, Field, model_validator
+from nucliadb_models.filters import FilterExpression
 class TrainSetPartitions(BaseModel):
     partitions: list[str]
+class TrainSetType(int, Enum):
+    # NOTE: matches the TaskType in nucliadb_protos.dataset.proto
+    FIELD_CLASSIFICATION = 0
+    PARAGRAPH_CLASSIFICATION = 1
+    SENTENCE_CLASSIFICATION = 2
+    TOKEN_CLASSIFICATION = 3
+    IMAGE_CLASSIFICATION = 4
+    PARAGRAPH_STREAMING = 5
+    QUESTION_ANSWER_STREAMING = 6
+    FIELD_STREAMING = 7
+class TrainSet(BaseModel):
+    type: TrainSetType = Field(..., description="Streaming type")
+    filter_expression: Optional[FilterExpression] = Field(
+        default=None,
+        title="Filter resource by an expression",
+        description=(
+            "Returns only documents that match this filter expression. "
+            "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters. "
+            "It is only supported on FIELD_STREAMING types."
+        ),
+    )
+    batch_size: int = Field(
+        default=5,
+        description="Batch size of the resulting arrow file. This affects how many rows are read simultaneously while parsing the resulting arrow file.",
+    )
+    exclude_text: bool = Field(
+        default=False,
+        description="Set to True if the extracted text is not needed for the stream and it will not be added. This is useful to reduce the amount of data streamed.",
+    )
+    @model_validator(mode="after")
+    def validate_filter_expressions_supported_on_stream(self):
+        if self.filter_expression is not None and self.type != TrainSetType.FIELD_STREAMING:
+            raise ValueError(f"{self.type.name} does not support `filter_expression` parameter yet.")
+        return self

{nucliadb_models-6.4.2.post4379.dist-info → nucliadb_models-6.4.2.post4403.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb_models
-Version: 6.4.2.post4379
+Version: 6.4.2.post4403
 Author-email: Nuclia <nucliadb@nuclia.com>
 License-Expression: Apache-2.0
 Project-URL: Homepage, https://nuclia.com

{nucliadb_models-6.4.2.post4379.dist-info → nucliadb_models-6.4.2.post4403.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 nucliadb_models/__init__.py,sha256=3y8-htogKuCZcbhaUZdSjTeEjUSeec9aRWyL8AlKCyM,1077
-nucliadb_models/common.py,sha256=baBX72ngYjbCApmJp2flFEZ0VBBSv1lkeuz9zeTb8oE,7576
+nucliadb_models/common.py,sha256=E5rYsahuRKgUX3ZASIFHjR6LPBfezh453JbfJ6hbckI,7885
 nucliadb_models/configuration.py,sha256=V1d4hhs_1r-_ik7uBpqBHYrpzpMZYI4QeieWpHc8iOM,2508
 nucliadb_models/content_types.py,sha256=eMlBhWwzfYJFlErcWsNCvBdypbv8J9eC-MXw727QiBE,3430
 nucliadb_models/conversation.py,sha256=bgePP_aLvvnLV9VK4x90Aazlkdc4ajJKFVDYEa9sOJY,3383
@@ -20,7 +20,7 @@ nucliadb_models/search.py,sha256=Cg7W4Ng7G5UgMOAD-pkSz6fEcaiSTBdju6q8ApUNgag,853
 nucliadb_models/security.py,sha256=opxaDLfvk3aU0sjesK0jGrYLx5h4YCwlKKN0moYs_ig,1150
 nucliadb_models/synonyms.py,sha256=afbaVqSQSxGLwi2PusVaLSRpkOtA5AZmWOKd1f4nl2E,690
 nucliadb_models/text.py,sha256=kY2ub7AaGm-4vNaLX3Ju2VvRw-eKZ2LRdM9z7XCNaG0,2898
-nucliadb_models/trainset.py,sha256=KSFh81353jvg8Yxfp6bgaZSFB_MlN42A6RTlR2eUyX8,681
+nucliadb_models/trainset.py,sha256=BgUfgdClpwhk6UoOq5x6mbpOopgSmqg8he2bBzEzGqg,2406
 nucliadb_models/utils.py,sha256=OnWaDwZGwja8Spd_gpryuUpAMGIMhh-DNDGpoUYyb-A,2460
 nucliadb_models/vectors.py,sha256=_Z157PojPIwoeF5LStO0gz8IwxKy2styHjhdBkLd_44,1329
 nucliadb_models/vectorsets.py,sha256=XAgg9DfdfLYpfLh9OepJ_KPH0_RqRQNpVZJr74UnNh0,788
@@ -32,7 +32,7 @@ nucliadb_models/graph/responses.py,sha256=Sdq8OgFAL1YT-1lJyLLrkqcScvj7YTEqAUwQ-k
 nucliadb_models/internal/__init__.py,sha256=zG33bUz1rHFPtvqQPWn4rDwBJt3FJodGuQYD45quiQg,583
 nucliadb_models/internal/predict.py,sha256=Pnx6MmLfK65eExe1XnVxqmSlvMwdowewwks9BOEoqMw,2029
 nucliadb_models/internal/shards.py,sha256=__y1OZtWGiNcPQEWfSFOj8yw458WGi7mM4vZe0K-L1Y,1691
-nucliadb_models-6.4.2.post4379.dist-info/METADATA,sha256=ERIdfUeYZYQ_LhUji0WH9Y4d9u3uljTUNEXII8chMDw,776
-nucliadb_models-6.4.2.post4379.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nucliadb_models-6.4.2.post4379.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
-nucliadb_models-6.4.2.post4379.dist-info/RECORD,,
+nucliadb_models-6.4.2.post4403.dist-info/METADATA,sha256=knxWOrma5Q5JdKoHAEy2dcV0THXPionQQriNzcKl8oQ,776
+nucliadb_models-6.4.2.post4403.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nucliadb_models-6.4.2.post4403.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
+nucliadb_models-6.4.2.post4403.dist-info/RECORD,,

{nucliadb_models-6.4.2.post4379.dist-info → nucliadb_models-6.4.2.post4403.dist-info}/WHEEL RENAMED Viewed

File without changes

{nucliadb_models-6.4.2.post4379.dist-info → nucliadb_models-6.4.2.post4403.dist-info}/top_level.txt RENAMED Viewed

File without changes

nucliadb-models 6.4.2.post4379__py3-none-any.whl → 6.4.2.post4403__py3-none-any.whl

nucliadb-models 6.4.2.post4379__py3-none-any.whl → 6.4.2.post4403__py3-none-any.whl