dyff-schema 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dyff/schema/dataset/embedding.py +4 -0
- dyff/schema/v0/r1/base.py +4 -1
- dyff/schema/v0/r1/dataset/arrow.py +23 -13
- dyff/schema/v0/r1/dataset/binary.py +5 -1
- dyff/schema/v0/r1/dataset/embedding.py +26 -0
- dyff/schema/v0/r1/platform.py +17 -1
- dyff/schema/v0/r1/requests.py +21 -0
- {dyff_schema-0.18.0.dist-info → dyff_schema-0.19.0.dist-info}/METADATA +1 -1
- {dyff_schema-0.18.0.dist-info → dyff_schema-0.19.0.dist-info}/RECORD +13 -11
- {dyff_schema-0.18.0.dist-info → dyff_schema-0.19.0.dist-info}/WHEEL +1 -1
- {dyff_schema-0.18.0.dist-info → dyff_schema-0.19.0.dist-info}/LICENSE +0 -0
- {dyff_schema-0.18.0.dist-info → dyff_schema-0.19.0.dist-info}/NOTICE +0 -0
- {dyff_schema-0.18.0.dist-info → dyff_schema-0.19.0.dist-info}/top_level.txt +0 -0
dyff/schema/v0/r1/base.py
CHANGED
@@ -547,8 +547,11 @@ def uint64(
     return type("UInt64Value", (UInt64,), namespace)


+_ListElementT = TypeVar("_ListElementT")
+
+
 def list_(
-    item_type: Type[
+    item_type: Type[_ListElementT], *, list_size: Optional[int] = None
 ) -> Type[list]:
     if list_size is None:
         return pydantic.conlist(item_type)
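For context, a minimal usage sketch (not taken from the diff) of the reworked list_ helper as a pydantic v1 field type; the TagList model and its field are hypothetical, and list_size is assumed to fix the list length, matching the fixed-length embedding helper added elsewhere in this release.

# Hypothetical usage sketch, not part of the diff. list_size is assumed to
# constrain the list to exactly that many elements.
import pydantic

from dyff.schema.v0.r1.base import DyffSchemaBaseModel, list_


class TagList(DyffSchemaBaseModel):  # hypothetical model, for illustration only
    tags: list_(str, list_size=3) = pydantic.Field(  # type: ignore[valid-type]
        description="Exactly three tags"
    )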
dyff/schema/v0/r1/dataset/arrow.py
CHANGED

@@ -8,7 +8,7 @@ import functools
 import inspect
 import typing
 import uuid
-from typing import Any, Iterable, Optional
+from typing import Any, Iterable, Literal, Optional

 import pyarrow
 import pyarrow.dataset
@@ -51,7 +51,10 @@ def make_response_schema(schema: pyarrow.Schema) -> pyarrow.Schema:
     """Given an Arrow schema, create a new one that has the extra ``ResponseItem``
     fields added."""
     response_item_schema = make_response_item_schema(schema)
-    fields =
+    fields = [
+        pyarrow.field(n, t)
+        for n, t in zip(response_item_schema.names, response_item_schema.types)
+    ]
     item_type = pyarrow.struct(fields)
     responses_type = pyarrow.list_(item_type)
     return pyarrow.schema(
@@ -65,7 +68,8 @@ def make_response_schema(schema: pyarrow.Schema) -> pyarrow.Schema:

 def encode_schema(schema: pyarrow.Schema) -> str:
     """Encode an Arrow schema as a string."""
-
+    # pyarrow.Buffer doesn't satisfy ReadableBuffer but it still works
+    return binary.encode(schema.serialize())  # type: ignore[arg-type]


 def decode_schema(schema: str) -> pyarrow.Schema:
@@ -84,7 +88,7 @@ def subset_schema(schema: pyarrow.Schema, field_names: list[str]) -> pyarrow.Sch
     return pyarrow.schema(fields)


-def arrow_type(annotation: type):
+def arrow_type(annotation: type) -> pyarrow.DataType:
     """Determine a suitable arrow type for a pydantic model field.

     Supports primitive types as well as pydantic sub-models, lists, and optional types.
@@ -130,8 +134,7 @@ def arrow_type(annotation: type):

     if issubclass(annotation, DType):
         # The dtype is in the metaclass
-        return type(annotation).dtype  # type: ignore
-        # return pyarrow.from_numpy_dtype(type(annotation).dtype)  # type: ignore
+        return pyarrow.from_numpy_dtype(type(annotation).dtype)  # type: ignore[attr-defined]

     if annotation == bool:
         return pyarrow.bool_()
@@ -246,6 +249,7 @@ def _construct_field_docs(
     if pyarrow.types.is_struct(field.type):
         children = [field.type.field(i) for i in range(field.type.num_fields)]
     elif pyarrow.types.is_list(field.type):
+        assert isinstance(field.type, pyarrow.ListType)
         children = [field.type.value_field]
     else:
         raise ValueError(f"Unsupported nested type {field.type}")
@@ -275,8 +279,10 @@ def write_dataset(
     *,
     output_path: str,
     feature_schema: pyarrow.Schema,
-    partition_schema: pyarrow.Schema = None,
-    existing_data_behavior:
+    partition_schema: Optional[pyarrow.Schema] = None,
+    existing_data_behavior: Literal[
+        "error", "overwrite_or_ignore", "delete_matching"
+    ] = "overwrite_or_ignore",
     **kwargs,
 ):
     """Creates a ``pyarrow.dataset.Dataset`` from a data generator.
@@ -291,15 +297,19 @@ def write_dataset(
         existing_data_behavior: Same as ``pyarrow.dataset.write_dataset``, but
             defaults to ``"overwrite_or_ignore"``, which is typically what we want.
     """
-    partitioning =
-        partition_schema, flavor="hive"
+    partitioning = (
+        pyarrow.dataset.partitioning(partition_schema, flavor="hive")
+        if partition_schema is not None
+        else None
     )
     pyarrow.dataset.write_dataset(
         data_generator,
         output_path,
         format="parquet",
         schema=feature_schema,
-
+        # Type annotation doesn't include PartitioningFactory even though
+        # you're clearly meant to pass the output of partitioning() here
+        partitioning=partitioning,  # type: ignore[arg-type]
         existing_data_behavior=existing_data_behavior,
         **kwargs,
     )
@@ -326,10 +336,10 @@ def batches(
     for instance in instances:
         batch.append(instance)
         if len(batch) == batch_size:
-            yield pyarrow.RecordBatch.from_pylist(batch, schema=schema)
+            yield pyarrow.RecordBatch.from_pylist(batch, schema=schema)  # type: ignore[attr-defined]
            batch = []
     if batch:  # Final (incomplete) batch
-        yield pyarrow.RecordBatch.from_pylist(batch, schema=schema)
+        yield pyarrow.RecordBatch.from_pylist(batch, schema=schema)  # type: ignore[attr-defined]


 __all__ = [
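As a sanity check on the typing changes above, a small round-trip sketch (an assumption, not shown in the diff): encode_schema serializes an Arrow schema to a base64 string via binary.encode, and decode_schema restores it.

# Round-trip sketch, assumption only; not part of the diff.
import pyarrow

from dyff.schema.v0.r1.dataset import arrow

schema = pyarrow.schema([pyarrow.field("text", pyarrow.string())])
encoded = arrow.encode_schema(schema)  # plain str, safe to store alongside metadata
assert arrow.decode_schema(encoded).equals(schema)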
dyff/schema/v0/r1/dataset/binary.py
CHANGED

@@ -3,9 +3,13 @@

 import base64
 import hashlib
+import typing

+if typing.TYPE_CHECKING:
+    from _typeshed import ReadableBuffer

-
+
+def encode(data: "ReadableBuffer") -> str:
     return base64.b64encode(data).decode("utf-8")


dyff/schema/v0/r1/dataset/embedding.py
ADDED

@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: 2024 UL Research Institutes
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Type
+
+import pydantic
+
+from ..base import DyffSchemaBaseModel, FixedWidthFloat, list_
+
+
+def embedding(
+    element_type: Type[FixedWidthFloat], size: int
+) -> Type[DyffSchemaBaseModel]:
+    """Returns a schema type representing a list of fixed-length embedding vectors."""
+
+    class _Embedding(DyffSchemaBaseModel):
+        embedding: list_(element_type, list_size=size) = pydantic.Field(  # type: ignore[valid-type]
+            description="An embedding vector"
+        )
+
+    return _Embedding
+
+
+__all__ = [
+    "embedding",
+]
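A hypothetical usage sketch for the new embedding() factory (not part of the diff). Float32 is assumed to be one of the fixed-width float types exported by dyff.schema.v0.r1.base; the diff itself only shows the FixedWidthFloat bound.

# Hypothetical usage; Float32 is an assumed export of dyff.schema.v0.r1.base.
from dyff.schema.v0.r1.base import Float32
from dyff.schema.v0.r1.dataset.embedding import embedding

Embedding384 = embedding(Float32, 384)
item = Embedding384(embedding=[0.0] * 384)  # wrong length or element type should fail validation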
dyff/schema/v0/r1/platform.py
CHANGED
@@ -179,6 +179,14 @@ def summary_maxlen() -> int:
     return 280


+def entity_id_regex() -> str:
+    """An entity ID is a 32-character HEX string.
+
+    TODO: This doesn't check whether the hex string is a valid UUID.
+    """
+    return r"^[a-f0-9]{32}$"
+
+
 class Entities(str, enum.Enum):
     """The kinds of entities in the dyff system."""

@@ -245,6 +253,9 @@ class Resources(str, enum.Enum):
         raise ValueError(f"No Resources for Entity kind: {kind}")


+EntityID: TypeAlias = pydantic.constr(regex=entity_id_regex())  # type: ignore
+
+
 class DyffModelWithID(DyffSchemaBaseModel):
     id: str = pydantic.Field(description="Unique identifier of the entity")
     account: str = pydantic.Field(description="Account that owns the entity")
@@ -418,6 +429,10 @@ class DyffEntity(Status, Labeled, SchemaVersion, DyffModelWithID):
         default=None, description="Resource creation time (assigned by system)"
     )

+    lastTransitionTime: Optional[datetime] = pydantic.Field(
+        default=None, description="Time of last (status, reason) change."
+    )
+
     @abc.abstractmethod
     def dependencies(self) -> list[str]:
         """List of IDs of resources that this resource depends on.
@@ -843,7 +858,7 @@ class DataSchema(DyffSchemaBaseModel):
     def make_output_schema(
         schema: Union[pyarrow.Schema, Type[DyffSchemaBaseModel], DyffDataSchema],
     ) -> "DataSchema":
-        """Construct a complete ``DataSchema`` for inference
+        """Construct a complete ``DataSchema`` for inference outputs.

         This function will add required special fields for input data and then
         convert the augmented schema as necessary to populate at least the
@@ -2234,6 +2249,7 @@ __all__ = [
     "DyffModelWithID",
     "DyffSchemaBaseModel",
     "Entities",
+    "EntityID",
     "Evaluation",
     "EvaluationBase",
     "ExtractorStep",
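A brief sketch (assumption, not from the diff) of the new EntityID alias in use; the LinkedResource model is hypothetical and exists only for illustration.

# Hypothetical sketch: EntityID is a pydantic constr() type constrained by
# entity_id_regex(), i.e. a 32-character lowercase hex string.
import pydantic

from dyff.schema.v0.r1.platform import EntityID


class LinkedResource(pydantic.BaseModel):  # hypothetical model, for illustration
    dataset: EntityID


LinkedResource(dataset="a" * 32)        # accepted: 32 lowercase hex characters
# LinkedResource(dataset="not-an-id")   # rejected: raises pydantic.ValidationError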
dyff/schema/v0/r1/requests.py
CHANGED
@@ -14,6 +14,7 @@ in response.

 from __future__ import annotations

+import re
 from datetime import datetime
 from typing import Optional, Union

@@ -22,6 +23,7 @@ import pydantic
 from .base import DyffBaseModel
 from .platform import (
     AnalysisBase,
+    AnalysisScope,
     DatasetBase,
     DataView,
     DocumentationBase,
@@ -70,6 +72,25 @@ class AnalysisCreateRequest(DyffEntityCreateRequest, AnalysisBase):

     method: str = pydantic.Field(description="Method ID")

+    @pydantic.validator("scope", check_fields=False)
+    def _validate_scope(cls, scope: AnalysisScope) -> AnalysisScope:
+        # TODO: This has to be a validator function because we can't apply the
+        # regex contraint to AnalysisScope, since there are already entities
+        # with invalid IDs in the data store. Fix in Schema v1.
+        uuid4 = r"^[0-9a-f]{8}[0-9a-f]{4}[4][0-9a-f]{3}[89ab][0-9a-f]{3}[0-9a-f]{12}$"
+        id_pattern = re.compile(uuid4)
+        if scope.dataset is not None and not re.match(id_pattern, scope.dataset):
+            raise ValueError("scope.dataset must be an entity ID")
+        if scope.evaluation is not None and not re.match(id_pattern, scope.evaluation):
+            raise ValueError("scope.evaluation must be an entity ID")
+        if scope.inferenceService is not None and not re.match(
+            id_pattern, scope.inferenceService
+        ):
+            raise ValueError("scope.inferenceService must be an entity ID")
+        if scope.model is not None and not re.match(id_pattern, scope.model):
+            raise ValueError("scope.model must be an entity ID")
+        return scope
+

 class DatasetCreateRequest(DyffEntityCreateRequest, DatasetBase):
     pass
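For illustration, a quick check of the UUID4 pattern used by the new scope validator (the example IDs below are made up): entity_id_regex() accepts any 32-character hex string, while this validator additionally requires the version and variant nibbles of a version-4 UUID.

# Illustration only; the example IDs are fabricated for this demonstration.
import re

uuid4 = r"^[0-9a-f]{8}[0-9a-f]{4}[4][0-9a-f]{3}[89ab][0-9a-f]{3}[0-9a-f]{12}$"
id_pattern = re.compile(uuid4)

assert id_pattern.match("0f8b1c2d3e4a4b5c8d6e7f8091a2b3c4")      # valid UUID4 hex
assert not id_pattern.match("0f8b1c2d3e4a1b5c8d6e7f8091a2b3c4")  # version nibble is not 4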
{dyff_schema-0.18.0.dist-info → dyff_schema-0.19.0.dist-info}/RECORD
CHANGED

@@ -15,6 +15,7 @@ dyff/schema/dataset/__init__.py,sha256=P4tOKKiOFaVeh3-Keiwpg9n7VTQUJQVOIVZhm8sdA
 dyff/schema/dataset/arrow.py,sha256=1tkgbXcvU0Wy-HM64ddaHfAKJos6FLXxGxM9l8xLcjY,129
 dyff/schema/dataset/binary.py,sha256=jm73xo-mSMzh1GuI1uUZ2JulY7h2aJqV9CklrD_wScE,130
 dyff/schema/dataset/classification.py,sha256=nXfFeuAA-wGoiatZ6KzZPSWC078a_6nrHRsKeRmjbAw,138
+dyff/schema/dataset/embedding.py,sha256=yp817Op-NsS66MQJjtJuV1r2NTBcPmWmz3BaYeAflg4,133
 dyff/schema/dataset/text.py,sha256=_mHnCM1oPIqSBqOeggCw0IUmoxmm4_J64GjF-xdOY-4,128
 dyff/schema/dataset/vision.py,sha256=D2wCN54xw8m6yYtiFYjvB15t7PUEREZYUo2A76cejv4,130
 dyff/schema/io/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
@@ -22,22 +23,23 @@ dyff/schema/io/vllm.py,sha256=2q05M_-lTzq9oywKXHPPpCFCSDVCSsRQqtmERzWTtio,123
 dyff/schema/v0/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
 dyff/schema/v0/r1/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
 dyff/schema/v0/r1/adapters.py,sha256=2t2oxsnGfSEDKKDIEYw4qqLXMH7qlFIwPVuLyUmbsHs,23552
-dyff/schema/v0/r1/base.py,sha256=
-dyff/schema/v0/r1/platform.py,sha256=
-dyff/schema/v0/r1/requests.py,sha256=
+dyff/schema/v0/r1/base.py,sha256=IpvlYDr6JjYo6tn8XW4C1Fpgd_uqzZGZsG_cuEn_gQs,19441
+dyff/schema/v0/r1/platform.py,sha256=c-XBKsACEmz_VptGDGCDsULGYtxDajVZ6H7mzjxXHGY,74405
+dyff/schema/v0/r1/requests.py,sha256=UC2jK_upahjemie2s_Acg9xd4NDlwlwUUAOGnpI2iS0,13058
 dyff/schema/v0/r1/test.py,sha256=X6dUyVd5svcPCI-PBMOAqEfK9jv3bRDvkQTJzwS96c0,10720
 dyff/schema/v0/r1/version.py,sha256=isKAGuGxsdru8vDaYmI4YiZdJOu_wNxXK7u6QzD6FE4,392
 dyff/schema/v0/r1/dataset/__init__.py,sha256=LbVlkO2asyGYBKk2z49xjJYTM-pu9y9e4eQDXgTDLnM,2553
-dyff/schema/v0/r1/dataset/arrow.py,sha256=
-dyff/schema/v0/r1/dataset/binary.py,sha256=
+dyff/schema/v0/r1/dataset/arrow.py,sha256=PHNtBe32e4NXNQIqUMh0SxCPzTdidVkKBaFqYr6-aFo,12857
+dyff/schema/v0/r1/dataset/binary.py,sha256=KXvn79SUt3e_ZZXrju2atT_yMFwgAkCgDYXBtfv0E_I,636
 dyff/schema/v0/r1/dataset/classification.py,sha256=pbbEXhxyZ0pgYwzaTlM8hVHPNEJDCdHKOeGowPXgWYc,311
+dyff/schema/v0/r1/dataset/embedding.py,sha256=qcHHIzpFQj9bQ2vuECO9g1EU4WT1yjvUZbTlw0qwkc8,642
 dyff/schema/v0/r1/dataset/text.py,sha256=nLIn91Zlt0tNdXUklSgjJ-kEDxoPX32ISLkiv2DzLvE,1008
 dyff/schema/v0/r1/dataset/vision.py,sha256=aIe0fbfM_g3DsrDTdg2K803YKLjZBpurM_VJcJFuZLc,369
 dyff/schema/v0/r1/io/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
 dyff/schema/v0/r1/io/vllm.py,sha256=CUE9y8KthtUI7sD49S875rDmPvKotSXVIRaBS79aBZs,5320
-dyff_schema-0.
-dyff_schema-0.
-dyff_schema-0.
-dyff_schema-0.
-dyff_schema-0.
-dyff_schema-0.
+dyff_schema-0.19.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+dyff_schema-0.19.0.dist-info/METADATA,sha256=r7aFHs035B-teQqNrDhlCKYNSicfqzmwXU_8ADCAigo,3482
+dyff_schema-0.19.0.dist-info/NOTICE,sha256=YONACu0s_Ui6jNi-wtEsVQbTU1JIkh8wvLH6d1-Ni_w,43
+dyff_schema-0.19.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+dyff_schema-0.19.0.dist-info/top_level.txt,sha256=9e3VVdeX73t_sUJOPQPCcGtYO1JhoErhHIi3WoWGcFI,5
+dyff_schema-0.19.0.dist-info/RECORD,,
{dyff_schema-0.18.0.dist-info → dyff_schema-0.19.0.dist-info}/LICENSE
File without changes

{dyff_schema-0.18.0.dist-info → dyff_schema-0.19.0.dist-info}/NOTICE
File without changes

{dyff_schema-0.18.0.dist-info → dyff_schema-0.19.0.dist-info}/top_level.txt
File without changes