dyff-schema 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dyff-schema might be problematic. Click here for more details.

dyff/schema/_version.py CHANGED
@@ -1,2 +1,2 @@
1
- __version__ = version = "0.30.0"
2
- __version_tuple__ = version_tuple = (0, 30, 0)
1
+ __version__ = version = "0.30.1"
2
+ __version_tuple__ = version_tuple = (0, 30, 1)
dyff/schema/v0/r1/base.py CHANGED
@@ -431,13 +431,11 @@ def list_(
431
431
  item_type: type[_ListElementT], *, list_size: Optional[int] = None
432
432
  ) -> type[list]:
433
433
  if list_size is None:
434
- return Annotated[list[_ListElementT], Field()] # type: ignore [return-value]
434
+ return Annotated[list[item_type], Field()] # type: ignore [return-value, valid-type]
435
435
  else:
436
436
  if list_size <= 0:
437
437
  raise ValueError(f"list_size {list_size} must be > 0")
438
- return Annotated[
439
- list[_ListElementT], Field(min_length=list_size, max_length=list_size)
440
- ] # type: ignore [return-value]
438
+ return Annotated[list[item_type], Field(min_length=list_size, max_length=list_size)] # type: ignore [return-value, valid-type]
441
439
 
442
440
 
443
441
  # mypy gets confused because 'dict' is the name of a method in DyffBaseModel
@@ -7,11 +7,13 @@ from __future__ import annotations
7
7
  import functools
8
8
  import inspect
9
9
  import typing
10
+ import uuid
10
11
  from typing import Any, Iterable, Literal, Optional
11
12
 
12
13
  import pyarrow
13
14
  import pyarrow.dataset
14
15
  import pydantic
16
+ from pydantic.fields import FieldInfo
15
17
 
16
18
  from ..base import DType
17
19
  from . import binary
@@ -90,9 +92,12 @@ def subset_schema(schema: pyarrow.Schema, field_names: list[str]) -> pyarrow.Sch
90
92
 
91
93
 
92
94
  def arrow_type(annotation: type) -> pyarrow.DataType:
93
- """Determine a suitable arrow type for a pydantic model field."""
95
+ """Determine a suitable arrow type for a pydantic model field.
94
96
 
95
- # Handle generic types first (List, Union, etc.)
97
+ Supports primitive types as well as pydantic sub-models, lists, and optional types.
98
+ Numeric types must have appropriate bounds specified, as Arrow cannot represent the
99
+ unbounded integer types used by Python 3.
100
+ """
96
101
  if origin := typing.get_origin(annotation):
97
102
  if origin == list:
98
103
  annotation_args = typing.get_args(annotation)
@@ -114,40 +119,61 @@ def arrow_type(annotation: type) -> pyarrow.DataType:
114
119
  raise ValueError(
115
120
  f"annotation {annotation}: only Optional[T] supported, not general Union"
116
121
  )
117
- return arrow_type(inner_type)
122
+ return arrow_type(inner_type) # All Arrow types are nullable
118
123
 
119
124
  raise NotImplementedError(f"Python type {annotation}")
120
125
 
121
- # Guard against non-types (TypeVars, etc.)
122
- if not isinstance(annotation, type):
123
- return pyarrow.string()
124
-
125
- # Handle custom types
126
- if issubclass(annotation, DType):
127
- # The dtype is in the metaclass
128
- return pyarrow.from_numpy_dtype(type(annotation).dtype) # type: ignore[attr-defined]
126
+ if issubclass(annotation, pydantic.BaseModel):
127
+ subfields = []
128
+ for _name, subfield in annotation.model_fields.items():
129
+ subfields.append(arrow_field(_name, subfield))
130
+ return pyarrow.struct(subfields)
129
131
 
130
132
  # Handle numpy-like types
131
133
  if hasattr(annotation, "dtype"):
132
134
  return pyarrow.from_numpy_dtype(annotation.dtype)
133
135
 
134
- # Handle pydantic models
135
- if issubclass(annotation, pydantic.BaseModel):
136
- subfields = []
137
- for field_name, subfield in annotation.model_fields.items():
138
- subfields.append(arrow_field(field_name, subfield))
139
- return pyarrow.struct(subfields)
136
+ # Handle Annotated list types (e.g., Annotated[list[str], Field(max_length=10)])
137
+ # This covers lists created by our list_() function in base.py which returns
138
+ # Annotated types with Field metadata for length constraints.
139
+ #
140
+ # We need custom logic here because:
141
+ # 1. Standard typing.List doesn't carry Pydantic Field constraints
142
+ # 2. Our list_() function wraps list[T] in Annotated[list[T], Field(...)]
143
+ # to embed validation metadata (min/max length) at the type level
144
+ # 3. PyArrow needs to know these constraints upfront to create proper schemas
145
+ # 4. The nested generic structure requires careful extraction:
146
+ # Annotated[list[str], Field(max_length=10)] needs to become
147
+ # pyarrow.list_(pyarrow.string(), 10)
148
+ if (
149
+ typing.get_origin(annotation) is typing.Annotated
150
+ and typing.get_args(annotation)[0] is list
151
+ ):
152
+ metadata = typing.get_args(annotation)[1:]
153
+ item_type = typing.get_args(typing.get_args(annotation)[0])[0]
154
+ max_length = -1
155
+ for meta in metadata:
156
+ if isinstance(meta, FieldInfo):
157
+ max_length = getattr(meta, "max_length", -1)
158
+ return pyarrow.list_(arrow_type(item_type), max_length)
140
159
 
141
- # Handle built-in types
142
- type_map = {
143
- str: pyarrow.string(),
144
- int: pyarrow.int64(),
145
- float: pyarrow.float64(),
146
- bool: pyarrow.bool_(),
147
- }
160
+ if issubclass(annotation, DType):
161
+ # The dtype is in the metaclass
162
+ return pyarrow.from_numpy_dtype(type(annotation).dtype) # type: ignore[attr-defined]
148
163
 
149
- if annotation in type_map:
150
- return type_map[annotation]
164
+ if annotation == bool:
165
+ return pyarrow.bool_()
166
+ if annotation == bytes:
167
+ return pyarrow.binary()
168
+ if annotation == float:
169
+ return pyarrow.float64()
170
+ if annotation == int:
171
+ raise ValueError("unconstrained integers cannot be represented in Arrow")
172
+ if annotation == uuid.UUID:
173
+ return pyarrow.binary(16)
174
+
175
+ if annotation == str:
176
+ return pyarrow.string()
151
177
 
152
178
  raise NotImplementedError(f"Python type {annotation}")
153
179
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dyff-schema
3
- Version: 0.30.0
3
+ Version: 0.30.1
4
4
  Summary: Data models for the Dyff AI auditing platform.
5
5
  Author-email: Digital Safety Research Institute <contact@dsri.org>
6
6
  License: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  dyff/schema/__init__.py,sha256=w7OWDFuyGKd6xt_yllNtKzHahPgywrfU4Ue02psYaMA,2244
2
- dyff/schema/_version.py,sha256=jaJNt0cIPw7pKpa1ZksSexFHwIRD7mvffKUdrE657pk,80
2
+ dyff/schema/_version.py,sha256=b5YBuhpx4KBNLup_QY5e0xhsx1Q2LttYULeEoU-8uhY,80
3
3
  dyff/schema/adapters.py,sha256=YMTHv_2VlLGFp-Kqwa6H51hjffHmk8gXjZilHysIF5Q,123
4
4
  dyff/schema/annotations.py,sha256=nE6Jk1PLqlShj8uqjE_EzZC9zYnTDW5AVtQcjysiK8M,10018
5
5
  dyff/schema/base.py,sha256=jvaNtsSZyFfsdUZTcY_U-yfLY5_GyrMxSXhON2R9XR0,119
@@ -25,14 +25,14 @@ dyff/schema/io/vllm.py,sha256=2q05M_-lTzq9oywKXHPPpCFCSDVCSsRQqtmERzWTtio,123
25
25
  dyff/schema/v0/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
26
26
  dyff/schema/v0/r1/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
27
27
  dyff/schema/v0/r1/adapters.py,sha256=hpwCSW8lkMkUKCLe0zaMUDu-VS_caSxJvPsECEi_XRA,33069
28
- dyff/schema/v0/r1/base.py,sha256=WEy-xSL_FoC1gwAij1V_TEqPsITiIF_KlBDaDpPLWr8,20359
28
+ dyff/schema/v0/r1/base.py,sha256=zaxU2fIu1Ca-nZsZwG0eb7COJmnPkDZ_yLrieHXqr0s,20353
29
29
  dyff/schema/v0/r1/commands.py,sha256=wDNMB8lry-H9G5hlT4m6y4fysoq4glY5qKsHytfehqU,9052
30
30
  dyff/schema/v0/r1/platform.py,sha256=Hp9hTaDqWCDsPGHouCmiCMLXByjW8UadXIlgRtb0vZc,82305
31
31
  dyff/schema/v0/r1/requests.py,sha256=VLdhyAKpdQytNdZZW60Y5Kg35GmB6BezDwDengl18Ws,17168
32
32
  dyff/schema/v0/r1/test.py,sha256=X6dUyVd5svcPCI-PBMOAqEfK9jv3bRDvkQTJzwS96c0,10720
33
33
  dyff/schema/v0/r1/version.py,sha256=NONebgcv5Thsw_ymud6PacZdGjV6ndBrmLnap-obcpo,428
34
34
  dyff/schema/v0/r1/dataset/__init__.py,sha256=LbVlkO2asyGYBKk2z49xjJYTM-pu9y9e4eQDXgTDLnM,2553
35
- dyff/schema/v0/r1/dataset/arrow.py,sha256=2XzJ1D2RqwfYRfLrJrsz-czOXZaPuGiWqXGfTGDBomY,12324
35
+ dyff/schema/v0/r1/dataset/arrow.py,sha256=3AVKmxE-dFY7l-5A_riq0Uk4x_KshVF3V_uo2SNHuQs,13795
36
36
  dyff/schema/v0/r1/dataset/binary.py,sha256=KXvn79SUt3e_ZZXrju2atT_yMFwgAkCgDYXBtfv0E_I,636
37
37
  dyff/schema/v0/r1/dataset/classification.py,sha256=pbbEXhxyZ0pgYwzaTlM8hVHPNEJDCdHKOeGowPXgWYc,311
38
38
  dyff/schema/v0/r1/dataset/embedding.py,sha256=2cmcoV6AsvUBRrefmJ-_Vdfg9NVvdMr0-s7dqki15ls,792
@@ -40,9 +40,9 @@ dyff/schema/v0/r1/dataset/text.py,sha256=MYG5seGODDryRSCy-g0Unh5dD0HCytmZ3FeElC-
40
40
  dyff/schema/v0/r1/dataset/vision.py,sha256=aIe0fbfM_g3DsrDTdg2K803YKLjZBpurM_VJcJFuZLc,369
41
41
  dyff/schema/v0/r1/io/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
42
42
  dyff/schema/v0/r1/io/vllm.py,sha256=vWyLg-susbg0JDfv6VExBpgFdU2GHP2a14ChOdbckvs,5321
43
- dyff_schema-0.30.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
- dyff_schema-0.30.0.dist-info/licenses/NOTICE,sha256=YONACu0s_Ui6jNi-wtEsVQbTU1JIkh8wvLH6d1-Ni_w,43
45
- dyff_schema-0.30.0.dist-info/METADATA,sha256=g0DxR2SxqA7qTpENblTBKliqpHsnAqwcoJr7J78dAIg,3623
46
- dyff_schema-0.30.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
- dyff_schema-0.30.0.dist-info/top_level.txt,sha256=9e3VVdeX73t_sUJOPQPCcGtYO1JhoErhHIi3WoWGcFI,5
48
- dyff_schema-0.30.0.dist-info/RECORD,,
43
+ dyff_schema-0.30.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
+ dyff_schema-0.30.1.dist-info/licenses/NOTICE,sha256=YONACu0s_Ui6jNi-wtEsVQbTU1JIkh8wvLH6d1-Ni_w,43
45
+ dyff_schema-0.30.1.dist-info/METADATA,sha256=q23-qxTt8hJXIjMnf3sM4VA4JgFhdyWkmPM_xeM5qgE,3623
46
+ dyff_schema-0.30.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ dyff_schema-0.30.1.dist-info/top_level.txt,sha256=9e3VVdeX73t_sUJOPQPCcGtYO1JhoErhHIi3WoWGcFI,5
48
+ dyff_schema-0.30.1.dist-info/RECORD,,