dyff-schema 0.10.3__tar.gz → 0.11.0__tar.gz

This diff shows the changes between package versions as they were published to their respective public registries. It is provided for informational purposes only.

Potentially problematic release.
Files changed (57)
  1. {dyff_schema-0.10.3/dyff_schema.egg-info → dyff_schema-0.11.0}/PKG-INFO +1 -1
  2. dyff_schema-0.11.0/dyff/schema/annotations.py +297 -0
  3. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/base.py +8 -8
  4. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/requests.py +2 -2
  5. {dyff_schema-0.10.3 → dyff_schema-0.11.0/dyff_schema.egg-info}/PKG-INFO +1 -1
  6. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff_schema.egg-info/SOURCES.txt +1 -0
  7. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/.gitignore +0 -0
  8. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/.gitlab-ci.yml +0 -0
  9. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/.licenserc.yaml +0 -0
  10. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/.pre-commit-config.yaml +0 -0
  11. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/.prettierignore +0 -0
  12. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/.secrets.baseline +0 -0
  13. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/CODE_OF_CONDUCT.md +0 -0
  14. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/LICENSE +0 -0
  15. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/NOTICE +0 -0
  16. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/README.md +0 -0
  17. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/__init__.py +0 -0
  18. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/adapters.py +0 -0
  19. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/base.py +0 -0
  20. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/copydoc.py +0 -0
  21. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/dataset/__init__.py +0 -0
  22. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/dataset/arrow.py +0 -0
  23. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/dataset/binary.py +0 -0
  24. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/dataset/classification.py +0 -0
  25. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/dataset/text.py +0 -0
  26. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/dataset/vision.py +0 -0
  27. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/errors.py +0 -0
  28. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/ids.py +0 -0
  29. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/io/__init__.py +0 -0
  30. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/io/vllm.py +0 -0
  31. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/platform.py +0 -0
  32. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/py.typed +0 -0
  33. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/quantity.py +0 -0
  34. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/requests.py +0 -0
  35. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/test.py +0 -0
  36. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/__init__.py +0 -0
  37. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/__init__.py +0 -0
  38. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/adapters.py +0 -0
  39. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/dataset/__init__.py +0 -0
  40. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/dataset/arrow.py +0 -0
  41. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/dataset/binary.py +0 -0
  42. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/dataset/classification.py +0 -0
  43. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/dataset/text.py +0 -0
  44. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/dataset/vision.py +0 -0
  45. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/io/__init__.py +0 -0
  46. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/io/vllm.py +0 -0
  47. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/platform.py +0 -0
  48. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/test.py +0 -0
  49. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/version.py +0 -0
  50. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/version.py +0 -0
  51. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff_schema.egg-info/dependency_links.txt +0 -0
  52. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff_schema.egg-info/requires.txt +0 -0
  53. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff_schema.egg-info/top_level.txt +0 -0
  54. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/makefile +0 -0
  55. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/pyproject.toml +0 -0
  56. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/setup.cfg +0 -0
  57. {dyff_schema-0.10.3 → dyff_schema-0.11.0}/tests/test_import.py +0 -0
{dyff_schema-0.10.3/dyff_schema.egg-info → dyff_schema-0.11.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dyff-schema
-Version: 0.10.3
+Version: 0.11.0
 Summary: Data models for the Dyff AI auditing platform.
 Author-email: Digital Safety Research Institute <contact@dsri.org>
 License: Apache-2.0
dyff_schema-0.11.0/dyff/schema/annotations.py
@@ -0,0 +1,297 @@
+# SPDX-FileCopyrightText: 2024 UL Research Institutes
+# SPDX-License-Identifier: Apache-2.0
+
+# mypy: disable-error-code="import-untyped"
+import functools
+import typing
+from typing import Callable, Generic, Literal, NamedTuple, Optional, TypeVar
+
+import pyarrow.dataset
+import pydantic
+from typing_extensions import ParamSpec
+
+from dyff.schema.dataset import ReplicatedItem, arrow
+from dyff.schema.platform import (
+    DataSchema,
+    Dataset,
+    Evaluation,
+    Measurement,
+    MeasurementLevel,
+    MeasurementSpec,
+    MethodImplementation,
+    MethodImplementationKind,
+    MethodImplementationPythonFunction,
+    MethodInput,
+    MethodInputKind,
+    MethodOutput,
+    MethodOutputKind,
+    MethodParameter,
+    MethodScope,
+)
+from dyff.schema.requests import MethodCreateRequest
+
+
+def _fqn(obj) -> tuple[str, str]:
+    """See: https://stackoverflow.com/a/70693158"""
+    try:
+        module = obj.__module__
+    except AttributeError:
+        module = obj.__class__.__module__
+    try:
+        name = obj.__qualname__
+    except AttributeError:
+        name = obj.__class__.__qualname__
+    # if obj is a method of builtin class, then module will be None
+    if module == "builtins" or module is None:
+        raise AssertionError("should not be called on a builtin")
+    return module, name
+
+
+class DataAnnotation(NamedTuple):
+    kind: str
+    direction: Literal["input", "output"]
+    level: Optional[MeasurementLevel] = None
+    schema: Optional[DataSchema] = None
+
+
+def Input(input_type) -> DataAnnotation:
+    """Apply this annotation to parameters of a Method implementation to
+    indicate that the parameter expects a PyArrow dataset derived from the
+    specified type of entity, e.g.::
+
+        def my_method(input_data: Annotated[pyarrow.dataset.Dataset, Input(Evaluation)], ...
+
+    :param input_type: A Dyff entity type with associated input data; one of
+        {Dataset, Evaluation, Measurement}
+    :return: Annotation data
+    """
+    if input_type == Dataset:
+        return DataAnnotation(kind="Dataset", direction="input")
+    elif input_type == Evaluation:
+        return DataAnnotation(kind="Evaluation", direction="input")
+    elif input_type == Measurement:
+        return DataAnnotation(kind="Measurement", direction="input")
+    else:
+        raise TypeError()
+
+
+# TODO: I think this could work if we ever upgrade to Python 3.12+. We need the
+# type checker to accept `InputData[Evaluation]` and treat it as an alias for
+# `pyarrow.dataset.Dataset`.
+#
+# if typing.TYPE_CHECKING:
+#     _InputDataType = TypeVar("_InputDataType")
+#     type InputData[_InputDataType] = pyarrow.dataset.Dataset
+# else:
+#
+#     class InputData:
+#         def __init__(self):
+#             raise NotImplementedError()
+#
+#         def __class_getitem__(cls, input_type) -> typing.GenericAlias:
+#             return Annotated[pyarrow.dataset.Dataset, Input(input_type)]
+
+
+def Output(output_type, *, schema, level: Optional[MeasurementLevel] = None):
+    """Apply this annotation to the return type of a Method to provide
+    metadata about the type of output created by the Method, e.g.::
+
+        def my_method(...) -> Annotated[
+            Iterable[pyarrow.RecordBatch],
+            Output(Measurement, schema=MyPydanticType, level=MeasurementLevel.Instance)
+        ]: ...
+
+    :param output_type: A Dyff entity type with associated output data; one of
+        {Measurement, SafetyCase}
+    :param schema: The schema of the output. Can be a type derived from
+        pydantic.BaseModel or an Arrow schema. The mandatory fields `_index_`
+        and `_replication_` will be *added* and should not be present.
+    :param level: The MeasurementLevel, if the output is a Measurement.
+    :return: Annotation data
+    """
+    if isinstance(schema, type) and issubclass(schema, pydantic.BaseModel):
+        RowSchema = pydantic.create_model(
+            "RowSchema", __base__=(schema, ReplicatedItem)
+        )
+        data_schema = DataSchema(
+            arrowSchema=arrow.encode_schema(arrow.arrow_schema(RowSchema))
+        )
+    elif isinstance(schema, pyarrow.Schema):
+        raise NotImplementedError()
+        # TODO: Add _index_ and _replication_
+        # data_schema = DataSchema(arrowSchema=arrow.encode_schema(schema))
+    else:
+        raise TypeError()
+
+    if output_type == Measurement:
+        if level is None:
+            raise ValueError("Must specify 'level' when output_type == Measurement")
+        return DataAnnotation(
+            kind="Measurement",
+            direction="output",
+            level=level,
+            schema=data_schema,
+        )
+    else:
+        raise TypeError()
+
+
+# TODO: See comments about InputData above
+
+# _OutputDataType = TypeVar("_OutputDataType")
+# _OutputDataSchema = TypeVar("_OutputDataSchema")
+# _OutputDataLevel = TypeVar("_OutputDataLevel")
+
+
+# class OutputData(
+#     Generic[_OutputDataType, _OutputDataSchema, _OutputDataLevel],
+# ):
+#     def __init__(self):
+#         raise NotImplementedError()
+
+#     def __class_getitem__(cls, args) -> typing.GenericAlias:
+#         return Annotated[Iterable[pyarrow.RecordBatch], Output(*args)]
+
+
+P = ParamSpec("P")
+R = TypeVar("R")
+
+
+class MethodPrototype(Generic[P, R]):
+    """A wrapper for Python functions that implement Methods that knows how to create an
+    appropriate MethodCreateRequest based on the function signature."""
+
+    def __init__(
+        self,
+        f: Callable[P, R],
+        *,
+        scope: MethodScope,
+        description: Optional[str] = None,
+    ):
+        self.f = f
+        self.scope = scope
+        self.description = description
+        # This is similar to doing @functools.wraps() but it works with
+        # function objects
+        functools.update_wrapper(self, f)
+
+    def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R:
+        return self.f(*args, **kwargs)
+
+    def create_request(
+        self, *, account: str, modules: list[str]
+    ) -> MethodCreateRequest:
+        """Create a MethodCreateRequest for the wrapped function.
+
+        :param account: The .account field for the request
+        :param modules: The .modules field for the request. This should include at least
+            the module that contains the wrapped function.
+        """
+        name = self.f.__name__
+        hints = typing.get_type_hints(self.f, include_extras=True)
+
+        parameters: list[MethodParameter] = []
+        inputs: list[MethodInput] = []
+        output: Optional[MethodOutput] = None
+        for k, v in hints.items():
+            annotation = None
+            if metadata := getattr(v, "__metadata__", None):
+                for m in metadata:
+                    if isinstance(m, DataAnnotation):
+                        annotation = m
+                        break
+            if k == "return":
+                if annotation is None:
+                    continue
+                if annotation.level is None:
+                    raise ValueError("Must specify .level for Output")
+                if annotation.schema is None:
+                    raise ValueError("Must specify .schema for Output")
+                output = MethodOutput(
+                    kind=MethodOutputKind(annotation.kind),
+                    measurement=MeasurementSpec(
+                        name=name,
+                        description=self.description,
+                        level=MeasurementLevel(annotation.level),
+                        schema=annotation.schema,
+                    ),
+                )
+            elif annotation is None:
+                parameters.append(MethodParameter(keyword=k))
+            else:
+                inputs.append(
+                    MethodInput(kind=MethodInputKind(annotation.kind), keyword=k)
+                )
+
+        if output is None:
+            raise TypeError("Return type must be annotated with Output()")
+
+        return MethodCreateRequest(
+            account=account,
+            modules=modules,
+            name=name,
+            scope=self.scope,
+            description=self.description,
+            implementation=MethodImplementation(
+                kind=MethodImplementationKind.PythonFunction,
+                pythonFunction=MethodImplementationPythonFunction(
+                    fullyQualifiedName=".".join(_fqn(self.f))
+                ),
+            ),
+            parameters=parameters,
+            inputs=inputs,
+            output=output,
+        )
+
+
+def method(
+    *, scope: MethodScope, description: Optional[str] = None
+) -> Callable[[Callable[P, R]], MethodPrototype[P, R]]:
+    """Use this decorator to indicate that a Python function implements a
+    Dyff Method. This should be used in conjunction with appropriate type
+    annotations, e.g.::
+
+        @method
+        def my_method(
+            arg: str,
+            data: Annotated[pyarrow.dataset.Dataset, Input(Evaluation)]
+        ) -> Annotated[
+            Iterable[pyarrow.RecordBatch],
+            Output(Measurement, schema=MyPydanticType, level=MeasurementLevel.Instance)
+        ]:
+            ...
+
+    The wrapped function will be an instance of MethodPrototype, and you can
+    use its .create_request() member function to create an appropriate
+    MethodCreateRequest for the wrapped function.
+
+    :param scope: The .scope field for the Method
+    :param description: The .description field for the Method. If not specified,
+        the docstring of the wrapped function will be used.
+    :return: A decorator that returns a MethodPrototype.
+    """
+
+    def decorator(f: Callable[P, R]) -> MethodPrototype[P, R]:
+        nonlocal description
+        if description is None:
+            description = f.__doc__
+        return MethodPrototype(f, scope=scope, description=description)
+
+    return decorator
+
+
+def method_request(
+    f: MethodPrototype, *, account: str, modules: list[str]
+) -> MethodCreateRequest:
+    return f.create_request(account=account, modules=modules)
+
+
+__all__ = [
+    "Input",
+    # "InputData",
+    "MethodPrototype",
+    "Output",
+    # "OutputData",
+    "method",
+    "method_request",
+]
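
Read together, the docstrings in the new module describe the intended workflow: annotate inputs with Input(...), annotate the return type with Output(...), decorate with @method, then call .create_request(). What follows is a minimal sketch of that workflow, not code from the package: the TextScore model, the score_text function, and the account/module values are hypothetical, and it assumes MethodScope has an Evaluation member.

    # Hypothetical end-to-end usage of the new annotations API (sketch only).
    from typing import Annotated, Iterable

    import pyarrow
    import pyarrow.dataset
    import pydantic

    from dyff.schema.annotations import Input, Output, method
    from dyff.schema.platform import (
        Evaluation,
        Measurement,
        MeasurementLevel,
        MethodScope,
    )


    class TextScore(pydantic.BaseModel):
        # Hypothetical row schema; _index_ and _replication_ are added automatically
        text: str
        score: float


    @method(scope=MethodScope.Evaluation)  # assumes this enum member exists
    def score_text(
        threshold: float,  # un-annotated parameter -> MethodParameter
        outputs: Annotated[pyarrow.dataset.Dataset, Input(Evaluation)],  # -> MethodInput
    ) -> Annotated[
        Iterable[pyarrow.RecordBatch],
        Output(Measurement, schema=TextScore, level=MeasurementLevel.Instance),
    ]:
        """Scores each evaluation output item against a threshold."""
        ...


    # score_text is now a MethodPrototype; its signature drives the request:
    request = score_text.create_request(
        account="example-account", modules=["example-module-id"]
    )

create_request() classifies threshold as a MethodParameter, outputs as a MethodInput of kind Evaluation, and builds the MeasurementSpec from the Output annotation, exactly as in the loop over typing.get_type_hints() above.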
{dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/base.py
@@ -558,11 +558,11 @@ def list_(
     return pydantic.conlist(item_type, min_items=list_size, max_items=list_size)
 
 
-# mypy gets confused because 'dict' is the name of a method in DyffDefaultSerializers
+# mypy gets confused because 'dict' is the name of a method in DyffBaseModel
 _ModelAsDict = dict[str, Any]
 
 
-class DyffDefaultSerializers(pydantic.BaseModel):
+class DyffBaseModel(pydantic.BaseModel):
     """This must be the base class for *all pydantic models* in the Dyff schema.
 
     Overrides serialization functions to serialize by alias, so that "round-trip"
@@ -571,6 +571,9 @@ class DyffDefaultSerializers(pydantic.BaseModel):
     Python reserved words like 'bytes' as field names.
     """
 
+    class Config:
+        extra = pydantic.Extra.forbid
+
     def dict(self, *, by_alias: bool = True, **kwargs) -> _ModelAsDict:
         return super().dict(by_alias=by_alias, **kwargs)
 
@@ -603,10 +606,10 @@
 # don't have timezones set currently for historical reasons. It's actually
 # better if all datetimes in the system are UTC, so that their JSON
 # representations (i.e., isoformat strings) are well-ordered.
-class DyffSchemaBaseModel(DyffDefaultSerializers):
+class DyffSchemaBaseModel(DyffBaseModel):
     """This should be the base class for *almost all* non-request models in the Dyff
     schema. Models that do not inherit from this class *must* still inherit from
-    DyffDefaultSerializers.
+    DyffBaseModel.
 
     Adds a root validator to ensure that all datetime fields are represented in the UTC
     timezone. This is necessary to avoid errors when comparing "naive" and "aware"
@@ -614,9 +617,6 @@ class DyffSchemaBaseModel(DyffDefaultSerializers):
     datetimes are well-ordered.
     """
 
-    class Config:
-        extra = pydantic.Extra.forbid
-
     @pydantic.root_validator
     def _ensure_datetime_timezone_utc(cls, values):
         update = {}
@@ -633,7 +633,7 @@ class DyffSchemaBaseModel(DyffDefaultSerializers):
 __all__ = [
     "DTYPE",
     "DType",
-    "DyffDefaultSerializers",
+    "DyffBaseModel",
     "DyffSchemaBaseModel",
     "FixedWidthFloat",
     "FixedWidthInt",
{dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff/schema/v0/r1/requests.py
@@ -19,7 +19,7 @@ from typing import Optional, Union
 
 import pydantic
 
-from .base import DyffDefaultSerializers
+from .base import DyffBaseModel
 from .platform import (
     AnalysisBase,
     DatasetBase,
@@ -39,7 +39,7 @@ from .platform import (
 from .version import SchemaVersion
 
 
-class DyffRequestDefaultValidators(DyffDefaultSerializers):
+class DyffRequestDefaultValidators(DyffBaseModel):
     """This must be the base class for *all* request models in the Dyff schema.
 
     Adds a root validator to ensure that all user-provided datetime fields have a
{dyff_schema-0.10.3 → dyff_schema-0.11.0/dyff_schema.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dyff-schema
-Version: 0.10.3
+Version: 0.11.0
 Summary: Data models for the Dyff AI auditing platform.
 Author-email: Digital Safety Research Institute <contact@dsri.org>
 License: Apache-2.0
{dyff_schema-0.10.3 → dyff_schema-0.11.0}/dyff_schema.egg-info/SOURCES.txt
@@ -12,6 +12,7 @@ makefile
 pyproject.toml
 dyff/schema/__init__.py
 dyff/schema/adapters.py
+dyff/schema/annotations.py
 dyff/schema/base.py
 dyff/schema/copydoc.py
 dyff/schema/errors.py