dyff-schema 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dyff-schema might be problematic. Click here for more details.

dyff/schema/ids.py CHANGED
@@ -14,9 +14,9 @@ def generate_entity_id() -> str:
14
14
 
15
15
 
16
16
  def null_id() -> str:
17
- """Return a special identifier signifying that the identity of an entity
18
- is not important. Used for entities that are "owned" by another entity
19
- that has a non-null identifier.
17
+ """Return a special identifier signifying that the identity of an entity is not
18
+ important. Used for entities that are "owned" by another entity that has a non-null
19
+ identifier.
20
20
 
21
21
  :returns: The null identifier -- a hex string representation of a UUID.
22
22
  :rtype: str
@@ -25,10 +25,9 @@ def null_id() -> str:
25
25
 
26
26
 
27
27
  def replication_id(evaluation_id: str, replication_index: int) -> str:
28
- """Return a unique identifier for a replication within an evaluation.
29
- Replications in different evaluations will have different identifiers, so
30
- datasets from different evaluations can be combined without worrying about
31
- collisions.
28
+ """Return a unique identifier for a replication within an evaluation. Replications
29
+ in different evaluations will have different identifiers, so datasets from different
30
+ evaluations can be combined without worrying about collisions.
32
31
 
33
32
  :param evaluation_id: The ID of the Evaluation.
34
33
  :type evaluation_id: str
dyff/schema/quantity.py CHANGED
@@ -22,8 +22,7 @@ from typing import Any
22
22
  # See the License for the specific language governing permissions and
23
23
  # limitations under the License.
24
24
  def parse_quantity(quantity: Any) -> Decimal:
25
- """Parse a kubernetes canonical form quantity like 200Mi to a decimal
26
- number.
25
+ """Parse a kubernetes canonical form quantity like 200Mi to a decimal number.
27
26
 
28
27
  Supported SI suffixes:
29
28
  base1024: Ki | Mi | Gi | Ti | Pi | Ei
@@ -15,10 +15,9 @@ from dyff.schema.platform import SchemaAdapter
15
15
 
16
16
 
17
17
  def map_structure(fn, data):
18
- """Given a JSON data structure ``data``, create a new data structure
19
- instance with the same shape as ``data`` by applying ``fn`` to each "leaf"
20
- value in the nested data structure.
21
- """
18
+ """Given a JSON data structure ``data``, create a new data structure instance with
19
+ the same shape as ``data`` by applying ``fn`` to each "leaf" value in the nested
20
+ data structure."""
22
21
  if isinstance(data, dict):
23
22
  return {k: map_structure(fn, v) for k, v in data.items()}
24
23
  elif isinstance(data, list):
@@ -30,10 +29,10 @@ def map_structure(fn, data):
30
29
  def flatten_object(
31
30
  obj: dict, *, max_depth: int | None = None, add_prefix: bool = True
32
31
  ) -> dict:
33
- """Flatten a JSON object the by creating a new object with a key for
34
- each "leaf" value in the input. If ``add_prefix`` is True, the key will be
35
- equal to the "path" string of the leaf, i.e., "obj.field.subfield";
36
- otherwise, it will be just "subfield".
32
+ """Flatten a JSON object by creating a new object with a key for each "leaf"
33
+ value in the input. If ``add_prefix`` is True, the key will be equal to the "path"
34
+ string of the leaf, i.e., "obj.field.subfield"; otherwise, it will be just
35
+ "subfield".
37
36
 
38
37
  Nested lists are considered "leaf" values, even if they contain objects.
39
38
  """
@@ -72,8 +71,8 @@ class Adapter(Protocol):
72
71
 
73
72
 
74
73
  class TransformJSON:
75
- """Transform an input JSON structure by creating a new output JSON
76
- structure where all of the "leaf" values are populated by either:
74
+ """Transform an input JSON structure by creating a new output JSON structure where
75
+ all of the "leaf" values are populated by either:
77
76
 
78
77
  1. A provided JSON literal value, or
79
78
  2. The result of a jsonpath query on the input structure.
@@ -158,8 +157,8 @@ class TransformJSON:
158
157
 
159
158
 
160
159
  class EmbedIndex:
161
- """Adds one or more fields to each member of the specified collections
162
- that represent "indexes", or possible sort orders, for the collections.
160
+ """Adds one or more fields to each member of the specified collections that
161
+ represent "indexes", or possible sort orders, for the collections.
163
162
 
164
163
  For example, if the input data is::
165
164
 
@@ -232,11 +231,10 @@ class EmbedIndex:
232
231
 
233
232
 
234
233
  class ExplodeCollections:
235
- """Explodes one or more top-level lists of the same length into multiple
236
- records, where each record contains the corresponding value from each
237
- list. This is useful for turning nested-list representations into
238
- "relational" representations where the lists are converted to multiple
239
- rows with a unique index.
234
+ """Explodes one or more top-level lists of the same length into multiple records,
235
+ where each record contains the corresponding value from each list. This is useful
236
+ for turning nested-list representations into "relational" representations where the
237
+ lists are converted to multiple rows with a unique index.
240
238
 
241
239
  The ``configuration`` argument is a dictionary::
242
240
 
@@ -341,8 +339,8 @@ class ExplodeCollections:
341
339
 
342
340
 
343
341
  class FlattenHierarchy:
344
- """Flatten a JSON object -- or the JSON sub-objects in named fields -- by
345
- creating a new object with a key for each "leaf" value in the input.
342
+ """Flatten a JSON object -- or the JSON sub-objects in named fields -- by creating a
343
+ new object with a key for each "leaf" value in the input.
346
344
 
347
345
  The ``configuration`` options are::
348
346
 
@@ -464,8 +462,8 @@ class Select:
464
462
 
465
463
 
466
464
  class Map:
467
- """For each input item, map another Adapter over the elements of each of
468
- the named nested collections within that item.
465
+ """For each input item, map another Adapter over the elements of each of the named
466
+ nested collections within that item.
469
467
 
470
468
  The configuration is a dictionary::
471
469
 
dyff/schema/v0/r1/base.py CHANGED
@@ -147,7 +147,7 @@ class FixedWidthNumberMeta(
147
147
 
148
148
 
149
149
  class DType:
150
- """Base class for pydantic custom types that have an Arrow .dtype"""
150
+ """Base class for pydantic custom types that have an Arrow .dtype."""
151
151
 
152
152
  @classmethod
153
153
  def __modify_schema__(
@@ -240,35 +240,35 @@ class UInt64Meta(FixedWidthNumberMeta[int, pydantic.ConstrainedInt]):
240
240
 
241
241
 
242
242
  class Int8(FixedWidthInt, metaclass=Int8Meta):
243
- """An 8-bit integer"""
243
+ """An 8-bit integer."""
244
244
 
245
245
 
246
246
  class Int16(FixedWidthInt, metaclass=Int16Meta):
247
- """A 16-bit integer"""
247
+ """A 16-bit integer."""
248
248
 
249
249
 
250
250
  class Int32(FixedWidthInt, metaclass=Int32Meta):
251
- """A 32-bit integer"""
251
+ """A 32-bit integer."""
252
252
 
253
253
 
254
254
  class Int64(FixedWidthInt, metaclass=Int64Meta):
255
- """A 64-bit integer"""
255
+ """A 64-bit integer."""
256
256
 
257
257
 
258
258
  class UInt8(FixedWidthInt, metaclass=UInt8Meta):
259
- """An 8-bit unsigned integer"""
259
+ """An 8-bit unsigned integer."""
260
260
 
261
261
 
262
262
  class UInt16(FixedWidthInt, metaclass=UInt16Meta):
263
- """A 16-bit unsigned integer"""
263
+ """A 16-bit unsigned integer."""
264
264
 
265
265
 
266
266
  class UInt32(FixedWidthInt, metaclass=UInt32Meta):
267
- """A 32-bit unsigned integer"""
267
+ """A 32-bit unsigned integer."""
268
268
 
269
269
 
270
270
  class UInt64(FixedWidthInt, metaclass=UInt64Meta):
271
- """A 64-bit unsigned integer"""
271
+ """A 64-bit unsigned integer."""
272
272
 
273
273
 
274
274
  # ----------------------------------------------------------------------------
@@ -557,11 +557,10 @@ def list_(
557
557
  class DyffSchemaBaseModel(pydantic.BaseModel):
558
558
  """Base class for pydantic models that are used for defining data schemas.
559
559
 
560
- Overrides serialization functions to serialize by alias, so that
561
- "round-trip" serialization is the default for fields with aliases. We
562
- prefer aliases because we can 1) use _underscore_names_ as reserved names
563
- in our data schema, and 2) allow Python reserved words like 'bytes' as
564
- field names.
560
+ Overrides serialization functions to serialize by alias, so that "round-trip"
561
+ serialization is the default for fields with aliases. We prefer aliases because we
562
+ can 1) use _underscore_names_ as reserved names in our data schema, and 2) allow
563
+ Python reserved words like 'bytes' as field names.
565
564
  """
566
565
 
567
566
  def dict(self, *, by_alias: bool = True, **kwargs) -> dict[str, Any]:
@@ -56,7 +56,8 @@ def make_item_type(schema: Type[DyffSchemaBaseModel]) -> Type[DyffSchemaBaseMode
56
56
  def make_response_item_type(
57
57
  schema: Type[DyffSchemaBaseModel],
58
58
  ) -> Type[DyffSchemaBaseModel]:
59
- """Return a pydantic model type that inherits from both ``ResponseItem`` and ``schema``."""
59
+ """Return a pydantic model type that inherits from both ``ResponseItem`` and
60
+ ``schema``."""
60
61
  return pydantic.create_model(
61
62
  f"{schema.__name__}ResponseItem", __base__=(schema, ResponseItem)
62
63
  )
@@ -28,31 +28,28 @@ def arrow_schema(
28
28
  ) -> pyarrow.Schema:
29
29
  """Create an Arrow schema from a Pydantic model.
30
30
 
31
- We support a very basic subset of pydantic model features currently. The
32
- intention is to expand this.
31
+ We support a very basic subset of pydantic model features currently. The intention
32
+ is to expand this.
33
33
  """
34
34
  arrow_fields = [arrow_field(field) for _, field in model_type.__fields__.items()]
35
35
  return pyarrow.schema(arrow_fields, metadata=metadata)
36
36
 
37
37
 
38
38
  def make_item_schema(schema: pyarrow.Schema) -> pyarrow.Schema:
39
- """Given an Arrow schema, create a new one that has the extra ``Item``
40
- fields added.
41
- """
39
+ """Given an Arrow schema, create a new one that has the extra ``Item`` fields
40
+ added."""
42
41
  return schema.insert(0, pyarrow.field("_index_", pyarrow.int64()))
43
42
 
44
43
 
45
44
  def make_response_item_schema(schema: pyarrow.Schema) -> pyarrow.Schema:
46
- """Given an Arrow schema, create a new one that has the extra
47
- ``ResponseItem`` fields added.
48
- """
45
+ """Given an Arrow schema, create a new one that has the extra ``ResponseItem``
46
+ fields added."""
49
47
  return schema.insert(0, pyarrow.field("_response_index_", pyarrow.int64()))
50
48
 
51
49
 
52
50
  def make_response_schema(schema: pyarrow.Schema) -> pyarrow.Schema:
53
- """Given an Arrow schema, create a new one that has the extra
54
- ``ResponseItem`` fields added.
55
- """
51
+ """Given an Arrow schema, create a new one that has the extra ``ResponseItem``
52
+ fields added."""
56
53
  response_item_schema = make_response_item_schema(schema)
57
54
  fields = list(zip(response_item_schema.names, response_item_schema.types))
58
55
  item_type = pyarrow.struct(fields)
@@ -90,9 +87,9 @@ def subset_schema(schema: pyarrow.Schema, field_names: list[str]) -> pyarrow.Sch
90
87
  def arrow_type(annotation: type):
91
88
  """Determine a suitable arrow type for a pydantic model field.
92
89
 
93
- Supports primitive types as well as pydantic sub-models, lists, and
94
- optional types. Numeric types must have appropriate bounds specified, as
95
- Arrow cannot represent the unbounded integer types used by Python 3.
90
+ Supports primitive types as well as pydantic sub-models, lists, and optional types.
91
+ Numeric types must have appropriate bounds specified, as Arrow cannot represent the
92
+ unbounded integer types used by Python 3.
96
93
  """
97
94
  if origin := typing.get_origin(annotation):
98
95
  if origin == list:
@@ -176,9 +173,8 @@ def field_with_docstring(
176
173
  *,
177
174
  docstring: str | None = None,
178
175
  ) -> pyarrow.Field:
179
- """Wrapper for ``pyarrow.field()`` that adds a docstring in the ``__doc__``
180
- property of ``metadata``.
181
- """
176
+ """Wrapper for ``pyarrow.field()`` that adds a docstring in the ``__doc__`` property
177
+ of ``metadata``."""
182
178
  if metadata:
183
179
  metadata_with_docstring = metadata.copy()
184
180
  if docstring:
@@ -192,9 +188,8 @@ def field_with_docstring(
192
188
 
193
189
 
194
190
  def schema_function(schema: pyarrow.Schema):
195
- """Annotation for functions that return ``pyarrow.Schema``. The annotated
196
- function will return the supplied schema and will have a docstring
197
- describing the schema.
191
+ """Annotation for functions that return ``pyarrow.Schema``. The annotated function
192
+ will return the supplied schema and will have a docstring describing the schema.
198
193
 
199
194
  Intended to be applied to a function with no body, e.g.:
200
195
 
@@ -1,6 +1,5 @@
1
1
  # SPDX-FileCopyrightText: 2024 UL Research Institutes
2
2
  # SPDX-License-Identifier: Apache-2.0
3
-
4
3
  """Schema for the internal data representation of Dyff entities.
5
4
 
6
5
  We use the following naming convention:
@@ -42,9 +41,8 @@ def _k8s_quantity_regex():
42
41
 
43
42
 
44
43
  def _k8s_label_regex():
45
- """A k8s label is like a DNS label but also allows ``.`` an ``_`` as
46
- separator characters.
47
- """
44
+ """A k8s label is like a DNS label but also allows ``.`` and ``_`` as separator
45
+ characters."""
48
46
  return r"[a-z0-9A-Z]([-_.a-z0-9A-Z]{0,61}[a-z0-9A-Z])?"
49
47
 
50
48
 
@@ -64,23 +62,26 @@ def _dns_label_maxlen():
64
62
 
65
63
 
66
64
  def _dns_domain_regex():
67
- """One or more DNS labels separated by dots (``.``). Note that its maximum
68
- length is 253 characters, but we can't enforce this in the regex.
65
+ """One or more DNS labels separated by dots (``.``).
66
+
67
+ Note that its maximum length is 253 characters, but we can't enforce this in the
68
+ regex.
69
69
  """
70
70
  return f"{_dns_label_regex()}(\.{_dns_label_regex()})*"
71
71
 
72
72
 
73
73
  def _k8s_domain_maxlen():
74
- """Max length of a k8s domain. The DNS domain standard specifies 255
75
- characters, but this includes the trailing dot and null terminator. We
76
- never include a trailing dot in k8s-style domains.
74
+ """Max length of a k8s domain.
75
+
76
+ The DNS domain standard specifies 255 characters, but this includes the trailing dot
77
+ and null terminator. We never include a trailing dot in k8s-style domains.
77
78
  """
78
79
  return 253
79
80
 
80
81
 
81
82
  def _k8s_label_key_regex():
82
- """The format of keys for labels and annotations. Optional subdomain prefix
83
- followed by a k8s label.
83
+ """The format of keys for labels and annotations. Optional subdomain prefix followed
84
+ by a k8s label.
84
85
 
85
86
  See: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
86
87
 
@@ -151,6 +152,7 @@ class Entities(str, enum.Enum):
151
152
  """The kinds of entities in the dyff system."""
152
153
 
153
154
  Account = "Account"
155
+ Analysis = "Analysis"
154
156
  Audit = "Audit"
155
157
  AuditProcedure = "AuditProcedure"
156
158
  DataSource = "DataSource"
@@ -158,14 +160,18 @@ class Entities(str, enum.Enum):
158
160
  Evaluation = "Evaluation"
159
161
  InferenceService = "InferenceService"
160
162
  InferenceSession = "InferenceSession"
163
+ Measurement = "Measurement"
164
+ Method = "Method"
161
165
  Model = "Model"
162
166
  Module = "Module"
163
167
  Report = "Report"
168
+ SafetyCase = "SafetyCase"
164
169
 
165
170
 
166
171
  class Resources(str, enum.Enum):
167
172
  """The resource names corresponding to entities that have API endpoints."""
168
173
 
174
+ Analysis = "analyses"
169
175
  Audit = "audits"
170
176
  AuditProcedure = "auditprocedures"
171
177
  Dataset = "datasets"
@@ -173,9 +179,12 @@ class Resources(str, enum.Enum):
173
179
  Evaluation = "evaluations"
174
180
  InferenceService = "inferenceservices"
175
181
  InferenceSession = "inferencesessions"
182
+ Measurement = "measurements"
183
+ Method = "methods"
176
184
  Model = "models"
177
185
  Module = "modules"
178
186
  Report = "reports"
187
+ SafetyCase = "safetycases"
179
188
 
180
189
  Task = "tasks"
181
190
  """
@@ -189,30 +198,29 @@ class Resources(str, enum.Enum):
189
198
 
190
199
  @staticmethod
191
200
  def for_kind(kind: Entities) -> "Resources":
192
- if kind == Entities.Audit:
193
- return Resources.Audit
194
- elif kind == Entities.AuditProcedure:
195
- return Resources.AuditProcedure
196
- elif kind == Entities.Dataset:
197
- return Resources.Dataset
198
- elif kind == Entities.DataSource:
199
- return Resources.DataSource
200
- elif kind == Entities.Evaluation:
201
- return Resources.Evaluation
202
- elif kind == Entities.InferenceService:
203
- return Resources.InferenceService
204
- elif kind == Entities.InferenceSession:
205
- return Resources.InferenceSession
206
- elif kind == Entities.Model:
207
- return Resources.Model
208
- elif kind == Entities.Module:
209
- return Resources.Module
210
- elif kind == Entities.Report:
211
- return Resources.Report
212
- else:
201
+ try:
202
+ return __entities_to_resources[kind]
203
+ except KeyError:
213
204
  raise ValueError(f"No Resources for Entity kind: {kind}")
214
205
 
215
206
 
207
+ __entities_to_resources: dict[Entities, Resources] = {
208
+ Entities.Analysis: Resources.Analysis,
209
+ Entities.Audit: Resources.Audit,
210
+ Entities.AuditProcedure: Resources.AuditProcedure,
211
+ Entities.Dataset: Resources.Dataset,
212
+ Entities.DataSource: Resources.DataSource,
213
+ Entities.Evaluation: Resources.Evaluation,
214
+ Entities.InferenceService: Resources.InferenceService,
215
+ Entities.InferenceSession: Resources.InferenceSession,
216
+ Entities.Method: Resources.Method,
217
+ Entities.Model: Resources.Model,
218
+ Entities.Module: Resources.Module,
219
+ Entities.Report: Resources.Report,
220
+ Entities.SafetyCase: Resources.SafetyCase,
221
+ }
222
+
223
+
216
224
  class DyffModelWithID(DyffSchemaBaseModel):
217
225
  id: str = pydantic.Field(description="Unique identifier of the entity")
218
226
  account: str = pydantic.Field(description="Account that owns the entity")
@@ -231,9 +239,9 @@ LabelValue: TypeAlias = Optional[ # type: ignore
231
239
 
232
240
 
233
241
  class Label(DyffSchemaBaseModel):
234
- """A key-value label for a resource. Used to specify identifying attributes
235
- of resources that are meaningful to users but do not imply semantics in the
236
- dyff system.
242
+ """A key-value label for a resource. Used to specify identifying attributes of
243
+ resources that are meaningful to users but do not imply semantics in the dyff
244
+ system.
237
245
 
238
246
  We follow the kubernetes label conventions closely. See:
239
247
  https://kubernetes.io/docs/concepts/overview/working-with-objects/labels
@@ -287,9 +295,7 @@ Quantity: TypeAlias = pydantic.constr(regex=_k8s_quantity_regex()) # type: igno
287
295
 
288
296
 
289
297
  class ServiceClass(str, enum.Enum):
290
- """Defines the "quality of service" characteristics of a resource
291
- allocation.
292
- """
298
+ """Defines the "quality of service" characteristics of a resource allocation."""
293
299
 
294
300
  STANDARD = "standard"
295
301
  PREEMPTIBLE = "preemptible"
@@ -316,6 +322,7 @@ class Status(DyffSchemaBaseModel):
316
322
 
317
323
  class DyffEntity(Status, Labeled, Versioned, DyffModelWithID):
318
324
  kind: Literal[
325
+ "Analysis",
319
326
  "Audit",
320
327
  "AuditProcedure",
321
328
  "DataSource",
@@ -323,9 +330,12 @@ class DyffEntity(Status, Labeled, Versioned, DyffModelWithID):
323
330
  "Evaluation",
324
331
  "InferenceService",
325
332
  "InferenceSession",
333
+ "Measurement",
334
+ "Method",
326
335
  "Model",
327
336
  "Module",
328
337
  "Report",
338
+ "SafetyCase",
329
339
  ]
330
340
 
331
341
  annotations: list[Annotation] = pydantic.Field(
@@ -424,12 +434,11 @@ class APIFunctions(str, enum.Enum):
424
434
 
425
435
 
426
436
  class AccessGrant(DyffSchemaBaseModel):
427
- """Grants access to call particular functions on particular instances of
428
- particular resource types.
437
+ """Grants access to call particular functions on particular instances of particular
438
+ resource types.
429
439
 
430
- Access grants are **additive**; the subject of a set of grants has
431
- permission to do something if any part of any of those grants gives the
432
- subject that permission.
440
+ Access grants are **additive**; the subject of a set of grants has permission to do
441
+ something if any part of any of those grants gives the subject that permission.
433
442
  """
434
443
 
435
444
  resources: list[Resources] = pydantic.Field(
@@ -450,11 +459,11 @@ class AccessGrant(DyffSchemaBaseModel):
450
459
 
451
460
 
452
461
  class APIKey(DyffSchemaBaseModel):
453
- """A description of a set of permissions granted to a single subject
454
- (either an account or a workload).
462
+ """A description of a set of permissions granted to a single subject (either an
463
+ account or a workload).
455
464
 
456
- Dyff API clients authenticate with a *token* that contains a
457
- cryptographically signed APIKey.
465
+ Dyff API clients authenticate with a *token* that contains a cryptographically
466
+ signed APIKey.
458
467
  """
459
468
 
460
469
  id: str = pydantic.Field(
@@ -543,9 +552,8 @@ class ArchiveFormat(DyffSchemaBaseModel):
543
552
 
544
553
 
545
554
  class ExtractorStep(DyffSchemaBaseModel):
546
- """Description of a step in the process of turning a hierarchical
547
- DataSource into a Dataset.
548
- """
555
+ """Description of a step in the process of turning a hierarchical DataSource into a
556
+ Dataset."""
549
557
 
550
558
  action: str
551
559
  name: Optional[str] = None
@@ -843,8 +851,7 @@ class ModelSpec(ModelBase):
843
851
 
844
852
  class Model(DyffEntity, ModelSpec):
845
853
  """A Model is the "raw" form of an inference model, from which one or more
846
- InferenceServices may be built.
847
- """
854
+ InferenceServices may be built."""
848
855
 
849
856
  kind: Literal["Model"] = Entities.Model.value
850
857
 
@@ -975,9 +982,8 @@ class InferenceSessionSpec(InferenceSessionBase):
975
982
 
976
983
 
977
984
  class InferenceSession(DyffEntity, InferenceSessionSpec):
978
- """An InferenceSession is a deployment of an InferenceService that exposes
979
- an API for interactive queries.
980
- """
985
+ """An InferenceSession is a deployment of an InferenceService that exposes an API
986
+ for interactive queries."""
981
987
 
982
988
  kind: Literal["InferenceSession"] = Entities.InferenceSession.value
983
989
 
@@ -1024,9 +1030,8 @@ class EvaluationBase(DyffSchemaBaseModel):
1024
1030
 
1025
1031
 
1026
1032
  class Evaluation(DyffEntity, EvaluationBase):
1027
- """A description of how to run an InferenceService on a Dataset to obtain
1028
- a set of evaluation results.
1029
- """
1033
+ """A description of how to run an InferenceService on a Dataset to obtain a set of
1034
+ evaluation results."""
1030
1035
 
1031
1036
  kind: Literal["Evaluation"] = Entities.Evaluation.value
1032
1037
 
@@ -1076,7 +1081,14 @@ class ReportBase(DyffSchemaBaseModel):
1076
1081
 
1077
1082
 
1078
1083
  class Report(DyffEntity, ReportBase):
1079
- """A Report transforms raw model outputs into some useful statistics."""
1084
+ """A Report transforms raw model outputs into some useful statistics.
1085
+
1086
+ .. deprecated:: 0.8.0
1087
+
1088
+ Report functionality has been refactored into the
1089
+ Method/Measurement/Analysis apparatus. Creation of new Reports is
1090
+ disabled.
1091
+ """
1080
1092
 
1081
1093
  kind: Literal["Report"] = Entities.Report.value
1082
1094
 
@@ -1102,7 +1114,277 @@ class Report(DyffEntity, ReportBase):
1102
1114
  )
1103
1115
 
1104
1116
  def dependencies(self) -> list[str]:
1105
- return [self.evaluation]
1117
+ return [self.evaluation] + self.modules
1118
+
1119
+ def resource_allocation(self) -> Optional[ResourceAllocation]:
1120
+ return None
1121
+
1122
+
1123
+ class QueryableDyffEntity(DyffModelWithID):
1124
+ name: str = pydantic.Field(description="Descriptive name of the resource")
1125
+
1126
+
1127
+ class MeasurementLevel(str, enum.Enum):
1128
+ Dataset = "Dataset"
1129
+ Instance = "Instance"
1130
+
1131
+
1132
+ class AnalysisOutputQueryFields(DyffSchemaBaseModel):
1133
+ method: QueryableDyffEntity = pydantic.Field(
1134
+ description="Basic information about the Method that was run to produce the output."
1135
+ )
1136
+
1137
+ dataset: Optional[QueryableDyffEntity] = pydantic.Field(
1138
+ default=None,
1139
+ description="Basic information about the Dataset being analyzed, if applicable.",
1140
+ )
1141
+
1142
+ evaluation: Optional[QueryableDyffEntity] = pydantic.Field(
1143
+ default=None,
1144
+ description="Basic information about the Evaluation being analyzed, if applicable.",
1145
+ )
1146
+
1147
+ inferenceService: Optional[QueryableDyffEntity] = pydantic.Field(
1148
+ default=None,
1149
+ description="Basic information about the InferenceService being analyzed, if applicable.",
1150
+ )
1151
+
1152
+ model: Optional[QueryableDyffEntity] = pydantic.Field(
1153
+ description="Basic information about the Model being analyzed, if applicable",
1154
+ )
1155
+
1156
+
1157
+ class MeasurementSpec(DyffSchemaBaseModel):
1158
+ name: str = pydantic.Field(description="Descriptive name of the Measurement.")
1159
+ description: Optional[str] = pydantic.Field(
1160
+ default=None, description="Long-form description, interpreted as Markdown."
1161
+ )
1162
+ level: MeasurementLevel = pydantic.Field(description="Measurement level")
1163
+ schema_: DataSchema = pydantic.Field(
1164
+ alias="schema",
1165
+ description="Schema of the measurement data. Instance-level measurements must include an _index_ field.",
1166
+ )
1167
+
1168
+
1169
+ class Measurement(DyffEntity, MeasurementSpec, AnalysisOutputQueryFields):
1170
+ kind: Literal["Measurement"] = Entities.Measurement.value
1171
+
1172
+ def dependencies(self) -> list[str]:
1173
+ return []
1174
+
1175
+ def resource_allocation(self) -> Optional[ResourceAllocation]:
1176
+ return None
1177
+
1178
+
1179
+ class SafetyCaseSpec(DyffSchemaBaseModel):
1180
+ name: str = pydantic.Field(description="Descriptive name of the SafetyCase.")
1181
+ description: Optional[str] = pydantic.Field(
1182
+ default=None, description="Long-form description, interpreted as Markdown."
1183
+ )
1184
+
1185
+
1186
+ class SafetyCase(DyffEntity, SafetyCaseSpec, AnalysisOutputQueryFields):
1187
+ kind: Literal["SafetyCase"] = Entities.SafetyCase.value
1188
+
1189
+ def dependencies(self) -> list[str]:
1190
+ return []
1191
+
1192
+ def resource_allocation(self) -> Optional[ResourceAllocation]:
1193
+ return None
1194
+
1195
+
1196
+ class MethodImplementationKind(str, enum.Enum):
1197
+ JupyterNotebook = "JupyterNotebook"
1198
+ PythonFunction = "PythonFunction"
1199
+
1200
+ PythonRubric = "PythonRubric"
1201
+ """A Rubric generates an instance-level measurement, consuming a Dataset and an
1202
+ Evaluation.
1203
+
1204
+ .. deprecated:: 0.8.0
1205
+
1206
+ Report functionality has been refactored into the
1207
+ Method/Measurement/Analysis apparatus. Creation of new Reports is
1208
+ disabled.
1209
+ """
1210
+
1211
+
1212
+ class MethodImplementationJupyterNotebook(DyffSchemaBaseModel):
1213
+ notebookModule: str = pydantic.Field(
1214
+ description="ID of the Module that contains the notebook file."
1215
+ " This does *not* add the Module as a dependency; you must do that separately."
1216
+ )
1217
+ notebookPath: str = pydantic.Field(
1218
+ description="Path to the notebook file relative to the Module root directory."
1219
+ )
1220
+
1221
+
1222
+ class MethodImplementationPythonFunction(DyffSchemaBaseModel):
1223
+ fullyQualifiedName: str = pydantic.Field(
1224
+ description="The fully-qualified name of the Python function to call."
1225
+ )
1226
+
1227
+
1228
+ class MethodImplementationPythonRubric(DyffSchemaBaseModel):
1229
+ """A Rubric generates an instance-level measurement, consuming a Dataset and an
1230
+ Evaluation.
1231
+
1232
+ .. deprecated:: 0.8.0
1233
+
1234
+ Report functionality has been refactored into the
1235
+ Method/Measurement/Analysis apparatus. Creation of new Reports is
1236
+ disabled.
1237
+ """
1238
+
1239
+ fullyQualifiedName: str = pydantic.Field(
1240
+ description="The fully-qualified name of the Python Rubric to run."
1241
+ )
1242
+
1243
+
1244
+ class MethodImplementation(DyffSchemaBaseModel):
1245
+ kind: str = pydantic.Field(description="The kind of implementation")
1246
+ pythonFunction: Optional[MethodImplementationPythonFunction] = pydantic.Field(
1247
+ default=None, description="Specification of a Python function to call."
1248
+ )
1249
+ pythonRubric: Optional[MethodImplementationPythonRubric] = pydantic.Field(
1250
+ default=None, description="@deprecated Specification of a Python Rubric to run."
1251
+ )
1252
+ jupyterNotebook: Optional[MethodImplementationJupyterNotebook] = pydantic.Field(
1253
+ default=None, description="Specification of a Jupyter notebook to run."
1254
+ )
1255
+
1256
+
1257
+ class MethodInputKind(str, enum.Enum):
1258
+ Dataset = Entities.Dataset.value
1259
+ Evaluation = Entities.Evaluation.value
1260
+ Measurement = Entities.Measurement.value
1261
+
1262
+ Report = Entities.Report.value
1263
+ """
1264
+ .. deprecated:: 0.8.0
1265
+
1266
+ The Report entity is deprecated, but we accept it as an analysis input
1267
+ for backward compatibility.
1268
+ """
1269
+
1270
+
1271
+ class MethodOutputKind(str, enum.Enum):
1272
+ Measurement = Entities.Measurement.value
1273
+ SafetyCase = Entities.SafetyCase.value
1274
+
1275
+
1276
+ class MethodParameter(DyffSchemaBaseModel):
1277
+ keyword: str = pydantic.Field(
1278
+ description="The parameter is referred to by 'keyword' in the context of the method implementation."
1279
+ )
1280
+ description: Optional[str] = pydantic.Field(
1281
+ default=None, description="Long-form description, interpreted as Markdown."
1282
+ )
1283
+
1284
+
1285
+ class MethodInput(DyffSchemaBaseModel):
1286
+ kind: MethodInputKind = pydantic.Field(description="The kind of input artifact.")
1287
+ keyword: str = pydantic.Field(
1288
+ description="The input is referred to by 'keyword' in the context of the method implementation."
1289
+ )
1290
+ description: Optional[str] = pydantic.Field(
1291
+ default=None, description="Long-form description, interpreted as Markdown."
1292
+ )
1293
+
1294
+
1295
+ class MethodOutput(DyffSchemaBaseModel):
1296
+ kind: MethodOutputKind = pydantic.Field(description="The kind of output artifact")
1297
+ measurement: Optional[MeasurementSpec] = pydantic.Field(
1298
+ default=None, description="Specification of a Measurement output."
1299
+ )
1300
+ safetyCase: Optional[SafetyCaseSpec] = pydantic.Field(
1301
+ default=None, description="Specification of a SafetyCase output."
1302
+ )
1303
+
1304
+
1305
+ class MethodBase(DyffSchemaBaseModel):
1306
+ name: str = pydantic.Field(description="Descriptive name of the Method.")
1307
+
1308
+ description: Optional[str] = pydantic.Field(
1309
+ default=None, description="Long-form description, interpreted as Markdown."
1310
+ )
1311
+
1312
+ implementation: MethodImplementation = pydantic.Field(
1313
+ description="How the Method is implemented."
1314
+ )
1315
+
1316
+ parameters: list[MethodParameter] = pydantic.Field(
1317
+ default_factory=list,
1318
+ description="Configuration parameters accepted by the Method. Values are available at ctx.args(keyword)",
1319
+ )
1320
+
1321
+ inputs: list[MethodInput] = pydantic.Field(
1322
+ default_factory=list,
1323
+ description="Input data entities consumed by the Method. Available at ctx.inputs(keyword)",
1324
+ )
1325
+
1326
+ output: MethodOutput = pydantic.Field(
1327
+ description="Specification of the Method output."
1328
+ )
1329
+
1330
+ modules: list[str] = pydantic.Field(
1331
+ default_factory=list,
1332
+ description="Modules to load into the analysis environment",
1333
+ )
1334
+
1335
+
1336
+ class Method(DyffEntity, MethodBase):
1337
+ kind: Literal["Method"] = Entities.Method.value
1338
+
1339
+ def dependencies(self) -> list[str]:
1340
+ return self.modules
1341
+
1342
+ def resource_allocation(self) -> Optional[ResourceAllocation]:
1343
+ return None
1344
+
1345
+
1346
+ class AnalysisInputMapping(DyffSchemaBaseModel):
1347
+ keyword: str = pydantic.Field(
1348
+ description="The 'keyword' specified for this input in the MethodSpec."
1349
+ )
1350
+ entity: str = pydantic.Field(
1351
+ description="The ID of the entity whose data should be made available as 'keyword'."
1352
+ )
1353
+
1354
+
1355
+ class AnalysisArgument(DyffSchemaBaseModel):
1356
+ keyword: str = pydantic.Field(
1357
+ description="The 'keyword' of the corresponding ModelParameter."
1358
+ )
1359
+ value: str = pydantic.Field(
1360
+ description="The value of of the argument."
1361
+ " Always a string; implementations are responsible for parsing."
1362
+ )
1363
+
1364
+
1365
+ class ForeignMethod(DyffModelWithID, MethodBase):
1366
+ pass
1367
+
1368
+
1369
+ class AnalysisBase(DyffSchemaBaseModel):
1370
+ arguments: list[AnalysisArgument] = pydantic.Field(
1371
+ description="Arguments to pass to the Method implementation."
1372
+ )
1373
+
1374
+ inputs: list[AnalysisInputMapping] = pydantic.Field(
1375
+ description="Mapping of keywords to data entities."
1376
+ )
1377
+
1378
+
1379
+ class Analysis(DyffEntity, AnalysisBase):
1380
+ kind: Literal["Analysis"] = Entities.Analysis.value
1381
+
1382
+ method: ForeignMethod = pydantic.Field(
1383
+ description="The analysis Method to run.",
1384
+ )
1385
+
1386
+ def dependencies(self) -> list[str]:
1387
+ return [self.method.id] + [x.entity for x in self.inputs]
1106
1388
 
1107
1389
  def resource_allocation(self) -> Optional[ResourceAllocation]:
1108
1390
  return None
@@ -1113,9 +1395,8 @@ class Report(DyffEntity, ReportBase):
1113
1395
 
1114
1396
 
1115
1397
  class _JobStatus(NamedTuple):
1116
- """The set of basic ``status`` values that are applicable to all "job"
1117
- entities (entities that involve computation tasks).
1118
- """
1398
+ """The set of basic ``status`` values that are applicable to all "job" entities
1399
+ (entities that involve computation tasks)."""
1119
1400
 
1120
1401
  complete: str = "Complete"
1121
1402
  failed: str = "Failed"
@@ -1126,8 +1407,7 @@ JobStatus = _JobStatus()
1126
1407
 
1127
1408
  class _ResourceStatus(NamedTuple):
1128
1409
  """The set of basic ``status`` values that are applicable to all "resource"
1129
- entities.
1130
- """
1410
+ entities."""
1131
1411
 
1132
1412
  ready: str = "Ready"
1133
1413
  error: str = "Error"
@@ -1283,7 +1563,8 @@ ModelStatusReason = _ModelStatusReason()
1283
1563
 
1284
1564
 
1285
1565
  class _InferenceServiceStatus(NamedTuple):
1286
- """The set of ``status`` values that are applicable to ``InferenceService`` entities."""
1566
+ """The set of ``status`` values that are applicable to ``InferenceService``
1567
+ entities."""
1287
1568
 
1288
1569
  created: str = EntityStatus.created
1289
1570
  admitted: str = EntityStatus.admitted
@@ -1295,7 +1576,8 @@ InferenceServiceStatus = _InferenceServiceStatus()
1295
1576
 
1296
1577
 
1297
1578
  class _InferenceServiceStatusReason(NamedTuple):
1298
- """The set of ``reason`` values that are applicable to ``InferenceService`` entities."""
1579
+ """The set of ``reason`` values that are applicable to ``InferenceService``
1580
+ entities."""
1299
1581
 
1300
1582
  quota_limit: str = EntityStatusReason.quota_limit
1301
1583
  build_failed: str = "BuildFailed"
@@ -1349,6 +1631,7 @@ def is_status_success(status: str) -> bool:
1349
1631
 
1350
1632
 
1351
1633
  _ENTITY_CLASS = {
1634
+ Entities.Analysis: Analysis,
1352
1635
  Entities.Audit: Audit,
1353
1636
  Entities.AuditProcedure: AuditProcedure,
1354
1637
  Entities.Dataset: Dataset,
@@ -1356,9 +1639,12 @@ _ENTITY_CLASS = {
1356
1639
  Entities.Evaluation: Evaluation,
1357
1640
  Entities.InferenceService: InferenceService,
1358
1641
  Entities.InferenceSession: InferenceSession,
1642
+ Entities.Measurement: Measurement,
1643
+ Entities.Method: Method,
1359
1644
  Entities.Model: Model,
1360
1645
  Entities.Module: Module,
1361
1646
  Entities.Report: Report,
1647
+ Entities.SafetyCase: SafetyCase,
1362
1648
  }
1363
1649
 
1364
1650
 
@@ -1367,6 +1653,7 @@ def entity_class(kind: Entities):
1367
1653
 
1368
1654
 
1369
1655
  DyffEntityType = Union[
1656
+ Analysis,
1370
1657
  Audit,
1371
1658
  AuditProcedure,
1372
1659
  DataSource,
@@ -1374,9 +1661,12 @@ DyffEntityType = Union[
1374
1661
  Evaluation,
1375
1662
  InferenceService,
1376
1663
  InferenceSession,
1664
+ Measurement,
1665
+ Method,
1377
1666
  Model,
1378
1667
  Module,
1379
1668
  Report,
1669
+ SafetyCase,
1380
1670
  ]
1381
1671
 
1382
1672
 
@@ -1384,6 +1674,11 @@ __all__ = [
1384
1674
  "Accelerator",
1385
1675
  "AcceleratorGPU",
1386
1676
  "AccessGrant",
1677
+ "Analysis",
1678
+ "AnalysisArgument",
1679
+ "AnalysisBase",
1680
+ "AnalysisInputMapping",
1681
+ "AnalysisOutputQueryFields",
1387
1682
  "Annotation",
1388
1683
  "APIFunctions",
1389
1684
  "APIKey",
@@ -1427,6 +1722,21 @@ __all__ = [
1427
1722
  "LabelKey",
1428
1723
  "LabelValue",
1429
1724
  "Labeled",
1725
+ "Measurement",
1726
+ "MeasurementLevel",
1727
+ "MeasurementSpec",
1728
+ "Method",
1729
+ "MethodBase",
1730
+ "MethodImplementation",
1731
+ "MethodImplementationJupyterNotebook",
1732
+ "MethodImplementationKind",
1733
+ "MethodImplementationPythonFunction",
1734
+ "MethodImplementationPythonRubric",
1735
+ "MethodInput",
1736
+ "MethodInputKind",
1737
+ "MethodOutput",
1738
+ "MethodOutputKind",
1739
+ "MethodParameter",
1430
1740
  "Model",
1431
1741
  "ModelArtifact",
1432
1742
  "ModelArtifactHuggingFaceCache",
@@ -1443,9 +1753,12 @@ __all__ = [
1443
1753
  "ModelStorage",
1444
1754
  "Module",
1445
1755
  "ModuleBase",
1756
+ "QueryableDyffEntity",
1446
1757
  "Report",
1447
1758
  "ReportBase",
1448
1759
  "Resources",
1760
+ "SafetyCase",
1761
+ "SafetyCaseSpec",
1449
1762
  "SchemaAdapter",
1450
1763
  "Status",
1451
1764
  "StorageSignedURL",
@@ -1,9 +1,9 @@
1
1
  # SPDX-FileCopyrightText: 2024 UL Research Institutes
2
2
  # SPDX-License-Identifier: Apache-2.0
3
+ """The request schemas describe the information that you need to provide when creating
4
+ new instances of the core types.
3
5
 
4
- """
5
- The request schemas describe the information that you need to provide when
6
- creating new instances of the core types. For example, requests do not have
6
+ For example, requests do not have
7
7
  ``.id`` fields because these are assigned by the platform when the resource
8
8
  is created. Similarly, if a resource depends on an instance of another
9
9
  resource, the request will refer to the dependency by its ID, while the core
@@ -19,12 +19,14 @@ import pydantic
19
19
 
20
20
  from .base import DyffSchemaBaseModel
21
21
  from .platform import (
22
+ AnalysisBase,
22
23
  DatasetBase,
23
24
  DataView,
24
25
  EvaluationBase,
25
26
  InferenceServiceBase,
26
27
  InferenceSessionBase,
27
28
  Labeled,
29
+ MethodBase,
28
30
  ModelSpec,
29
31
  ModuleBase,
30
32
  ReportBase,
@@ -36,6 +38,13 @@ class DyffEntityCreateRequest(Versioned, DyffSchemaBaseModel):
36
38
  account: str = pydantic.Field(description="Account that owns the entity")
37
39
 
38
40
 
41
+ class AnalysisCreateRequest(DyffEntityCreateRequest, AnalysisBase):
42
+ """An Analysis transforms Datasets, Evaluations, and Measurements into new
43
+ Measurements or SafetyCases."""
44
+
45
+ method: str = pydantic.Field(description="Method ID")
46
+
47
+
39
48
  class DatasetCreateRequest(DyffEntityCreateRequest, DatasetBase):
40
49
  pass
41
50
 
@@ -59,21 +68,31 @@ class EvaluationInferenceSessionRequest(InferenceSessionBase):
59
68
 
60
69
 
61
70
  class EvaluationCreateRequest(DyffEntityCreateRequest, EvaluationBase):
62
- """A description of how to run an InferenceService on a Dataset to obtain
63
- a set of evaluation results.
64
- """
71
+ """A description of how to run an InferenceService on a Dataset to obtain a set of
72
+ evaluation results."""
65
73
 
66
74
  inferenceSession: EvaluationInferenceSessionRequest = pydantic.Field(
67
75
  description="Specification of the InferenceSession that will perform inference for the evaluation.",
68
76
  )
69
77
 
70
78
 
79
+ class MethodCreateRequest(DyffEntityCreateRequest, MethodBase):
80
+ pass
81
+
82
+
71
83
  class ModuleCreateRequest(DyffEntityCreateRequest, ModuleBase):
72
84
  pass
73
85
 
74
86
 
75
87
  class ReportCreateRequest(DyffEntityCreateRequest, ReportBase):
76
- """A Report transforms raw model outputs into some useful statistics."""
88
+ """A Report transforms raw model outputs into some useful statistics.
89
+
90
+ .. deprecated:: 0.8.0
91
+
92
+ Report functionality has been refactored into the
93
+ Method/Measurement/Analysis apparatus. Creation of new Reports is
94
+ disabled.
95
+ """
77
96
 
78
97
  datasetView: Optional[Union[str, DataView]] = pydantic.Field(
79
98
  default=None,
@@ -111,6 +130,10 @@ class DyffEntityQueryRequest(DyffSchemaBaseModel):
111
130
  return super().json(exclude_unset=exclude_unset, **kwargs)
112
131
 
113
132
 
133
+ class AnalysisQueryRequest(DyffEntityQueryRequest):
134
+ name: Optional[str] = pydantic.Field(default=None)
135
+
136
+
114
137
  class AuditQueryRequest(DyffEntityQueryRequest):
115
138
  name: Optional[str] = pydantic.Field(default=None)
116
139
 
@@ -141,6 +164,14 @@ class InferenceSessionQueryRequest(DyffEntityQueryRequest):
141
164
  modelName: Optional[str] = pydantic.Field(default=None)
142
165
 
143
166
 
167
+ class MeasurementQueryRequest(DyffEntityQueryRequest):
168
+ dataset: Optional[str] = pydantic.Field(default=None)
169
+ inferenceService: Optional[str] = pydantic.Field(default=None)
170
+ inferenceServiceName: Optional[str] = pydantic.Field(default=None)
171
+ model: Optional[str] = pydantic.Field(default=None)
172
+ modelName: Optional[str] = pydantic.Field(default=None)
173
+
174
+
144
175
  class ModelQueryRequest(DyffEntityQueryRequest):
145
176
  name: Optional[str] = pydantic.Field(default=None)
146
177
 
@@ -158,6 +189,8 @@ class ReportQueryRequest(DyffEntityQueryRequest):
158
189
 
159
190
 
160
191
  __all__ = [
192
+ "AnalysisCreateRequest",
193
+ "AnalysisQueryRequest",
161
194
  "AuditQueryRequest",
162
195
  "DyffEntityCreateRequest",
163
196
  "DyffEntityQueryRequest",
dyff/schema/v0/r1/test.py CHANGED
@@ -27,8 +27,10 @@ _OutputModelT = TypeVar("_OutputModelT", bound=DyffSchemaBaseModel)
27
27
 
28
28
 
29
29
  def forbid_additional_properties(schema: dict[str, Any]) -> dict[str, Any]:
30
- """Create a modified JSON Schema where all elements of type ``object``
31
- have ``additionalProperties = False`` set. This is useful when generating
30
+ """Create a modified JSON Schema where all elements of type ``object`` have
31
+ ``additionalProperties = False`` set.
32
+
33
+ This is useful when generating
32
34
  data conforming to the schema with the ``hypothesis`` package, since
33
35
  otherwise it will generate arbitrary extra fields.
34
36
  """
@@ -53,8 +55,8 @@ def pydantic_model_samples(
53
55
  *,
54
56
  acceptance_predicates: Optional[list[Callable[[_ModelT], bool]]] = None,
55
57
  ) -> list[_ModelT]:
56
- """Sample a list of values that all conform to the schema defined by a
57
- pydantic model.
58
+ """Sample a list of values that all conform to the schema defined by a pydantic
59
+ model.
58
60
 
59
61
  We use the ``hypothesis`` library to do the sampling. The sampling process
60
62
  tends to generate "extreme" values because the hypothesis library is meant
@@ -159,6 +161,9 @@ class MockDataset:
159
161
  else:
160
162
  raise NotImplementedError("sampling requires jsonSchema")
161
163
 
164
+ if self._data_schema.arrowSchema is None:
165
+ raise ValueError("data_schema.arrowSchema must be != None")
166
+
162
167
  if self._data_view and self._data_view.adapterPipeline:
163
168
  self._data_view_adapter = create_pipeline(self._data_view.adapterPipeline)
164
169
 
@@ -173,9 +178,8 @@ class MockDataset:
173
178
  def sample(self) -> dict[str, Any]:
174
179
  """Sample one item that conforms to the dataset schema, in JSON format.
175
180
 
176
- Note that samples are drawn with replacement from a finite and fairly
177
- small pool of candidates, so you should expect to get duplicate values
178
- fairly often.
181
+ Note that samples are drawn with replacement from a finite and fairly small pool
182
+ of candidates, so you should expect to get duplicate values fairly often.
179
183
  """
180
184
  data = random.choice(self._data_schema_samples)
181
185
  if self._data_view_adapter:
@@ -194,6 +198,7 @@ class MockDataset:
194
198
  @functools.lru_cache()
195
199
  @copydoc(pyarrow.dataset.Dataset.schema)
196
200
  def schema(self) -> pyarrow.Schema:
201
+ assert self.data_schema.arrowSchema is not None # safe; see __init__
197
202
  return arrow.decode_schema(self.data_schema.arrowSchema)
198
203
 
199
204
  @copydoc(pyarrow.dataset.Dataset.to_batches)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dyff-schema
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: Data models for the Dyff AI auditing platform.
5
5
  Author-email: Digital Safety Research Institute <contact@dsri.org>
6
6
  License: Apache-2.0
@@ -2,10 +2,10 @@ dyff/schema/__init__.py,sha256=JcpxaRHNYgLjJWLjVayLlqacb2GX49Pazpwb8m-BctM,1031
2
2
  dyff/schema/adapters.py,sha256=YMTHv_2VlLGFp-Kqwa6H51hjffHmk8gXjZilHysIF5Q,123
3
3
  dyff/schema/base.py,sha256=jvaNtsSZyFfsdUZTcY_U-yfLY5_GyrMxSXhON2R9XR0,119
4
4
  dyff/schema/copydoc.py,sha256=B4ZRpQmbFxi-3l9LCHvaJiVKb9VxADgC5vey804Febc,1075
5
- dyff/schema/ids.py,sha256=MMaYOsmg2RQh0XskH9VA4GVTlBeuRrOIXCgHvrm5zFc,1426
5
+ dyff/schema/ids.py,sha256=Z3JQzlAJQC2Pam7ehxb4TXA4MIuFQN5SyzL5Ql0RukA,1422
6
6
  dyff/schema/platform.py,sha256=peHzGGSd5dQ-EFXrWDjBqMUtoOL3iCHxcV3XzW6Rjag,123
7
7
  dyff/schema/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- dyff/schema/quantity.py,sha256=q4yVF1p515wojZQhHi2mxxpXIpHDK9PThPWBm0gFK4Q,3792
8
+ dyff/schema/quantity.py,sha256=hhS9ybqW_6I--acPhkoZbFWASxFTEs7CUjO4pmpBJ98,3788
9
9
  dyff/schema/requests.py,sha256=euObiC5IRe9fGev9ND--bcysuACNBDhSbZ5kONSaRwE,123
10
10
  dyff/schema/test.py,sha256=xtXZHqdVi_bjGXFFrd4nU10Y9CiukPyZj03rL84ckD4,119
11
11
  dyff/schema/version.py,sha256=MZhYsYOVmMuZo0R1vl1iSus7zfB8UerDcEgoZg7Ln7s,372
@@ -19,23 +19,23 @@ dyff/schema/io/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
19
19
  dyff/schema/io/vllm.py,sha256=2q05M_-lTzq9oywKXHPPpCFCSDVCSsRQqtmERzWTtio,123
20
20
  dyff/schema/v0/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
21
21
  dyff/schema/v0/r1/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
22
- dyff/schema/v0/r1/adapters.py,sha256=WB1L2vf17BPZmALReT4Iiio_M6EvpGU-Wa9zAHv1mao,23561
23
- dyff/schema/v0/r1/base.py,sha256=xVeguAoVzrNYt889uYQDlJrUHPUe2nBOhn7CMIV7Ah0,17134
24
- dyff/schema/v0/r1/platform.py,sha256=wRC62lF6FEJSKaB7vNgOYMrJeJ4eB4lKYNjBMfEq5hE,45090
25
- dyff/schema/v0/r1/requests.py,sha256=AKHr7mHyaz_heA6a66ogZx0RmfoOh9W5SKNsrYDERvA,6148
26
- dyff/schema/v0/r1/test.py,sha256=sSdKCgEONsGSolIPS8keQmNXsYUyNrJ2jHlX3y4JEj0,10522
22
+ dyff/schema/v0/r1/adapters.py,sha256=2t2oxsnGfSEDKKDIEYw4qqLXMH7qlFIwPVuLyUmbsHs,23552
23
+ dyff/schema/v0/r1/base.py,sha256=g_wqh4OU_9ftMHovVxPtdeup4O5MZi422v3eZH99ZQI,17139
24
+ dyff/schema/v0/r1/platform.py,sha256=m2607uqLou3Q9vwN3yUqD694jPJSJINZXOM3fd0Ya-0,55236
25
+ dyff/schema/v0/r1/requests.py,sha256=4xfHa8c53acDacZfUZt7TGYy1PaJRv08bwMWtU279UE,7213
26
+ dyff/schema/v0/r1/test.py,sha256=X6dUyVd5svcPCI-PBMOAqEfK9jv3bRDvkQTJzwS96c0,10720
27
27
  dyff/schema/v0/r1/version.py,sha256=SG9ds8afHgdrqOA4CEY8-MM3lKfAE9G0rxsG4uyeglE,363
28
- dyff/schema/v0/r1/dataset/__init__.py,sha256=ZXO9SELqD1zxHGVwTe1iYfu3HZn9g4QkGY-mwWcdVsQ,2549
29
- dyff/schema/v0/r1/dataset/arrow.py,sha256=81Fqz8oS_67iclFUVr4iEQKZWHClW-LR-R6KAPPSWj0,12336
28
+ dyff/schema/v0/r1/dataset/__init__.py,sha256=LbVlkO2asyGYBKk2z49xjJYTM-pu9y9e4eQDXgTDLnM,2553
29
+ dyff/schema/v0/r1/dataset/arrow.py,sha256=juJ3MbiCL54zn3dSmXVl4GBhfLJPk6Qvasb0epFZ4V0,12312
30
30
  dyff/schema/v0/r1/dataset/binary.py,sha256=MLqj_O7iJvsDiom23jxR054seJaJntc0FTTkHuHYDJg,544
31
31
  dyff/schema/v0/r1/dataset/classification.py,sha256=pbbEXhxyZ0pgYwzaTlM8hVHPNEJDCdHKOeGowPXgWYc,311
32
32
  dyff/schema/v0/r1/dataset/text.py,sha256=nLIn91Zlt0tNdXUklSgjJ-kEDxoPX32ISLkiv2DzLvE,1008
33
33
  dyff/schema/v0/r1/dataset/vision.py,sha256=aIe0fbfM_g3DsrDTdg2K803YKLjZBpurM_VJcJFuZLc,369
34
34
  dyff/schema/v0/r1/io/__init__.py,sha256=L5y8UhRnojerPYHumsxQJRcHCNz8Hj9NM8b47mewMNs,92
35
35
  dyff/schema/v0/r1/io/vllm.py,sha256=CUE9y8KthtUI7sD49S875rDmPvKotSXVIRaBS79aBZs,5320
36
- dyff_schema-0.2.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
37
- dyff_schema-0.2.2.dist-info/METADATA,sha256=xjroREdO3IQVYc1297DKrHj2xpHSUN7_GoZHEon_VMo,3459
38
- dyff_schema-0.2.2.dist-info/NOTICE,sha256=YONACu0s_Ui6jNi-wtEsVQbTU1JIkh8wvLH6d1-Ni_w,43
39
- dyff_schema-0.2.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
40
- dyff_schema-0.2.2.dist-info/top_level.txt,sha256=9e3VVdeX73t_sUJOPQPCcGtYO1JhoErhHIi3WoWGcFI,5
41
- dyff_schema-0.2.2.dist-info/RECORD,,
36
+ dyff_schema-0.3.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
37
+ dyff_schema-0.3.0.dist-info/METADATA,sha256=fU3lw7Kjf0ru9lz0PdNO0iBgjExSNL_7SNAntY9wJMQ,3459
38
+ dyff_schema-0.3.0.dist-info/NOTICE,sha256=YONACu0s_Ui6jNi-wtEsVQbTU1JIkh8wvLH6d1-Ni_w,43
39
+ dyff_schema-0.3.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
40
+ dyff_schema-0.3.0.dist-info/top_level.txt,sha256=9e3VVdeX73t_sUJOPQPCcGtYO1JhoErhHIi3WoWGcFI,5
41
+ dyff_schema-0.3.0.dist-info/RECORD,,