dyff-schema 0.17.0__tar.gz → 0.19.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (59)
  1. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/.gitlab-ci.yml +1 -1
  2. {dyff_schema-0.17.0/dyff_schema.egg-info → dyff_schema-0.19.0}/PKG-INFO +1 -1
  3. dyff_schema-0.19.0/dyff/schema/dataset/embedding.py +4 -0
  4. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/base.py +4 -1
  5. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/dataset/arrow.py +23 -13
  6. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/dataset/binary.py +5 -1
  7. dyff_schema-0.19.0/dyff/schema/v0/r1/dataset/embedding.py +26 -0
  8. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/platform.py +81 -25
  9. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/requests.py +21 -0
  10. {dyff_schema-0.17.0 → dyff_schema-0.19.0/dyff_schema.egg-info}/PKG-INFO +1 -1
  11. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff_schema.egg-info/SOURCES.txt +2 -0
  12. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/.gitignore +0 -0
  13. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/.licenserc.yaml +0 -0
  14. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/.pre-commit-config.yaml +0 -0
  15. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/.prettierignore +0 -0
  16. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/.secrets.baseline +0 -0
  17. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/CODE_OF_CONDUCT.md +0 -0
  18. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/LICENSE +0 -0
  19. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/NOTICE +0 -0
  20. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/README.md +0 -0
  21. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/__init__.py +0 -0
  22. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/adapters.py +0 -0
  23. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/annotations.py +0 -0
  24. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/base.py +0 -0
  25. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/copydoc.py +0 -0
  26. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/dataset/__init__.py +0 -0
  27. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/dataset/arrow.py +0 -0
  28. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/dataset/binary.py +0 -0
  29. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/dataset/classification.py +0 -0
  30. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/dataset/text.py +0 -0
  31. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/dataset/vision.py +0 -0
  32. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/errors.py +0 -0
  33. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/ids.py +0 -0
  34. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/io/__init__.py +0 -0
  35. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/io/vllm.py +0 -0
  36. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/platform.py +0 -0
  37. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/py.typed +0 -0
  38. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/quantity.py +0 -0
  39. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/requests.py +0 -0
  40. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/test.py +0 -0
  41. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/__init__.py +0 -0
  42. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/__init__.py +0 -0
  43. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/adapters.py +0 -0
  44. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/dataset/__init__.py +0 -0
  45. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/dataset/classification.py +0 -0
  46. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/dataset/text.py +0 -0
  47. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/dataset/vision.py +0 -0
  48. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/io/__init__.py +0 -0
  49. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/io/vllm.py +0 -0
  50. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/test.py +0 -0
  51. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/version.py +0 -0
  52. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/version.py +0 -0
  53. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff_schema.egg-info/dependency_links.txt +0 -0
  54. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff_schema.egg-info/requires.txt +0 -0
  55. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff_schema.egg-info/top_level.txt +0 -0
  56. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/makefile +0 -0
  57. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/pyproject.toml +0 -0
  58. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/setup.cfg +0 -0
  59. {dyff_schema-0.17.0 → dyff_schema-0.19.0}/tests/test_import.py +0 -0
{dyff_schema-0.17.0 → dyff_schema-0.19.0}/.gitlab-ci.yml

@@ -20,7 +20,7 @@ include:
     file:
       - prettier.yml
   - project: buildgarden/pipelines/python
-    ref: 0.14.0
+    ref: 0.14.1
     file:
       - python-autoflake.yml
       - python-black.yml
{dyff_schema-0.17.0/dyff_schema.egg-info → dyff_schema-0.19.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dyff-schema
-Version: 0.17.0
+Version: 0.19.0
 Summary: Data models for the Dyff AI auditing platform.
 Author-email: Digital Safety Research Institute <contact@dsri.org>
 License: Apache-2.0
dyff_schema-0.19.0/dyff/schema/dataset/embedding.py (new file)

@@ -0,0 +1,4 @@
+# SPDX-FileCopyrightText: 2024 UL Research Institutes
+# SPDX-License-Identifier: Apache-2.0
+
+from ..v0.r1.dataset.embedding import *
{dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/base.py

@@ -547,8 +547,11 @@ def uint64(
     return type("UInt64Value", (UInt64,), namespace)


+_ListElementT = TypeVar("_ListElementT")
+
+
 def list_(
-    item_type: Type[pydantic.BaseModel], *, list_size: Optional[int] = None
+    item_type: Type[_ListElementT], *, list_size: Optional[int] = None
 ) -> Type[list]:
     if list_size is None:
         return pydantic.conlist(item_type)
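
Note: `list_` is now generic over the element type rather than requiring a pydantic model. A minimal usage sketch follows; the `Item` and `Batch` models are invented for illustration, and with `list_size` set the resulting list type is fixed-length, as the new `embedding()` helper's docstring indicates:

    import pydantic

    from dyff.schema.v0.r1.base import DyffSchemaBaseModel, list_

    class Item(DyffSchemaBaseModel):
        value: str

    class Batch(DyffSchemaBaseModel):
        # list_() returns a constrained list type; mypy needs the same
        # 'valid-type' suppression the package uses in embedding.py.
        items: list_(Item, list_size=3) = pydantic.Field(  # type: ignore[valid-type]
            description="A fixed-length list of 3 items"
        )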
{dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/dataset/arrow.py

@@ -8,7 +8,7 @@ import functools
 import inspect
 import typing
 import uuid
-from typing import Any, Iterable, Optional
+from typing import Any, Iterable, Literal, Optional

 import pyarrow
 import pyarrow.dataset
@@ -51,7 +51,10 @@ def make_response_schema(schema: pyarrow.Schema) -> pyarrow.Schema:
     """Given an Arrow schema, create a new one that has the extra ``ResponseItem``
     fields added."""
     response_item_schema = make_response_item_schema(schema)
-    fields = list(zip(response_item_schema.names, response_item_schema.types))
+    fields = [
+        pyarrow.field(n, t)
+        for n, t in zip(response_item_schema.names, response_item_schema.types)
+    ]
     item_type = pyarrow.struct(fields)
     responses_type = pyarrow.list_(item_type)
     return pyarrow.schema(
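
Note: the old code passed `(name, type)` tuples to `pyarrow.struct`; the new code builds `pyarrow.field` objects, which is what the type stubs expect. Both spellings produce the same struct type at runtime, e.g. (field names invented):

    import pyarrow

    t1 = pyarrow.struct([("text", pyarrow.string()), ("score", pyarrow.float64())])
    t2 = pyarrow.struct([
        pyarrow.field("text", pyarrow.string()),
        pyarrow.field("score", pyarrow.float64()),
    ])
    assert t1 == t2  # identical types; only the static typing differs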
@@ -65,7 +68,8 @@ def make_response_schema(schema: pyarrow.Schema) -> pyarrow.Schema:

 def encode_schema(schema: pyarrow.Schema) -> str:
     """Encode an Arrow schema as a string."""
-    return binary.encode(schema.serialize())
+    # pyarrow.Buffer doesn't satisfy ReadableBuffer but it still works
+    return binary.encode(schema.serialize())  # type: ignore[arg-type]


 def decode_schema(schema: str) -> pyarrow.Schema:
@@ -84,7 +88,7 @@ def subset_schema(schema: pyarrow.Schema, field_names: list[str]) -> pyarrow.Schema:
     return pyarrow.schema(fields)


-def arrow_type(annotation: type):
+def arrow_type(annotation: type) -> pyarrow.DataType:
     """Determine a suitable arrow type for a pydantic model field.

     Supports primitive types as well as pydantic sub-models, lists, and optional types.
@@ -130,8 +134,7 @@ def arrow_type(annotation: type):

     if issubclass(annotation, DType):
         # The dtype is in the metaclass
-        return type(annotation).dtype  # type: ignore
-        # return pyarrow.from_numpy_dtype(type(annotation).dtype)  # type: ignore
+        return pyarrow.from_numpy_dtype(type(annotation).dtype)  # type: ignore[attr-defined]

     if annotation == bool:
         return pyarrow.bool_()
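
Note: this activates the previously commented-out conversion: the `DType` metaclass stores a NumPy dtype, and `pyarrow.from_numpy_dtype` maps it to the corresponding Arrow type. A quick standalone illustration:

    import numpy as np
    import pyarrow

    assert pyarrow.from_numpy_dtype(np.dtype("float32")) == pyarrow.float32()
    assert pyarrow.from_numpy_dtype(np.int64) == pyarrow.int64()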
@@ -246,6 +249,7 @@ def _construct_field_docs(
     if pyarrow.types.is_struct(field.type):
         children = [field.type.field(i) for i in range(field.type.num_fields)]
     elif pyarrow.types.is_list(field.type):
+        assert isinstance(field.type, pyarrow.ListType)
         children = [field.type.value_field]
     else:
         raise ValueError(f"Unsupported nested type {field.type}")
@@ -275,8 +279,10 @@ def write_dataset(
     *,
     output_path: str,
     feature_schema: pyarrow.Schema,
-    partition_schema: pyarrow.Schema = None,
-    existing_data_behavior: str = "overwrite_or_ignore",
+    partition_schema: Optional[pyarrow.Schema] = None,
+    existing_data_behavior: Literal[
+        "error", "overwrite_or_ignore", "delete_matching"
+    ] = "overwrite_or_ignore",
     **kwargs,
 ):
     """Creates a ``pyarrow.dataset.Dataset`` from a data generator.
@@ -291,15 +297,19 @@ def write_dataset(
         existing_data_behavior: Same as ``pyarrow.dataset.write_dataset``, but
             defaults to ``"overwrite_or_ignore"``, which is typically what we want.
     """
-    partitioning = partition_schema and pyarrow.dataset.partitioning(
-        partition_schema, flavor="hive"
+    partitioning = (
+        pyarrow.dataset.partitioning(partition_schema, flavor="hive")
+        if partition_schema is not None
+        else None
     )
     pyarrow.dataset.write_dataset(
         data_generator,
         output_path,
         format="parquet",
         schema=feature_schema,
-        partitioning=partitioning,
+        # Type annotation doesn't include PartitioningFactory even though
+        # you're clearly meant to pass the output of partitioning() here
+        partitioning=partitioning,  # type: ignore[arg-type]
         existing_data_behavior=existing_data_behavior,
         **kwargs,
     )
@@ -326,10 +336,10 @@ def batches(
     for instance in instances:
         batch.append(instance)
         if len(batch) == batch_size:
-            yield pyarrow.RecordBatch.from_pylist(batch, schema=schema)
+            yield pyarrow.RecordBatch.from_pylist(batch, schema=schema)  # type: ignore[attr-defined]
             batch = []
     if batch:  # Final (incomplete) batch
-        yield pyarrow.RecordBatch.from_pylist(batch, schema=schema)
+        yield pyarrow.RecordBatch.from_pylist(batch, schema=schema)  # type: ignore[attr-defined]


 __all__ = [
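
Note: `batches()` feeds `write_dataset()`, which wraps `pyarrow.dataset.write_dataset`. A self-contained sketch of the underlying call pattern (the output path and field name are invented):

    import pyarrow
    import pyarrow.dataset

    schema = pyarrow.schema([pyarrow.field("x", pyarrow.int64())])

    def record_batches():
        # Stand-in for batches(instances, schema)
        yield pyarrow.RecordBatch.from_pylist(
            [{"x": i} for i in range(10)], schema=schema
        )

    pyarrow.dataset.write_dataset(
        record_batches(),
        "/tmp/example_dataset",  # invented output path
        format="parquet",
        schema=schema,  # required when passing an iterable of batches
        existing_data_behavior="overwrite_or_ignore",
    )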
{dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/dataset/binary.py

@@ -3,9 +3,13 @@

 import base64
 import hashlib
+import typing

+if typing.TYPE_CHECKING:
+    from _typeshed import ReadableBuffer

-def encode(data: bytes) -> str:
+
+def encode(data: "ReadableBuffer") -> str:
     return base64.b64encode(data).decode("utf-8")

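Note: widening the annotation from `bytes` to `ReadableBuffer` matches what `base64.b64encode` actually accepts (any buffer-like object, including `pyarrow.Buffer`); runtime behavior is unchanged:

    import base64

    assert base64.b64encode(b"dyff").decode("utf-8") == "ZHlmZg=="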
dyff_schema-0.19.0/dyff/schema/v0/r1/dataset/embedding.py (new file)

@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: 2024 UL Research Institutes
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Type
+
+import pydantic
+
+from ..base import DyffSchemaBaseModel, FixedWidthFloat, list_
+
+
+def embedding(
+    element_type: Type[FixedWidthFloat], size: int
+) -> Type[DyffSchemaBaseModel]:
+    """Returns a schema type representing a list of fixed-length embedding vectors."""
+
+    class _Embedding(DyffSchemaBaseModel):
+        embedding: list_(element_type, list_size=size) = pydantic.Field(  # type: ignore[valid-type]
+            description="An embedding vector"
+        )
+
+    return _Embedding
+
+
+__all__ = [
+    "embedding",
+]
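
Note: `embedding()` manufactures a model class with a single fixed-length `embedding` field. A usage sketch, assuming the package exports a `Float32` subclass of `FixedWidthFloat` (that exact name is an assumption for illustration):

    from dyff.schema.v0.r1.base import Float32  # name assumed
    from dyff.schema.v0.r1.dataset.embedding import embedding

    Embedding384 = embedding(Float32, size=384)
    item = Embedding384(embedding=[0.0] * 384)  # wrong-length lists fail validation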
{dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/platform.py

@@ -23,7 +23,6 @@ import abc
 import enum
 import urllib.parse
 from datetime import datetime
-from decimal import Decimal
 from enum import Enum
 from typing import Any, Literal, NamedTuple, Optional, Type, Union
@@ -160,6 +159,34 @@ def _oci_image_tag_maxlen():
     return 127


+def identifier_regex():
+    """Python identifiers start with a letter or an underscore, and consist of letters,
+    numbers, and underscores."""
+    return r"^[a-zA-Z_][a-zA-Z0-9_]*$"
+
+
+def identifier_maxlen():
+    """There isn't really a max length for Python identifiers, but this seems like a
+    reasonable limit for our use."""
+    return 127
+
+
+def title_maxlen() -> int:
+    return 140
+
+
+def summary_maxlen() -> int:
+    return 280
+
+
+def entity_id_regex() -> str:
+    """An entity ID is a 32-character HEX string.
+
+    TODO: This doesn't check whether the hex string is a valid UUID.
+    """
+    return r"^[a-f0-9]{32}$"
+
+
 class Entities(str, enum.Enum):
     """The kinds of entities in the dyff system."""
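Note: these helpers centralize the string constraints applied to `ScoreSpec` and other models below. The identifier pattern accepts exactly the ASCII strings Python treats as identifiers:

    import re

    IDENTIFIER = r"^[a-zA-Z_][a-zA-Z0-9_]*$"  # same pattern as identifier_regex()

    assert re.match(IDENTIFIER, "_my_score2")
    assert re.match(IDENTIFIER, "2fast") is None  # may not start with a digit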
@@ -226,6 +253,9 @@ class Resources(str, enum.Enum):
         raise ValueError(f"No Resources for Entity kind: {kind}")


+EntityID: TypeAlias = pydantic.constr(regex=entity_id_regex())  # type: ignore
+
+
 class DyffModelWithID(DyffSchemaBaseModel):
     id: str = pydantic.Field(description="Unique identifier of the entity")
     account: str = pydantic.Field(description="Account that owns the entity")
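
Note: because `EntityID` is a `constr` alias, the regex is enforced anywhere it appears as a field annotation. A minimal sketch under pydantic v1, which this schema targets (the `Ref` model is invented):

    import pydantic

    EntityID = pydantic.constr(regex=r"^[a-f0-9]{32}$")  # as in platform.py

    class Ref(pydantic.BaseModel):
        id: EntityID  # type: ignore[valid-type]

    Ref(id="0" * 32)   # ok: 32 lowercase hex characters
    # Ref(id="nope")   # raises pydantic.ValidationError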
@@ -399,6 +429,10 @@ class DyffEntity(Status, Labeled, SchemaVersion, DyffModelWithID):
         default=None, description="Resource creation time (assigned by system)"
     )

+    lastTransitionTime: Optional[datetime] = pydantic.Field(
+        default=None, description="Time of last (status, reason) change."
+    )
+
     @abc.abstractmethod
     def dependencies(self) -> list[str]:
         """List of IDs of resources that this resource depends on.
@@ -824,7 +858,7 @@ class DataSchema(DyffSchemaBaseModel):
     def make_output_schema(
         schema: Union[pyarrow.Schema, Type[DyffSchemaBaseModel], DyffDataSchema],
     ) -> "DataSchema":
-        """Construct a complete ``DataSchema`` for inference inputs.
+        """Construct a complete ``DataSchema`` for inference outputs.

         This function will add required special fields for input data and then
         convert the augmented schema as necessary to populate at least the
@@ -1559,42 +1593,51 @@ class ScoreSpec(DyffSchemaBaseModel):
     name: str = pydantic.Field(
         description="The name of the score. Used as a key for retrieving score data."
         " Must be unique within the Method context.",
+        regex=identifier_regex(),
+        max_length=identifier_maxlen(),
     )

     title: str = pydantic.Field(
         description="The title text to use when displaying score information.",
-        max_length=140,
+        max_length=title_maxlen(),
     )

     summary: str = pydantic.Field(
         description="A short text description of what the score measures.",
-        max_length=140,
+        max_length=summary_maxlen(),
     )

     valence: Literal["positive", "negative", "neutral"] = pydantic.Field(
+        default="neutral",
         description="A score has 'positive' valence if 'more is better',"
         " 'negative' valence if 'less is better', and 'neutral' valence if"
-        " 'better' is not meaningful for this score."
+        " 'better' is not meaningful for this score.",
     )

     priority: Literal["primary", "secondary"] = pydantic.Field(
+        default="primary",
         description="The 'primary' score will be displayed in any UI widgets"
-        " that expect a single score."
+        " that expect a single score. There must be exactly 1 primary score.",
     )

-    minimum: Optional[Decimal] = pydantic.Field(
+    minimum: Optional[float] = pydantic.Field(
         default=None, description="The minimum possible value, if known."
     )

-    maximum: Optional[Decimal] = pydantic.Field(
+    maximum: Optional[float] = pydantic.Field(
         default=None, description="The maximum possible value, if known."
     )

     format: str = pydantic.Field(
-        default="{quantity}",
+        default="{quantity:.1f}",
+        # Must use the 'quantity' key in the format string:
+        # (Maybe string not ending in '}')(something like '{quantity:f}')(maybe another string)
+        regex=r"^(.*[^{])?[{]quantity(:[^}]*)?[}]([^}].*)?$",
         description="A Python 'format' string describing how to render the score"
-        " as a string. You can use the keys 'quantity' and 'unit' in the format"
-        " string (e.g., '{quantity} {unit}').",
+        " as a string. You *must* use the keyword 'quantity' in the format"
+        " string, and you may use 'unit' as well (e.g., '{quantity:.2f} {unit}')."
+        " It is *strongly recommended* that you limit the output precision"
+        " appropriately; use ':.0f' for integer-valued scores.",
     )

     unit: Optional[str] = pydantic.Field(
@@ -1603,6 +1646,11 @@ class ScoreSpec(DyffSchemaBaseModel):
         " Use standard SI abbreviations where possible for better indexing.",
     )

+    def quantity_string(self, quantity: float) -> str:
+        """Formats the given quantity as a string, according to the formatting
+        information stored in this ScoreSpec."""
+        return self.format_quantity(self.format, quantity, unit=self.unit)
+
     @pydantic.root_validator
     def _validate_minimum_maximum(cls, values):
         minimum = values.get("minimum")
@@ -1613,15 +1661,17 @@ class ScoreSpec(DyffSchemaBaseModel):

     @pydantic.validator("format")
     def _validate_format(cls, v):
-        try:
-            cls._format_quantity(v, Decimal("3.14"), unit="kg")
-        except Exception:
-            raise ValueError(f"invalid format: '{v}'")
+        x = cls.format_quantity(v, 3.14, unit="kg")
+        y = cls.format_quantity(v, -2.03, unit="kg")
+        if x == y:
+            # Formatted results for different quantities should be different
+            raise ValueError("format string does not mention 'quantity'")
+
         return v

     @classmethod
-    def _format_quantity(
-        cls, format: str, quantity: Decimal, *, unit: Optional[str] = None
+    def format_quantity(
+        cls, format: str, quantity: float, *, unit: Optional[str] = None
     ) -> str:
         return format.format(quantity=quantity, unit=unit)

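Note: the validator no longer merely checks that formatting succeeds; it formats two different quantities and rejects the format if the outputs coincide, which catches strings that never reference `quantity`. The formatting itself is plain `str.format`:

    fmt = "{quantity:.2f} {unit}"
    assert fmt.format(quantity=3.14159, unit="kg") == "3.14 kg"

    bad = "score: {unit}"  # ignores 'quantity', so every value renders the same
    assert bad.format(quantity=3.14, unit="kg") == bad.format(quantity=-2.03, unit="kg")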
@@ -1824,19 +1874,20 @@ class ScoreData(ScoreSpec):
         description="The Analysis that generated the current score instance."
     )

-    quantity: Decimal = pydantic.Field(
+    quantity: float = pydantic.Field(
         description="The numeric quantity associated with the score."
-        " Callers should set the desired precision explicitly."
+    )
+
+    quantityString: str = pydantic.Field(
+        description="The formatted string representation of .quantity,"
+        " after processing with the .format specification."
     )

     text: str = pydantic.Field(
         description="A short text description of what the quantity means.",
-        max_length=140,
+        max_length=summary_maxlen(),
     )

-    def format_quantity(self) -> str:
-        return self._format_quantity(self.format, self.quantity, unit=self.unit)
-

 class Score(ScoreData):
     """A Score is a numeric quantity describing an aspect of system performance.
@@ -2160,6 +2211,7 @@ DyffEntityType = Union[DyffEntityTypeExceptRevision, Revision]


 __all__ = [
+    "SYSTEM_ATTRIBUTES",
     "Accelerator",
     "AcceleratorGPU",
     "AccessGrant",
@@ -2193,9 +2245,11 @@ __all__ = [
     "DocumentationBase",
     "DyffDataSchema",
     "DyffEntity",
+    "DyffEntityType",
     "DyffModelWithID",
     "DyffSchemaBaseModel",
     "Entities",
+    "EntityID",
     "Evaluation",
     "EvaluationBase",
     "ExtractorStep",
@@ -2295,9 +2349,11 @@ __all__ = [
     "ModelStatusReason",
     "ReportStatus",
     "ReportStatusReason",
+    "identifier_regex",
+    "identifier_maxlen",
     "is_status_terminal",
     "is_status_failure",
     "is_status_success",
-    "DyffEntityType",
-    "SYSTEM_ATTRIBUTES",
+    "summary_maxlen",
+    "title_maxlen",
 ]
{dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff/schema/v0/r1/requests.py

@@ -14,6 +14,7 @@ in response.

 from __future__ import annotations

+import re
 from datetime import datetime
 from typing import Optional, Union
@@ -22,6 +23,7 @@ import pydantic
 from .base import DyffBaseModel
 from .platform import (
     AnalysisBase,
+    AnalysisScope,
     DatasetBase,
     DataView,
     DocumentationBase,
@@ -70,6 +72,25 @@ class AnalysisCreateRequest(DyffEntityCreateRequest, AnalysisBase):

     method: str = pydantic.Field(description="Method ID")

+    @pydantic.validator("scope", check_fields=False)
+    def _validate_scope(cls, scope: AnalysisScope) -> AnalysisScope:
+        # TODO: This has to be a validator function because we can't apply the
+        # regex constraint to AnalysisScope, since there are already entities
+        # with invalid IDs in the data store. Fix in Schema v1.
+        uuid4 = r"^[0-9a-f]{8}[0-9a-f]{4}[4][0-9a-f]{3}[89ab][0-9a-f]{3}[0-9a-f]{12}$"
+        id_pattern = re.compile(uuid4)
+        if scope.dataset is not None and not re.match(id_pattern, scope.dataset):
+            raise ValueError("scope.dataset must be an entity ID")
+        if scope.evaluation is not None and not re.match(id_pattern, scope.evaluation):
+            raise ValueError("scope.evaluation must be an entity ID")
+        if scope.inferenceService is not None and not re.match(
+            id_pattern, scope.inferenceService
+        ):
+            raise ValueError("scope.inferenceService must be an entity ID")
+        if scope.model is not None and not re.match(id_pattern, scope.model):
+            raise ValueError("scope.model must be an entity ID")
+        return scope
+

 class DatasetCreateRequest(DyffEntityCreateRequest, DatasetBase):
     pass
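
Note: the `uuid4` pattern is the 32-character hex form of a version-4 UUID (version nibble `4`, variant nibble `8`-`b`), i.e. exactly what `uuid.uuid4().hex` produces:

    import re
    import uuid

    UUID4_HEX = r"^[0-9a-f]{8}[0-9a-f]{4}[4][0-9a-f]{3}[89ab][0-9a-f]{3}[0-9a-f]{12}$"

    assert re.match(UUID4_HEX, uuid.uuid4().hex)
    assert re.match(UUID4_HEX, "not an id") is None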
{dyff_schema-0.17.0 → dyff_schema-0.19.0/dyff_schema.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dyff-schema
-Version: 0.17.0
+Version: 0.19.0
 Summary: Data models for the Dyff AI auditing platform.
 Author-email: Digital Safety Research Institute <contact@dsri.org>
 License: Apache-2.0
{dyff_schema-0.17.0 → dyff_schema-0.19.0}/dyff_schema.egg-info/SOURCES.txt

@@ -27,6 +27,7 @@ dyff/schema/dataset/__init__.py
 dyff/schema/dataset/arrow.py
 dyff/schema/dataset/binary.py
 dyff/schema/dataset/classification.py
+dyff/schema/dataset/embedding.py
 dyff/schema/dataset/text.py
 dyff/schema/dataset/vision.py
 dyff/schema/io/__init__.py
@@ -43,6 +44,7 @@ dyff/schema/v0/r1/dataset/__init__.py
 dyff/schema/v0/r1/dataset/arrow.py
 dyff/schema/v0/r1/dataset/binary.py
 dyff/schema/v0/r1/dataset/classification.py
+dyff/schema/v0/r1/dataset/embedding.py
 dyff/schema/v0/r1/dataset/text.py
 dyff/schema/v0/r1/dataset/vision.py
 dyff/schema/v0/r1/io/__init__.py