dataeval 0.74.0__py3-none-any.whl → 0.74.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. dataeval/__init__.py +3 -9
  2. dataeval/detectors/__init__.py +2 -10
  3. dataeval/detectors/drift/base.py +3 -3
  4. dataeval/detectors/drift/mmd.py +1 -1
  5. dataeval/detectors/linters/clusterer.py +3 -3
  6. dataeval/detectors/linters/duplicates.py +4 -4
  7. dataeval/detectors/linters/outliers.py +4 -4
  8. dataeval/detectors/ood/__init__.py +5 -12
  9. dataeval/detectors/ood/base.py +5 -5
  10. dataeval/detectors/ood/metadata_ks_compare.py +12 -13
  11. dataeval/interop.py +1 -1
  12. dataeval/metrics/bias/balance.py +3 -3
  13. dataeval/metrics/bias/coverage.py +3 -3
  14. dataeval/metrics/bias/diversity.py +3 -3
  15. dataeval/metrics/bias/metadata_preprocessing.py +3 -3
  16. dataeval/metrics/bias/parity.py +4 -4
  17. dataeval/metrics/estimators/ber.py +3 -3
  18. dataeval/metrics/estimators/divergence.py +3 -3
  19. dataeval/metrics/estimators/uap.py +3 -3
  20. dataeval/metrics/stats/base.py +2 -2
  21. dataeval/metrics/stats/boxratiostats.py +1 -1
  22. dataeval/metrics/stats/datasetstats.py +6 -6
  23. dataeval/metrics/stats/dimensionstats.py +1 -1
  24. dataeval/metrics/stats/hashstats.py +1 -1
  25. dataeval/metrics/stats/labelstats.py +3 -3
  26. dataeval/metrics/stats/pixelstats.py +1 -1
  27. dataeval/metrics/stats/visualstats.py +1 -1
  28. dataeval/output.py +77 -53
  29. dataeval/utils/__init__.py +1 -7
  30. dataeval/workflows/sufficiency.py +4 -4
  31. {dataeval-0.74.0.dist-info → dataeval-0.74.1.dist-info}/METADATA +3 -8
  32. dataeval-0.74.1.dist-info/RECORD +65 -0
  33. dataeval/detectors/ood/ae.py +0 -76
  34. dataeval/detectors/ood/aegmm.py +0 -67
  35. dataeval/detectors/ood/base_tf.py +0 -109
  36. dataeval/detectors/ood/llr.py +0 -302
  37. dataeval/detectors/ood/vae.py +0 -98
  38. dataeval/detectors/ood/vaegmm.py +0 -76
  39. dataeval/utils/lazy.py +0 -26
  40. dataeval/utils/tensorflow/__init__.py +0 -19
  41. dataeval/utils/tensorflow/_internal/gmm.py +0 -103
  42. dataeval/utils/tensorflow/_internal/loss.py +0 -121
  43. dataeval/utils/tensorflow/_internal/models.py +0 -1394
  44. dataeval/utils/tensorflow/_internal/trainer.py +0 -114
  45. dataeval/utils/tensorflow/_internal/utils.py +0 -256
  46. dataeval/utils/tensorflow/loss/__init__.py +0 -11
  47. dataeval-0.74.0.dist-info/RECORD +0 -79
  48. {dataeval-0.74.0.dist-info → dataeval-0.74.1.dist-info}/LICENSE.txt +0 -0
  49. {dataeval-0.74.0.dist-info → dataeval-0.74.1.dist-info}/WHEEL +0 -0
dataeval/output.py CHANGED
@@ -4,9 +4,10 @@ __all__ = []
  
  import inspect
  import sys
+ from collections.abc import Mapping
  from datetime import datetime, timezone
- from functools import wraps
- from typing import Any, Callable, Iterable, TypeVar
+ from functools import partial, wraps
+ from typing import Any, Callable, Iterator, TypeVar
  
  import numpy as np
  
@@ -18,7 +19,7 @@ else:
  from dataeval import __version__
  
  
- class OutputMetadata:
+ class Output:
      _name: str
      _execution_time: datetime
      _execution_duration: float
@@ -26,6 +27,9 @@ class OutputMetadata:
      _state: dict[str, str]
      _version: str
  
+     def __str__(self) -> str:
+         return f"{self.__class__.__name__}: {str(self.dict())}"
+
      def dict(self) -> dict[str, Any]:
          return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
  
@@ -33,58 +37,78 @@ class OutputMetadata:
          return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}
  
  
+ TKey = TypeVar("TKey", str, int, float, set)
+ TValue = TypeVar("TValue")
+
+
+ class MappingOutput(Mapping[TKey, TValue], Output):
+     __slots__ = ["_data"]
+
+     def __init__(self, data: Mapping[TKey, TValue]):
+         self._data = data
+
+     def __getitem__(self, key: TKey) -> TValue:
+         return self._data.__getitem__(key)
+
+     def __iter__(self) -> Iterator[TKey]:
+         return self._data.__iter__()
+
+     def __len__(self) -> int:
+         return self._data.__len__()
+
+     def dict(self) -> dict[str, TValue]:
+         return {str(k): v for k, v in self._data.items()}
+
+
  P = ParamSpec("P")
- R = TypeVar("R", bound=OutputMetadata)
+ R = TypeVar("R", bound=Output)
  
  
- def set_metadata(
-     state_attr: Iterable[str] | None = None,
- ) -> Callable[[Callable[P, R]], Callable[P, R]]:
+ def set_metadata(fn: Callable[P, R] | None = None, *, state: list[str] | None = None) -> Callable[P, R]:
      """Decorator to stamp OutputMetadata classes with runtime metadata"""
  
-     def decorator(fn: Callable[P, R]) -> Callable[P, R]:
-         @wraps(fn)
-         def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
-             def fmt(v):
-                 if np.isscalar(v):
-                     return v
-                 if hasattr(v, "shape"):
-                     return f"{v.__class__.__name__}: shape={getattr(v, 'shape')}"
-                 if hasattr(v, "__len__"):
-                     return f"{v.__class__.__name__}: len={len(v)}"
-                 return f"{v.__class__.__name__}"
-
-             time = datetime.now(timezone.utc)
-             result = fn(*args, **kwargs)
-             duration = (datetime.now(timezone.utc) - time).total_seconds()
-             fn_params = inspect.signature(fn).parameters
-             # set all params with defaults then update params with mapped arguments and explicit keyword args
-             arguments = {k: None if v.default is inspect.Parameter.empty else v.default for k, v in fn_params.items()}
-             arguments.update(zip(fn_params, args))
-             arguments.update(kwargs)
-             arguments = {k: fmt(v) for k, v in arguments.items()}
-             state = (
-                 {k: fmt(getattr(args[0], k)) for k in state_attr if "self" in arguments}
-                 if "self" in arguments and state_attr
-                 else {}
-             )
-             name = (
-                 f"{args[0].__class__.__module__}.{args[0].__class__.__name__}.{fn.__name__}"
-                 if "self" in arguments
-                 else f"{fn.__module__}.{fn.__qualname__}"
-             )
-             metadata = {
-                 "_name": name,
-                 "_execution_time": time,
-                 "_execution_duration": duration,
-                 "_arguments": {k: v for k, v in arguments.items() if k != "self"},
-                 "_state": state,
-                 "_version": __version__,
-             }
-             for k, v in metadata.items():
-                 object.__setattr__(result, k, v)
-             return result
-
-         return wrapper
-
-     return decorator
+     if fn is None:
+         return partial(set_metadata, state=state)  # type: ignore
+
+     @wraps(fn)
+     def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+         def fmt(v):
+             if np.isscalar(v):
+                 return v
+             if hasattr(v, "shape"):
+                 return f"{v.__class__.__name__}: shape={getattr(v, 'shape')}"
+             if hasattr(v, "__len__"):
+                 return f"{v.__class__.__name__}: len={len(v)}"
+             return f"{v.__class__.__name__}"
+
+         time = datetime.now(timezone.utc)
+         result = fn(*args, **kwargs)
+         duration = (datetime.now(timezone.utc) - time).total_seconds()
+         fn_params = inspect.signature(fn).parameters
+
+         # set all params with defaults then update params with mapped arguments and explicit keyword args
+         arguments = {k: None if v.default is inspect.Parameter.empty else v.default for k, v in fn_params.items()}
+         arguments.update(zip(fn_params, args))
+         arguments.update(kwargs)
+         arguments = {k: fmt(v) for k, v in arguments.items()}
+         state_attrs = (
+             {k: fmt(getattr(args[0], k)) for k in state if "self" in arguments} if "self" in arguments and state else {}
+         )
+         name = (
+             f"{args[0].__class__.__module__}.{args[0].__class__.__name__}.{fn.__name__}"
+             if "self" in arguments
+             else f"{fn.__module__}.{fn.__qualname__}"
+         )
+         metadata = {
+             "_name": name,
+             "_execution_time": time,
+             "_execution_duration": duration,
+             "_arguments": {k: v for k, v in arguments.items() if k != "self"},
+             "_state": state_attrs,
+             "_version": __version__,
+         }
+         for k, v in metadata.items():
+             object.__setattr__(result, k, v)
+         return result
+
+     return wrapper
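The reworked set_metadata decorator now accepts a bare form as well as a keyword-only state parameter, and the new MappingOutput base gives dict-like results the same runtime stamping as other Output subclasses. A minimal sketch of how the two pieces behave; the ScoreOutput and CountsOutput classes and the score function below are hypothetical stand-ins, not part of the package:

    from dataclasses import dataclass

    from dataeval.output import MappingOutput, Output, set_metadata


    # Hypothetical output types for illustration only.
    @dataclass(frozen=True)
    class ScoreOutput(Output):
        score: float


    class CountsOutput(MappingOutput[str, int]):
        """Read-only mapping output; indexes, iterates, and sizes like a dict."""


    @set_metadata  # bare form: no parentheses needed when no state is captured
    def score(threshold: float = 0.5) -> ScoreOutput:
        return ScoreOutput(score=threshold * 2)


    out = score(0.25)
    print(out)         # "ScoreOutput: {'score': 0.5}" via the new __str__
    print(out.dict())  # public fields only; the stamped _name/_version/... stay hidden

    counts = CountsOutput({"cats": 3, "dogs": 5})
    print(len(counts), counts["cats"], dict(counts))  # 2 3 {'cats': 3, 'dogs': 5}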
dataeval/utils/__init__.py CHANGED
@@ -4,7 +4,7 @@ in setting up architectures that are guaranteed to work with applicable DataEval
  metrics. Currently DataEval supports both :term:`TensorFlow` and PyTorch backends.
  """
  
- from dataeval import _IS_TENSORFLOW_AVAILABLE, _IS_TORCH_AVAILABLE
+ from dataeval import _IS_TORCH_AVAILABLE
  from dataeval.utils.metadata import merge_metadata
  from dataeval.utils.split_dataset import split_dataset
  
@@ -15,10 +15,4 @@ if _IS_TORCH_AVAILABLE:
  
      __all__ += ["torch"]
  
- if _IS_TENSORFLOW_AVAILABLE:
-     from dataeval.utils import tensorflow
-
-     __all__ += ["tensorflow"]
-
- del _IS_TENSORFLOW_AVAILABLE
  del _IS_TORCH_AVAILABLE
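The torch subpackage remains behind the availability flag. As a reference for the pattern only (not DataEval's actual flag computation, which lives in dataeval/__init__.py), an optional-backend guard like this is commonly built with importlib.util.find_spec:

    from importlib.util import find_spec

    # Sketch of an optional-backend guard: expose a subpackage only when its
    # dependency is importable, then drop the temporary flag from the namespace.
    _IS_TORCH_AVAILABLE = find_spec("torch") is not None

    __all__ = []

    if _IS_TORCH_AVAILABLE:
        import torch  # noqa: F401

        __all__ += ["torch"]

    del _IS_TORCH_AVAILABLE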
dataeval/workflows/sufficiency.py CHANGED
@@ -16,11 +16,11 @@ from scipy.optimize import basinhopping
  from torch.utils.data import Dataset
  
  from dataeval.interop import as_numpy
- from dataeval.output import OutputMetadata, set_metadata
+ from dataeval.output import Output, set_metadata
  
  
  @dataclass(frozen=True)
- class SufficiencyOutput(OutputMetadata):
+ class SufficiencyOutput(Output):
      """
      Output class for :class:`Sufficiency` workflow
  
@@ -47,7 +47,7 @@ class SufficiencyOutput(OutputMetadata):
              if c != c_v:
                  raise ValueError(f"{m} does not contain the expected number ({c}) of data points.")
  
-     @set_metadata()
+     @set_metadata
      def project(
          self,
          projection: int | Iterable[int],
@@ -484,7 +484,7 @@ class Sufficiency(Generic[T]):
      def eval_kwargs(self, value: Mapping[str, Any] | None) -> None:
          self._eval_kwargs = {} if value is None else value
  
-     @set_metadata(["runs", "substeps"])
+     @set_metadata(state=["runs", "substeps"])
      def evaluate(self, eval_at: int | Iterable[int] | None = None, niter: int = 1000) -> SufficiencyOutput:
          """
          Creates data indices, trains models, and returns plotting data
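evaluate keeps its behavior; only the decoration syntax changes, with the captured instance attributes now passed through the keyword-only state argument. A small sketch of how that form records attributes read off self; the Runner and ResultOutput classes here are hypothetical examples, not the Sufficiency API:

    from dataeval.output import MappingOutput, set_metadata


    class ResultOutput(MappingOutput[str, float]):
        """Hypothetical output type for this sketch."""


    class Runner:
        def __init__(self, runs: int, substeps: int) -> None:
            self.runs = runs
            self.substeps = substeps

        @set_metadata(state=["runs", "substeps"])  # parameterized form, as used by Sufficiency.evaluate
        def evaluate(self, niter: int = 1000) -> ResultOutput:
            return ResultOutput({"accuracy": 0.9})


    result = Runner(runs=3, substeps=5).evaluate()
    # The stamped state dict now records self.runs and self.substeps alongside
    # the call arguments, execution time, duration, and package version.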
{dataeval-0.74.0.dist-info → dataeval-0.74.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dataeval
- Version: 0.74.0
+ Version: 0.74.1
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
  Home-page: https://dataeval.ai/
  License: MIT
@@ -21,17 +21,12 @@ Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3 :: Only
  Classifier: Topic :: Scientific/Engineering
  Provides-Extra: all
- Provides-Extra: tensorflow
  Provides-Extra: torch
- Requires-Dist: markupsafe (<3.0.2) ; extra == "tensorflow" or extra == "all"
- Requires-Dist: matplotlib ; extra == "torch" or extra == "all"
- Requires-Dist: numpy (>1.24.3)
+ Requires-Dist: matplotlib ; extra == "all"
+ Requires-Dist: numpy (>=1.24.3)
  Requires-Dist: pillow (>=10.3.0)
  Requires-Dist: scikit-learn (>=1.5.0)
  Requires-Dist: scipy (>=1.10)
- Requires-Dist: tensorflow (>=2.16,<2.18) ; extra == "tensorflow" or extra == "all"
- Requires-Dist: tensorflow_probability (>=0.24,<0.25) ; extra == "tensorflow" or extra == "all"
- Requires-Dist: tf-keras (>=2.16,<2.18) ; extra == "tensorflow" or extra == "all"
  Requires-Dist: torch (>=2.2.0) ; extra == "torch" or extra == "all"
  Requires-Dist: torchvision (>=0.17.0) ; extra == "torch" or extra == "all"
  Requires-Dist: tqdm
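With the tensorflow extra and its pins (markupsafe, tensorflow, tensorflow_probability, tf-keras) removed, the remaining declared dependencies can be checked from an environment after upgrading; a quick sketch using only the standard library:

    from importlib.metadata import requires, version

    # Confirm the installed build and list its declared requirements.
    print(version("dataeval"))              # expected: 0.74.1
    for requirement in requires("dataeval") or []:
        print(requirement)                  # e.g. numpy (>=1.24.3), torch (>=2.2.0) ; extra == "torch" ...
    # No tensorflow, tensorflow_probability, or tf-keras entries should appear.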
dataeval-0.74.1.dist-info/RECORD ADDED
@@ -0,0 +1,65 @@
+ dataeval/__init__.py,sha256=HNOjwnFIQCD7vwBBo0xMexlnNG3xRZ3s3VUMsA4Qozw,392
+ dataeval/detectors/__init__.py,sha256=Y-0bbyWyuMvZU80bCx6WPt3IV_r2hu9ymzpA8uzMqoI,206
+ dataeval/detectors/drift/__init__.py,sha256=BSXm21y7cAawHep-ZldCJ5HOvzYjPzYGKGrmoEs3i0E,737
+ dataeval/detectors/drift/base.py,sha256=QDGHMu1WADD-38MEIOwjQMEQM3DE7B0yFHO3hsMbV-E,14481
+ dataeval/detectors/drift/cvm.py,sha256=kc59w2_wtxFGNnLcaJRvX5v_38gPXiebSGNiFVdunEQ,4142
+ dataeval/detectors/drift/ks.py,sha256=gcpe1WIQeNeZdLYkdMZCFLXUp1bHMQUxwJE6-RLVOXs,4229
+ dataeval/detectors/drift/mmd.py,sha256=C0FX5v9ZJzmKNYEcYUaC7sDtMpJ2dZpwikNDu-AEWiI,7584
+ dataeval/detectors/drift/torch.py,sha256=igEQ2DV9JmcpTdUKCOHBi5LxtoNeCAslJS2Ldulg1hw,7585
+ dataeval/detectors/drift/uncertainty.py,sha256=Xz2yzJjtJfw1vLag234jwRvaa_HK36nMajGx8bQaNRs,5322
+ dataeval/detectors/drift/updates.py,sha256=UJ0z5hlunRi7twnkLABfdJG3tT2EqX4y9IGx8_USYvo,1780
+ dataeval/detectors/linters/__init__.py,sha256=BvpaB1RUpkEhhXk3Mqi5NYoOcJKZRFSBOJCmQOIfYRU,483
+ dataeval/detectors/linters/clusterer.py,sha256=hK-ak02GaxwWuufesZMKDsvoE5fMdXO7UWsLiK8hfY0,21008
+ dataeval/detectors/linters/duplicates.py,sha256=2bmPTFqoefeiAQV9y4CGlHV_mJNrysJSEFLXLd2DO4I,5661
+ dataeval/detectors/linters/merged_stats.py,sha256=X-bDTwjyR8RuVmzxLaHZmQ5nI3oOWvsqVlitdSncapk,1355
+ dataeval/detectors/linters/outliers.py,sha256=X48bzTfTr1LqC6WKVKBRfvpjcQRgmb93cNLT7Oipe3M,10113
+ dataeval/detectors/ood/__init__.py,sha256=-D4Fq-ysFylNNMqjHG1ALbB9qBCm_UinkCAgsK9HGg0,408
+ dataeval/detectors/ood/ae_torch.py,sha256=pO9w5221bXR9lEBkE7oakXeE7PXUUR--xcTpmHvOCSk,2142
+ dataeval/detectors/ood/base.py,sha256=UzcDbXl8Gv43VFzjrOegTnKSIoEYmfDP7fAySeWyWPw,6955
+ dataeval/detectors/ood/base_torch.py,sha256=yFbSfQsBMwZeVf8mrixmkZYBGChhV5oAHtkgzWnMzsA,3405
+ dataeval/detectors/ood/metadata_ks_compare.py,sha256=LNDNWGEDKTW8_-djgmK53sn9EZzzXq1Sgwc47k0QI-Y,5380
+ dataeval/detectors/ood/metadata_least_likely.py,sha256=nxMCXUOjOfWHDTGT2SLE7OYBCydRq8zHLd8t17k7hMM,5193
+ dataeval/detectors/ood/metadata_ood_mi.py,sha256=KLay2BmgHrStBV92VpIs_B1yEfQKllsMTgzOQEng01I,4065
+ dataeval/interop.py,sha256=SB5Nca12rluZeXrpmmlfY7LFJbN5opYM7jmAb2c29hM,1748
+ dataeval/metrics/__init__.py,sha256=fPBNLd-T6mCErZBBJrxWmXIL0jCk7fNUYIcNEBkMa80,238
+ dataeval/metrics/bias/__init__.py,sha256=dYiPHenS8J7pgRMMW2jNkTBmTbPoYTxT04fZu9PFats,747
+ dataeval/metrics/bias/balance.py,sha256=_TZEe17AT-qOvPp-QFrQfTqNwh8uVVCYjC4Sv6JBx9o,9118
+ dataeval/metrics/bias/coverage.py,sha256=o65_IgrWSlGnYeYZFABjwKaxq09uqyy5esHJM67PJ-k,4528
+ dataeval/metrics/bias/diversity.py,sha256=WL1NbZiRrv0SIq97FY3womZNCSl_EBMVlBWQZAUtjk8,7701
+ dataeval/metrics/bias/metadata_preprocessing.py,sha256=ekUFiirkmaHDiH7nJjkNpiUQD7OolAPhHorjLxpXv_Y,12248
+ dataeval/metrics/bias/metadata_utils.py,sha256=HmTjlRRTdM9566oKUDDdVMJ8luss4DYykFOiS2FQzhM,6558
+ dataeval/metrics/bias/parity.py,sha256=hnA7qQH4Uy3tl_krluZ9BPD5zYjjagUxZt2fEiIa2yE,12745
+ dataeval/metrics/estimators/__init__.py,sha256=O6ocxJq8XDkfJWwXeJnnnzbOyRnFPKF4kTIVTTZYOA8,380
+ dataeval/metrics/estimators/ber.py,sha256=fs3_e9pgu7I50QIALWtF2aidkBZhTCKVE2pA7PyB5Go,5019
+ dataeval/metrics/estimators/divergence.py,sha256=r_SKSurf1TdI5E1ivENqDnz8cQ3_sxVGKAqmF9cqcT4,4275
+ dataeval/metrics/estimators/uap.py,sha256=Aw5ReoWNK73Tq96r__qN_-cvHrELauqtDX3Af_QxX4s,2157
+ dataeval/metrics/stats/__init__.py,sha256=igLRaAt1nX6yRwC4xI0zNPBADi3u7EsSxWP3OZ8AqcU,1086
+ dataeval/metrics/stats/base.py,sha256=_C05KUAuDrfX3N-19o25V3vmXr0-45A5fc57cXyV8qs,12161
+ dataeval/metrics/stats/boxratiostats.py,sha256=bZunY-b8Y2IQqHlTusQN77ujLOHftogEQIARDpdVv6A,6463
+ dataeval/metrics/stats/datasetstats.py,sha256=rZUDiciHwEpnXmkI8-uJNiYwUuTL9ssZMKMx73hVX-Y,6219
+ dataeval/metrics/stats/dimensionstats.py,sha256=xITgQF_oomb6Ty_dJcbT3ARGGNp4QRcYSgnkjB4f-YE,4054
+ dataeval/metrics/stats/hashstats.py,sha256=vxw_K74EJM9CZy-EV617vdrysFO8nEspVWqIYsIHC-c,4958
+ dataeval/metrics/stats/labelstats.py,sha256=K0hJTphMe7htSjyss8GPtKDiHepTuU60_hX0xRA-uAg,4096
+ dataeval/metrics/stats/pixelstats.py,sha256=2zr9i3GLNx1i_SCtbfdtZNxXBEc_9wCe4qDpmXLVbKY,4576
+ dataeval/metrics/stats/visualstats.py,sha256=vLIC4sMo796axWl-4e4RzT33ll-_6ki54Dirn3V-EL8,4948
+ dataeval/output.py,sha256=SmzH9W9yewdL9SBKVBkUUvOo45oA5lHphE2DYvJJMu0,3573
+ dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/utils/__init__.py,sha256=z7HxSijjycey-rGdQkgVOdpvT0oO2pKAuT4uYyxYGMs,555
+ dataeval/utils/gmm.py,sha256=YuLsJKsVWgH_wHr1u_hSRH5Yeexdj8exht8h99L7bLo,561
+ dataeval/utils/image.py,sha256=KgC_1nW__nGN5q6bVZNvG4U_qIBdjcPATz9qe8f2XuA,1928
+ dataeval/utils/metadata.py,sha256=0A--iru0zEmi044mKz5P35q69KrI30yoiRSlvs7TSdQ,9418
+ dataeval/utils/shared.py,sha256=xvF3VLfyheVwJtdtDrneOobkKf7t-JTmf_w91FWXmqo,3616
+ dataeval/utils/split_dataset.py,sha256=Ot1ZJhbIhVfcShYXF9MkWXak5odBXyuBdRh-noXh-MI,19555
+ dataeval/utils/torch/__init__.py,sha256=lpkqfgyARUxgrV94cZESQv8PIP2p-UnwItZ_wIr0XzQ,675
+ dataeval/utils/torch/blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
+ dataeval/utils/torch/datasets.py,sha256=10elNgLuH_FDX_CHE3y2Z215JN4-PQovQm5brcIJOeM,15021
+ dataeval/utils/torch/gmm.py,sha256=VbLlUQohwToApT493_tjQBWy2UM5R-3ppS9Dp-eP7BA,3240
+ dataeval/utils/torch/models.py,sha256=sdGeo7a8vshCTGA4lYyVxxb_aDWUlxdtIVxrddS-_ls,8542
+ dataeval/utils/torch/trainer.py,sha256=8BEXr6xtk-CHJTcNxOBnWgkFWfJUAiBy28cEdBhLMRU,7883
+ dataeval/utils/torch/utils.py,sha256=nWRcT6z6DbFVrL1RyxCOX3DPoCrv9G0B-VI_9LdGCQQ,5784
+ dataeval/workflows/__init__.py,sha256=ef1MiVL5IuhlDXXbwsiAfafhnr7tD3TXF9GRusy9_O8,290
+ dataeval/workflows/sufficiency.py,sha256=v9AV3BZT0NW-zD2VNIL_5aWspvoscrxRIUKcUdpy7HI,18540
+ dataeval-0.74.1.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
+ dataeval-0.74.1.dist-info/METADATA,sha256=nd7os3kaLfp-A5HWH0QYVxe-gQdj5q3dIn9d0fPf-Lk,4298
+ dataeval-0.74.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ dataeval-0.74.1.dist-info/RECORD,,
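Each RECORD row is path,sha256=<digest>,size, where the digest is the file's SHA-256 hash encoded as unpadded URL-safe base64. A short sketch for recomputing one entry against an installed file; the path handling is illustrative, so point it at the copy inside your site-packages:

    import base64
    import hashlib
    from pathlib import Path


    def record_digest(path: Path) -> str:
        """Return a file's hash in the sha256=<urlsafe-base64, no padding> form used by RECORD."""
        digest = hashlib.sha256(path.read_bytes()).digest()
        return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")


    # Compare against the RECORD line for dataeval/output.py shown above.
    print(record_digest(Path("site-packages/dataeval/output.py")))
    # expected: sha256=SmzH9W9yewdL9SBKVBkUUvOo45oA5lHphE2DYvJJMu0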
dataeval/detectors/ood/ae.py DELETED
@@ -1,76 +0,0 @@
- """
- Source code derived from Alibi-Detect 0.11.4
- https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
-
- Original code Copyright (c) 2023 Seldon Technologies Ltd
- Licensed under Apache Software License (Apache 2.0)
- """
-
- from __future__ import annotations
-
- __all__ = ["OOD_AE"]
-
- from typing import TYPE_CHECKING, Callable
-
- import numpy as np
- from numpy.typing import ArrayLike
-
- from dataeval.detectors.ood.base import OODScoreOutput
- from dataeval.detectors.ood.base_tf import OODBase
- from dataeval.interop import as_numpy
- from dataeval.utils.lazy import lazyload
- from dataeval.utils.tensorflow._internal.utils import predict_batch
-
- if TYPE_CHECKING:
-     import tensorflow as tf
-     import tf_keras as keras
-
-     import dataeval.utils.tensorflow._internal.models as tf_models
- else:
-     tf = lazyload("tensorflow")
-     keras = lazyload("tf_keras")
-     tf_models = lazyload("dataeval.utils.tensorflow._internal.models")
-
-
- class OOD_AE(OODBase):
-     """
-     Autoencoder-based :term:`out of distribution<Out-of-distribution (OOD)>` detector.
-
-     Parameters
-     ----------
-     model : AE
-         An :term:`autoencoder<Autoencoder>` model.
-     """
-
-     def __init__(self, model: tf_models.AE) -> None:
-         super().__init__(model)
-
-     def fit(
-         self,
-         x_ref: ArrayLike,
-         threshold_perc: float = 100.0,
-         loss_fn: Callable[..., tf.Tensor] | None = None,
-         optimizer: keras.optimizers.Optimizer | None = None,
-         epochs: int = 20,
-         batch_size: int = 64,
-         verbose: bool = True,
-     ) -> None:
-         if loss_fn is None:
-             loss_fn = keras.losses.MeanSquaredError()
-         super().fit(as_numpy(x_ref), threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
-
-     def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
-         self._validate(X := as_numpy(X))
-
-         # reconstruct instances
-         X_recon = predict_batch(X, self.model, batch_size=batch_size)
-
-         # compute feature and instance level scores
-         fscore = np.power(X - X_recon, 2)
-         fscore_flat = fscore.reshape(fscore.shape[0], -1).copy()
-         n_score_features = int(np.ceil(fscore_flat.shape[1]))
-         sorted_fscore = np.sort(fscore_flat, axis=1)
-         sorted_fscore_perc = sorted_fscore[:, -n_score_features:]
-         iscore = np.mean(sorted_fscore_perc, axis=1)
-
-         return OODScoreOutput(iscore, fscore)
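For reference, the instance score computed by the removed _score is the mean of the largest squared reconstruction errors per flattened sample, and with n_score_features covering every feature it reduces to a plain per-sample mean. A standalone NumPy sketch of that reduction, using random data and a perturbed copy in place of the autoencoder reconstruction:

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(8, 3, 32, 32)).astype(np.float32)
    X_recon = X + rng.normal(scale=0.1, size=X.shape).astype(np.float32)  # stand-in for the model output

    fscore = np.power(X - X_recon, 2)                      # feature-level scores
    fscore_flat = fscore.reshape(fscore.shape[0], -1)
    n_score_features = int(np.ceil(fscore_flat.shape[1]))  # all features, as in the deleted code
    sorted_fscore = np.sort(fscore_flat, axis=1)
    iscore = np.mean(sorted_fscore[:, -n_score_features:], axis=1)  # instance-level scores
    print(iscore.shape)  # (8,)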
dataeval/detectors/ood/aegmm.py DELETED
@@ -1,67 +0,0 @@
- """
- Source code derived from Alibi-Detect 0.11.4
- https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
-
- Original code Copyright (c) 2023 Seldon Technologies Ltd
- Licensed under Apache Software License (Apache 2.0)
- """
-
- from __future__ import annotations
-
- __all__ = ["OOD_AEGMM"]
-
- from typing import TYPE_CHECKING, Callable
-
- from numpy.typing import ArrayLike
-
- from dataeval.detectors.ood.base import OODScoreOutput
- from dataeval.detectors.ood.base_tf import OODBaseGMM
- from dataeval.interop import to_numpy
- from dataeval.utils.lazy import lazyload
- from dataeval.utils.tensorflow._internal.gmm import gmm_energy
- from dataeval.utils.tensorflow._internal.loss import LossGMM
- from dataeval.utils.tensorflow._internal.utils import predict_batch
-
- if TYPE_CHECKING:
-     import tensorflow as tf
-     import tf_keras as keras
-
-     import dataeval.utils.tensorflow._internal.models as tf_models
- else:
-     tf = lazyload("tensorflow")
-     keras = lazyload("tf_keras")
-     tf_models = lazyload("dataeval.utils.tensorflow._internal.models")
-
-
- class OOD_AEGMM(OODBaseGMM):
-     """
-     AE with Gaussian Mixture Model based outlier detector.
-
-     Parameters
-     ----------
-     model : AEGMM
-         An AEGMM model.
-     """
-
-     def __init__(self, model: tf_models.AEGMM) -> None:
-         super().__init__(model)
-
-     def fit(
-         self,
-         x_ref: ArrayLike,
-         threshold_perc: float = 100.0,
-         loss_fn: Callable[..., tf.Tensor] | None = None,
-         optimizer: keras.optimizers.Optimizer | None = None,
-         epochs: int = 20,
-         batch_size: int = 64,
-         verbose: bool = True,
-     ) -> None:
-         if loss_fn is None:
-             loss_fn = LossGMM()
-         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
-
-     def _score(self, X: ArrayLike, batch_size: int = int(1e10)) -> OODScoreOutput:
-         self._validate(X := to_numpy(X))
-         _, z, _ = predict_batch(X, self.model, batch_size=batch_size)
-         energy, _ = gmm_energy(z, self._gmm_params, return_mean=False)
-         return OODScoreOutput(energy.numpy())  # type: ignore
dataeval/detectors/ood/base_tf.py DELETED
@@ -1,109 +0,0 @@
- """
- Source code derived from Alibi-Detect 0.11.4
- https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
-
- Original code Copyright (c) 2023 Seldon Technologies Ltd
- Licensed under Apache Software License (Apache 2.0)
- """
-
- from __future__ import annotations
-
- from typing import TYPE_CHECKING, Callable, cast
-
- from numpy.typing import ArrayLike
-
- from dataeval.detectors.ood.base import OODBaseMixin, OODFitMixin, OODGMMMixin
- from dataeval.interop import to_numpy
- from dataeval.utils.lazy import lazyload
- from dataeval.utils.tensorflow._internal.gmm import gmm_params
- from dataeval.utils.tensorflow._internal.trainer import trainer
-
- if TYPE_CHECKING:
-     import tensorflow as tf
-     import tf_keras as keras
- else:
-     tf = lazyload("tensorflow")
-     keras = lazyload("tf_keras")
-
-
- class OODBase(OODBaseMixin[keras.Model], OODFitMixin[Callable[..., tf.Tensor], keras.optimizers.Optimizer]):
-     def __init__(self, model: keras.Model) -> None:
-         super().__init__(model)
-
-     def fit(
-         self,
-         x_ref: ArrayLike,
-         threshold_perc: float,
-         loss_fn: Callable[..., tf.Tensor] | None,
-         optimizer: keras.optimizers.Optimizer | None,
-         epochs: int,
-         batch_size: int,
-         verbose: bool,
-     ) -> None:
-         """
-         Train the model and infer the threshold value.
-
-         Parameters
-         ----------
-         x_ref : ArrayLike
-             Training data.
-         threshold_perc : float, default 100.0
-             Percentage of reference data that is normal.
-         loss_fn : Callable | None, default None
-             Loss function used for training.
-         optimizer : Optimizer, default keras.optimizers.Adam
-             Optimizer used for training.
-         epochs : int, default 20
-             Number of training epochs.
-         batch_size : int, default 64
-             Batch size used for training.
-         verbose : bool, default True
-             Whether to print training progress.
-         """
-
-         # Train the model
-         trainer(
-             model=self.model,
-             loss_fn=loss_fn,
-             x_train=to_numpy(x_ref),
-             y_train=None,
-             optimizer=optimizer,
-             epochs=epochs,
-             batch_size=batch_size,
-             verbose=verbose,
-         )
-
-         # Infer the threshold values
-         self._ref_score = self.score(x_ref, batch_size)
-         self._threshold_perc = threshold_perc
-
-
- class OODBaseGMM(OODBase, OODGMMMixin[tf.Tensor]):
-     def fit(
-         self,
-         x_ref: ArrayLike,
-         threshold_perc: float,
-         loss_fn: Callable[..., tf.Tensor] | None,
-         optimizer: keras.optimizers.Optimizer | None,
-         epochs: int,
-         batch_size: int,
-         verbose: bool,
-     ) -> None:
-         # Train the model
-         trainer(
-             model=self.model,
-             loss_fn=loss_fn,
-             x_train=to_numpy(x_ref),
-             optimizer=optimizer,
-             epochs=epochs,
-             batch_size=batch_size,
-             verbose=verbose,
-         )
-
-         # Calculate the GMM parameters
-         _, z, gamma = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.model(x_ref))
-         self._gmm_params = gmm_params(z, gamma)
-
-         # Infer the threshold values
-         self._ref_score = self.score(x_ref, batch_size)
-         self._threshold_perc = threshold_perc