dataeval 0.74.1__py3-none-any.whl → 0.75.0__py3-none-any.whl

This diff shows the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (68)
  1. dataeval/__init__.py +33 -10
  2. dataeval/detectors/__init__.py +2 -2
  3. dataeval/detectors/drift/__init__.py +14 -12
  4. dataeval/detectors/drift/base.py +1 -1
  5. dataeval/detectors/drift/cvm.py +1 -1
  6. dataeval/detectors/drift/ks.py +1 -1
  7. dataeval/detectors/drift/mmd.py +6 -5
  8. dataeval/detectors/drift/torch.py +12 -12
  9. dataeval/detectors/drift/uncertainty.py +3 -2
  10. dataeval/detectors/linters/__init__.py +4 -4
  11. dataeval/detectors/linters/clusterer.py +2 -7
  12. dataeval/detectors/linters/duplicates.py +6 -10
  13. dataeval/detectors/linters/outliers.py +4 -2
  14. dataeval/detectors/ood/__init__.py +3 -10
  15. dataeval/detectors/ood/{ae_torch.py → ae.py} +6 -4
  16. dataeval/detectors/ood/base.py +64 -161
  17. dataeval/detectors/ood/metadata_ks_compare.py +34 -42
  18. dataeval/detectors/ood/metadata_least_likely.py +3 -3
  19. dataeval/detectors/ood/metadata_ood_mi.py +6 -5
  20. dataeval/detectors/ood/mixin.py +146 -0
  21. dataeval/detectors/ood/output.py +63 -0
  22. dataeval/interop.py +16 -3
  23. dataeval/log.py +18 -0
  24. dataeval/metrics/__init__.py +2 -2
  25. dataeval/metrics/bias/__init__.py +9 -12
  26. dataeval/metrics/bias/balance.py +10 -8
  27. dataeval/metrics/bias/coverage.py +52 -4
  28. dataeval/metrics/bias/diversity.py +42 -14
  29. dataeval/metrics/bias/parity.py +15 -12
  30. dataeval/metrics/estimators/__init__.py +2 -2
  31. dataeval/metrics/estimators/ber.py +3 -1
  32. dataeval/metrics/estimators/divergence.py +1 -1
  33. dataeval/metrics/estimators/uap.py +1 -1
  34. dataeval/metrics/stats/__init__.py +18 -18
  35. dataeval/metrics/stats/base.py +4 -4
  36. dataeval/metrics/stats/boxratiostats.py +8 -9
  37. dataeval/metrics/stats/datasetstats.py +10 -14
  38. dataeval/metrics/stats/dimensionstats.py +4 -4
  39. dataeval/metrics/stats/hashstats.py +12 -8
  40. dataeval/metrics/stats/labelstats.py +5 -5
  41. dataeval/metrics/stats/pixelstats.py +4 -9
  42. dataeval/metrics/stats/visualstats.py +4 -9
  43. dataeval/output.py +1 -1
  44. dataeval/utils/__init__.py +4 -13
  45. dataeval/utils/dataset/__init__.py +7 -0
  46. dataeval/utils/{torch → dataset}/datasets.py +2 -0
  47. dataeval/utils/dataset/read.py +63 -0
  48. dataeval/utils/dataset/split.py +527 -0
  49. dataeval/utils/image.py +2 -2
  50. dataeval/utils/metadata.py +310 -5
  51. dataeval/{metrics/bias/metadata_utils.py → utils/plot.py} +1 -104
  52. dataeval/utils/torch/__init__.py +2 -17
  53. dataeval/utils/torch/gmm.py +29 -6
  54. dataeval/utils/torch/{utils.py → internal.py} +82 -58
  55. dataeval/utils/torch/models.py +10 -8
  56. dataeval/utils/torch/trainer.py +6 -85
  57. dataeval/workflows/__init__.py +2 -5
  58. dataeval/workflows/sufficiency.py +16 -6
  59. dataeval-0.75.0.dist-info/METADATA +136 -0
  60. dataeval-0.75.0.dist-info/RECORD +67 -0
  61. dataeval/detectors/ood/base_torch.py +0 -109
  62. dataeval/metrics/bias/metadata_preprocessing.py +0 -285
  63. dataeval/utils/gmm.py +0 -26
  64. dataeval/utils/split_dataset.py +0 -492
  65. dataeval-0.74.1.dist-info/METADATA +0 -120
  66. dataeval-0.74.1.dist-info/RECORD +0 -65
  67. {dataeval-0.74.1.dist-info → dataeval-0.75.0.dist-info}/LICENSE.txt +0 -0
  68. {dataeval-0.74.1.dist-info → dataeval-0.75.0.dist-info}/WHEEL +0 -0
dataeval/utils/torch/models.py
@@ -1,6 +1,8 @@
+"""Simple PyTorch model architectures used by DataEval."""
+
 from __future__ import annotations
 
-__all__ = ["AriaAutoencoder", "Encoder", "Decoder"]
+__all__ = ["Autoencoder", "Encoder", "Decoder"]
 
 import math
 from typing import Any
@@ -9,7 +11,7 @@ import torch
 import torch.nn as nn
 
 
-class AriaAutoencoder(nn.Module):
+class Autoencoder(nn.Module):
     """
     An autoencoder model with a separate encoder and decoder.
 
@@ -63,7 +65,7 @@ class Encoder(nn.Module):
     """
     A simple encoder to be used in an autoencoder model.
 
-    This is the encoder used by the AriaAutoencoder model.
+    This is the encoder used by the Autoencoder model.
 
     Parameters
     ----------
@@ -104,7 +106,7 @@ class Decoder(nn.Module):
     """
     A simple decoder to be used in an autoencoder model.
 
-    This is the decoder used by the AriaAutoencoder model.
+    This is the decoder used by the Autoencoder model.
 
     Parameters
     ----------
@@ -142,14 +144,14 @@
 
 class AE(nn.Module):
     """
-    An autoencoder model with a separate encoder and decoder. Meant to replace the TensorFlow model called AE, which we
-    used as the core of an autoencoder-based OOD detector, i.e. as an argument to OOD_AE().
+    An autoencoder model with a separate encoder and decoder used as the core of an autoencoder-based
+    OOD detector, i.e. as an argument to OOD_AE().
 
     Parameters
     ----------
     input_shape : tuple[int, int, int]
         Number of input channels, number of rows, number of columns. (Number of examples per batch will be inferred
-        at runtime.)
+        at runtime.)
     """
 
     def __init__(self, input_shape: tuple[int, int, int]) -> None:
@@ -279,7 +281,7 @@ class Decoder_AE(nn.Module):
     """
     A simple decoder to be used in an autoencoder model.
 
-    This is the decoder used by the AriaAutoencoder model.
+    This is the decoder used by the Autoencoder model.
 
     Parameters
     ----------
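For downstream code, the practical effect of this hunk is the rename of `AriaAutoencoder` to `Autoencoder`. A minimal, hypothetical compatibility shim for code that must run against both versions (assuming nothing else about the constructor changed; dataeval does not ship this alias itself):

```
# Hypothetical shim for the AriaAutoencoder -> Autoencoder rename.
try:
    from dataeval.utils.torch.models import Autoencoder  # 0.75.0 and later
except ImportError:
    from dataeval.utils.torch.models import AriaAutoencoder as Autoencoder  # 0.74.1 and earlier
```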
dataeval/utils/torch/trainer.py
@@ -1,15 +1,15 @@
+"""Utility classes for training PyTorch models."""
+
 from __future__ import annotations
 
-from typing import Any, Callable
+__all__ = ["AETrainer"]
+
+from typing import Any
 
 import torch
 import torch.nn as nn
-from numpy.typing import NDArray
 from torch.optim import Adam
-from torch.utils.data import DataLoader, Dataset, TensorDataset
-from tqdm import tqdm
-
-__all__ = ["AETrainer", "trainer"]
+from torch.utils.data import DataLoader, Dataset
 
 
 def get_images_from_batch(batch: Any) -> Any:
@@ -176,82 +176,3 @@ class AETrainer:
         encodings = torch.vstack((encodings, embeddings)) if len(encodings) else embeddings
 
         return encodings
-
-
-def trainer(
-    model: torch.nn.Module,
-    x_train: NDArray[Any],
-    y_train: NDArray[Any] | None,
-    loss_fn: Callable[..., torch.Tensor | torch.nn.Module] | None,
-    optimizer: torch.optim.Optimizer | None,
-    preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None,
-    epochs: int,
-    batch_size: int,
-    device: torch.device,
-    verbose: bool,
-) -> None:
-    """
-    Train Pytorch model.
-
-    Parameters
-    ----------
-    model
-        Model to train.
-    loss_fn
-        Loss function used for training.
-    x_train
-        Training data.
-    y_train
-        Training labels.
-    optimizer
-        Optimizer used for training.
-    preprocess_fn
-        Preprocessing function applied to each training batch.
-    epochs
-        Number of training epochs.
-    reg_loss_fn
-        Allows an additional regularisation term to be defined as reg_loss_fn(model)
-    batch_size
-        Batch size used for training.
-    buffer_size
-        Maximum number of elements that will be buffered when prefetching.
-    verbose
-        Whether to print training progress.
-    """
-    if optimizer is None:
-        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
-
-    if y_train is None:
-        dataset = TensorDataset(torch.from_numpy(x_train).to(torch.float32))
-
-    else:
-        dataset = TensorDataset(
-            torch.from_numpy(x_train).to(torch.float32), torch.from_numpy(y_train).to(torch.float32)
-        )
-
-    loader = DataLoader(dataset=dataset)
-
-    model = model.to(device)
-
-    # iterate over epochs
-    loss = torch.nan
-    disable_tqdm = not verbose
-    for epoch in (pbar := tqdm(range(epochs), disable=disable_tqdm)):
-        epoch_loss = loss
-        for step, data in enumerate(loader):
-            if step % 250 == 0:
-                pbar.set_description(f"Epoch: {epoch} ({epoch_loss:.3f}), loss: {loss:.3f}")
-
-            x, y = [d.to(device) for d in data] if len(data) > 1 else (data[0].to(device), None)
-
-            if isinstance(preprocess_fn, Callable):
-                x = preprocess_fn(x)
-
-            y_hat = model(x)
-            y = x if y is None else y
-
-            loss = loss_fn(y, y_hat)  # type: ignore
-
-            optimizer.zero_grad()
-            loss.backward()
-            optimizer.step()
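The standalone `trainer()` helper is gone; `AETrainer` is now the module's only export. Code that called `trainer()` directly needs its own loop. Below is a minimal sketch adapted from the deleted function above (the name `train_autoencoder` is ours, not part of the 0.75.0 API); unlike the removed code, it actually passes `batch_size` through to the `DataLoader`, which `trainer()` accepted but never used:

```
from __future__ import annotations

from typing import Any, Callable

import torch
from numpy.typing import NDArray
from torch.utils.data import DataLoader, TensorDataset


def train_autoencoder(
    model: torch.nn.Module,
    x_train: NDArray[Any],
    loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    epochs: int = 20,
    batch_size: int = 64,
    device: str | torch.device = "cpu",
) -> None:
    # Reconstruction setup: inputs double as targets, as in the removed code.
    dataset = TensorDataset(torch.from_numpy(x_train).to(torch.float32))
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model = model.to(device).train()
    for _ in range(epochs):
        for (x,) in loader:
            x = x.to(device)
            loss = loss_fn(x, model(x))  # compare input against its reconstruction
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
```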
dataeval/workflows/__init__.py
@@ -2,9 +2,6 @@
 Workflows perform a sequence of actions to analyze the dataset and make predictions.
 """
 
-from dataeval import _IS_TORCH_AVAILABLE
+__all__ = ["Sufficiency", "SufficiencyOutput"]
 
-if _IS_TORCH_AVAILABLE:
-    from dataeval.workflows.sufficiency import Sufficiency, SufficiencyOutput
-
-    __all__ = ["Sufficiency", "SufficiencyOutput"]
+from dataeval.workflows.sufficiency import Sufficiency, SufficiencyOutput
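With torch an unconditional requirement in 0.75.0 (the new METADATA below lists `Requires-Dist: torch (>=2.2.0)` with no extra), the `_IS_TORCH_AVAILABLE` gate is no longer needed and the workflow import always succeeds:

```
from dataeval.workflows import Sufficiency, SufficiencyOutput
```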
dataeval/workflows/sufficiency.py
@@ -1,16 +1,15 @@
 from __future__ import annotations
 
-__all__ = ["SufficiencyOutput", "Sufficiency"]
+__all__ = []
 
+import contextlib
 import warnings
 from dataclasses import dataclass
 from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, TypeVar, cast
 
-import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch.nn as nn
-from matplotlib.figure import Figure
 from numpy.typing import ArrayLike, NDArray
 from scipy.optimize import basinhopping
 from torch.utils.data import Dataset
@@ -18,6 +17,9 @@ from torch.utils.data import Dataset
 from dataeval.interop import as_numpy
 from dataeval.output import Output, set_metadata
 
+with contextlib.suppress(ImportError):
+    from matplotlib.figure import Figure
+
 
 @dataclass(frozen=True)
 class SufficiencyOutput(Output):
@@ -97,7 +99,7 @@ class SufficiencyOutput(Output):
 
     Returns
     -------
-    list[plt.Figure]
+    list[Figure]
         List of Figures for each measure
 
     Raises
@@ -344,7 +346,9 @@ def plot_measure(
     params: NDArray[Any],
     projection: NDArray[Any],
 ) -> Figure:
-    fig = plt.figure()
+    import matplotlib.pyplot
+
+    fig = matplotlib.pyplot.figure()
     fig = cast(Figure, fig)
     fig.tight_layout()
 
@@ -510,7 +514,13 @@ class Sufficiency(Generic[T]):
     Examples
     --------
     >>> suff = Sufficiency(
-    ...     model=model, train_ds=train_ds, test_ds=test_ds, train_fn=train_fn, eval_fn=eval_fn, runs=3, substeps=5
+    ...     model=model,
+    ...     train_ds=train_ds,
+    ...     test_ds=test_ds,
+    ...     train_fn=train_fn,
+    ...     eval_fn=eval_fn,
+    ...     runs=3,
+    ...     substeps=5,
     ... )
     >>> suff.evaluate()
     SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), params={'test': array([ 0., 42., 0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
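matplotlib, by contrast, becomes a true optional dependency here: the `Figure` import is suppressed at module load, and `matplotlib.pyplot` is imported only inside the plotting function. A minimal sketch of that pattern, with a hypothetical function name (`plot_values`):

```
from __future__ import annotations

import contextlib

# Module import succeeds even without matplotlib installed.
with contextlib.suppress(ImportError):
    from matplotlib.figure import Figure  # needed only for the return annotation


def plot_values(values: list[float]) -> Figure:
    # Deferred import: only calling the plotting API raises ImportError.
    import matplotlib.pyplot

    fig = matplotlib.pyplot.figure()
    fig.gca().plot(values)
    return fig
```

Because annotations are lazy under `from __future__ import annotations`, the `Figure` return hint never evaluates at import time, so the module loads cleanly when matplotlib is absent.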
dataeval-0.75.0.dist-info/METADATA (new file)
@@ -0,0 +1,136 @@
+Metadata-Version: 2.1
+Name: dataeval
+Version: 0.75.0
+Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
+Home-page: https://dataeval.ai/
+License: MIT
+Author: Andrew Weng
+Author-email: andrew.weng@ariacoustics.com
+Maintainer: ARiA
+Maintainer-email: dataeval@ariacoustics.com
+Requires-Python: >=3.9,<3.13
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Topic :: Scientific/Engineering
+Provides-Extra: all
+Requires-Dist: matplotlib ; extra == "all"
+Requires-Dist: numpy (>=1.24.3)
+Requires-Dist: pillow (>=10.3.0)
+Requires-Dist: requests
+Requires-Dist: scikit-learn (>=1.5.0)
+Requires-Dist: scipy (>=1.10)
+Requires-Dist: torch (>=2.2.0)
+Requires-Dist: torchvision (>=0.17.0)
+Requires-Dist: tqdm
+Requires-Dist: typing-extensions (>=4.12) ; python_version >= "3.9" and python_version < "4.0"
+Requires-Dist: xxhash (>=3.3)
+Project-URL: Documentation, https://dataeval.readthedocs.io/
+Project-URL: Repository, https://github.com/aria-ml/dataeval/
+Description-Content-Type: text/markdown
+
+# DataEval
+
+To view our extensive collection of tutorials, how-tos, explanation guides, and reference material, please visit our documentation on **[Read the Docs](https://dataeval.readthedocs.io/)**.
+
+## About DataEval
+
+<!-- start tagline -->
+
+DataEval curates datasets to train and test performant, robust, unbiased, and reliable AI models, and monitors for data shifts that impact the performance of deployed models.
+
+<!-- end tagline -->
+
+### Our mission
+
+<!-- start needs -->
+
+DataEval is an effective, powerful, and reliable set of tools for any T&E engineer. Throughout all stages of the machine learning lifecycle, DataEval supports **model development, data analysis, and monitoring** with state-of-the-art algorithms to help you solve difficult problems. With a focus on computer vision tasks, DataEval provides simple but effective metrics for performance estimation, bias detection, and dataset linting.
+
+<!-- end needs -->
+
+<!-- start JATIC interop -->
+DataEval is easy to install, supports a wide range of Python versions, and is compatible with many of the most popular packages in the scientific and T&E communities.
+DataEval also has native interoperability with JATIC's suite of tools when using MAITE-compliant datasets and models.
+<!-- end JATIC interop -->
+
+## Getting Started
+
+**Python versions:** 3.9 - 3.12
+
+**Supported packages**: *NumPy*, *Pandas*, *scikit-learn*, *MAITE*, *NRTK*, *Gradient*
+
+Choose your preferred method of installation below or follow our [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
+
+* [Installing with pip](#installing-with-pip)
+* [Installing with conda/mamba](#installing-with-conda)
+* [Installing from GitHub](#installing-from-github)
+
+### **Installing with pip**
+You can install DataEval directly from pypi.org using the following command. The only optional dependency extra is `all`.
+
+```
+pip install dataeval[all]
+```
+
+### **Installing with conda**
+
+DataEval can be installed in a Conda/Mamba environment using the provided `environment.yaml` file. Because some dependencies
+are installed from the `pytorch` channel, that channel is specified in the example below.
+
+```
+micromamba create -f environment\environment.yaml -c pytorch
+```
+
+### **Installing from GitHub**
+
+To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger binary source files and `poetry` for project dependency management.
+
+```
+sudo apt-get install git-lfs
+pip install poetry
+```
+
+Pull the source down and change to the DataEval project directory.
+```
+git clone https://github.com/aria-ml/dataeval.git
+cd dataeval
+```
+
+Install DataEval with optional dependencies for development.
+```
+poetry install --all-extras --with dev
+```
+
+Now that DataEval is installed, you can run commands in the poetry virtual environment by prefixing shell commands with `poetry run`, or activate the virtual environment directly in the shell.
+```
+poetry shell
+```
+
+## Contact Us
+
+If you have any questions, feel free to reach out to the people below:
+
+- **POC**: Scott Swan @scott.swan
+- **DPOC**: Andrew Weng @aweng
+
+## Acknowledgement
+
+<!-- start attribution -->
+
+### Alibi-Detect
+This project uses code from the [Alibi-Detect](https://github.com/SeldonIO/alibi-detect) Python library developed by SeldonIO.\
+Additional documentation from their developers is available on the [Alibi-Detect documentation page](https://docs.seldon.io/projects/alibi-detect/en/stable/).
+
+### CDAO Funding Acknowledgement
+This material is based upon work supported by the Chief Digital and Artificial Intelligence Office under Contract No. W519TC-23-9-2033. The views and conclusions contained herein are those of the author(s) and should not be interpreted as necessarily representing the official policies or endorsements, either expressed or implied, of the U.S. Government.
+
+<!-- end attribution -->
+
dataeval-0.75.0.dist-info/RECORD (new file)
@@ -0,0 +1,67 @@
+dataeval/__init__.py,sha256=yESctPswyAJ01Hr9k4QUoGZp8D0RtvoQ26k4AFE2vs4,1472
+dataeval/detectors/__init__.py,sha256=iifG-Z08mH5B4QhkKtAieDGJBKldKvmCXpDQJD9qVY8,206
+dataeval/detectors/drift/__init__.py,sha256=wO294Oz--l0GuZTAkBpyGwZphbQsot57HoiEX6kjNOc,652
+dataeval/detectors/drift/base.py,sha256=sX46grnr4DV0WMofLTI2a_tDHR4OLZEUCQrMLePouqg,14468
+dataeval/detectors/drift/cvm.py,sha256=TATS6IOE0INO1pkyRkesgrhDawD_kITsRsOOGVRs420,4132
+dataeval/detectors/drift/ks.py,sha256=3Jgh5W7pC1hO1yZPCiXc47snlSdXv5BIG8sCyRRz-Ec,4220
+dataeval/detectors/drift/mmd.py,sha256=lD__AouWlYWCJOD0eNNEhmLTnUPwNTBU6OCgITcpw40,7592
+dataeval/detectors/drift/torch.py,sha256=ykD-Nggys5T9FTGXXbYYOi2WRKwEzEjXhL8ZueVmTxU,7659
+dataeval/detectors/drift/uncertainty.py,sha256=Pdim80_-ainvOX5-7fhH9cvblYI2d-zocEwZO-JfCg4,5345
+dataeval/detectors/drift/updates.py,sha256=UJ0z5hlunRi7twnkLABfdJG3tT2EqX4y9IGx8_USYvo,1780
+dataeval/detectors/linters/__init__.py,sha256=CZV5naeYQYL3sHXO_CXB26AXkyTeKHI-TMaewtEs8Ag,483
+dataeval/detectors/linters/clusterer.py,sha256=1qIQo5NuJkx-phKFWuXkUpUJLcqTt92L8Cpv3AmO3xQ,20929
+dataeval/detectors/linters/duplicates.py,sha256=pcCRN27IuGa6ASkiFG73kYdI8_X0j12INbkD9GOlWPs,5614
+dataeval/detectors/linters/merged_stats.py,sha256=X-bDTwjyR8RuVmzxLaHZmQ5nI3oOWvsqVlitdSncapk,1355
+dataeval/detectors/linters/outliers.py,sha256=Fn6R_7mGOrWlTRCXFrjHvIFNELN6CTosoJgzDr8cVr0,10253
+dataeval/detectors/ood/__init__.py,sha256=hTeR-Aqt6SKWsqFusaKiw_TlnFPe_sV3fQ7NKUTzZrU,292
+dataeval/detectors/ood/ae.py,sha256=SL8oKTERhMwaZTQWwDhQQ6H07UKj8ozXqEWO3TaOAos,2151
+dataeval/detectors/ood/base.py,sha256=6gUkbGE6PbKmA899rXOTOIeT8u_gaD0DNDQV8Wyfk5Y,3421
+dataeval/detectors/ood/metadata_ks_compare.py,sha256=-hEhDNXFC7X8wmFeoigO7A7Qn90vRLroN_nKDwNgjnE,5204
+dataeval/detectors/ood/metadata_least_likely.py,sha256=rb8GOgsrlrEzc6fxccdmyZQ5PC7HtTsTY8U97D-h5OU,5088
+dataeval/detectors/ood/metadata_ood_mi.py,sha256=7_Sdzf7-x1TlrIQvSyOIB98C8_UQhUwmwFQmZ9_q1Uc,4042
+dataeval/detectors/ood/mixin.py,sha256=Ia-rJF6rtGhE8uavijdbzOha3ueFk2CFfA0Ah_mnF40,4976
+dataeval/detectors/ood/output.py,sha256=8UQbtudQ0gSeq_hQV67IE5SfHednaiGUHv9MideETdk,1710
+dataeval/interop.py,sha256=GLziERWQQGwUO4Nb-uHpbLlvBOT2WF2GVilTHmsDq8w,2279
+dataeval/log.py,sha256=Mn5bRWO0cgtAYd5VGYSFiPgu57ta3zoktrtHAZ1m3dU,357
+dataeval/metrics/__init__.py,sha256=p-lRjm0oVHD3cXZeEajTfuGTuQOCCVHbJ8CqAI_GHVY,238
+dataeval/metrics/bias/__init__.py,sha256=knYgCdeHredaHI6KGdjiYM6ViPfDf8NW35xkKiiGlVM,599
+dataeval/metrics/bias/balance.py,sha256=od3gcejOqJDDymy09OWSxzqkBNyh7Vf3aXN9o6IPKHY,9151
+dataeval/metrics/bias/coverage.py,sha256=k8TJAsUWlLgn_-JEtRWIOwhtMRwXmyGzLDndGxNTsAU,5745
+dataeval/metrics/bias/diversity.py,sha256=upj-Gx_4-bBF-4dDaUSuURIbP98Ghk-BSCK5ZJNGMEg,8318
+dataeval/metrics/bias/parity.py,sha256=wVMfzKFqzHkp3SNUJFjRH_Eej9DIg-xAhHkShIAek68,12755
+dataeval/metrics/estimators/__init__.py,sha256=oY_9jX7V-Kg7-4KpvMNB4rUhsk8QTA0DIoM8d2VtVIg,380
+dataeval/metrics/estimators/ber.py,sha256=p3KaY-rnK45CUDaqx-55wWG6yHcDnH6Kkkt9r6FkmZY,5003
+dataeval/metrics/estimators/divergence.py,sha256=QYkOs7In9un0tYHztwZ5kNqiWVNS3Lgmxn1716H8HG4,4243
+dataeval/metrics/estimators/uap.py,sha256=ELa5MixMOJZoW5rUuVLOXynfLMbVjxb-r7VYF5qqXrw,2139
+dataeval/metrics/stats/__init__.py,sha256=Js_mklHJbHwOXJtMFo9NIyePZLwLZL-jruwmcjoLsZc,1086
+dataeval/metrics/stats/base.py,sha256=U0yPaRSHuPGZk3A7hl8ghJCki7iBtW5wM1eZvElu1_w,12038
+dataeval/metrics/stats/boxratiostats.py,sha256=fNzHT_nZX0MYeHkWRdcfEz2mtRC2d1JxpoK3l4EBrQc,6301
+dataeval/metrics/stats/datasetstats.py,sha256=krOm48yjyzYOWKLaWFqHAQPmuhiN4manif7ZXh2Ohhg,5828
+dataeval/metrics/stats/dimensionstats.py,sha256=_mN7wHencHh4UNd9XUflhq0sIa9yLPk3yHqmossDEGk,3985
+dataeval/metrics/stats/hashstats.py,sha256=_zZOwnQDlpMoPyqbOV2v9V_Uqox0c4vX2Khv5u_fAk8,5068
+dataeval/metrics/stats/labelstats.py,sha256=mLH02Xy_uT-qN7HXuXEgs786T2Xr0BMudweBDeEWd5I,4065
+dataeval/metrics/stats/pixelstats.py,sha256=t8abfenA79x87CMqPuKtddglD3l_LA6nXS4K_FlL4-k,4148
+dataeval/metrics/stats/visualstats.py,sha256=UU0oa5BWuIOTDM1H1ZnlhYyu8ruVEnaLPCDOsbm-q1c,4546
+dataeval/output.py,sha256=hR5TJ67f7FgrZO9Du46aw-jvRpMjOimSgJSau4ZNK44,3565
+dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/utils/__init__.py,sha256=fBpXVWzNaXySTuZWsD8Jg9LLHzb23nz_PfdxPD_gc8c,279
+dataeval/utils/dataset/__init__.py,sha256=IvRauQaa0CzJ5nZrfTSjGoaaKelyJcQDe3OPRw0-NXs,332
+dataeval/utils/dataset/datasets.py,sha256=7tSqN3d8UncqmXh4eiEwarXgVxc4sMuIKPTqBCE0pN8,15080
+dataeval/utils/dataset/read.py,sha256=tt-9blXzYLRb4Vgv6DrFj2ikUSvBF0-qTSnvvYec_2U,1523
+dataeval/utils/dataset/split.py,sha256=FpxHxmewjqIj6hikCsamNQTq877qu4HfKnzArOyvmyY,18957
+dataeval/utils/image.py,sha256=AQljELyMFkYsf2AoNOH5dZG8DYE4hPw0MCk85eIXqAw,1926
+dataeval/utils/metadata.py,sha256=mhMhBgb7nAIIljDdecOqiZ1zsYagE6h8DKxE_DFDW-E,22270
+dataeval/utils/plot.py,sha256=jQSiqDArFOlKZaIbv4Viso_ShU3LnZE-Y2qXKuKsa8M,3790
+dataeval/utils/shared.py,sha256=xvF3VLfyheVwJtdtDrneOobkKf7t-JTmf_w91FWXmqo,3616
+dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
+dataeval/utils/torch/blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
+dataeval/utils/torch/gmm.py,sha256=fQ8CBO4Bf6i9N1CZdeJ8VJP25fsPjgMextQkondwgvo,3693
+dataeval/utils/torch/internal.py,sha256=qAzQTwTI9Qy6f01Olw3d1TIJ4HoWGf0gQzgWVcdD2x4,6653
+dataeval/utils/torch/models.py,sha256=Df3B_9x5uu-Y5ZOyhRZYpKJnDvxt0hgMeJLy1E4oxpU,8519
+dataeval/utils/torch/trainer.py,sha256=Qay0LK63RuyoGYiJ5zI2C5BVym309ORvp6shhpcrIU4,5589
+dataeval/workflows/__init__.py,sha256=L9yfBipNFGnYuN2JbMknIHDvziwfa2XAGFnOwifZbls,216
+dataeval/workflows/sufficiency.py,sha256=nL99iDlu2bF_9VGu3ioLFDJBgBBJEdwEXROxXm_0sfY,18673
+dataeval-0.75.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
+dataeval-0.75.0.dist-info/METADATA,sha256=6m2O6vreJR3Lq1_BXEU6DHnK2C5L_q5YAPofIl4kxCw,5410
+dataeval-0.75.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.75.0.dist-info/RECORD,,
@@ -1,109 +0,0 @@
1
- """
2
- Source code derived from Alibi-Detect 0.11.4
3
- https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
4
-
5
- Original code Copyright (c) 2023 Seldon Technologies Ltd
6
- Licensed under Apache Software License (Apache 2.0)
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- from typing import Callable, cast
12
-
13
- import torch
14
- from numpy.typing import ArrayLike
15
-
16
- from dataeval.detectors.drift.torch import get_device
17
- from dataeval.detectors.ood.base import OODBaseMixin, OODFitMixin, OODGMMMixin
18
- from dataeval.interop import to_numpy
19
- from dataeval.utils.torch.gmm import gmm_params
20
- from dataeval.utils.torch.trainer import trainer
21
-
22
-
23
- class OODBase(OODBaseMixin[torch.nn.Module], OODFitMixin[Callable[..., torch.nn.Module], torch.optim.Optimizer]):
24
- def __init__(self, model: torch.nn.Module, device: str | torch.device | None = None) -> None:
25
- self.device: torch.device = get_device(device)
26
- super().__init__(model)
27
-
28
- def fit(
29
- self,
30
- x_ref: ArrayLike,
31
- threshold_perc: float,
32
- loss_fn: Callable[..., torch.nn.Module] | None,
33
- optimizer: torch.optim.Optimizer | None,
34
- epochs: int,
35
- batch_size: int,
36
- verbose: bool,
37
- ) -> None:
38
- """
39
- Train the model and infer the threshold value.
40
-
41
- Parameters
42
- ----------
43
- x_ref : ArrayLike
44
- Training data.
45
- threshold_perc : float, default 100.0
46
- Percentage of reference data that is normal.
47
- loss_fn : Callable | None, default None
48
- Loss function used for training.
49
- optimizer : Optimizer, default keras.optimizers.Adam
50
- Optimizer used for training.
51
- epochs : int, default 20
52
- Number of training epochs.
53
- batch_size : int, default 64
54
- Batch size used for training.
55
- verbose : bool, default True
56
- Whether to print training progress.
57
- """
58
-
59
- # Train the model
60
- trainer(
61
- model=self.model,
62
- x_train=to_numpy(x_ref),
63
- y_train=None,
64
- loss_fn=loss_fn,
65
- optimizer=optimizer,
66
- preprocess_fn=None,
67
- epochs=epochs,
68
- batch_size=batch_size,
69
- device=self.device,
70
- verbose=verbose,
71
- )
72
-
73
- # Infer the threshold values
74
- self._ref_score = self.score(x_ref, batch_size)
75
- self._threshold_perc = threshold_perc
76
-
77
-
78
- class OODBaseGMM(OODBase, OODGMMMixin[torch.Tensor]):
79
- def fit(
80
- self,
81
- x_ref: ArrayLike,
82
- threshold_perc: float,
83
- loss_fn: Callable[..., torch.nn.Module] | None,
84
- optimizer: torch.optim.Optimizer | None,
85
- epochs: int,
86
- batch_size: int,
87
- verbose: bool,
88
- ) -> None:
89
- # Train the model
90
- trainer(
91
- model=self.model,
92
- x_train=to_numpy(x_ref),
93
- y_train=None,
94
- loss_fn=loss_fn,
95
- optimizer=optimizer,
96
- preprocess_fn=None,
97
- epochs=epochs,
98
- batch_size=batch_size,
99
- device=self.device,
100
- verbose=verbose,
101
- )
102
-
103
- # Calculate the GMM parameters
104
- _, z, gamma = cast(tuple[torch.Tensor, torch.Tensor, torch.Tensor], self.model(x_ref))
105
- self._gmm_params = gmm_params(z, gamma)
106
-
107
- # Infer the threshold values
108
- self._ref_score = self.score(x_ref, batch_size)
109
- self._threshold_perc = threshold_perc
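Per the file list above, this logic moves into the reworked `dataeval/detectors/ood/base.py` and the new `mixin.py`. The instructive part of `fit()` is the threshold inference after training: score the reference data and treat `threshold_perc` percent of it as normal. A sketch of that step with illustrative data (not the dataeval API):

```
import numpy as np

# Instance-level OOD scores computed on the reference data x_ref.
ref_scores = np.array([0.11, 0.08, 0.14, 0.09, 0.92, 0.10])
threshold_perc = 95.0  # percentage of reference data assumed normal

# Scores above the 95th percentile of the reference scores flag OOD inputs.
threshold = np.percentile(ref_scores, threshold_perc)
is_ood = ref_scores > threshold
print(is_ood)  # [False False False False  True False]
```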