dataeval 0.74.2__py3-none-any.whl → 0.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +27 -23
- dataeval/detectors/__init__.py +2 -2
- dataeval/detectors/drift/__init__.py +14 -12
- dataeval/detectors/drift/base.py +1 -1
- dataeval/detectors/drift/cvm.py +1 -1
- dataeval/detectors/drift/ks.py +1 -1
- dataeval/detectors/drift/mmd.py +6 -5
- dataeval/detectors/drift/torch.py +12 -12
- dataeval/detectors/drift/uncertainty.py +3 -2
- dataeval/detectors/linters/__init__.py +4 -4
- dataeval/detectors/linters/clusterer.py +2 -7
- dataeval/detectors/linters/duplicates.py +6 -10
- dataeval/detectors/linters/outliers.py +4 -2
- dataeval/detectors/ood/__init__.py +3 -10
- dataeval/detectors/ood/{ae_torch.py → ae.py} +6 -4
- dataeval/detectors/ood/base.py +64 -161
- dataeval/detectors/ood/metadata_ks_compare.py +34 -42
- dataeval/detectors/ood/metadata_least_likely.py +3 -3
- dataeval/detectors/ood/metadata_ood_mi.py +6 -5
- dataeval/detectors/ood/mixin.py +146 -0
- dataeval/detectors/ood/output.py +63 -0
- dataeval/interop.py +6 -5
- dataeval/{logging.py → log.py} +2 -0
- dataeval/metrics/__init__.py +2 -2
- dataeval/metrics/bias/__init__.py +9 -12
- dataeval/metrics/bias/balance.py +10 -8
- dataeval/metrics/bias/coverage.py +52 -4
- dataeval/metrics/bias/diversity.py +42 -14
- dataeval/metrics/bias/parity.py +15 -12
- dataeval/metrics/estimators/__init__.py +2 -2
- dataeval/metrics/estimators/ber.py +3 -1
- dataeval/metrics/estimators/divergence.py +1 -1
- dataeval/metrics/estimators/uap.py +1 -1
- dataeval/metrics/stats/__init__.py +18 -18
- dataeval/metrics/stats/base.py +4 -4
- dataeval/metrics/stats/boxratiostats.py +8 -9
- dataeval/metrics/stats/datasetstats.py +10 -14
- dataeval/metrics/stats/dimensionstats.py +4 -4
- dataeval/metrics/stats/hashstats.py +12 -8
- dataeval/metrics/stats/labelstats.py +5 -5
- dataeval/metrics/stats/pixelstats.py +4 -9
- dataeval/metrics/stats/visualstats.py +4 -9
- dataeval/utils/__init__.py +4 -13
- dataeval/utils/dataset/__init__.py +7 -0
- dataeval/utils/{torch → dataset}/datasets.py +2 -0
- dataeval/utils/dataset/read.py +63 -0
- dataeval/utils/{split_dataset.py → dataset/split.py} +38 -30
- dataeval/utils/image.py +2 -2
- dataeval/utils/metadata.py +310 -5
- dataeval/{metrics/bias/metadata_utils.py → utils/plot.py} +1 -104
- dataeval/utils/torch/__init__.py +2 -17
- dataeval/utils/torch/gmm.py +29 -6
- dataeval/utils/torch/{utils.py → internal.py} +82 -58
- dataeval/utils/torch/models.py +10 -8
- dataeval/utils/torch/trainer.py +6 -85
- dataeval/workflows/__init__.py +2 -5
- dataeval/workflows/sufficiency.py +16 -6
- dataeval-0.75.0.dist-info/METADATA +136 -0
- dataeval-0.75.0.dist-info/RECORD +67 -0
- dataeval/detectors/ood/base_torch.py +0 -109
- dataeval/metrics/bias/metadata_preprocessing.py +0 -285
- dataeval/utils/gmm.py +0 -26
- dataeval-0.74.2.dist-info/METADATA +0 -120
- dataeval-0.74.2.dist-info/RECORD +0 -66
- {dataeval-0.74.2.dist-info → dataeval-0.75.0.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.74.2.dist-info → dataeval-0.75.0.dist-info}/WHEEL +0 -0
dataeval/utils/torch/models.py
CHANGED
@@ -1,6 +1,8 @@
+"""Simple PyTorch model architectures used by DataEval."""
+
 from __future__ import annotations
 
-__all__ = ["
+__all__ = ["Autoencoder", "Encoder", "Decoder"]
 
 import math
 from typing import Any
@@ -9,7 +11,7 @@ import torch
 import torch.nn as nn
 
 
-class
+class Autoencoder(nn.Module):
     """
     An autoencoder model with a separate encoder and decoder.
 
@@ -63,7 +65,7 @@ class Encoder(nn.Module):
     """
     A simple encoder to be used in an autoencoder model.
 
-    This is the encoder used by the
+    This is the encoder used by the Autoencoder model.
 
     Parameters
     ----------
@@ -104,7 +106,7 @@ class Decoder(nn.Module):
     """
    A simple decoder to be used in an autoencoder model.
 
-    This is the decoder used by the
+    This is the decoder used by the Autoencoder model.
 
     Parameters
     ----------
@@ -142,14 +144,14 @@ class Decoder(nn.Module):
 
 class AE(nn.Module):
     """
-    An autoencoder model with a separate encoder and decoder
-
+    An autoencoder model with a separate encoder and decoder used as the core of an autoencoder-based
+    OOD detector, i.e. as an argument to OOD_AE().
 
     Parameters
     ----------
     input_shape : tuple[int, int, int]
         Number of input channels, number of rows, number of columns. (Number of examples per batch will be inferred
-
+        at runtime.)
     """
 
     def __init__(self, input_shape: tuple[int, int, int]) -> None:
@@ -279,7 +281,7 @@ class Decoder_AE(nn.Module):
     """
     A simple decoder to be used in an autoencoder model.
 
-    This is the decoder used by the
+    This is the decoder used by the Autoencoder model.
 
     Parameters
     ----------
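The updated AE docstring points at its role as the reconstruction model behind the OOD_AE detector, which this release moves to dataeval/detectors/ood/ae.py (see the file list above). A minimal sketch of that wiring, assuming OOD_AE keeps a fit/predict detector interface; the keyword arguments below mirror the old OODBase.fit shown at the end of this diff and are illustrative, not confirmed by this diff:

```
import numpy as np

from dataeval.detectors.ood.ae import OOD_AE
from dataeval.utils.torch.models import AE

# input_shape is (channels, rows, cols); batch size is inferred at runtime.
detector = OOD_AE(AE(input_shape=(3, 32, 32)))

# Fit on reference (in-distribution) images, NCHW float32.
x_ref = np.random.rand(128, 3, 32, 32).astype(np.float32)
detector.fit(x_ref, threshold_perc=99.0)  # assumed keyword, per the old OODBase.fit

# Higher reconstruction error means more likely out-of-distribution.
x_new = np.random.rand(8, 3, 32, 32).astype(np.float32)
print(detector.predict(x_new))
```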
dataeval/utils/torch/trainer.py
CHANGED
@@ -1,15 +1,15 @@
+"""Utility classes for training PyTorch models."""
+
 from __future__ import annotations
 
-
+__all__ = ["AETrainer"]
+
+from typing import Any
 
 import torch
 import torch.nn as nn
-from numpy.typing import NDArray
 from torch.optim import Adam
-from torch.utils.data import DataLoader, Dataset
-from tqdm import tqdm
-
-__all__ = ["AETrainer", "trainer"]
+from torch.utils.data import DataLoader, Dataset
 
 
 def get_images_from_batch(batch: Any) -> Any:
@@ -176,82 +176,3 @@ class AETrainer:
             encodings = torch.vstack((encodings, embeddings)) if len(encodings) else embeddings
 
         return encodings
-
-
-def trainer(
-    model: torch.nn.Module,
-    x_train: NDArray[Any],
-    y_train: NDArray[Any] | None,
-    loss_fn: Callable[..., torch.Tensor | torch.nn.Module] | None,
-    optimizer: torch.optim.Optimizer | None,
-    preprocess_fn: Callable[[torch.Tensor], torch.Tensor] | None,
-    epochs: int,
-    batch_size: int,
-    device: torch.device,
-    verbose: bool,
-) -> None:
-    """
-    Train Pytorch model.
-
-    Parameters
-    ----------
-    model
-        Model to train.
-    loss_fn
-        Loss function used for training.
-    x_train
-        Training data.
-    y_train
-        Training labels.
-    optimizer
-        Optimizer used for training.
-    preprocess_fn
-        Preprocessing function applied to each training batch.
-    epochs
-        Number of training epochs.
-    reg_loss_fn
-        Allows an additional regularisation term to be defined as reg_loss_fn(model)
-    batch_size
-        Batch size used for training.
-    buffer_size
-        Maximum number of elements that will be buffered when prefetching.
-    verbose
-        Whether to print training progress.
-    """
-    if optimizer is None:
-        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
-
-    if y_train is None:
-        dataset = TensorDataset(torch.from_numpy(x_train).to(torch.float32))
-
-    else:
-        dataset = TensorDataset(
-            torch.from_numpy(x_train).to(torch.float32), torch.from_numpy(y_train).to(torch.float32)
-        )
-
-    loader = DataLoader(dataset=dataset)
-
-    model = model.to(device)
-
-    # iterate over epochs
-    loss = torch.nan
-    disable_tqdm = not verbose
-    for epoch in (pbar := tqdm(range(epochs), disable=disable_tqdm)):
-        epoch_loss = loss
-        for step, data in enumerate(loader):
-            if step % 250 == 0:
-                pbar.set_description(f"Epoch: {epoch} ({epoch_loss:.3f}), loss: {loss:.3f}")
-
-            x, y = [d.to(device) for d in data] if len(data) > 1 else (data[0].to(device), None)
-
-            if isinstance(preprocess_fn, Callable):
-                x = preprocess_fn(x)
-
-            y_hat = model(x)
-            y = x if y is None else y
-
-            loss = loss_fn(y, y_hat)  # type: ignore
-
-            optimizer.zero_grad()
-            loss.backward()
-            optimizer.step()
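With the standalone trainer() helper deleted, this module now exports only AETrainer. For callers that depended on the removed function, a minimal stand-in can be reconstructed from the deleted body above. A sketch, assuming the model returns a reconstruction of its input and defaulting to MSE loss when none is given; unlike the removed helper, it also forwards batch_size to the DataLoader, which the original constructed without one:

```
from __future__ import annotations

from typing import Callable

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm


def train_autoencoder(
    model: torch.nn.Module,
    x_train: np.ndarray,
    epochs: int = 20,
    batch_size: int = 64,
    device: str | torch.device = "cpu",
    loss_fn: Callable[..., torch.Tensor] | None = None,
    verbose: bool = True,
) -> None:
    """Minimal stand-in for the removed trainer() helper (unsupervised case)."""
    loss_fn = loss_fn or torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    dataset = TensorDataset(torch.from_numpy(x_train).to(torch.float32))
    # Actually forward batch_size to the loader, which the removed helper did not.
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    model = model.to(device).train()
    loss = torch.tensor(torch.nan)
    for epoch in (pbar := tqdm(range(epochs), disable=not verbose)):
        for (x,) in loader:
            x = x.to(device)
            x_hat = model(x)  # autoencoder: the target is the input itself
            loss = loss_fn(x_hat, x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        pbar.set_description(f"Epoch {epoch}, loss: {loss.item():.3f}")
```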
dataeval/workflows/__init__.py
CHANGED
@@ -2,9 +2,6 @@
 Workflows perform a sequence of actions to analyze the dataset and make predictions.
 """
 
-
+__all__ = ["Sufficiency", "SufficiencyOutput"]
 
-
-from dataeval.workflows.sufficiency import Sufficiency, SufficiencyOutput
-
-__all__ = ["Sufficiency", "SufficiencyOutput"]
+from dataeval.workflows.sufficiency import Sufficiency, SufficiencyOutput
dataeval/workflows/sufficiency.py
CHANGED
@@ -1,16 +1,15 @@
 from __future__ import annotations
 
-__all__ = [
+__all__ = []
 
+import contextlib
 import warnings
 from dataclasses import dataclass
 from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, TypeVar, cast
 
-import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch.nn as nn
-from matplotlib.figure import Figure
 from numpy.typing import ArrayLike, NDArray
 from scipy.optimize import basinhopping
 from torch.utils.data import Dataset
@@ -18,6 +17,9 @@ from torch.utils.data import Dataset
 from dataeval.interop import as_numpy
 from dataeval.output import Output, set_metadata
 
+with contextlib.suppress(ImportError):
+    from matplotlib.figure import Figure
+
 
 @dataclass(frozen=True)
 class SufficiencyOutput(Output):
@@ -97,7 +99,7 @@ class SufficiencyOutput(Output):
 
         Returns
         -------
-        list[
+        list[Figure]
            List of Figures for each measure
 
         Raises
@@ -344,7 +346,9 @@ def plot_measure(
     params: NDArray[Any],
     projection: NDArray[Any],
 ) -> Figure:
-
+    import matplotlib.pyplot
+
+    fig = matplotlib.pyplot.figure()
     fig = cast(Figure, fig)
     fig.tight_layout()
 
@@ -510,7 +514,13 @@ class Sufficiency(Generic[T]):
     Examples
     --------
     >>> suff = Sufficiency(
-    ...     model=model,
+    ...     model=model,
+    ...     train_ds=train_ds,
+    ...     test_ds=test_ds,
+    ...     train_fn=train_fn,
+    ...     eval_fn=eval_fn,
+    ...     runs=3,
+    ...     substeps=5,
     ... )
     >>> suff.evaluate()
     SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), params={'test': array([ 0., 42., 0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
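The matplotlib edits above implement a standard optional-dependency pattern: the annotation-only Figure import is guarded with contextlib.suppress(ImportError) at module scope, while the heavyweight matplotlib.pyplot import is deferred into the plotting function, so importing the module never requires matplotlib (which is an `all` extra per the METADATA below). A self-contained sketch of the same pattern, using generic names rather than dataeval's API:

```
from __future__ import annotations

import contextlib

# The module imports cleanly even without matplotlib installed: Figure is
# needed only for type annotations, so a failed import is silently ignored.
with contextlib.suppress(ImportError):
    from matplotlib.figure import Figure


def plot_scores(scores: list[float]) -> Figure:
    # Deferred import: matplotlib is required only when plotting is used.
    import matplotlib.pyplot as plt

    fig = plt.figure()
    fig.gca().plot(scores)
    fig.tight_layout()
    return fig
```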
dataeval-0.75.0.dist-info/METADATA
ADDED
@@ -0,0 +1,136 @@
+Metadata-Version: 2.1
+Name: dataeval
+Version: 0.75.0
+Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
+Home-page: https://dataeval.ai/
+License: MIT
+Author: Andrew Weng
+Author-email: andrew.weng@ariacoustics.com
+Maintainer: ARiA
+Maintainer-email: dataeval@ariacoustics.com
+Requires-Python: >=3.9,<3.13
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Topic :: Scientific/Engineering
+Provides-Extra: all
+Requires-Dist: matplotlib ; extra == "all"
+Requires-Dist: numpy (>=1.24.3)
+Requires-Dist: pillow (>=10.3.0)
+Requires-Dist: requests
+Requires-Dist: scikit-learn (>=1.5.0)
+Requires-Dist: scipy (>=1.10)
+Requires-Dist: torch (>=2.2.0)
+Requires-Dist: torchvision (>=0.17.0)
+Requires-Dist: tqdm
+Requires-Dist: typing-extensions (>=4.12) ; python_version >= "3.9" and python_version < "4.0"
+Requires-Dist: xxhash (>=3.3)
+Project-URL: Documentation, https://dataeval.readthedocs.io/
+Project-URL: Repository, https://github.com/aria-ml/dataeval/
+Description-Content-Type: text/markdown
+
+# DataEval
+
+To view our extensive collection of tutorials, how-to's, explanation guides, and reference material, please visit our documentation on **[Read the Docs](https://dataeval.readthedocs.io/)**
+
+## About DataEval
+
+<!-- start tagline -->
+
+DataEval curates datasets to train and test performant, robust, unbiased and reliable AI models and monitors for data shifts that impact performance of deployed models.
+
+<!-- end tagline -->
+
+### Our mission
+
+<!-- start needs -->
+
+DataEval is an effective, powerful, and reliable set of tools for any T&E engineer. Throughout all stages of the machine learning lifecycle, DataEval supports **model development, data analysis, and monitoring with state-of-the-art algorithms to help you solve difficult problems. With a focus on computer vision tasks, DataEval provides simple, but effective metrics for performance estimation, bias detection, and dataset linting.
+
+<!-- end needs -->
+
+<!-- start JATIC interop -->
+DataEval is easy to install, supports a wide range of Python versions, and is compatible with many of the most popular packages in the scientific and T&E communities.
+DataEval also has native interopability between JATIC's suite of tools when using MAITE-compliant datasets and models.
+<!-- end JATIC interop -->
+
+## Getting Started
+
+**Python versions:** 3.9 - 3.12
+
+**Supported packages**: *NumPy*, *Pandas*, *Sci-kit learn*, *MAITE*, *NRTK*, *Gradient*
+
+Choose your preferred method of installation below or follow our [installation guide](https://dataeval.readthedocs.io/en/v0.74.2/installation.html).
+
+* [Installing with pip](#installing-with-pip)
+* [Installing with conda/mamba](#installing-with-conda)
+* [Installing from GitHub](#installing-from-github)
+
+### **Installing with pip**
+You can install DataEval directly from pypi.org using the following command. The optional dependencies of DataEval are `all`.
+
+```
+pip install dataeval[all]
+```
+
+### **Installing with conda**
+
+DataEval can be installed in a Conda/Mamba environment using the provided `environment.yaml` file. As some dependencies
+are installed from the `pytorch` channel, the channel is specified in the below example.
+
+```
+micromamba create -f environment\environment.yaml -c pytorch
+```
+
+### **Installing from GitHub**
+
+To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger, binary source files and `poetry` for project dependency management.
+
+```
+sudo apt-get install git-lfs
+pip install poetry
+```
+
+Pull the source down and change to the DataEval project directory.
+```
+git clone https://github.com/aria-ml/dataeval.git
+cd dataeval
+```
+
+Install DataEval with optional dependencies for development.
+```
+poetry install --all-extras --with dev
+```
+
+Now that DataEval is installed, you can run commands in the poetry virtual environment by prefixing shell commands with `poetry run`, or activate the virtual environment directly in the shell.
+```
+poetry shell
+```
+
+## Contact Us
+
+If you have any questions, feel free to reach out to the people below:
+
+- **POC**: Scott Swan @scott.swan
+- **DPOC**: Andrew Weng @aweng
+
+## Acknowledgement
+
+<!-- start attribution -->
+
+### Alibi-Detect
+This project uses code from the [Alibi-Detect](https://github.com/SeldonIO/alibi-detect) Python library developed by SeldonIO.\
+Additional documentation from their developers is available on the [Alibi-Detect documentation page](https://docs.seldon.io/projects/alibi-detect/en/stable/).
+
+### CDAO Funding Acknowledgement
+This material is based upon work supported by the Chief Digital and Artificial Intelligence Office under Contract No. W519TC-23-9-2033. The views and conclusions contained herein are those of the author(s) and should not be interpreted as necessarily representing the official policies or endorsements, either expressed or implied, of the U.S. Government.
+
+<!-- end attribution -->
+
dataeval-0.75.0.dist-info/RECORD
ADDED
@@ -0,0 +1,67 @@
+dataeval/__init__.py,sha256=yESctPswyAJ01Hr9k4QUoGZp8D0RtvoQ26k4AFE2vs4,1472
+dataeval/detectors/__init__.py,sha256=iifG-Z08mH5B4QhkKtAieDGJBKldKvmCXpDQJD9qVY8,206
+dataeval/detectors/drift/__init__.py,sha256=wO294Oz--l0GuZTAkBpyGwZphbQsot57HoiEX6kjNOc,652
+dataeval/detectors/drift/base.py,sha256=sX46grnr4DV0WMofLTI2a_tDHR4OLZEUCQrMLePouqg,14468
+dataeval/detectors/drift/cvm.py,sha256=TATS6IOE0INO1pkyRkesgrhDawD_kITsRsOOGVRs420,4132
+dataeval/detectors/drift/ks.py,sha256=3Jgh5W7pC1hO1yZPCiXc47snlSdXv5BIG8sCyRRz-Ec,4220
+dataeval/detectors/drift/mmd.py,sha256=lD__AouWlYWCJOD0eNNEhmLTnUPwNTBU6OCgITcpw40,7592
+dataeval/detectors/drift/torch.py,sha256=ykD-Nggys5T9FTGXXbYYOi2WRKwEzEjXhL8ZueVmTxU,7659
+dataeval/detectors/drift/uncertainty.py,sha256=Pdim80_-ainvOX5-7fhH9cvblYI2d-zocEwZO-JfCg4,5345
+dataeval/detectors/drift/updates.py,sha256=UJ0z5hlunRi7twnkLABfdJG3tT2EqX4y9IGx8_USYvo,1780
+dataeval/detectors/linters/__init__.py,sha256=CZV5naeYQYL3sHXO_CXB26AXkyTeKHI-TMaewtEs8Ag,483
+dataeval/detectors/linters/clusterer.py,sha256=1qIQo5NuJkx-phKFWuXkUpUJLcqTt92L8Cpv3AmO3xQ,20929
+dataeval/detectors/linters/duplicates.py,sha256=pcCRN27IuGa6ASkiFG73kYdI8_X0j12INbkD9GOlWPs,5614
+dataeval/detectors/linters/merged_stats.py,sha256=X-bDTwjyR8RuVmzxLaHZmQ5nI3oOWvsqVlitdSncapk,1355
+dataeval/detectors/linters/outliers.py,sha256=Fn6R_7mGOrWlTRCXFrjHvIFNELN6CTosoJgzDr8cVr0,10253
+dataeval/detectors/ood/__init__.py,sha256=hTeR-Aqt6SKWsqFusaKiw_TlnFPe_sV3fQ7NKUTzZrU,292
+dataeval/detectors/ood/ae.py,sha256=SL8oKTERhMwaZTQWwDhQQ6H07UKj8ozXqEWO3TaOAos,2151
+dataeval/detectors/ood/base.py,sha256=6gUkbGE6PbKmA899rXOTOIeT8u_gaD0DNDQV8Wyfk5Y,3421
+dataeval/detectors/ood/metadata_ks_compare.py,sha256=-hEhDNXFC7X8wmFeoigO7A7Qn90vRLroN_nKDwNgjnE,5204
+dataeval/detectors/ood/metadata_least_likely.py,sha256=rb8GOgsrlrEzc6fxccdmyZQ5PC7HtTsTY8U97D-h5OU,5088
+dataeval/detectors/ood/metadata_ood_mi.py,sha256=7_Sdzf7-x1TlrIQvSyOIB98C8_UQhUwmwFQmZ9_q1Uc,4042
+dataeval/detectors/ood/mixin.py,sha256=Ia-rJF6rtGhE8uavijdbzOha3ueFk2CFfA0Ah_mnF40,4976
+dataeval/detectors/ood/output.py,sha256=8UQbtudQ0gSeq_hQV67IE5SfHednaiGUHv9MideETdk,1710
+dataeval/interop.py,sha256=GLziERWQQGwUO4Nb-uHpbLlvBOT2WF2GVilTHmsDq8w,2279
+dataeval/log.py,sha256=Mn5bRWO0cgtAYd5VGYSFiPgu57ta3zoktrtHAZ1m3dU,357
+dataeval/metrics/__init__.py,sha256=p-lRjm0oVHD3cXZeEajTfuGTuQOCCVHbJ8CqAI_GHVY,238
+dataeval/metrics/bias/__init__.py,sha256=knYgCdeHredaHI6KGdjiYM6ViPfDf8NW35xkKiiGlVM,599
+dataeval/metrics/bias/balance.py,sha256=od3gcejOqJDDymy09OWSxzqkBNyh7Vf3aXN9o6IPKHY,9151
+dataeval/metrics/bias/coverage.py,sha256=k8TJAsUWlLgn_-JEtRWIOwhtMRwXmyGzLDndGxNTsAU,5745
+dataeval/metrics/bias/diversity.py,sha256=upj-Gx_4-bBF-4dDaUSuURIbP98Ghk-BSCK5ZJNGMEg,8318
+dataeval/metrics/bias/parity.py,sha256=wVMfzKFqzHkp3SNUJFjRH_Eej9DIg-xAhHkShIAek68,12755
+dataeval/metrics/estimators/__init__.py,sha256=oY_9jX7V-Kg7-4KpvMNB4rUhsk8QTA0DIoM8d2VtVIg,380
+dataeval/metrics/estimators/ber.py,sha256=p3KaY-rnK45CUDaqx-55wWG6yHcDnH6Kkkt9r6FkmZY,5003
+dataeval/metrics/estimators/divergence.py,sha256=QYkOs7In9un0tYHztwZ5kNqiWVNS3Lgmxn1716H8HG4,4243
+dataeval/metrics/estimators/uap.py,sha256=ELa5MixMOJZoW5rUuVLOXynfLMbVjxb-r7VYF5qqXrw,2139
+dataeval/metrics/stats/__init__.py,sha256=Js_mklHJbHwOXJtMFo9NIyePZLwLZL-jruwmcjoLsZc,1086
+dataeval/metrics/stats/base.py,sha256=U0yPaRSHuPGZk3A7hl8ghJCki7iBtW5wM1eZvElu1_w,12038
+dataeval/metrics/stats/boxratiostats.py,sha256=fNzHT_nZX0MYeHkWRdcfEz2mtRC2d1JxpoK3l4EBrQc,6301
+dataeval/metrics/stats/datasetstats.py,sha256=krOm48yjyzYOWKLaWFqHAQPmuhiN4manif7ZXh2Ohhg,5828
+dataeval/metrics/stats/dimensionstats.py,sha256=_mN7wHencHh4UNd9XUflhq0sIa9yLPk3yHqmossDEGk,3985
+dataeval/metrics/stats/hashstats.py,sha256=_zZOwnQDlpMoPyqbOV2v9V_Uqox0c4vX2Khv5u_fAk8,5068
+dataeval/metrics/stats/labelstats.py,sha256=mLH02Xy_uT-qN7HXuXEgs786T2Xr0BMudweBDeEWd5I,4065
+dataeval/metrics/stats/pixelstats.py,sha256=t8abfenA79x87CMqPuKtddglD3l_LA6nXS4K_FlL4-k,4148
+dataeval/metrics/stats/visualstats.py,sha256=UU0oa5BWuIOTDM1H1ZnlhYyu8ruVEnaLPCDOsbm-q1c,4546
+dataeval/output.py,sha256=hR5TJ67f7FgrZO9Du46aw-jvRpMjOimSgJSau4ZNK44,3565
+dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/utils/__init__.py,sha256=fBpXVWzNaXySTuZWsD8Jg9LLHzb23nz_PfdxPD_gc8c,279
+dataeval/utils/dataset/__init__.py,sha256=IvRauQaa0CzJ5nZrfTSjGoaaKelyJcQDe3OPRw0-NXs,332
+dataeval/utils/dataset/datasets.py,sha256=7tSqN3d8UncqmXh4eiEwarXgVxc4sMuIKPTqBCE0pN8,15080
+dataeval/utils/dataset/read.py,sha256=tt-9blXzYLRb4Vgv6DrFj2ikUSvBF0-qTSnvvYec_2U,1523
+dataeval/utils/dataset/split.py,sha256=FpxHxmewjqIj6hikCsamNQTq877qu4HfKnzArOyvmyY,18957
+dataeval/utils/image.py,sha256=AQljELyMFkYsf2AoNOH5dZG8DYE4hPw0MCk85eIXqAw,1926
+dataeval/utils/metadata.py,sha256=mhMhBgb7nAIIljDdecOqiZ1zsYagE6h8DKxE_DFDW-E,22270
+dataeval/utils/plot.py,sha256=jQSiqDArFOlKZaIbv4Viso_ShU3LnZE-Y2qXKuKsa8M,3790
+dataeval/utils/shared.py,sha256=xvF3VLfyheVwJtdtDrneOobkKf7t-JTmf_w91FWXmqo,3616
+dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
+dataeval/utils/torch/blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
+dataeval/utils/torch/gmm.py,sha256=fQ8CBO4Bf6i9N1CZdeJ8VJP25fsPjgMextQkondwgvo,3693
+dataeval/utils/torch/internal.py,sha256=qAzQTwTI9Qy6f01Olw3d1TIJ4HoWGf0gQzgWVcdD2x4,6653
+dataeval/utils/torch/models.py,sha256=Df3B_9x5uu-Y5ZOyhRZYpKJnDvxt0hgMeJLy1E4oxpU,8519
+dataeval/utils/torch/trainer.py,sha256=Qay0LK63RuyoGYiJ5zI2C5BVym309ORvp6shhpcrIU4,5589
+dataeval/workflows/__init__.py,sha256=L9yfBipNFGnYuN2JbMknIHDvziwfa2XAGFnOwifZbls,216
+dataeval/workflows/sufficiency.py,sha256=nL99iDlu2bF_9VGu3ioLFDJBgBBJEdwEXROxXm_0sfY,18673
+dataeval-0.75.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
+dataeval-0.75.0.dist-info/METADATA,sha256=6m2O6vreJR3Lq1_BXEU6DHnK2C5L_q5YAPofIl4kxCw,5410
+dataeval-0.75.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.75.0.dist-info/RECORD,,
dataeval/detectors/ood/base_torch.py
DELETED
@@ -1,109 +0,0 @@
-"""
-Source code derived from Alibi-Detect 0.11.4
-https://github.com/SeldonIO/alibi-detect/tree/v0.11.4
-
-Original code Copyright (c) 2023 Seldon Technologies Ltd
-Licensed under Apache Software License (Apache 2.0)
-"""
-
-from __future__ import annotations
-
-from typing import Callable, cast
-
-import torch
-from numpy.typing import ArrayLike
-
-from dataeval.detectors.drift.torch import get_device
-from dataeval.detectors.ood.base import OODBaseMixin, OODFitMixin, OODGMMMixin
-from dataeval.interop import to_numpy
-from dataeval.utils.torch.gmm import gmm_params
-from dataeval.utils.torch.trainer import trainer
-
-
-class OODBase(OODBaseMixin[torch.nn.Module], OODFitMixin[Callable[..., torch.nn.Module], torch.optim.Optimizer]):
-    def __init__(self, model: torch.nn.Module, device: str | torch.device | None = None) -> None:
-        self.device: torch.device = get_device(device)
-        super().__init__(model)
-
-    def fit(
-        self,
-        x_ref: ArrayLike,
-        threshold_perc: float,
-        loss_fn: Callable[..., torch.nn.Module] | None,
-        optimizer: torch.optim.Optimizer | None,
-        epochs: int,
-        batch_size: int,
-        verbose: bool,
-    ) -> None:
-        """
-        Train the model and infer the threshold value.
-
-        Parameters
-        ----------
-        x_ref : ArrayLike
-            Training data.
-        threshold_perc : float, default 100.0
-            Percentage of reference data that is normal.
-        loss_fn : Callable | None, default None
-            Loss function used for training.
-        optimizer : Optimizer, default keras.optimizers.Adam
-            Optimizer used for training.
-        epochs : int, default 20
-            Number of training epochs.
-        batch_size : int, default 64
-            Batch size used for training.
-        verbose : bool, default True
-            Whether to print training progress.
-        """
-
-        # Train the model
-        trainer(
-            model=self.model,
-            x_train=to_numpy(x_ref),
-            y_train=None,
-            loss_fn=loss_fn,
-            optimizer=optimizer,
-            preprocess_fn=None,
-            epochs=epochs,
-            batch_size=batch_size,
-            device=self.device,
-            verbose=verbose,
-        )
-
-        # Infer the threshold values
-        self._ref_score = self.score(x_ref, batch_size)
-        self._threshold_perc = threshold_perc
-
-
-class OODBaseGMM(OODBase, OODGMMMixin[torch.Tensor]):
-    def fit(
-        self,
-        x_ref: ArrayLike,
-        threshold_perc: float,
-        loss_fn: Callable[..., torch.nn.Module] | None,
-        optimizer: torch.optim.Optimizer | None,
-        epochs: int,
-        batch_size: int,
-        verbose: bool,
-    ) -> None:
-        # Train the model
-        trainer(
-            model=self.model,
-            x_train=to_numpy(x_ref),
-            y_train=None,
-            loss_fn=loss_fn,
-            optimizer=optimizer,
-            preprocess_fn=None,
-            epochs=epochs,
-            batch_size=batch_size,
-            device=self.device,
-            verbose=verbose,
-        )
-
-        # Calculate the GMM parameters
-        _, z, gamma = cast(tuple[torch.Tensor, torch.Tensor, torch.Tensor], self.model(x_ref))
-        self._gmm_params = gmm_params(z, gamma)
-
-        # Infer the threshold values
-        self._ref_score = self.score(x_ref, batch_size)
-        self._threshold_perc = threshold_perc
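Both deleted fit() implementations end the same way: score the reference data and store threshold_perc. The decision threshold itself, applied by OODBaseMixin (not shown in this diff), is conventionally the threshold_perc percentile of those reference scores. A plain-numpy sketch of that inference step, with illustrative values, independent of the removed classes:

```
import numpy as np

# Instance-level OOD scores on the reference (in-distribution) data,
# e.g. per-image reconstruction errors from the trained autoencoder.
ref_score = np.random.rand(1000)
threshold_perc = 95.0  # percentage of reference data treated as normal

# The cutoff is the threshold_perc percentile of the reference scores.
threshold = np.percentile(ref_score, threshold_perc)

# New instances scoring above the cutoff are flagged as out-of-distribution.
new_score = np.random.rand(10)
is_ood = new_score > threshold
print(threshold, is_ood)
```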