dataeval 0.70.0__py3-none-any.whl → 0.70.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +6 -6
- dataeval/_internal/datasets.py +235 -131
- dataeval/_internal/detectors/clusterer.py +2 -0
- dataeval/_internal/detectors/drift/base.py +2 -2
- dataeval/_internal/detectors/drift/mmd.py +1 -1
- dataeval/_internal/detectors/duplicates.py +2 -0
- dataeval/_internal/detectors/ood/ae.py +5 -3
- dataeval/_internal/detectors/ood/aegmm.py +6 -4
- dataeval/_internal/detectors/ood/base.py +12 -7
- dataeval/_internal/detectors/ood/llr.py +6 -4
- dataeval/_internal/detectors/ood/vae.py +5 -3
- dataeval/_internal/detectors/ood/vaegmm.py +6 -4
- dataeval/_internal/detectors/outliers.py +4 -2
- dataeval/_internal/metrics/balance.py +4 -2
- dataeval/_internal/metrics/ber.py +2 -0
- dataeval/_internal/metrics/coverage.py +4 -0
- dataeval/_internal/metrics/divergence.py +6 -2
- dataeval/_internal/metrics/diversity.py +8 -6
- dataeval/_internal/metrics/parity.py +8 -6
- dataeval/_internal/metrics/stats/base.py +2 -2
- dataeval/_internal/metrics/stats/datasetstats.py +2 -0
- dataeval/_internal/metrics/stats/dimensionstats.py +2 -0
- dataeval/_internal/metrics/stats/hashstats.py +2 -0
- dataeval/_internal/metrics/stats/labelstats.py +1 -1
- dataeval/_internal/metrics/stats/pixelstats.py +4 -2
- dataeval/_internal/metrics/stats/visualstats.py +4 -2
- dataeval/_internal/metrics/uap.py +6 -2
- dataeval/_internal/metrics/utils.py +2 -2
- dataeval/_internal/models/pytorch/autoencoder.py +5 -5
- dataeval/_internal/models/tensorflow/pixelcnn.py +1 -4
- dataeval/_internal/utils.py +11 -16
- dataeval/_internal/workflows/sufficiency.py +44 -33
- dataeval/detectors/__init__.py +4 -0
- dataeval/detectors/drift/__init__.py +8 -3
- dataeval/detectors/drift/kernels/__init__.py +4 -0
- dataeval/detectors/drift/updates/__init__.py +4 -0
- dataeval/detectors/linters/__init__.py +15 -4
- dataeval/detectors/ood/__init__.py +14 -2
- dataeval/metrics/__init__.py +5 -0
- dataeval/metrics/bias/__init__.py +13 -4
- dataeval/metrics/estimators/__init__.py +8 -8
- dataeval/metrics/stats/__init__.py +17 -6
- dataeval/utils/__init__.py +16 -3
- dataeval/utils/tensorflow/__init__.py +11 -0
- dataeval/utils/torch/__init__.py +12 -0
- dataeval/utils/torch/datasets/__init__.py +7 -0
- dataeval/workflows/__init__.py +4 -0
- {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/METADATA +10 -2
- dataeval-0.70.1.dist-info/RECORD +80 -0
- dataeval/tensorflow/__init__.py +0 -3
- dataeval/torch/__init__.py +0 -3
- dataeval-0.70.0.dist-info/RECORD +0 -79
- /dataeval/{tensorflow → utils/tensorflow}/loss/__init__.py +0 -0
- /dataeval/{tensorflow → utils/tensorflow}/models/__init__.py +0 -0
- /dataeval/{tensorflow → utils/tensorflow}/recon/__init__.py +0 -0
- /dataeval/{torch → utils/torch}/models/__init__.py +0 -0
- /dataeval/{torch → utils/torch}/trainer/__init__.py +0 -0
- {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/WHEEL +0 -0
dataeval/_internal/workflows/sufficiency.py
CHANGED
@@ -2,23 +2,26 @@ from __future__ import annotations
 
 import warnings
 from dataclasses import dataclass
-from typing import Any, Callable, Sequence, cast
+from typing import Any, Callable, Iterable, Mapping, Sequence, cast
 
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch.nn as nn
 from matplotlib.figure import Figure
-from numpy.typing import NDArray
+from numpy.typing import ArrayLike, NDArray
 from scipy.optimize import basinhopping
 from torch.utils.data import Dataset
 
+from dataeval._internal.interop import as_numpy
 from dataeval._internal.output import OutputMetadata, set_metadata
 
 
 @dataclass(frozen=True)
 class SufficiencyOutput(OutputMetadata):
     """
+    Output class for :class:`Sufficiency` workflow
+
     Attributes
     ----------
     steps : NDArray
@@ -45,13 +48,13 @@ class SufficiencyOutput(OutputMetadata):
     @set_metadata("dataeval.workflows.SufficiencyOutput")
     def project(
         self,
-        projection: int |
+        projection: int | Iterable[int],
     ) -> SufficiencyOutput:
         """Projects the measures for each value of X
 
         Parameters
         ----------
-        projection : int |
+        projection : int | Iterable[int]
            Step or steps to project
 
         Returns
@@ -63,12 +66,12 @@ class SufficiencyOutput(OutputMetadata):
        ------
        ValueError
            If the length of data points in the measures do not match
-            If
+            If `projection` is not numerical
        """
-        projection =
-
-        if not
-            raise ValueError("'
+        projection = np.asarray(list(projection) if isinstance(projection, Iterable) else [projection])
+
+        if not np.issubdtype(projection.dtype, np.number):
+            raise ValueError("'projection' must consist of numerical values")
 
         output = {}
         for name, measures in self.measures.items():
@@ -92,7 +95,7 @@ class SufficiencyOutput(OutputMetadata):
 
        Returns
        -------
-
+        list[plt.Figure]
            List of Figures for each measure
 
        Raises
@@ -130,19 +133,19 @@ class SufficiencyOutput(OutputMetadata):
 
        return plots
 
-    def inv_project(self, targets:
+    def inv_project(self, targets: Mapping[str, ArrayLike]) -> dict[str, NDArray[np.float64]]:
        """
        Calculate training samples needed to achieve target model metric values.
 
        Parameters
        ----------
-        targets :
-
+        targets : Mapping[str, ArrayLike]
+            Mapping of target metric scores (from 0.0 to 1.0) that we want
            to achieve, where the key is the name of the metric.
 
        Returns
        -------
-
+        dict[str, NDArray]
            List of the number of training samples needed to achieve each
            corresponding entry in targets
        """
@@ -150,18 +153,19 @@ class SufficiencyOutput(OutputMetadata):
        projection = {}
 
        for name, target in targets.items():
+            tarray = as_numpy(target)
            if name not in self.measures:
                continue
 
            measure = self.measures[name]
            if measure.ndim > 1:
-                projection[name] = np.zeros((len(measure), len(
+                projection[name] = np.zeros((len(measure), len(tarray)))
                for i in range(len(measure)):
                    projection[name][i] = inv_project_steps(
-                        self.params[name][i],
+                        self.params[name][i], tarray[i] if tarray.ndim == measure.ndim else tarray
                    )
            else:
-                projection[name] = inv_project_steps(self.params[name],
+                projection[name] = inv_project_steps(self.params[name], tarray)
 
        return projection
 
@@ -379,18 +383,18 @@ class Sufficiency:
        Function which takes a model (torch.nn.Module), a dataset
        (torch.utils.data.Dataset), indices to train on and executes model
        training against the data.
-    eval_fn : Callable[[nn.Module, Dataset],
+    eval_fn : Callable[[nn.Module, Dataset], Mapping[str, float | ArrayLike]]
        Function which takes a model (torch.nn.Module), a dataset
        (torch.utils.data.Dataset) and returns a dictionary of metric
-        values (
+        values (Mapping[str, float]) which is used to assess model performance
        given the model and data.
    runs : int, default 1
        Number of models to run over all subsets
    substeps : int, default 5
        Total number of dataset partitions that each model will train on
-    train_kwargs :
+    train_kwargs : Mapping | None, default None
        Additional arguments required for custom training function
-    eval_kwargs :
+    eval_kwargs : Mapping | None, default None
        Additional arguments required for custom evaluation function
    """
 
@@ -400,11 +404,11 @@ class Sufficiency:
        train_ds: Dataset,
        test_ds: Dataset,
        train_fn: Callable[[nn.Module, Dataset, Sequence[int]], None],
-        eval_fn: Callable[[nn.Module, Dataset],
+        eval_fn: Callable[[nn.Module, Dataset], Mapping[str, float] | Mapping[str, ArrayLike]],
        runs: int = 1,
        substeps: int = 5,
-        train_kwargs:
-        eval_kwargs:
+        train_kwargs: Mapping[str, Any] | None = None,
+        eval_kwargs: Mapping[str, Any] | None = None,
    ):
        self.model = model
        self.train_ds = train_ds
@@ -447,42 +451,42 @@ class Sufficiency:
    @property
    def eval_fn(
        self,
-    ) -> Callable[[nn.Module, Dataset], dict[str, float] |
+    ) -> Callable[[nn.Module, Dataset], dict[str, float] | Mapping[str, ArrayLike]]:
        return self._eval_fn
 
    @eval_fn.setter
    def eval_fn(
        self,
-        value: Callable[[nn.Module, Dataset], dict[str, float] |
+        value: Callable[[nn.Module, Dataset], dict[str, float] | Mapping[str, ArrayLike]],
    ):
        if not callable(value):
            raise TypeError("Must provide a callable for eval_fn.")
        self._eval_fn = value
 
    @property
-    def train_kwargs(self) ->
+    def train_kwargs(self) -> Mapping[str, Any]:
        return self._train_kwargs
 
    @train_kwargs.setter
-    def train_kwargs(self, value:
+    def train_kwargs(self, value: Mapping[str, Any] | None):
        self._train_kwargs = {} if value is None else value
 
    @property
-    def eval_kwargs(self) ->
+    def eval_kwargs(self) -> Mapping[str, Any]:
        return self._eval_kwargs
 
    @eval_kwargs.setter
-    def eval_kwargs(self, value:
+    def eval_kwargs(self, value: Mapping[str, Any] | None):
        self._eval_kwargs = {} if value is None else value
 
    @set_metadata("dataeval.workflows", ["runs", "substeps"])
-    def evaluate(self, eval_at:
+    def evaluate(self, eval_at: int | Iterable[int] | None = None, niter: int = 1000) -> SufficiencyOutput:
        """
        Creates data indices, trains models, and returns plotting data
 
        Parameters
        ----------
-        eval_at :
+        eval_at : int | Iterable[int] | None, default None
            Specify this to collect accuracies over a specific set of dataset lengths, rather
            than letting Sufficiency internally create the lengths to evaluate at.
        niter : int, default 1000
@@ -493,6 +497,11 @@ class Sufficiency:
        SufficiencyOutput
            Dataclass containing the average of each measure per substep
 
+        Raises
+        ------
+        ValueError
+            If `eval_at` is not numerical
+
        Examples
        --------
        >>> suff = Sufficiency(
@@ -502,7 +511,9 @@ class Sufficiency:
        SufficiencyOutput(steps=array([ 1, 3, 10, 31, 100], dtype=uint32), params={'test': array([ 0., 42., 0.])}, measures={'test': array([1., 1., 1., 1., 1.])})
        """  # noqa: E501
        if eval_at is not None:
-            ranges = eval_at
+            ranges = np.asarray(list(eval_at) if isinstance(eval_at, Iterable) else [eval_at])
+            if not np.issubdtype(ranges.dtype, np.number):
+                raise ValueError("'eval_at' must consist of numerical values")
        else:
            geomshape = (
                0.01 * self._length,
dataeval/detectors/__init__.py
CHANGED

dataeval/detectors/drift/__init__.py
CHANGED
@@ -1,16 +1,21 @@
+"""
+Drift detectors identify if the statistical properties of the data has changed.
+"""
+
 from dataeval import _IS_TORCH_AVAILABLE
+from dataeval._internal.detectors.drift.base import DriftOutput
 from dataeval._internal.detectors.drift.cvm import DriftCVM
 from dataeval._internal.detectors.drift.ks import DriftKS
 
 from . import updates
 
-__all__ = ["DriftCVM", "DriftKS", "updates"]
+__all__ = ["DriftCVM", "DriftKS", "DriftOutput", "updates"]
 
 if _IS_TORCH_AVAILABLE:  # pragma: no cover
-    from dataeval._internal.detectors.drift.mmd import DriftMMD
+    from dataeval._internal.detectors.drift.mmd import DriftMMD, DriftMMDOutput
    from dataeval._internal.detectors.drift.torch import preprocess_drift
    from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
 
    from . import kernels
 
-    __all__ += ["DriftMMD", "DriftUncertainty", "kernels", "preprocess_drift"]
+    __all__ += ["DriftMMD", "DriftMMDOutput", "DriftUncertainty", "kernels", "preprocess_drift"]
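With `DriftOutput` (and `DriftMMDOutput`) now re-exported, calling code can annotate results without importing from `_internal`. A hedged sketch; the `predict` call and the `is_drift` field are assumed, they are not shown in this diff:

```python
import numpy as np

from dataeval.detectors.drift import DriftKS, DriftOutput

rng = np.random.default_rng(0)
x_ref = rng.normal(size=(256, 16)).astype(np.float32)          # reference data
x_new = rng.normal(0.5, size=(64, 16)).astype(np.float32)      # possibly drifted batch

detector = DriftKS(x_ref)
result: DriftOutput = detector.predict(x_new)  # assumed detector API
print(result.is_drift)                         # assumed output field
```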
dataeval/detectors/drift/updates/__init__.py
CHANGED
@@ -1,3 +1,7 @@
+"""
+Update strategies inform how the drift detector classes update the reference data when monitoring for drift.
+"""
+
 from dataeval._internal.detectors.drift.base import LastSeenUpdate, ReservoirSamplingUpdate
 
 __all__ = ["LastSeenUpdate", "ReservoirSamplingUpdate"]
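A hedged sketch of how an update strategy might be supplied to a drift detector; the `update_x_ref` parameter name is an assumption and does not appear in this diff:

```python
import numpy as np

from dataeval.detectors.drift import DriftKS
from dataeval.detectors.drift.updates import LastSeenUpdate

x_ref = np.random.default_rng(0).normal(size=(512, 8)).astype(np.float32)

# Keep the reference window pinned to the 512 most recently seen samples
detector = DriftKS(x_ref, update_x_ref=LastSeenUpdate(512))
```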
dataeval/detectors/linters/__init__.py
CHANGED
@@ -1,5 +1,16 @@
-
-
-
+"""
+Linters help identify potential issues in training and test data and are an important aspect of data cleaning.
+"""
 
-
+from dataeval._internal.detectors.clusterer import Clusterer, ClustererOutput
+from dataeval._internal.detectors.duplicates import Duplicates, DuplicatesOutput
+from dataeval._internal.detectors.outliers import Outliers, OutliersOutput
+
+__all__ = [
+    "Clusterer",
+    "ClustererOutput",
+    "Duplicates",
+    "DuplicatesOutput",
+    "Outliers",
+    "OutliersOutput",
+]
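The linters and their typed outputs are now importable from a single public module. A minimal sketch, assuming the `evaluate` call form:

```python
import numpy as np

from dataeval.detectors.linters import Duplicates, DuplicatesOutput, Outliers, OutliersOutput

images = np.random.default_rng(0).integers(0, 255, size=(64, 3, 32, 32), dtype=np.uint8)

dupes: DuplicatesOutput = Duplicates().evaluate(images)   # exact / near duplicate groups
outliers: OutliersOutput = Outliers().evaluate(images)    # per-image outlier flags
```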
dataeval/detectors/ood/__init__.py
CHANGED
@@ -1,11 +1,23 @@
+"""
+Out-of-distribution detectors identify data that is different from the data used to train a particular model.
+"""
+
 from dataeval import _IS_TENSORFLOW_AVAILABLE
 
 if _IS_TENSORFLOW_AVAILABLE:  # pragma: no cover
    from dataeval._internal.detectors.ood.ae import OOD_AE
    from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
-    from dataeval._internal.detectors.ood.base import OODOutput,
+    from dataeval._internal.detectors.ood.base import OODOutput, OODScoreOutput
    from dataeval._internal.detectors.ood.llr import OOD_LLR
    from dataeval._internal.detectors.ood.vae import OOD_VAE
    from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
 
-    __all__ = [
+    __all__ = [
+        "OOD_AE",
+        "OOD_AEGMM",
+        "OOD_LLR",
+        "OOD_VAE",
+        "OOD_VAEGMM",
+        "OODOutput",
+        "OODScoreOutput",
+    ]
dataeval/metrics/__init__.py
CHANGED

dataeval/metrics/bias/__init__.py
CHANGED
@@ -1,7 +1,12 @@
-
-
-
-
+"""
+Bias metrics check for skewed or imbalanced datasets and incomplete feature
+representation which may impact model performance.
+"""
+
+from dataeval._internal.metrics.balance import BalanceOutput, balance
+from dataeval._internal.metrics.coverage import CoverageOutput, coverage
+from dataeval._internal.metrics.diversity import DiversityOutput, diversity
+from dataeval._internal.metrics.parity import ParityOutput, label_parity, parity
 
 __all__ = [
    "balance",
@@ -9,4 +14,8 @@ __all__ = [
    "diversity",
    "label_parity",
    "parity",
+    "BalanceOutput",
+    "CoverageOutput",
+    "DiversityOutput",
+    "ParityOutput",
 ]
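A small sketch using one of the bias metrics with its newly exported output type; calling `coverage` with only embeddings (assumed to be scaled to the unit interval) relies on default parameters:

```python
import numpy as np

from dataeval.metrics.bias import CoverageOutput, coverage

embeddings = np.random.default_rng(0).random((500, 16))  # unit-interval embeddings (assumption)

result: CoverageOutput = coverage(embeddings)
```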
dataeval/metrics/estimators/__init__.py
CHANGED
@@ -1,9 +1,9 @@
-
-
-
+"""
+Estimators calculate performance bounds and the statistical distance between datasets.
+"""
 
-
-
-
-
-]
+from dataeval._internal.metrics.ber import BEROutput, ber
+from dataeval._internal.metrics.divergence import DivergenceOutput, divergence
+from dataeval._internal.metrics.uap import UAPOutput, uap
+
+__all__ = ["ber", "divergence", "uap", "BEROutput", "DivergenceOutput", "UAPOutput"]
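A sketch exercising one estimator together with its output dataclass; the positional call form is assumed:

```python
import numpy as np

from dataeval.metrics.estimators import BEROutput, ber

rng = np.random.default_rng(0)
images = rng.random((200, 16))           # flattened images or embeddings
labels = rng.integers(0, 2, size=200)    # class labels

result: BEROutput = ber(images, labels)  # Bayes error rate estimate
```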
dataeval/metrics/stats/__init__.py
CHANGED
@@ -1,10 +1,15 @@
+"""
+Statistics metrics calculate a variety of image properties and pixel statistics
+and label statistics against the images and labels of a dataset.
+"""
+
 from dataeval._internal.metrics.stats.boxratiostats import boxratiostats
-from dataeval._internal.metrics.stats.datasetstats import datasetstats
-from dataeval._internal.metrics.stats.dimensionstats import dimensionstats
-from dataeval._internal.metrics.stats.hashstats import hashstats
-from dataeval._internal.metrics.stats.labelstats import labelstats
-from dataeval._internal.metrics.stats.pixelstats import pixelstats
-from dataeval._internal.metrics.stats.visualstats import visualstats
+from dataeval._internal.metrics.stats.datasetstats import DatasetStatsOutput, datasetstats
+from dataeval._internal.metrics.stats.dimensionstats import DimensionStatsOutput, dimensionstats
+from dataeval._internal.metrics.stats.hashstats import HashStatsOutput, hashstats
+from dataeval._internal.metrics.stats.labelstats import LabelStatsOutput, labelstats
+from dataeval._internal.metrics.stats.pixelstats import PixelStatsOutput, pixelstats
+from dataeval._internal.metrics.stats.visualstats import VisualStatsOutput, visualstats
 
 __all__ = [
    "boxratiostats",
@@ -14,4 +19,10 @@ __all__ = [
    "labelstats",
    "pixelstats",
    "visualstats",
+    "DatasetStatsOutput",
+    "DimensionStatsOutput",
+    "HashStatsOutput",
+    "LabelStatsOutput",
+    "PixelStatsOutput",
+    "VisualStatsOutput",
 ]
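A sketch of the stats functions with their typed outputs; calling with only an image array is assumed to fall back to defaults:

```python
import numpy as np

from dataeval.metrics.stats import PixelStatsOutput, dimensionstats, pixelstats

images = np.random.default_rng(0).integers(0, 255, size=(32, 3, 64, 64), dtype=np.uint8)

pixel_stats: PixelStatsOutput = pixelstats(images)  # per-image pixel statistics
dim_stats = dimensionstats(images)                  # DimensionStatsOutput
```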
dataeval/utils/__init__.py
CHANGED
@@ -1,6 +1,19 @@
-
+"""
+The utility classes and functions are provided by DataEval to assist users
+in setting up architectures that are guaranteed to work with applicable DataEval
+metrics. Currently DataEval supports both Tensorflow and PyTorch backends.
+"""
+
+from dataeval import _IS_TENSORFLOW_AVAILABLE, _IS_TORCH_AVAILABLE
+
+__all__ = []
 
 if _IS_TORCH_AVAILABLE:  # pragma: no cover
-    from
+    from . import torch
+
+    __all__ += ["torch"]
+
+if _IS_TENSORFLOW_AVAILABLE:  # pragma: no cover
+    from . import tensorflow
 
-__all__
+    __all__ += ["tensorflow"]
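Because the backend sub-packages are only registered in `__all__` when the corresponding extra is installed, downstream code can probe availability before importing; a small sketch:

```python
import dataeval.utils

if "torch" in dataeval.utils.__all__:
    from dataeval.utils import torch as torch_utils  # present only with the torch extra

if "tensorflow" in dataeval.utils.__all__:
    from dataeval.utils import tensorflow as tf_utils  # present only with the tensorflow extra
```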
dataeval/utils/tensorflow/__init__.py
ADDED
@@ -0,0 +1,11 @@
+"""
+Tensorflow models are used in out-of-distribution detectors in the :mod:`dataeval.detectors.ood` module.
+
+DataEval provides both basic default models through the utility :func:`dataeval.utils.tensorflow.models.create_model`
+as well as constructors which allow for customization of the encoder, decoder and any other applicable
+layers used by the model.
+"""
+
+from . import loss, models, recon
+
+__all__ = ["loss", "models", "recon"]
dataeval/utils/torch/__init__.py
ADDED
@@ -0,0 +1,12 @@
+"""
+PyTorch is the primary backend for metrics that require neural networks.
+
+While these metrics can take in custom models, DataEval provides utility classes
+to create a seamless integration between custom models and DataEval's metrics.
+"""
+
+from dataeval._internal.utils import read_dataset
+
+from . import models, trainer
+
+__all__ = ["read_dataset", "models", "trainer"]
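`read_dataset` is now re-exported publicly. A hedged sketch of unpacking a torch Dataset into per-field lists for the array-based metrics; the exact return structure is an assumption:

```python
import torch
from torch.utils.data import TensorDataset

from dataeval.utils.torch import read_dataset

ds = TensorDataset(torch.randn(10, 3, 16, 16), torch.arange(10))

images, labels = read_dataset(ds)  # assumed: one list per element of each (image, label) sample
```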
dataeval/workflows/__init__.py
CHANGED

{dataeval-0.70.0.dist-info → dataeval-0.70.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.70.0
+Version: 0.70.1
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
@@ -30,7 +30,6 @@ Requires-Dist: pillow (>=10.3.0)
 Requires-Dist: scikit-learn (>=1.5.0)
 Requires-Dist: scipy (>=1.10)
 Requires-Dist: tensorflow (>=2.14.1,<2.16) ; extra == "tensorflow" or extra == "all"
-Requires-Dist: tensorflow-io-gcs-filesystem (>=0.35.0,<0.37) ; extra == "tensorflow" or extra == "all"
 Requires-Dist: tensorflow_probability (>=0.22.1,<0.24) ; extra == "tensorflow" or extra == "all"
 Requires-Dist: torch (>=2.2.0) ; extra == "torch" or extra == "all"
 Requires-Dist: torchvision (>=0.17.0) ; extra == "torch" or extra == "all"
@@ -75,6 +74,15 @@ You can install DataEval directly from pypi.org using the following command. Th
 pip install dataeval[all]
 ```
 
+### Installing DataEval in Conda/Mamba
+
+DataEval can be installed in a Conda/Mamba environment using the provided `environment.yaml` file. As some dependencies
+are installed from the `pytorch` channel, the channel is specified in the below example.
+
+```
+micromamba create -f environment\environment.yaml -c pytorch
+```
+
 ### Installing DataEval from GitHub
 
 To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger, binary source files and `poetry` for project dependency management.
dataeval-0.70.1.dist-info/RECORD
ADDED
@@ -0,0 +1,80 @@
+dataeval/__init__.py,sha256=AIHxRS7PYlqg4s7fZJTPKuTtyWFWoVROw4knVoSBH6E,555
+dataeval/_internal/datasets.py,sha256=KbXSR-vOAzFamfXHRnI9mhhqUzEPyGpK47fZsirQn1I,14638
+dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/_internal/detectors/clusterer.py,sha256=srqTHzh9kIy7Ty4VYaptwuQlBh8emFeiEAeS_mYFKro,20750
+dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/_internal/detectors/drift/base.py,sha256=9EIb4eHVnZ8j6ms_kxRs6fOWbHkYKeHPPHWVGD4ikZo,15984
+dataeval/_internal/detectors/drift/cvm.py,sha256=xiyZlf0rAQGG8Z6ZBLPVri805aPRkERrUySwRN8cTZQ,4010
+dataeval/_internal/detectors/drift/ks.py,sha256=aoDx7ps-5vrSI8Q9ii6cwmKnAyaD8tjG69wI-7R3MVQ,4098
+dataeval/_internal/detectors/drift/mmd.py,sha256=ztQSdSlpD66z9xFKqvNo3QHR1vEvf6X-m0LvxNckQgc,7517
+dataeval/_internal/detectors/drift/torch.py,sha256=YhIN85MbUV3C4IJcRvqYdXSWLj5lUeEOb05T5DgB3xo,11552
+dataeval/_internal/detectors/drift/uncertainty.py,sha256=Ot8L42AnFbkij4J3Tis7VzXLv3hfBxoOWBP4UoCEnVs,5125
+dataeval/_internal/detectors/duplicates.py,sha256=wggaIl3uFxihNBQhPv5JcreZbhVaFKoMAJMv_9-aaHU,5324
+dataeval/_internal/detectors/merged_stats.py,sha256=okXGrqAgsqfANMxfIjiUQlZWlaIh5TVvIB9UPsOJZ7k,1351
+dataeval/_internal/detectors/ood/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/_internal/detectors/ood/ae.py,sha256=AIffh11UKZX-3oocDDp8-t-uDUm8aQKvrV0nIE3BLQo,2140
+dataeval/_internal/detectors/ood/aegmm.py,sha256=q2kRXZM5X0PoA10mRsi8Gh-W5FdFzEsnM1yDq3GFzn0,2518
+dataeval/_internal/detectors/ood/base.py,sha256=qYSmcN74x5-qL0_I7uNo4eQ8X8pr7M4cwjs2qvkJt5g,8561
+dataeval/_internal/detectors/ood/llr.py,sha256=VgZtMrMgI8zDVUzsqm2l4tqsULFIhdQeDd4lzdo_G7U,10217
+dataeval/_internal/detectors/ood/vae.py,sha256=iXEltu5pATWr42-28hZ3ARZavJrptLwUM5P9c8omA_s,3030
+dataeval/_internal/detectors/ood/vaegmm.py,sha256=ujp6UN0wpZcmPDPkVfTHZxgka5kuTOSzgXMmbKdmK2U,2947
+dataeval/_internal/detectors/outliers.py,sha256=JmAXoMO0Od7tc9RVFGJsDyOnByciPFG5FdS54Iu0BII,10396
+dataeval/_internal/interop.py,sha256=FLXJY-5hwJcKCtruyvaarqynXCMfcLbQSFvGnrWQDPo,1338
+dataeval/_internal/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/_internal/metrics/balance.py,sha256=8KwuR5HvytJtS1YW9KkNrCu2dPn_gP4FSbXrxQ-9kK8,6182
+dataeval/_internal/metrics/ber.py,sha256=RLRC3ouKYCLYJsA0sqS1gLjE_WFLq7dHElRfVVabvWA,4719
+dataeval/_internal/metrics/coverage.py,sha256=jxvzWpVQrfmN3S9rpXvyb35vpRn1ovV2IdXdB6aU2-w,3560
+dataeval/_internal/metrics/divergence.py,sha256=gKQt9rxmhW8RnODCoLgFSPnCUWEMjmNIPlCV2w6E6oU,4211
+dataeval/_internal/metrics/diversity.py,sha256=ZSlq1KBvkMRVAvlznils2QmlPC73TTpHs1ux7PoFrio,7664
+dataeval/_internal/metrics/parity.py,sha256=eTjltNBJOTFH6T_t9V9-1EFr_U0vqlU642o3x2RWgz0,16527
+dataeval/_internal/metrics/stats/base.py,sha256=HyjgHTQZqgkkCWDzOF-aNZBr88IAjnao8VSbHC5ZtbI,8554
+dataeval/_internal/metrics/stats/boxratiostats.py,sha256=Ac6nB41q43xHCJRDEXHNgsJF80VE8MpH8_kySxA84BE,6342
+dataeval/_internal/metrics/stats/datasetstats.py,sha256=6DFl3TE7t2ggDD8WBVgPH7F2bRvae7NR2PVoEWL92dw,3759
+dataeval/_internal/metrics/stats/dimensionstats.py,sha256=MUQJgrWmRoQFap7gPf8vTFXJ_z7G7bAQpZ7kCPRtNkA,3847
+dataeval/_internal/metrics/stats/hashstats.py,sha256=xH0k_wOeGO5UC7-0fhAIg4WV2fO8fnF0Jdn18gYhW88,2087
+dataeval/_internal/metrics/stats/labelstats.py,sha256=BNxI2flvKhSps2o4-TPbN9nf52ctatI2SuDZ07hah5E,4058
+dataeval/_internal/metrics/stats/pixelstats.py,sha256=LxoDQ6afsNuzB0WnOgmzkEUV7s534MrAYkzS6Be7PPQ,4419
+dataeval/_internal/metrics/stats/visualstats.py,sha256=3uET0N3WgV5dcxst8Xb9DhcATiNfAXsx1OKbPz2mU4Q,4712
+dataeval/_internal/metrics/uap.py,sha256=RumSQey6vNoz9CtOG2_Inb-TurKJrAHqwhkyWBirxhk,2128
+dataeval/_internal/metrics/utils.py,sha256=vW3mQHjF0AvYlml27X5dZgd0YBk3zyBvvztLEfdRkvI,13475
+dataeval/_internal/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/_internal/models/pytorch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/_internal/models/pytorch/autoencoder.py,sha256=nPyLjLZrPNla-lMnym3fUW-O1F00JbIrVUrUFU4C4UQ,8364
+dataeval/_internal/models/pytorch/blocks.py,sha256=pm2xwsDZjZJYXrhhiz8husvh2vHmrkFMSYEn-EDUD5Q,1354
+dataeval/_internal/models/pytorch/utils.py,sha256=Qgwym1PxGuwxbXCKUT-8r6Iyrxqm7x94oj45Vf5_CjE,1675
+dataeval/_internal/models/tensorflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/_internal/models/tensorflow/autoencoder.py,sha256=Ryn11jDbpZJOM5De-kMGPdbJBQEdwip6B20ajS8HqpE,10354
+dataeval/_internal/models/tensorflow/gmm.py,sha256=QoEgbeax1GETqRmUF7A2ih9uFOZfFAjGzgH2ljExlAc,3669
+dataeval/_internal/models/tensorflow/losses.py,sha256=pZH5RnlM9R0RrBde9Lgq32muwAp7_PWc56Mu4u8RVvo,3976
+dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=keI1gTNjBk18YD91Cp4exfuGYWU9lt-wapvhSazhcVs,48319
+dataeval/_internal/models/tensorflow/trainer.py,sha256=xNY0Iw7Qa1TnCuy9N1b77_VduFoW_BhbZjfQCxOVby4,4082
+dataeval/_internal/models/tensorflow/utils.py,sha256=l6jXKMWyQAEI4LpAONq95Xwr7CPgrs408ypf9TuNxkY,8732
+dataeval/_internal/output.py,sha256=qVbOi41dvfQICQ4uxysHPWBRKo1XR61kXHPL_vKOPm0,2545
+dataeval/_internal/utils.py,sha256=jo6bGJZAgyuZqRpAAC4gwhAHYE12316na19ZuFwMqes,1504
+dataeval/_internal/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/_internal/workflows/sufficiency.py,sha256=5N07nV5Oi4kUCm_3rJoj0SeIy1iRC3LciEgrR7E9m7Q,18240
+dataeval/detectors/__init__.py,sha256=8nJk2U99_SQD7RtEKjyS4WJct8gX1HgjEm4oHTnRhTI,320
+dataeval/detectors/drift/__init__.py,sha256=9TbJok7fH3mcXcM7c2vT3WZnZr2wanY_8TUwu-8JX58,800
+dataeval/detectors/drift/kernels/__init__.py,sha256=djIbmvYoHWpWxfdYtiouEC2KqzvgmtEqlg1i5p-UCgM,266
+dataeval/detectors/drift/updates/__init__.py,sha256=tiYSA1-AsTiFgC3LuxM8iYFsWUX0Fr8hElzWvU8ovig,267
+dataeval/detectors/linters/__init__.py,sha256=m5F5JgGBcqGb3J_qXQ3PBkKyePjOklrYbM9dGUsgxFA,489
+dataeval/detectors/ood/__init__.py,sha256=K5QrSJg2QePs6Pa3Cg80ZwXu7BELLrSlbEpTdxuL3Ys,777
+dataeval/metrics/__init__.py,sha256=U0sRw5eiqeeDLbLPxT_rznZsvtNwONVxKVwfC0qVOgo,223
+dataeval/metrics/bias/__init__.py,sha256=Wn1Ui_g-9cR4c4IS7RFKJ6UH5DLXKjEBoXTuEYPXSBc,619
+dataeval/metrics/estimators/__init__.py,sha256=4VFMKLPsJdaWiflf84bXGQ2k8ertFQ4WEPhyWqjFFvE,377
+dataeval/metrics/stats/__init__.py,sha256=UcD41gFwFhcQMtqwWkPQlg6cFA2_gdj6yGRCDrKYXM8,1055
+dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+dataeval/utils/__init__.py,sha256=cW_5-DIZG2OFRs3FVLOz0uCv4JWdaoVO7C9rOlR7ZEA,526
+dataeval/utils/tensorflow/__init__.py,sha256=XgjqrMtI00ERPPpdokbO1lDyc_H3CZ1TTqUXIj0B6PI,435
+dataeval/utils/tensorflow/loss/__init__.py,sha256=E9eB87LNh0o5nUCqssB027EXBsOfEayNHPcNW0QGFdA,101
+dataeval/utils/tensorflow/models/__init__.py,sha256=OVpmHF8itDcgOlfw6N9jr7IphZPbMJoiu7OdqYhU9fs,291
+dataeval/utils/tensorflow/recon/__init__.py,sha256=xe6gAQqK9tyAoDQTtaJAxIPK1humt5QzsG_9NPsqx58,116
+dataeval/utils/torch/__init__.py,sha256=bYUm-nNlNVU3bqDz7dQHFmaRWgLy3lLrD4cSDumDlxQ,373
+dataeval/utils/torch/datasets/__init__.py,sha256=S6C4OaxEjJJaIpHSZcZfkl4U5iS5YtZ9N5GYHqvbzvM,191
+dataeval/utils/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uTB09E,162
+dataeval/utils/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
+dataeval/workflows/__init__.py,sha256=VFeJyMhZxvj8WnU5Un32mwO8lNfBQOBjD9IdOqexnAE,320
+dataeval-0.70.1.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
+dataeval-0.70.1.dist-info/METADATA,sha256=B2slR1eY_xRR4QcUTpV8EJh5Z_plWmHFqTT5j4r2Vvk,4502
+dataeval-0.70.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.70.1.dist-info/RECORD,,
dataeval/tensorflow/__init__.py
DELETED
dataeval/torch/__init__.py
DELETED
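These deletions, together with the renamed paths in the file list above, mean the old top-level `dataeval.torch` and `dataeval.tensorflow` packages are gone in 0.70.1 and their contents now live under `dataeval.utils`. A migration sketch; the model name is illustrative only:

```python
try:
    from dataeval.utils.torch.models import AriaAutoencoder  # 0.70.1 location
except ImportError:
    from dataeval.torch.models import AriaAutoencoder  # 0.70.0 location, removed in 0.70.1
```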