dataeval 0.69.4__py3-none-any.whl → 0.70.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +3 -3
- dataeval/_internal/detectors/drift/base.py +5 -6
- dataeval/_internal/detectors/drift/mmd.py +3 -3
- dataeval/_internal/detectors/duplicates.py +62 -45
- dataeval/_internal/detectors/merged_stats.py +23 -54
- dataeval/_internal/detectors/ood/ae.py +3 -3
- dataeval/_internal/detectors/outliers.py +133 -61
- dataeval/_internal/interop.py +11 -7
- dataeval/_internal/metrics/balance.py +9 -9
- dataeval/_internal/metrics/ber.py +3 -3
- dataeval/_internal/metrics/divergence.py +3 -3
- dataeval/_internal/metrics/diversity.py +6 -6
- dataeval/_internal/metrics/parity.py +24 -16
- dataeval/_internal/metrics/stats/base.py +231 -0
- dataeval/_internal/metrics/stats/boxratiostats.py +159 -0
- dataeval/_internal/metrics/stats/datasetstats.py +97 -0
- dataeval/_internal/metrics/stats/dimensionstats.py +111 -0
- dataeval/_internal/metrics/stats/hashstats.py +73 -0
- dataeval/_internal/metrics/stats/labelstats.py +125 -0
- dataeval/_internal/metrics/stats/pixelstats.py +117 -0
- dataeval/_internal/metrics/stats/visualstats.py +122 -0
- dataeval/_internal/metrics/uap.py +2 -2
- dataeval/_internal/metrics/utils.py +28 -13
- dataeval/_internal/output.py +3 -18
- dataeval/_internal/workflows/sufficiency.py +123 -133
- dataeval/metrics/stats/__init__.py +14 -3
- dataeval/workflows/__init__.py +2 -2
- {dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/METADATA +3 -3
- {dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/RECORD +31 -26
- {dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/WHEEL +1 -1
- dataeval/_internal/flags.py +0 -77
- dataeval/_internal/metrics/stats.py +0 -397
- dataeval/flags/__init__.py +0 -3
- {dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/LICENSE.txt +0 -0
dataeval/_internal/metrics/utils.py
CHANGED
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
-from typing import Any, Callable, Literal,
+from typing import Any, Callable, Literal, Mapping, NamedTuple
 
 import numpy as np
 import xxhash as xxh
-from numpy.typing import NDArray
+from numpy.typing import ArrayLike, NDArray
 from PIL import Image
 from scipy.fftpack import dct
 from scipy.signal import convolve2d
@@ -14,6 +14,8 @@ from scipy.spatial.distance import pdist, squareform
 from scipy.stats import entropy as sp_entropy
 from sklearn.neighbors import NearestNeighbors
 
+from dataeval._internal.interop import to_numpy
+
 EPSILON = 1e-5
 EDGE_KERNEL = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype=np.int8)
 BIT_DEPTH = (1, 8, 12, 16, 32)
@@ -162,26 +164,26 @@ def infer_categorical(X: NDArray, threshold: float = 0.2) -> NDArray:
 
 
 def preprocess_metadata(
-    class_labels:
+    class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], cat_thresh: float = 0.2
 ) -> tuple[NDArray, list[str], list[bool]]:
-    # convert class_labels and
-
-        "class_label": np.asarray(class_labels, dtype=int),
-        **{k: np.array([d[k] for d in metadata]) for k in metadata[0]},
-    }
+    # convert class_labels and dict of lists to matrix of metadata values
+    preprocessed_metadata = {"class_label": np.asarray(class_labels, dtype=int)}
 
     # map columns of dict that are not numeric (e.g. string) to numeric values
     # that mutual information and diversity functions can accommodate. Each
     # unique string receives a unique integer value.
-    for k, v in
+    for k, v in metadata.items():
         # if not numeric
+        v = to_numpy(v)
         if not np.issubdtype(v.dtype, np.number):
             _, mapped_vals = np.unique(v, return_inverse=True)
-
+            preprocessed_metadata[k] = mapped_vals
+        else:
+            preprocessed_metadata[k] = v
 
-    data = np.stack(list(
-    names = list(
-    is_categorical = [infer_categorical(
+    data = np.stack(list(preprocessed_metadata.values()), axis=-1)
+    names = list(preprocessed_metadata.keys())
+    is_categorical = [infer_categorical(preprocessed_metadata[var], cat_thresh)[0] for var in names]
 
     return data, names, is_categorical
 
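A sketch of the new calling convention, assuming the internal import path shown above. The key change is that metadata is now a Mapping of column name to per-sample values rather than a list of per-sample dicts, and each column is routed through to_numpy:

    from dataeval._internal.metrics.utils import preprocess_metadata

    class_labels = [0, 1, 1, 0]
    metadata = {
        "sensor": ["a", "b", "a", "a"],       # non-numeric: integer-encoded via np.unique
        "altitude": [10.0, 12.5, 9.8, 11.1],  # numeric: passed through unchanged
    }

    data, names, is_categorical = preprocess_metadata(class_labels, metadata)
    # names == ["class_label", "sensor", "altitude"]; data stacks the columns
    # into a (4, 3) matrix via np.stack(..., axis=-1)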
@@ -350,6 +352,19 @@ def normalize_image_shape(image: NDArray) -> NDArray:
     raise ValueError("Images must have 2 or more dimensions.")
 
 
+def normalize_box_shape(bounding_box: NDArray) -> NDArray:
+    """
+    Normalizes the bounding box shape into (N,4).
+    """
+    ndim = bounding_box.ndim
+    if ndim == 1:
+        return np.expand_dims(bounding_box, axis=0)
+    elif ndim > 2:
+        raise ValueError("Bounding boxes must have 2 dimensions: (# of boxes in an image, [X,Y,W,H]) -> (N,4)")
+    else:
+        return bounding_box
+
+
 def edge_filter(image: NDArray, offset: float = 0.5) -> NDArray:
     """
     Returns the image filtered using a 3x3 edge detection kernel:
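A brief sketch of the new helper's behavior (it is internal, so the import path simply follows the file shown above):

    import numpy as np
    from dataeval._internal.metrics.utils import normalize_box_shape

    single = normalize_box_shape(np.array([0, 0, 16, 16]))  # one flat [X, Y, W, H] box
    assert single.shape == (1, 4)                           # promoted to (N, 4)

    boxes = normalize_box_shape(np.zeros((3, 4)))           # already (N, 4)
    assert boxes.shape == (3, 4)                            # passed through unchanged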
dataeval/_internal/output.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 import inspect
 from datetime import datetime, timezone
 from functools import wraps
+from typing import Any
 
 import numpy as np
 
@@ -17,10 +18,10 @@ class OutputMetadata:
     _state: dict[str, str]
     _version: str
 
-    def dict(self) -> dict:
+    def dict(self) -> dict[str, Any]:
         return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
 
-    def meta(self) -> dict:
+    def meta(self) -> dict[str, Any]:
         return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}
 
 
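Behaviorally nothing changes here; dict() still returns the public result fields and meta() the underscore-prefixed run metadata, now with tighter return annotations. A standalone sketch mirroring the two comprehensions (OutputMetadata itself is internal, so this class stands in for it):

    class Example:
        def __init__(self) -> None:
            self.score = 0.9        # public field, surfaced by dict()
            self._name = "example"  # underscore-prefixed metadata, surfaced by meta()

        def dict(self) -> dict:
            return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}

        def meta(self) -> dict:
            return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}

    e = Example()
    assert e.dict() == {"score": 0.9}
    assert e.meta() == {"name": "example"}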
@@ -67,19 +68,3 @@ def set_metadata(module_name: str = "", state_attr: list[str] | None = None):
         return wrapper
 
     return decorator
-
-
-def populate_defaults(d: dict, c: type) -> dict:
-    def default(t):
-        t = (
-            t if isinstance(t, str) else t._name if hasattr(t, "_name") else t.__name__
-        ).lower()  # py3.9 : _name, py3.10 : __name__
-        if t.startswith("dict"):
-            return {}
-        if t.startswith("list"):
-            return []
-        if t.startswith("ndarray"):
-            return np.array([])
-        raise TypeError("Unrecognized annotation type")
-
-    return {k: d[k] if k in d else default(t) for k, t in c.__annotations__.items()}
dataeval/_internal/workflows/sufficiency.py
CHANGED
@@ -42,6 +42,129 @@ class SufficiencyOutput(OutputMetadata):
         if c != c_v:
             raise ValueError(f"{m} does not contain the expected number ({c}) of data points.")
 
+    @set_metadata("dataeval.workflows.SufficiencyOutput")
+    def project(
+        self,
+        projection: int | Sequence[int] | NDArray[np.uint],
+    ) -> SufficiencyOutput:
+        """Projects the measures for each value of X
+
+        Parameters
+        ----------
+        projection : int | Sequence[int] | NDArray[np.uint]
+            Step or steps to project
+
+        Returns
+        -------
+        SufficiencyOutput
+            Dataclass containing the projected measures per projection
+
+        Raises
+        ------
+        ValueError
+            If the length of data points in the measures do not match
+            If the steps are not int, Sequence[int] or an ndarray
+        """
+        projection = [projection] if isinstance(projection, int) else projection
+        projection = np.array(projection) if isinstance(projection, Sequence) else projection
+        if not isinstance(projection, np.ndarray):
+            raise ValueError("'steps' must be an int, Sequence[int] or ndarray")
+
+        output = {}
+        for name, measures in self.measures.items():
+            if measures.ndim > 1:
+                result = []
+                for i in range(len(measures)):
+                    projected = project_steps(self.params[name][i], projection)
+                    result.append(projected)
+                output[name] = np.array(result)
+            else:
+                output[name] = project_steps(self.params[name], projection)
+        return SufficiencyOutput(projection, self.params, output)
+
+    def plot(self, class_names: Sequence[str] | None = None) -> list[Figure]:
+        """Plotting function for data sufficiency tasks
+
+        Parameters
+        ----------
+        class_names : Sequence[str] | None, default None
+            List of class names
+
+        Returns
+        -------
+        List[plt.Figure]
+            List of Figures for each measure
+
+        Raises
+        ------
+        ValueError
+            If the length of data points in the measures do not match
+        """
+        # Extrapolation parameters
+        last_X = self.steps[-1]
+        geomshape = (0.01 * last_X, last_X * 4, len(self.steps))
+        extrapolated = np.geomspace(*geomshape).astype(np.int64)
+
+        # Stores all plots
+        plots = []
+
+        # Create a plot for each measure on one figure
+        for name, measures in self.measures.items():
+            if measures.ndim > 1:
+                if class_names is not None and len(measures) != len(class_names):
+                    raise IndexError("Class name count does not align with measures")
+                for i, measure in enumerate(measures):
+                    class_name = str(i) if class_names is None else class_names[i]
+                    fig = plot_measure(
+                        f"{name}_{class_name}",
+                        self.steps,
+                        measure,
+                        self.params[name][i],
+                        extrapolated,
+                    )
+                    plots.append(fig)
+
+            else:
+                fig = plot_measure(name, self.steps, measures, self.params[name], extrapolated)
+                plots.append(fig)
+
+        return plots
+
+    def inv_project(self, targets: dict[str, NDArray]) -> dict[str, NDArray]:
+        """
+        Calculate training samples needed to achieve target model metric values.
+
+        Parameters
+        ----------
+        targets : Dict[str, NDArray]
+            Dictionary of target metric scores (from 0.0 to 1.0) that we want
+            to achieve, where the key is the name of the metric.
+
+        Returns
+        -------
+        Dict[str, NDArray]
+            List of the number of training samples needed to achieve each
+            corresponding entry in targets
+        """
+
+        projection = {}
+
+        for name, target in targets.items():
+            if name not in self.measures:
+                continue
+
+            measure = self.measures[name]
+            if measure.ndim > 1:
+                projection[name] = np.zeros((len(measure), len(target)))
+                for i in range(len(measure)):
+                    projection[name][i] = inv_project_steps(
+                        self.params[name][i], target[i] if target.ndim == measure.ndim else target
+                    )
+            else:
+                projection[name] = inv_project_steps(self.params[name], target)
+
+        return projection
+
 
 def f_out(n_i: NDArray, x: NDArray) -> NDArray:
     """
@@ -421,136 +544,3 @@ class Sufficiency:
         measures = {k: (v / self.runs).T for k, v in measures.items()}
         params_output = get_curve_params(measures, ranges, niter)
         return SufficiencyOutput(ranges, params_output, measures)
-
-    @classmethod
-    def project(
-        cls,
-        data: SufficiencyOutput,
-        projection: int | Sequence[int] | NDArray[np.uint],
-    ) -> SufficiencyOutput:
-        """Projects the measures for each value of X
-
-        Parameters
-        ----------
-        data : SufficiencyOutput
-            Dataclass containing the average of each measure per substep
-        projection : int | Sequence[int] | NDArray[np.uint]
-            Step or steps to project
-
-        Returns
-        -------
-        SufficiencyOutput
-            Dataclass containing the projected measures per projection
-
-        Raises
-        ------
-        ValueError
-            If the length of data points in the measures do not match
-            If the steps are not int, Sequence[int] or an ndarray
-        """
-        projection = [projection] if isinstance(projection, int) else projection
-        projection = np.array(projection) if isinstance(projection, Sequence) else projection
-        if not isinstance(projection, np.ndarray):
-            raise ValueError("'steps' must be an int, Sequence[int] or ndarray")
-
-        output = {}
-        for name, measures in data.measures.items():
-            if measures.ndim > 1:
-                result = []
-                for i in range(len(measures)):
-                    projected = project_steps(data.params[name][i], projection)
-                    result.append(projected)
-                output[name] = np.array(result)
-            else:
-                output[name] = project_steps(data.params[name], projection)
-        return SufficiencyOutput(projection, data.params, output)
-
-    @classmethod
-    def plot(cls, data: SufficiencyOutput, class_names: Sequence[str] | None = None) -> list[Figure]:
-        """Plotting function for data sufficiency tasks
-
-        Parameters
-        ----------
-        data : SufficiencyOutput
-            Dataclass containing the average of each measure per substep
-        class_names : Sequence[str] | None, default None
-            List of class names
-
-        Returns
-        -------
-        List[plt.Figure]
-            List of Figures for each measure
-
-        Raises
-        ------
-        ValueError
-            If the length of data points in the measures do not match
-        """
-        # Extrapolation parameters
-        last_X = data.steps[-1]
-        geomshape = (0.01 * last_X, last_X * 4, len(data.steps))
-        extrapolated = np.geomspace(*geomshape).astype(np.int64)
-
-        # Stores all plots
-        plots = []
-
-        # Create a plot for each measure on one figure
-        for name, measures in data.measures.items():
-            if measures.ndim > 1:
-                if class_names is not None and len(measures) != len(class_names):
-                    raise IndexError("Class name count does not align with measures")
-                for i, measure in enumerate(measures):
-                    class_name = str(i) if class_names is None else class_names[i]
-                    fig = plot_measure(
-                        f"{name}_{class_name}",
-                        data.steps,
-                        measure,
-                        data.params[name][i],
-                        extrapolated,
-                    )
-                    plots.append(fig)
-
-            else:
-                fig = plot_measure(name, data.steps, measures, data.params[name], extrapolated)
-                plots.append(fig)
-
-        return plots
-
-    @classmethod
-    def inv_project(cls, targets: dict[str, NDArray], data: SufficiencyOutput) -> dict[str, NDArray]:
-        """
-        Calculate training samples needed to achieve target model metric values.
-
-        Parameters
-        ----------
-        targets : Dict[str, NDArray]
-            Dictionary of target metric scores (from 0.0 to 1.0) that we want
-            to achieve, where the key is the name of the metric.
-
-        data : SufficiencyOutput
-            Dataclass containing the average of each measure per substep
-
-        Returns
-        -------
-        Dict[str, NDArray]
-            List of the number of training samples needed to achieve each
-            corresponding entry in targets
-        """
-
-        projection = {}
-
-        for name, target in targets.items():
-            if name not in data.measures:
-                continue
-
-            measure = data.measures[name]
-            if measure.ndim > 1:
-                projection[name] = np.zeros((len(measure), len(target)))
-                for i in range(len(measure)):
-                    projection[name][i] = inv_project_steps(
-                        data.params[name][i], target[i] if target.ndim == measure.ndim else target
-                    )
-            else:
-                projection[name] = inv_project_steps(data.params[name], target)
-
-        return projection
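Taken together with the additions to SufficiencyOutput above, these removals move project, plot, and inv_project from classmethods on Sufficiency to instance methods on the output object. A hedged migration sketch; it assumes a Sufficiency instance suff whose evaluation returns a SufficiencyOutput, which the surrounding context suggests but the diff does not show in full:

    import numpy as np

    # 0.69.4 style: the output dataclass was passed back into the class
    output = suff.evaluate()
    projected = Sufficiency.project(output, [1000, 2000, 4000])
    figures = Sufficiency.plot(output)

    # 0.70.0 style: the output object carries the methods itself
    output = suff.evaluate()
    projected = output.project([1000, 2000, 4000])
    figures = output.plot()
    samples = output.inv_project({"accuracy": np.array([0.9, 0.95])})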
dataeval/metrics/stats/__init__.py
CHANGED
@@ -1,6 +1,17 @@
-from dataeval._internal.metrics.stats import
+from dataeval._internal.metrics.stats.boxratiostats import boxratiostats
+from dataeval._internal.metrics.stats.datasetstats import datasetstats
+from dataeval._internal.metrics.stats.dimensionstats import dimensionstats
+from dataeval._internal.metrics.stats.hashstats import hashstats
+from dataeval._internal.metrics.stats.labelstats import labelstats
+from dataeval._internal.metrics.stats.pixelstats import pixelstats
+from dataeval._internal.metrics.stats.visualstats import visualstats
 
 __all__ = [
-    "
-    "
+    "boxratiostats",
+    "datasetstats",
+    "dimensionstats",
+    "hashstats",
+    "labelstats",
+    "pixelstats",
+    "visualstats",
 ]
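The single monolithic stats module is replaced by one public function per statistic category. A minimal usage sketch, assuming dataeval.metrics.stats re-exports the names exactly as this __init__ does (the images argument is illustrative; the diff shows the module layout, not the signatures):

    import numpy as np
    from dataeval.metrics.stats import dimensionstats, pixelstats

    images = [np.random.random((3, 32, 32)) for _ in range(8)]  # placeholder data

    dims = dimensionstats(images)  # image properties (width, height, channels, ...)
    pix = pixelstats(images)       # per-image pixel statistics (mean, std, entropy, ...)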
dataeval/workflows/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 from dataeval import _IS_TORCH_AVAILABLE
 
 if _IS_TORCH_AVAILABLE:  # pragma: no cover
-    from dataeval._internal.workflows.sufficiency import Sufficiency
+    from dataeval._internal.workflows.sufficiency import Sufficiency, SufficiencyOutput
 
-__all__ = ["Sufficiency"]
+__all__ = ["Sufficiency", "SufficiencyOutput"]
{dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.69.4
+Version: 0.70.0
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
@@ -32,8 +32,8 @@ Requires-Dist: scipy (>=1.10)
 Requires-Dist: tensorflow (>=2.14.1,<2.16) ; extra == "tensorflow" or extra == "all"
 Requires-Dist: tensorflow-io-gcs-filesystem (>=0.35.0,<0.37) ; extra == "tensorflow" or extra == "all"
 Requires-Dist: tensorflow_probability (>=0.22.1,<0.24) ; extra == "tensorflow" or extra == "all"
-Requires-Dist: torch (>=2.
-Requires-Dist: torchvision (>=0.
+Requires-Dist: torch (>=2.2.0) ; extra == "torch" or extra == "all"
+Requires-Dist: torchvision (>=0.17.0) ; extra == "torch" or extra == "all"
 Requires-Dist: xxhash (>=3.3)
 Project-URL: Documentation, https://dataeval.readthedocs.io/
 Project-URL: Repository, https://github.com/aria-ml/dataeval/
{dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/RECORD
CHANGED
@@ -1,36 +1,42 @@
-dataeval/__init__.py,sha256=
+dataeval/__init__.py,sha256=rWQRN8WyjzjUUZhNkCFFySptSFPM4f67tabVBDly84k,574
 dataeval/_internal/datasets.py,sha256=MwN6xgZW1cA5yIxXZ05qBBz4aO3bjKzIEbZZfa1HkQo,9790
 dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/_internal/detectors/clusterer.py,sha256=hJwELUeAdZZ3OVLIfwalw2P7Zz13q2ZqrV6gx90s44E,20695
 dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/_internal/detectors/drift/base.py,sha256=
+dataeval/_internal/detectors/drift/base.py,sha256=6L66aGWUGAbWylT_aHmZUSrvx6wM6Fzzlcie98KdNiY,15900
 dataeval/_internal/detectors/drift/cvm.py,sha256=xiyZlf0rAQGG8Z6ZBLPVri805aPRkERrUySwRN8cTZQ,4010
 dataeval/_internal/detectors/drift/ks.py,sha256=aoDx7ps-5vrSI8Q9ii6cwmKnAyaD8tjG69wI-7R3MVQ,4098
-dataeval/_internal/detectors/drift/mmd.py,sha256=
+dataeval/_internal/detectors/drift/mmd.py,sha256=0TD0BpIJkwdjU0i3ndlvYp1ItCNrvSO7gT8r4bEdHXc,7493
 dataeval/_internal/detectors/drift/torch.py,sha256=YhIN85MbUV3C4IJcRvqYdXSWLj5lUeEOb05T5DgB3xo,11552
 dataeval/_internal/detectors/drift/uncertainty.py,sha256=Ot8L42AnFbkij4J3Tis7VzXLv3hfBxoOWBP4UoCEnVs,5125
-dataeval/_internal/detectors/duplicates.py,sha256=
-dataeval/_internal/detectors/merged_stats.py,sha256=
+dataeval/_internal/detectors/duplicates.py,sha256=VLDEhXWhdNyU3aA6S7dQmCBDAz0uQY5E_RjJYE1wkcw,5268
+dataeval/_internal/detectors/merged_stats.py,sha256=okXGrqAgsqfANMxfIjiUQlZWlaIh5TVvIB9UPsOJZ7k,1351
 dataeval/_internal/detectors/ood/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/_internal/detectors/ood/ae.py,sha256
+dataeval/_internal/detectors/ood/ae.py,sha256=-nFw3taJ2IWU74o8Yks48J2x7aBoxXeL1vP_Ye_60M4,2031
 dataeval/_internal/detectors/ood/aegmm.py,sha256=pffThqXRoLx3GuZXEQBd-xEy5DjAZHV7WSeP2HgM_TI,2403
 dataeval/_internal/detectors/ood/base.py,sha256=Pw34uFEWOJZiG4ciM0ArUkqhiM8WCGl2rc0BwFPu3xM,8240
 dataeval/_internal/detectors/ood/llr.py,sha256=tCo8G7V8VaVuIZ09rg0ZXZmdE0N_zGm7vCfFUnGbGvo,10102
 dataeval/_internal/detectors/ood/vae.py,sha256=WbQugS-bBUTTqQ9PRLHBmSUtk7O2_PN4PBLJE9ieMjw,2921
 dataeval/_internal/detectors/ood/vaegmm.py,sha256=pVUSlVF2jo8uokyks2QzfBJnNtcFWmcF8EQl-azs2Bg,2832
-dataeval/_internal/detectors/outliers.py,sha256=
-dataeval/_internal/
-dataeval/_internal/interop.py,sha256=x4qj4EiBt5NthSxe8prSLrPDAEcipAdyyLwbNyCBaFk,1059
+dataeval/_internal/detectors/outliers.py,sha256=du4Kd5XrrBlBXyno8K5COkNKP0ByQnGRSsfaTq4ywm0,10345
+dataeval/_internal/interop.py,sha256=FLXJY-5hwJcKCtruyvaarqynXCMfcLbQSFvGnrWQDPo,1338
 dataeval/_internal/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/_internal/metrics/balance.py,sha256=
-dataeval/_internal/metrics/ber.py,sha256=
+dataeval/_internal/metrics/balance.py,sha256=veAeFFmbyDLW6VlQ-NrLfJbQl0AOA3hFD239NSej078,6134
+dataeval/_internal/metrics/ber.py,sha256=MUpYivU-klsg2019YLyeV9aWDEGyXhcqYg05Vg_pffk,4668
 dataeval/_internal/metrics/coverage.py,sha256=EZVES1rbZW2j_CtQv1VFfSO-UmWcrt5nmqxDErtrG14,3473
-dataeval/_internal/metrics/divergence.py,sha256=
-dataeval/_internal/metrics/diversity.py,sha256=
-dataeval/_internal/metrics/parity.py,sha256=
-dataeval/_internal/metrics/stats.py,sha256=
-dataeval/_internal/metrics/
-dataeval/_internal/metrics/
+dataeval/_internal/metrics/divergence.py,sha256=WTQ1Xx453DH8aCpEmN1Zn6zuCy7NnsHfVphvTYA0L_o,4119
+dataeval/_internal/metrics/diversity.py,sha256=Us0Nww3wvDH0kvVhDd3KEGXbkY_4-XxmD-ew9fFhqag,7618
+dataeval/_internal/metrics/parity.py,sha256=TRm4GObItaku3OvxJj1vfxE1fGpwW_N020Nqfs-uFBw,16458
+dataeval/_internal/metrics/stats/base.py,sha256=dgXAuuFYK0vrl3VPmU5BhjThRBHD6ykE_M2uyCuKDl4,8556
+dataeval/_internal/metrics/stats/boxratiostats.py,sha256=Ac6nB41q43xHCJRDEXHNgsJF80VE8MpH8_kySxA84BE,6342
+dataeval/_internal/metrics/stats/datasetstats.py,sha256=zJnBzIthaJPbQFvE0RRx-KvvU0Du7ZSvERW56zeowBU,3703
+dataeval/_internal/metrics/stats/dimensionstats.py,sha256=RYI8PbiCtlPdli1z4jJ4t05ddDszB9dsnKDJfidaK-c,3789
+dataeval/_internal/metrics/stats/hashstats.py,sha256=3PUPPmHe2t8VIgfmu9hkyMq7zvxmcdXdLtEqQJvHs5M,2034
+dataeval/_internal/metrics/stats/labelstats.py,sha256=LTvQTqCnKVOx3ufmHAZIQOI9xYhIoZS-1TAgEjKhYC0,4056
+dataeval/_internal/metrics/stats/pixelstats.py,sha256=cSOjJ2yTaH_nWd4jqiu96wA39HmU3GkIs1XY2MW4mSw,4367
+dataeval/_internal/metrics/stats/visualstats.py,sha256=4tIkFE2LNxYEyseb5Lj7BmgueFCwHbwA2JJ9-YsA9QI,4659
+dataeval/_internal/metrics/uap.py,sha256=EhyEjYtWs1RiXlVrvvGI4gEcMygpu8QHUeOHxfceacY,2043
+dataeval/_internal/metrics/utils.py,sha256=P3KOybaorAD8Zu4j-3jygKEJld5rwQlgqxMljbVk1Oo,13477
 dataeval/_internal/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/_internal/models/pytorch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/_internal/models/pytorch/autoencoder.py,sha256=gmnAHUzzn-fXTUU63SR4ZBjGBLEALWPxmZ_wPzvF_dg,8365
@@ -43,21 +49,20 @@ dataeval/_internal/models/tensorflow/losses.py,sha256=pZH5RnlM9R0RrBde9Lgq32muwA
 dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=lRpRNebMgkCJUnEk1xouVaTfS_YGMQgQhI01wNKAjeM,48420
 dataeval/_internal/models/tensorflow/trainer.py,sha256=xNY0Iw7Qa1TnCuy9N1b77_VduFoW_BhbZjfQCxOVby4,4082
 dataeval/_internal/models/tensorflow/utils.py,sha256=l6jXKMWyQAEI4LpAONq95Xwr7CPgrs408ypf9TuNxkY,8732
-dataeval/_internal/output.py,sha256=
+dataeval/_internal/output.py,sha256=qVbOi41dvfQICQ4uxysHPWBRKo1XR61kXHPL_vKOPm0,2545
 dataeval/_internal/utils.py,sha256=gK0z4buuQoUYblkrCiRV9pIESzyikcY-3a08XsQkD7E,1585
 dataeval/_internal/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-dataeval/_internal/workflows/sufficiency.py,sha256=
+dataeval/_internal/workflows/sufficiency.py,sha256=jLGfp-d0plfV-M8j7W4W71yNjEMutrVzN7pMA9qLHD0,17807
 dataeval/detectors/__init__.py,sha256=WVlwapZtKXVvrW41Sq30sFd8j2phS8JMsCaLeXfbQ7k,204
 dataeval/detectors/drift/__init__.py,sha256=XtSjoTy6P_lwRzC9Klmd9BYZ3v4qZrATJ-p7gvvHPGk,598
 dataeval/detectors/drift/kernels/__init__.py,sha256=qV_r740iRPw39_kHOttmk3VNikDFKCvF9i1IGbgjf3A,186
 dataeval/detectors/drift/updates/__init__.py,sha256=uwkRV-4WVg0XFX_9futvQ0ggGOEvduDedgCno_eIi4U,149
 dataeval/detectors/linters/__init__.py,sha256=1yxsJw8CFpHsZwn_YUlWpb-4YBet5U6uB--MeRgB6io,234
 dataeval/detectors/ood/__init__.py,sha256=ybWhwbMmWygIwE1A-nYihDfugrj3j0GiuABmVvD7264,583
-dataeval/flags/__init__.py,sha256=qo06_Tk0ul4lOhKSEs0HE2G6WBFvMwNJq77vRX1ynww,72
 dataeval/metrics/__init__.py,sha256=42szGyZrLekNU-T-rwJu-pUoDBdOoStuScB-mnGzjw4,81
 dataeval/metrics/bias/__init__.py,sha256=xqpxCttgzz-hMZQI7_IlaNn4OGZaGVz3KKRd26GbSKE,335
 dataeval/metrics/estimators/__init__.py,sha256=fWQZUIxu88u5POYXN1yoFc-Hxx5B1fveEiiSXmK5kPk,210
-dataeval/metrics/stats/__init__.py,sha256=
+dataeval/metrics/stats/__init__.py,sha256=HqorGcA6GSlvLnYALnKduXzJzQo2GPMVpdirXNWB2pY,637
 dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dataeval/tensorflow/__init__.py,sha256=IH_ELFP9CwKPk_br8_dKi6HeAlwmmV2vgsWdD8IFKXU,72
 dataeval/tensorflow/loss/__init__.py,sha256=E9eB87LNh0o5nUCqssB027EXBsOfEayNHPcNW0QGFdA,101
@@ -67,8 +72,8 @@ dataeval/torch/__init__.py,sha256=ZNGSJJmatdGzbrazw86yNveEXm8smmW63xD-ReA8Nfg,63
 dataeval/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uTB09E,162
 dataeval/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
 dataeval/utils/__init__.py,sha256=ExQ1xj62MjcM9uIu1-g1P2fW0EPJpcIofnvxjQ908c4,172
-dataeval/workflows/__init__.py,sha256=
-dataeval-0.
-dataeval-0.
-dataeval-0.
-dataeval-0.
+dataeval/workflows/__init__.py,sha256=Yl6YYgHFwUM1porR3yT6ELyoUw5Op9e6QpQACdXoKBU,226
+dataeval-0.70.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
+dataeval-0.70.0.dist-info/METADATA,sha256=qdgzLlvHmmQNTQqUVkPcaCZJL9QlxUbzPipHcIaSFsI,4284
+dataeval-0.70.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.70.0.dist-info/RECORD,,
dataeval/_internal/flags.py
DELETED
@@ -1,77 +0,0 @@
-from __future__ import annotations
-
-from enum import IntFlag, auto
-from functools import reduce
-from typing import Iterable, TypeVar, cast
-
-TFlag = TypeVar("TFlag", bound=IntFlag)
-
-
-class ImageStat(IntFlag):
-    """
-    Flags for calculating image and channel statistics
-    """
-
-    # HASHES
-    XXHASH = auto()
-    PCHASH = auto()
-
-    # PROPERTIES
-    WIDTH = auto()
-    HEIGHT = auto()
-    SIZE = auto()
-    ASPECT_RATIO = auto()
-    CHANNELS = auto()
-    DEPTH = auto()
-
-    # VISUALS
-    BRIGHTNESS = auto()
-    BLURRINESS = auto()
-    CONTRAST = auto()
-    DARKNESS = auto()
-    MISSING = auto()
-    ZEROS = auto()
-
-    # PIXEL STATS
-    MEAN = auto()
-    STD = auto()
-    VAR = auto()
-    SKEW = auto()
-    KURTOSIS = auto()
-    ENTROPY = auto()
-    PERCENTILES = auto()
-    HISTOGRAM = auto()
-
-    # JOINT FLAGS
-    ALL_HASHES = XXHASH | PCHASH
-    ALL_PROPERTIES = WIDTH | HEIGHT | SIZE | ASPECT_RATIO | CHANNELS | DEPTH
-    ALL_VISUALS = BRIGHTNESS | BLURRINESS | CONTRAST | DARKNESS | MISSING | ZEROS
-    ALL_PIXELSTATS = MEAN | STD | VAR | SKEW | KURTOSIS | ENTROPY | PERCENTILES | HISTOGRAM
-    ALL_CHANNEL_STATS = BRIGHTNESS | CONTRAST | DARKNESS | ZEROS | ALL_PIXELSTATS
-    ALL_STATS = ALL_PROPERTIES | ALL_VISUALS | ALL_PIXELSTATS
-    ALL = ALL_HASHES | ALL_STATS
-
-
-def is_distinct(flag: IntFlag) -> bool:
-    return (flag & (flag - 1) == 0) and flag != 0
-
-
-def to_distinct(flag: TFlag) -> dict[TFlag, str]:
-    """
-    Returns a distinct set of all flags set on the input flag and their names
-
-    NOTE: this is supported natively in Python 3.11, but for earlier versions we need
-    to use a combination of list comprehension and bit fiddling to determine distinct
-    flag values from joint aliases.
-    """
-    if isinstance(flag, Iterable):  # >= py311
-        return {f: f.name.lower() for f in flag if f.name}
-    else:  # < py311
-        return {f: f.name.lower() for f in list(flag.__class__) if f & flag and is_distinct(f) and f.name}
-
-
-def verify_supported(flag: TFlag, flags: TFlag | Iterable[TFlag]):
-    supported = flags if isinstance(flags, flag.__class__) else cast(TFlag, reduce(lambda a, b: a | b, flags))  # type: ignore
-    unsupported = flag & ~supported
-    if unsupported:
-        raise ValueError(f"Unsupported flags {unsupported} called. Only {supported} flags are supported.")
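The deleted is_distinct helper relied on the standard power-of-two bit trick: subtracting 1 from a value with exactly one set bit flips that bit and sets all the lower ones, so ANDing with the original yields zero only for single-bit flags. A standalone illustration of the idea:

    from enum import IntFlag, auto

    class Color(IntFlag):
        RED = auto()    # 0b001
        GREEN = auto()  # 0b010
        BLUE = auto()   # 0b100
        ALL = RED | GREEN | BLUE

    def is_distinct(flag: IntFlag) -> bool:
        # True only when exactly one bit is set (a distinct flag, not a joint alias)
        return (flag & (flag - 1) == 0) and flag != 0

    assert is_distinct(Color.RED)
    assert not is_distinct(Color.ALL)  # joint alias: multiple bits set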