dataeval 0.86.3__py3-none-any.whl → 0.86.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/config.py +1 -1
- dataeval/data/_images.py +3 -1
- dataeval/data/_metadata.py +22 -28
- dataeval/detectors/drift/_nml/_result.py +2 -2
- dataeval/detectors/linters/outliers.py +52 -21
- dataeval/metadata/_distance.py +1 -1
- dataeval/metrics/bias/_balance.py +5 -4
- dataeval/metrics/stats/_base.py +4 -4
- dataeval/metrics/stats/_labelstats.py +11 -11
- dataeval/metrics/stats/_pixelstats.py +5 -4
- dataeval/metrics/stats/_visualstats.py +7 -8
- dataeval/outputs/_drift.py +1 -1
- dataeval/outputs/_linters.py +1 -1
- dataeval/outputs/_stats.py +64 -19
- dataeval/utils/_plot.py +6 -6
- dataeval/utils/data/_dataset.py +2 -3
- dataeval/utils/datasets/_fileio.py +1 -1
- dataeval/utils/torch/_internal.py +1 -1
- {dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/METADATA +1 -1
- {dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/RECORD +23 -23
- {dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/WHEEL +0 -0
dataeval/__init__.py
CHANGED
dataeval/config.py
CHANGED
dataeval/data/_images.py
CHANGED
@@ -4,6 +4,8 @@ __all__ = []
 
 from typing import TYPE_CHECKING, Any, Generic, Iterator, Sequence, TypeVar, cast, overload
 
+import numpy as np
+
 from dataeval.typing import Array, ArrayLike, Dataset
 from dataeval.utils._array import as_numpy, channels_first_to_last
 
@@ -58,7 +60,7 @@ class Images(Generic[T]):
         num_images = len(indices)
         num_rows = (num_images + images_per_row - 1) // images_per_row
         fig, axes = plt.subplots(num_rows, images_per_row, figsize=figsize)
-        for i, ax in enumerate(axes.flatten()):
+        for i, ax in enumerate(np.asarray(axes).flatten()):
             image = channels_first_to_last(as_numpy(self[i]))
             ax.imshow(image)
             ax.axis("off")
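The `np.asarray(axes).flatten()` change guards against `plt.subplots` returning a single `Axes` object instead of an array when only one subplot is created. A minimal standalone sketch of the pattern (illustrative code, not DataEval itself):

```python
import matplotlib.pyplot as plt
import numpy as np

fig, axes = plt.subplots(1, 1)         # a lone Axes object, which has no .flatten()
for ax in np.asarray(axes).flatten():  # np.asarray() lets one code path handle 1 axis or a grid
    ax.axis("off")
```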
dataeval/data/_metadata.py
CHANGED
@@ -4,7 +4,7 @@ __all__ = []
 
 import warnings
 from dataclasses import dataclass
-from typing import Any, Iterable, Literal, Mapping, Sequence
+from typing import Any, Iterable, Literal, Mapping, Sequence, Sized
 
 import numpy as np
 import polars as pl
@@ -20,6 +20,10 @@ from dataeval.utils._bin import bin_data, digitize_data
 from dataeval.utils.data.metadata import merge
 
 
+def _binned(name: str) -> str:
+    return f"{name}[]"
+
+
 @dataclass
 class FactorInfo:
     factor_type: Literal["categorical", "continuous", "discrete"] | None = None
@@ -65,6 +69,7 @@ class Metadata:
         self._is_binned = False
 
         self._dataset = dataset
+        self._count = len(dataset) if isinstance(dataset, Sized) else 0
         self._continuous_factor_bins = dict(continuous_factor_bins) if continuous_factor_bins else {}
        self._auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = auto_bin_method
 
@@ -157,13 +162,13 @@ class Metadata:
     def factor_names(self) -> Sequence[str]:
         """Factor names of the metadata."""
         self._structure()
-        return list(self._factors)
+        return list(filter(self._filter, self._factors))
 
     @property
     def factor_info(self) -> Mapping[str, FactorInfo]:
         """Factor types of the metadata."""
         self._bin()
-        return self._factors
+        return dict(filter(self._filter, self._factors.items()))
 
     @property
     def factor_data(self) -> NDArray[Any]:
@@ -194,14 +199,19 @@ class Metadata:
 
     @property
     def image_count(self) -> int:
-        self.
-
+        if self._count == 0:
+            self._structure()
+        return self._count
+
+    def _filter(self, factor: str | tuple[str, Any]) -> bool:
+        factor = factor[0] if isinstance(factor, tuple) else factor
+        return factor in self.include if self.include else factor not in self.exclude
 
     def _reset_bins(self, cols: Iterable[str] | None = None) -> None:
         if self._is_binned:
             columns = self._dataframe.columns
-            for col in (col for col in cols or columns if
-                self._dataframe.drop_in_place(
+            for col in (col for col in cols or columns if _binned(col) in columns):
+                self._dataframe.drop_in_place(_binned(col))
                 self._factors[col] = FactorInfo()
             self._is_binned = False
 
@@ -244,7 +254,7 @@ class Metadata:
         bboxes = as_numpy(bboxes).astype(np.float32) if is_od else None
         srcidx = as_numpy(srcidx).astype(np.intp) if is_od else None
 
-        index2label = self._dataset.metadata.get("index2label", {})
+        index2label = self._dataset.metadata.get("index2label", {i: str(i) for i in np.unique(labels)})
 
         targets_per_image = None if srcidx is None else np.unique(srcidx, return_counts=True)[1].tolist()
         merged = merge(raw, return_dropped=True, ignore_lists=False, targets_per_image=targets_per_image)
@@ -260,8 +270,9 @@ class Metadata:
         }
 
         self._raw = raw
+        self._index2label = index2label
         self._class_labels = labels
-        self._class_names =
+        self._class_names = list(index2label.values())
         self._image_indices = target_dict["image_index"]
         self._factors = dict.fromkeys(factor_dict, FactorInfo())
         self._dataframe = pl.DataFrame({**target_dict, **factor_dict})
@@ -289,10 +300,10 @@ class Metadata:
         )
 
         column_set = set(df.columns)
-        for col in (col for col in self.factor_names if
+        for col in (col for col in self.factor_names if _binned(col) not in column_set):
             # Get data as numpy array for processing
             data = df[col].to_numpy()
-            col_dz =
+            col_dz = _binned(col)
             if col in factor_bins:
                 # User provided binning
                 bins = factor_bins[col]
@@ -326,23 +337,6 @@ class Metadata:
         self._factors.update(factor_info)
         self._is_binned = True
 
-    def get_factors_by_type(self, factor_type: Literal["categorical", "continuous", "discrete"]) -> Sequence[str]:
-        """
-        Get the names of factors of a specific type.
-
-        Parameters
-        ----------
-        factor_type : Literal["categorical", "continuous", "discrete"]
-            The type of factors to retrieve.
-
-        Returns
-        -------
-        list[str]
-            List of factor names of the specified type.
-        """
-        self._bin()
-        return [name for name, info in self.factor_info.items() if info.factor_type == factor_type]
-
     def add_factors(self, factors: Mapping[str, Array | Sequence[Any]]) -> None:
         """
         Add additional factors to the metadata.
dataeval/detectors/drift/_nml/_result.py
CHANGED
@@ -29,7 +29,7 @@ class AbstractResult(GenericOutput[pd.DataFrame]):
         self._data = results_data.copy(deep=True)
 
     def data(self) -> pd.DataFrame:
-        return self.
+        return self.to_dataframe()
 
     @property
     def empty(self) -> bool:
@@ -38,7 +38,7 @@ class AbstractResult(GenericOutput[pd.DataFrame]):
     def __len__(self) -> int:
         return 0 if self.empty else len(self._data)
 
-    def
+    def to_dataframe(self, multilevel: bool = True) -> pd.DataFrame:
         """Export results to pandas dataframe."""
         if multilevel:
             return self._data
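A minimal sketch of the delegation shown above, where `data()` now routes through the public `to_dataframe()` accessor (toy `Result` class, not the actual `AbstractResult`):

```python
import pandas as pd

class Result:
    def __init__(self, frame: pd.DataFrame) -> None:
        self._data = frame.copy(deep=True)

    def data(self) -> pd.DataFrame:
        return self.to_dataframe()           # data() now routes through to_dataframe()

    def to_dataframe(self, multilevel: bool = True) -> pd.DataFrame:
        return self._data                    # multilevel handling elided in this sketch

print(Result(pd.DataFrame({"value": [1, 2]})).data())
```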
dataeval/detectors/linters/outliers.py
CHANGED
@@ -7,6 +7,7 @@ from typing import Any, Literal, Sequence, overload
 import numpy as np
 from numpy.typing import NDArray
 
+from dataeval.config import EPSILON
 from dataeval.data._images import Images
 from dataeval.metrics.stats._base import combine_stats, get_dataset_step_from_idx
 from dataeval.metrics.stats._imagestats import imagestats
@@ -18,26 +19,56 @@ from dataeval.typing import ArrayLike, Dataset
 
 
 def _get_outlier_mask(
-    values: NDArray, method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
-) -> NDArray:
+    values: NDArray[Any], method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
+) -> NDArray[np.bool_]:
+    if len(values) == 0:
+        return np.array([], dtype=bool)
+
     values = values.astype(np.float64)
+
+    valid_mask = ~np.isnan(values)
+    outliers = np.full(values.shape, False, dtype=bool)
+
+    if not np.any(valid_mask):
+        return outliers
+
     if method == "zscore":
-        threshold = threshold if threshold else 3.0
-        (old lines 26-40 not captured in this rendering)
+        threshold = threshold if threshold is not None else 3.0
+
+        std_val = np.nanstd(values)
+
+        if std_val > EPSILON:
+            mean_val = np.nanmean(values)
+            abs_diff = np.abs(values - mean_val)
+            outliers = (abs_diff / std_val) > threshold
+
+    elif method == "modzscore":
+        threshold = threshold if threshold is not None else 3.5
+
+        median_val = np.nanmedian(values)
+        abs_diff = np.abs(values - median_val)
+        m_abs_diff = np.nanmedian(abs_diff)
+        m_abs_diff = np.nanmean(abs_diff) if m_abs_diff <= EPSILON else m_abs_diff
+
+        if m_abs_diff > EPSILON:
+            mod_z_score = 0.6745 * abs_diff / m_abs_diff
+            outliers = mod_z_score > threshold
+
+    elif method == "iqr":
+        threshold = threshold if threshold is not None else 1.5
+
+        qrt = np.nanpercentile(values, q=(25, 75), method="midpoint")
+        iqr_val = qrt[1] - qrt[0]
+
+        if iqr_val > EPSILON:
+            iqr_threshold = iqr_val * threshold
+            outliers = (values < (qrt[0] - iqr_threshold)) | (values > (qrt[1] + iqr_threshold))
+
+    else:
+        raise ValueError("Outlier method must be 'zscore' 'modzscore' or 'iqr'.")
+
+    outliers[~valid_mask] = False
+    return outliers
 
 
 class Outliers:
@@ -164,10 +195,10 @@ class Outliers:
         >>> len(results)
         2
         >>> results.issues[0]
-        {10: {'
+        {10: {'entropy': 0.2128}, 12: {'std': 0.00536, 'var': 2.87e-05, 'entropy': 0.2128}}
         >>> results.issues[1]
         {}
-        """
+        """
         if isinstance(stats, (ImageStatsOutput, DimensionStatsOutput, PixelStatsOutput, VisualStatsOutput)):
             return OutliersOutput(self._get_outliers(stats.data()))
 
@@ -221,7 +252,7 @@ class Outliers:
        >>> list(results.issues)
         [10, 12]
         >>> results.issues[10]
-        {'contrast': 1.25, 'zeros': 0.05493, '
+        {'contrast': 1.25, 'zeros': 0.05493, 'entropy': 0.2128}
         """
         images = Images(data) if isinstance(data, Dataset) else data
         self.stats = imagestats(images)
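For reference, the modified z-score branch above can be exercised in isolation. The sketch below mirrors its NaN handling and MAD fallback under an assumed `EPSILON` value; `modzscore_outliers` is a toy name, not part of the DataEval API:

```python
import numpy as np

EPSILON = 1e-10  # assumed small constant, analogous to dataeval.config.EPSILON

def modzscore_outliers(values: np.ndarray, threshold: float = 3.5) -> np.ndarray:
    values = values.astype(np.float64)
    valid = ~np.isnan(values)
    outliers = np.full(values.shape, False, dtype=bool)
    if not valid.any():
        return outliers
    median = np.nanmedian(values)
    abs_diff = np.abs(values - median)
    mad = np.nanmedian(abs_diff)
    mad = np.nanmean(abs_diff) if mad <= EPSILON else mad  # fall back when the MAD collapses
    if mad > EPSILON:
        outliers = 0.6745 * abs_diff / mad > threshold
    outliers[~valid] = False  # NaN entries are never flagged
    return outliers

print(modzscore_outliers(np.array([1.0, 1.1, 0.9, np.nan, 50.0])))
# -> [False False False False  True]
```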
dataeval/metadata/_distance.py
CHANGED
@@ -81,7 +81,7 @@ def metadata_distance(metadata1: Metadata, metadata2: Metadata) -> MetadataDista
     """
 
     _compare_keys(metadata1.factor_names, metadata2.factor_names)
-    cont_fnames = metadata1.
+    cont_fnames = [name for name, info in metadata1.factor_info.items() if info.factor_type == "continuous"]
 
     if not cont_fnames:
         return MetadataDistanceOutput({})
dataeval/metrics/bias/_balance.py
CHANGED
@@ -99,9 +99,10 @@ def balance(
     factor_types = {"class_label": "categorical"} | {k: v.factor_type for k, v in metadata.factor_info.items()}
     is_discrete = [factor_type != "continuous" for factor_type in factor_types.values()]
     num_factors = len(factor_types)
+    class_labels = metadata.class_labels
 
     mi = np.full((num_factors, num_factors), np.nan, dtype=np.float32)
-    data = np.hstack((
+    data = np.hstack((class_labels[:, np.newaxis], data))
 
     for idx, factor_type in enumerate(factor_types.values()):
         if factor_type != "continuous":
@@ -132,12 +133,12 @@ def balance(
     factors = nmi[1:, 1:]
 
     # assume class is a factor
-
+    u_classes = np.unique(class_labels)
+    num_classes = len(u_classes)
     classwise_mi = np.full((num_classes, num_factors), np.nan, dtype=np.float32)
 
     # classwise targets
-
-    tgt_bin = data[:, 0][:, None] == classes
+    tgt_bin = data[:, 0][:, None] == u_classes
 
     # classification MI for discrete/categorical features
     for idx in range(num_classes):
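The `tgt_bin` line above builds a one-vs-rest membership matrix from the class-label column; a standalone illustration with toy labels:

```python
import numpy as np

class_labels = np.array([0, 1, 1, 2, 0])
u_classes = np.unique(class_labels)
tgt_bin = class_labels[:, None] == u_classes  # shape (samples, classes)
print(tgt_bin.astype(int))
# [[1 0 0]
#  [0 1 0]
#  [0 1 0]
#  [0 0 1]
#  [1 0 0]]
```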
dataeval/metrics/stats/_base.py
CHANGED
@@ -13,8 +13,8 @@ from multiprocessing import Pool
 from typing import Any, Callable, Generic, Iterable, Iterator, Sequence, TypeVar
 
 import numpy as np
-import tqdm
 from numpy.typing import NDArray
+from tqdm.auto import tqdm
 
 from dataeval.config import get_max_processes
 from dataeval.outputs._stats import BASE_ATTRS, BaseStatsOutput, SourceIndex
@@ -77,7 +77,7 @@ class PoolWrapper:
     """
 
     def __init__(self, processes: int | None) -> None:
-        self.pool = Pool(processes) if processes is
+        self.pool = Pool(processes) if processes is None or processes > 1 else None
 
     def imap(self, func: Callable[[_S], _T], iterable: Iterable[_S]) -> Iterator[_T]:
         return map(func, iterable) if self.pool is None else self.pool.imap(func, iterable)
@@ -93,7 +93,7 @@ class PoolWrapper:
 
 class StatsProcessor(Generic[TStatsOutput]):
     output_class: type[TStatsOutput]
-    cache_keys:
+    cache_keys: set[str] = set()
     image_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
     channel_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
 
@@ -267,7 +267,7 @@ def run_stats(
     stats_processor_cls = stats_processor_cls if isinstance(stats_processor_cls, Iterable) else [stats_processor_cls]
 
     with PoolWrapper(processes=get_max_processes()) as p:
-        for r in tqdm
+        for r in tqdm(
             p.imap(
                 partial(
                     process_stats_unpack,
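The `PoolWrapper` change above means a worker pool is only created when more than one process is requested, or when the count is left to the runtime; with `processes=1` it falls back to a plain `map()`. A toy predicate capturing that rule:

```python
def uses_pool(processes):
    # None lets the runtime pick the worker count; anything above 1 still gets a Pool
    return processes is None or processes > 1

print([(p, uses_pool(p)) for p in (None, 1, 2, 8)])
# [(None, True), (1, False), (2, True), (8, True)]
```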
dataeval/metrics/stats/_labelstats.py
CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 __all__ = []
 
-from typing import Any,
+from typing import Any, TypeVar
 
 import polars as pl
 
@@ -14,10 +14,6 @@ from dataeval.typing import AnnotatedDataset
 TValue = TypeVar("TValue")
 
 
-def _sort_to_list(d: Mapping[int, TValue]) -> list[TValue]:
-    return [t[1] for t in sorted(d.items())]
-
-
 @set_metadata
 def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
     """
@@ -58,21 +54,25 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
 
     # Count occurrences of each label across all images
     label_counts_df = metadata_df.group_by("class_label").len()
-    label_counts = label_counts_df
+    label_counts = dict(zip(label_counts_df["class_label"], label_counts_df["len"]))
 
     # Count unique images per label (how many images contain each label)
     image_counts_df = metadata_df.select(["image_index", "class_label"]).unique().group_by("class_label").len()
-    image_counts = image_counts_df
+    image_counts = dict(zip(image_counts_df["class_label"], image_counts_df["len"]))
 
     # Create index_location mapping (which images contain each label)
-    index_location:
+    index_location: dict[int, list[int]] = {}
     for row in metadata_df.group_by("class_label").agg(pl.col("image_index")).to_dicts():
         indices = row["image_index"]
         index_location[row["class_label"]] = sorted(dict.fromkeys(indices)) if isinstance(indices, list) else [indices]
 
     # Count labels per image
-    label_per_image_df = metadata_df.group_by("image_index").agg(pl.
-
+    label_per_image_df = metadata_df.group_by("image_index").agg(pl.len().alias("label_count"))
+
+    # Join with all indices to include missing ones with 0 count
+    all_indices = pl.DataFrame({"image_index": range(metadata.image_count)})
+    complete_label_df = all_indices.join(label_per_image_df, on="image_index", how="left").fill_null(0)
+    label_per_image = complete_label_df.sort("image_index")["label_count"].to_list()
 
     return LabelStatsOutput(
         label_counts_per_class=label_counts,
@@ -81,6 +81,6 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
         image_indices_per_class=index_location,
         image_count=len(label_per_image),
         class_count=len(metadata.class_names),
-        label_count=sum(label_counts),
+        label_count=sum(label_counts.values()),
         class_names=metadata.class_names,
     )
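The left join against the full index range above is what gives images with no labels an explicit count of 0; a small standalone polars illustration of the same pattern (toy data):

```python
import polars as pl

per_image = pl.DataFrame({"image_index": [0, 2], "label_count": [3, 1]})
all_indices = pl.DataFrame({"image_index": range(4)})          # every image index, labelled or not
complete = all_indices.join(per_image, on="image_index", how="left").fill_null(0)
print(complete.sort("image_index")["label_count"].to_list())   # [3, 0, 1, 0]
```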
dataeval/metrics/stats/_pixelstats.py
CHANGED
@@ -15,12 +15,13 @@ from dataeval.typing import ArrayLike, Dataset
 
 class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
     output_class: type = PixelStatsOutput
+    cache_keys = {"histogram"}
     image_function_map: dict[str, Callable[[StatsProcessor[PixelStatsOutput]], Any]] = {
         "mean": lambda x: np.nanmean(x.scaled),
         "std": lambda x: np.nanstd(x.scaled),
         "var": lambda x: np.nanvar(x.scaled),
-        "skew": lambda x:
-        "kurtosis": lambda x:
+        "skew": lambda x: skew(x.scaled.ravel(), nan_policy="omit"),
+        "kurtosis": lambda x: kurtosis(x.scaled.ravel(), nan_policy="omit"),
         "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
         "entropy": lambda x: entropy(x.get("histogram")),
     }
@@ -28,8 +29,8 @@ class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
         "mean": lambda x: np.nanmean(x.scaled, axis=1),
         "std": lambda x: np.nanstd(x.scaled, axis=1),
         "var": lambda x: np.nanvar(x.scaled, axis=1),
-        "skew": lambda x:
-        "kurtosis": lambda x:
+        "skew": lambda x: skew(x.scaled, axis=1, nan_policy="omit"),
+        "kurtosis": lambda x: kurtosis(x.scaled, axis=1, nan_policy="omit"),
         "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
         "entropy": lambda x: entropy(x.get("histogram"), axis=1),
     }
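With `nan_policy="omit"`, NaN pixels are dropped from the skew and kurtosis computations instead of propagating; a toy example of the scipy calls used above:

```python
import numpy as np
from scipy.stats import kurtosis, skew

pixels = np.array([0.1, 0.2, np.nan, 0.4, 0.9])
# both statistics are computed over the non-NaN values only
print(skew(pixels, nan_policy="omit"), kurtosis(pixels, nan_policy="omit"))
```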
dataeval/metrics/stats/_visualstats.py
CHANGED
@@ -6,6 +6,7 @@ from typing import Any, Callable
 
 import numpy as np
 
+from dataeval.config import EPSILON
 from dataeval.metrics.stats._base import StatsProcessor, run_stats
 from dataeval.outputs import VisualStatsOutput
 from dataeval.outputs._base import set_metadata
@@ -17,23 +18,21 @@ QUARTILES = (0, 25, 50, 75, 100)
 
 class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
     output_class: type = VisualStatsOutput
+    cache_keys: set[str] = {"percentiles"}
     image_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
         "brightness": lambda x: x.get("percentiles")[1],
-        "contrast": lambda x:
-
-        else (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles")),
+        "contrast": lambda x: (np.max(x.get("percentiles")) - np.min(x.get("percentiles")))
+        / (np.mean(x.get("percentiles")) + EPSILON),
         "darkness": lambda x: x.get("percentiles")[-2],
         "missing": lambda x: np.count_nonzero(np.isnan(np.sum(x.image, axis=0))) / np.prod(x.shape[-2:]),
         "sharpness": lambda x: np.nanstd(edge_filter(np.mean(x.image, axis=0))),
-        "zeros": lambda x: np.count_nonzero(np.
+        "zeros": lambda x: np.count_nonzero(np.sum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
         "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
     }
     channel_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
         "brightness": lambda x: x.get("percentiles")[:, 1],
-        "contrast": lambda x: np.
-
-        / np.mean(x.get("percentiles"), axis=1)
-        ),
+        "contrast": lambda x: (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
+        / (np.mean(x.get("percentiles"), axis=1) + EPSILON),
         "darkness": lambda x: x.get("percentiles")[:, -2],
         "missing": lambda x: np.count_nonzero(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
         "sharpness": lambda x: np.nanstd(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
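Adding `EPSILON` to the denominator keeps the contrast metric finite on constant (for example all-black) images; a standalone sketch with an assumed `EPSILON` value:

```python
import numpy as np

EPSILON = 1e-10  # assumed small constant, analogous to dataeval.config.EPSILON
percentiles = np.zeros(5)  # percentile values of an all-zero image
contrast = (percentiles.max() - percentiles.min()) / (percentiles.mean() + EPSILON)
print(contrast)  # 0.0 instead of a 0/0 division producing NaN
```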
dataeval/outputs/_drift.py
CHANGED
@@ -114,7 +114,7 @@ class DriftMVDCOutput(PerMetricResult):
         import matplotlib.pyplot as plt
 
         fig, ax = plt.subplots(dpi=300)
-        resdf = self.
+        resdf = self.to_dataframe()
         xticks = np.arange(resdf.shape[0])
         trndf = resdf[resdf["chunk"]["period"] == "reference"]
         tstdf = resdf[resdf["chunk"]["period"] == "analysis"]
dataeval/outputs/_linters.py
CHANGED
@@ -54,7 +54,7 @@ def _reorganize_by_class_and_metric(
     for img, group in result.items():
         for extreme in group:
             metrics.setdefault(extreme, []).append(img)
-            for i, images in
+            for i, images in lstats.image_indices_per_class.items():
                 if img in images:
                     class_wise[lstats.class_names[i]][extreme] = class_wise[lstats.class_names[i]].get(extreme, 0) + 1
 
dataeval/outputs/_stats.py
CHANGED
@@ -6,7 +6,7 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, Iterable, Mapping, NamedTuple, Optional, Sequence, Union
 
 import numpy as np
-import
+import polars as pl
 from numpy.typing import NDArray
 from typing_extensions import TypeAlias
 
@@ -22,7 +22,7 @@ SOURCE_INDEX = "source_index"
 OBJECT_COUNT = "object_count"
 IMAGE_COUNT = "image_count"
 
-BASE_ATTRS =
+BASE_ATTRS = [SOURCE_INDEX, OBJECT_COUNT, IMAGE_COUNT]
 
 
 class SourceIndex(NamedTuple):
@@ -156,14 +156,21 @@ class BaseStatsOutput(Output):
         Mapping[str, NDArray[Any]]
         """
         filter_ = [filter] if isinstance(filter, str) else filter
+
+        """
+        Performs validation checks to ensure selected keys and constant or 1-D values
+        Each set of checks returns True if a valid value.
+        Only one set of final checks needs to be True to allow the value through
+        """
         return {
             k: v
            for k, v in self.data().items()
-            if
-
-
-
-
+            if (
+                k not in BASE_ATTRS  # Ignore BaseStatsOutput attributes
+                and (filter_ is None or k in filter_)  # Key is selected
+                and (isinstance(v, np.ndarray) and v.ndim == 1)  # Check valid array
+                and (not exclude_constant or len(np.unique(v)) > 1)  # Check valid numpy "constant"
+            )
         }
 
     def plot(
@@ -195,6 +202,11 @@ class BaseStatsOutput(Output):
             return histogram_plot(factors, log)
         return channel_histogram_plot(factors, log, max_channels, ch_mask)
 
+    def to_dataframe(self) -> pl.DataFrame:
+        """Returns the processed factors a polars dataframe of shape (factors, samples)"""
+
+        return pl.DataFrame(self.factors())
+
 
 @dataclass(frozen=True)
 class DimensionStatsOutput(BaseStatsOutput):
@@ -256,6 +268,43 @@ class HashStatsOutput(BaseStatsOutput):
     xxhash: Sequence[str]
     pchash: Sequence[str]
 
+    def to_dataframe(self) -> pl.DataFrame:
+        """
+        Returns a polars dataframe for the xxhash and pchash attributes of each sample
+
+        Note
+        ----
+        xxhash and pchash do not follow the normal definition of factors but are
+        helpful attributes of the data
+
+        Examples
+        --------
+        Display the hashes of a dataset of images, whose shape is (C, H, W),
+        as a polars DataFrame
+
+        >>> from dataeval.metrics.stats import hashstats
+        >>> results = hashstats(dataset)
+        >>> print(results.to_dataframe())
+        shape: (8, 2)
+        ┌──────────────────┬──────────────────┐
+        │ xxhash           ┆ pchash           │
+        │ ---              ┆ ---              │
+        │ str              ┆ str              │
+        ╞══════════════════╪══════════════════╡
+        │ 69b50a5f06af238c ┆ e666999999266666 │
+        │ 5a861d7a23d1afe7 ┆ e666999999266666 │
+        │ 7ffdb4990ad44ac6 ┆ e666999966666299 │
+        │ 4f0c366a3298ceac ┆ e666999999266666 │
+        │ c5519e36ac1f8839 ┆ 96e91656e91616e9 │
+        │ e7e92346159a4567 ┆ e666999999266666 │
+        │ 9a538f797a5ba8ee ┆ e666999999266666 │
+        │ 1a658bd2a1baee25 ┆ e666999999266666 │
+        └──────────────────┴──────────────────┘
+        """
+        data = {"xxhash": self.xxhash, "pchash": self.pchash}
+        schema = {"xxhash": str, "pchash": str}
+        return pl.DataFrame(data=data, schema=schema)
+
 
 @dataclass(frozen=True)
 class LabelStatsOutput(Output):
@@ -272,7 +321,7 @@ class LabelStatsOutput(Output):
     image_counts_per_class : Mapping[int, int]
         Dictionary whose keys are the different label classes and
         values are total counts of each image the class is present in
-    image_indices_per_class : Mapping[int,
+    image_indices_per_class : Mapping[int, Sequence[int]]
         Dictionary whose keys are the different label classes and
         values are lists containing the images that have that label
     image_count : int
@@ -284,10 +333,10 @@ class LabelStatsOutput(Output):
     class_names : Sequence[str]
     """
 
-    label_counts_per_class:
+    label_counts_per_class: Mapping[int, int]
     label_counts_per_image: Sequence[int]
-    image_counts_per_class:
-    image_indices_per_class:
+    image_counts_per_class: Mapping[int, int]
+    image_indices_per_class: Mapping[int, Sequence[int]]
     image_count: int
     class_count: int
     label_count: int
@@ -325,17 +374,13 @@ class LabelStatsOutput(Output):
 
         return "\n".join(table_str)
 
-    def to_dataframe(self) ->
+    def to_dataframe(self) -> pl.DataFrame:
         """
-        Exports the label statistics output results to a
-
-        Notes
-        -----
-        This method requires `pandas <https://pandas.pydata.org/>`_ to be installed.
+        Exports the label statistics output results to a polars DataFrame.
 
         Returns
         -------
-
+        pl.DataFrame
        """
         total_count = []
         image_count = []
@@ -343,7 +388,7 @@ class LabelStatsOutput(Output):
            total_count.append(self.label_counts_per_class[cls])
            image_count.append(self.image_counts_per_class[cls])
 
-        return
+        return pl.DataFrame(
             {
                 "Label": self.class_names,
                 "Total Count": total_count,
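A hedged sketch of the selection rules spelled out in `factors()` above: base attributes and non-1-D arrays are dropped, constants are dropped when requested, and the surviving mapping can then be handed to `pl.DataFrame` as `to_dataframe()` does (toy data, not the DataEval API):

```python
import numpy as np
import polars as pl

data = {
    "mean": np.array([0.1, 0.2, 0.3]),
    "histogram": np.ones((3, 256)),      # 2-D, filtered out
    "zeros": np.array([0.0, 0.0, 0.0]),  # constant, filtered out when constants are excluded
}
factors = {
    k: v for k, v in data.items()
    if isinstance(v, np.ndarray) and v.ndim == 1 and len(np.unique(v)) > 1
}
print(pl.DataFrame(factors))  # a single "mean" column with 3 rows
```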
dataeval/utils/_plot.py
CHANGED
@@ -164,9 +164,9 @@ def histogram_plot(
     rows = math.ceil(num_metrics / 3)
     cols = min(num_metrics, 3)
     fig, axs = plt.subplots(rows, 3, figsize=(cols * 3 + 1, rows * 3))
-
+    axs_flat = np.asarray(axs).flatten()
     for ax, metric in zip(
-
+        axs_flat,
         data_dict,
     ):
         # Plot the histogram for the chosen metric
@@ -177,7 +177,7 @@ def histogram_plot(
         ax.set_ylabel(ylabel)
         ax.set_xlabel(xlabel)
 
-    for ax in
+    for ax in axs_flat[num_metrics:]:
         ax.axis("off")
         ax.set_visible(False)
 
@@ -222,9 +222,9 @@ def channel_histogram_plot(
     rows = math.ceil(num_metrics / 3)
     cols = min(num_metrics, 3)
     fig, axs = plt.subplots(rows, 3, figsize=(cols * 3 + 1, rows * 3))
-
+    axs_flat = np.asarray(axs).flatten()
     for ax, metric in zip(
-
+        axs_flat,
         data_keys,
     ):
         # Plot the histogram for the chosen metric
@@ -246,7 +246,7 @@ def channel_histogram_plot(
         ax.set_ylabel(ylabel)
         ax.set_xlabel(xlabel)
 
-    for ax in
+    for ax in axs_flat[num_metrics:]:
         ax.axis("off")
         ax.set_visible(False)
 
dataeval/utils/data/_dataset.py
CHANGED
@@ -72,9 +72,8 @@ def _listify_metadata(
 
 def _find_max(arr: ArrayLike) -> Any:
     if not isinstance(arr, (bytes, str)) and isinstance(arr, (Iterable, Sequence, Array)):
-
-
-        return max(arr)
+        nested = [x for x in [_find_max(x) for x in arr] if x is not None]
+        return max(nested) if len(nested) > 0 else None
     return arr
 
 
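A standalone toy reimplementation of the `_find_max` change above: nested structures are reduced recursively and empty branches yield `None` instead of raising (the `hasattr` check stands in for the DataEval type checks):

```python
def find_max(arr):
    if not isinstance(arr, (bytes, str)) and hasattr(arr, "__iter__"):
        # reduce each branch first, then drop empty (None) branches before taking the max
        nested = [x for x in (find_max(x) for x in arr) if x is not None]
        return max(nested) if nested else None
    return arr

print(find_max([[1, 5], [], [3, [7, 2]]]))  # 7
```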
dataeval/utils/torch/_internal.py
CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 import torch
 from numpy.typing import NDArray
 from torch.utils.data import DataLoader, TensorDataset
-from tqdm import tqdm
+from tqdm.auto import tqdm
 
 from dataeval.config import DeviceLike, get_device
 from dataeval.typing import Array
{dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataeval
-Version: 0.86.3
+Version: 0.86.5
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Home-page: https://dataeval.ai/
 License: MIT
{dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/RECORD
CHANGED
@@ -1,10 +1,10 @@
-dataeval/__init__.py,sha256=
+dataeval/__init__.py,sha256=5qOVdEDEZt5O--VufuRJXGEByzQC7pJWZluFGzPuNOc,1636
 dataeval/_log.py,sha256=C7AGkIRzymvYJ0LQXtnShiy3i5Xrp8T58JzIHHguk_Q,365
-dataeval/config.py,sha256=
+dataeval/config.py,sha256=bHa8np4FCtLLv8_xlfdDC4lb1InJ_kT0vXDO5P42rvk,4082
 dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
 dataeval/data/_embeddings.py,sha256=PFjpdV9bfusCB4taTIYSzx1hP8nJb_KCkZTN8kMw-Hs,12885
-dataeval/data/_images.py,sha256=
-dataeval/data/_metadata.py,sha256=
+dataeval/data/_images.py,sha256=Rc_59CuU4zfN7Xm7an1XUx8ZghQg6a56VJWMZD9edRw,2654
+dataeval/data/_metadata.py,sha256=OTda9V7DA5Ejxip_NR16LCK2C8HMtpjWHHiFoW3LrLY,14364
 dataeval/data/_selection.py,sha256=r06xeiyK8nTWPLyItkoPQRWZI1i6LATSue_cuEbCdc4,4463
 dataeval/data/_split.py,sha256=nQABR05vxil2Qx7-uX4Fm0_DWpibskBGDJOYj_b1u3I,16737
 dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
@@ -26,24 +26,24 @@ dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie
 dataeval/detectors/drift/_nml/_base.py,sha256=o34LcCsD9p1A6u8UdQn-dxIVwC2CMr6uCpC0vq16JX0,2663
 dataeval/detectors/drift/_nml/_chunk.py,sha256=t12eouanRNiu5DJXOaYDZXUvFMqfcp1BETLOufdV79M,13567
 dataeval/detectors/drift/_nml/_domainclassifier.py,sha256=n7Ttq5Ej7sAY9Jn2iagaGj4IIWiG8gmA3wwFizlBqes,7292
-dataeval/detectors/drift/_nml/_result.py,sha256=
+dataeval/detectors/drift/_nml/_result.py,sha256=TMK17bnlgSdL0MCRHtQZJO8YoWWe4C2kh_akESrlP1g,3269
 dataeval/detectors/drift/_nml/_thresholds.py,sha256=WGdkLei9w_EvvsRHQzWdDyFVoZHIwM78k_aB3eoh31Q,12060
 dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
 dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
 dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
 dataeval/detectors/linters/duplicates.py,sha256=X5WSEvI_BHkLoXjkaHK6wTnSkx4IjpO_exMRjSlhc70,4963
-dataeval/detectors/linters/outliers.py,sha256=
+dataeval/detectors/linters/outliers.py,sha256=WO686jVbGbtDjO-8CuYVLxpeUGv8MpIK9QjADlTdd40,9596
 dataeval/detectors/ood/__init__.py,sha256=juCYBDs7CQEAtMhnEpPqF6uTrOIH9kTBSuQ_GRw6a8o,283
 dataeval/detectors/ood/ae.py,sha256=fTrUfFxv6xUqzKpwMC8rW3JrizA16M_bgzqLuBKMrS0,2944
 dataeval/detectors/ood/base.py,sha256=9b-Ljznf0lB1SXF4F_Aj3eJ4Y3ijGEDPMjucUsWOGJM,3051
 dataeval/detectors/ood/mixin.py,sha256=0_o-1HPvgf3-Lf1MSOIfjj5UB8LTLEBGYtJJfyCCzwc,5431
 dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
-dataeval/metadata/_distance.py,sha256=
+dataeval/metadata/_distance.py,sha256=MbXM9idsooNWnGLaTKg8j4ZqavUeJUjuW7EPW3-UQyg,4234
 dataeval/metadata/_ood.py,sha256=lNPHouj_9WfM_uTtsaiRaPn46RcVy3YebD1c32vDj-c,8981
 dataeval/metadata/_utils.py,sha256=BcGoYVfA4AkAWpInY5txOc3QBpsGf6cnnUAsHOQTJAE,1210
 dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
 dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
-dataeval/metrics/bias/_balance.py,sha256=
+dataeval/metrics/bias/_balance.py,sha256=fREtoMLUZPOf_ivqNKwij6oPiKMTk02ECO5rWURf3KY,5541
 dataeval/metrics/bias/_completeness.py,sha256=BysXU2Jpw33n5dl3acJFEqF3mFGiJLsfG4n5Q2fkTaY,4608
 dataeval/metrics/bias/_coverage.py,sha256=PeUoOiaghUEdn6Ov8z2-am7-fnBVIPcFbJK7Ty5JObA,3647
 dataeval/metrics/bias/_diversity.py,sha256=25udDKmel9IjeVT5nM4dOa1apda66QdRxBc922yuUvI,5830
@@ -54,23 +54,23 @@ dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1R
 dataeval/metrics/estimators/_divergence.py,sha256=-np4nWNtRrHnvo4xdWuTzkyJJmobyjDnVDBOMjtBS1Y,4003
 dataeval/metrics/estimators/_uap.py,sha256=BULEBbJ9BQ1IcTeZf0x7iI60QHAWCccBOM97FIu9VXA,1928
 dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_lWGGA,1098
-dataeval/metrics/stats/_base.py,sha256
+dataeval/metrics/stats/_base.py,sha256=R-hxoEPLreZcxYxBfyjbKfdoGMMTPiqJ5g2zSO-1UYM,12541
 dataeval/metrics/stats/_boxratiostats.py,sha256=ROZrlqgbowkGfCR5PJ5TL7Og40iMOdUqJnsCtaz_Xek,6450
 dataeval/metrics/stats/_dimensionstats.py,sha256=EVO-BlxrZl8qrP09lwPbyWdrG1ZeDtgj4LiswDwEZ1I,2896
 dataeval/metrics/stats/_hashstats.py,sha256=qa1CYRgOebkxqkALfffaPM-kJ074ZbyfpWbfOfuObSs,4758
 dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
-dataeval/metrics/stats/_labelstats.py,sha256=
-dataeval/metrics/stats/_pixelstats.py,sha256=
-dataeval/metrics/stats/_visualstats.py,sha256=
+dataeval/metrics/stats/_labelstats.py,sha256=_dXt3p8_-SHEtHvJWbL0rnQvO2g30zxX42mG2LGJepU,3195
+dataeval/metrics/stats/_pixelstats.py,sha256=N9e7RXuzSHtlJtWU7l5IcTTIXe2kOmWiuj6lnJpZWq0,3312
+dataeval/metrics/stats/_visualstats.py,sha256=b6jMq36_UlKduMrkwfq2i0fXNalDEcMdqPgoynXl5hI,3713
 dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
 dataeval/outputs/_base.py,sha256=-Wa0gFcBVLbfWPMZyCql7x4vGsnkLP4pecsQIeUZ2_Y,5904
 dataeval/outputs/_bias.py,sha256=1OZpKncYTryjPLRHb4d6NlhE27uPT57gCob_5jtjKDI,10456
-dataeval/outputs/_drift.py,sha256=
+dataeval/outputs/_drift.py,sha256=hXILED_soY8ppIQZgftQvmumtwDrTnABbYl-flIGEU4,4588
 dataeval/outputs/_estimators.py,sha256=mh-R08CgYtmq9ffANDMYR-V4vrZnSjOjEyOMiMDZ2Ic,3091
-dataeval/outputs/_linters.py,sha256=
+dataeval/outputs/_linters.py,sha256=k8lkd8EZ23q0m-HOD-FgqMcLQFy1UH7vws2ucLPyn08,6697
 dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
 dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
-dataeval/outputs/_stats.py,sha256=
+dataeval/outputs/_stats.py,sha256=_ItGjs9YaMHqjivkR1YBcSErD5ICfa_-iV9nq0l8bTM,17451
 dataeval/outputs/_utils.py,sha256=NfhYaGT2PZlhIs8ICKUsPWHZXjhWYDkEJqBDdqMeaOM,929
 dataeval/outputs/_workflows.py,sha256=K786mOgegxVi81diUA-qpbwGEkwa8YA7Fk4ttgjJeaY,10831
 dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -83,16 +83,16 @@ dataeval/utils/_fast_mst.py,sha256=pv42flr1Uf5RBa9qDG0YLDXWH7Mr7a9zpauO1HqZXaY,8
 dataeval/utils/_image.py,sha256=4uxTIOYZZlRJOfNmdA3ek3no3FrLWCK5un48kStMDt8,3578
 dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
 dataeval/utils/_mst.py,sha256=bLmJmu_1Dtj3hC5gQp3oAiJ_7TKtEjahTqusVRRU4eI,2168
-dataeval/utils/_plot.py,sha256=
+dataeval/utils/_plot.py,sha256=1rnMkBRvTFLoTAHqXwF7c7GJ5_5iqlgarZKAzmYciLk,7225
 dataeval/utils/data/__init__.py,sha256=xGzrjrOxOP2DP1tU84AWMKPnSxFvSjM81CTlDg4rNM8,331
-dataeval/utils/data/_dataset.py,sha256=
+dataeval/utils/data/_dataset.py,sha256=tC_vqgWnmojAoAANo5BUVfEUYXl7GzOBSeYjR9olbDk,9506
 dataeval/utils/data/collate.py,sha256=5egEEKhNNCGeNLChO1p6dZ4Wg6x51VEaMNHz7hEZUxI,3936
 dataeval/utils/data/metadata.py,sha256=L1c2bCiMj0aR0QCoKkjwBujIftJDEMgW_3ZbgeS8WHo,14703
 dataeval/utils/datasets/__init__.py,sha256=pAXqHX76yAoBI8XB3m6zGuW-u3s3PCoIXG5GDzxH7Zs,572
 dataeval/utils/datasets/_antiuav.py,sha256=kA_ia1fYNcJiz9SpCvh-Z8iSc7iJrdogjBI3soyaa7A,8304
 dataeval/utils/datasets/_base.py,sha256=pyfpJda3ku469M3TFRsJn9S2oAiQODOGTlLcdcoEW9U,9031
 dataeval/utils/datasets/_cifar10.py,sha256=hZc_A30yKYBbv2kvVdEkZ9egyEe6XBUnmksoIAoJ-5Y,8265
-dataeval/utils/datasets/_fileio.py,sha256=
+dataeval/utils/datasets/_fileio.py,sha256=LEoFVNdryRdi7mKpWw-9D8lA6XMa-Jaszd85bv93POo,5454
 dataeval/utils/datasets/_milco.py,sha256=iXf4C1I3Eg_3gHKUe4XPi21yFMBO51zxTIqAkGf9bYg,7869
 dataeval/utils/datasets/_mixin.py,sha256=S8iii-SoYUsFFYNXjw2thlZkpBvRLnZ4XI8wTqOKXgU,1729
 dataeval/utils/datasets/_mnist.py,sha256=uz46sE1Go3TgGjG6x2cXckSVQ0mSg2mhgk8BUvLWjb0,8149
@@ -102,12 +102,12 @@ dataeval/utils/datasets/_voc.py,sha256=pafY112O80isYkrdy7Quie9SBm_TmYhREuyl8Sxts
 dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
 dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
 dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
-dataeval/utils/torch/_internal.py,sha256=
+dataeval/utils/torch/_internal.py,sha256=HuyBB7NWFI9sUrRbOCZFxOfZjRGPdqr5iF7_DT2S0wo,4159
 dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
 dataeval/utils/torch/trainer.py,sha256=Oc2lK13uPGhmLYbmAqlPWyKxgG4YJFlnSXCqFHUZbdA,5528
 dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
 dataeval/workflows/sufficiency.py,sha256=j-R8dg4XE6a66p_oTXG2GNzgg3vGk85CTblxhFXaxog,8513
-dataeval-0.86.
-dataeval-0.86.
-dataeval-0.86.
-dataeval-0.86.
+dataeval-0.86.5.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
+dataeval-0.86.5.dist-info/METADATA,sha256=qx7aNDgzyAfRRKWjDXkfXojBdsBFnjMgwTVl0JsLbbw,5353
+dataeval-0.86.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+dataeval-0.86.5.dist-info/RECORD,,
{dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/LICENSE.txt
File without changes
{dataeval-0.86.3.dist-info → dataeval-0.86.5.dist-info}/WHEEL
File without changes