dataeval 0.86.3__py3-none-any.whl → 0.86.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dataeval/__init__.py CHANGED
@@ -8,7 +8,7 @@ shifts that impact performance of deployed models.
8
8
  from __future__ import annotations
9
9
 
10
10
  __all__ = ["config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
11
- __version__ = "0.86.3"
11
+ __version__ = "0.86.5"
12
12
 
13
13
  import logging
14
14
 
dataeval/config.py CHANGED
@@ -25,7 +25,7 @@ _seed: int | None = None
25
25
 
26
26
  ### CONSTS ###
27
27
 
28
- EPSILON = 1e-10
28
+ EPSILON = 1e-12
29
29
 
30
30
  ### TYPES ###
31
31
 
dataeval/data/_images.py CHANGED
@@ -4,6 +4,8 @@ __all__ = []
4
4
 
5
5
  from typing import TYPE_CHECKING, Any, Generic, Iterator, Sequence, TypeVar, cast, overload
6
6
 
7
+ import numpy as np
8
+
7
9
  from dataeval.typing import Array, ArrayLike, Dataset
8
10
  from dataeval.utils._array import as_numpy, channels_first_to_last
9
11
 
@@ -58,7 +60,7 @@ class Images(Generic[T]):
58
60
  num_images = len(indices)
59
61
  num_rows = (num_images + images_per_row - 1) // images_per_row
60
62
  fig, axes = plt.subplots(num_rows, images_per_row, figsize=figsize)
61
- for i, ax in enumerate(axes.flatten()):
63
+ for i, ax in enumerate(np.asarray(axes).flatten()):
62
64
  image = channels_first_to_last(as_numpy(self[i]))
63
65
  ax.imshow(image)
64
66
  ax.axis("off")
@@ -4,7 +4,7 @@ __all__ = []
4
4
 
5
5
  import warnings
6
6
  from dataclasses import dataclass
7
- from typing import Any, Iterable, Literal, Mapping, Sequence
7
+ from typing import Any, Iterable, Literal, Mapping, Sequence, Sized
8
8
 
9
9
  import numpy as np
10
10
  import polars as pl
@@ -20,6 +20,10 @@ from dataeval.utils._bin import bin_data, digitize_data
20
20
  from dataeval.utils.data.metadata import merge
21
21
 
22
22
 
23
+ def _binned(name: str) -> str:
24
+ return f"{name}[]"
25
+
26
+
23
27
  @dataclass
24
28
  class FactorInfo:
25
29
  factor_type: Literal["categorical", "continuous", "discrete"] | None = None
@@ -65,6 +69,7 @@ class Metadata:
65
69
  self._is_binned = False
66
70
 
67
71
  self._dataset = dataset
72
+ self._count = len(dataset) if isinstance(dataset, Sized) else 0
68
73
  self._continuous_factor_bins = dict(continuous_factor_bins) if continuous_factor_bins else {}
69
74
  self._auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = auto_bin_method
70
75
 
@@ -157,13 +162,13 @@ class Metadata:
157
162
  def factor_names(self) -> Sequence[str]:
158
163
  """Factor names of the metadata."""
159
164
  self._structure()
160
- return list(self._factors)
165
+ return list(filter(self._filter, self._factors))
161
166
 
162
167
  @property
163
168
  def factor_info(self) -> Mapping[str, FactorInfo]:
164
169
  """Factor types of the metadata."""
165
170
  self._bin()
166
- return self._factors
171
+ return dict(filter(self._filter, self._factors.items()))
167
172
 
168
173
  @property
169
174
  def factor_data(self) -> NDArray[Any]:
@@ -194,14 +199,19 @@ class Metadata:
194
199
 
195
200
  @property
196
201
  def image_count(self) -> int:
197
- self._bin()
198
- return int(self._image_indices.max() + 1)
202
+ if self._count == 0:
203
+ self._structure()
204
+ return self._count
205
+
206
+ def _filter(self, factor: str | tuple[str, Any]) -> bool:
207
+ factor = factor[0] if isinstance(factor, tuple) else factor
208
+ return factor in self.include if self.include else factor not in self.exclude
199
209
 
200
210
  def _reset_bins(self, cols: Iterable[str] | None = None) -> None:
201
211
  if self._is_binned:
202
212
  columns = self._dataframe.columns
203
- for col in (col for col in cols or columns if f"{col}[|]" in columns):
204
- self._dataframe.drop_in_place(f"{col}[|]")
213
+ for col in (col for col in cols or columns if _binned(col) in columns):
214
+ self._dataframe.drop_in_place(_binned(col))
205
215
  self._factors[col] = FactorInfo()
206
216
  self._is_binned = False
207
217
 
@@ -244,7 +254,7 @@ class Metadata:
244
254
  bboxes = as_numpy(bboxes).astype(np.float32) if is_od else None
245
255
  srcidx = as_numpy(srcidx).astype(np.intp) if is_od else None
246
256
 
247
- index2label = self._dataset.metadata.get("index2label", {})
257
+ index2label = self._dataset.metadata.get("index2label", {i: str(i) for i in np.unique(labels)})
248
258
 
249
259
  targets_per_image = None if srcidx is None else np.unique(srcidx, return_counts=True)[1].tolist()
250
260
  merged = merge(raw, return_dropped=True, ignore_lists=False, targets_per_image=targets_per_image)
@@ -260,8 +270,9 @@ class Metadata:
260
270
  }
261
271
 
262
272
  self._raw = raw
273
+ self._index2label = index2label
263
274
  self._class_labels = labels
264
- self._class_names = [index2label.get(i, str(i)) for i in np.unique(labels)]
275
+ self._class_names = list(index2label.values())
265
276
  self._image_indices = target_dict["image_index"]
266
277
  self._factors = dict.fromkeys(factor_dict, FactorInfo())
267
278
  self._dataframe = pl.DataFrame({**target_dict, **factor_dict})
@@ -289,10 +300,10 @@ class Metadata:
289
300
  )
290
301
 
291
302
  column_set = set(df.columns)
292
- for col in (col for col in self.factor_names if f"{col}[|]" not in column_set):
303
+ for col in (col for col in self.factor_names if _binned(col) not in column_set):
293
304
  # Get data as numpy array for processing
294
305
  data = df[col].to_numpy()
295
- col_dz = f"{col}[|]"
306
+ col_dz = _binned(col)
296
307
  if col in factor_bins:
297
308
  # User provided binning
298
309
  bins = factor_bins[col]
@@ -326,23 +337,6 @@ class Metadata:
326
337
  self._factors.update(factor_info)
327
338
  self._is_binned = True
328
339
 
329
- def get_factors_by_type(self, factor_type: Literal["categorical", "continuous", "discrete"]) -> Sequence[str]:
330
- """
331
- Get the names of factors of a specific type.
332
-
333
- Parameters
334
- ----------
335
- factor_type : Literal["categorical", "continuous", "discrete"]
336
- The type of factors to retrieve.
337
-
338
- Returns
339
- -------
340
- list[str]
341
- List of factor names of the specified type.
342
- """
343
- self._bin()
344
- return [name for name, info in self.factor_info.items() if info.factor_type == factor_type]
345
-
346
340
  def add_factors(self, factors: Mapping[str, Array | Sequence[Any]]) -> None:
347
341
  """
348
342
  Add additional factors to the metadata.
@@ -29,7 +29,7 @@ class AbstractResult(GenericOutput[pd.DataFrame]):
29
29
  self._data = results_data.copy(deep=True)
30
30
 
31
31
  def data(self) -> pd.DataFrame:
32
- return self.to_df()
32
+ return self.to_dataframe()
33
33
 
34
34
  @property
35
35
  def empty(self) -> bool:
@@ -38,7 +38,7 @@ class AbstractResult(GenericOutput[pd.DataFrame]):
38
38
  def __len__(self) -> int:
39
39
  return 0 if self.empty else len(self._data)
40
40
 
41
- def to_df(self, multilevel: bool = True) -> pd.DataFrame:
41
+ def to_dataframe(self, multilevel: bool = True) -> pd.DataFrame:
42
42
  """Export results to pandas dataframe."""
43
43
  if multilevel:
44
44
  return self._data
@@ -7,6 +7,7 @@ from typing import Any, Literal, Sequence, overload
7
7
  import numpy as np
8
8
  from numpy.typing import NDArray
9
9
 
10
+ from dataeval.config import EPSILON
10
11
  from dataeval.data._images import Images
11
12
  from dataeval.metrics.stats._base import combine_stats, get_dataset_step_from_idx
12
13
  from dataeval.metrics.stats._imagestats import imagestats
@@ -18,26 +19,56 @@ from dataeval.typing import ArrayLike, Dataset
18
19
 
19
20
 
20
21
  def _get_outlier_mask(
21
- values: NDArray, method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
22
- ) -> NDArray:
22
+ values: NDArray[Any], method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
23
+ ) -> NDArray[np.bool_]:
24
+ if len(values) == 0:
25
+ return np.array([], dtype=bool)
26
+
23
27
  values = values.astype(np.float64)
28
+
29
+ valid_mask = ~np.isnan(values)
30
+ outliers = np.full(values.shape, False, dtype=bool)
31
+
32
+ if not np.any(valid_mask):
33
+ return outliers
34
+
24
35
  if method == "zscore":
25
- threshold = threshold if threshold else 3.0
26
- std = np.std(values)
27
- abs_diff = np.abs(values - np.mean(values))
28
- return std != 0 and (abs_diff / std) > threshold
29
- if method == "modzscore":
30
- threshold = threshold if threshold else 3.5
31
- abs_diff = np.abs(values - np.median(values))
32
- med_abs_diff = np.median(abs_diff) if np.median(abs_diff) != 0 else np.mean(abs_diff)
33
- mod_z_score = 0.6745 * abs_diff / med_abs_diff
34
- return mod_z_score > threshold
35
- if method == "iqr":
36
- threshold = threshold if threshold else 1.5
37
- qrt = np.percentile(values, q=(25, 75), method="midpoint")
38
- iqr = (qrt[1] - qrt[0]) * threshold
39
- return (values < (qrt[0] - iqr)) | (values > (qrt[1] + iqr))
40
- raise ValueError("Outlier method must be 'zscore' 'modzscore' or 'iqr'.")
36
+ threshold = threshold if threshold is not None else 3.0
37
+
38
+ std_val = np.nanstd(values)
39
+
40
+ if std_val > EPSILON:
41
+ mean_val = np.nanmean(values)
42
+ abs_diff = np.abs(values - mean_val)
43
+ outliers = (abs_diff / std_val) > threshold
44
+
45
+ elif method == "modzscore":
46
+ threshold = threshold if threshold is not None else 3.5
47
+
48
+ median_val = np.nanmedian(values)
49
+ abs_diff = np.abs(values - median_val)
50
+ m_abs_diff = np.nanmedian(abs_diff)
51
+ m_abs_diff = np.nanmean(abs_diff) if m_abs_diff <= EPSILON else m_abs_diff
52
+
53
+ if m_abs_diff > EPSILON:
54
+ mod_z_score = 0.6745 * abs_diff / m_abs_diff
55
+ outliers = mod_z_score > threshold
56
+
57
+ elif method == "iqr":
58
+ threshold = threshold if threshold is not None else 1.5
59
+
60
+ qrt = np.nanpercentile(values, q=(25, 75), method="midpoint")
61
+ iqr_val = qrt[1] - qrt[0]
62
+
63
+ if iqr_val > EPSILON:
64
+ iqr_threshold = iqr_val * threshold
65
+ outliers = (values < (qrt[0] - iqr_threshold)) | (values > (qrt[1] + iqr_threshold))
66
+
67
+ else:
68
+ raise ValueError("Outlier method must be 'zscore' 'modzscore' or 'iqr'.")
69
+
70
+ outliers[~valid_mask] = False
71
+ return outliers
41
72
 
42
73
 
43
74
  class Outliers:
@@ -164,10 +195,10 @@ class Outliers:
164
195
  >>> len(results)
165
196
  2
166
197
  >>> results.issues[0]
167
- {10: {'skew': -3.906, 'kurtosis': 13.266, 'entropy': 0.2128}, 12: {'std': 0.00536, 'var': 2.87e-05, 'skew': -3.906, 'kurtosis': 13.266, 'entropy': 0.2128}}
198
+ {10: {'entropy': 0.2128}, 12: {'std': 0.00536, 'var': 2.87e-05, 'entropy': 0.2128}}
168
199
  >>> results.issues[1]
169
200
  {}
170
- """ # noqa: E501
201
+ """
171
202
  if isinstance(stats, (ImageStatsOutput, DimensionStatsOutput, PixelStatsOutput, VisualStatsOutput)):
172
203
  return OutliersOutput(self._get_outliers(stats.data()))
173
204
 
@@ -221,7 +252,7 @@ class Outliers:
221
252
  >>> list(results.issues)
222
253
  [10, 12]
223
254
  >>> results.issues[10]
224
- {'contrast': 1.25, 'zeros': 0.05493, 'skew': -3.906, 'kurtosis': 13.266, 'entropy': 0.2128}
255
+ {'contrast': 1.25, 'zeros': 0.05493, 'entropy': 0.2128}
225
256
  """
226
257
  images = Images(data) if isinstance(data, Dataset) else data
227
258
  self.stats = imagestats(images)
@@ -81,7 +81,7 @@ def metadata_distance(metadata1: Metadata, metadata2: Metadata) -> MetadataDista
81
81
  """
82
82
 
83
83
  _compare_keys(metadata1.factor_names, metadata2.factor_names)
84
- cont_fnames = metadata1.get_factors_by_type("continuous")
84
+ cont_fnames = [name for name, info in metadata1.factor_info.items() if info.factor_type == "continuous"]
85
85
 
86
86
  if not cont_fnames:
87
87
  return MetadataDistanceOutput({})
@@ -99,9 +99,10 @@ def balance(
99
99
  factor_types = {"class_label": "categorical"} | {k: v.factor_type for k, v in metadata.factor_info.items()}
100
100
  is_discrete = [factor_type != "continuous" for factor_type in factor_types.values()]
101
101
  num_factors = len(factor_types)
102
+ class_labels = metadata.class_labels
102
103
 
103
104
  mi = np.full((num_factors, num_factors), np.nan, dtype=np.float32)
104
- data = np.hstack((metadata.class_labels[:, np.newaxis], data))
105
+ data = np.hstack((class_labels[:, np.newaxis], data))
105
106
 
106
107
  for idx, factor_type in enumerate(factor_types.values()):
107
108
  if factor_type != "continuous":
@@ -132,12 +133,12 @@ def balance(
132
133
  factors = nmi[1:, 1:]
133
134
 
134
135
  # assume class is a factor
135
- num_classes = len(metadata.class_names)
136
+ u_classes = np.unique(class_labels)
137
+ num_classes = len(u_classes)
136
138
  classwise_mi = np.full((num_classes, num_factors), np.nan, dtype=np.float32)
137
139
 
138
140
  # classwise targets
139
- classes = np.unique(metadata.class_labels)
140
- tgt_bin = data[:, 0][:, None] == classes
141
+ tgt_bin = data[:, 0][:, None] == u_classes
141
142
 
142
143
  # classification MI for discrete/categorical features
143
144
  for idx in range(num_classes):
@@ -13,8 +13,8 @@ from multiprocessing import Pool
13
13
  from typing import Any, Callable, Generic, Iterable, Iterator, Sequence, TypeVar
14
14
 
15
15
  import numpy as np
16
- import tqdm
17
16
  from numpy.typing import NDArray
17
+ from tqdm.auto import tqdm
18
18
 
19
19
  from dataeval.config import get_max_processes
20
20
  from dataeval.outputs._stats import BASE_ATTRS, BaseStatsOutput, SourceIndex
@@ -77,7 +77,7 @@ class PoolWrapper:
77
77
  """
78
78
 
79
79
  def __init__(self, processes: int | None) -> None:
80
- self.pool = Pool(processes) if processes is not None and processes > 1 else None
80
+ self.pool = Pool(processes) if processes is None or processes > 1 else None
81
81
 
82
82
  def imap(self, func: Callable[[_S], _T], iterable: Iterable[_S]) -> Iterator[_T]:
83
83
  return map(func, iterable) if self.pool is None else self.pool.imap(func, iterable)
@@ -93,7 +93,7 @@ class PoolWrapper:
93
93
 
94
94
  class StatsProcessor(Generic[TStatsOutput]):
95
95
  output_class: type[TStatsOutput]
96
- cache_keys: list[str] = []
96
+ cache_keys: set[str] = set()
97
97
  image_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
98
98
  channel_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
99
99
 
@@ -267,7 +267,7 @@ def run_stats(
267
267
  stats_processor_cls = stats_processor_cls if isinstance(stats_processor_cls, Iterable) else [stats_processor_cls]
268
268
 
269
269
  with PoolWrapper(processes=get_max_processes()) as p:
270
- for r in tqdm.tqdm(
270
+ for r in tqdm(
271
271
  p.imap(
272
272
  partial(
273
273
  process_stats_unpack,
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  __all__ = []
4
4
 
5
- from typing import Any, Mapping, TypeVar
5
+ from typing import Any, TypeVar
6
6
 
7
7
  import polars as pl
8
8
 
@@ -14,10 +14,6 @@ from dataeval.typing import AnnotatedDataset
14
14
  TValue = TypeVar("TValue")
15
15
 
16
16
 
17
- def _sort_to_list(d: Mapping[int, TValue]) -> list[TValue]:
18
- return [t[1] for t in sorted(d.items())]
19
-
20
-
21
17
  @set_metadata
22
18
  def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
23
19
  """
@@ -58,21 +54,25 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
58
54
 
59
55
  # Count occurrences of each label across all images
60
56
  label_counts_df = metadata_df.group_by("class_label").len()
61
- label_counts = label_counts_df.sort("class_label")["len"].to_list()
57
+ label_counts = dict(zip(label_counts_df["class_label"], label_counts_df["len"]))
62
58
 
63
59
  # Count unique images per label (how many images contain each label)
64
60
  image_counts_df = metadata_df.select(["image_index", "class_label"]).unique().group_by("class_label").len()
65
- image_counts = image_counts_df.sort("class_label")["len"].to_list()
61
+ image_counts = dict(zip(image_counts_df["class_label"], image_counts_df["len"]))
66
62
 
67
63
  # Create index_location mapping (which images contain each label)
68
- index_location: list[list[int]] = [[] for _ in range(len(metadata.class_names))]
64
+ index_location: dict[int, list[int]] = {}
69
65
  for row in metadata_df.group_by("class_label").agg(pl.col("image_index")).to_dicts():
70
66
  indices = row["image_index"]
71
67
  index_location[row["class_label"]] = sorted(dict.fromkeys(indices)) if isinstance(indices, list) else [indices]
72
68
 
73
69
  # Count labels per image
74
- label_per_image_df = metadata_df.group_by("image_index").agg(pl.count().alias("label_count"))
75
- label_per_image = label_per_image_df.sort("image_index")["label_count"].to_list()
70
+ label_per_image_df = metadata_df.group_by("image_index").agg(pl.len().alias("label_count"))
71
+
72
+ # Join with all indices to include missing ones with 0 count
73
+ all_indices = pl.DataFrame({"image_index": range(metadata.image_count)})
74
+ complete_label_df = all_indices.join(label_per_image_df, on="image_index", how="left").fill_null(0)
75
+ label_per_image = complete_label_df.sort("image_index")["label_count"].to_list()
76
76
 
77
77
  return LabelStatsOutput(
78
78
  label_counts_per_class=label_counts,
@@ -81,6 +81,6 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
81
81
  image_indices_per_class=index_location,
82
82
  image_count=len(label_per_image),
83
83
  class_count=len(metadata.class_names),
84
- label_count=sum(label_counts),
84
+ label_count=sum(label_counts.values()),
85
85
  class_names=metadata.class_names,
86
86
  )
@@ -15,12 +15,13 @@ from dataeval.typing import ArrayLike, Dataset
15
15
 
16
16
  class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
17
17
  output_class: type = PixelStatsOutput
18
+ cache_keys = {"histogram"}
18
19
  image_function_map: dict[str, Callable[[StatsProcessor[PixelStatsOutput]], Any]] = {
19
20
  "mean": lambda x: np.nanmean(x.scaled),
20
21
  "std": lambda x: np.nanstd(x.scaled),
21
22
  "var": lambda x: np.nanvar(x.scaled),
22
- "skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
23
- "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
23
+ "skew": lambda x: skew(x.scaled.ravel(), nan_policy="omit"),
24
+ "kurtosis": lambda x: kurtosis(x.scaled.ravel(), nan_policy="omit"),
24
25
  "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
25
26
  "entropy": lambda x: entropy(x.get("histogram")),
26
27
  }
@@ -28,8 +29,8 @@ class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
28
29
  "mean": lambda x: np.nanmean(x.scaled, axis=1),
29
30
  "std": lambda x: np.nanstd(x.scaled, axis=1),
30
31
  "var": lambda x: np.nanvar(x.scaled, axis=1),
31
- "skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
32
- "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
32
+ "skew": lambda x: skew(x.scaled, axis=1, nan_policy="omit"),
33
+ "kurtosis": lambda x: kurtosis(x.scaled, axis=1, nan_policy="omit"),
33
34
  "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
34
35
  "entropy": lambda x: entropy(x.get("histogram"), axis=1),
35
36
  }
@@ -6,6 +6,7 @@ from typing import Any, Callable
6
6
 
7
7
  import numpy as np
8
8
 
9
+ from dataeval.config import EPSILON
9
10
  from dataeval.metrics.stats._base import StatsProcessor, run_stats
10
11
  from dataeval.outputs import VisualStatsOutput
11
12
  from dataeval.outputs._base import set_metadata
@@ -17,23 +18,21 @@ QUARTILES = (0, 25, 50, 75, 100)
17
18
 
18
19
  class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
19
20
  output_class: type = VisualStatsOutput
21
+ cache_keys: set[str] = {"percentiles"}
20
22
  image_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
21
23
  "brightness": lambda x: x.get("percentiles")[1],
22
- "contrast": lambda x: 0
23
- if np.mean(x.get("percentiles")) == 0
24
- else (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles")),
24
+ "contrast": lambda x: (np.max(x.get("percentiles")) - np.min(x.get("percentiles")))
25
+ / (np.mean(x.get("percentiles")) + EPSILON),
25
26
  "darkness": lambda x: x.get("percentiles")[-2],
26
27
  "missing": lambda x: np.count_nonzero(np.isnan(np.sum(x.image, axis=0))) / np.prod(x.shape[-2:]),
27
28
  "sharpness": lambda x: np.nanstd(edge_filter(np.mean(x.image, axis=0))),
28
- "zeros": lambda x: np.count_nonzero(np.nansum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
29
+ "zeros": lambda x: np.count_nonzero(np.sum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
29
30
  "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
30
31
  }
31
32
  channel_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
32
33
  "brightness": lambda x: x.get("percentiles")[:, 1],
33
- "contrast": lambda x: np.nan_to_num(
34
- (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
35
- / np.mean(x.get("percentiles"), axis=1)
36
- ),
34
+ "contrast": lambda x: (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
35
+ / (np.mean(x.get("percentiles"), axis=1) + EPSILON),
37
36
  "darkness": lambda x: x.get("percentiles")[:, -2],
38
37
  "missing": lambda x: np.count_nonzero(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
39
38
  "sharpness": lambda x: np.nanstd(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
@@ -114,7 +114,7 @@ class DriftMVDCOutput(PerMetricResult):
114
114
  import matplotlib.pyplot as plt
115
115
 
116
116
  fig, ax = plt.subplots(dpi=300)
117
- resdf = self.to_df()
117
+ resdf = self.to_dataframe()
118
118
  xticks = np.arange(resdf.shape[0])
119
119
  trndf = resdf[resdf["chunk"]["period"] == "reference"]
120
120
  tstdf = resdf[resdf["chunk"]["period"] == "analysis"]
@@ -54,7 +54,7 @@ def _reorganize_by_class_and_metric(
54
54
  for img, group in result.items():
55
55
  for extreme in group:
56
56
  metrics.setdefault(extreme, []).append(img)
57
- for i, images in enumerate(lstats.image_indices_per_class):
57
+ for i, images in lstats.image_indices_per_class.items():
58
58
  if img in images:
59
59
  class_wise[lstats.class_names[i]][extreme] = class_wise[lstats.class_names[i]].get(extreme, 0) + 1
60
60
 
@@ -6,7 +6,7 @@ from dataclasses import dataclass
6
6
  from typing import TYPE_CHECKING, Any, Iterable, Mapping, NamedTuple, Optional, Sequence, Union
7
7
 
8
8
  import numpy as np
9
- import pandas as pd
9
+ import polars as pl
10
10
  from numpy.typing import NDArray
11
11
  from typing_extensions import TypeAlias
12
12
 
@@ -22,7 +22,7 @@ SOURCE_INDEX = "source_index"
22
22
  OBJECT_COUNT = "object_count"
23
23
  IMAGE_COUNT = "image_count"
24
24
 
25
- BASE_ATTRS = (SOURCE_INDEX, OBJECT_COUNT, IMAGE_COUNT)
25
+ BASE_ATTRS = [SOURCE_INDEX, OBJECT_COUNT, IMAGE_COUNT]
26
26
 
27
27
 
28
28
  class SourceIndex(NamedTuple):
@@ -156,14 +156,21 @@ class BaseStatsOutput(Output):
156
156
  Mapping[str, NDArray[Any]]
157
157
  """
158
158
  filter_ = [filter] if isinstance(filter, str) else filter
159
+
160
+ """
161
+ Performs validation checks to ensure selected keys and constant or 1-D values
162
+ Each set of checks returns True if a valid value.
163
+ Only one set of final checks needs to be True to allow the value through
164
+ """
159
165
  return {
160
166
  k: v
161
167
  for k, v in self.data().items()
162
- if k not in BASE_ATTRS
163
- and (filter_ is None or k in filter_)
164
- and isinstance(v, np.ndarray)
165
- and v.ndim == 1
166
- and (not exclude_constant or len(np.unique(v)) > 1)
168
+ if (
169
+ k not in BASE_ATTRS # Ignore BaseStatsOutput attributes
170
+ and (filter_ is None or k in filter_) # Key is selected
171
+ and (isinstance(v, np.ndarray) and v.ndim == 1) # Check valid array
172
+ and (not exclude_constant or len(np.unique(v)) > 1) # Check valid numpy "constant"
173
+ )
167
174
  }
168
175
 
169
176
  def plot(
@@ -195,6 +202,11 @@ class BaseStatsOutput(Output):
195
202
  return histogram_plot(factors, log)
196
203
  return channel_histogram_plot(factors, log, max_channels, ch_mask)
197
204
 
205
+ def to_dataframe(self) -> pl.DataFrame:
206
+ """Returns the processed factors a polars dataframe of shape (factors, samples)"""
207
+
208
+ return pl.DataFrame(self.factors())
209
+
198
210
 
199
211
  @dataclass(frozen=True)
200
212
  class DimensionStatsOutput(BaseStatsOutput):
@@ -256,6 +268,43 @@ class HashStatsOutput(BaseStatsOutput):
256
268
  xxhash: Sequence[str]
257
269
  pchash: Sequence[str]
258
270
 
271
+ def to_dataframe(self) -> pl.DataFrame:
272
+ """
273
+ Returns a polars dataframe for the xxhash and pchash attributes of each sample
274
+
275
+ Note
276
+ ----
277
+ xxhash and pchash do not follow the normal definition of factors but are
278
+ helpful attributes of the data
279
+
280
+ Examples
281
+ --------
282
+ Display the hashes of a dataset of images, whose shape is (C, H, W),
283
+ as a polars DataFrame
284
+
285
+ >>> from dataeval.metrics.stats import hashstats
286
+ >>> results = hashstats(dataset)
287
+ >>> print(results.to_dataframe())
288
+ shape: (8, 2)
289
+ ┌──────────────────┬──────────────────┐
290
+ │ xxhash ┆ pchash │
291
+ │ --- ┆ --- │
292
+ │ str ┆ str │
293
+ ╞══════════════════╪══════════════════╡
294
+ │ 69b50a5f06af238c ┆ e666999999266666 │
295
+ │ 5a861d7a23d1afe7 ┆ e666999999266666 │
296
+ │ 7ffdb4990ad44ac6 ┆ e666999966666299 │
297
+ │ 4f0c366a3298ceac ┆ e666999999266666 │
298
+ │ c5519e36ac1f8839 ┆ 96e91656e91616e9 │
299
+ │ e7e92346159a4567 ┆ e666999999266666 │
300
+ │ 9a538f797a5ba8ee ┆ e666999999266666 │
301
+ │ 1a658bd2a1baee25 ┆ e666999999266666 │
302
+ └──────────────────┴──────────────────┘
303
+ """
304
+ data = {"xxhash": self.xxhash, "pchash": self.pchash}
305
+ schema = {"xxhash": str, "pchash": str}
306
+ return pl.DataFrame(data=data, schema=schema)
307
+
259
308
 
260
309
  @dataclass(frozen=True)
261
310
  class LabelStatsOutput(Output):
@@ -272,7 +321,7 @@ class LabelStatsOutput(Output):
272
321
  image_counts_per_class : Mapping[int, int]
273
322
  Dictionary whose keys are the different label classes and
274
323
  values are total counts of each image the class is present in
275
- image_indices_per_class : Mapping[int, list]
324
+ image_indices_per_class : Mapping[int, Sequence[int]]
276
325
  Dictionary whose keys are the different label classes and
277
326
  values are lists containing the images that have that label
278
327
  image_count : int
@@ -284,10 +333,10 @@ class LabelStatsOutput(Output):
284
333
  class_names : Sequence[str]
285
334
  """
286
335
 
287
- label_counts_per_class: Sequence[int]
336
+ label_counts_per_class: Mapping[int, int]
288
337
  label_counts_per_image: Sequence[int]
289
- image_counts_per_class: Sequence[int]
290
- image_indices_per_class: Sequence[Sequence[int]]
338
+ image_counts_per_class: Mapping[int, int]
339
+ image_indices_per_class: Mapping[int, Sequence[int]]
291
340
  image_count: int
292
341
  class_count: int
293
342
  label_count: int
@@ -325,17 +374,13 @@ class LabelStatsOutput(Output):
325
374
 
326
375
  return "\n".join(table_str)
327
376
 
328
- def to_dataframe(self) -> pd.DataFrame:
377
+ def to_dataframe(self) -> pl.DataFrame:
329
378
  """
330
- Exports the label statistics output results to a pandas DataFrame.
331
-
332
- Notes
333
- -----
334
- This method requires `pandas <https://pandas.pydata.org/>`_ to be installed.
379
+ Exports the label statistics output results to a polars DataFrame.
335
380
 
336
381
  Returns
337
382
  -------
338
- pd.DataFrame
383
+ pl.DataFrame
339
384
  """
340
385
  total_count = []
341
386
  image_count = []
@@ -343,7 +388,7 @@ class LabelStatsOutput(Output):
343
388
  total_count.append(self.label_counts_per_class[cls])
344
389
  image_count.append(self.image_counts_per_class[cls])
345
390
 
346
- return pd.DataFrame(
391
+ return pl.DataFrame(
347
392
  {
348
393
  "Label": self.class_names,
349
394
  "Total Count": total_count,
dataeval/utils/_plot.py CHANGED
@@ -164,9 +164,9 @@ def histogram_plot(
164
164
  rows = math.ceil(num_metrics / 3)
165
165
  cols = min(num_metrics, 3)
166
166
  fig, axs = plt.subplots(rows, 3, figsize=(cols * 3 + 1, rows * 3))
167
-
167
+ axs_flat = np.asarray(axs).flatten()
168
168
  for ax, metric in zip(
169
- axs.flat,
169
+ axs_flat,
170
170
  data_dict,
171
171
  ):
172
172
  # Plot the histogram for the chosen metric
@@ -177,7 +177,7 @@ def histogram_plot(
177
177
  ax.set_ylabel(ylabel)
178
178
  ax.set_xlabel(xlabel)
179
179
 
180
- for ax in axs.flat[num_metrics:]:
180
+ for ax in axs_flat[num_metrics:]:
181
181
  ax.axis("off")
182
182
  ax.set_visible(False)
183
183
 
@@ -222,9 +222,9 @@ def channel_histogram_plot(
222
222
  rows = math.ceil(num_metrics / 3)
223
223
  cols = min(num_metrics, 3)
224
224
  fig, axs = plt.subplots(rows, 3, figsize=(cols * 3 + 1, rows * 3))
225
-
225
+ axs_flat = np.asarray(axs).flatten()
226
226
  for ax, metric in zip(
227
- axs.flat,
227
+ axs_flat,
228
228
  data_keys,
229
229
  ):
230
230
  # Plot the histogram for the chosen metric
@@ -246,7 +246,7 @@ def channel_histogram_plot(
246
246
  ax.set_ylabel(ylabel)
247
247
  ax.set_xlabel(xlabel)
248
248
 
249
- for ax in axs.flat[num_metrics:]:
249
+ for ax in axs_flat[num_metrics:]:
250
250
  ax.axis("off")
251
251
  ax.set_visible(False)
252
252
 
@@ -72,9 +72,8 @@ def _listify_metadata(
72
72
 
73
73
  def _find_max(arr: ArrayLike) -> Any:
74
74
  if not isinstance(arr, (bytes, str)) and isinstance(arr, (Iterable, Sequence, Array)):
75
- if isinstance(arr[0], (Iterable, Sequence, Array)):
76
- return max([_find_max(x) for x in arr]) # type: ignore
77
- return max(arr)
75
+ nested = [x for x in [_find_max(x) for x in arr] if x is not None]
76
+ return max(nested) if len(nested) > 0 else None
78
77
  return arr
79
78
 
80
79
 
@@ -8,7 +8,7 @@ import zipfile
8
8
  from pathlib import Path
9
9
 
10
10
  import requests
11
- from tqdm import tqdm
11
+ from tqdm.auto import tqdm
12
12
 
13
13
  ARCHIVE_ENDINGS = [".zip", ".tar", ".tgz"]
14
14
  COMPRESS_ENDINGS = [".gz", ".bz2"]
@@ -8,7 +8,7 @@ import numpy as np
8
8
  import torch
9
9
  from numpy.typing import NDArray
10
10
  from torch.utils.data import DataLoader, TensorDataset
11
- from tqdm import tqdm
11
+ from tqdm.auto import tqdm
12
12
 
13
13
  from dataeval.config import DeviceLike, get_device
14
14
  from dataeval.typing import Array
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataeval
3
- Version: 0.86.3
3
+ Version: 0.86.5
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Home-page: https://dataeval.ai/
6
6
  License: MIT
@@ -1,10 +1,10 @@
1
- dataeval/__init__.py,sha256=Z_VUOb2gf--uAtqeXyzIPUm11noNeEj16OSfkc6H6-Y,1636
1
+ dataeval/__init__.py,sha256=5qOVdEDEZt5O--VufuRJXGEByzQC7pJWZluFGzPuNOc,1636
2
2
  dataeval/_log.py,sha256=C7AGkIRzymvYJ0LQXtnShiy3i5Xrp8T58JzIHHguk_Q,365
3
- dataeval/config.py,sha256=hjad0TK1UmaKQlUuxqxt64_OAUqZkHjicBf06cvTyrQ,4082
3
+ dataeval/config.py,sha256=bHa8np4FCtLLv8_xlfdDC4lb1InJ_kT0vXDO5P42rvk,4082
4
4
  dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
5
5
  dataeval/data/_embeddings.py,sha256=PFjpdV9bfusCB4taTIYSzx1hP8nJb_KCkZTN8kMw-Hs,12885
6
- dataeval/data/_images.py,sha256=3d4Cv-xg5z6_LVtw1eL_QdFwzbDI1cwvPNQblkrMEMk,2622
7
- dataeval/data/_metadata.py,sha256=jEfGZhhvry7qtjU47VifL8ZO1hqXg1jntR3CztkaoWU,14462
6
+ dataeval/data/_images.py,sha256=Rc_59CuU4zfN7Xm7an1XUx8ZghQg6a56VJWMZD9edRw,2654
7
+ dataeval/data/_metadata.py,sha256=OTda9V7DA5Ejxip_NR16LCK2C8HMtpjWHHiFoW3LrLY,14364
8
8
  dataeval/data/_selection.py,sha256=r06xeiyK8nTWPLyItkoPQRWZI1i6LATSue_cuEbCdc4,4463
9
9
  dataeval/data/_split.py,sha256=nQABR05vxil2Qx7-uX4Fm0_DWpibskBGDJOYj_b1u3I,16737
10
10
  dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
@@ -26,24 +26,24 @@ dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie
26
26
  dataeval/detectors/drift/_nml/_base.py,sha256=o34LcCsD9p1A6u8UdQn-dxIVwC2CMr6uCpC0vq16JX0,2663
27
27
  dataeval/detectors/drift/_nml/_chunk.py,sha256=t12eouanRNiu5DJXOaYDZXUvFMqfcp1BETLOufdV79M,13567
28
28
  dataeval/detectors/drift/_nml/_domainclassifier.py,sha256=n7Ttq5Ej7sAY9Jn2iagaGj4IIWiG8gmA3wwFizlBqes,7292
29
- dataeval/detectors/drift/_nml/_result.py,sha256=Nz_qTRu_EcJ1OcywSTVXFm9fx3UyuX66ZWACrffG5dI,3255
29
+ dataeval/detectors/drift/_nml/_result.py,sha256=TMK17bnlgSdL0MCRHtQZJO8YoWWe4C2kh_akESrlP1g,3269
30
30
  dataeval/detectors/drift/_nml/_thresholds.py,sha256=WGdkLei9w_EvvsRHQzWdDyFVoZHIwM78k_aB3eoh31Q,12060
31
31
  dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
32
32
  dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
33
33
  dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
34
34
  dataeval/detectors/linters/duplicates.py,sha256=X5WSEvI_BHkLoXjkaHK6wTnSkx4IjpO_exMRjSlhc70,4963
35
- dataeval/detectors/linters/outliers.py,sha256=R3-p8kzia77Q3k2grXeRXnRiv7nMhosoPY1sDLQVKrs,9049
35
+ dataeval/detectors/linters/outliers.py,sha256=WO686jVbGbtDjO-8CuYVLxpeUGv8MpIK9QjADlTdd40,9596
36
36
  dataeval/detectors/ood/__init__.py,sha256=juCYBDs7CQEAtMhnEpPqF6uTrOIH9kTBSuQ_GRw6a8o,283
37
37
  dataeval/detectors/ood/ae.py,sha256=fTrUfFxv6xUqzKpwMC8rW3JrizA16M_bgzqLuBKMrS0,2944
38
38
  dataeval/detectors/ood/base.py,sha256=9b-Ljznf0lB1SXF4F_Aj3eJ4Y3ijGEDPMjucUsWOGJM,3051
39
39
  dataeval/detectors/ood/mixin.py,sha256=0_o-1HPvgf3-Lf1MSOIfjj5UB8LTLEBGYtJJfyCCzwc,5431
40
40
  dataeval/metadata/__init__.py,sha256=XDDmJbOZBNM6pL0r6Nbu6oMRoyAh22IDkPYGndNlkZU,316
41
- dataeval/metadata/_distance.py,sha256=AABrGoQyD13z9Fqlz3NyfX0Iow_vjBwAugIv6OSRTTE,4187
41
+ dataeval/metadata/_distance.py,sha256=MbXM9idsooNWnGLaTKg8j4ZqavUeJUjuW7EPW3-UQyg,4234
42
42
  dataeval/metadata/_ood.py,sha256=lNPHouj_9WfM_uTtsaiRaPn46RcVy3YebD1c32vDj-c,8981
43
43
  dataeval/metadata/_utils.py,sha256=BcGoYVfA4AkAWpInY5txOc3QBpsGf6cnnUAsHOQTJAE,1210
44
44
  dataeval/metrics/__init__.py,sha256=8VC8q3HuJN3o_WN51Ae2_wXznl3RMXIvA5GYVcy7vr8,225
45
45
  dataeval/metrics/bias/__init__.py,sha256=329S1_3WnWqeU4-qVcbe0fMy4lDrj9uKslWHIQf93yg,839
46
- dataeval/metrics/bias/_balance.py,sha256=L5TR8Twwodulk8xkhE-L7PR-isPGw4LusIjL3ZHIH8c,5525
46
+ dataeval/metrics/bias/_balance.py,sha256=fREtoMLUZPOf_ivqNKwij6oPiKMTk02ECO5rWURf3KY,5541
47
47
  dataeval/metrics/bias/_completeness.py,sha256=BysXU2Jpw33n5dl3acJFEqF3mFGiJLsfG4n5Q2fkTaY,4608
48
48
  dataeval/metrics/bias/_coverage.py,sha256=PeUoOiaghUEdn6Ov8z2-am7-fnBVIPcFbJK7Ty5JObA,3647
49
49
  dataeval/metrics/bias/_diversity.py,sha256=25udDKmel9IjeVT5nM4dOa1apda66QdRxBc922yuUvI,5830
@@ -54,23 +54,23 @@ dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1R
54
54
  dataeval/metrics/estimators/_divergence.py,sha256=-np4nWNtRrHnvo4xdWuTzkyJJmobyjDnVDBOMjtBS1Y,4003
55
55
  dataeval/metrics/estimators/_uap.py,sha256=BULEBbJ9BQ1IcTeZf0x7iI60QHAWCccBOM97FIu9VXA,1928
56
56
  dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_lWGGA,1098
57
- dataeval/metrics/stats/_base.py,sha256=-TIDSHT-zwNXqWMTM6Nu3BQ11cWP4TFYFaUF40vIChs,12534
57
+ dataeval/metrics/stats/_base.py,sha256=R-hxoEPLreZcxYxBfyjbKfdoGMMTPiqJ5g2zSO-1UYM,12541
58
58
  dataeval/metrics/stats/_boxratiostats.py,sha256=ROZrlqgbowkGfCR5PJ5TL7Og40iMOdUqJnsCtaz_Xek,6450
59
59
  dataeval/metrics/stats/_dimensionstats.py,sha256=EVO-BlxrZl8qrP09lwPbyWdrG1ZeDtgj4LiswDwEZ1I,2896
60
60
  dataeval/metrics/stats/_hashstats.py,sha256=qa1CYRgOebkxqkALfffaPM-kJ074ZbyfpWbfOfuObSs,4758
61
61
  dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
62
- dataeval/metrics/stats/_labelstats.py,sha256=bOLH4FEBN4JZ5njdRVjEK7GUb47XBMq9eqYUgXSLmCY,3071
63
- dataeval/metrics/stats/_pixelstats.py,sha256=5RCQh0OQkHiCkn3DgCPVxKoFfifX_FOtwsnotADSZ0I,3265
64
- dataeval/metrics/stats/_visualstats.py,sha256=0k6bvAL_d66nQMfG7bydCOFJb7B0dhgG7fqCjVTp1sg,3707
62
+ dataeval/metrics/stats/_labelstats.py,sha256=_dXt3p8_-SHEtHvJWbL0rnQvO2g30zxX42mG2LGJepU,3195
63
+ dataeval/metrics/stats/_pixelstats.py,sha256=N9e7RXuzSHtlJtWU7l5IcTTIXe2kOmWiuj6lnJpZWq0,3312
64
+ dataeval/metrics/stats/_visualstats.py,sha256=b6jMq36_UlKduMrkwfq2i0fXNalDEcMdqPgoynXl5hI,3713
65
65
  dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
66
66
  dataeval/outputs/_base.py,sha256=-Wa0gFcBVLbfWPMZyCql7x4vGsnkLP4pecsQIeUZ2_Y,5904
67
67
  dataeval/outputs/_bias.py,sha256=1OZpKncYTryjPLRHb4d6NlhE27uPT57gCob_5jtjKDI,10456
68
- dataeval/outputs/_drift.py,sha256=rKn5vqMR6XNujgSqfHsH76oFkoGsUusquZL2Qy4Ae6Y,4581
68
+ dataeval/outputs/_drift.py,sha256=hXILED_soY8ppIQZgftQvmumtwDrTnABbYl-flIGEU4,4588
69
69
  dataeval/outputs/_estimators.py,sha256=mh-R08CgYtmq9ffANDMYR-V4vrZnSjOjEyOMiMDZ2Ic,3091
70
- dataeval/outputs/_linters.py,sha256=ZClITD4XY99TunS_9ABTl7eauppoUdpCZU1pCVvD0cI,6700
70
+ dataeval/outputs/_linters.py,sha256=k8lkd8EZ23q0m-HOD-FgqMcLQFy1UH7vws2ucLPyn08,6697
71
71
  dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
72
72
  dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
73
- dataeval/outputs/_stats.py,sha256=KIghl-glm9A_h1eVQDKqdTQg8o2zedltWyX4NkCsv2U,15226
73
+ dataeval/outputs/_stats.py,sha256=_ItGjs9YaMHqjivkR1YBcSErD5ICfa_-iV9nq0l8bTM,17451
74
74
  dataeval/outputs/_utils.py,sha256=NfhYaGT2PZlhIs8ICKUsPWHZXjhWYDkEJqBDdqMeaOM,929
75
75
  dataeval/outputs/_workflows.py,sha256=K786mOgegxVi81diUA-qpbwGEkwa8YA7Fk4ttgjJeaY,10831
76
76
  dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -83,16 +83,16 @@ dataeval/utils/_fast_mst.py,sha256=pv42flr1Uf5RBa9qDG0YLDXWH7Mr7a9zpauO1HqZXaY,8
83
83
  dataeval/utils/_image.py,sha256=4uxTIOYZZlRJOfNmdA3ek3no3FrLWCK5un48kStMDt8,3578
84
84
  dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
85
85
  dataeval/utils/_mst.py,sha256=bLmJmu_1Dtj3hC5gQp3oAiJ_7TKtEjahTqusVRRU4eI,2168
86
- dataeval/utils/_plot.py,sha256=3yn5UGL2xUeayPNws2bkvxm9ZCURsVUkpvSrwOqUE7g,7145
86
+ dataeval/utils/_plot.py,sha256=1rnMkBRvTFLoTAHqXwF7c7GJ5_5iqlgarZKAzmYciLk,7225
87
87
  dataeval/utils/data/__init__.py,sha256=xGzrjrOxOP2DP1tU84AWMKPnSxFvSjM81CTlDg4rNM8,331
88
- dataeval/utils/data/_dataset.py,sha256=CFK9h-XPN7J-iF2nXol6keMDbGm6VIweFAMAjXRUlhg,9527
88
+ dataeval/utils/data/_dataset.py,sha256=tC_vqgWnmojAoAANo5BUVfEUYXl7GzOBSeYjR9olbDk,9506
89
89
  dataeval/utils/data/collate.py,sha256=5egEEKhNNCGeNLChO1p6dZ4Wg6x51VEaMNHz7hEZUxI,3936
90
90
  dataeval/utils/data/metadata.py,sha256=L1c2bCiMj0aR0QCoKkjwBujIftJDEMgW_3ZbgeS8WHo,14703
91
91
  dataeval/utils/datasets/__init__.py,sha256=pAXqHX76yAoBI8XB3m6zGuW-u3s3PCoIXG5GDzxH7Zs,572
92
92
  dataeval/utils/datasets/_antiuav.py,sha256=kA_ia1fYNcJiz9SpCvh-Z8iSc7iJrdogjBI3soyaa7A,8304
93
93
  dataeval/utils/datasets/_base.py,sha256=pyfpJda3ku469M3TFRsJn9S2oAiQODOGTlLcdcoEW9U,9031
94
94
  dataeval/utils/datasets/_cifar10.py,sha256=hZc_A30yKYBbv2kvVdEkZ9egyEe6XBUnmksoIAoJ-5Y,8265
95
- dataeval/utils/datasets/_fileio.py,sha256=OASFA9uX3KgfyPb5vza12BlZyAi9Y8Al9lUR_IYPcsM,5449
95
+ dataeval/utils/datasets/_fileio.py,sha256=LEoFVNdryRdi7mKpWw-9D8lA6XMa-Jaszd85bv93POo,5454
96
96
  dataeval/utils/datasets/_milco.py,sha256=iXf4C1I3Eg_3gHKUe4XPi21yFMBO51zxTIqAkGf9bYg,7869
97
97
  dataeval/utils/datasets/_mixin.py,sha256=S8iii-SoYUsFFYNXjw2thlZkpBvRLnZ4XI8wTqOKXgU,1729
98
98
  dataeval/utils/datasets/_mnist.py,sha256=uz46sE1Go3TgGjG6x2cXckSVQ0mSg2mhgk8BUvLWjb0,8149
@@ -102,12 +102,12 @@ dataeval/utils/datasets/_voc.py,sha256=pafY112O80isYkrdy7Quie9SBm_TmYhREuyl8Sxts
102
102
  dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
103
103
  dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
104
104
  dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
105
- dataeval/utils/torch/_internal.py,sha256=vHy-DzPhmvE8h3wmWc3aciBJ8nDGzQ1z1jTZgGjmDyM,4154
105
+ dataeval/utils/torch/_internal.py,sha256=HuyBB7NWFI9sUrRbOCZFxOfZjRGPdqr5iF7_DT2S0wo,4159
106
106
  dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
107
107
  dataeval/utils/torch/trainer.py,sha256=Oc2lK13uPGhmLYbmAqlPWyKxgG4YJFlnSXCqFHUZbdA,5528
108
108
  dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
109
109
  dataeval/workflows/sufficiency.py,sha256=j-R8dg4XE6a66p_oTXG2GNzgg3vGk85CTblxhFXaxog,8513
110
- dataeval-0.86.3.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
111
- dataeval-0.86.3.dist-info/METADATA,sha256=1zOfOabm9w57nxAWZw5InEzmqyWRRko10btPqT0h64o,5353
112
- dataeval-0.86.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
113
- dataeval-0.86.3.dist-info/RECORD,,
110
+ dataeval-0.86.5.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
111
+ dataeval-0.86.5.dist-info/METADATA,sha256=qx7aNDgzyAfRRKWjDXkfXojBdsBFnjMgwTVl0JsLbbw,5353
112
+ dataeval-0.86.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
113
+ dataeval-0.86.5.dist-info/RECORD,,