dataeval 0.86.4__py3-none-any.whl → 0.86.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dataeval/__init__.py CHANGED
@@ -8,7 +8,7 @@ shifts that impact performance of deployed models.
8
8
  from __future__ import annotations
9
9
 
10
10
  __all__ = ["config", "detectors", "log", "metrics", "typing", "utils", "workflows"]
11
- __version__ = "0.86.4"
11
+ __version__ = "0.86.5"
12
12
 
13
13
  import logging
14
14
 
dataeval/config.py CHANGED
@@ -25,7 +25,7 @@ _seed: int | None = None
25
25
 
26
26
  ### CONSTS ###
27
27
 
28
- EPSILON = 1e-10
28
+ EPSILON = 1e-12
29
29
 
30
30
  ### TYPES ###
31
31
 
@@ -4,7 +4,7 @@ __all__ = []
4
4
 
5
5
  import warnings
6
6
  from dataclasses import dataclass
7
- from typing import Any, Iterable, Literal, Mapping, Sequence
7
+ from typing import Any, Iterable, Literal, Mapping, Sequence, Sized
8
8
 
9
9
  import numpy as np
10
10
  import polars as pl
@@ -69,6 +69,7 @@ class Metadata:
69
69
  self._is_binned = False
70
70
 
71
71
  self._dataset = dataset
72
+ self._count = len(dataset) if isinstance(dataset, Sized) else 0
72
73
  self._continuous_factor_bins = dict(continuous_factor_bins) if continuous_factor_bins else {}
73
74
  self._auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = auto_bin_method
74
75
 
@@ -198,8 +199,9 @@ class Metadata:
198
199
 
199
200
  @property
200
201
  def image_count(self) -> int:
201
- self._bin()
202
- return 0 if self._image_indices.size == 0 else int(self._image_indices.max() + 1)
202
+ if self._count == 0:
203
+ self._structure()
204
+ return self._count
203
205
 
204
206
  def _filter(self, factor: str | tuple[str, Any]) -> bool:
205
207
  factor = factor[0] if isinstance(factor, tuple) else factor
@@ -29,7 +29,7 @@ class AbstractResult(GenericOutput[pd.DataFrame]):
29
29
  self._data = results_data.copy(deep=True)
30
30
 
31
31
  def data(self) -> pd.DataFrame:
32
- return self.to_df()
32
+ return self.to_dataframe()
33
33
 
34
34
  @property
35
35
  def empty(self) -> bool:
@@ -38,7 +38,7 @@ class AbstractResult(GenericOutput[pd.DataFrame]):
38
38
  def __len__(self) -> int:
39
39
  return 0 if self.empty else len(self._data)
40
40
 
41
- def to_df(self, multilevel: bool = True) -> pd.DataFrame:
41
+ def to_dataframe(self, multilevel: bool = True) -> pd.DataFrame:
42
42
  """Export results to pandas dataframe."""
43
43
  if multilevel:
44
44
  return self._data
@@ -7,6 +7,7 @@ from typing import Any, Literal, Sequence, overload
7
7
  import numpy as np
8
8
  from numpy.typing import NDArray
9
9
 
10
+ from dataeval.config import EPSILON
10
11
  from dataeval.data._images import Images
11
12
  from dataeval.metrics.stats._base import combine_stats, get_dataset_step_from_idx
12
13
  from dataeval.metrics.stats._imagestats import imagestats
@@ -18,26 +19,56 @@ from dataeval.typing import ArrayLike, Dataset
18
19
 
19
20
 
20
21
  def _get_outlier_mask(
21
- values: NDArray, method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
22
- ) -> NDArray:
22
+ values: NDArray[Any], method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
23
+ ) -> NDArray[np.bool_]:
24
+ if len(values) == 0:
25
+ return np.array([], dtype=bool)
26
+
23
27
  values = values.astype(np.float64)
28
+
29
+ valid_mask = ~np.isnan(values)
30
+ outliers = np.full(values.shape, False, dtype=bool)
31
+
32
+ if not np.any(valid_mask):
33
+ return outliers
34
+
24
35
  if method == "zscore":
25
- threshold = threshold if threshold else 3.0
26
- std = np.std(values)
27
- abs_diff = np.abs(values - np.mean(values))
28
- return std != 0 and (abs_diff / std) > threshold
29
- if method == "modzscore":
30
- threshold = threshold if threshold else 3.5
31
- abs_diff = np.abs(values - np.median(values))
32
- med_abs_diff = np.median(abs_diff) if np.median(abs_diff) != 0 else np.mean(abs_diff)
33
- mod_z_score = 0.6745 * abs_diff / med_abs_diff
34
- return mod_z_score > threshold
35
- if method == "iqr":
36
- threshold = threshold if threshold else 1.5
37
- qrt = np.percentile(values, q=(25, 75), method="midpoint")
38
- iqr = (qrt[1] - qrt[0]) * threshold
39
- return (values < (qrt[0] - iqr)) | (values > (qrt[1] + iqr))
40
- raise ValueError("Outlier method must be 'zscore' 'modzscore' or 'iqr'.")
36
+ threshold = threshold if threshold is not None else 3.0
37
+
38
+ std_val = np.nanstd(values)
39
+
40
+ if std_val > EPSILON:
41
+ mean_val = np.nanmean(values)
42
+ abs_diff = np.abs(values - mean_val)
43
+ outliers = (abs_diff / std_val) > threshold
44
+
45
+ elif method == "modzscore":
46
+ threshold = threshold if threshold is not None else 3.5
47
+
48
+ median_val = np.nanmedian(values)
49
+ abs_diff = np.abs(values - median_val)
50
+ m_abs_diff = np.nanmedian(abs_diff)
51
+ m_abs_diff = np.nanmean(abs_diff) if m_abs_diff <= EPSILON else m_abs_diff
52
+
53
+ if m_abs_diff > EPSILON:
54
+ mod_z_score = 0.6745 * abs_diff / m_abs_diff
55
+ outliers = mod_z_score > threshold
56
+
57
+ elif method == "iqr":
58
+ threshold = threshold if threshold is not None else 1.5
59
+
60
+ qrt = np.nanpercentile(values, q=(25, 75), method="midpoint")
61
+ iqr_val = qrt[1] - qrt[0]
62
+
63
+ if iqr_val > EPSILON:
64
+ iqr_threshold = iqr_val * threshold
65
+ outliers = (values < (qrt[0] - iqr_threshold)) | (values > (qrt[1] + iqr_threshold))
66
+
67
+ else:
68
+ raise ValueError("Outlier method must be 'zscore' 'modzscore' or 'iqr'.")
69
+
70
+ outliers[~valid_mask] = False
71
+ return outliers
41
72
 
42
73
 
43
74
  class Outliers:
@@ -164,10 +195,10 @@ class Outliers:
164
195
  >>> len(results)
165
196
  2
166
197
  >>> results.issues[0]
167
- {10: {'skew': -3.906, 'kurtosis': 13.266, 'entropy': 0.2128}, 12: {'std': 0.00536, 'var': 2.87e-05, 'skew': -3.906, 'kurtosis': 13.266, 'entropy': 0.2128}}
198
+ {10: {'entropy': 0.2128}, 12: {'std': 0.00536, 'var': 2.87e-05, 'entropy': 0.2128}}
168
199
  >>> results.issues[1]
169
200
  {}
170
- """ # noqa: E501
201
+ """
171
202
  if isinstance(stats, (ImageStatsOutput, DimensionStatsOutput, PixelStatsOutput, VisualStatsOutput)):
172
203
  return OutliersOutput(self._get_outliers(stats.data()))
173
204
 
@@ -221,7 +252,7 @@ class Outliers:
221
252
  >>> list(results.issues)
222
253
  [10, 12]
223
254
  >>> results.issues[10]
224
- {'contrast': 1.25, 'zeros': 0.05493, 'skew': -3.906, 'kurtosis': 13.266, 'entropy': 0.2128}
255
+ {'contrast': 1.25, 'zeros': 0.05493, 'entropy': 0.2128}
225
256
  """
226
257
  images = Images(data) if isinstance(data, Dataset) else data
227
258
  self.stats = imagestats(images)
@@ -13,8 +13,8 @@ from multiprocessing import Pool
13
13
  from typing import Any, Callable, Generic, Iterable, Iterator, Sequence, TypeVar
14
14
 
15
15
  import numpy as np
16
- import tqdm
17
16
  from numpy.typing import NDArray
17
+ from tqdm.auto import tqdm
18
18
 
19
19
  from dataeval.config import get_max_processes
20
20
  from dataeval.outputs._stats import BASE_ATTRS, BaseStatsOutput, SourceIndex
@@ -77,7 +77,7 @@ class PoolWrapper:
77
77
  """
78
78
 
79
79
  def __init__(self, processes: int | None) -> None:
80
- self.pool = Pool(processes) if processes is not None and processes > 1 else None
80
+ self.pool = Pool(processes) if processes is None or processes > 1 else None
81
81
 
82
82
  def imap(self, func: Callable[[_S], _T], iterable: Iterable[_S]) -> Iterator[_T]:
83
83
  return map(func, iterable) if self.pool is None else self.pool.imap(func, iterable)
@@ -93,7 +93,7 @@ class PoolWrapper:
93
93
 
94
94
  class StatsProcessor(Generic[TStatsOutput]):
95
95
  output_class: type[TStatsOutput]
96
- cache_keys: list[str] = []
96
+ cache_keys: set[str] = set()
97
97
  image_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
98
98
  channel_function_map: dict[str, Callable[[StatsProcessor[TStatsOutput]], Any]] = {}
99
99
 
@@ -267,7 +267,7 @@ def run_stats(
267
267
  stats_processor_cls = stats_processor_cls if isinstance(stats_processor_cls, Iterable) else [stats_processor_cls]
268
268
 
269
269
  with PoolWrapper(processes=get_max_processes()) as p:
270
- for r in tqdm.tqdm(
270
+ for r in tqdm(
271
271
  p.imap(
272
272
  partial(
273
273
  process_stats_unpack,
@@ -68,7 +68,11 @@ def labelstats(dataset: Metadata | AnnotatedDataset[Any]) -> LabelStatsOutput:
68
68
 
69
69
  # Count labels per image
70
70
  label_per_image_df = metadata_df.group_by("image_index").agg(pl.len().alias("label_count"))
71
- label_per_image = label_per_image_df.sort("image_index")["label_count"].to_list()
71
+
72
+ # Join with all indices to include missing ones with 0 count
73
+ all_indices = pl.DataFrame({"image_index": range(metadata.image_count)})
74
+ complete_label_df = all_indices.join(label_per_image_df, on="image_index", how="left").fill_null(0)
75
+ label_per_image = complete_label_df.sort("image_index")["label_count"].to_list()
72
76
 
73
77
  return LabelStatsOutput(
74
78
  label_counts_per_class=label_counts,
@@ -15,12 +15,13 @@ from dataeval.typing import ArrayLike, Dataset
15
15
 
16
16
  class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
17
17
  output_class: type = PixelStatsOutput
18
+ cache_keys = {"histogram"}
18
19
  image_function_map: dict[str, Callable[[StatsProcessor[PixelStatsOutput]], Any]] = {
19
20
  "mean": lambda x: np.nanmean(x.scaled),
20
21
  "std": lambda x: np.nanstd(x.scaled),
21
22
  "var": lambda x: np.nanvar(x.scaled),
22
- "skew": lambda x: np.nan_to_num(skew(x.scaled.ravel())),
23
- "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled.ravel())),
23
+ "skew": lambda x: skew(x.scaled.ravel(), nan_policy="omit"),
24
+ "kurtosis": lambda x: kurtosis(x.scaled.ravel(), nan_policy="omit"),
24
25
  "histogram": lambda x: np.histogram(x.scaled, 256, (0, 1))[0],
25
26
  "entropy": lambda x: entropy(x.get("histogram")),
26
27
  }
@@ -28,8 +29,8 @@ class PixelStatsProcessor(StatsProcessor[PixelStatsOutput]):
28
29
  "mean": lambda x: np.nanmean(x.scaled, axis=1),
29
30
  "std": lambda x: np.nanstd(x.scaled, axis=1),
30
31
  "var": lambda x: np.nanvar(x.scaled, axis=1),
31
- "skew": lambda x: np.nan_to_num(skew(x.scaled, axis=1)),
32
- "kurtosis": lambda x: np.nan_to_num(kurtosis(x.scaled, axis=1)),
32
+ "skew": lambda x: skew(x.scaled, axis=1, nan_policy="omit"),
33
+ "kurtosis": lambda x: kurtosis(x.scaled, axis=1, nan_policy="omit"),
33
34
  "histogram": lambda x: np.apply_along_axis(lambda y: np.histogram(y, 256, (0, 1))[0], 1, x.scaled),
34
35
  "entropy": lambda x: entropy(x.get("histogram"), axis=1),
35
36
  }
@@ -6,6 +6,7 @@ from typing import Any, Callable
6
6
 
7
7
  import numpy as np
8
8
 
9
+ from dataeval.config import EPSILON
9
10
  from dataeval.metrics.stats._base import StatsProcessor, run_stats
10
11
  from dataeval.outputs import VisualStatsOutput
11
12
  from dataeval.outputs._base import set_metadata
@@ -17,23 +18,21 @@ QUARTILES = (0, 25, 50, 75, 100)
17
18
 
18
19
  class VisualStatsProcessor(StatsProcessor[VisualStatsOutput]):
19
20
  output_class: type = VisualStatsOutput
21
+ cache_keys: set[str] = {"percentiles"}
20
22
  image_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
21
23
  "brightness": lambda x: x.get("percentiles")[1],
22
- "contrast": lambda x: 0
23
- if np.mean(x.get("percentiles")) == 0
24
- else (np.max(x.get("percentiles")) - np.min(x.get("percentiles"))) / np.mean(x.get("percentiles")),
24
+ "contrast": lambda x: (np.max(x.get("percentiles")) - np.min(x.get("percentiles")))
25
+ / (np.mean(x.get("percentiles")) + EPSILON),
25
26
  "darkness": lambda x: x.get("percentiles")[-2],
26
27
  "missing": lambda x: np.count_nonzero(np.isnan(np.sum(x.image, axis=0))) / np.prod(x.shape[-2:]),
27
28
  "sharpness": lambda x: np.nanstd(edge_filter(np.mean(x.image, axis=0))),
28
- "zeros": lambda x: np.count_nonzero(np.nansum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
29
+ "zeros": lambda x: np.count_nonzero(np.sum(x.image, axis=0) == 0) / np.prod(x.shape[-2:]),
29
30
  "percentiles": lambda x: np.nanpercentile(x.scaled, q=QUARTILES),
30
31
  }
31
32
  channel_function_map: dict[str, Callable[[StatsProcessor[VisualStatsOutput]], Any]] = {
32
33
  "brightness": lambda x: x.get("percentiles")[:, 1],
33
- "contrast": lambda x: np.nan_to_num(
34
- (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
35
- / np.mean(x.get("percentiles"), axis=1)
36
- ),
34
+ "contrast": lambda x: (np.max(x.get("percentiles"), axis=1) - np.min(x.get("percentiles"), axis=1))
35
+ / (np.mean(x.get("percentiles"), axis=1) + EPSILON),
37
36
  "darkness": lambda x: x.get("percentiles")[:, -2],
38
37
  "missing": lambda x: np.count_nonzero(np.isnan(x.image), axis=(1, 2)) / np.prod(x.shape[-2:]),
39
38
  "sharpness": lambda x: np.nanstd(np.vectorize(edge_filter, signature="(m,n)->(m,n)")(x.image), axis=(1, 2)),
@@ -114,7 +114,7 @@ class DriftMVDCOutput(PerMetricResult):
114
114
  import matplotlib.pyplot as plt
115
115
 
116
116
  fig, ax = plt.subplots(dpi=300)
117
- resdf = self.to_df()
117
+ resdf = self.to_dataframe()
118
118
  xticks = np.arange(resdf.shape[0])
119
119
  trndf = resdf[resdf["chunk"]["period"] == "reference"]
120
120
  tstdf = resdf[resdf["chunk"]["period"] == "analysis"]
@@ -6,7 +6,7 @@ from dataclasses import dataclass
6
6
  from typing import TYPE_CHECKING, Any, Iterable, Mapping, NamedTuple, Optional, Sequence, Union
7
7
 
8
8
  import numpy as np
9
- import pandas as pd
9
+ import polars as pl
10
10
  from numpy.typing import NDArray
11
11
  from typing_extensions import TypeAlias
12
12
 
@@ -22,7 +22,7 @@ SOURCE_INDEX = "source_index"
22
22
  OBJECT_COUNT = "object_count"
23
23
  IMAGE_COUNT = "image_count"
24
24
 
25
- BASE_ATTRS = (SOURCE_INDEX, OBJECT_COUNT, IMAGE_COUNT)
25
+ BASE_ATTRS = [SOURCE_INDEX, OBJECT_COUNT, IMAGE_COUNT]
26
26
 
27
27
 
28
28
  class SourceIndex(NamedTuple):
@@ -156,14 +156,21 @@ class BaseStatsOutput(Output):
156
156
  Mapping[str, NDArray[Any]]
157
157
  """
158
158
  filter_ = [filter] if isinstance(filter, str) else filter
159
+
160
+ """
161
+ Performs validation checks to ensure selected keys and constant or 1-D values
162
+ Each set of checks returns True if a valid value.
163
+ Only one set of final checks needs to be True to allow the value through
164
+ """
159
165
  return {
160
166
  k: v
161
167
  for k, v in self.data().items()
162
- if k not in BASE_ATTRS
163
- and (filter_ is None or k in filter_)
164
- and isinstance(v, np.ndarray)
165
- and v.ndim == 1
166
- and (not exclude_constant or len(np.unique(v)) > 1)
168
+ if (
169
+ k not in BASE_ATTRS # Ignore BaseStatsOutput attributes
170
+ and (filter_ is None or k in filter_) # Key is selected
171
+ and (isinstance(v, np.ndarray) and v.ndim == 1) # Check valid array
172
+ and (not exclude_constant or len(np.unique(v)) > 1) # Check valid numpy "constant"
173
+ )
167
174
  }
168
175
 
169
176
  def plot(
@@ -195,6 +202,11 @@ class BaseStatsOutput(Output):
195
202
  return histogram_plot(factors, log)
196
203
  return channel_histogram_plot(factors, log, max_channels, ch_mask)
197
204
 
205
+ def to_dataframe(self) -> pl.DataFrame:
206
+ """Returns the processed factors a polars dataframe of shape (factors, samples)"""
207
+
208
+ return pl.DataFrame(self.factors())
209
+
198
210
 
199
211
  @dataclass(frozen=True)
200
212
  class DimensionStatsOutput(BaseStatsOutput):
@@ -256,6 +268,43 @@ class HashStatsOutput(BaseStatsOutput):
256
268
  xxhash: Sequence[str]
257
269
  pchash: Sequence[str]
258
270
 
271
+ def to_dataframe(self) -> pl.DataFrame:
272
+ """
273
+ Returns a polars dataframe for the xxhash and pchash attributes of each sample
274
+
275
+ Note
276
+ ----
277
+ xxhash and pchash do not follow the normal definition of factors but are
278
+ helpful attributes of the data
279
+
280
+ Examples
281
+ --------
282
+ Display the hashes of a dataset of images, whose shape is (C, H, W),
283
+ as a polars DataFrame
284
+
285
+ >>> from dataeval.metrics.stats import hashstats
286
+ >>> results = hashstats(dataset)
287
+ >>> print(results.to_dataframe())
288
+ shape: (8, 2)
289
+ ┌──────────────────┬──────────────────┐
290
+ │ xxhash ┆ pchash │
291
+ │ --- ┆ --- │
292
+ │ str ┆ str │
293
+ ╞══════════════════╪══════════════════╡
294
+ │ 69b50a5f06af238c ┆ e666999999266666 │
295
+ │ 5a861d7a23d1afe7 ┆ e666999999266666 │
296
+ │ 7ffdb4990ad44ac6 ┆ e666999966666299 │
297
+ │ 4f0c366a3298ceac ┆ e666999999266666 │
298
+ │ c5519e36ac1f8839 ┆ 96e91656e91616e9 │
299
+ │ e7e92346159a4567 ┆ e666999999266666 │
300
+ │ 9a538f797a5ba8ee ┆ e666999999266666 │
301
+ │ 1a658bd2a1baee25 ┆ e666999999266666 │
302
+ └──────────────────┴──────────────────┘
303
+ """
304
+ data = {"xxhash": self.xxhash, "pchash": self.pchash}
305
+ schema = {"xxhash": str, "pchash": str}
306
+ return pl.DataFrame(data=data, schema=schema)
307
+
259
308
 
260
309
  @dataclass(frozen=True)
261
310
  class LabelStatsOutput(Output):
@@ -325,17 +374,13 @@ class LabelStatsOutput(Output):
325
374
 
326
375
  return "\n".join(table_str)
327
376
 
328
- def to_dataframe(self) -> pd.DataFrame:
377
+ def to_dataframe(self) -> pl.DataFrame:
329
378
  """
330
- Exports the label statistics output results to a pandas DataFrame.
331
-
332
- Notes
333
- -----
334
- This method requires `pandas <https://pandas.pydata.org/>`_ to be installed.
379
+ Exports the label statistics output results to a polars DataFrame.
335
380
 
336
381
  Returns
337
382
  -------
338
- pd.DataFrame
383
+ pl.DataFrame
339
384
  """
340
385
  total_count = []
341
386
  image_count = []
@@ -343,7 +388,7 @@ class LabelStatsOutput(Output):
343
388
  total_count.append(self.label_counts_per_class[cls])
344
389
  image_count.append(self.image_counts_per_class[cls])
345
390
 
346
- return pd.DataFrame(
391
+ return pl.DataFrame(
347
392
  {
348
393
  "Label": self.class_names,
349
394
  "Total Count": total_count,
@@ -72,9 +72,8 @@ def _listify_metadata(
72
72
 
73
73
  def _find_max(arr: ArrayLike) -> Any:
74
74
  if not isinstance(arr, (bytes, str)) and isinstance(arr, (Iterable, Sequence, Array)):
75
- if isinstance(arr[0], (Iterable, Sequence, Array)):
76
- return max([_find_max(x) for x in arr]) # type: ignore
77
- return max(arr)
75
+ nested = [x for x in [_find_max(x) for x in arr] if x is not None]
76
+ return max(nested) if len(nested) > 0 else None
78
77
  return arr
79
78
 
80
79
 
@@ -8,7 +8,7 @@ import zipfile
8
8
  from pathlib import Path
9
9
 
10
10
  import requests
11
- from tqdm import tqdm
11
+ from tqdm.auto import tqdm
12
12
 
13
13
  ARCHIVE_ENDINGS = [".zip", ".tar", ".tgz"]
14
14
  COMPRESS_ENDINGS = [".gz", ".bz2"]
@@ -8,7 +8,7 @@ import numpy as np
8
8
  import torch
9
9
  from numpy.typing import NDArray
10
10
  from torch.utils.data import DataLoader, TensorDataset
11
- from tqdm import tqdm
11
+ from tqdm.auto import tqdm
12
12
 
13
13
  from dataeval.config import DeviceLike, get_device
14
14
  from dataeval.typing import Array
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataeval
3
- Version: 0.86.4
3
+ Version: 0.86.5
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Home-page: https://dataeval.ai/
6
6
  License: MIT
@@ -1,10 +1,10 @@
1
- dataeval/__init__.py,sha256=6gfYCGo82QKKO58jQSma27Mr-R316vmCDbTjXRh5B7o,1636
1
+ dataeval/__init__.py,sha256=5qOVdEDEZt5O--VufuRJXGEByzQC7pJWZluFGzPuNOc,1636
2
2
  dataeval/_log.py,sha256=C7AGkIRzymvYJ0LQXtnShiy3i5Xrp8T58JzIHHguk_Q,365
3
- dataeval/config.py,sha256=hjad0TK1UmaKQlUuxqxt64_OAUqZkHjicBf06cvTyrQ,4082
3
+ dataeval/config.py,sha256=bHa8np4FCtLLv8_xlfdDC4lb1InJ_kT0vXDO5P42rvk,4082
4
4
  dataeval/data/__init__.py,sha256=wzQ6uUFLNB3VJR0a2QnRBYwEmwXT93q0WpHu7FmFW1E,486
5
5
  dataeval/data/_embeddings.py,sha256=PFjpdV9bfusCB4taTIYSzx1hP8nJb_KCkZTN8kMw-Hs,12885
6
6
  dataeval/data/_images.py,sha256=Rc_59CuU4zfN7Xm7an1XUx8ZghQg6a56VJWMZD9edRw,2654
7
- dataeval/data/_metadata.py,sha256=5pND6IZ5KeEGrhCDiBVxhU_BXWU0okBxt8oNkZ9a2_M,14309
7
+ dataeval/data/_metadata.py,sha256=OTda9V7DA5Ejxip_NR16LCK2C8HMtpjWHHiFoW3LrLY,14364
8
8
  dataeval/data/_selection.py,sha256=r06xeiyK8nTWPLyItkoPQRWZI1i6LATSue_cuEbCdc4,4463
9
9
  dataeval/data/_split.py,sha256=nQABR05vxil2Qx7-uX4Fm0_DWpibskBGDJOYj_b1u3I,16737
10
10
  dataeval/data/selections/__init__.py,sha256=2m8ZB53wXzqLcqmc6p5atO6graB6ZyiRSNJFxf11X_g,613
@@ -26,13 +26,13 @@ dataeval/detectors/drift/_nml/__init__.py,sha256=MNyKyZlfTjr5uQql2uBBfRkUdsuduie
26
26
  dataeval/detectors/drift/_nml/_base.py,sha256=o34LcCsD9p1A6u8UdQn-dxIVwC2CMr6uCpC0vq16JX0,2663
27
27
  dataeval/detectors/drift/_nml/_chunk.py,sha256=t12eouanRNiu5DJXOaYDZXUvFMqfcp1BETLOufdV79M,13567
28
28
  dataeval/detectors/drift/_nml/_domainclassifier.py,sha256=n7Ttq5Ej7sAY9Jn2iagaGj4IIWiG8gmA3wwFizlBqes,7292
29
- dataeval/detectors/drift/_nml/_result.py,sha256=Nz_qTRu_EcJ1OcywSTVXFm9fx3UyuX66ZWACrffG5dI,3255
29
+ dataeval/detectors/drift/_nml/_result.py,sha256=TMK17bnlgSdL0MCRHtQZJO8YoWWe4C2kh_akESrlP1g,3269
30
30
  dataeval/detectors/drift/_nml/_thresholds.py,sha256=WGdkLei9w_EvvsRHQzWdDyFVoZHIwM78k_aB3eoh31Q,12060
31
31
  dataeval/detectors/drift/_uncertainty.py,sha256=BHlykJ-r7TGLJxdPfoazXnoAJ1qVDzbk5HjAMdsnHz8,5847
32
32
  dataeval/detectors/drift/updates.py,sha256=L1PnrPlIE1x6ujCc5mCwjcAZwadVTn-Zjb6MnTDvzJQ,2251
33
33
  dataeval/detectors/linters/__init__.py,sha256=xn2zPwUcmsuf-Jd9uw6AVI11C9z1b1Y9fYtuFnXenZ0,404
34
34
  dataeval/detectors/linters/duplicates.py,sha256=X5WSEvI_BHkLoXjkaHK6wTnSkx4IjpO_exMRjSlhc70,4963
35
- dataeval/detectors/linters/outliers.py,sha256=R3-p8kzia77Q3k2grXeRXnRiv7nMhosoPY1sDLQVKrs,9049
35
+ dataeval/detectors/linters/outliers.py,sha256=WO686jVbGbtDjO-8CuYVLxpeUGv8MpIK9QjADlTdd40,9596
36
36
  dataeval/detectors/ood/__init__.py,sha256=juCYBDs7CQEAtMhnEpPqF6uTrOIH9kTBSuQ_GRw6a8o,283
37
37
  dataeval/detectors/ood/ae.py,sha256=fTrUfFxv6xUqzKpwMC8rW3JrizA16M_bgzqLuBKMrS0,2944
38
38
  dataeval/detectors/ood/base.py,sha256=9b-Ljznf0lB1SXF4F_Aj3eJ4Y3ijGEDPMjucUsWOGJM,3051
@@ -54,23 +54,23 @@ dataeval/metrics/estimators/_clusterer.py,sha256=1HrpihGTJ63IkNSOy4Ibw633Gllkm1R
54
54
  dataeval/metrics/estimators/_divergence.py,sha256=-np4nWNtRrHnvo4xdWuTzkyJJmobyjDnVDBOMjtBS1Y,4003
55
55
  dataeval/metrics/estimators/_uap.py,sha256=BULEBbJ9BQ1IcTeZf0x7iI60QHAWCccBOM97FIu9VXA,1928
56
56
  dataeval/metrics/stats/__init__.py,sha256=6tA_9nbbM5ObJ6cds8Y1VBtTQiTOxrpGQSFLu_lWGGA,1098
57
- dataeval/metrics/stats/_base.py,sha256=-TIDSHT-zwNXqWMTM6Nu3BQ11cWP4TFYFaUF40vIChs,12534
57
+ dataeval/metrics/stats/_base.py,sha256=R-hxoEPLreZcxYxBfyjbKfdoGMMTPiqJ5g2zSO-1UYM,12541
58
58
  dataeval/metrics/stats/_boxratiostats.py,sha256=ROZrlqgbowkGfCR5PJ5TL7Og40iMOdUqJnsCtaz_Xek,6450
59
59
  dataeval/metrics/stats/_dimensionstats.py,sha256=EVO-BlxrZl8qrP09lwPbyWdrG1ZeDtgj4LiswDwEZ1I,2896
60
60
  dataeval/metrics/stats/_hashstats.py,sha256=qa1CYRgOebkxqkALfffaPM-kJ074ZbyfpWbfOfuObSs,4758
61
61
  dataeval/metrics/stats/_imagestats.py,sha256=gUPNgN5Zwzdr7WnSwbve1NXNsyxd5dy3cSnlR_7guCg,3007
62
- dataeval/metrics/stats/_labelstats.py,sha256=UG7aKpFctLJvca3rC9sPT_25sCes77KpgZguJYMXfU0,2949
63
- dataeval/metrics/stats/_pixelstats.py,sha256=5RCQh0OQkHiCkn3DgCPVxKoFfifX_FOtwsnotADSZ0I,3265
64
- dataeval/metrics/stats/_visualstats.py,sha256=0k6bvAL_d66nQMfG7bydCOFJb7B0dhgG7fqCjVTp1sg,3707
62
+ dataeval/metrics/stats/_labelstats.py,sha256=_dXt3p8_-SHEtHvJWbL0rnQvO2g30zxX42mG2LGJepU,3195
63
+ dataeval/metrics/stats/_pixelstats.py,sha256=N9e7RXuzSHtlJtWU7l5IcTTIXe2kOmWiuj6lnJpZWq0,3312
64
+ dataeval/metrics/stats/_visualstats.py,sha256=b6jMq36_UlKduMrkwfq2i0fXNalDEcMdqPgoynXl5hI,3713
65
65
  dataeval/outputs/__init__.py,sha256=geHB5M3QOiFFaQGV4ZwDTTKpqZPvPePbqG7lzaPhaXQ,1741
66
66
  dataeval/outputs/_base.py,sha256=-Wa0gFcBVLbfWPMZyCql7x4vGsnkLP4pecsQIeUZ2_Y,5904
67
67
  dataeval/outputs/_bias.py,sha256=1OZpKncYTryjPLRHb4d6NlhE27uPT57gCob_5jtjKDI,10456
68
- dataeval/outputs/_drift.py,sha256=rKn5vqMR6XNujgSqfHsH76oFkoGsUusquZL2Qy4Ae6Y,4581
68
+ dataeval/outputs/_drift.py,sha256=hXILED_soY8ppIQZgftQvmumtwDrTnABbYl-flIGEU4,4588
69
69
  dataeval/outputs/_estimators.py,sha256=mh-R08CgYtmq9ffANDMYR-V4vrZnSjOjEyOMiMDZ2Ic,3091
70
70
  dataeval/outputs/_linters.py,sha256=k8lkd8EZ23q0m-HOD-FgqMcLQFy1UH7vws2ucLPyn08,6697
71
71
  dataeval/outputs/_metadata.py,sha256=ffZgpX8KWURPHXpOWjbvJ2KRqWQkS2nWuIjKUzoHhMI,1710
72
72
  dataeval/outputs/_ood.py,sha256=suLKVXULGtXH0rq9eXHI1d3d2jhGmItJtz4QiQd47A4,1718
73
- dataeval/outputs/_stats.py,sha256=F-515PGBNB69DXM-YaCkGHAyaXkCD-yYvKfj4-q7R4w,15247
73
+ dataeval/outputs/_stats.py,sha256=_ItGjs9YaMHqjivkR1YBcSErD5ICfa_-iV9nq0l8bTM,17451
74
74
  dataeval/outputs/_utils.py,sha256=NfhYaGT2PZlhIs8ICKUsPWHZXjhWYDkEJqBDdqMeaOM,929
75
75
  dataeval/outputs/_workflows.py,sha256=K786mOgegxVi81diUA-qpbwGEkwa8YA7Fk4ttgjJeaY,10831
76
76
  dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -85,14 +85,14 @@ dataeval/utils/_method.py,sha256=9B9JQbgqWJBRhQJb7glajUtWaQzUTIUuvrZ9_bisxsM,394
85
85
  dataeval/utils/_mst.py,sha256=bLmJmu_1Dtj3hC5gQp3oAiJ_7TKtEjahTqusVRRU4eI,2168
86
86
  dataeval/utils/_plot.py,sha256=1rnMkBRvTFLoTAHqXwF7c7GJ5_5iqlgarZKAzmYciLk,7225
87
87
  dataeval/utils/data/__init__.py,sha256=xGzrjrOxOP2DP1tU84AWMKPnSxFvSjM81CTlDg4rNM8,331
88
- dataeval/utils/data/_dataset.py,sha256=CFK9h-XPN7J-iF2nXol6keMDbGm6VIweFAMAjXRUlhg,9527
88
+ dataeval/utils/data/_dataset.py,sha256=tC_vqgWnmojAoAANo5BUVfEUYXl7GzOBSeYjR9olbDk,9506
89
89
  dataeval/utils/data/collate.py,sha256=5egEEKhNNCGeNLChO1p6dZ4Wg6x51VEaMNHz7hEZUxI,3936
90
90
  dataeval/utils/data/metadata.py,sha256=L1c2bCiMj0aR0QCoKkjwBujIftJDEMgW_3ZbgeS8WHo,14703
91
91
  dataeval/utils/datasets/__init__.py,sha256=pAXqHX76yAoBI8XB3m6zGuW-u3s3PCoIXG5GDzxH7Zs,572
92
92
  dataeval/utils/datasets/_antiuav.py,sha256=kA_ia1fYNcJiz9SpCvh-Z8iSc7iJrdogjBI3soyaa7A,8304
93
93
  dataeval/utils/datasets/_base.py,sha256=pyfpJda3ku469M3TFRsJn9S2oAiQODOGTlLcdcoEW9U,9031
94
94
  dataeval/utils/datasets/_cifar10.py,sha256=hZc_A30yKYBbv2kvVdEkZ9egyEe6XBUnmksoIAoJ-5Y,8265
95
- dataeval/utils/datasets/_fileio.py,sha256=OASFA9uX3KgfyPb5vza12BlZyAi9Y8Al9lUR_IYPcsM,5449
95
+ dataeval/utils/datasets/_fileio.py,sha256=LEoFVNdryRdi7mKpWw-9D8lA6XMa-Jaszd85bv93POo,5454
96
96
  dataeval/utils/datasets/_milco.py,sha256=iXf4C1I3Eg_3gHKUe4XPi21yFMBO51zxTIqAkGf9bYg,7869
97
97
  dataeval/utils/datasets/_mixin.py,sha256=S8iii-SoYUsFFYNXjw2thlZkpBvRLnZ4XI8wTqOKXgU,1729
98
98
  dataeval/utils/datasets/_mnist.py,sha256=uz46sE1Go3TgGjG6x2cXckSVQ0mSg2mhgk8BUvLWjb0,8149
@@ -102,12 +102,12 @@ dataeval/utils/datasets/_voc.py,sha256=pafY112O80isYkrdy7Quie9SBm_TmYhREuyl8Sxts
102
102
  dataeval/utils/torch/__init__.py,sha256=dn5mjCrFp0b1aL_UEURhONU0Ag0cmXoTOBSGagpkTiA,325
103
103
  dataeval/utils/torch/_blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
104
104
  dataeval/utils/torch/_gmm.py,sha256=XM68GNEP97EjaB1U49-ZXRb81d0CEFnPS910alrcB3g,3740
105
- dataeval/utils/torch/_internal.py,sha256=vHy-DzPhmvE8h3wmWc3aciBJ8nDGzQ1z1jTZgGjmDyM,4154
105
+ dataeval/utils/torch/_internal.py,sha256=HuyBB7NWFI9sUrRbOCZFxOfZjRGPdqr5iF7_DT2S0wo,4159
106
106
  dataeval/utils/torch/models.py,sha256=1idpXyjrYcCBSsbxxRUOto8xr4MJNjDEqQHiIXVU5Zc,9700
107
107
  dataeval/utils/torch/trainer.py,sha256=Oc2lK13uPGhmLYbmAqlPWyKxgG4YJFlnSXCqFHUZbdA,5528
108
108
  dataeval/workflows/__init__.py,sha256=ou8y0KO-d6W5lgmcyLjKlf-J_ckP3vilW7wHkgiDlZ4,255
109
109
  dataeval/workflows/sufficiency.py,sha256=j-R8dg4XE6a66p_oTXG2GNzgg3vGk85CTblxhFXaxog,8513
110
- dataeval-0.86.4.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
111
- dataeval-0.86.4.dist-info/METADATA,sha256=qdxTuVh3WxpHvsdRZhAvQIYxiATJLDixoF97xMFYrXM,5353
112
- dataeval-0.86.4.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
113
- dataeval-0.86.4.dist-info/RECORD,,
110
+ dataeval-0.86.5.dist-info/LICENSE.txt,sha256=uAooygKWvX6NbU9Ran9oG2msttoG8aeTeHSTe5JeCnY,1061
111
+ dataeval-0.86.5.dist-info/METADATA,sha256=qx7aNDgzyAfRRKWjDXkfXojBdsBFnjMgwTVl0JsLbbw,5353
112
+ dataeval-0.86.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
113
+ dataeval-0.86.5.dist-info/RECORD,,