dataeval 0.63.0__py3-none-any.whl → 0.64.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. dataeval/__init__.py +3 -3
  2. dataeval/_internal/detectors/clusterer.py +2 -1
  3. dataeval/_internal/detectors/drift/base.py +2 -1
  4. dataeval/_internal/detectors/drift/cvm.py +2 -1
  5. dataeval/_internal/detectors/drift/ks.py +2 -1
  6. dataeval/_internal/detectors/drift/mmd.py +4 -3
  7. dataeval/_internal/detectors/drift/uncertainty.py +1 -2
  8. dataeval/_internal/detectors/duplicates.py +2 -1
  9. dataeval/_internal/detectors/linter.py +1 -1
  10. dataeval/_internal/detectors/ood/ae.py +2 -1
  11. dataeval/_internal/detectors/ood/aegmm.py +2 -1
  12. dataeval/_internal/detectors/ood/base.py +2 -1
  13. dataeval/_internal/detectors/ood/llr.py +3 -2
  14. dataeval/_internal/detectors/ood/vae.py +2 -1
  15. dataeval/_internal/detectors/ood/vaegmm.py +2 -1
  16. dataeval/_internal/interop.py +2 -11
  17. dataeval/_internal/metrics/balance.py +180 -0
  18. dataeval/_internal/metrics/base.py +1 -83
  19. dataeval/_internal/metrics/ber.py +122 -48
  20. dataeval/_internal/metrics/coverage.py +83 -74
  21. dataeval/_internal/metrics/divergence.py +67 -67
  22. dataeval/_internal/metrics/diversity.py +206 -0
  23. dataeval/_internal/metrics/parity.py +300 -155
  24. dataeval/_internal/metrics/stats.py +7 -5
  25. dataeval/_internal/metrics/uap.py +37 -29
  26. dataeval/_internal/metrics/utils.py +393 -0
  27. dataeval/_internal/utils.py +64 -0
  28. dataeval/metrics/__init__.py +25 -6
  29. dataeval/utils/__init__.py +9 -0
  30. {dataeval-0.63.0.dist-info → dataeval-0.64.0.dist-info}/METADATA +1 -1
  31. dataeval-0.64.0.dist-info/RECORD +60 -0
  32. dataeval/_internal/functional/__init__.py +0 -0
  33. dataeval/_internal/functional/ber.py +0 -63
  34. dataeval/_internal/functional/coverage.py +0 -75
  35. dataeval/_internal/functional/divergence.py +0 -16
  36. dataeval/_internal/functional/hash.py +0 -79
  37. dataeval/_internal/functional/metadata.py +0 -136
  38. dataeval/_internal/functional/metadataparity.py +0 -190
  39. dataeval/_internal/functional/uap.py +0 -6
  40. dataeval/_internal/functional/utils.py +0 -158
  41. dataeval/_internal/maite/__init__.py +0 -0
  42. dataeval/_internal/maite/utils.py +0 -30
  43. dataeval/_internal/metrics/metadata.py +0 -610
  44. dataeval/_internal/metrics/metadataparity.py +0 -67
  45. dataeval-0.63.0.dist-info/RECORD +0 -68
  46. {dataeval-0.63.0.dist-info → dataeval-0.64.0.dist-info}/LICENSE.txt +0 -0
  47. {dataeval-0.63.0.dist-info → dataeval-0.64.0.dist-info}/WHEEL +0 -0
@@ -4,39 +4,47 @@ FR Test Statistic based estimate for the upperbound
  average precision using empirical mean precision
  """

- from typing import Dict
+ from typing import NamedTuple

- from dataeval._internal.functional.uap import uap
- from dataeval._internal.interop import ArrayLike, to_numpy
- from dataeval._internal.metrics.base import EvaluateMixin
+ from numpy.typing import ArrayLike
+ from sklearn.metrics import average_precision_score

+ from dataeval._internal.interop import to_numpy

- class UAP(EvaluateMixin):
+
+ class UAPOutput(NamedTuple):
+     """
+     Attributes
+     ----------
+     uap : float
+         The empirical mean precision estimate
      """
-     FR Test Statistic based estimate of the empirical mean precision

+     uap: float
+
+
+ def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
+     """
+     FR Test Statistic based estimate of the empirical mean precision for
+     the upperbound average precision
+
+     Parameters
+     ----------
+     labels : ArrayLike
+         A numpy array of n_samples of class labels with M unique classes.
+     scores : ArrayLike
+         A 2D array of class probabilities per image
+
+     Returns
+     -------
+     UAPOutput
+         uap : The empirical mean precision estimate
+
+     Raises
+     ------
+     ValueError
+         If unique classes M < 2
      """

-     def evaluate(self, labels: ArrayLike, scores: ArrayLike) -> Dict[str, float]:
-         """
-         Estimates the upperbound average precision
-
-         Parameters
-         ----------
-         labels : ArrayLike
-             A numpy array of n_samples of class labels with M unique classes.
-         scores : ArrayLike
-             A 2D array of class probabilities per image
-
-         Returns
-         -------
-         Dict[str, float]
-             uap : The empirical mean precision estimate
-
-         Raises
-         ------
-         ValueError
-             If unique classes M < 2
-         """
-
-         return {"uap": uap(to_numpy(labels), to_numpy(scores))}
+     precision = float(average_precision_score(to_numpy(labels), to_numpy(scores), average="weighted"))
+     return UAPOutput(precision)
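
Note: the UAP metric moves from a class-based to a functional API in this release. A minimal migration sketch follows; the label and score arrays are illustrative placeholders, not taken from the diff.

import numpy as np
from dataeval.metrics import uap

labels = np.array([0, 0, 1, 1, 2, 2])  # n_samples class labels, M=3 classes
scores = np.array([
    [0.8, 0.1, 0.1], [0.7, 0.2, 0.1],  # one row of class probabilities per image
    [0.1, 0.8, 0.1], [0.2, 0.7, 0.1],
    [0.1, 0.1, 0.8], [0.1, 0.2, 0.7],
])

# 0.63.0: UAP().evaluate(labels, scores) returned {"uap": <float>}
result = uap(labels, scores)
print(result.uap)  # NamedTuple field replaces the dict key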
@@ -0,0 +1,393 @@
+ from typing import Any, Callable, Dict, List, Literal, NamedTuple, Optional, Sequence, Tuple, Union
+
+ import numpy as np
+ import xxhash as xxh
+ from PIL import Image
+ from scipy.fftpack import dct
+ from scipy.signal import convolve2d
+ from scipy.sparse import csr_matrix
+ from scipy.sparse.csgraph import minimum_spanning_tree as mst
+ from scipy.spatial.distance import pdist, squareform
+ from scipy.stats import entropy as sp_entropy
+ from sklearn.neighbors import NearestNeighbors
+
+ EPSILON = 1e-5
+ EDGE_KERNEL = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype=np.int8)
+ BIT_DEPTH = (1, 8, 12, 16, 32)
+ HASH_SIZE = 8
+ MAX_FACTOR = 4
+
+
+ def get_method(method_map: Dict[str, Callable], method: str) -> Callable:
+     if method not in method_map:
+         raise ValueError(f"Specified method {method} is not a valid method: {method_map}.")
+     return method_map[method]
+
+
+ def get_counts(
+     data: np.ndarray, names: List[str], is_categorical: List[bool], subset_mask: Optional[np.ndarray] = None
+ ) -> Tuple[Dict, Dict]:
+     """
+     Initialize dictionary of histogram counts --- treat categorical values
+     as histogram bins.
+
+     Parameters
+     ----------
+     subset_mask: Optional[np.ndarray[bool]]
+         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
+
+     Returns
+     -------
+     counts: Dict
+         Histogram counts per metadata factor in `factors`. Each
+         factor will have a different number of bins. Counts get reused
+         across metrics, so hist_counts are cached but only if computed
+         globally, i.e. without masked samples.
+     bins: Dict
+         Histogram bin edges (or unique values, for categorical factors) per metadata factor.
+     """
+
+     hist_counts, hist_bins = {}, {}
+     # np.where needed to satisfy linter
+     mask = np.where(subset_mask if subset_mask is not None else np.ones(data.shape[0], dtype=bool))
+
+     for cdx, fn in enumerate(names):
+         # linter doesn't like double indexing
+         col_data = data[mask, cdx].squeeze()
+         if is_categorical[cdx]:
+             # if discrete, use unique values as bins
+             bins, cnts = np.unique(col_data, return_counts=True)
+         else:
+             bins = hist_bins.get(fn, "auto")
+             cnts, bins = np.histogram(col_data, bins=bins, density=True)
+
+         hist_counts[fn] = cnts
+         hist_bins[fn] = bins
+
+     return hist_counts, hist_bins
+
+
+ def entropy(
+     data: np.ndarray,
+     names: List[str],
+     is_categorical: List[bool],
+     normalized: bool = False,
+     subset_mask: Optional[np.ndarray] = None,
+ ) -> np.ndarray:
+     """
+     Meant for use with the bias metrics: balance, diversity, and their
+     classwise variants.
+
+     Compute entropy for discrete/categorical variables and, through standard
+     histogram binning, for continuous variables.
+
+     Parameters
+     ----------
+     normalized: bool
+         Flag that determines whether or not to normalize entropy by log(num_bins)
+     subset_mask: Optional[np.ndarray[bool]]
+         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
+
+     Notes
+     -----
+     For continuous variables, histogram bins are chosen automatically. See
+     numpy.histogram for details.
+
+     Returns
+     -------
+     ent: np.ndarray[float]
+         Entropy estimate per column of `data`
+
+     See Also
+     --------
+     numpy.histogram
+     scipy.stats.entropy
+     """
+
+     num_factors = len(names)
+     hist_counts, _ = get_counts(data, names, is_categorical, subset_mask)
+
+     ev_index = np.empty(num_factors)
+     for col, cnts in enumerate(hist_counts.values()):
+         # entropy in nats, normalizes counts
+         ev_index[col] = sp_entropy(cnts)
+         if normalized:
+             if len(cnts) == 1:
+                 # log(0)
+                 ev_index[col] = 0
+             else:
+                 ev_index[col] /= np.log(len(cnts))
+     return ev_index
+
+
+ def get_num_bins(
+     data: np.ndarray, names: List[str], is_categorical: List[bool], subset_mask: Optional[np.ndarray] = None
+ ) -> np.ndarray:
+     """
+     Number of bins or unique values for each metadata factor, used to
+     normalize entropy/diversity.
+
+     Parameters
+     ----------
+     subset_mask: Optional[np.ndarray[bool]]
+         Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts
+     """
+     # likely cached
+     hist_counts, _ = get_counts(data, names, is_categorical, subset_mask)
+     num_bins = np.empty(len(hist_counts))
+     for idx, cnts in enumerate(hist_counts.values()):
+         num_bins[idx] = len(cnts)
+
+     return num_bins
+
+
+ def infer_categorical(X: np.ndarray, threshold: float = 0.5) -> np.ndarray:
+     """
+     Compute fraction of feature values that are unique --- intended to be used
+     for inferring whether variables are categorical.
+     """
+     if X.ndim == 1:
+         X = np.expand_dims(X, axis=1)
+     num_samples = X.shape[0]
+     pct_unique = np.empty(X.shape[1])
+     for col in range(X.shape[1]):  # type: ignore
+         uvals = np.unique(X[:, col], axis=0)
+         pct_unique[col] = len(uvals) / num_samples
+     return pct_unique < threshold
+
+
+ def preprocess_metadata(class_labels: Sequence[int], metadata: List[Dict]) -> Tuple[np.ndarray, List[str], List[bool]]:
+     # convert class_labels and list of metadata dicts to dict of ndarrays
+     metadata_dict: Dict[str, np.ndarray] = {
+         "class_label": np.asarray(class_labels, dtype=int),
+         **{k: np.array([d[k] for d in metadata]) for k in metadata[0]},
+     }
+
+     # map columns of dict that are not numeric (e.g. string) to numeric values
+     # that mutual information and diversity functions can accommodate. Each
+     # unique string receives a unique integer value.
+     for k, v in metadata_dict.items():
+         # if not numeric
+         if not np.issubdtype(v.dtype, np.number):
+             _, mapped_vals = np.unique(v, return_inverse=True)
+             metadata_dict[k] = mapped_vals
+
+     data = np.stack(list(metadata_dict.values()), axis=-1)
+     names = list(metadata_dict.keys())
+     is_categorical = [infer_categorical(metadata_dict[var], 0.25)[0] for var in names]
+
+     return data, names, is_categorical
+
+
+ def minimum_spanning_tree(X: np.ndarray) -> Any:
+     """
+     Returns the minimum spanning tree from a NumPy image array.
+
+     Parameters
+     ----------
+     X : np.ndarray
+         NumPy image array
+
+     Returns
+     -------
+     Data representing the minimum spanning tree
+     """
+     # All features belong on second dimension
+     X = X.reshape((X.shape[0], -1))
+     # We add a small constant to the distance matrix to ensure scipy interprets
+     # the input graph as fully-connected.
+     dense_eudist = squareform(pdist(X)) + EPSILON
+     eudist_csr = csr_matrix(dense_eudist)
+     return mst(eudist_csr)
+
+
+ def get_classes_counts(labels: np.ndarray) -> Tuple[int, int]:
+     """
+     Returns the classes and counts from an array of labels
+
+     Parameters
+     ----------
+     labels : np.ndarray
+         NumPy labels array
+
+     Returns
+     -------
+     Classes and counts
+
+     Raises
+     ------
+     ValueError
+         If the number of unique classes is less than 2
+     """
+     classes, counts = np.unique(labels, return_counts=True)
+     M = len(classes)
+     if M < 2:
+         raise ValueError("Label vector contains less than 2 classes!")
+     N = np.sum(counts).astype(int)
+     return M, N
+
+
+ def compute_neighbors(
+     A: np.ndarray,
+     B: np.ndarray,
+     k: int = 1,
+     algorithm: Literal["auto", "ball_tree", "kd_tree"] = "auto",
+ ) -> np.ndarray:
+     """
+     For each sample in A, compute the nearest neighbor in B
+
+     Parameters
+     ----------
+     A, B : np.ndarray
+         Arrays of shape (n_samples, n_features)
+     k : int
+         The number of neighbors to find
+     algorithm : Literal
+         Tree method for nearest neighbor (auto, ball_tree or kd_tree)
+
+     Note
+     ----
+     Do not use kd_tree if n_features > 20
+
+     Returns
+     -------
+     np.ndarray
+         Closest points to each point in A and B
+
+     See Also
+     --------
+     sklearn.neighbors.NearestNeighbors
+     """
+
+     nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm=algorithm).fit(B)
+     nns = nbrs.kneighbors(A)[1]
+     nns = nns[:, 1:].squeeze()
+
+     return nns
+
+
+ class BitDepth(NamedTuple):
+     depth: int
+     pmin: Union[float, int]
+     pmax: Union[float, int]
+
+
+ def get_bitdepth(image: np.ndarray) -> BitDepth:
+     """
+     Approximates the bit depth of the image using the
+     min and max pixel values.
+     """
+     pmin, pmax = np.min(image), np.max(image)
+     if pmin < 0:
+         return BitDepth(0, pmin, pmax)
+     else:
+         depth = ([x for x in BIT_DEPTH if 2**x > pmax] or [max(BIT_DEPTH)])[0]
+         return BitDepth(depth, 0, 2**depth - 1)
+
+
+ def rescale(image: np.ndarray, depth: int = 1) -> np.ndarray:
+     """
+     Rescales the image using the bit depth provided.
+     """
+     bitdepth = get_bitdepth(image)
+     if bitdepth.depth == depth:
+         return image
+     else:
+         normalized = (image + bitdepth.pmin) / (bitdepth.pmax - bitdepth.pmin)
+         return normalized * (2**depth - 1)
+
+
+ def normalize_image_shape(image: np.ndarray) -> np.ndarray:
+     """
+     Normalizes the image shape into (C,H,W).
+     """
+     ndim = image.ndim
+     if ndim == 2:
+         return np.expand_dims(image, axis=0)
+     elif ndim == 3:
+         return image
+     elif ndim > 3:
+         # Slice all but the last 3 dimensions
+         return image[(0,) * (ndim - 3)]
+     else:
+         raise ValueError("Images must have 2 or more dimensions.")
+
+
+ def edge_filter(image: np.ndarray, offset: float = 0.5) -> np.ndarray:
+     """
+     Returns the image filtered using a 3x3 edge detection kernel:
+     [[ -1, -1, -1 ],
+      [ -1,  8, -1 ],
+      [ -1, -1, -1 ]]
+     """
+     edges = convolve2d(image, EDGE_KERNEL, mode="same", boundary="symm") + offset
+     np.clip(edges, 0, 255, edges)
+     return edges
+
+
+ def pchash(image: np.ndarray) -> str:
+     """
+     Performs a perceptual hash on an image by resizing to a square NxN image
+     using the Lanczos algorithm where N is 32x32 or the largest multiple of
+     8 that is smaller than the input image dimensions. The resampled image
+     is compressed using a discrete cosine transform and the lowest frequency
+     component is encoded as a bit array of greater or less than median value
+     and returned as a hex string.
+
+     Parameters
+     ----------
+     image : np.ndarray
+         An image as a numpy array in CxHxW format
+
+     Returns
+     -------
+     str
+         The hex string hash of the image using perceptual hashing
+     """
+     # Verify that the image is at least larger than an 8x8 image
+     min_dim = min(image.shape[-2:])
+     if min_dim < HASH_SIZE + 1:
+         raise ValueError(f"Image must be larger than {HASH_SIZE}x{HASH_SIZE} for fuzzy hashing.")
+
+     # Calculates the dimensions of the resized square image
+     resize_dim = HASH_SIZE * min((min_dim - 1) // HASH_SIZE, MAX_FACTOR)
+
+     # Normalizes the image to CxHxW and takes the mean over all the channels
+     normalized = np.mean(normalize_image_shape(image), axis=0).squeeze()
+
+     # Rescales the pixel values to an 8-bit 0-255 image
+     rescaled = rescale(normalized, 8).astype(np.uint8)
+
+     # Resizes the image using the Lanczos algorithm to a square image
+     im = np.array(Image.fromarray(rescaled).resize((resize_dim, resize_dim), Image.Resampling.LANCZOS))
+
+     # Performs discrete cosine transforms to compress the image information and takes the lowest frequency component
+     transform = dct(dct(im.T).T)[:HASH_SIZE, :HASH_SIZE]
+
+     # Encodes the transform as a bit array over the median value
+     diff = transform > np.median(transform)
+
+     # Pads the front of the bit array to a multiple of 8 with False
+     padded = np.full(int(np.ceil(diff.size / 8) * 8), False)
+     padded[-diff.size :] = diff.ravel()
+
+     # Converts the bit array to a hex string and strips leading 0s
+     hash_hex = np.packbits(padded).tobytes().hex().lstrip("0")
+     return hash_hex if hash_hex else "0"
+
+
+ def xxhash(image: np.ndarray) -> str:
+     """
+     Performs a fast non-cryptographic hash using the xxhash algorithm
+     (xxhash.com) against the image as a flattened bytearray. The hash
+     is returned as a hex string.
+
+     Parameters
+     ----------
+     image : np.ndarray
+         An image as a numpy array
+
+     Returns
+     -------
+     str
+         The hex string hash of the image using the xxHash algorithm
+     """
+     return xxh.xxh3_64_hexdigest(image.ravel().tobytes())
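
Note: this new module consolidates the hashing, image, and graph helpers that the detectors and metrics call internally. It lives under the private _internal package, so the direct import below is only an illustrative sketch, using a random placeholder image rather than anything from the diff.

import numpy as np
from dataeval._internal.metrics.utils import pchash, xxhash

rng = np.random.default_rng(0)
image = rng.integers(0, 256, size=(3, 64, 64), dtype=np.uint8)  # CxHxW placeholder image

print(xxhash(image))  # exact hash over the raw bytes; any pixel change alters it
print(pchash(image))  # perceptual hash; tolerant of resizing and small edits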
@@ -0,0 +1,64 @@
+ from collections import defaultdict
+ from typing import Any, Dict, List
+
+ from torch.utils.data import Dataset
+
+
+ def read_dataset(dataset: Dataset) -> List[List[Any]]:
+     """
+     Extract information from a dataset at each index into individual lists,
+     one per position in the dataset's return tuple
+
+     Parameters
+     ----------
+     dataset : torch.utils.data.Dataset
+         Input dataset
+
+     Returns
+     -------
+     List[List[Any]]
+         All objects in individual lists based on return position from dataset
+
+     Warning
+     -------
+     No type checking is done between lists or data inside lists
+
+     See Also
+     --------
+     torch.utils.data.Dataset
+
+     Examples
+     --------
+     >>> import numpy as np
+     >>> data = np.ones((10, 3, 3))
+     >>> labels = np.ones((10,))
+     >>> class ICDataset:
+     ...     def __init__(self, data, labels):
+     ...         self.data = data
+     ...         self.labels = labels
+     ...
+     ...     def __getitem__(self, idx):
+     ...         return self.data[idx], self.labels[idx]
+
+     >>> ds = ICDataset(data, labels)
+     >>> result = read_dataset(ds)
+     >>> len(result) == 2
+     True
+     >>> np.asarray(result[0]).shape == (10, 3, 3)  # 10 3x3 images
+     True
+     >>> np.asarray(result[1]).shape == (10,)  # 10 labels
+     True
+     """
+
+     ddict: Dict[int, List] = defaultdict(list)
+
+     for data in dataset:
+         # Convert to tuple if single return (e.g. images only)
+         if not isinstance(data, tuple):
+             data = (data,)
+
+         for i, d in enumerate(data):
+             ddict[i].append(d)
+
+     return list(ddict.values())
@@ -1,8 +1,27 @@
- from dataeval._internal.metrics.ber import BER
- from dataeval._internal.metrics.coverage import Coverage
- from dataeval._internal.metrics.divergence import Divergence
- from dataeval._internal.metrics.parity import Parity
+ from typing import List
+
+ __all__: List[str] = []
+
+ from dataeval._internal.metrics.balance import balance, balance_classwise
+ from dataeval._internal.metrics.ber import ber
+ from dataeval._internal.metrics.coverage import coverage
+ from dataeval._internal.metrics.divergence import divergence
+ from dataeval._internal.metrics.diversity import diversity, diversity_classwise
+ from dataeval._internal.metrics.parity import parity, parity_metadata
  from dataeval._internal.metrics.stats import ChannelStats, ImageStats
- from dataeval._internal.metrics.uap import UAP
+ from dataeval._internal.metrics.uap import uap

- __all__ = ["BER", "Coverage", "Divergence", "Parity", "ChannelStats", "ImageStats", "UAP"]
+ __all__ += [
+     "balance",
+     "balance_classwise",
+     "ber",
+     "coverage",
+     "divergence",
+     "diversity",
+     "diversity_classwise",
+     "parity",
+     "parity_metadata",
+     "ChannelStats",
+     "ImageStats",
+     "uap",
+ ]
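
Note: every metric is now exposed as a lowercase function rather than a class (per the new __all__ above), so downstream imports flatten to, for example:

from dataeval.metrics import balance, ber, coverage, divergence, diversity, parity, uap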
@@ -0,0 +1,9 @@
+ from importlib.util import find_spec
+ from typing import List
+
+ __all__: List[str] = []
+
+ if find_spec("torch") is not None:  # pragma: no cover
+     from dataeval._internal.utils import read_dataset
+
+     __all__ += ["read_dataset"]
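
Note: because the export is guarded by find_spec, read_dataset is only importable when torch is installed. A defensive consumer-side pattern (illustrative, not from the diff) would be:

try:
    from dataeval.utils import read_dataset  # present only when torch is installed
except ImportError:
    read_dataset = None  # fall back, or require torch explicitly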
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dataeval
- Version: 0.63.0
+ Version: 0.64.0
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
  Home-page: https://dataeval.ai/
  License: MIT
@@ -0,0 +1,60 @@
+ dataeval/__init__.py,sha256=5krxzT8KNetiYE6ByxRgCTbHG7EHH-Fm9Fof6Ta3fUo,424
+ dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/_internal/detectors/clusterer.py,sha256=6VklhUH8FvS2ATUAgb-7Q4XYHvQrDMZtkYeFnEznMfU,20328
+ dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/_internal/detectors/drift/base.py,sha256=TRm-FcSM7Nv6nxqfyIzVZ_ysGdUmyqa5okNp7-gVZAY,9127
+ dataeval/_internal/detectors/drift/cvm.py,sha256=cnAJZsBrvLsRaAUSgFvhX-ZNzE6V-s5irySvRW5-dOs,4025
+ dataeval/_internal/detectors/drift/ks.py,sha256=Bfv-hVbghfGbRGEp_aGpkP5G2TnW6E5wIGdfx7nmZT0,4028
+ dataeval/_internal/detectors/drift/mmd.py,sha256=RX_djx_rC9NQNdapTeNplP6x-4-JiMBYfIUNarakjNg,7056
+ dataeval/_internal/detectors/drift/torch.py,sha256=NsQYfDVRcCGmU8k6oBG_aVzmML1zre-xUKBVK1W680o,10872
+ dataeval/_internal/detectors/drift/uncertainty.py,sha256=jiqibgOmk37n2qBP-fEAE0Z-dUxyBRptQdT1V52yCxg,5323
+ dataeval/_internal/detectors/duplicates.py,sha256=0m7E7EAvUHGfaVzQ_KgKbqDBW1jo8XrRd82uKRn1Gf0,2155
+ dataeval/_internal/detectors/linter.py,sha256=Dmy5Rfuxf3jlGNK6DO6qo3puN_X1yh8N4svSem1vysE,5278
+ dataeval/_internal/detectors/ood/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/_internal/detectors/ood/ae.py,sha256=FjqMucicFsDIKJMAOjWpKBohrPM4F1ubFLJk91GVqio,2681
+ dataeval/_internal/detectors/ood/aegmm.py,sha256=Kf9R5q-hoRg6RUHlJG-2oo52ZKeQmJQbxG0kFtHh6zA,2416
+ dataeval/_internal/detectors/ood/base.py,sha256=AfbNF_l8h1lDQ41SKlCHYWFjJ9YfAXuMMG3z9f973fM,6973
+ dataeval/_internal/detectors/ood/llr.py,sha256=alel7l8hhvBNErAkDtcpjL1IzWRDNcIwWV1eX6KDjEI,10172
+ dataeval/_internal/detectors/ood/vae.py,sha256=ntabTTTmPhJ18giZ7A64mxpJvTH9pIHmHPGGnu-gA8g,2987
+ dataeval/_internal/detectors/ood/vaegmm.py,sha256=opBfFLuXEAIMa8E6scwf-GWbZbuXnsqXlXTbLN4MoYg,2861
+ dataeval/_internal/flags.py,sha256=dRApeFkdSXFbYHSmvzgUP78zH8jUGtfzKFfLQtX0Q18,883
+ dataeval/_internal/interop.py,sha256=wkNsLlmYYSHZqq0T204x8j9DIGIue9V2S0WQGPKQD6Y,1030
+ dataeval/_internal/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/_internal/metrics/balance.py,sha256=i6m7qHSiY2TrKGneQtEpLWDPBkoqxCZsbVhoKnkwv1E,6421
+ dataeval/_internal/metrics/base.py,sha256=TIreZWP1ynntqmDUu7-UED_Y3WpVrF28vGb6gZfqMIg,318
+ dataeval/_internal/metrics/ber.py,sha256=9NxMCS78KsCLag2ZYJ-G16pgReSrkEfWm1hl-S5h_yU,4560
+ dataeval/_internal/metrics/coverage.py,sha256=UKfvtAPIl043qCxfrLaWrEzyJGgrLdPM3AIvNi6wl8k,3744
+ dataeval/_internal/metrics/divergence.py,sha256=JNQVRHt363iEZucQ3Fq3Re2PbA0I1dR7a0TJSkMRX64,3238
+ dataeval/_internal/metrics/diversity.py,sha256=Lnc5QD6LcxLrFR9wHWg1FspD2KuKZTVht_MUzgN1EKs,6965
+ dataeval/_internal/metrics/parity.py,sha256=6c0WpIoyM7hRsPELnyT2Qi_UYNcRVbXd_e7-i3-kuYI,11637
+ dataeval/_internal/metrics/stats.py,sha256=T7VM4g5zhJIhfpw4XGDgleojhEqUI3NwsabYuzZ_g8w,12581
+ dataeval/_internal/metrics/uap.py,sha256=9oMG_MhTYiNf2KCZ-fHlCUGBzsdXEVpvakHqVVMsM2s,1174
+ dataeval/_internal/metrics/utils.py,sha256=f1-R0yCUb0r7Fb0H25o7MGRFrseX4WkJDdzgPuYMfoc,12302
+ dataeval/_internal/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/_internal/models/pytorch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/_internal/models/pytorch/autoencoder.py,sha256=iK3Z9claesU_pJkRaiFJIZ9zKZg-Qj8ugzVYTTokDbE,6123
+ dataeval/_internal/models/pytorch/blocks.py,sha256=pm2xwsDZjZJYXrhhiz8husvh2vHmrkFMSYEn-EDUD5Q,1354
+ dataeval/_internal/models/pytorch/utils.py,sha256=Qgwym1PxGuwxbXCKUT-8r6Iyrxqm7x94oj45Vf5_CjE,1675
+ dataeval/_internal/models/tensorflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/_internal/models/tensorflow/autoencoder.py,sha256=rErnOfDFTd7e4brSGQ2Lr1x1kNjSEHdbOREOtUfIhIM,9975
+ dataeval/_internal/models/tensorflow/gmm.py,sha256=wnqQKm3fURuvBROUd2fitCqzKViDo-g0-Djr3TBHZ3U,3640
+ dataeval/_internal/models/tensorflow/losses.py,sha256=3y6tHm7PTQ7hmasJDwTXjdARjCUWycoXqSyXJ1uT2mM,3766
+ dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=B5cwB2IGPw-7b8klt82j_60g_IvqSiDELxvbiBYJtAo,48068
+ dataeval/_internal/models/tensorflow/trainer.py,sha256=2KHtMRniVselCaDXeb8QEfX-wMRsPfT1xiG2gUQgelg,4090
+ dataeval/_internal/models/tensorflow/utils.py,sha256=uK_fQ1JXUSVi0kgnhd9eRArlr36OzXUEdL4inJZCs-8,8579
+ dataeval/_internal/utils.py,sha256=umvc_vN5c5IR0lz2F1U2YjA3VZloKTAEp9BQx8rSk6g,1561
+ dataeval/_internal/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/_internal/workflows/sufficiency.py,sha256=QZQdhERVr3EmPA2sAFFudSDL4KLM0eAvYgV38jhjzaI,18374
+ dataeval/detectors/__init__.py,sha256=I2e7YWb55RRlKQll85Z6KdN5wdBa53smn-_fcZIsCwA,1507
+ dataeval/flags/__init__.py,sha256=1-HmwmtfPkHWwqXUjDwWko396qAKBeaSvqVsQZLrzD0,170
+ dataeval/metrics/__init__.py,sha256=pY6E04nEkbSTQsWJ4rNFlkvcT3-aWSEO1dOctynSotg,787
+ dataeval/models/__init__.py,sha256=onevPb5wznCggowBnVT0OUa8uBJXZCbrkFuek1UFvOs,293
+ dataeval/models/tensorflow/__init__.py,sha256=A1XRxVGHefuvh_WpaKE1x95pRD1FecuFp66iuNPA_5U,424
+ dataeval/models/torch/__init__.py,sha256=su7P9DF9LChlVCNHWG6d7s_yeIfWQbhCYWIkzJe0Qig,190
+ dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ dataeval/utils/__init__.py,sha256=bgUXeumTEspt2Q76YyEliGrnS-_incswY-pDexPdSCc,229
+ dataeval/workflows/__init__.py,sha256=ObgS1cVYFRzFZWbNzGs2OcU02IVkJkAMHNnlnSNTMCE,208
+ dataeval-0.64.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
+ dataeval-0.64.0.dist-info/METADATA,sha256=HUYwlnRhnTLqcZQ9wDo0ZVpzpeak0CiQgKMKdg39MHE,4217
+ dataeval-0.64.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ dataeval-0.64.0.dist-info/RECORD,,