dataeval 0.69.4__py3-none-any.whl → 0.70.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (34)
  1. dataeval/__init__.py +3 -3
  2. dataeval/_internal/detectors/drift/base.py +5 -6
  3. dataeval/_internal/detectors/drift/mmd.py +3 -3
  4. dataeval/_internal/detectors/duplicates.py +62 -45
  5. dataeval/_internal/detectors/merged_stats.py +23 -54
  6. dataeval/_internal/detectors/ood/ae.py +3 -3
  7. dataeval/_internal/detectors/outliers.py +133 -61
  8. dataeval/_internal/interop.py +11 -7
  9. dataeval/_internal/metrics/balance.py +9 -9
  10. dataeval/_internal/metrics/ber.py +3 -3
  11. dataeval/_internal/metrics/divergence.py +3 -3
  12. dataeval/_internal/metrics/diversity.py +6 -6
  13. dataeval/_internal/metrics/parity.py +24 -16
  14. dataeval/_internal/metrics/stats/base.py +231 -0
  15. dataeval/_internal/metrics/stats/boxratiostats.py +159 -0
  16. dataeval/_internal/metrics/stats/datasetstats.py +97 -0
  17. dataeval/_internal/metrics/stats/dimensionstats.py +111 -0
  18. dataeval/_internal/metrics/stats/hashstats.py +73 -0
  19. dataeval/_internal/metrics/stats/labelstats.py +125 -0
  20. dataeval/_internal/metrics/stats/pixelstats.py +117 -0
  21. dataeval/_internal/metrics/stats/visualstats.py +122 -0
  22. dataeval/_internal/metrics/uap.py +2 -2
  23. dataeval/_internal/metrics/utils.py +28 -13
  24. dataeval/_internal/output.py +3 -18
  25. dataeval/_internal/workflows/sufficiency.py +123 -133
  26. dataeval/metrics/stats/__init__.py +14 -3
  27. dataeval/workflows/__init__.py +2 -2
  28. {dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/METADATA +3 -3
  29. {dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/RECORD +31 -26
  30. {dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/WHEEL +1 -1
  31. dataeval/_internal/flags.py +0 -77
  32. dataeval/_internal/metrics/stats.py +0 -397
  33. dataeval/flags/__init__.py +0 -3
  34. {dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/LICENSE.txt +0 -0
dataeval/_internal/metrics/utils.py

@@ -1,10 +1,10 @@
  from __future__ import annotations

- from typing import Any, Callable, Literal, NamedTuple, Sequence
+ from typing import Any, Callable, Literal, Mapping, NamedTuple

  import numpy as np
  import xxhash as xxh
- from numpy.typing import NDArray
+ from numpy.typing import ArrayLike, NDArray
  from PIL import Image
  from scipy.fftpack import dct
  from scipy.signal import convolve2d
@@ -14,6 +14,8 @@ from scipy.spatial.distance import pdist, squareform
  from scipy.stats import entropy as sp_entropy
  from sklearn.neighbors import NearestNeighbors

+ from dataeval._internal.interop import to_numpy
+
  EPSILON = 1e-5
  EDGE_KERNEL = np.array([[-1, -1, -1], [-1, 8, -1], [-1, -1, -1]], dtype=np.int8)
  BIT_DEPTH = (1, 8, 12, 16, 32)
@@ -162,26 +164,26 @@ def infer_categorical(X: NDArray, threshold: float = 0.2) -> NDArray:


  def preprocess_metadata(
-     class_labels: Sequence[int], metadata: list[dict], cat_thresh: float = 0.2
+     class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], cat_thresh: float = 0.2
  ) -> tuple[NDArray, list[str], list[bool]]:
-     # convert class_labels and list of metadata dicts to dict of ndarrays
-     metadata_dict: dict[str, NDArray] = {
-         "class_label": np.asarray(class_labels, dtype=int),
-         **{k: np.array([d[k] for d in metadata]) for k in metadata[0]},
-     }
+     # convert class_labels and dict of lists to matrix of metadata values
+     preprocessed_metadata = {"class_label": np.asarray(class_labels, dtype=int)}

      # map columns of dict that are not numeric (e.g. string) to numeric values
      # that mutual information and diversity functions can accommodate. Each
      # unique string receives a unique integer value.
-     for k, v in metadata_dict.items():
+     for k, v in metadata.items():
          # if not numeric
+         v = to_numpy(v)
          if not np.issubdtype(v.dtype, np.number):
              _, mapped_vals = np.unique(v, return_inverse=True)
-             metadata_dict[k] = mapped_vals
+             preprocessed_metadata[k] = mapped_vals
+         else:
+             preprocessed_metadata[k] = v

-     data = np.stack(list(metadata_dict.values()), axis=-1)
-     names = list(metadata_dict.keys())
-     is_categorical = [infer_categorical(metadata_dict[var], cat_thresh)[0] for var in names]
+     data = np.stack(list(preprocessed_metadata.values()), axis=-1)
+     names = list(preprocessed_metadata.keys())
+     is_categorical = [infer_categorical(preprocessed_metadata[var], cat_thresh)[0] for var in names]

      return data, names, is_categorical

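The `preprocess_metadata` change above replaces the list-of-dicts metadata layout with a single mapping of per-factor arrays, and routes each value through `to_numpy` so torch tensors and other array-likes are accepted alongside lists and ndarrays. A minimal before/after sketch of the call shape (the `altitude` and `location` factor names are hypothetical):

    # 0.69.4: one dict per sample
    preprocess_metadata([0, 1], [{"altitude": 1.2, "location": "north"},
                                 {"altitude": 3.4, "location": "south"}])

    # 0.70.0: one array-like per factor; string factors are still mapped to unique integers
    preprocess_metadata([0, 1], {"altitude": [1.2, 3.4], "location": ["north", "south"]})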
@@ -350,6 +352,19 @@ def normalize_image_shape(image: NDArray) -> NDArray:
      raise ValueError("Images must have 2 or more dimensions.")


+ def normalize_box_shape(bounding_box: NDArray) -> NDArray:
+     """
+     Normalizes the bounding box shape into (N,4).
+     """
+     ndim = bounding_box.ndim
+     if ndim == 1:
+         return np.expand_dims(bounding_box, axis=0)
+     elif ndim > 2:
+         raise ValueError("Bounding boxes must have 2 dimensions: (# of boxes in an image, [X,Y,W,H]) -> (N,4)")
+     else:
+         return bounding_box
+
+
  def edge_filter(image: NDArray, offset: float = 0.5) -> NDArray:
      """
      Returns the image filtered using a 3x3 edge detection kernel:
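The new `normalize_box_shape` mirrors `normalize_image_shape` for bounding boxes: a single `[X, Y, W, H]` box is promoted to shape (1, 4), 2-D input passes through unchanged, and anything higher-dimensional raises. A quick behavior sketch derived from the code above:

    import numpy as np

    normalize_box_shape(np.array([0, 0, 16, 16])).shape  # (1, 4): single box promoted
    normalize_box_shape(np.zeros((3, 4))).shape          # (3, 4): already normalized
    normalize_box_shape(np.zeros((2, 3, 4)))             # ValueError: more than 2 dimensions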
dataeval/_internal/output.py

@@ -3,6 +3,7 @@ from __future__ import annotations
  import inspect
  from datetime import datetime, timezone
  from functools import wraps
+ from typing import Any

  import numpy as np

@@ -17,10 +18,10 @@ class OutputMetadata:
      _state: dict[str, str]
      _version: str

-     def dict(self) -> dict:
+     def dict(self) -> dict[str, Any]:
          return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}

-     def meta(self) -> dict:
+     def meta(self) -> dict[str, Any]:
          return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}


@@ -67,19 +68,3 @@ def set_metadata(module_name: str = "", state_attr: list[str] | None = None):
          return wrapper

      return decorator
-
-
- def populate_defaults(d: dict, c: type) -> dict:
-     def default(t):
-         t = (
-             t if isinstance(t, str) else t._name if hasattr(t, "_name") else t.__name__
-         ).lower() # py3.9 : _name, py3.10 : __name__
-         if t.startswith("dict"):
-             return {}
-         if t.startswith("list"):
-             return []
-         if t.startswith("ndarray"):
-             return np.array([])
-         raise TypeError("Unrecognized annotation type")
-
-     return {k: d[k] if k in d else default(t) for k, t in c.__annotations__.items()}
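For context, the removed `populate_defaults` filled any missing annotated field of a class with an empty container chosen by the annotation's name, roughly as in this sketch (the `Hypothetical` class is illustrative):

    class Hypothetical:
        a: dict
        b: list

    populate_defaults({"a": {"x": 1}}, Hypothetical)  # -> {"a": {"x": 1}, "b": []}

It is removed in 0.70.0 with no replacement in this module.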
dataeval/_internal/workflows/sufficiency.py

@@ -42,6 +42,129 @@ class SufficiencyOutput(OutputMetadata):
          if c != c_v:
              raise ValueError(f"{m} does not contain the expected number ({c}) of data points.")

+     @set_metadata("dataeval.workflows.SufficiencyOutput")
+     def project(
+         self,
+         projection: int | Sequence[int] | NDArray[np.uint],
+     ) -> SufficiencyOutput:
+         """Projects the measures for each value of X
+
+         Parameters
+         ----------
+         projection : int | Sequence[int] | NDArray[np.uint]
+             Step or steps to project
+
+         Returns
+         -------
+         SufficiencyOutput
+             Dataclass containing the projected measures per projection
+
+         Raises
+         ------
+         ValueError
+             If the length of data points in the measures do not match
+             If the steps are not int, Sequence[int] or an ndarray
+         """
+         projection = [projection] if isinstance(projection, int) else projection
+         projection = np.array(projection) if isinstance(projection, Sequence) else projection
+         if not isinstance(projection, np.ndarray):
+             raise ValueError("'steps' must be an int, Sequence[int] or ndarray")
+
+         output = {}
+         for name, measures in self.measures.items():
+             if measures.ndim > 1:
+                 result = []
+                 for i in range(len(measures)):
+                     projected = project_steps(self.params[name][i], projection)
+                     result.append(projected)
+                 output[name] = np.array(result)
+             else:
+                 output[name] = project_steps(self.params[name], projection)
+         return SufficiencyOutput(projection, self.params, output)
+
+     def plot(self, class_names: Sequence[str] | None = None) -> list[Figure]:
+         """Plotting function for data sufficiency tasks
+
+         Parameters
+         ----------
+         class_names : Sequence[str] | None, default None
+             List of class names
+
+         Returns
+         -------
+         List[plt.Figure]
+             List of Figures for each measure
+
+         Raises
+         ------
+         ValueError
+             If the length of data points in the measures do not match
+         """
+         # Extrapolation parameters
+         last_X = self.steps[-1]
+         geomshape = (0.01 * last_X, last_X * 4, len(self.steps))
+         extrapolated = np.geomspace(*geomshape).astype(np.int64)
+
+         # Stores all plots
+         plots = []
+
+         # Create a plot for each measure on one figure
+         for name, measures in self.measures.items():
+             if measures.ndim > 1:
+                 if class_names is not None and len(measures) != len(class_names):
+                     raise IndexError("Class name count does not align with measures")
+                 for i, measure in enumerate(measures):
+                     class_name = str(i) if class_names is None else class_names[i]
+                     fig = plot_measure(
+                         f"{name}_{class_name}",
+                         self.steps,
+                         measure,
+                         self.params[name][i],
+                         extrapolated,
+                     )
+                     plots.append(fig)
+
+             else:
+                 fig = plot_measure(name, self.steps, measures, self.params[name], extrapolated)
+                 plots.append(fig)
+
+         return plots
+
+     def inv_project(self, targets: dict[str, NDArray]) -> dict[str, NDArray]:
+         """
+         Calculate training samples needed to achieve target model metric values.
+
+         Parameters
+         ----------
+         targets : Dict[str, NDArray]
+             Dictionary of target metric scores (from 0.0 to 1.0) that we want
+             to achieve, where the key is the name of the metric.
+
+         Returns
+         -------
+         Dict[str, NDArray]
+             List of the number of training samples needed to achieve each
+             corresponding entry in targets
+         """
+
+         projection = {}
+
+         for name, target in targets.items():
+             if name not in self.measures:
+                 continue
+
+             measure = self.measures[name]
+             if measure.ndim > 1:
+                 projection[name] = np.zeros((len(measure), len(target)))
+                 for i in range(len(measure)):
+                     projection[name][i] = inv_project_steps(
+                         self.params[name][i], target[i] if target.ndim == measure.ndim else target
+                     )
+             else:
+                 projection[name] = inv_project_steps(self.params[name], target)
+
+         return projection
+

  def f_out(n_i: NDArray, x: NDArray) -> NDArray:
      """
@@ -421,136 +544,3 @@ class Sufficiency:
          measures = {k: (v / self.runs).T for k, v in measures.items()}
          params_output = get_curve_params(measures, ranges, niter)
          return SufficiencyOutput(ranges, params_output, measures)
-
-     @classmethod
-     def project(
-         cls,
-         data: SufficiencyOutput,
-         projection: int | Sequence[int] | NDArray[np.uint],
-     ) -> SufficiencyOutput:
-         """Projects the measures for each value of X
-
-         Parameters
-         ----------
-         data : SufficiencyOutput
-             Dataclass containing the average of each measure per substep
-         projection : int | Sequence[int] | NDArray[np.uint]
-             Step or steps to project
-
-         Returns
-         -------
-         SufficiencyOutput
-             Dataclass containing the projected measures per projection
-
-         Raises
-         ------
-         ValueError
-             If the length of data points in the measures do not match
-             If the steps are not int, Sequence[int] or an ndarray
-         """
-         projection = [projection] if isinstance(projection, int) else projection
-         projection = np.array(projection) if isinstance(projection, Sequence) else projection
-         if not isinstance(projection, np.ndarray):
-             raise ValueError("'steps' must be an int, Sequence[int] or ndarray")
-
-         output = {}
-         for name, measures in data.measures.items():
-             if measures.ndim > 1:
-                 result = []
-                 for i in range(len(measures)):
-                     projected = project_steps(data.params[name][i], projection)
-                     result.append(projected)
-                 output[name] = np.array(result)
-             else:
-                 output[name] = project_steps(data.params[name], projection)
-         return SufficiencyOutput(projection, data.params, output)
-
-     @classmethod
-     def plot(cls, data: SufficiencyOutput, class_names: Sequence[str] | None = None) -> list[Figure]:
-         """Plotting function for data sufficiency tasks
-
-         Parameters
-         ----------
-         data : SufficiencyOutput
-             Dataclass containing the average of each measure per substep
-         class_names : Sequence[str] | None, default None
-             List of class names
-
-         Returns
-         -------
-         List[plt.Figure]
-             List of Figures for each measure
-
-         Raises
-         ------
-         ValueError
-             If the length of data points in the measures do not match
-         """
-         # Extrapolation parameters
-         last_X = data.steps[-1]
-         geomshape = (0.01 * last_X, last_X * 4, len(data.steps))
-         extrapolated = np.geomspace(*geomshape).astype(np.int64)
-
-         # Stores all plots
-         plots = []
-
-         # Create a plot for each measure on one figure
-         for name, measures in data.measures.items():
-             if measures.ndim > 1:
-                 if class_names is not None and len(measures) != len(class_names):
-                     raise IndexError("Class name count does not align with measures")
-                 for i, measure in enumerate(measures):
-                     class_name = str(i) if class_names is None else class_names[i]
-                     fig = plot_measure(
-                         f"{name}_{class_name}",
-                         data.steps,
-                         measure,
-                         data.params[name][i],
-                         extrapolated,
-                     )
-                     plots.append(fig)
-
-             else:
-                 fig = plot_measure(name, data.steps, measures, data.params[name], extrapolated)
-                 plots.append(fig)
-
-         return plots
-
-     @classmethod
-     def inv_project(cls, targets: dict[str, NDArray], data: SufficiencyOutput) -> dict[str, NDArray]:
-         """
-         Calculate training samples needed to achieve target model metric values.
-
-         Parameters
-         ----------
-         targets : Dict[str, NDArray]
-             Dictionary of target metric scores (from 0.0 to 1.0) that we want
-             to achieve, where the key is the name of the metric.
-
-         data : SufficiencyOutput
-             Dataclass containing the average of each measure per substep
-
-         Returns
-         -------
-         Dict[str, NDArray]
-             List of the number of training samples needed to achieve each
-             corresponding entry in targets
-         """
-
-         projection = {}
-
-         for name, target in targets.items():
-             if name not in data.measures:
-                 continue
-
-             measure = data.measures[name]
-             if measure.ndim > 1:
-                 projection[name] = np.zeros((len(measure), len(target)))
-                 for i in range(len(measure)):
-                     projection[name][i] = inv_project_steps(
-                         data.params[name][i], target[i] if target.ndim == measure.ndim else target
-                     )
-             else:
-                 projection[name] = inv_project_steps(data.params[name], target)
-
-         return projection
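Taken together, the two hunks above move `project`, `plot`, and `inv_project`, with their logic essentially unchanged, from classmethods on `Sufficiency` that took the output object as an argument to instance methods on `SufficiencyOutput`. A hedged migration sketch, assuming `output` is a `SufficiencyOutput` returned by a prior evaluation, with the step counts and the 0.95 target as placeholders:

    import numpy as np

    # 0.69.4: classmethods on Sufficiency
    #   projected = Sufficiency.project(output, [1000, 2000, 4000])
    #   figures = Sufficiency.plot(output)
    #   needed = Sufficiency.inv_project({"Accuracy": np.array([0.95])}, output)

    # 0.70.0: instance methods on SufficiencyOutput
    projected = output.project([1000, 2000, 4000])
    figures = output.plot()
    needed = output.inv_project({"Accuracy": np.array([0.95])})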
dataeval/metrics/stats/__init__.py

@@ -1,6 +1,17 @@
- from dataeval._internal.metrics.stats import channelstats, imagestats
+ from dataeval._internal.metrics.stats.boxratiostats import boxratiostats
+ from dataeval._internal.metrics.stats.datasetstats import datasetstats
+ from dataeval._internal.metrics.stats.dimensionstats import dimensionstats
+ from dataeval._internal.metrics.stats.hashstats import hashstats
+ from dataeval._internal.metrics.stats.labelstats import labelstats
+ from dataeval._internal.metrics.stats.pixelstats import pixelstats
+ from dataeval._internal.metrics.stats.visualstats import visualstats

  __all__ = [
-     "channelstats",
-     "imagestats",
+     "boxratiostats",
+     "datasetstats",
+     "dimensionstats",
+     "hashstats",
+     "labelstats",
+     "pixelstats",
+     "visualstats",
  ]
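The monolithic `imagestats`/`channelstats` entry points are replaced by one function per statistics category, roughly tracking the old `ImageStat` flag groups (hashes, dimension properties, pixel stats, visuals) plus new label and box-ratio statistics. A hedged usage sketch; the random `images` stack is a placeholder, and the exact signatures and output fields should be checked against the 0.70.0 API docs:

    import numpy as np
    from dataeval.metrics.stats import dimensionstats, hashstats, pixelstats, visualstats

    images = np.random.default_rng(0).integers(0, 256, (8, 3, 64, 64), dtype=np.uint8)

    dims = dimensionstats(images)    # width/height/channel-style properties
    hashes = hashstats(images)       # xxhash/perceptual hashes for duplicate detection
    pixels = pixelstats(images)      # mean/std/skew/kurtosis/entropy-style statistics
    visuals = visualstats(images)    # brightness/contrast-style statistics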
dataeval/workflows/__init__.py

@@ -1,6 +1,6 @@
  from dataeval import _IS_TORCH_AVAILABLE

  if _IS_TORCH_AVAILABLE: # pragma: no cover
-     from dataeval._internal.workflows.sufficiency import Sufficiency
+     from dataeval._internal.workflows.sufficiency import Sufficiency, SufficiencyOutput

-     __all__ = ["Sufficiency"]
+     __all__ = ["Sufficiency", "SufficiencyOutput"]
{dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dataeval
- Version: 0.69.4
+ Version: 0.70.0
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
  Home-page: https://dataeval.ai/
  License: MIT
@@ -32,8 +32,8 @@ Requires-Dist: scipy (>=1.10)
  Requires-Dist: tensorflow (>=2.14.1,<2.16) ; extra == "tensorflow" or extra == "all"
  Requires-Dist: tensorflow-io-gcs-filesystem (>=0.35.0,<0.37) ; extra == "tensorflow" or extra == "all"
  Requires-Dist: tensorflow_probability (>=0.22.1,<0.24) ; extra == "tensorflow" or extra == "all"
- Requires-Dist: torch (>=2.0.1,!=2.2.0) ; extra == "torch" or extra == "all"
- Requires-Dist: torchvision (>=0.16.0) ; extra == "torch" or extra == "all"
+ Requires-Dist: torch (>=2.2.0) ; extra == "torch" or extra == "all"
+ Requires-Dist: torchvision (>=0.17.0) ; extra == "torch" or extra == "all"
  Requires-Dist: xxhash (>=3.3)
  Project-URL: Documentation, https://dataeval.readthedocs.io/
  Project-URL: Repository, https://github.com/aria-ml/dataeval/
{dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/RECORD

@@ -1,36 +1,42 @@
- dataeval/__init__.py,sha256=KOZnb9SovSSuD2UrqV-NS_b5vpfWdQlsweB55fned58,590
+ dataeval/__init__.py,sha256=rWQRN8WyjzjUUZhNkCFFySptSFPM4f67tabVBDly84k,574
  dataeval/_internal/datasets.py,sha256=MwN6xgZW1cA5yIxXZ05qBBz4aO3bjKzIEbZZfa1HkQo,9790
  dataeval/_internal/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dataeval/_internal/detectors/clusterer.py,sha256=hJwELUeAdZZ3OVLIfwalw2P7Zz13q2ZqrV6gx90s44E,20695
  dataeval/_internal/detectors/drift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dataeval/_internal/detectors/drift/base.py,sha256=XSX1VVUxvFFKVFQVsc2WWeaRRmIxuYaIgD_c5H4OraA,15930
+ dataeval/_internal/detectors/drift/base.py,sha256=6L66aGWUGAbWylT_aHmZUSrvx6wM6Fzzlcie98KdNiY,15900
  dataeval/_internal/detectors/drift/cvm.py,sha256=xiyZlf0rAQGG8Z6ZBLPVri805aPRkERrUySwRN8cTZQ,4010
  dataeval/_internal/detectors/drift/ks.py,sha256=aoDx7ps-5vrSI8Q9ii6cwmKnAyaD8tjG69wI-7R3MVQ,4098
- dataeval/_internal/detectors/drift/mmd.py,sha256=j85bwzCiFLNS27WlUFlgpHDMD9yga41ILt-yAr-LABc,7493
+ dataeval/_internal/detectors/drift/mmd.py,sha256=0TD0BpIJkwdjU0i3ndlvYp1ItCNrvSO7gT8r4bEdHXc,7493
  dataeval/_internal/detectors/drift/torch.py,sha256=YhIN85MbUV3C4IJcRvqYdXSWLj5lUeEOb05T5DgB3xo,11552
  dataeval/_internal/detectors/drift/uncertainty.py,sha256=Ot8L42AnFbkij4J3Tis7VzXLv3hfBxoOWBP4UoCEnVs,5125
- dataeval/_internal/detectors/duplicates.py,sha256=qkzbdWuJuUozFLqpnD6CYAGXQb7-aWw2mHr_cxXAfPo,4922
- dataeval/_internal/detectors/merged_stats.py,sha256=WVPxz7n5fUkFKW3kobD_TkKkof51YjfIz4M_4CHh-1s,2517
+ dataeval/_internal/detectors/duplicates.py,sha256=VLDEhXWhdNyU3aA6S7dQmCBDAz0uQY5E_RjJYE1wkcw,5268
+ dataeval/_internal/detectors/merged_stats.py,sha256=okXGrqAgsqfANMxfIjiUQlZWlaIh5TVvIB9UPsOJZ7k,1351
  dataeval/_internal/detectors/ood/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dataeval/_internal/detectors/ood/ae.py,sha256=k8pZP7oPwVyQlv6YcoacNMzpmQZy7W222yYrdXGTYZI,2031
+ dataeval/_internal/detectors/ood/ae.py,sha256=-nFw3taJ2IWU74o8Yks48J2x7aBoxXeL1vP_Ye_60M4,2031
  dataeval/_internal/detectors/ood/aegmm.py,sha256=pffThqXRoLx3GuZXEQBd-xEy5DjAZHV7WSeP2HgM_TI,2403
  dataeval/_internal/detectors/ood/base.py,sha256=Pw34uFEWOJZiG4ciM0ArUkqhiM8WCGl2rc0BwFPu3xM,8240
  dataeval/_internal/detectors/ood/llr.py,sha256=tCo8G7V8VaVuIZ09rg0ZXZmdE0N_zGm7vCfFUnGbGvo,10102
  dataeval/_internal/detectors/ood/vae.py,sha256=WbQugS-bBUTTqQ9PRLHBmSUtk7O2_PN4PBLJE9ieMjw,2921
  dataeval/_internal/detectors/ood/vaegmm.py,sha256=pVUSlVF2jo8uokyks2QzfBJnNtcFWmcF8EQl-azs2Bg,2832
- dataeval/_internal/detectors/outliers.py,sha256=oS8lsCPIM6WtLzUjpMZDfiopZA2fJhsHakmSzZUhqHU,7614
- dataeval/_internal/flags.py,sha256=5hZ5AHXjXRKbWtFC45-J7M9NvJHsT4LKRsPzPMksgfQ,2323
- dataeval/_internal/interop.py,sha256=x4qj4EiBt5NthSxe8prSLrPDAEcipAdyyLwbNyCBaFk,1059
+ dataeval/_internal/detectors/outliers.py,sha256=du4Kd5XrrBlBXyno8K5COkNKP0ByQnGRSsfaTq4ywm0,10345
+ dataeval/_internal/interop.py,sha256=FLXJY-5hwJcKCtruyvaarqynXCMfcLbQSFvGnrWQDPo,1338
  dataeval/_internal/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dataeval/_internal/metrics/balance.py,sha256=eAHvgjiGCH893XSQLqh9j9wgvAECoNPVT8k0u_9Ijzg,6097
- dataeval/_internal/metrics/ber.py,sha256=Onsi47AbT9rMvng-Pbu8LIrYRfLpI13En1FxkFoMKQs,4668
+ dataeval/_internal/metrics/balance.py,sha256=veAeFFmbyDLW6VlQ-NrLfJbQl0AOA3hFD239NSej078,6134
+ dataeval/_internal/metrics/ber.py,sha256=MUpYivU-klsg2019YLyeV9aWDEGyXhcqYg05Vg_pffk,4668
  dataeval/_internal/metrics/coverage.py,sha256=EZVES1rbZW2j_CtQv1VFfSO-UmWcrt5nmqxDErtrG14,3473
- dataeval/_internal/metrics/divergence.py,sha256=nmMUfr9FGnH798eb6xzEiMj4C42rQVthh5HeexiY6EE,4119
- dataeval/_internal/metrics/diversity.py,sha256=_oT0FHsgfLOoe_TLD2Aax4r4jmH6WnOPVIkcl_YjaoY,7582
- dataeval/_internal/metrics/parity.py,sha256=VszQNbHWjct2bCqrIXUZC_qFi4ZIq2Lm-vs-DiarBFo,16244
- dataeval/_internal/metrics/stats.py,sha256=ILKteVMGjrp1s2CECPL_hbLsijIKR2d6II2-8w9oxW8,18105
- dataeval/_internal/metrics/uap.py,sha256=w-wvXXnX16kUq-weaZD2SrJi22LJ8EjOFbOhPxeGejI,2043
- dataeval/_internal/metrics/utils.py,sha256=mSYa-3cHGcsQwPr7zbdpzrnK_8jIXCiAcu2HCcvrtaY,13007
+ dataeval/_internal/metrics/divergence.py,sha256=WTQ1Xx453DH8aCpEmN1Zn6zuCy7NnsHfVphvTYA0L_o,4119
+ dataeval/_internal/metrics/diversity.py,sha256=Us0Nww3wvDH0kvVhDd3KEGXbkY_4-XxmD-ew9fFhqag,7618
+ dataeval/_internal/metrics/parity.py,sha256=TRm4GObItaku3OvxJj1vfxE1fGpwW_N020Nqfs-uFBw,16458
+ dataeval/_internal/metrics/stats/base.py,sha256=dgXAuuFYK0vrl3VPmU5BhjThRBHD6ykE_M2uyCuKDl4,8556
+ dataeval/_internal/metrics/stats/boxratiostats.py,sha256=Ac6nB41q43xHCJRDEXHNgsJF80VE8MpH8_kySxA84BE,6342
+ dataeval/_internal/metrics/stats/datasetstats.py,sha256=zJnBzIthaJPbQFvE0RRx-KvvU0Du7ZSvERW56zeowBU,3703
+ dataeval/_internal/metrics/stats/dimensionstats.py,sha256=RYI8PbiCtlPdli1z4jJ4t05ddDszB9dsnKDJfidaK-c,3789
+ dataeval/_internal/metrics/stats/hashstats.py,sha256=3PUPPmHe2t8VIgfmu9hkyMq7zvxmcdXdLtEqQJvHs5M,2034
+ dataeval/_internal/metrics/stats/labelstats.py,sha256=LTvQTqCnKVOx3ufmHAZIQOI9xYhIoZS-1TAgEjKhYC0,4056
+ dataeval/_internal/metrics/stats/pixelstats.py,sha256=cSOjJ2yTaH_nWd4jqiu96wA39HmU3GkIs1XY2MW4mSw,4367
+ dataeval/_internal/metrics/stats/visualstats.py,sha256=4tIkFE2LNxYEyseb5Lj7BmgueFCwHbwA2JJ9-YsA9QI,4659
+ dataeval/_internal/metrics/uap.py,sha256=EhyEjYtWs1RiXlVrvvGI4gEcMygpu8QHUeOHxfceacY,2043
+ dataeval/_internal/metrics/utils.py,sha256=P3KOybaorAD8Zu4j-3jygKEJld5rwQlgqxMljbVk1Oo,13477
  dataeval/_internal/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dataeval/_internal/models/pytorch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dataeval/_internal/models/pytorch/autoencoder.py,sha256=gmnAHUzzn-fXTUU63SR4ZBjGBLEALWPxmZ_wPzvF_dg,8365
@@ -43,21 +49,20 @@ dataeval/_internal/models/tensorflow/losses.py,sha256=pZH5RnlM9R0RrBde9Lgq32muwA
  dataeval/_internal/models/tensorflow/pixelcnn.py,sha256=lRpRNebMgkCJUnEk1xouVaTfS_YGMQgQhI01wNKAjeM,48420
  dataeval/_internal/models/tensorflow/trainer.py,sha256=xNY0Iw7Qa1TnCuy9N1b77_VduFoW_BhbZjfQCxOVby4,4082
  dataeval/_internal/models/tensorflow/utils.py,sha256=l6jXKMWyQAEI4LpAONq95Xwr7CPgrs408ypf9TuNxkY,8732
- dataeval/_internal/output.py,sha256=bFC2qJxXUc_daQwJHHa9KfFNLuxZANGb7Dpget_TXYs,3049
+ dataeval/_internal/output.py,sha256=qVbOi41dvfQICQ4uxysHPWBRKo1XR61kXHPL_vKOPm0,2545
  dataeval/_internal/utils.py,sha256=gK0z4buuQoUYblkrCiRV9pIESzyikcY-3a08XsQkD7E,1585
  dataeval/_internal/workflows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dataeval/_internal/workflows/sufficiency.py,sha256=0k7Dbk3QmEGkZp2IW4OcZBcrxb4zAp9hC9nXGN1v1cY,18199
+ dataeval/_internal/workflows/sufficiency.py,sha256=jLGfp-d0plfV-M8j7W4W71yNjEMutrVzN7pMA9qLHD0,17807
  dataeval/detectors/__init__.py,sha256=WVlwapZtKXVvrW41Sq30sFd8j2phS8JMsCaLeXfbQ7k,204
  dataeval/detectors/drift/__init__.py,sha256=XtSjoTy6P_lwRzC9Klmd9BYZ3v4qZrATJ-p7gvvHPGk,598
  dataeval/detectors/drift/kernels/__init__.py,sha256=qV_r740iRPw39_kHOttmk3VNikDFKCvF9i1IGbgjf3A,186
  dataeval/detectors/drift/updates/__init__.py,sha256=uwkRV-4WVg0XFX_9futvQ0ggGOEvduDedgCno_eIi4U,149
  dataeval/detectors/linters/__init__.py,sha256=1yxsJw8CFpHsZwn_YUlWpb-4YBet5U6uB--MeRgB6io,234
  dataeval/detectors/ood/__init__.py,sha256=ybWhwbMmWygIwE1A-nYihDfugrj3j0GiuABmVvD7264,583
- dataeval/flags/__init__.py,sha256=qo06_Tk0ul4lOhKSEs0HE2G6WBFvMwNJq77vRX1ynww,72
  dataeval/metrics/__init__.py,sha256=42szGyZrLekNU-T-rwJu-pUoDBdOoStuScB-mnGzjw4,81
  dataeval/metrics/bias/__init__.py,sha256=xqpxCttgzz-hMZQI7_IlaNn4OGZaGVz3KKRd26GbSKE,335
  dataeval/metrics/estimators/__init__.py,sha256=fWQZUIxu88u5POYXN1yoFc-Hxx5B1fveEiiSXmK5kPk,210
- dataeval/metrics/stats/__init__.py,sha256=N5UvO7reDkYX1xFdAQjwALyJwcC2FAbruzd7ZYYW_4I,123
+ dataeval/metrics/stats/__init__.py,sha256=HqorGcA6GSlvLnYALnKduXzJzQo2GPMVpdirXNWB2pY,637
  dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  dataeval/tensorflow/__init__.py,sha256=IH_ELFP9CwKPk_br8_dKi6HeAlwmmV2vgsWdD8IFKXU,72
  dataeval/tensorflow/loss/__init__.py,sha256=E9eB87LNh0o5nUCqssB027EXBsOfEayNHPcNW0QGFdA,101
@@ -67,8 +72,8 @@ dataeval/torch/__init__.py,sha256=ZNGSJJmatdGzbrazw86yNveEXm8smmW63xD-ReA8Nfg,63
  dataeval/torch/models/__init__.py,sha256=YnDnePYpRIKHyYn3F5qR1OObMSb-g0FGvI8X-uTB09E,162
  dataeval/torch/trainer/__init__.py,sha256=Te-qElt8h-Zv8NN0r-VJOEdCPHTQ2yO3rd2MhRiZGZs,93
  dataeval/utils/__init__.py,sha256=ExQ1xj62MjcM9uIu1-g1P2fW0EPJpcIofnvxjQ908c4,172
- dataeval/workflows/__init__.py,sha256=gkU2B6yUiefexcYrBwqfZKNl8BvX8abUjfeNvVBXF4E,186
- dataeval-0.69.4.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
- dataeval-0.69.4.dist-info/METADATA,sha256=R_YlthIsAkOizGWkgXiOCEsD_6F5wJm8qjU4hjhL_c8,4292
- dataeval-0.69.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- dataeval-0.69.4.dist-info/RECORD,,
+ dataeval/workflows/__init__.py,sha256=Yl6YYgHFwUM1porR3yT6ELyoUw5Op9e6QpQACdXoKBU,226
+ dataeval-0.70.0.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
+ dataeval-0.70.0.dist-info/METADATA,sha256=qdgzLlvHmmQNTQqUVkPcaCZJL9QlxUbzPipHcIaSFsI,4284
+ dataeval-0.70.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ dataeval-0.70.0.dist-info/RECORD,,
{dataeval-0.69.4.dist-info → dataeval-0.70.0.dist-info}/WHEEL

@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 1.9.0
+ Generator: poetry-core 1.9.1
  Root-Is-Purelib: true
  Tag: py3-none-any
dataeval/_internal/flags.py

@@ -1,77 +0,0 @@
- from __future__ import annotations
-
- from enum import IntFlag, auto
- from functools import reduce
- from typing import Iterable, TypeVar, cast
-
- TFlag = TypeVar("TFlag", bound=IntFlag)
-
-
- class ImageStat(IntFlag):
-     """
-     Flags for calculating image and channel statistics
-     """
-
-     # HASHES
-     XXHASH = auto()
-     PCHASH = auto()
-
-     # PROPERTIES
-     WIDTH = auto()
-     HEIGHT = auto()
-     SIZE = auto()
-     ASPECT_RATIO = auto()
-     CHANNELS = auto()
-     DEPTH = auto()
-
-     # VISUALS
-     BRIGHTNESS = auto()
-     BLURRINESS = auto()
-     CONTRAST = auto()
-     DARKNESS = auto()
-     MISSING = auto()
-     ZEROS = auto()
-
-     # PIXEL STATS
-     MEAN = auto()
-     STD = auto()
-     VAR = auto()
-     SKEW = auto()
-     KURTOSIS = auto()
-     ENTROPY = auto()
-     PERCENTILES = auto()
-     HISTOGRAM = auto()
-
-     # JOINT FLAGS
-     ALL_HASHES = XXHASH | PCHASH
-     ALL_PROPERTIES = WIDTH | HEIGHT | SIZE | ASPECT_RATIO | CHANNELS | DEPTH
-     ALL_VISUALS = BRIGHTNESS | BLURRINESS | CONTRAST | DARKNESS | MISSING | ZEROS
-     ALL_PIXELSTATS = MEAN | STD | VAR | SKEW | KURTOSIS | ENTROPY | PERCENTILES | HISTOGRAM
-     ALL_CHANNEL_STATS = BRIGHTNESS | CONTRAST | DARKNESS | ZEROS | ALL_PIXELSTATS
-     ALL_STATS = ALL_PROPERTIES | ALL_VISUALS | ALL_PIXELSTATS
-     ALL = ALL_HASHES | ALL_STATS
-
-
- def is_distinct(flag: IntFlag) -> bool:
-     return (flag & (flag - 1) == 0) and flag != 0
-
-
- def to_distinct(flag: TFlag) -> dict[TFlag, str]:
-     """
-     Returns a distinct set of all flags set on the input flag and their names
-
-     NOTE: this is supported natively in Python 3.11, but for earlier versions we need
-     to use a combination of list comprehension and bit fiddling to determine distinct
-     flag values from joint aliases.
-     """
-     if isinstance(flag, Iterable): # >= py311
-         return {f: f.name.lower() for f in flag if f.name}
-     else: # < py311
-         return {f: f.name.lower() for f in list(flag.__class__) if f & flag and is_distinct(f) and f.name}
-
-
- def verify_supported(flag: TFlag, flags: TFlag | Iterable[TFlag]):
-     supported = flags if isinstance(flags, flag.__class__) else cast(TFlag, reduce(lambda a, b: a | b, flags)) # type: ignore
-     unsupported = flag & ~supported
-     if unsupported:
-         raise ValueError(f"Unsupported flags {unsupported} called. Only {supported} flags are supported.")
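With `flags.py` (and the `dataeval.flags` re-export) deleted, there is no longer an `ImageStat` flag to compose; selecting categories is now done by calling the per-category stats functions shown earlier. For reference, the removed helpers behaved as in this sketch, reconstructed from the code above:

    flag = ImageStat.XXHASH | ImageStat.PCHASH  # same value as ImageStat.ALL_HASHES

    is_distinct(ImageStat.XXHASH)  # True: exactly one bit set
    is_distinct(flag)              # False: joint alias with two bits set

    to_distinct(flag)              # {ImageStat.XXHASH: "xxhash", ImageStat.PCHASH: "pchash"}

    verify_supported(ImageStat.MEAN, ImageStat.ALL_HASHES)  # raises ValueError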