dataeval 0.74.2__py3-none-any.whl → 0.75.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (66)
  1. dataeval/__init__.py +27 -23
  2. dataeval/detectors/__init__.py +2 -2
  3. dataeval/detectors/drift/__init__.py +14 -12
  4. dataeval/detectors/drift/base.py +1 -1
  5. dataeval/detectors/drift/cvm.py +1 -1
  6. dataeval/detectors/drift/ks.py +1 -1
  7. dataeval/detectors/drift/mmd.py +6 -5
  8. dataeval/detectors/drift/torch.py +12 -12
  9. dataeval/detectors/drift/uncertainty.py +3 -2
  10. dataeval/detectors/linters/__init__.py +4 -4
  11. dataeval/detectors/linters/clusterer.py +2 -7
  12. dataeval/detectors/linters/duplicates.py +6 -10
  13. dataeval/detectors/linters/outliers.py +4 -2
  14. dataeval/detectors/ood/__init__.py +3 -10
  15. dataeval/detectors/ood/{ae_torch.py → ae.py} +6 -4
  16. dataeval/detectors/ood/base.py +64 -161
  17. dataeval/detectors/ood/metadata_ks_compare.py +34 -42
  18. dataeval/detectors/ood/metadata_least_likely.py +3 -3
  19. dataeval/detectors/ood/metadata_ood_mi.py +6 -5
  20. dataeval/detectors/ood/mixin.py +146 -0
  21. dataeval/detectors/ood/output.py +63 -0
  22. dataeval/interop.py +6 -5
  23. dataeval/{logging.py → log.py} +2 -0
  24. dataeval/metrics/__init__.py +2 -2
  25. dataeval/metrics/bias/__init__.py +9 -12
  26. dataeval/metrics/bias/balance.py +10 -8
  27. dataeval/metrics/bias/coverage.py +52 -4
  28. dataeval/metrics/bias/diversity.py +42 -14
  29. dataeval/metrics/bias/parity.py +15 -12
  30. dataeval/metrics/estimators/__init__.py +2 -2
  31. dataeval/metrics/estimators/ber.py +3 -1
  32. dataeval/metrics/estimators/divergence.py +1 -1
  33. dataeval/metrics/estimators/uap.py +1 -1
  34. dataeval/metrics/stats/__init__.py +18 -18
  35. dataeval/metrics/stats/base.py +4 -4
  36. dataeval/metrics/stats/boxratiostats.py +8 -9
  37. dataeval/metrics/stats/datasetstats.py +10 -14
  38. dataeval/metrics/stats/dimensionstats.py +4 -4
  39. dataeval/metrics/stats/hashstats.py +12 -8
  40. dataeval/metrics/stats/labelstats.py +5 -5
  41. dataeval/metrics/stats/pixelstats.py +4 -9
  42. dataeval/metrics/stats/visualstats.py +4 -9
  43. dataeval/utils/__init__.py +4 -13
  44. dataeval/utils/dataset/__init__.py +7 -0
  45. dataeval/utils/{torch → dataset}/datasets.py +2 -0
  46. dataeval/utils/dataset/read.py +63 -0
  47. dataeval/utils/{split_dataset.py → dataset/split.py} +38 -30
  48. dataeval/utils/image.py +2 -2
  49. dataeval/utils/metadata.py +310 -5
  50. dataeval/{metrics/bias/metadata_utils.py → utils/plot.py} +1 -104
  51. dataeval/utils/torch/__init__.py +2 -17
  52. dataeval/utils/torch/gmm.py +29 -6
  53. dataeval/utils/torch/{utils.py → internal.py} +82 -58
  54. dataeval/utils/torch/models.py +10 -8
  55. dataeval/utils/torch/trainer.py +6 -85
  56. dataeval/workflows/__init__.py +2 -5
  57. dataeval/workflows/sufficiency.py +16 -6
  58. dataeval-0.75.0.dist-info/METADATA +136 -0
  59. dataeval-0.75.0.dist-info/RECORD +67 -0
  60. dataeval/detectors/ood/base_torch.py +0 -109
  61. dataeval/metrics/bias/metadata_preprocessing.py +0 -285
  62. dataeval/utils/gmm.py +0 -26
  63. dataeval-0.74.2.dist-info/METADATA +0 -120
  64. dataeval-0.74.2.dist-info/RECORD +0 -66
  65. {dataeval-0.74.2.dist-info → dataeval-0.75.0.dist-info}/LICENSE.txt +0 -0
  66. {dataeval-0.74.2.dist-info → dataeval-0.75.0.dist-info}/WHEEL +0 -0
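A recurring theme in this release is module reorganization: `dataeval/logging.py` becomes `log.py`, `detectors/ood/ae_torch.py` becomes `ae.py`, and the dataset utilities (`torch/datasets.py`, `split_dataset.py`, the new `read.py`) consolidate under `dataeval/utils/dataset/`. A hypothetical version-tolerant import sketch follows; only the module paths are taken from the rename entries above, and whether each module imports as shown is an assumption to verify against the 0.75.0 API docs.

```python
# Hypothetical migration sketch: module paths come from the rename entries
# in the file list above; imports are kept module-level only, since the
# public symbols inside each module are not verified here.
try:
    # 0.75.0 layout: dataset helpers consolidated under dataeval.utils.dataset
    from dataeval.utils.dataset import datasets, read, split
except ImportError:
    # 0.74.2 layout: datasets under utils.torch, splitting at utils top level
    from dataeval.utils import split_dataset as split
    from dataeval.utils.torch import datasets
```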
dataeval/metrics/bias/metadata_preprocessing.py DELETED
@@ -1,285 +0,0 @@
- from __future__ import annotations
-
- __all__ = ["MetadataOutput", "metadata_preprocessing"]
-
- import warnings
- from dataclasses import dataclass
- from typing import Any, Iterable, Literal, Mapping, TypeVar
-
- import numpy as np
- from numpy.typing import ArrayLike, NDArray
- from scipy.stats import wasserstein_distance as wd
-
- from dataeval.interop import as_numpy, to_numpy
- from dataeval.output import Output, set_metadata
- from dataeval.utils.metadata import merge_metadata
-
- TNum = TypeVar("TNum", int, float)
- DISCRETE_MIN_WD = 0.054
- CONTINUOUS_MIN_SAMPLE_SIZE = 20
-
-
- @dataclass(frozen=True)
- class MetadataOutput(Output):
-     """
-     Output class for :func:`metadata_binning` function
-
-     Attributes
-     ----------
-     discrete_factor_names : list[str]
-         List containing factor names for the original data that was discrete and the binned continuous data
-     discrete_data : NDArray[np.int]
-         Array containing values for the original data that was discrete and the binned continuous data
-     continuous_factor_names : list[str]
-         List containing factor names for the original continuous data
-     continuous_data : NDArray[np.int or np.double] | None
-         Array containing values for the original continuous data or None if there was no continuous data
-     class_labels : NDArray[np.int]
-         Numerical class labels for the images/objects
-     class_names : NDArray[Any]
-         Array of unique class names (for use with plotting)
-     total_num_factors : int
-         Sum of discrete_factor_names and continuous_factor_names plus 1 for class
-     """
-
-     discrete_factor_names: list[str]
-     discrete_data: NDArray[np.int_]
-     continuous_factor_names: list[str]
-     continuous_data: NDArray[np.int_ | np.double] | None
-     class_labels: NDArray[np.int_]
-     class_names: NDArray[Any]
-     total_num_factors: int
-
-
- @set_metadata
- def metadata_preprocessing(
-     raw_metadata: Iterable[Mapping[str, Any]],
-     class_labels: ArrayLike | str,
-     continuous_factor_bins: Mapping[str, int | list[tuple[TNum, TNum]]] | None = None,
-     auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = "uniform_width",
-     exclude: Iterable[str] | None = None,
- ) -> MetadataOutput:
-     """
-     Restructures the metadata to be in the correct format for the bias functions.
-
-     This identifies whether the incoming metadata is discrete or continuous,
-     and whether the data is already binned or still needs binning.
-     It accepts a list of dictionaries containing the per image metadata and
-     automatically adjusts for multiple targets in an image.
-
-     Parameters
-     ----------
-     raw_metadata : Iterable[Mapping[str, Any]]
-         Iterable collection of metadata dictionaries to flatten and merge.
-     class_labels : ArrayLike or string or None
-         If arraylike, expects the labels for each image (image classification) or each object (object detection).
-         If the labels are included in the metadata dictionary, pass in the key value.
-     continuous_factor_bins : Mapping[str, int] or Mapping[str, list[tuple[TNum, TNum]]] or None, default None
-         User provided dictionary specifying how to bin the continuous metadata factors
-     auto_bin_method : "uniform_width" or "uniform_count" or "clusters", default "uniform_width"
-         Method by which the function will automatically bin continuous metadata factors. It is recommended
-         that the user provide the bins through the `continuous_factor_bins`.
-     exclude : Iterable[str] or None, default None
-         User provided collection of metadata keys to exclude when processing metadata.
-
-     Returns
-     -------
-     MetadataOutput
-         Output class containing the binned metadata
-     """
-     # Transform metadata into single, flattened dictionary
-     metadata, image_repeats = merge_metadata(raw_metadata)
-
-     # Drop any excluded metadata keys
-     if exclude:
-         for k in list(metadata):
-             if k in exclude:
-                 metadata.pop(k)
-
-     # Get the class label array in numeric form
-     class_array = as_numpy(metadata.pop(class_labels)) if isinstance(class_labels, str) else as_numpy(class_labels)
-     if class_array.ndim > 1:
-         raise ValueError(
-             f"Got class labels with {class_array.ndim}-dimensional "
-             f"shape {class_array.shape}, but expected a 1-dimensional array."
-         )
-     if not np.issubdtype(class_array.dtype, np.int_):
-         unique_classes, numerical_labels = np.unique(class_array, return_inverse=True)
-     else:
-         numerical_labels = class_array
-         unique_classes = np.unique(class_array)
-
-     # Bin according to user supplied bins
-     continuous_metadata = {}
-     discrete_metadata = {}
-     if continuous_factor_bins is not None and continuous_factor_bins != {}:
-         invalid_keys = set(continuous_factor_bins.keys()) - set(metadata.keys())
-         if invalid_keys:
-             raise KeyError(
-                 f"The keys - {invalid_keys} - are present in the `continuous_factor_bins` dictionary "
-                 "but are not keys in the `metadata` dictionary. Delete these keys from `continuous_factor_bins` "
-                 "or add corresponding entries to the `metadata` dictionary."
-             )
-         for factor, grouping in continuous_factor_bins.items():
-             discrete_metadata[factor] = _user_defined_bin(metadata[factor], grouping)
-             continuous_metadata[factor] = metadata[factor]
-
-     # Determine category of the rest of the keys
-     remaining_keys = set(metadata.keys()) - set(continuous_metadata.keys())
-     for key in remaining_keys:
-         data = to_numpy(metadata[key])
-         if np.issubdtype(data.dtype, np.number):
-             result = _is_continuous(data, image_repeats)
-             if result:
-                 continuous_metadata[key] = data
-             unique_samples, ordinal_data = np.unique(data, return_inverse=True)
-             if unique_samples.size <= np.max([20, data.size * 0.01]):
-                 discrete_metadata[key] = ordinal_data
-             else:
-                 warnings.warn(
-                     f"A user defined binning was not provided for {key}. "
-                     f"Using the {auto_bin_method} method to discretize the data. "
-                     "It is recommended that the user rerun and supply the desired "
-                     "bins using the continuous_factor_bins parameter.",
-                     UserWarning,
-                 )
-                 discrete_metadata[key] = _binning_function(data, auto_bin_method)
-         else:
-             _, discrete_metadata[key] = np.unique(data, return_inverse=True)
-
-     # splitting out the dictionaries into the keys and values
-     discrete_factor_names = list(discrete_metadata.keys())
-     discrete_data = np.stack(list(discrete_metadata.values()), axis=-1)
-     continuous_factor_names = list(continuous_metadata.keys())
-     continuous_data = np.stack(list(continuous_metadata.values()), axis=-1) if continuous_metadata else None
-     total_num_factors = len(discrete_factor_names + continuous_factor_names) + 1
-
-     return MetadataOutput(
-         discrete_factor_names,
-         discrete_data,
-         continuous_factor_names,
-         continuous_data,
-         numerical_labels,
-         unique_classes,
-         total_num_factors,
-     )
-
-
- def _user_defined_bin(data: list[Any] | NDArray[Any], binning: int | list[tuple[TNum, TNum]]) -> NDArray[np.intp]:
-     """
-     Digitizes a list of values into a given number of bins.
-
-     Parameters
-     ----------
-     data : list | NDArray
-         The values to be digitized.
-     binning : int | list[tuple[TNum, TNum]]
-         The number of bins for the discrete values that data will be digitized into.
-
-     Returns
-     -------
-     NDArray[np.intp]
-         The digitized values
-     """
-
-     if not np.all([np.issubdtype(type(n), np.number) for n in data]):
-         raise TypeError(
-             "Encountered a data value with non-numeric type when digitizing a factor. "
-             "Ensure all occurrences of continuous factors are numeric types."
-         )
-     if type(binning) is int:
-         _, bin_edges = np.histogram(data, bins=binning)
-         bin_edges[-1] = np.inf
-         bin_edges[0] = -np.inf
-     else:
-         bin_edges = binning
-     return np.digitize(data, bin_edges)
-
-
- def _binning_function(data: NDArray[Any], bin_method: str) -> NDArray[np.int_]:
-     """
-     Bins continuous data through either equal width bins, equal amounts in each bin, or by clusters.
-     """
-     if bin_method == "clusters":
-         # bin_edges = _binning_by_clusters(data)
-         warnings.warn(
-             "Binning by clusters is currently unavailable until changes to the clustering function go through.",
-             UserWarning,
-         )
-         bin_method = "uniform_width"
-
-     if bin_method != "clusters":
-         counts, bin_edges = np.histogram(data, bins="auto")
-         n_bins = counts.size
-         if counts[counts > 0].min() < 10:
-             for _ in range(20):
-                 n_bins -= 1
-                 counts, bin_edges = np.histogram(data, bins=n_bins)
-                 if counts[counts > 0].min() >= 10 or n_bins < 2:
-                     break
-
-         if bin_method == "uniform_count":
-             quantiles = np.linspace(0, 100, n_bins + 1)
-             bin_edges = np.asarray(np.percentile(data, quantiles))
-
-     bin_edges[0] = -np.inf  # type: ignore # until the clusters speed up is merged
-     bin_edges[-1] = np.inf  # type: ignore # and the _binning_by_clusters can be uncommented
-     return np.digitize(data, bin_edges)  # type: ignore
-
-
- def _is_continuous(data: NDArray[np.number], image_indicies: NDArray[np.number]) -> bool:
-     """
-     Determines whether the data is continuous or discrete using the Wasserstein distance.
-
-     Given a 1D sample, we consider the intervals between adjacent points. For a continuous distribution,
-     a point is equally likely to lie anywhere in the interval bounded by its two neighbors. Furthermore,
-     we can put all "between neighbor" locations on the same scale of 0 to 1 by subtracting the smaller
-     neighbor and dividing out the length of the interval. (Duplicates are either assigned to zero or
-     ignored, depending on context). These normalized locations will be much more uniformly distributed
-     for continuous data than for discrete, and this gives us a way to distinguish them. Call this the
-     Normalized Near Neighbor distribution (NNN), defined on the interval [0,1].
-
-     The Wasserstein distance is available in scipy.stats.wasserstein_distance. We can use it to measure
-     how close the NNN is to a uniform distribution over [0,1]. We found that as long as a sample has at
-     least 20 points, and furthermore at least half as many points as there are discrete values, we can
-     reliably distinguish discrete from continuous samples by testing that the Wasserstein distance
-     measured from a uniform distribution is greater or less than 0.054, respectively.
-     """
-     # Check if the metadata is image specific
-     _, data_indicies_unsorted = np.unique(data, return_index=True)
-     if data_indicies_unsorted.size == image_indicies.size:
-         data_indicies = np.sort(data_indicies_unsorted)
-         if (data_indicies == image_indicies).all():
-             data = data[data_indicies]
-
-     # OLD METHOD
-     # uvals = np.unique(data)
-     # pct_unique = uvals.size / data.size
-     # return pct_unique < threshold
-
-     n_examples = len(data)
-
-     if n_examples < CONTINUOUS_MIN_SAMPLE_SIZE:
-         warnings.warn(
-             f"All samples look discrete with so few data points (< {CONTINUOUS_MIN_SAMPLE_SIZE})", UserWarning
-         )
-         return False
-
-     # Require at least 3 unique values before bothering with NNN
-     xu = np.unique(data, axis=None)
-     if xu.size < 3:
-         return False
-
-     Xs = np.sort(data)
-
-     X0, X1 = Xs[0:-2], Xs[2:]  # left and right neighbors
-
-     dx = np.zeros(n_examples - 2)  # no dx at end points
-     gtz = (X1 - X0) > 0  # check for dups; dx will be zero for them
-     dx[np.logical_not(gtz)] = 0.0
-
-     dx[gtz] = (Xs[1:-1] - X0)[gtz] / (X1 - X0)[gtz]  # the core idea: dx is NNN samples.
-
-     shift = wd(dx, np.linspace(0, 1, dx.size))  # how far is dx from uniform, for this feature?
-
-     return shift < DISCRETE_MIN_WD  # if NNN is close enough to uniform, consider the sample continuous.
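The `_is_continuous` heuristic above is self-contained enough to demonstrate on its own. Below is a minimal standalone sketch of the Normalized Near Neighbor (NNN) test, reconstructed from the deleted source with its constants copied verbatim; the function name `looks_continuous` is ours, and the image-repeat handling from the original is omitted.

```python
import numpy as np
from scipy.stats import wasserstein_distance

DISCRETE_MIN_WD = 0.054          # Wasserstein threshold from the deleted module
CONTINUOUS_MIN_SAMPLE_SIZE = 20  # minimum sample size from the deleted module

def looks_continuous(data: np.ndarray) -> bool:
    """NNN test: normalize each point's position between its two sorted
    neighbors, then measure how far those positions are from uniform [0, 1]."""
    if data.size < CONTINUOUS_MIN_SAMPLE_SIZE or np.unique(data).size < 3:
        return False                      # too little data looks discrete
    xs = np.sort(data)
    x0, x1 = xs[:-2], xs[2:]              # left and right neighbors
    dx = np.zeros(data.size - 2)          # no interval at the end points
    gtz = (x1 - x0) > 0                   # duplicates keep dx == 0
    dx[gtz] = (xs[1:-1] - x0)[gtz] / (x1 - x0)[gtz]
    shift = wasserstein_distance(dx, np.linspace(0, 1, dx.size))
    return shift < DISCRETE_MIN_WD        # near-uniform NNN => continuous

rng = np.random.default_rng(0)
print(looks_continuous(rng.uniform(size=100)))                       # True
print(looks_continuous(rng.integers(0, 5, size=100).astype(float)))  # False
```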
dataeval/utils/gmm.py DELETED
@@ -1,26 +0,0 @@
- from dataclasses import dataclass
- from typing import Generic, TypeVar
-
- TGMMData = TypeVar("TGMMData")
-
-
- @dataclass
- class GaussianMixtureModelParams(Generic[TGMMData]):
-     """
-     phi : TGMMData
-         Mixture component distribution weights.
-     mu : TGMMData
-         Mixture means.
-     cov : TGMMData
-         Mixture covariance.
-     L : TGMMData
-         Cholesky decomposition of `cov`.
-     log_det_cov : TGMMData
-         Log of the determinant of `cov`.
-     """
-
-     phi: TGMMData
-     mu: TGMMData
-     cov: TGMMData
-     L: TGMMData
-     log_det_cov: TGMMData
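With this generic dataclass removed, the torch-specific code in `dataeval/utils/torch/gmm.py` absorbs its role (+29 -6 above). As an illustration only, not the library's API, here is a minimal numpy sketch of how the five deleted fields relate, for a single-component "mixture":

```python
import numpy as np

# Illustrative reconstruction of the deleted GaussianMixtureModelParams
# fields; a real GMM fit would produce one entry per mixture component.
x = np.random.default_rng(1).normal(size=(500, 3))

phi = np.array([1.0])                  # component weights, sum to 1
mu = x.mean(axis=0, keepdims=True)     # component means, shape (1, 3)
cov = np.cov(x, rowvar=False)[None]    # component covariances, shape (1, 3, 3)
L = np.linalg.cholesky(cov)            # Cholesky factor of cov
# det(cov) = det(L)^2, so log|cov| = 2 * sum(log(diag(L)))
log_det_cov = 2.0 * np.log(np.diagonal(L, axis1=-2, axis2=-1)).sum(-1)
```

Caching `L` and `log_det_cov` alongside the raw parameters avoids re-factorizing the covariance on every density evaluation, which is presumably why the dataclass stored them.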
dataeval-0.74.2.dist-info/METADATA DELETED
@@ -1,120 +0,0 @@
- Metadata-Version: 2.1
- Name: dataeval
- Version: 0.74.2
- Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
- Home-page: https://dataeval.ai/
- License: MIT
- Author: Andrew Weng
- Author-email: andrew.weng@ariacoustics.com
- Maintainer: ARiA
- Maintainer-email: dataeval@ariacoustics.com
- Requires-Python: >=3.9,<3.13
- Classifier: Development Status :: 4 - Beta
- Classifier: Intended Audience :: Science/Research
- Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Classifier: Programming Language :: Python :: 3 :: Only
- Classifier: Topic :: Scientific/Engineering
- Provides-Extra: all
- Provides-Extra: torch
- Requires-Dist: matplotlib ; extra == "all"
- Requires-Dist: numpy (>=1.24.3)
- Requires-Dist: pillow (>=10.3.0)
- Requires-Dist: scikit-learn (>=1.5.0)
- Requires-Dist: scipy (>=1.10)
- Requires-Dist: torch (>=2.2.0) ; extra == "torch" or extra == "all"
- Requires-Dist: torchvision (>=0.17.0) ; extra == "torch" or extra == "all"
- Requires-Dist: tqdm
- Requires-Dist: typing-extensions (>=4.12) ; python_version >= "3.9" and python_version < "3.10"
- Requires-Dist: xxhash (>=3.3)
- Project-URL: Documentation, https://dataeval.readthedocs.io/
- Project-URL: Repository, https://github.com/aria-ml/dataeval/
- Description-Content-Type: text/markdown
-
- # DataEval
-
- ## About DataEval
-
- DataEval focuses on characterizing image data and its impact on model performance across Classification and object-detection tasks.
-
- <!-- start about -->
-
- **Model-agnostic metrics that bound real-world performance**
- - relevance/completeness/coverage
- - metafeatures (data complexity)
-
- **Model-specific metrics that guide model selection and training**
- - dataset sufficiency
- - data/model complexity mismatch
-
- **Metrics for post-deployment monitoring of data with bounds on model performance to guide retraining**
- - dataset-shift metrics
- - model performance bounds under covariate shift
- - guidance on sampling to assess model error and model retraining
-
- <!-- end about -->
-
- ## Getting Started
-
- ### Requirements
- - Python 3.9-3.11
-
- ### Installing DataEval
-
- You can install DataEval directly from pypi.org using the following command. The optional dependencies of DataEval are `torch`, `tensorflow` and `all`. Using `torch` enables Sufficiency metrics, and `tensorflow` enables OOD Detection.
-
- ```
- pip install dataeval[all]
- ```
-
- ### Installing DataEval in Conda/Mamba
-
- DataEval can be installed in a Conda/Mamba environment using the provided `environment.yaml` file. As some dependencies
- are installed from the `pytorch` channel, the channel is specified in the below example.
-
- ```
- micromamba create -f environment\environment.yaml -c pytorch
- ```
-
- ### Installing DataEval from GitHub
-
- To install DataEval from source locally on Ubuntu, you will need `git-lfs` to download larger, binary source files and `poetry` for project dependency management.
-
- ```
- sudo apt-get install git-lfs
- pip install poetry
- ```
-
- Pull the source down and change to the DataEval project directory.
- ```
- git clone https://github.com/aria-ml/dataeval.git
- cd dataeval
- ```
-
-
-
- Install DataEval with optional dependencies for development.
- ```
- poetry install --all-extras --with dev
- ```
-
- Now that DataEval is installed, you can run commands in the poetry virtual environment by prefixing shell commands with `poetry run`, or activate the virtual environment directly in the shell.
- ```
- poetry shell
- ```
-
- ### Documentation and Tutorials
- For more ideas on getting started using DataEval in your workflow, additional information and tutorials are in our Sphinx documentation hosted on [Read the Docs](https://dataeval.readthedocs.io/).
-
- ## Attribution
- This project uses code from the [Alibi-Detect](https://github.com/SeldonIO/alibi-detect) python library developed by SeldonIO. Additional documentation from the developers are also available [here](https://docs.seldon.io/projects/alibi-detect/en/stable/).
-
- ## POCs
- - **POC**: Scott Swan @scott.swan
- - **DPOC**: Andrew Weng @aweng
-
dataeval-0.74.2.dist-info/RECORD DELETED
@@ -1,66 +0,0 @@
- dataeval/__init__.py,sha256=w_On8sJ5o_f8PboMo6LLErdFSqDAQ1Jg_e0mcp-5FRU,959
- dataeval/detectors/__init__.py,sha256=Y-0bbyWyuMvZU80bCx6WPt3IV_r2hu9ymzpA8uzMqoI,206
- dataeval/detectors/drift/__init__.py,sha256=BSXm21y7cAawHep-ZldCJ5HOvzYjPzYGKGrmoEs3i0E,737
- dataeval/detectors/drift/base.py,sha256=QDGHMu1WADD-38MEIOwjQMEQM3DE7B0yFHO3hsMbV-E,14481
- dataeval/detectors/drift/cvm.py,sha256=kc59w2_wtxFGNnLcaJRvX5v_38gPXiebSGNiFVdunEQ,4142
- dataeval/detectors/drift/ks.py,sha256=gcpe1WIQeNeZdLYkdMZCFLXUp1bHMQUxwJE6-RLVOXs,4229
- dataeval/detectors/drift/mmd.py,sha256=C0FX5v9ZJzmKNYEcYUaC7sDtMpJ2dZpwikNDu-AEWiI,7584
- dataeval/detectors/drift/torch.py,sha256=igEQ2DV9JmcpTdUKCOHBi5LxtoNeCAslJS2Ldulg1hw,7585
- dataeval/detectors/drift/uncertainty.py,sha256=Xz2yzJjtJfw1vLag234jwRvaa_HK36nMajGx8bQaNRs,5322
- dataeval/detectors/drift/updates.py,sha256=UJ0z5hlunRi7twnkLABfdJG3tT2EqX4y9IGx8_USYvo,1780
- dataeval/detectors/linters/__init__.py,sha256=BvpaB1RUpkEhhXk3Mqi5NYoOcJKZRFSBOJCmQOIfYRU,483
- dataeval/detectors/linters/clusterer.py,sha256=hK-ak02GaxwWuufesZMKDsvoE5fMdXO7UWsLiK8hfY0,21008
- dataeval/detectors/linters/duplicates.py,sha256=2bmPTFqoefeiAQV9y4CGlHV_mJNrysJSEFLXLd2DO4I,5661
- dataeval/detectors/linters/merged_stats.py,sha256=X-bDTwjyR8RuVmzxLaHZmQ5nI3oOWvsqVlitdSncapk,1355
- dataeval/detectors/linters/outliers.py,sha256=X48bzTfTr1LqC6WKVKBRfvpjcQRgmb93cNLT7Oipe3M,10113
- dataeval/detectors/ood/__init__.py,sha256=-D4Fq-ysFylNNMqjHG1ALbB9qBCm_UinkCAgsK9HGg0,408
- dataeval/detectors/ood/ae_torch.py,sha256=pO9w5221bXR9lEBkE7oakXeE7PXUUR--xcTpmHvOCSk,2142
- dataeval/detectors/ood/base.py,sha256=UzcDbXl8Gv43VFzjrOegTnKSIoEYmfDP7fAySeWyWPw,6955
- dataeval/detectors/ood/base_torch.py,sha256=yFbSfQsBMwZeVf8mrixmkZYBGChhV5oAHtkgzWnMzsA,3405
- dataeval/detectors/ood/metadata_ks_compare.py,sha256=LNDNWGEDKTW8_-djgmK53sn9EZzzXq1Sgwc47k0QI-Y,5380
- dataeval/detectors/ood/metadata_least_likely.py,sha256=nxMCXUOjOfWHDTGT2SLE7OYBCydRq8zHLd8t17k7hMM,5193
- dataeval/detectors/ood/metadata_ood_mi.py,sha256=KLay2BmgHrStBV92VpIs_B1yEfQKllsMTgzOQEng01I,4065
- dataeval/interop.py,sha256=5lACbR7bZYGCagiwbXzAWvWeHRj8kWBmsTC9oEjFh78,2249
- dataeval/logging.py,sha256=uGxXPqGpn5guQjuHtm25rzILaz7nCQUsy2o7tFo91OI,343
- dataeval/metrics/__init__.py,sha256=fPBNLd-T6mCErZBBJrxWmXIL0jCk7fNUYIcNEBkMa80,238
- dataeval/metrics/bias/__init__.py,sha256=dYiPHenS8J7pgRMMW2jNkTBmTbPoYTxT04fZu9PFats,747
- dataeval/metrics/bias/balance.py,sha256=_TZEe17AT-qOvPp-QFrQfTqNwh8uVVCYjC4Sv6JBx9o,9118
- dataeval/metrics/bias/coverage.py,sha256=o65_IgrWSlGnYeYZFABjwKaxq09uqyy5esHJM67PJ-k,4528
- dataeval/metrics/bias/diversity.py,sha256=WL1NbZiRrv0SIq97FY3womZNCSl_EBMVlBWQZAUtjk8,7701
- dataeval/metrics/bias/metadata_preprocessing.py,sha256=ekUFiirkmaHDiH7nJjkNpiUQD7OolAPhHorjLxpXv_Y,12248
- dataeval/metrics/bias/metadata_utils.py,sha256=HmTjlRRTdM9566oKUDDdVMJ8luss4DYykFOiS2FQzhM,6558
- dataeval/metrics/bias/parity.py,sha256=hnA7qQH4Uy3tl_krluZ9BPD5zYjjagUxZt2fEiIa2yE,12745
- dataeval/metrics/estimators/__init__.py,sha256=O6ocxJq8XDkfJWwXeJnnnzbOyRnFPKF4kTIVTTZYOA8,380
- dataeval/metrics/estimators/ber.py,sha256=fs3_e9pgu7I50QIALWtF2aidkBZhTCKVE2pA7PyB5Go,5019
- dataeval/metrics/estimators/divergence.py,sha256=r_SKSurf1TdI5E1ivENqDnz8cQ3_sxVGKAqmF9cqcT4,4275
- dataeval/metrics/estimators/uap.py,sha256=Aw5ReoWNK73Tq96r__qN_-cvHrELauqtDX3Af_QxX4s,2157
- dataeval/metrics/stats/__init__.py,sha256=igLRaAt1nX6yRwC4xI0zNPBADi3u7EsSxWP3OZ8AqcU,1086
- dataeval/metrics/stats/base.py,sha256=_C05KUAuDrfX3N-19o25V3vmXr0-45A5fc57cXyV8qs,12161
- dataeval/metrics/stats/boxratiostats.py,sha256=bZunY-b8Y2IQqHlTusQN77ujLOHftogEQIARDpdVv6A,6463
- dataeval/metrics/stats/datasetstats.py,sha256=rZUDiciHwEpnXmkI8-uJNiYwUuTL9ssZMKMx73hVX-Y,6219
- dataeval/metrics/stats/dimensionstats.py,sha256=xITgQF_oomb6Ty_dJcbT3ARGGNp4QRcYSgnkjB4f-YE,4054
- dataeval/metrics/stats/hashstats.py,sha256=vxw_K74EJM9CZy-EV617vdrysFO8nEspVWqIYsIHC-c,4958
- dataeval/metrics/stats/labelstats.py,sha256=K0hJTphMe7htSjyss8GPtKDiHepTuU60_hX0xRA-uAg,4096
- dataeval/metrics/stats/pixelstats.py,sha256=2zr9i3GLNx1i_SCtbfdtZNxXBEc_9wCe4qDpmXLVbKY,4576
- dataeval/metrics/stats/visualstats.py,sha256=vLIC4sMo796axWl-4e4RzT33ll-_6ki54Dirn3V-EL8,4948
- dataeval/output.py,sha256=hR5TJ67f7FgrZO9Du46aw-jvRpMjOimSgJSau4ZNK44,3565
- dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- dataeval/utils/__init__.py,sha256=z7HxSijjycey-rGdQkgVOdpvT0oO2pKAuT4uYyxYGMs,555
- dataeval/utils/gmm.py,sha256=YuLsJKsVWgH_wHr1u_hSRH5Yeexdj8exht8h99L7bLo,561
- dataeval/utils/image.py,sha256=KgC_1nW__nGN5q6bVZNvG4U_qIBdjcPATz9qe8f2XuA,1928
- dataeval/utils/metadata.py,sha256=0A--iru0zEmi044mKz5P35q69KrI30yoiRSlvs7TSdQ,9418
- dataeval/utils/shared.py,sha256=xvF3VLfyheVwJtdtDrneOobkKf7t-JTmf_w91FWXmqo,3616
- dataeval/utils/split_dataset.py,sha256=KYIl2ueLN0BeBoEvbUP5FdwVcMYW_l-ES1nQf_zKpQA,18776
- dataeval/utils/torch/__init__.py,sha256=lpkqfgyARUxgrV94cZESQv8PIP2p-UnwItZ_wIr0XzQ,675
- dataeval/utils/torch/blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
- dataeval/utils/torch/datasets.py,sha256=10elNgLuH_FDX_CHE3y2Z215JN4-PQovQm5brcIJOeM,15021
- dataeval/utils/torch/gmm.py,sha256=VbLlUQohwToApT493_tjQBWy2UM5R-3ppS9Dp-eP7BA,3240
- dataeval/utils/torch/models.py,sha256=sdGeo7a8vshCTGA4lYyVxxb_aDWUlxdtIVxrddS-_ls,8542
- dataeval/utils/torch/trainer.py,sha256=8BEXr6xtk-CHJTcNxOBnWgkFWfJUAiBy28cEdBhLMRU,7883
- dataeval/utils/torch/utils.py,sha256=nWRcT6z6DbFVrL1RyxCOX3DPoCrv9G0B-VI_9LdGCQQ,5784
- dataeval/workflows/__init__.py,sha256=ef1MiVL5IuhlDXXbwsiAfafhnr7tD3TXF9GRusy9_O8,290
- dataeval/workflows/sufficiency.py,sha256=v9AV3BZT0NW-zD2VNIL_5aWspvoscrxRIUKcUdpy7HI,18540
- dataeval-0.74.2.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
- dataeval-0.74.2.dist-info/METADATA,sha256=Rcnn55cRPZ2JZ1jn8YamuVDxmQVDKEItK4oqZyAYkHM,4298
- dataeval-0.74.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
- dataeval-0.74.2.dist-info/RECORD,,