dataeval 0.61.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. dataeval/__init__.py +18 -0
  2. dataeval/_internal/detectors/__init__.py +0 -0
  3. dataeval/_internal/detectors/clusterer.py +469 -0
  4. dataeval/_internal/detectors/drift/__init__.py +0 -0
  5. dataeval/_internal/detectors/drift/base.py +265 -0
  6. dataeval/_internal/detectors/drift/cvm.py +97 -0
  7. dataeval/_internal/detectors/drift/ks.py +100 -0
  8. dataeval/_internal/detectors/drift/mmd.py +166 -0
  9. dataeval/_internal/detectors/drift/torch.py +310 -0
  10. dataeval/_internal/detectors/drift/uncertainty.py +149 -0
  11. dataeval/_internal/detectors/duplicates.py +49 -0
  12. dataeval/_internal/detectors/linter.py +78 -0
  13. dataeval/_internal/detectors/ood/__init__.py +0 -0
  14. dataeval/_internal/detectors/ood/ae.py +77 -0
  15. dataeval/_internal/detectors/ood/aegmm.py +69 -0
  16. dataeval/_internal/detectors/ood/base.py +199 -0
  17. dataeval/_internal/detectors/ood/llr.py +284 -0
  18. dataeval/_internal/detectors/ood/vae.py +86 -0
  19. dataeval/_internal/detectors/ood/vaegmm.py +79 -0
  20. dataeval/_internal/flags.py +47 -0
  21. dataeval/_internal/metrics/__init__.py +0 -0
  22. dataeval/_internal/metrics/base.py +92 -0
  23. dataeval/_internal/metrics/ber.py +124 -0
  24. dataeval/_internal/metrics/coverage.py +80 -0
  25. dataeval/_internal/metrics/divergence.py +94 -0
  26. dataeval/_internal/metrics/hash.py +79 -0
  27. dataeval/_internal/metrics/parity.py +180 -0
  28. dataeval/_internal/metrics/stats.py +332 -0
  29. dataeval/_internal/metrics/uap.py +45 -0
  30. dataeval/_internal/metrics/utils.py +158 -0
  31. dataeval/_internal/models/__init__.py +0 -0
  32. dataeval/_internal/models/pytorch/__init__.py +0 -0
  33. dataeval/_internal/models/pytorch/autoencoder.py +202 -0
  34. dataeval/_internal/models/pytorch/blocks.py +46 -0
  35. dataeval/_internal/models/pytorch/utils.py +67 -0
  36. dataeval/_internal/models/tensorflow/__init__.py +0 -0
  37. dataeval/_internal/models/tensorflow/autoencoder.py +317 -0
  38. dataeval/_internal/models/tensorflow/gmm.py +115 -0
  39. dataeval/_internal/models/tensorflow/losses.py +107 -0
  40. dataeval/_internal/models/tensorflow/pixelcnn.py +1106 -0
  41. dataeval/_internal/models/tensorflow/trainer.py +102 -0
  42. dataeval/_internal/models/tensorflow/utils.py +254 -0
  43. dataeval/_internal/workflows/sufficiency.py +555 -0
  44. dataeval/detectors/__init__.py +29 -0
  45. dataeval/flags/__init__.py +3 -0
  46. dataeval/metrics/__init__.py +7 -0
  47. dataeval/models/__init__.py +15 -0
  48. dataeval/models/tensorflow/__init__.py +6 -0
  49. dataeval/models/torch/__init__.py +8 -0
  50. dataeval/py.typed +0 -0
  51. dataeval/workflows/__init__.py +8 -0
  52. dataeval-0.61.0.dist-info/LICENSE.txt +21 -0
  53. dataeval-0.61.0.dist-info/METADATA +114 -0
  54. dataeval-0.61.0.dist-info/RECORD +55 -0
  55. dataeval-0.61.0.dist-info/WHEEL +4 -0
dataeval/_internal/workflows/sufficiency.py ADDED
@@ -0,0 +1,555 @@
+ import warnings
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
+
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ from matplotlib.figure import Figure
+ from scipy.optimize import basinhopping
+ from torch.utils.data import Dataset
+
+ from dataeval._internal.metrics.base import EvaluateMixin
+
+ STEPS_KEY = "_STEPS_"
+ PARAMS_KEY = "_CURVE_PARAMS_"
+
+ SufficiencyOutput = Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
+
+
+ def f_out(n_i: np.ndarray, x: np.ndarray) -> np.ndarray:
+     """
+     Calculates the line of best fit based on its free parameters
+
+     Parameters
+     ----------
+     n_i : np.ndarray
+         Array of sample sizes
+     x : np.ndarray
+         Array of inverse power curve coefficients
+
+     Returns
+     -------
+     np.ndarray
+         Data points for the line of best fit
+     """
+     return x[0] * n_i ** (-x[1]) + x[2]
+
+
+ def f_inv_out(y_i: np.ndarray, x: np.ndarray) -> np.ndarray:
+     """
+     Inverse function for f_out()
+
+     Parameters
+     ----------
+     y_i : np.ndarray
+         Data points for the line of best fit
+     x : np.ndarray
+         Array of inverse power curve coefficients
+
+     Returns
+     -------
+     np.ndarray
+         Array of sample sizes
+     """
+     n_i = ((y_i - x[2]) / x[0]) ** (-1 / x[1])
+     return n_i
+
+
+ def calc_params(p_i: np.ndarray, n_i: np.ndarray, niter: int) -> np.ndarray:
+     """
+     Retrieves the inverse power curve coefficients for the line of best fit.
+     Global minimization is done via basin hopping. More info on this algorithm
+     can be found here: https://arxiv.org/abs/cond-mat/9803344 .
+
+     Parameters
+     ----------
+     p_i : np.ndarray
+         Array of corresponding losses
+     n_i : np.ndarray
+         Array of sample sizes
+     niter : int
+         Number of iterations to perform in the basin-hopping
+         numerical process to curve-fit p_i
+
+     Returns
+     -------
+     np.ndarray
+         Array of parameters to recreate line of best fit
+     """
+
+     def is_valid(f_new, x_new, f_old, x_old):
+         # Accept a hop only when the objective evaluated to a real number;
+         # the original `f_new != np.nan` comparison is always True, so an
+         # explicit isnan check is required to reject failed evaluations.
+         return not np.isnan(f_new)
+
+     def f(x):
+         try:
+             return np.sum(np.square(p_i - f_out(n_i, x)))
+         except RuntimeWarning:
+             return np.nan
+
+     with warnings.catch_warnings():
+         warnings.filterwarnings("error", category=RuntimeWarning)
+         res = basinhopping(
+             f,
+             np.array([0.5, 0.5, 0.1]),
+             niter=niter,
+             stepsize=1.0,
+             minimizer_kwargs={"method": "Powell"},
+             accept_test=is_valid,
+             niter_success=200,
+         )
+     return res.x
+
+
+ def reset_parameters(model: nn.Module):
+     """
+     Re-initializes each layer in the model using
+     the layer's defined reset_parameters method, when present
+     """
+
+     @torch.no_grad()
+     def weight_reset(m: nn.Module):
+         # Check if the current module has reset_parameters
+         reset_parameters = getattr(m, "reset_parameters", None)
+         if callable(reset_parameters):
+             m.reset_parameters()  # type: ignore
+
+     # Applies fn recursively to every submodule; see:
+     # https://pytorch.org/docs/stable/generated/torch.nn.Module.html
+     return model.apply(fn=weight_reset)
+
+
+ def validate_dataset_len(dataset: Dataset) -> int:
+     if not hasattr(dataset, "__len__"):
+         raise TypeError("Must provide a dataset with a length attribute")
+     length: int = dataset.__len__()  # type: ignore
+     if length <= 0:
+         raise ValueError("Dataset length must be greater than 0")
+     return length
+
+
+ def validate_output(data: SufficiencyOutput):
+     """Ensure the sufficiency data used is not malformed"""
+     if not all(key in data for key in [STEPS_KEY, PARAMS_KEY]):
+         raise KeyError(f"{STEPS_KEY} and {PARAMS_KEY} are required keys for Sufficiency output.")
+     c = len(data[STEPS_KEY])
+     for m, v in data.items():
+         if m in [STEPS_KEY, PARAMS_KEY]:
+             continue
+         v = cast(np.ndarray, v)
+         c_v = v.shape[1] if v.ndim > 1 else len(v)
+         if c != c_v:
+             raise ValueError(f"{m} does not contain the expected number ({c}) of data points.")
+
+
+ def project_steps(params: np.ndarray, projection: np.ndarray) -> np.ndarray:
+     """Projects the measures for each value of X
+
+     Parameters
+     ----------
+     params : np.ndarray
+         Inverse power curve coefficients used to calculate projection
+     projection : np.ndarray
+         Steps to extrapolate
+
+     Returns
+     -------
+     np.ndarray
+         Extrapolated measure values at each projection step
+
+     """
+     return 1 - f_out(projection, params)
+
+
+ def inv_project_steps(params: np.ndarray, targets: np.ndarray) -> np.ndarray:
+     """Inverse function for project_steps()
+
+     Parameters
+     ----------
+     params : np.ndarray
+         Inverse power curve coefficients used to calculate projection
+     targets : np.ndarray
+         Desired measure values
+
+     Returns
+     -------
+     np.ndarray
+         Array of sample sizes, or 0 if overflow
+     """
+     steps = f_inv_out(1 - np.array(targets), params)
+     steps[np.isnan(steps)] = 0
+     return np.ceil(steps).astype(np.int64)
+
+
+ def get_curve_params(measures: Dict[str, np.ndarray], ranges: np.ndarray, niter: int) -> Dict[str, np.ndarray]:
+     """Calculates and aggregates parameters for both single and multi-class metrics"""
+     output = {}
+     for name, measure in measures.items():
+         measure = cast(np.ndarray, measure)
+         if measure.ndim > 1:
+             result = []
+             for value in measure:
+                 result.append(calc_params(1 - value, ranges, niter))
+             output[name] = np.array(result)
+         else:
+             output[name] = calc_params(1 - measure, ranges, niter)
+     return output
+
+
+ def plot_measure(
+     name: str,
+     steps: np.ndarray,
+     measure: np.ndarray,
+     params: np.ndarray,
+     projection: np.ndarray,
+ ) -> Figure:
+     fig = plt.figure()
+     fig = cast(Figure, fig)
+     fig.tight_layout()
+
+     ax = fig.add_subplot(111)
+
+     ax.set_title(f"{name} Sufficiency")
+     ax.set_ylabel(f"{name}")
+     ax.set_xlabel("Steps")
+
+     # Plot measure over each step
+     ax.scatter(steps, measure, label=f"Model Results ({name})", s=15, c="black")
+
+     # Plot extrapolation
+     ax.plot(
+         projection,
+         project_steps(params, projection),
+         linestyle="dashed",
+         label=f"Potential Model Results ({name})",
+     )
+
+     ax.legend()
+     return fig
+
+
+ class Sufficiency(EvaluateMixin):
+     """
+     Project dataset sufficiency using a given model and evaluation criteria
+
+     Parameters
+     ----------
+     model : nn.Module
+         Model that will be trained for each subset of data
+     train_ds : Dataset
+         Full training data that will be split for each run
+     test_ds : Dataset
+         Data that will be used for every run's evaluation
+     train_fn : Callable[[nn.Module, Dataset, Sequence[int]], None]
+         Function which takes a model (torch.nn.Module), a dataset
+         (torch.utils.data.Dataset), indices to train on and executes model
+         training against the data.
+     eval_fn : Callable[[nn.Module, Dataset], Dict[str, float]]
+         Function which takes a model (torch.nn.Module), a dataset
+         (torch.utils.data.Dataset) and returns a dictionary of metric
+         values (Dict[str, float] or Dict[str, np.ndarray]) which is used
+         to assess model performance given the model and data.
+     runs : int, default 1
+         Number of models to run over all subsets
+     substeps : int, default 5
+         Total number of dataset partitions that each model will train on
+     train_kwargs : Dict[str, Any] | None, default None
+         Additional arguments required for custom training function
+     eval_kwargs : Dict[str, Any] | None, default None
+         Additional arguments required for custom evaluation function
+     """
+
+     def __init__(
+         self,
+         model: nn.Module,
+         train_ds: Dataset,
+         test_ds: Dataset,
+         train_fn: Callable[[nn.Module, Dataset, Sequence[int]], None],
+         eval_fn: Callable[[nn.Module, Dataset], Union[Dict[str, float], Dict[str, np.ndarray]]],
+         runs: int = 1,
+         substeps: int = 5,
+         train_kwargs: Optional[Dict[str, Any]] = None,
+         eval_kwargs: Optional[Dict[str, Any]] = None,
+     ):
+         self.model = model
+         self.train_ds = train_ds
+         self.test_ds = test_ds
+         self.train_fn = train_fn
+         self.eval_fn = eval_fn
+         self.runs = runs
+         self.substeps = substeps
+         self.train_kwargs = train_kwargs
+         self.eval_kwargs = eval_kwargs
+
+     @property
+     def train_ds(self):
+         return self._train_ds
+
+     @train_ds.setter
+     def train_ds(self, value: Dataset):
+         self._train_ds = value
+         self._length = validate_dataset_len(value)
+
+     @property
+     def test_ds(self):
+         return self._test_ds
+
+     @test_ds.setter
+     def test_ds(self, value: Dataset):
+         validate_dataset_len(value)
+         self._test_ds = value
+
+     @property
+     def train_fn(self) -> Callable[[nn.Module, Dataset, Sequence[int]], None]:
+         return self._train_fn
+
+     @train_fn.setter
+     def train_fn(self, value: Callable[[nn.Module, Dataset, Sequence[int]], None]):
+         if not callable(value):
+             raise TypeError("Must provide a callable for train_fn.")
+         self._train_fn = value
+
+     @property
+     def eval_fn(
+         self,
+     ) -> Callable[[nn.Module, Dataset], Union[Dict[str, float], Dict[str, np.ndarray]]]:
+         return self._eval_fn
+
+     @eval_fn.setter
+     def eval_fn(
+         self,
+         value: Callable[[nn.Module, Dataset], Union[Dict[str, float], Dict[str, np.ndarray]]],
+     ):
+         if not callable(value):
+             raise TypeError("Must provide a callable for eval_fn.")
+         self._eval_fn = value
+
+     @property
+     def train_kwargs(self) -> Dict[str, Any]:
+         return self._train_kwargs
+
+     @train_kwargs.setter
+     def train_kwargs(self, value: Optional[Dict[str, Any]]):
+         self._train_kwargs = {} if value is None else value
+
+     @property
+     def eval_kwargs(self) -> Dict[str, Any]:
+         return self._eval_kwargs
+
+     @eval_kwargs.setter
+     def eval_kwargs(self, value: Optional[Dict[str, Any]]):
+         self._eval_kwargs = {} if value is None else value
+
+     def evaluate(self, eval_at: Optional[np.ndarray] = None, niter: int = 1000) -> SufficiencyOutput:
+         """
+         Creates data indices, trains models, and returns plotting data
+
+         Parameters
+         ----------
+         eval_at : Optional[np.ndarray]
+             Specify this to collect accuracies over a specific set of dataset lengths, rather
+             than letting Sufficiency internally create the lengths to evaluate at.
+         niter : int, default 1000
+             Iterations to perform when using the basin-hopping method to curve-fit measure(s).
+
+         Returns
+         -------
+         Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
+             Dictionary containing the average of each measure per substep
+         """
+         if eval_at is not None:
+             ranges = eval_at
+         else:
+             geomshape = (
+                 0.01 * self._length,
+                 self._length,
+                 self.substeps,
+             )  # Start, Stop, Num steps
+             ranges = np.geomspace(*geomshape).astype(np.int64)
+         substeps = len(ranges)
+         measures = {}
+
+         # Run each model over all indices
+         for _ in range(self.runs):
+             # Create a randomized set of indices to use
+             indices = np.random.randint(0, self._length, size=self._length)
+             # Reset the network weights to "create" an untrained model
+             model = reset_parameters(self.model)
+             # Run the model with each substep of data
+             for iteration, substep in enumerate(ranges):
+                 # train on subset of train data
+                 self.train_fn(
+                     model,
+                     self.train_ds,
+                     indices[:substep].tolist(),
+                     **self.train_kwargs,
+                 )
+
+                 # evaluate on test data
+                 measure = self.eval_fn(model, self.test_ds, **self.eval_kwargs)
+
+                 # Keep track of each measure's values
+                 for name, value in measure.items():
+                     if name in [STEPS_KEY, PARAMS_KEY]:
+                         raise KeyError(f"Cannot use reserved name '{name}' as a metric name.")
+
+                     # Sum result into current substep iteration to be averaged later
+                     value = np.array(value).ravel()
+                     if name not in measures:
+                         measures[name] = np.zeros(substeps if len(value) == 1 else (substeps, len(value)))
+                     measures[name][iteration] += value
+
+         # The mean for each measure must be calculated before being returned
+         measures = {k: (v / self.runs).T for k, v in measures.items()}
+         params_output = get_curve_params(measures, ranges, niter)
+         output = {STEPS_KEY: ranges, PARAMS_KEY: params_output}
+         output.update(measures)
+         return output
+
+     @classmethod
+     def project(
+         cls,
+         data: SufficiencyOutput,
+         projection: Union[int, Sequence[int], np.ndarray],
+     ) -> Dict[str, np.ndarray]:
+         """Projects the measures for each value of X
+
+         Parameters
+         ----------
+         data : Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
+             Dictionary containing the average of each measure per substep
+         projection : Union[int, Sequence[int], np.ndarray]
+             Step or steps to project
+
+         Returns
+         -------
+         Dict[str, np.ndarray]
+             Dictionary of projected measure values at each step, with the
+             steps themselves stored under STEPS_KEY
+
+         Raises
+         ------
+         KeyError
+             If STEPS_KEY or measure is not a valid key
+         ValueError
+             If the length of data points in the measures do not match
+             If the projection is not an int, Sequence[int] or an ndarray
+         """
+         validate_output(data)
+         projection = [projection] if isinstance(projection, int) else projection
+         projection = np.array(projection) if isinstance(projection, Sequence) else projection
+         if not isinstance(projection, np.ndarray):
+             raise ValueError("'projection' must be an int, Sequence[int] or ndarray")
+
+         output = {}
+         output[STEPS_KEY] = projection
+         for name, measures in data.items():
+             if name in [STEPS_KEY, PARAMS_KEY]:
+                 continue
+             measures = cast(np.ndarray, measures)
+             if measures.ndim > 1:
+                 result = []
+                 for i in range(len(measures)):
+                     projected = project_steps(data[PARAMS_KEY][name][i], projection)
+                     result.append(projected)
+                 output[name] = np.array(result).T
+             else:
+                 output[name] = project_steps(data[PARAMS_KEY][name], projection)
+         return output
+
+     @classmethod
+     def plot(cls, data: SufficiencyOutput, class_names: Optional[Sequence[str]] = None) -> List[Figure]:
+         """Plotting function for data sufficiency tasks
+
+         Parameters
+         ----------
+         data : Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
+             Dictionary containing the average of each measure per substep
+         class_names : Optional[Sequence[str]]
+             Class names used to label per-class measures; defaults to the class index
+
+         Returns
+         -------
+         List[plt.Figure]
+             List of Figures for each measure
+
+         Raises
+         ------
+         KeyError
+             If STEPS_KEY or measure is not a valid key
+         ValueError
+             If the length of data points in the measures do not match
+         IndexError
+             If the class name count does not align with the measures
+         """
+         validate_output(data)
+
+         # X, y data
+         steps = cast(np.ndarray, data[STEPS_KEY])
+
+         # Extrapolation parameters
+         last_X = steps[-1]
+         geomshape = (0.01 * last_X, last_X * 4, len(steps))
+         extrapolated = np.geomspace(*geomshape).astype(np.int64)
+
+         # Stores all plots
+         plots = []
+
+         # Create a plot for each measure on one figure
+         for name, measures in data.items():
+             if name in [STEPS_KEY, PARAMS_KEY]:
+                 continue
+             measures = cast(np.ndarray, measures)
+             if measures.ndim > 1:
+                 if class_names is not None and len(measures) != len(class_names):
+                     raise IndexError("Class name count does not align with measures")
+                 for i, measure in enumerate(measures):
+                     class_name = str(i) if class_names is None else class_names[i]
+                     fig = plot_measure(
+                         f"{name}_{class_name}",
+                         steps,
+                         measure,
+                         data[PARAMS_KEY][name][i],
+                         extrapolated,
+                     )
+                     plots.append(fig)
+
+             else:
+                 fig = plot_measure(name, steps, measures, data[PARAMS_KEY][name], extrapolated)
+                 plots.append(fig)
+
+         return plots
+
+     @classmethod
+     def inv_project(cls, targets: Dict[str, np.ndarray], data: SufficiencyOutput) -> Dict[str, np.ndarray]:
+         """
+         Calculate training samples needed to achieve target model metric values.
+
+         Parameters
+         ----------
+         targets : Dict[str, np.ndarray]
+             Dictionary of target metric scores (from 0.0 to 1.0) that we want
+             to achieve, where the key is the name of the metric.
+         data : Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
+             Dictionary containing the average of each measure per substep
+
+         Returns
+         -------
+         Dict[str, np.ndarray]
+             Number of training samples needed to achieve each corresponding
+             entry in targets, keyed by metric name
+         """
+
+         validate_output(data)
+
+         projection = {}
+
+         for name, target in targets.items():
+             if name not in data:
+                 continue
+
+             measure = cast(np.ndarray, data[name])
+             if measure.ndim > 1:
+                 projection[name] = np.zeros((len(measure), len(target)))
+                 for i in range(len(measure)):
+                     projection[name][i] = inv_project_steps(
+                         data[PARAMS_KEY][name][i], target[i] if target.ndim == measure.ndim else target
+                     )
+             else:
+                 projection[name] = inv_project_steps(data[PARAMS_KEY][name], target)
+
+         return projection
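
Taken together, evaluate() fits an inverse power curve 1 - (a * n**(-b) + c) to each measure across the substeps (see f_out and calc_params above), while project(), inv_project(), and plot() extrapolate along that fitted curve. The sketch below shows one way to drive the workflow end to end; the toy model, the synthetic TensorDataset inputs, and the train_fn/eval_fn helpers are illustrative stand-ins, not part of the package:

    import numpy as np
    import torch
    import torch.nn as nn
    from torch.utils.data import Dataset, TensorDataset

    from dataeval.workflows import Sufficiency

    # Toy binary classifier and synthetic data, purely for illustration
    model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 2))
    train_ds = TensorDataset(torch.randn(1000, 8), torch.randint(0, 2, (1000,)))
    test_ds = TensorDataset(torch.randn(200, 8), torch.randint(0, 2, (200,)))

    def train_fn(model: nn.Module, dataset: Dataset, indices, epochs: int = 3):
        # Train only on the requested subset of indices
        opt = torch.optim.Adam(model.parameters())
        loss_fn = nn.CrossEntropyLoss()
        for _ in range(epochs):
            for i in indices:
                x, y = dataset[i]
                opt.zero_grad()
                loss_fn(model(x.unsqueeze(0)), y.unsqueeze(0)).backward()
                opt.step()

    def eval_fn(model: nn.Module, dataset: Dataset):
        # Return a dict of named measures; each key becomes its own curve
        x, y = dataset.tensors
        with torch.no_grad():
            acc = (model(x).argmax(dim=1) == y).float().mean().item()
        return {"Accuracy": acc}

    suff = Sufficiency(model, train_ds, test_ds, train_fn, eval_fn, runs=2, substeps=5)
    output = suff.evaluate()                               # measures + fitted curve params
    projected = Sufficiency.project(output, [2000, 5000])  # extrapolate to larger datasets
    needed = Sufficiency.inv_project({"Accuracy": np.array([0.9])}, output)
    figures = Sufficiency.plot(output)                     # one Figure per measure

Any keys returned by eval_fn other than the reserved _STEPS_ and _CURVE_PARAMS_ names become independent curves, so several measures can be tracked in a single run.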
dataeval/detectors/__init__.py ADDED
@@ -0,0 +1,29 @@
+ from importlib.util import find_spec
+
+ from dataeval._internal.detectors.clusterer import Clusterer
+ from dataeval._internal.detectors.drift.base import LastSeenUpdate, ReservoirSamplingUpdate
+ from dataeval._internal.detectors.drift.cvm import DriftCVM
+ from dataeval._internal.detectors.drift.ks import DriftKS
+ from dataeval._internal.detectors.duplicates import Duplicates
+ from dataeval._internal.detectors.linter import Linter
+
+ __all__ = ["Clusterer", "Duplicates", "Linter", "DriftCVM", "DriftKS", "LastSeenUpdate", "ReservoirSamplingUpdate"]
+
+ if find_spec("torch") is not None:  # pragma: no cover
+     from dataeval._internal.detectors.drift.mmd import DriftMMD
+     from dataeval._internal.detectors.drift.torch import GaussianRBF, preprocess_drift
+     from dataeval._internal.detectors.drift.uncertainty import DriftUncertainty
+
+     __all__ += ["DriftMMD", "GaussianRBF", "DriftUncertainty", "preprocess_drift"]
+
+ if find_spec("tensorflow") is not None and find_spec("tensorflow_probability") is not None:  # pragma: no cover
+     from dataeval._internal.detectors.ood.ae import OOD_AE
+     from dataeval._internal.detectors.ood.aegmm import OOD_AEGMM
+     from dataeval._internal.detectors.ood.base import OODScore
+     from dataeval._internal.detectors.ood.llr import OOD_LLR
+     from dataeval._internal.detectors.ood.vae import OOD_VAE
+     from dataeval._internal.detectors.ood.vaegmm import OOD_VAEGMM
+
+     __all__ += ["OOD_AE", "OOD_AEGMM", "OOD_LLR", "OODScore", "OOD_VAE", "OOD_VAEGMM"]
+
+ del find_spec
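
Because of this gating, the exported surface of dataeval.detectors varies with the installed extras: torch adds the MMD and uncertainty drift detectors, and tensorflow (with tensorflow_probability) adds the OOD_* family. A minimal sketch, not part of the package, of probing availability before relying on a backend-specific detector:

    import dataeval.detectors as detectors

    # Core detectors are always exported; backend-specific names appear
    # in __all__ only when their optional dependency is installed.
    print(sorted(detectors.__all__))

    if "DriftMMD" in detectors.__all__:
        from dataeval.detectors import DriftMMD  # torch backend is available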
dataeval/flags/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from dataeval._internal.flags import ImageHash, ImageProperty, ImageStatistics, ImageVisuals
+
+ __all__ = ["ImageHash", "ImageProperty", "ImageStatistics", "ImageVisuals"]
dataeval/metrics/__init__.py ADDED
@@ -0,0 +1,7 @@
+ from dataeval._internal.metrics.ber import BER
+ from dataeval._internal.metrics.divergence import Divergence
+ from dataeval._internal.metrics.parity import Parity
+ from dataeval._internal.metrics.stats import ChannelStats, ImageStats
+ from dataeval._internal.metrics.uap import UAP
+
+ __all__ = ["BER", "Divergence", "Parity", "UAP", "ChannelStats", "ImageStats"]
dataeval/models/__init__.py ADDED
@@ -0,0 +1,15 @@
+ from importlib.util import find_spec
+
+ __all__ = []
+
+ if find_spec("tensorflow") is not None:  # pragma: no cover
+     from . import tensorflow
+
+     __all__ += ["tensorflow"]
+
+ if find_spec("torch") is not None:  # pragma: no cover
+     from . import torch
+
+     __all__ += ["torch"]
+
+ del find_spec
dataeval/models/tensorflow/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from dataeval._internal.models.tensorflow.autoencoder import AE, AEGMM, VAE, VAEGMM, eucl_cosim_features
+ from dataeval._internal.models.tensorflow.losses import Elbo, LossGMM
+ from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
+ from dataeval._internal.models.tensorflow.utils import create_model
+
+ __all__ = ["create_model", "eucl_cosim_features", "AE", "AEGMM", "Elbo", "LossGMM", "PixelCNN", "VAE", "VAEGMM"]
dataeval/models/torch/__init__.py ADDED
@@ -0,0 +1,8 @@
+ from dataeval._internal.models.pytorch.autoencoder import (
+     AETrainer,
+     AriaAutoencoder,
+     Decoder,
+     Encoder,
+ )
+
+ __all__ = ["AETrainer", "AriaAutoencoder", "Decoder", "Encoder"]
dataeval/py.typed ADDED
File without changes
dataeval/workflows/__init__.py ADDED
@@ -0,0 +1,8 @@
+ from importlib.util import find_spec
+
+ if find_spec("torch") is not None:  # pragma: no cover
+     from dataeval._internal.workflows.sufficiency import Sufficiency
+
+     __all__ = ["Sufficiency"]
+
+ del find_spec
dataeval-0.61.0.dist-info/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 ARiA
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.