dataeval 0.72.0__py3-none-any.whl → 0.72.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. dataeval/__init__.py +4 -4
  2. dataeval/detectors/__init__.py +4 -3
  3. dataeval/detectors/drift/__init__.py +10 -11
  4. dataeval/{_internal/detectors → detectors}/drift/base.py +51 -102
  5. dataeval/{_internal/detectors → detectors}/drift/cvm.py +9 -8
  6. dataeval/{_internal/detectors → detectors}/drift/ks.py +11 -10
  7. dataeval/{_internal/detectors → detectors}/drift/mmd.py +33 -34
  8. dataeval/{_internal/detectors → detectors}/drift/torch.py +15 -13
  9. dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +12 -9
  10. dataeval/detectors/drift/updates.py +61 -0
  11. dataeval/detectors/linters/__init__.py +3 -3
  12. dataeval/{_internal/detectors → detectors/linters}/clusterer.py +47 -45
  13. dataeval/{_internal/detectors → detectors/linters}/duplicates.py +20 -10
  14. dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
  15. dataeval/{_internal/detectors → detectors/linters}/outliers.py +19 -26
  16. dataeval/detectors/ood/__init__.py +8 -16
  17. dataeval/{_internal/detectors → detectors}/ood/ae.py +9 -9
  18. dataeval/{_internal/detectors → detectors}/ood/aegmm.py +10 -30
  19. dataeval/{_internal/detectors → detectors}/ood/base.py +27 -21
  20. dataeval/{_internal/detectors → detectors}/ood/llr.py +27 -23
  21. dataeval/detectors/ood/metadata_ks_compare.py +99 -0
  22. dataeval/detectors/ood/metadata_least_likely.py +119 -0
  23. dataeval/detectors/ood/metadata_ood_mi.py +92 -0
  24. dataeval/{_internal/detectors → detectors}/ood/vae.py +11 -13
  25. dataeval/{_internal/detectors → detectors}/ood/vaegmm.py +10 -32
  26. dataeval/{_internal/interop.py → interop.py} +12 -7
  27. dataeval/metrics/__init__.py +1 -1
  28. dataeval/metrics/bias/__init__.py +4 -4
  29. dataeval/{_internal/metrics → metrics/bias}/balance.py +70 -4
  30. dataeval/{_internal/metrics → metrics/bias}/coverage.py +10 -8
  31. dataeval/{_internal/metrics → metrics/bias}/diversity.py +54 -20
  32. dataeval/metrics/bias/metadata.py +275 -0
  33. dataeval/{_internal/metrics → metrics/bias}/parity.py +21 -17
  34. dataeval/metrics/estimators/__init__.py +3 -3
  35. dataeval/{_internal/metrics → metrics/estimators}/ber.py +31 -28
  36. dataeval/{_internal/metrics → metrics/estimators}/divergence.py +15 -16
  37. dataeval/{_internal/metrics → metrics/estimators}/uap.py +8 -6
  38. dataeval/metrics/stats/__init__.py +7 -7
  39. dataeval/{_internal/metrics → metrics}/stats/base.py +66 -40
  40. dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +19 -15
  41. dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +19 -17
  42. dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +12 -10
  43. dataeval/metrics/stats/hashstats.py +156 -0
  44. dataeval/{_internal/metrics → metrics}/stats/labelstats.py +8 -6
  45. dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +12 -11
  46. dataeval/{_internal/metrics → metrics}/stats/visualstats.py +14 -13
  47. dataeval/{_internal/output.py → output.py} +26 -6
  48. dataeval/utils/__init__.py +8 -4
  49. dataeval/utils/image.py +71 -0
  50. dataeval/utils/shared.py +151 -0
  51. dataeval/utils/split_dataset.py +486 -0
  52. dataeval/utils/tensorflow/__init__.py +9 -7
  53. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/autoencoder.py +64 -68
  54. dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +10 -9
  55. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/pixelcnn.py +18 -22
  56. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +3 -1
  57. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +18 -18
  58. dataeval/utils/tensorflow/loss/__init__.py +6 -2
  59. dataeval/utils/torch/__init__.py +7 -3
  60. dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
  61. dataeval/{_internal → utils/torch}/datasets.py +49 -43
  62. dataeval/utils/torch/models.py +138 -0
  63. dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +12 -141
  64. dataeval/{_internal → utils/torch}/utils.py +3 -1
  65. dataeval/workflows/__init__.py +1 -1
  66. dataeval/{_internal/workflows → workflows}/sufficiency.py +42 -37
  67. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/METADATA +7 -5
  68. dataeval-0.72.2.dist-info/RECORD +72 -0
  69. dataeval/_internal/detectors/__init__.py +0 -0
  70. dataeval/_internal/detectors/drift/__init__.py +0 -0
  71. dataeval/_internal/detectors/ood/__init__.py +0 -0
  72. dataeval/_internal/metrics/__init__.py +0 -0
  73. dataeval/_internal/metrics/stats/hashstats.py +0 -75
  74. dataeval/_internal/metrics/utils.py +0 -447
  75. dataeval/_internal/models/__init__.py +0 -0
  76. dataeval/_internal/models/pytorch/__init__.py +0 -0
  77. dataeval/_internal/models/pytorch/utils.py +0 -67
  78. dataeval/_internal/models/tensorflow/__init__.py +0 -0
  79. dataeval/_internal/workflows/__init__.py +0 -0
  80. dataeval/detectors/drift/kernels/__init__.py +0 -10
  81. dataeval/detectors/drift/updates/__init__.py +0 -7
  82. dataeval/utils/tensorflow/models/__init__.py +0 -9
  83. dataeval/utils/tensorflow/recon/__init__.py +0 -3
  84. dataeval/utils/torch/datasets/__init__.py +0 -12
  85. dataeval/utils/torch/models/__init__.py +0 -11
  86. dataeval/utils/torch/trainer/__init__.py +0 -7
  87. dataeval-0.72.0.dist-info/RECORD +0 -80
  88. /dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +0 -0
  89. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/LICENSE.txt +0 -0
  90. {dataeval-0.72.0.dist-info → dataeval-0.72.2.dist-info}/WHEEL +0 -0
@@ -1,8 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
+ __all__ = ["SufficiencyOutput", "Sufficiency"]
4
+
3
5
  import warnings
4
6
  from dataclasses import dataclass
5
- from typing import Any, Callable, Iterable, Mapping, Sequence, cast
7
+ from typing import Any, Callable, Generic, Iterable, Mapping, Sequence, TypeVar, cast
6
8
 
7
9
  import matplotlib.pyplot as plt
8
10
  import numpy as np
@@ -13,8 +15,8 @@ from numpy.typing import ArrayLike, NDArray
13
15
  from scipy.optimize import basinhopping
14
16
  from torch.utils.data import Dataset
15
17
 
16
- from dataeval._internal.interop import as_numpy
17
- from dataeval._internal.output import OutputMetadata, set_metadata
18
+ from dataeval.interop import as_numpy
19
+ from dataeval.output import OutputMetadata, set_metadata
18
20
 
19
21
 
20
22
  @dataclass(frozen=True)
@@ -36,7 +38,7 @@ class SufficiencyOutput(OutputMetadata):
36
38
  params: dict[str, NDArray[np.float64]]
37
39
  measures: dict[str, NDArray[np.float64]]
38
40
 
39
- def __post_init__(self):
41
+ def __post_init__(self) -> None:
40
42
  c = len(self.steps)
41
43
  if set(self.params) != set(self.measures):
42
44
  raise ValueError("params and measures have a key mismatch")
@@ -45,7 +47,7 @@ class SufficiencyOutput(OutputMetadata):
45
47
  if c != c_v:
46
48
  raise ValueError(f"{m} does not contain the expected number ({c}) of data points.")
47
49
 
48
- @set_metadata("dataeval.workflows.SufficiencyOutput")
50
+ @set_metadata()
49
51
  def project(
50
52
  self,
51
53
  projection: int | Iterable[int],
@@ -86,7 +88,7 @@ class SufficiencyOutput(OutputMetadata):
86
88
  return SufficiencyOutput(projection, self.params, output)
87
89
 
88
90
  def plot(self, class_names: Sequence[str] | None = None) -> list[Figure]:
89
- """Plotting function for data sufficiency tasks
91
+ """Plotting function for data :term:`sufficience<Sufficiency>` tasks
90
92
 
91
93
  Parameters
92
94
  ----------
@@ -170,7 +172,7 @@ class SufficiencyOutput(OutputMetadata):
170
172
  return projection
171
173
 
172
174
 
173
- def f_out(n_i: NDArray, x: NDArray) -> NDArray:
175
+ def f_out(n_i: NDArray[Any], x: NDArray[Any]) -> NDArray[Any]:
174
176
  """
175
177
  Calculates the line of best fit based on its free parameters
176
178
 
@@ -189,7 +191,7 @@ def f_out(n_i: NDArray, x: NDArray) -> NDArray:
189
191
  return x[0] * n_i ** (-x[1]) + x[2]
190
192
 
191
193
 
192
- def f_inv_out(y_i: NDArray, x: NDArray) -> NDArray[np.uint64]:
194
+ def f_inv_out(y_i: NDArray[Any], x: NDArray[Any]) -> NDArray[np.uint64]:
193
195
  """
194
196
  Inverse function for f_out()
195
197
 
@@ -209,7 +211,7 @@ def f_inv_out(y_i: NDArray, x: NDArray) -> NDArray[np.uint64]:
209
211
  return np.asarray(n_i, dtype=np.uint64)
210
212
 
211
213
 
212
- def calc_params(p_i: NDArray, n_i: NDArray, niter: int) -> NDArray:
214
+ def calc_params(p_i: NDArray[Any], n_i: NDArray[Any], niter: int) -> NDArray[Any]:
213
215
  """
214
216
  Retrieves the inverse power curve coefficients for the line of best fit.
215
217
  Global minimization is done via basin hopping. More info on this algorithm
@@ -254,7 +256,7 @@ def calc_params(p_i: NDArray, n_i: NDArray, niter: int) -> NDArray:
254
256
  return res.x
255
257
 
256
258
 
257
- def reset_parameters(model: nn.Module):
259
+ def reset_parameters(model: nn.Module) -> nn.Module:
258
260
  """
259
261
  Re-initializes each layer in the model using
260
262
  the layer's defined weight_init function
@@ -272,7 +274,7 @@ def reset_parameters(model: nn.Module):
272
274
  return model.apply(fn=weight_reset)
273
275
 
274
276
 
275
- def validate_dataset_len(dataset: Dataset) -> int:
277
+ def validate_dataset_len(dataset: Dataset[Any]) -> int:
276
278
  if not hasattr(dataset, "__len__"):
277
279
  raise TypeError("Must provide a dataset with a length attribute")
278
280
  length: int = dataset.__len__() # type: ignore
@@ -281,7 +283,7 @@ def validate_dataset_len(dataset: Dataset) -> int:
281
283
  return length
282
284
 
283
285
 
284
- def project_steps(params: NDArray, projection: NDArray) -> NDArray:
286
+ def project_steps(params: NDArray[Any], projection: NDArray[Any]) -> NDArray[Any]:
285
287
  """Projects the measures for each value of X
286
288
 
287
289
  Parameters
@@ -300,7 +302,7 @@ def project_steps(params: NDArray, projection: NDArray) -> NDArray:
300
302
  return 1 - f_out(projection, params)
301
303
 
302
304
 
303
- def inv_project_steps(params: NDArray, targets: NDArray) -> NDArray[np.uint64]:
305
+ def inv_project_steps(params: NDArray[Any], targets: NDArray[Any]) -> NDArray[np.uint64]:
304
306
  """Inverse function for project_steps()
305
307
 
306
308
  Parameters
@@ -320,7 +322,7 @@ def inv_project_steps(params: NDArray, targets: NDArray) -> NDArray[np.uint64]:
320
322
  return np.ceil(steps)
321
323
 
322
324
 
323
- def get_curve_params(measures: dict[str, NDArray], ranges: NDArray, niter: int) -> dict[str, NDArray]:
325
+ def get_curve_params(measures: dict[str, NDArray[Any]], ranges: NDArray[Any], niter: int) -> dict[str, NDArray[Any]]:
324
326
  """Calculates and aggregates parameters for both single and multi-class metrics"""
325
327
  output = {}
326
328
  for name, measure in measures.items():
@@ -337,10 +339,10 @@ def get_curve_params(measures: dict[str, NDArray], ranges: NDArray, niter: int)
337
339
 
338
340
  def plot_measure(
339
341
  name: str,
340
- steps: NDArray,
341
- measure: NDArray,
342
- params: NDArray,
343
- projection: NDArray,
342
+ steps: NDArray[Any],
343
+ measure: NDArray[Any],
344
+ params: NDArray[Any],
345
+ projection: NDArray[Any],
344
346
  ) -> Figure:
345
347
  fig = plt.figure()
346
348
  fig = cast(Figure, fig)
@@ -367,9 +369,12 @@ def plot_measure(
367
369
  return fig
368
370
 
369
371
 
370
- class Sufficiency:
372
+ T = TypeVar("T")
373
+
374
+
375
+ class Sufficiency(Generic[T]):
371
376
  """
372
- Project dataset sufficiency using given a model and evaluation criteria
377
+ Project dataset :term:`sufficiency<Sufficiency>` using given a model and evaluation criteria
373
378
 
374
379
  Parameters
375
380
  ----------
@@ -401,10 +406,10 @@ class Sufficiency:
401
406
  def __init__(
402
407
  self,
403
408
  model: nn.Module,
404
- train_ds: Dataset,
405
- test_ds: Dataset,
406
- train_fn: Callable[[nn.Module, Dataset, Sequence[int]], None],
407
- eval_fn: Callable[[nn.Module, Dataset], Mapping[str, float] | Mapping[str, ArrayLike]],
409
+ train_ds: Dataset[T],
410
+ test_ds: Dataset[T],
411
+ train_fn: Callable[[nn.Module, Dataset[T], Sequence[int]], None],
412
+ eval_fn: Callable[[nn.Module, Dataset[T]], Mapping[str, float] | Mapping[str, ArrayLike]],
408
413
  runs: int = 1,
409
414
  substeps: int = 5,
410
415
  train_kwargs: Mapping[str, Any] | None = None,
@@ -421,29 +426,29 @@ class Sufficiency:
421
426
  self.eval_kwargs = eval_kwargs
422
427
 
423
428
  @property
424
- def train_ds(self):
429
+ def train_ds(self) -> Dataset[T]:
425
430
  return self._train_ds
426
431
 
427
432
  @train_ds.setter
428
- def train_ds(self, value: Dataset):
433
+ def train_ds(self, value: Dataset[T]) -> None:
429
434
  self._train_ds = value
430
435
  self._length = validate_dataset_len(value)
431
436
 
432
437
  @property
433
- def test_ds(self):
438
+ def test_ds(self) -> Dataset[T]:
434
439
  return self._test_ds
435
440
 
436
441
  @test_ds.setter
437
- def test_ds(self, value: Dataset):
442
+ def test_ds(self, value: Dataset[T]) -> None:
438
443
  validate_dataset_len(value)
439
444
  self._test_ds = value
440
445
 
441
446
  @property
442
- def train_fn(self) -> Callable[[nn.Module, Dataset, Sequence[int]], None]:
447
+ def train_fn(self) -> Callable[[nn.Module, Dataset[T], Sequence[int]], None]:
443
448
  return self._train_fn
444
449
 
445
450
  @train_fn.setter
446
- def train_fn(self, value: Callable[[nn.Module, Dataset, Sequence[int]], None]):
451
+ def train_fn(self, value: Callable[[nn.Module, Dataset[T], Sequence[int]], None]) -> None:
447
452
  if not callable(value):
448
453
  raise TypeError("Must provide a callable for train_fn.")
449
454
  self._train_fn = value
@@ -451,14 +456,14 @@ class Sufficiency:
451
456
  @property
452
457
  def eval_fn(
453
458
  self,
454
- ) -> Callable[[nn.Module, Dataset], dict[str, float] | Mapping[str, ArrayLike]]:
459
+ ) -> Callable[[nn.Module, Dataset[T]], dict[str, float] | Mapping[str, ArrayLike]]:
455
460
  return self._eval_fn
456
461
 
457
462
  @eval_fn.setter
458
463
  def eval_fn(
459
464
  self,
460
- value: Callable[[nn.Module, Dataset], dict[str, float] | Mapping[str, ArrayLike]],
461
- ):
465
+ value: Callable[[nn.Module, Dataset[T]], dict[str, float] | Mapping[str, ArrayLike]],
466
+ ) -> None:
462
467
  if not callable(value):
463
468
  raise TypeError("Must provide a callable for eval_fn.")
464
469
  self._eval_fn = value
@@ -468,7 +473,7 @@ class Sufficiency:
468
473
  return self._train_kwargs
469
474
 
470
475
  @train_kwargs.setter
471
- def train_kwargs(self, value: Mapping[str, Any] | None):
476
+ def train_kwargs(self, value: Mapping[str, Any] | None) -> None:
472
477
  self._train_kwargs = {} if value is None else value
473
478
 
474
479
  @property
@@ -476,10 +481,10 @@ class Sufficiency:
476
481
  return self._eval_kwargs
477
482
 
478
483
  @eval_kwargs.setter
479
- def eval_kwargs(self, value: Mapping[str, Any] | None):
484
+ def eval_kwargs(self, value: Mapping[str, Any] | None) -> None:
480
485
  self._eval_kwargs = {} if value is None else value
481
486
 
482
- @set_metadata("dataeval.workflows", ["runs", "substeps"])
487
+ @set_metadata(["runs", "substeps"])
483
488
  def evaluate(self, eval_at: int | Iterable[int] | None = None, niter: int = 1000) -> SufficiencyOutput:
484
489
  """
485
490
  Creates data indices, trains models, and returns plotting data
@@ -488,7 +493,7 @@ class Sufficiency:
488
493
  ----------
489
494
  eval_at : int | Iterable[int] | None, default None
490
495
  Specify this to collect accuracies over a specific set of dataset lengths, rather
491
- than letting Sufficiency internally create the lengths to evaluate at.
496
+ than letting :term:`sufficiency<Sufficiency>` internally create the lengths to evaluate at.
492
497
  niter : int, default 1000
493
498
  Iterations to perform when using the basin-hopping method to curve-fit measure(s).
494
499
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataeval
3
- Version: 0.72.0
3
+ Version: 0.72.2
4
4
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
5
5
  Home-page: https://dataeval.ai/
6
6
  License: MIT
@@ -8,7 +8,7 @@ Author: Andrew Weng
8
8
  Author-email: andrew.weng@ariacoustics.com
9
9
  Maintainer: ARiA
10
10
  Maintainer-email: dataeval@ariacoustics.com
11
- Requires-Python: >=3.9,<3.12
11
+ Requires-Python: >=3.9,<3.13
12
12
  Classifier: Development Status :: 4 - Beta
13
13
  Classifier: Intended Audience :: Science/Research
14
14
  Classifier: License :: OSI Approved :: MIT License
@@ -17,24 +17,26 @@ Classifier: Programming Language :: Python :: 3
17
17
  Classifier: Programming Language :: Python :: 3.9
18
18
  Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
20
21
  Classifier: Programming Language :: Python :: 3 :: Only
21
22
  Classifier: Topic :: Scientific/Engineering
22
23
  Provides-Extra: all
23
24
  Provides-Extra: tensorflow
24
25
  Provides-Extra: torch
25
26
  Requires-Dist: hdbscan (>=0.8.36)
27
+ Requires-Dist: markupsafe (<3.0.2) ; extra == "tensorflow" or extra == "all"
26
28
  Requires-Dist: matplotlib ; extra == "torch" or extra == "all"
27
29
  Requires-Dist: numpy (>1.24.3)
28
- Requires-Dist: nvidia-cudnn-cu11 (>=8.6.0.163) ; extra == "tensorflow" or extra == "torch" or extra == "all"
29
30
  Requires-Dist: pillow (>=10.3.0)
30
31
  Requires-Dist: scikit-learn (>=1.5.0)
31
32
  Requires-Dist: scipy (>=1.10)
32
- Requires-Dist: tensorflow (>=2.16) ; extra == "tensorflow" or extra == "all"
33
+ Requires-Dist: tensorflow (>=2.16,<2.18) ; extra == "tensorflow" or extra == "all"
33
34
  Requires-Dist: tensorflow_probability (>=0.24) ; extra == "tensorflow" or extra == "all"
34
35
  Requires-Dist: tf-keras (>=2.16) ; extra == "tensorflow" or extra == "all"
35
36
  Requires-Dist: torch (>=2.2.0) ; extra == "torch" or extra == "all"
36
37
  Requires-Dist: torchvision (>=0.17.0) ; extra == "torch" or extra == "all"
37
38
  Requires-Dist: tqdm
39
+ Requires-Dist: typing-extensions (>=4.12) ; python_version >= "3.9" and python_version < "3.10"
38
40
  Requires-Dist: xxhash (>=3.3)
39
41
  Project-URL: Documentation, https://dataeval.readthedocs.io/
40
42
  Project-URL: Repository, https://github.com/aria-ml/dataeval/
@@ -44,7 +46,7 @@ Description-Content-Type: text/markdown
44
46
 
45
47
  ## About DataEval
46
48
 
47
- DataEval focuses on characterizing image data and its impact on model performance across classification and object-detection tasks.
49
+ DataEval focuses on characterizing image data and its impact on model performance across Classification and object-detection tasks.
48
50
 
49
51
  <!-- start about -->
50
52
 
@@ -0,0 +1,72 @@
1
+ dataeval/__init__.py,sha256=UYhkwned7TR5hiU_c8I_qUaKogO1EODTBgT-9_t0ofI,641
2
+ dataeval/detectors/__init__.py,sha256=xdp8LYOFjV5tVbAwu0Y03KU9EajHkSFy_M3raqbxpDc,383
3
+ dataeval/detectors/drift/__init__.py,sha256=MRPWFOaoVoqAHW36nA5F3wk7QXJU4oecND2RbtgG9oY,757
4
+ dataeval/detectors/drift/base.py,sha256=0S-0MFpIFaJ4_8IGreFKSmyna2L50FBn7DVaoNWmw8E,14509
5
+ dataeval/detectors/drift/cvm.py,sha256=kc59w2_wtxFGNnLcaJRvX5v_38gPXiebSGNiFVdunEQ,4142
6
+ dataeval/detectors/drift/ks.py,sha256=gcpe1WIQeNeZdLYkdMZCFLXUp1bHMQUxwJE6-RLVOXs,4229
7
+ dataeval/detectors/drift/mmd.py,sha256=TqGOnUNYKwpS0GQPV3dSl-_qRa0g2flmoQ-dxzW_JfY,7586
8
+ dataeval/detectors/drift/torch.py,sha256=D46J72OPW8-PpP3w9ODMBfcDSdailIgVjgHVFpbYfws,11649
9
+ dataeval/detectors/drift/uncertainty.py,sha256=Xz2yzJjtJfw1vLag234jwRvaa_HK36nMajGx8bQaNRs,5322
10
+ dataeval/detectors/drift/updates.py,sha256=UJ0z5hlunRi7twnkLABfdJG3tT2EqX4y9IGx8_USYvo,1780
11
+ dataeval/detectors/linters/__init__.py,sha256=BvpaB1RUpkEhhXk3Mqi5NYoOcJKZRFSBOJCmQOIfYRU,483
12
+ dataeval/detectors/linters/clusterer.py,sha256=OtBE5rglAGdTTQRmKUHP6J-uWmnh2E3lZxeqJCnc87U,21014
13
+ dataeval/detectors/linters/duplicates.py,sha256=tOD43rJkvheIA3mznbUqHhft2yD3xRZQdCt61daIca4,5665
14
+ dataeval/detectors/linters/merged_stats.py,sha256=X-bDTwjyR8RuVmzxLaHZmQ5nI3oOWvsqVlitdSncapk,1355
15
+ dataeval/detectors/linters/outliers.py,sha256=BUVvtbKHo04KnRmrgb84MBr0l1gtcY3-xNCHjetFrEQ,10117
16
+ dataeval/detectors/ood/__init__.py,sha256=FVyVuaxVKAOgSTaaBf-j2OXXDarSBFcJ7CTlMV6w88s,661
17
+ dataeval/detectors/ood/ae.py,sha256=cdwrgCpQkueK_HQoQbeXw7s0oTE-6FKVtXe9vETDe5M,2117
18
+ dataeval/detectors/ood/aegmm.py,sha256=jK5aN1UjwwZaSLB3BpzH25eLp5wBqzlgylsfphaoZaE,1814
19
+ dataeval/detectors/ood/base.py,sha256=S9jl4xH2zB_-ixalysQJZEvRCGOqMQSruacvfd4Dnfc,8687
20
+ dataeval/detectors/ood/llr.py,sha256=HUNsro-cV7RR5Mht6pJ4NWCRR7aWeVdjwkBNurs5LbM,10378
21
+ dataeval/detectors/ood/metadata_ks_compare.py,sha256=jH7uDwyyBIIcTrRhQEdnLAdrwf7LfNczKBw0CpJyF5c,4282
22
+ dataeval/detectors/ood/metadata_least_likely.py,sha256=nxMCXUOjOfWHDTGT2SLE7OYBCydRq8zHLd8t17k7hMM,5193
23
+ dataeval/detectors/ood/metadata_ood_mi.py,sha256=KLay2BmgHrStBV92VpIs_B1yEfQKllsMTgzOQEng01I,4065
24
+ dataeval/detectors/ood/vae.py,sha256=O1jpGkpavtJAqn4WrmocPRMtkX4iSdkpiCDUPBF1Ano,2925
25
+ dataeval/detectors/ood/vaegmm.py,sha256=37epPiQKeicy6SZD0D7O7hCFQSajZ-8wvga1pmJiq2s,2183
26
+ dataeval/interop.py,sha256=CFtGyVTwTqkJFkNfhHYhnBRVwxKIQ9f-9Zuuz_uQDqo,1589
27
+ dataeval/metrics/__init__.py,sha256=fPBNLd-T6mCErZBBJrxWmXIL0jCk7fNUYIcNEBkMa80,238
28
+ dataeval/metrics/bias/__init__.py,sha256=puf645-hAO5hFHNHlZ239TPopqWIoN-uLGXFB8-hA_o,599
29
+ dataeval/metrics/bias/balance.py,sha256=pgxaIqFvRcygYlAUbM_BKrbi45WU7fRV08HBrI7Z5q4,8569
30
+ dataeval/metrics/bias/coverage.py,sha256=Ku9l-qvc6YrRiQ0PRzkpfjInyOhkAKKSO_bf_LnOwNg,3623
31
+ dataeval/metrics/bias/diversity.py,sha256=-cmh-vyAUrn4rbn6-ZXvLuaO43Ncj28GKyeTmhWRzfE,8973
32
+ dataeval/metrics/bias/metadata.py,sha256=nUZRwhcKaJM0GVwXn5k11Fa1s56_OtOBF7tmXjMDpsM,8919
33
+ dataeval/metrics/bias/parity.py,sha256=uJ3p8m6id5mZpDNnS1NmxCThb5V6v75lJv_0TGAhCRA,16668
34
+ dataeval/metrics/estimators/__init__.py,sha256=O6ocxJq8XDkfJWwXeJnnnzbOyRnFPKF4kTIVTTZYOA8,380
35
+ dataeval/metrics/estimators/ber.py,sha256=SVT-BIC_GLs0l2l2NhWu4OpRbgn96w-OwTSoPHTnQbE,5037
36
+ dataeval/metrics/estimators/divergence.py,sha256=pImaa216-YYTgGWDCSTcpJrC-dfl7150yVrPfW_TyGc,4293
37
+ dataeval/metrics/estimators/uap.py,sha256=Tz1VZOyUa68HlTh94Rl-wnXCWdTAVjTQc3LtSPEWVu4,2175
38
+ dataeval/metrics/stats/__init__.py,sha256=igLRaAt1nX6yRwC4xI0zNPBADi3u7EsSxWP3OZ8AqcU,1086
39
+ dataeval/metrics/stats/base.py,sha256=9M5g2FAWvd50HT-T2h-MCmYLpvk--em_yWro1qWGHFs,12177
40
+ dataeval/metrics/stats/boxratiostats.py,sha256=iNr-FdppiJ7XAeeLY-o7gL_PSxvT8j86iwRijKca2Eg,6465
41
+ dataeval/metrics/stats/datasetstats.py,sha256=LAMFCIS9v0RjLrdKUFuo8nY-3HLVvRlqQIXGMKtsHEw,6255
42
+ dataeval/metrics/stats/dimensionstats.py,sha256=xdTp2AbGH3xefUUsB4sDjgSKiojJ73DCHyuCOPKsErc,4056
43
+ dataeval/metrics/stats/hashstats.py,sha256=X6aSouaMhDcGZMLuCTje3G4QOr2i-Td6H3SyBFDF6mA,4960
44
+ dataeval/metrics/stats/labelstats.py,sha256=BKwSmyxCr2wYq8IMraCUS-b5wqacfT_BukJUYNfqeCo,4114
45
+ dataeval/metrics/stats/pixelstats.py,sha256=x90O10IqVjEORtYwueFLvJnVYTxhPBOOx5HMweBQnJY,4578
46
+ dataeval/metrics/stats/visualstats.py,sha256=y0xIvst7epcajk8vz2jngiAiz0T7DZC-M97Rs1-vV9I,4950
47
+ dataeval/output.py,sha256=jWXXNxFNBEaY1rN7Z-6LZl6bQT-I7z_wqr91Rhrdt_0,3061
48
+ dataeval/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
+ dataeval/utils/__init__.py,sha256=zTgPsmloPy0qZMzb4xipNNdIWpaHtseGph68pIAD-hQ,684
50
+ dataeval/utils/image.py,sha256=KgC_1nW__nGN5q6bVZNvG4U_qIBdjcPATz9qe8f2XuA,1928
51
+ dataeval/utils/shared.py,sha256=BvEeYPMNQTmx4LSaImGeC0VkvcbEY3Byqtxa-jQ3xgc,3623
52
+ dataeval/utils/split_dataset.py,sha256=IopyxwC3FaZwgVriW4OXze-mDMpOlvRr83OADA5Jydk,19454
53
+ dataeval/utils/tensorflow/__init__.py,sha256=l4OjIA75JJXeNWDCkST1xtDMVYsw97lZ-9JXFBlyuYg,539
54
+ dataeval/utils/tensorflow/_internal/autoencoder.py,sha256=-pm4VqMEjHcrgre-K8uhMvaEVHyeqZsZbejrnlM6OtY,10430
55
+ dataeval/utils/tensorflow/_internal/gmm.py,sha256=QoEgbeax1GETqRmUF7A2ih9uFOZfFAjGzgH2ljExlAc,3669
56
+ dataeval/utils/tensorflow/_internal/loss.py,sha256=IXW_kxovLaTLd6UkMOIQLPEAGrOMILHDKagvRYgj-DE,4065
57
+ dataeval/utils/tensorflow/_internal/pixelcnn.py,sha256=Aa7koa7YxqhHmFequpsfMw2-61KO03evWWcvvFTuaco,48518
58
+ dataeval/utils/tensorflow/_internal/trainer.py,sha256=ld7pisl4ZXjEA6nxBStRNDEuNJme0IPo08oWqal6bYc,4167
59
+ dataeval/utils/tensorflow/_internal/utils.py,sha256=k1mjy44oE63SIkckvU8BTlqtWsCnGynJF4eYyw1pebQ,8799
60
+ dataeval/utils/tensorflow/loss/__init__.py,sha256=Q-66vt91Oe1ByYfo28tW32zXDq2MqQ2gngWgmIVmof8,227
61
+ dataeval/utils/torch/__init__.py,sha256=lpkqfgyARUxgrV94cZESQv8PIP2p-UnwItZ_wIr0XzQ,675
62
+ dataeval/utils/torch/blocks.py,sha256=HVhBTMMD5NA4qheMUgyol1KWiKZDIuc8k5j4RcMKmhk,1466
63
+ dataeval/utils/torch/datasets.py,sha256=9YV9-Uhq6NCMuu1hPhMnQXjmeI-Ld8ve1z_haxre88o,15023
64
+ dataeval/utils/torch/models.py,sha256=0BsXmLK8W1OZ8nnEGb1f9LzIeCgtevQC37dvKS1v1vA,3236
65
+ dataeval/utils/torch/trainer.py,sha256=EraOKiXxiMNiycStZNMR5yRz3ehgp87d9ewR9a9dV4w,5559
66
+ dataeval/utils/torch/utils.py,sha256=FI4LJ6DvXFQJVff8fxSCP7LRkp8H9BIUgYX0kk7_Cuo,1537
67
+ dataeval/workflows/__init__.py,sha256=x2JnOoKmLUCZOsB6RNPqMdVvxEb6Hpda5GPJnD_k0v0,310
68
+ dataeval/workflows/sufficiency.py,sha256=1jSYhH9i4oesmJYs5PZvWS1LGXf8ekOgNhpFtMPLPXk,18552
69
+ dataeval-0.72.2.dist-info/LICENSE.txt,sha256=Kpzcfobf1HlqafF-EX6dQLw9TlJiaJzfgvLQFukyXYw,1060
70
+ dataeval-0.72.2.dist-info/METADATA,sha256=ddOmTZA6nX7VceQhOmyQ-cQ1aBv2VU9Za32vnmjP-VE,4702
71
+ dataeval-0.72.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
72
+ dataeval-0.72.2.dist-info/RECORD,,
File without changes
File without changes
File without changes
File without changes
@@ -1,75 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from dataclasses import dataclass
4
- from typing import Iterable
5
-
6
- from numpy.typing import ArrayLike
7
-
8
- from dataeval._internal.metrics.stats.base import BaseStatsOutput, StatsProcessor, run_stats
9
- from dataeval._internal.metrics.utils import pchash, xxhash
10
- from dataeval._internal.output import set_metadata
11
-
12
-
13
- @dataclass(frozen=True)
14
- class HashStatsOutput(BaseStatsOutput):
15
- """
16
- Output class for :func:`hashstats` stats metric
17
-
18
- Attributes
19
- ----------
20
- xxhash : List[str]
21
- xxHash hash of the images as a hex string
22
- pchash : List[str]
23
- Perception hash of the images as a hex string
24
- """
25
-
26
- xxhash: list[str]
27
- pchash: list[str]
28
-
29
-
30
- class HashStatsProcessor(StatsProcessor[HashStatsOutput]):
31
- output_class = HashStatsOutput
32
- image_function_map = {
33
- "xxhash": lambda x: xxhash(x.image),
34
- "pchash": lambda x: pchash(x.image),
35
- }
36
-
37
-
38
- @set_metadata("dataeval.metrics")
39
- def hashstats(
40
- images: Iterable[ArrayLike],
41
- bboxes: Iterable[ArrayLike] | None = None,
42
- ) -> HashStatsOutput:
43
- """
44
- Calculates hashes for each image
45
-
46
- This function computes hashes from the images including exact hashes and perception-based
47
- hashes. These hash values can be used to determine if images are exact or near matches.
48
-
49
- Parameters
50
- ----------
51
- images : ArrayLike
52
- Images to hashing
53
- bboxes : Iterable[ArrayLike] or None
54
- Bounding boxes in `xyxy` format for each image
55
-
56
- Returns
57
- -------
58
- HashStatsOutput
59
- A dictionary-like object containing the computed hashes for each image.
60
-
61
- See Also
62
- --------
63
- Duplicates
64
-
65
- Examples
66
- --------
67
- Calculating the statistics on the images, whose shape is (C, H, W)
68
-
69
- >>> results = hashstats(images)
70
- >>> print(results.xxhash)
71
- ['a72434443d6e7336', 'efc12c2f14581d79', '4a1e03483a27d674', '3a3ecedbcf814226']
72
- >>> print(results.pchash)
73
- ['8f25506af46a7c6a', '8000808000008080', '8e71f18e0ef18e0e', 'a956d6a956d6a928']
74
- """
75
- return run_stats(images, bboxes, False, [HashStatsProcessor])[0]