dataeval 0.72.1__py3-none-any.whl → 0.73.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. dataeval/__init__.py +4 -4
  2. dataeval/detectors/__init__.py +4 -3
  3. dataeval/detectors/drift/__init__.py +9 -10
  4. dataeval/{_internal/detectors → detectors}/drift/base.py +39 -91
  5. dataeval/{_internal/detectors → detectors}/drift/cvm.py +4 -3
  6. dataeval/{_internal/detectors → detectors}/drift/ks.py +4 -3
  7. dataeval/{_internal/detectors → detectors}/drift/mmd.py +23 -25
  8. dataeval/{_internal/detectors → detectors}/drift/torch.py +13 -11
  9. dataeval/{_internal/detectors → detectors}/drift/uncertainty.py +7 -5
  10. dataeval/detectors/drift/updates.py +61 -0
  11. dataeval/detectors/linters/__init__.py +3 -3
  12. dataeval/{_internal/detectors → detectors/linters}/clusterer.py +41 -39
  13. dataeval/{_internal/detectors → detectors/linters}/duplicates.py +19 -9
  14. dataeval/{_internal/detectors → detectors/linters}/merged_stats.py +3 -1
  15. dataeval/{_internal/detectors → detectors/linters}/outliers.py +14 -21
  16. dataeval/detectors/ood/__init__.py +6 -6
  17. dataeval/{_internal/detectors → detectors}/ood/ae.py +20 -12
  18. dataeval/detectors/ood/aegmm.py +66 -0
  19. dataeval/{_internal/detectors → detectors}/ood/base.py +33 -21
  20. dataeval/{_internal/detectors → detectors}/ood/llr.py +43 -33
  21. dataeval/detectors/ood/metadata_ks_compare.py +99 -0
  22. dataeval/detectors/ood/metadata_least_likely.py +119 -0
  23. dataeval/detectors/ood/metadata_ood_mi.py +92 -0
  24. dataeval/{_internal/detectors → detectors}/ood/vae.py +23 -17
  25. dataeval/detectors/ood/vaegmm.py +75 -0
  26. dataeval/interop.py +56 -0
  27. dataeval/metrics/__init__.py +1 -1
  28. dataeval/metrics/bias/__init__.py +4 -4
  29. dataeval/{_internal/metrics → metrics/bias}/balance.py +75 -13
  30. dataeval/{_internal/metrics → metrics/bias}/coverage.py +41 -7
  31. dataeval/{_internal/metrics → metrics/bias}/diversity.py +75 -18
  32. dataeval/metrics/bias/metadata.py +358 -0
  33. dataeval/{_internal/metrics → metrics/bias}/parity.py +54 -44
  34. dataeval/metrics/estimators/__init__.py +3 -3
  35. dataeval/{_internal/metrics → metrics/estimators}/ber.py +25 -22
  36. dataeval/{_internal/metrics → metrics/estimators}/divergence.py +11 -12
  37. dataeval/{_internal/metrics → metrics/estimators}/uap.py +5 -3
  38. dataeval/metrics/stats/__init__.py +7 -7
  39. dataeval/{_internal/metrics → metrics}/stats/base.py +59 -35
  40. dataeval/{_internal/metrics → metrics}/stats/boxratiostats.py +18 -14
  41. dataeval/{_internal/metrics → metrics}/stats/datasetstats.py +18 -16
  42. dataeval/{_internal/metrics → metrics}/stats/dimensionstats.py +9 -7
  43. dataeval/metrics/stats/hashstats.py +156 -0
  44. dataeval/{_internal/metrics → metrics}/stats/labelstats.py +5 -3
  45. dataeval/{_internal/metrics → metrics}/stats/pixelstats.py +9 -8
  46. dataeval/{_internal/metrics → metrics}/stats/visualstats.py +10 -9
  47. dataeval/{_internal/output.py → output.py} +26 -6
  48. dataeval/utils/__init__.py +8 -3
  49. dataeval/utils/image.py +71 -0
  50. dataeval/utils/lazy.py +26 -0
  51. dataeval/utils/metadata.py +258 -0
  52. dataeval/utils/shared.py +151 -0
  53. dataeval/{_internal → utils}/split_dataset.py +98 -33
  54. dataeval/utils/tensorflow/__init__.py +7 -6
  55. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/gmm.py +8 -2
  56. dataeval/{_internal/models/tensorflow/losses.py → utils/tensorflow/_internal/loss.py} +28 -18
  57. dataeval/{_internal/models/tensorflow/pixelcnn.py → utils/tensorflow/_internal/models.py} +387 -97
  58. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/trainer.py +15 -6
  59. dataeval/{_internal/models/tensorflow → utils/tensorflow/_internal}/utils.py +84 -85
  60. dataeval/utils/tensorflow/loss/__init__.py +6 -2
  61. dataeval/utils/torch/__init__.py +7 -3
  62. dataeval/{_internal/models/pytorch → utils/torch}/blocks.py +19 -14
  63. dataeval/{_internal → utils/torch}/datasets.py +48 -42
  64. dataeval/utils/torch/models.py +138 -0
  65. dataeval/{_internal/models/pytorch/autoencoder.py → utils/torch/trainer.py} +7 -136
  66. dataeval/{_internal → utils/torch}/utils.py +3 -1
  67. dataeval/workflows/__init__.py +1 -1
  68. dataeval/{_internal/workflows → workflows}/sufficiency.py +39 -34
  69. {dataeval-0.72.1.dist-info → dataeval-0.73.0.dist-info}/METADATA +4 -3
  70. dataeval-0.73.0.dist-info/RECORD +73 -0
  71. dataeval/_internal/detectors/__init__.py +0 -0
  72. dataeval/_internal/detectors/drift/__init__.py +0 -0
  73. dataeval/_internal/detectors/ood/__init__.py +0 -0
  74. dataeval/_internal/detectors/ood/aegmm.py +0 -78
  75. dataeval/_internal/detectors/ood/vaegmm.py +0 -89
  76. dataeval/_internal/interop.py +0 -49
  77. dataeval/_internal/metrics/__init__.py +0 -0
  78. dataeval/_internal/metrics/stats/hashstats.py +0 -75
  79. dataeval/_internal/metrics/utils.py +0 -447
  80. dataeval/_internal/models/__init__.py +0 -0
  81. dataeval/_internal/models/pytorch/__init__.py +0 -0
  82. dataeval/_internal/models/pytorch/utils.py +0 -67
  83. dataeval/_internal/models/tensorflow/__init__.py +0 -0
  84. dataeval/_internal/models/tensorflow/autoencoder.py +0 -320
  85. dataeval/_internal/workflows/__init__.py +0 -0
  86. dataeval/detectors/drift/kernels/__init__.py +0 -10
  87. dataeval/detectors/drift/updates/__init__.py +0 -8
  88. dataeval/utils/tensorflow/models/__init__.py +0 -9
  89. dataeval/utils/tensorflow/recon/__init__.py +0 -3
  90. dataeval/utils/torch/datasets/__init__.py +0 -12
  91. dataeval/utils/torch/models/__init__.py +0 -11
  92. dataeval/utils/torch/trainer/__init__.py +0 -7
  93. dataeval-0.72.1.dist-info/RECORD +0 -81
  94. {dataeval-0.72.1.dist-info → dataeval-0.73.0.dist-info}/LICENSE.txt +0 -0
  95. {dataeval-0.72.1.dist-info → dataeval-0.73.0.dist-info}/WHEEL +0 -0
@@ -8,10 +8,16 @@ Licensed under Apache Software License (Apache 2.0)
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
- from typing import NamedTuple
11
+ from typing import TYPE_CHECKING, NamedTuple
12
12
 
13
13
  import numpy as np
14
- import tensorflow as tf
14
+
15
+ from dataeval.utils.lazy import lazyload
16
+
17
+ if TYPE_CHECKING:
18
+ import tensorflow as tf
19
+ else:
20
+ tf = lazyload("tensorflow")
15
21
 
16
22
 
17
23
  class GaussianMixtureModelParams(NamedTuple):
@@ -8,16 +8,26 @@ Licensed under Apache Software License (Apache 2.0)
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
- from typing import Literal, cast
11
+ from typing import TYPE_CHECKING, Literal, cast
12
12
 
13
- import tensorflow as tf
13
+ import numpy as np
14
14
  from numpy.typing import NDArray
15
- from tensorflow_probability.python.distributions.mvn_diag import MultivariateNormalDiag
16
- from tensorflow_probability.python.distributions.mvn_tril import MultivariateNormalTriL
17
- from tensorflow_probability.python.stats import covariance
18
- from tf_keras.layers import Flatten
19
15
 
20
- from dataeval._internal.models.tensorflow.gmm import gmm_energy, gmm_params
16
+ from dataeval.utils.lazy import lazyload
17
+ from dataeval.utils.tensorflow._internal.gmm import gmm_energy, gmm_params
18
+
19
+ if TYPE_CHECKING:
20
+ import tensorflow as tf
21
+ import tensorflow_probability.python.distributions.mvn_diag as mvn_diag
22
+ import tensorflow_probability.python.distributions.mvn_tril as mvn_tril
23
+ import tensorflow_probability.python.stats as tfp_stats
24
+ import tf_keras as keras
25
+ else:
26
+ tf = lazyload("tensorflow")
27
+ keras = lazyload("tf_keras")
28
+ mvn_diag = lazyload("tensorflow_probability.python.distributions.mvn_diag")
29
+ mvn_tril = lazyload("tensorflow_probability.python.distributions.mvn_tril")
30
+ tfp_stats = lazyload("tensorflow_probability.python.stats")
21
31
 
22
32
 
23
33
  class Elbo:
@@ -39,29 +49,29 @@ class Elbo:
39
49
  def __init__(
40
50
  self,
41
51
  cov_type: Literal["cov_full", "cov_diag"] | float = 1.0,
42
- x: tf.Tensor | NDArray | None = None,
52
+ x: tf.Tensor | NDArray[np.float32] | None = None,
43
53
  ):
44
54
  if isinstance(cov_type, float):
45
- self.cov = ("sim", cov_type)
55
+ self._cov = ("sim", cov_type)
46
56
  elif cov_type in ["cov_full", "cov_diag"]:
47
- x_np: NDArray = x.numpy() if tf.is_tensor(x) else x # type: ignore
48
- cov = covariance(x_np.reshape(x_np.shape[0], -1)) # type: ignore py38
57
+ x_np: NDArray[np.float32] = x.numpy().astype(np.float32) if tf.is_tensor(x) else x # type: ignore
58
+ cov = tfp_stats.covariance(x_np.reshape(x_np.shape[0], -1)) # type: ignore py38
49
59
  if cov_type == "cov_diag": # infer standard deviation from covariance matrix
50
60
  cov = tf.math.sqrt(tf.linalg.diag_part(cov))
51
- self.cov = (cov_type, cov)
61
+ self._cov = (cov_type, cov)
52
62
  else:
53
63
  raise ValueError("Only cov_full, cov_diag or sim value should be specified.")
54
64
 
55
65
  def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
56
- y_pred_flat = cast(tf.Tensor, Flatten()(y_pred))
66
+ y_pred_flat = cast(tf.Tensor, keras.layers.Flatten()(y_pred))
57
67
 
58
- if self.cov[0] == "cov_full":
59
- y_mn = MultivariateNormalTriL(y_pred_flat, scale_tril=tf.linalg.cholesky(self.cov[1]))
68
+ if self._cov[0] == "cov_full":
69
+ y_mn = mvn_tril.MultivariateNormalTriL(y_pred_flat, scale_tril=tf.linalg.cholesky(self._cov[1]))
60
70
  else: # cov_diag and sim
61
- cov_diag = self.cov[1] if self.cov[0] == "cov_diag" else self.cov[1] * tf.ones(y_pred_flat.shape[-1])
62
- y_mn = MultivariateNormalDiag(y_pred_flat, scale_diag=cov_diag)
71
+ cov_diag = self._cov[1] if self._cov[0] == "cov_diag" else self._cov[1] * tf.ones(y_pred_flat.shape[-1])
72
+ y_mn = mvn_diag.MultivariateNormalDiag(y_pred_flat, scale_diag=cov_diag)
63
73
 
64
- loss = -tf.reduce_mean(y_mn.log_prob(Flatten()(y_true)))
74
+ loss = -tf.reduce_mean(y_mn.log_prob(keras.layers.Flatten()(y_true)))
65
75
  return loss
66
76
 
67
77