PyPI - dataeval - Versions diffs - 0.64.0__py3-none-any.whl → 0.66.0__py3-none-any.whl - Mend

dataeval 0.64.0__py3-none-any.whl → 0.66.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65)

dataeval/__init__.py +13 -9
dataeval/_internal/detectors/clusterer.py +63 -49
dataeval/_internal/detectors/drift/base.py +248 -51
dataeval/_internal/detectors/drift/cvm.py +28 -26
dataeval/_internal/detectors/drift/ks.py +31 -28
dataeval/_internal/detectors/drift/mmd.py +62 -42
dataeval/_internal/detectors/drift/torch.py +69 -60
dataeval/_internal/detectors/drift/uncertainty.py +32 -32
dataeval/_internal/detectors/duplicates.py +67 -31
dataeval/_internal/detectors/ood/ae.py +15 -29
dataeval/_internal/detectors/ood/aegmm.py +33 -27
dataeval/_internal/detectors/ood/base.py +86 -47
dataeval/_internal/detectors/ood/llr.py +34 -31
dataeval/_internal/detectors/ood/vae.py +32 -31
dataeval/_internal/detectors/ood/vaegmm.py +34 -28
dataeval/_internal/detectors/{linter.py → outliers.py} +60 -38
dataeval/_internal/flags.py +44 -21
dataeval/_internal/interop.py +5 -3
dataeval/_internal/metrics/balance.py +42 -5
dataeval/_internal/metrics/ber.py +11 -8
dataeval/_internal/metrics/coverage.py +15 -8
dataeval/_internal/metrics/divergence.py +41 -7
dataeval/_internal/metrics/diversity.py +57 -19
dataeval/_internal/metrics/parity.py +141 -66
dataeval/_internal/metrics/stats.py +330 -313
dataeval/_internal/metrics/uap.py +33 -4
dataeval/_internal/metrics/utils.py +79 -40
dataeval/_internal/models/pytorch/autoencoder.py +127 -22
dataeval/_internal/models/tensorflow/autoencoder.py +33 -30
dataeval/_internal/models/tensorflow/gmm.py +4 -2
dataeval/_internal/models/tensorflow/losses.py +17 -13
dataeval/_internal/models/tensorflow/pixelcnn.py +19 -18
dataeval/_internal/models/tensorflow/trainer.py +10 -7
dataeval/_internal/models/tensorflow/utils.py +23 -20
dataeval/_internal/output.py +85 -0
dataeval/_internal/utils.py +5 -3
dataeval/_internal/workflows/sufficiency.py +122 -121
dataeval/detectors/__init__.py +6 -25
dataeval/detectors/drift/__init__.py +16 -0
dataeval/detectors/drift/kernels/__init__.py +6 -0
dataeval/detectors/drift/updates/__init__.py +3 -0
dataeval/detectors/linters/__init__.py +5 -0
dataeval/detectors/ood/__init__.py +11 -0
dataeval/flags/__init__.py +2 -2
dataeval/metrics/__init__.py +2 -26
dataeval/metrics/bias/__init__.py +14 -0
dataeval/metrics/estimators/__init__.py +9 -0
dataeval/metrics/stats/__init__.py +6 -0
dataeval/tensorflow/__init__.py +3 -0
dataeval/tensorflow/loss/__init__.py +3 -0
dataeval/tensorflow/models/__init__.py +5 -0
dataeval/tensorflow/recon/__init__.py +3 -0
dataeval/torch/__init__.py +3 -0
dataeval/{models/torch → torch/models}/__init__.py +1 -2
dataeval/torch/trainer/__init__.py +3 -0
dataeval/utils/__init__.py +3 -6
dataeval/workflows/__init__.py +2 -4
{dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/METADATA +1 -1
dataeval-0.66.0.dist-info/RECORD +72 -0
dataeval/_internal/metrics/base.py +0 -10
dataeval/models/__init__.py +0 -15
dataeval/models/tensorflow/__init__.py +0 -6
dataeval-0.64.0.dist-info/RECORD +0 -60
{dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/LICENSE.txt +0 -0
{dataeval-0.64.0.dist-info → dataeval-0.66.0.dist-info}/WHEEL +0 -0

dataeval/_internal/models/tensorflow/autoencoder.py CHANGED Viewed

@@ -8,7 +8,9 @@ Licensed under Apache Software License (Apache 2.0)
 # pyright: reportIncompatibleMethodOverride=false
-from typing import Callable, Tuple, cast
+from __future__ import annotations
+from typing import Callable, cast
 import keras
 import tensorflow as tf
@@ -56,16 +58,17 @@ def eucl_cosim_features(x: tf.Tensor, y: tf.Tensor, max_eucl: float = 1e2) -> tf
     Parameters
     ----------
-    x
+    x : tf.Tensor
         Tensor used in feature computation.
-    y
+    y : tf.Tensor
         Tensor used in feature computation.
-    max_eucl
+    max_eucl : float, default 1e2
         Maximum value to clip relative Euclidean distance by.
     Returns
     -------
-    Tensor concatenating the relative Euclidean distance and cosine similarity features.
+    tf.Tensor
+        Tensor concatenating the relative Euclidean distance and cosine similarity features.
     """
     if len(x.shape) > 2 or len(y.shape) > 2:
         x = cast(tf.Tensor, Flatten()(x))
@@ -78,9 +81,9 @@ def eucl_cosim_features(x: tf.Tensor, y: tf.Tensor, max_eucl: float = 1e2) -> tf
 class Sampling(Layer):
-    """Reparametrization trick. Uses (z_mean, z_log_var) to sample the latent vector z."""
+    """Reparametrization trick - Uses (z_mean, z_log_var) to sample the latent vector z."""
-    def call(self, inputs: Tuple[tf.Tensor, tf.Tensor]) -> tf.Tensor:
+    def call(self, inputs: tuple[tf.Tensor, tf.Tensor]) -> tf.Tensor:
         """
         Sample z.
@@ -138,7 +141,7 @@ class EncoderVAE(Layer):
         self.fc_log_var = Dense(latent_dim, activation=None)
         self.sampling = Sampling()
-    def call(self, x: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+    def call(self, x: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
         x = cast(tf.Tensor, self.encoder_net(x))
         if len(x.shape) > 2:
             x = cast(tf.Tensor, Flatten()(x))
@@ -173,9 +176,9 @@ class AE(keras.Model):
     Parameters
     ----------
-    encoder_net
+    encoder_net : keras.Model
         Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net
+    decoder_net : keras.Model
         Layers for the decoder wrapped in a keras.Sequential class.
     """
@@ -196,13 +199,13 @@ class VAE(keras.Model):
     Parameters
     ----------
-    encoder_net
+    encoder_net : keras.Model
         Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net
+    decoder_net : keras.Model
         Layers for the decoder wrapped in a keras.Sequential class.
-    latent_dim
+    latent_dim : int
         Dimensionality of the latent space.
-    beta
+    beta : float, default 1.0
         Beta parameter for KL-divergence loss term.
     """
@@ -214,7 +217,7 @@ class VAE(keras.Model):
         self.latent_dim = latent_dim
     def call(self, x: tf.Tensor) -> tf.Tensor:
-        z_mean, z_log_var, z = cast(Tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(x))
+        z_mean, z_log_var, z = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(x))
         x_recon = self.decoder(z)
         # add KL divergence loss term
         kl_loss = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
@@ -228,15 +231,15 @@ class AEGMM(keras.Model):
     Parameters
     ----------
-    encoder_net
+    encoder_net : keras.Model
         Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net
+    decoder_net : keras.Model
         Layers for the decoder wrapped in a keras.Sequential class.
-    gmm_density_net
+    gmm_density_net : keras.Model
         Layers for the GMM network wrapped in a keras.Sequential class.
-    n_gmm
+    n_gmm : int
         Number of components in GMM.
-    recon_features
+    recon_features : Callable, default eucl_cosim_features
         Function to extract features from the reconstructed instance by the decoder.
     """
@@ -255,7 +258,7 @@ class AEGMM(keras.Model):
         self.n_gmm = n_gmm
         self.recon_features = recon_features
-    def call(self, x: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+    def call(self, x: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
         enc = self.encoder(x)
         x_recon = cast(tf.Tensor, self.decoder(enc))
         recon_features = self.recon_features(x, x_recon)
@@ -270,19 +273,19 @@ class VAEGMM(keras.Model):
     Parameters
     ----------
-    encoder_net
+    encoder_net : keras.Model
         Layers for the encoder wrapped in a keras.Sequential class.
-    decoder_net
+    decoder_net : keras.Model
         Layers for the decoder wrapped in a keras.Sequential class.
-    gmm_density_net
+    gmm_density_net : keras.Model
         Layers for the GMM network wrapped in a keras.Sequential class.
-    n_gmm
+    n_gmm : int
         Number of components in GMM.
-    latent_dim
+    latent_dim : int
         Dimensionality of the latent space.
-    recon_features
+    recon_features : Callable, default eucl_cosim_features
         Function to extract features from the reconstructed instance by the decoder.
-    beta
+    beta : float, default 1.0
         Beta parameter for KL-divergence loss term.
     """
@@ -305,8 +308,8 @@ class VAEGMM(keras.Model):
         self.recon_features = recon_features
         self.beta = beta
-    def call(self, x: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
-        enc_mean, enc_log_var, enc = cast(Tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(x))
+    def call(self, x: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+        enc_mean, enc_log_var, enc = cast(tuple[tf.Tensor, tf.Tensor, tf.Tensor], self.encoder(x))
         x_recon = cast(tf.Tensor, self.decoder(enc))
         recon_features = self.recon_features(x, x_recon)
         z = cast(tf.Tensor, tf.concat([enc, recon_features], -1))

dataeval/_internal/models/tensorflow/gmm.py CHANGED Viewed

@@ -6,7 +6,9 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
-from typing import NamedTuple, Tuple
+from __future__ import annotations
+from typing import NamedTuple
 import numpy as np
 import tensorflow as tf
@@ -75,7 +77,7 @@ def gmm_energy(
     z: tf.Tensor,
     params: GaussianMixtureModelParams,
     return_mean: bool = True,
-) -> Tuple[tf.Tensor, tf.Tensor]:
+) -> tuple[tf.Tensor, tf.Tensor]:
     """
     Compute sample energy from Gaussian Mixture Model.

dataeval/_internal/models/tensorflow/losses.py CHANGED Viewed

@@ -6,11 +6,13 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
-from typing import Literal, Optional, Union, cast
+from __future__ import annotations
+from typing import Literal, cast
-import numpy as np
 import tensorflow as tf
 from keras.layers import Flatten
+from numpy.typing import NDArray
 from tensorflow_probability.python.distributions.mvn_diag import MultivariateNormalDiag
 from tensorflow_probability.python.distributions.mvn_tril import MultivariateNormalTriL
 from tensorflow_probability.python.stats import covariance
@@ -20,27 +22,29 @@ from dataeval._internal.models.tensorflow.gmm import gmm_energy, gmm_params
 class Elbo:
     """
-    Compute ELBO loss. The covariance matrix can be specified by passing the full covariance matrix, the matrix
+    Compute ELBO loss.
+    The covariance matrix can be specified by passing the full covariance matrix, the matrix
     diagonal, or a scale identity multiplier. Only one of these should be specified. If none are specified, the
     identity matrix is used.
     Parameters
     ----------
-    cov_type
+    cov_type : Union[Literal["cov_full", "cov_diag"], float], default 1.0
         Full covariance matrix, diagonal variance matrix, or scale identity multiplier.
-    x
+    x : ArrayLike, optional - default None
         Dataset used to calculate the covariance matrix.  Required for full and diagonal covariance matrix types.
     """
     def __init__(
         self,
-        cov_type: Union[Literal["cov_full", "cov_diag"], float] = 1.0,
-        x: Optional[Union[tf.Tensor, np.ndarray]] = None,
+        cov_type: Literal["cov_full", "cov_diag"] | float = 1.0,
+        x: tf.Tensor | NDArray | None = None,
     ):
         if isinstance(cov_type, float):
             self.cov = ("sim", cov_type)
         elif cov_type in ["cov_full", "cov_diag"]:
-            x_np: np.ndarray = x.numpy() if tf.is_tensor(x) else x  # type: ignore
+            x_np: NDArray = x.numpy() if tf.is_tensor(x) else x  # type: ignore
             cov = covariance(x_np.reshape(x_np.shape[0], -1))  # type: ignore py38
             if cov_type == "cov_diag":  # infer standard deviation from covariance matrix
                 cov = tf.math.sqrt(tf.linalg.diag_part(cov))
@@ -67,13 +71,13 @@ class LossGMM:
     Parameters
     ----------
-    w_recon
+    w_recon : float, default 1e-7
         Weight on elbo loss term.
-    w_energy
+    w_energy : float, default 0.1
         Weight on sample energy loss term.
-    w_cov_diag
+    w_cov_diag : float, default 0.005
         Weight on covariance regularizing loss term.
-    elbo
+    elbo : Elbo, optional - default None
         ELBO loss function used to calculate w_recon.
     """
@@ -82,7 +86,7 @@ class LossGMM:
         w_recon: float = 1e-7,
         w_energy: float = 0.1,
         w_cov_diag: float = 0.005,
-        elbo: Optional[Elbo] = None,
+        elbo: Elbo | None = None,
     ):
         self.w_recon = w_recon
         self.w_energy = w_energy

dataeval/_internal/models/tensorflow/pixelcnn.py CHANGED Viewed

@@ -8,9 +8,10 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
+from __future__ import annotations
 import functools
 import warnings
-from typing import Optional
 import keras
 import numpy as np
@@ -238,47 +239,47 @@ class PixelCNN(distribution.Distribution):
     Parameters
     ----------
-    image_shape
+    image_shape : tuple
         3D `TensorShape` or tuple for the `[height, width, channels]` dimensions of the image.
-    conditional_shape
+    conditional_shape : tuple, optional - default None
         `TensorShape` or tuple for the shape of the conditional input, or `None` if there is no conditional input.
-    num_resnet
+    num_resnet : int, default 5
         The number of layers (shown in Figure 2 of [2]) within each highest-level block of Figure 2 of [1].
-    num_hierarchies
+    num_hierarchies : int, default 3
         The number of highest-level blocks (separated by expansions/contractions of dimensions in Figure 2 of [1].)
-    num_filters
+    num_filters : int, default 160
         The number of convolutional filters.
-    num_logistic_mix
+    num_logistic_mix : int, default 10
         Number of components in the logistic mixture distribution.
-    receptive_field_dims
+    receptive_field_dims tuple, default (3, 3)
         Height and width in pixels of the receptive field of the convolutional layers above and to the left
         of a given pixel. The width (second element of the tuple) should be odd. Figure 1 (middle) of [2]
         shows a receptive field of (3, 5) (the row containing the current pixel is included in the height).
         The default of (3, 3) was used to produce the results in [1].
-    dropout_p
+    dropout_p : float, default 0.0
         The dropout probability. Should be between 0 and 1.
-    resnet_activation
+    resnet_activation : str, default "concat_elu"
         The type of activation to use in the resnet blocks. May be 'concat_elu', 'elu', or 'relu'.
-    l2_weight
+    l2_weight : float, default 0.0
         The L2 regularization weight.
-    use_weight_norm
+    use_weight_norm : bool, default True
         If `True` then use weight normalization (works only in Eager mode).
-    use_data_init
+    use_data_init : bool, default True
         If `True` then use data-dependent initialization (has no effect if `use_weight_norm` is `False`).
-    high
+    high : int, default 255
         The maximum value of the input data (255 for an 8-bit image).
-    low
+    low : int, default 0
         The minimum value of the input data.
-    dtype
+    dtype : tensorflow dtype, default tf.float32
         Data type of the `Distribution`.
-    name
+    name : str, default "PixelCNN"
         The name of the `Distribution`.
     """
     def __init__(
         self,
         image_shape: tuple,
-        conditional_shape: Optional[tuple] = None,
+        conditional_shape: tuple | None = None,
         num_resnet: int = 5,
         num_hierarchies: int = 3,
         num_filters: int = 160,

dataeval/_internal/models/tensorflow/trainer.py CHANGED Viewed

@@ -6,20 +6,23 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
-from typing import Callable, Iterable, Optional, Tuple, cast
+from __future__ import annotations
+from typing import Callable, Iterable, cast
 import keras
 import numpy as np
 import tensorflow as tf
+from numpy.typing import NDArray
 def trainer(
     model: keras.Model,
-    x_train: np.ndarray,
-    y_train: Optional[np.ndarray] = None,
-    loss_fn: Optional[Callable[..., tf.Tensor]] = None,
+    x_train: NDArray,
+    y_train: NDArray | None = None,
+    loss_fn: Callable[..., tf.Tensor] | None = None,
     optimizer: keras.optimizers.Optimizer = keras.optimizers.Adam,
-    preprocess_fn: Optional[Callable[[tf.Tensor], tf.Tensor]] = None,
+    preprocess_fn: Callable[[tf.Tensor], tf.Tensor] | None = None,
     epochs: int = 20,
     reg_loss_fn: Callable[[keras.Model], tf.Tensor] = (lambda _: cast(tf.Tensor, tf.Variable(0, dtype=tf.float32))),
     batch_size: int = 64,
@@ -69,14 +72,14 @@ def trainer(
             dataset.on_epoch_end()  # type: ignore py39
         loss_val_ma = 0.0
         for step, data in enumerate(dataset):
-            x, y = cast(Tuple[tf.Tensor, Optional[tf.Tensor]], data if isinstance(data, tuple) else (data, None))
+            x, y = data if isinstance(data, tuple) else (data, None)
             if isinstance(preprocess_fn, Callable):
                 x = preprocess_fn(x)
             with tf.GradientTape() as tape:
                 y_hat = model(x)
                 y = x if y is None else y
                 if isinstance(loss_fn, Callable):
-                    args = [y] + list(y_hat) if isinstance(y_hat, Tuple) else [y, y_hat]
+                    args = [y] + list(y_hat) if isinstance(y_hat, tuple) else [y, y_hat]
                     loss = loss_fn(*args)
                 else:
                     loss = cast(tf.Tensor, tf.constant(0.0, dtype=tf.float32))

dataeval/_internal/models/tensorflow/utils.py CHANGED Viewed

@@ -6,8 +6,10 @@ Original code Copyright (c) 2023 Seldon Technologies Ltd
 Licensed under Apache Software License (Apache 2.0)
 """
+from __future__ import annotations
 import math
-from typing import Callable, Optional, Tuple, Type, Union, cast
+from typing import Callable, Union, cast
 import keras as keras
 import numpy as np
@@ -21,6 +23,7 @@ from keras.layers import (
     InputLayer,
     Reshape,
 )
+from numpy.typing import NDArray
 from tensorflow._api.v2.nn import relu, softmax, tanh
 from dataeval._internal.models.tensorflow.autoencoder import AE, AEGMM, VAE, VAEGMM
@@ -28,12 +31,12 @@ from dataeval._internal.models.tensorflow.pixelcnn import PixelCNN
 def predict_batch(
-    x: Union[list, np.ndarray, tf.Tensor],
-    model: Union[Callable, keras.Model],
+    x: list | NDArray | tf.Tensor,
+    model: Callable | keras.Model,
     batch_size: int = int(1e10),
-    preprocess_fn: Optional[Callable] = None,
-    dtype: Union[Type[np.generic], tf.DType] = np.float32,
-) -> Union[np.ndarray, tf.Tensor, tuple, list]:
+    preprocess_fn: Callable | None = None,
+    dtype: type[np.generic] | tf.DType = np.float32,
+) -> NDArray | tf.Tensor | tuple | list:
     """
     Make batch predictions on a model.
@@ -58,7 +61,7 @@ def predict_batch(
     n_minibatch = int(np.ceil(n / batch_size))
     return_np = not isinstance(dtype, tf.DType)
     return_list = False
-    preds: Union[list, tuple] = []
+    preds: list | tuple = []
     for i in range(n_minibatch):
         istart, istop = i * batch_size, min((i + 1) * batch_size, n)
         x_batch = x[istart:istop]  # type: ignore
@@ -80,7 +83,7 @@ def predict_batch(
         else:
             raise TypeError(
                 f"Model output type {type(preds_tmp)} not supported. The model output "
-                f"type needs to be one of list, tuple, np.ndarray or tf.Tensor."
+                f"type needs to be one of list, tuple, NDArray or tf.Tensor."
             )
     concat = np.concatenate if return_np else tf.concat
     out = cast(
@@ -92,7 +95,7 @@ def predict_batch(
     return out
-def _get_default_encoder_net(input_shape: Tuple[int, int, int], encoding_dim: int):
+def _get_default_encoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
     return Sequential(
         [
             InputLayer(input_shape=input_shape),
@@ -105,7 +108,7 @@ def _get_default_encoder_net(input_shape: Tuple[int, int, int], encoding_dim: in
     )
-def _get_default_decoder_net(input_shape: Tuple[int, int, int], encoding_dim: int):
+def _get_default_decoder_net(input_shape: tuple[int, int, int], encoding_dim: int):
     return Sequential(
         [
             InputLayer(input_shape=(encoding_dim,)),
@@ -121,26 +124,26 @@ def _get_default_decoder_net(input_shape: Tuple[int, int, int], encoding_dim: in
 def create_model(
-    model_type: Union[AE, AEGMM, PixelCNN, VAE, VAEGMM],
-    input_shape: Tuple[int, int, int],
-    encoding_dim: Optional[int] = None,
-    n_gmm: Optional[int] = None,
-    gmm_latent_dim: Optional[int] = None,
+    model_type: AE | AEGMM | PixelCNN | VAE | VAEGMM,
+    input_shape: tuple[int, int, int],
+    encoding_dim: int | None = None,
+    n_gmm: int | None = None,
+    gmm_latent_dim: int | None = None,
 ):
     """
     Create a default model for the specified model type.
     Parameters
     ----------
-    model_type
+    model_type : Union[AE, AEGMM, PixelCNN, VAE, VAEGMM]
         The model type to create.
-    input_shape
+    input_shape : Tuple[int, int, int]
         The input shape of the data used.
-    encoding_dim
+    encoding_dim : int, optional - default None
         The target encoding dimensionality.
-    n_gmm
+    n_gmm : int, optional - default None
         Number of components used in the GMM layer.
-    gmm_latent_dim
+    gmm_latent_dim : int, optional - default None
         Latent dimensionality of the GMM layer.
     """
     input_dim = math.prod(input_shape)

dataeval/_internal/output.py ADDED Viewed

@@ -0,0 +1,85 @@
+from __future__ import annotations
+import inspect
+from datetime import datetime, timezone
+from functools import wraps
+import numpy as np
+from dataeval import __version__
+class OutputMetadata:
+    _name: str
+    _execution_time: str
+    _execution_duration: float
+    _arguments: dict[str, str]
+    _state: dict[str, str]
+    _version: str
+    def dict(self) -> dict:
+        return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
+    def meta(self) -> dict:
+        return {k.removeprefix("_"): v for k, v in self.__dict__.items() if k.startswith("_")}
+def set_metadata(module_name: str = "", state_attr: list[str] | None = None):
+    def decorator(fn):
+        @wraps(fn)
+        def wrapper(*args, **kwargs):
+            def fmt(v):
+                if np.isscalar(v):
+                    return v
+                if hasattr(v, "shape"):
+                    return f"{v.__class__.__name__}: shape={getattr(v, 'shape')}"
+                if hasattr(v, "__len__"):
+                    return f"{v.__class__.__name__}: len={len(v)}"
+                return f"{v.__class__.__name__}"
+            time = datetime.now(timezone.utc)
+            result = fn(*args, **kwargs)
+            duration = (datetime.now(timezone.utc) - time).total_seconds()
+            fn_params = inspect.signature(fn).parameters
+            # set all params with defaults then update params with mapped arguments and explicit keyword args
+            arguments = {k: None if v.default is inspect.Parameter.empty else v.default for k, v in fn_params.items()}
+            arguments.update(zip(fn_params, args))
+            arguments.update(kwargs)
+            arguments = {k: fmt(v) for k, v in arguments.items()}
+            state = (
+                {k: fmt(getattr(args[0], k)) for k in state_attr if "self" in arguments}
+                if "self" in arguments and state_attr
+                else {}
+            )
+            name = args[0].__class__.__name__ if "self" in arguments else fn.__name__
+            metadata = {
+                "_name": f"{module_name}.{name}",
+                "_execution_time": time,
+                "_execution_duration": duration,
+                "_arguments": {k: v for k, v in arguments.items() if k != "self"},
+                "_state": state,
+                "_version": __version__,
+            }
+            for k, v in metadata.items():
+                object.__setattr__(result, k, v)
+            return result
+        return wrapper
+    return decorator
+def populate_defaults(d: dict, c: type) -> dict:
+    def default(t):
+        t = (
+            t if isinstance(t, str) else t._name if hasattr(t, "_name") else t.__name__
+        ).lower()  # py3.9 : _name, py3.10 : __name__
+        if t.startswith("dict"):
+            return {}
+        if t.startswith("list"):
+            return []
+        if t.startswith("ndarray"):
+            return np.array([])
+        raise TypeError("Unrecognized annotation type")
+    return {k: d[k] if k in d else default(t) for k, t in c.__annotations__.items()}

dataeval/_internal/utils.py CHANGED Viewed

@@ -1,10 +1,12 @@
+from __future__ import annotations
 from collections import defaultdict
-from typing import Any, Dict, List
+from typing import Any
 from torch.utils.data import Dataset
-def read_dataset(dataset: Dataset) -> List[List[Any]]:
+def read_dataset(dataset: Dataset) -> list[list[Any]]:
     """
     Extract information from a dataset at each index into a individual lists of each information position
@@ -51,7 +53,7 @@ def read_dataset(dataset: Dataset) -> List[List[Any]]:
     True
     """
-    ddict: Dict[int, List] = defaultdict(list)
+    ddict: dict[int, list] = defaultdict(list)
     for data in dataset:
         # Convert to tuple if single return (e.g. images only)

dataeval 0.64.0__py3-none-any.whl → 0.66.0__py3-none-any.whl

dataeval 0.64.0__py3-none-any.whl → 0.66.0__py3-none-any.whl