dataeval 0.73.1__py3-none-any.whl → 0.74.1__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- dataeval/__init__.py +3 -9
- dataeval/detectors/__init__.py +2 -10
- dataeval/detectors/drift/base.py +3 -3
- dataeval/detectors/drift/mmd.py +1 -1
- dataeval/detectors/drift/torch.py +1 -101
- dataeval/detectors/linters/clusterer.py +3 -3
- dataeval/detectors/linters/duplicates.py +4 -4
- dataeval/detectors/linters/outliers.py +4 -4
- dataeval/detectors/ood/__init__.py +9 -9
- dataeval/detectors/ood/{ae.py → ae_torch.py} +22 -27
- dataeval/detectors/ood/base.py +63 -113
- dataeval/detectors/ood/base_torch.py +109 -0
- dataeval/detectors/ood/metadata_ks_compare.py +52 -14
- dataeval/interop.py +1 -1
- dataeval/metrics/bias/__init__.py +3 -0
- dataeval/metrics/bias/balance.py +73 -70
- dataeval/metrics/bias/coverage.py +4 -4
- dataeval/metrics/bias/diversity.py +67 -136
- dataeval/metrics/bias/metadata_preprocessing.py +285 -0
- dataeval/metrics/bias/metadata_utils.py +229 -0
- dataeval/metrics/bias/parity.py +51 -161
- dataeval/metrics/estimators/ber.py +3 -3
- dataeval/metrics/estimators/divergence.py +3 -3
- dataeval/metrics/estimators/uap.py +3 -3
- dataeval/metrics/stats/base.py +2 -2
- dataeval/metrics/stats/boxratiostats.py +1 -1
- dataeval/metrics/stats/datasetstats.py +6 -6
- dataeval/metrics/stats/dimensionstats.py +1 -1
- dataeval/metrics/stats/hashstats.py +1 -1
- dataeval/metrics/stats/labelstats.py +3 -3
- dataeval/metrics/stats/pixelstats.py +1 -1
- dataeval/metrics/stats/visualstats.py +1 -1
- dataeval/output.py +77 -53
- dataeval/utils/__init__.py +1 -7
- dataeval/utils/gmm.py +26 -0
- dataeval/utils/metadata.py +29 -9
- dataeval/utils/torch/gmm.py +98 -0
- dataeval/utils/torch/models.py +192 -0
- dataeval/utils/torch/trainer.py +84 -5
- dataeval/utils/torch/utils.py +107 -1
- dataeval/workflows/sufficiency.py +4 -4
- {dataeval-0.73.1.dist-info → dataeval-0.74.1.dist-info}/METADATA +3 -9
- dataeval-0.74.1.dist-info/RECORD +65 -0
- dataeval/detectors/ood/aegmm.py +0 -66
- dataeval/detectors/ood/llr.py +0 -302
- dataeval/detectors/ood/vae.py +0 -97
- dataeval/detectors/ood/vaegmm.py +0 -75
- dataeval/metrics/bias/metadata.py +0 -440
- dataeval/utils/lazy.py +0 -26
- dataeval/utils/tensorflow/__init__.py +0 -19
- dataeval/utils/tensorflow/_internal/gmm.py +0 -123
- dataeval/utils/tensorflow/_internal/loss.py +0 -121
- dataeval/utils/tensorflow/_internal/models.py +0 -1394
- dataeval/utils/tensorflow/_internal/trainer.py +0 -114
- dataeval/utils/tensorflow/_internal/utils.py +0 -256
- dataeval/utils/tensorflow/loss/__init__.py +0 -11
- dataeval-0.73.1.dist-info/RECORD +0 -73
- {dataeval-0.73.1.dist-info → dataeval-0.74.1.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.73.1.dist-info → dataeval-0.74.1.dist-info}/WHEEL +0 -0
dataeval/metrics/bias/metadata_utils.py
ADDED
@@ -0,0 +1,229 @@
+from __future__ import annotations
+
+__all__ = []
+
+import contextlib
+from typing import Any
+
+import numpy as np
+from numpy.typing import ArrayLike, NDArray
+
+from dataeval.interop import to_numpy
+
+with contextlib.suppress(ImportError):
+    from matplotlib.figure import Figure
+
+
+def get_counts(data: NDArray[np.int_], min_num_bins: int | None = None) -> NDArray[np.int_]:
+    """
+    Returns columnwise unique counts for discrete data.
+
+    Parameters
+    ----------
+    data : NDArray
+        Array containing integer values for metadata factors
+    min_num_bins : int | None, default None
+        Minimum number of bins for bincount, helps force consistency across runs
+
+    Returns
+    -------
+    NDArray[np.int_]
+        Bin counts per column of data.
+    """
+    max_value = data.max() + 1 if min_num_bins is None else min_num_bins
+    cnt_array = np.zeros((max_value, data.shape[1]), dtype=np.int_)
+    for idx in range(data.shape[1]):
+        cnt_array[:, idx] = np.bincount(data[:, idx], minlength=max_value)
+
+    return cnt_array
+
+
+def heatmap(
+    data: ArrayLike,
+    row_labels: list[str] | ArrayLike,
+    col_labels: list[str] | ArrayLike,
+    xlabel: str = "",
+    ylabel: str = "",
+    cbarlabel: str = "",
+) -> Figure:
+    """
+    Plots a formatted heatmap
+
+    Parameters
+    ----------
+    data : NDArray
+        Array containing numerical values for factors to plot
+    row_labels : ArrayLike
+        List/Array containing the labels for rows in the histogram
+    col_labels : ArrayLike
+        List/Array containing the labels for columns in the histogram
+    xlabel : str, default ""
+        X-axis label
+    ylabel : str, default ""
+        Y-axis label
+    cbarlabel : str, default ""
+        Label for the colorbar
+
+    Returns
+    -------
+    matplotlib.figure.Figure
+        Formatted heatmap
+    """
+    import matplotlib.pyplot as plt
+    from matplotlib.ticker import FuncFormatter
+
+    np_data = to_numpy(data)
+    rows = row_labels if isinstance(row_labels, list) else to_numpy(row_labels)
+    cols = col_labels if isinstance(col_labels, list) else to_numpy(col_labels)
+
+    fig, ax = plt.subplots(figsize=(10, 10))
+
+    # Plot the heatmap
+    im = ax.imshow(np_data, vmin=0, vmax=1.0)
+
+    # Create colorbar
+    cbar = fig.colorbar(im, shrink=0.5)
+    cbar.set_ticks([0.0, 0.25, 0.5, 0.75, 1.0])
+    cbar.set_ticklabels(["0.0", "0.25", "0.5", "0.75", "1.0"])
+    cbar.set_label(cbarlabel, loc="center")
+
+    # Show all ticks and label them with the respective list entries.
+    ax.set_xticks(np.arange(np_data.shape[1]), labels=cols)
+    ax.set_yticks(np.arange(np_data.shape[0]), labels=rows)
+
+    ax.tick_params(top=False, bottom=True, labeltop=False, labelbottom=True)
+    # Rotate the tick labels and set their alignment.
+    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
+
+    # Turn spines off and create white grid.
+    ax.spines[:].set_visible(False)
+
+    ax.set_xticks(np.arange(np_data.shape[1] + 1) - 0.5, minor=True)
+    ax.set_yticks(np.arange(np_data.shape[0] + 1) - 0.5, minor=True)
+    ax.grid(which="minor", color="w", linestyle="-", linewidth=3)
+    ax.tick_params(which="minor", bottom=False, left=False)
+
+    if xlabel:
+        ax.set_xlabel(xlabel)
+    if ylabel:
+        ax.set_ylabel(ylabel)
+
+    valfmt = FuncFormatter(format_text)
+
+    # Normalize the threshold to the images color range.
+    threshold = im.norm(1.0) / 2.0
+
+    # Set default alignment to center, but allow it to be
+    # overwritten by textkw.
+    kw = {"horizontalalignment": "center", "verticalalignment": "center"}
+
+    # Loop over the data and create a `Text` for each "pixel".
+    # Change the text's color depending on the data.
+    textcolors = ("white", "black")
+    texts = []
+    for i in range(np_data.shape[0]):
+        for j in range(np_data.shape[1]):
+            kw.update(color=textcolors[int(im.norm(np_data[i, j]) > threshold)])
+            text = im.axes.text(j, i, valfmt(np_data[i, j], None), **kw)  # type: ignore
+            texts.append(text)
+
+    fig.tight_layout()
+    return fig
+
+
+# Function to define how the text is displayed in the heatmap
+def format_text(*args: str) -> str:
+    """
+    Helper function to format text for heatmap()
+
+    Parameters
+    ----------
+    *args : tuple[str, str]
+        Text to be formatted. Second element is ignored, but is a
+        mandatory pass-through argument as per matplotlib.ticket.FuncFormatter
+
+    Returns
+    -------
+    str
+        Formatted text
+    """
+    x = args[0]
+    return f"{x:.2f}".replace("0.00", "0").replace("0.", ".").replace("nan", "")
+
+
+def diversity_bar_plot(labels: NDArray[Any], bar_heights: NDArray[Any]) -> Figure:
+    """
+    Plots a formatted bar plot
+
+    Parameters
+    ----------
+    labels : NDArray
+        Array containing the labels for each bar
+    bar_heights : NDArray
+        Array containing the values for each bar
+
+    Returns
+    -------
+    matplotlib.figure.Figure
+        Bar plot figure
+    """
+    import matplotlib.pyplot as plt
+
+    fig, ax = plt.subplots(figsize=(10, 10))
+
+    ax.bar(labels, bar_heights)
+    ax.set_xlabel("Factors")
+
+    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
+
+    fig.tight_layout()
+    return fig
+
+
+def coverage_plot(images: NDArray[Any], num_images: int) -> Figure:
+    """
+    Creates a single plot of all of the provided images
+
+    Parameters
+    ----------
+    images : NDArray
+        Array containing only the desired images to plot
+
+    Returns
+    -------
+    matplotlib.figure.Figure
+        Plot of all provided images
+    """
+    import matplotlib.pyplot as plt
+
+    num_images = min(num_images, len(images))
+
+    if images.ndim == 4:
+        images = np.moveaxis(images, 1, -1)
+    elif images.ndim == 3:
+        images = np.repeat(images[:, :, :, np.newaxis], 3, axis=-1)
+    else:
+        raise ValueError(
+            f"Expected a (N,C,H,W) or a (N, H, W) set of images, but got a {images.ndim}-dimensional set of images."
+        )
+
+    rows = int(np.ceil(num_images / 3))
+    fig, axs = plt.subplots(rows, 3, figsize=(9, 3 * rows))
+
+    if rows == 1:
+        for j in range(3):
+            if j >= len(images):
+                continue
+            axs[j].imshow(images[j])
+            axs[j].axis("off")
+    else:
+        for i in range(rows):
+            for j in range(3):
+                i_j = i * 3 + j
+                if i_j >= len(images):
+                    continue
+                axs[i, j].imshow(images[i_j])
+                axs[i, j].axis("off")
+
+    fig.tight_layout()
+    return fig
dataeval/metrics/bias/parity.py
CHANGED
@@ -4,21 +4,22 @@ __all__ = ["ParityOutput", "parity", "label_parity"]
 
 import warnings
 from dataclasses import dataclass
-from typing import Any, Generic, Mapping, TypeVar
+from typing import Any, Generic, TypeVar
 
 import numpy as np
 from numpy.typing import ArrayLike, NDArray
-from scipy.stats import chi2_contingency, chisquare
+from scipy.stats import chisquare
+from scipy.stats.contingency import chi2_contingency, crosstab
 
-from dataeval.interop import to_numpy
-from dataeval.metrics.bias.metadata import CLASS_LABEL, preprocess_metadata
-from dataeval.output import OutputMetadata, set_metadata
+from dataeval.interop import as_numpy, to_numpy
+from dataeval.metrics.bias.metadata_preprocessing import MetadataOutput
+from dataeval.output import Output, set_metadata
 
 TData = TypeVar("TData", np.float64, NDArray[np.float64])
 
 
 @dataclass(frozen=True)
-class ParityOutput(Generic[TData], OutputMetadata):
+class ParityOutput(Generic[TData], Output):
     """
     Output class for :func:`parity` and :func:`label_parity` :term:`bias<Bias>` metrics
 
@@ -37,97 +38,6 @@ class ParityOutput(Generic[TData], OutputMetadata):
     metadata_names: list[str] | None
 
 
-def digitize_factor_bins(continuous_values: NDArray[Any], bins: int, factor_name: str) -> NDArray[np.intp]:
-    """
-    Digitizes a list of values into a given number of bins.
-
-    Parameters
-    ----------
-    continuous_values : NDArray
-        The values to be digitized.
-    bins : int
-        The number of bins for the discrete values that continuous_values will be digitized into.
-    factor_name : str
-        The name of the factor to be digitized.
-
-    Returns
-    -------
-    NDArray[np.intp]
-        The digitized values
-    """
-
-    if not np.all([np.issubdtype(type(n), np.number) for n in continuous_values]):
-        raise TypeError(
-            f"Encountered a non-numeric value for factor {factor_name}, but the factor"
-            " was specified to be continuous. Ensure all occurrences of this factor are numeric types,"
-            f" or do not specify {factor_name} as a continuous factor."
-        )
-
-    _, bin_edges = np.histogram(continuous_values, bins=bins)
-    bin_edges[-1] = np.inf
-    bin_edges[0] = -np.inf
-    return np.digitize(continuous_values, bin_edges)
-
-
-def format_discretize_factors(
-    data: NDArray[Any],
-    names: list[str],
-    is_categorical: list[bool],
-    continuous_factor_bincounts: Mapping[str, int] | None,
-) -> dict[str, NDArray[Any]]:
-    """
-    Sets up the internal list of metadata factors.
-
-    Parameters
-    ----------
-    data : NDArray
-        The dataset factors, which are per-image attributes including class label and metadata.
-    names : list[str]
-        The class label
-    continuous_factor_bincounts : Mapping[str, int] or None
-        The factors in data_factors that have continuous values and the array of bin counts to
-        discretize values into. All factors are treated as having discrete values unless they
-        are specified as keys in this dictionary. Each element of this array must occur as a key
-        in data_factors.
-
-    Returns
-    -------
-    Dict[str, NDArray]
-        - Intrinsic per-image metadata information with the formatting that input data_factors uses.
-          Each key is a metadata factor, whose value is the discrete per-image factor values.
-    """
-
-    if continuous_factor_bincounts:
-        invalid_keys = set(continuous_factor_bincounts.keys()) - set(names)
-        if invalid_keys:
-            raise KeyError(
-                f"The continuous factor(s) {invalid_keys} do not exist in data_factors. Delete these "
-                "keys from `continuous_factor_names` or add corresponding entries to `data_factors`."
-            )
-
-    warn = []
-    metadata_factors = {}
-    for i, name in enumerate(names):
-        if name == CLASS_LABEL:
-            continue
-        if continuous_factor_bincounts and name in continuous_factor_bincounts:
-            metadata_factors[name] = digitize_factor_bins(data[:, i], continuous_factor_bincounts[name], name)
-        elif not is_categorical[i]:
-            warn.append(name)
-            metadata_factors[name] = data[:, i]
-        else:
-            metadata_factors[name] = data[:, i]
-
-    if warn:
-        warnings.warn(
-            f"The following factors appear to be continuous but did not have the desired number of bins specified: \n\
-            {warn}",
-            UserWarning,
-        )
-
-    return metadata_factors
-
-
 def normalize_expected_dist(expected_dist: NDArray[Any], observed_dist: NDArray[Any]) -> NDArray[Any]:
     """
     Normalize the expected label distribution to match the total number of labels in the observed distribution.
@@ -206,7 +116,7 @@ def validate_dist(label_dist: NDArray[Any], label_name: str) -> None:
         )
 
 
-@set_metadata
+@set_metadata
 def label_parity(
     expected_labels: ArrayLike,
     observed_labels: ArrayLike,
@@ -294,32 +204,20 @@ def label_parity(
     return ParityOutput(cs, p, None)
 
 
-@set_metadata
-def parity(
-    class_labels: ArrayLike,
-    metadata: Mapping[str, ArrayLike],
-    continuous_factor_bincounts: Mapping[str, int] | None = None,
-) -> ParityOutput[NDArray[np.float64]]:
+@set_metadata
+def parity(metadata: MetadataOutput) -> ParityOutput[NDArray[np.float64]]:
     """
-    Calculate chi-square statistics to assess the relationship between multiple factors
+    Calculate chi-square statistics to assess the linear relationship between multiple factors
     and class labels.
 
     This function computes the chi-square statistic for each metadata factor to determine if there is
-    a significant relationship between the factor values and class labels. The
-
+    a significant relationship between the factor values and class labels. The chi-square statistic is
+    only valid for linear relationships. If non-linear relationships exist, use `balance`.
 
     Parameters
     ----------
-    class_labels : ArrayLike
-
-    metadata : Mapping[str, ArrayLike]
-        The dataset factors, which are per-image metadata attributes.
-        Each key of dataset_factors is a factor, whose value is the per-image factor values.
-    continuous_factor_bincounts : Mapping[str, int] or None, default None
-        A dictionary specifying the number of bins for discretizing the continuous factors.
-        The keys should correspond to the names of continuous factors in `metadata`,
-        and the values should be the number of bins to use for discretization.
-        If not provided, no discretization is applied.
+    metadata : MetadataOutput
+        Output after running `metadata_preprocessing`
 
     Returns
    -------
@@ -333,74 +231,66 @@ def parity(
     Warning
         If any cell in the contingency matrix has a value between 0 and 5, a warning is issued because this can
         lead to inaccurate chi-square calculations. It is recommended to ensure that each label co-occurs with
-        factor values either 0 times or at least 5 times.
-        into fewer bins.
+        factor values either 0 times or at least 5 times.
 
     Note
     ----
-    - Each key of the ``continuous_factor_bincounts`` dictionary must occur as a key in data_factors.
     - A high score with a low p-value suggests that a metadata factor is strongly correlated with a class label.
     - The function creates a contingency matrix for each factor, where each entry represents the frequency of a
       specific factor value co-occurring with a particular class label.
     - Rows containing only zeros in the contingency matrix are removed before performing the chi-square test
      to prevent errors in the calculation.
 
+    See Also
+    --------
+    balance
+
     Examples
     --------
     Randomly creating some "continuous" and categorical variables using ``np.random.default_rng``
 
     >>> labels = np_random_gen.choice([0, 1, 2], (100))
-    >>>
-    ...
-    ...
-    ...
-    ...
+    >>> metadata_dict = [
+    ...     {
+    ...         "age": list(np_random_gen.choice([25, 30, 35, 45], (100))),
+    ...         "income": list(np_random_gen.choice([50000, 65000, 80000], (100))),
+    ...         "gender": list(np_random_gen.choice(["M", "F"], (100))),
+    ...     }
+    ... ]
    >>> continuous_factor_bincounts = {"age": 4, "income": 3}
-    >>>
+    >>> metadata = metadata_preprocessing(metadata_dict, labels, continuous_factor_bincounts)
+    >>> parity(metadata)
    ParityOutput(score=array([7.35731943, 5.46711299, 0.51506212]), p_value=array([0.28906231, 0.24263543, 0.77295762]), metadata_names=['age', 'income', 'gender'])
     """  # noqa: E501
-
-
-            f"Got class labels with {len(np.shape(class_labels))}-dimensional",
-            f" shape {np.shape(class_labels)}, but expected a 1-dimensional array.",
-        )
-
-    data, names, is_categorical, _ = preprocess_metadata(class_labels, metadata)
-
-    factors = format_discretize_factors(data, names, is_categorical, continuous_factor_bincounts)
-
-    # unique class labels
-    class_idx = names.index(CLASS_LABEL)
-    u_cls = np.unique(data[:, class_idx])
-
-    chi_scores = np.zeros(len(factors))
-    p_values = np.zeros(len(factors))
+    chi_scores = np.zeros(metadata.discrete_data.shape[1])
+    p_values = np.zeros_like(chi_scores)
     not_enough_data = {}
-    for i, (current_factor_name, factor_values) in enumerate(factors.items()):
-        unique_factor_values = np.unique(factor_values)
-        contingency_matrix = np.zeros((len(unique_factor_values), u_cls.size))
+    for i, col_data in enumerate(metadata.discrete_data.T):
         # Builds a contingency matrix where entry at index (r,c) represents
         # the frequency of current_factor_name achieving value unique_factor_values[r]
         # at a data point with class c.
-
-        #
-
-
-
-
-
-
-
-
-            not_enough_data[current_factor_name]
-
-
+        results = crosstab(col_data, metadata.class_labels)
+        contingency_matrix = as_numpy(results.count)  # type: ignore
+
+        # Determines if any frequencies are too low
+        counts = np.nonzero(contingency_matrix < 5)
+        unique_factor_values = np.unique(col_data)
+        current_factor_name = metadata.discrete_factor_names[i]
+        for int_factor, int_class in zip(counts[0], counts[1]):
+            if contingency_matrix[int_factor, int_class] > 0:
+                factor_category = unique_factor_values[int_factor]
+                if current_factor_name not in not_enough_data:
+                    not_enough_data[current_factor_name] = {}
+                if factor_category not in not_enough_data[current_factor_name]:
+                    not_enough_data[current_factor_name][factor_category] = []
+                not_enough_data[current_factor_name][factor_category].append(
+                    (metadata.class_names[int_class], int(contingency_matrix[int_factor, int_class]))
+                )
 
         # This deletes rows containing only zeros,
         # because scipy.stats.chi2_contingency fails when there are rows containing only zeros.
         rowsums = np.sum(contingency_matrix, axis=1)
-        rowmask = np.
+        rowmask = np.nonzero(rowsums)[0]
         contingency_matrix = contingency_matrix[rowmask]
 
         chi2, p, _, _ = chi2_contingency(contingency_matrix)
@@ -428,4 +318,4 @@ def parity(
             UserWarning,
         )
 
-    return ParityOutput(chi_scores, p_values,
+    return ParityOutput(chi_scores, p_values, metadata.discrete_factor_names)
dataeval/metrics/estimators/ber.py
CHANGED
@@ -20,12 +20,12 @@ from scipy.sparse import coo_matrix
 from scipy.stats import mode
 
 from dataeval.interop import as_numpy
-from dataeval.output import OutputMetadata, set_metadata
+from dataeval.output import Output, set_metadata
 from dataeval.utils.shared import compute_neighbors, get_classes_counts, get_method, minimum_spanning_tree
 
 
 @dataclass(frozen=True)
-class BEROutput(OutputMetadata):
+class BEROutput(Output):
     """
     Output class for :func:`ber` estimator metric
 
@@ -114,7 +114,7 @@ def knn_lowerbound(value: float, classes: int, k: int) -> float:
     return ((classes - 1) / classes) * (1 - np.sqrt(max(0, 1 - ((classes / (classes - 1)) * value))))
 
 
-@set_metadata
+@set_metadata
 def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
     """
     An estimator for Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using FR or KNN test statistic basis
dataeval/metrics/estimators/divergence.py
CHANGED
@@ -14,12 +14,12 @@ import numpy as np
 from numpy.typing import ArrayLike, NDArray
 
 from dataeval.interop import as_numpy
-from dataeval.output import OutputMetadata, set_metadata
+from dataeval.output import Output, set_metadata
 from dataeval.utils.shared import compute_neighbors, get_method, minimum_spanning_tree
 
 
 @dataclass(frozen=True)
-class DivergenceOutput(OutputMetadata):
+class DivergenceOutput(Output):
     """
     Output class for :func:`divergence` estimator metric
 
@@ -78,7 +78,7 @@ def divergence_fnn(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
     return errors
 
 
-@set_metadata
+@set_metadata
 def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
     """
     Calculates the :term`divergence` and any errors between the datasets
dataeval/metrics/estimators/uap.py
CHANGED
@@ -14,11 +14,11 @@ from numpy.typing import ArrayLike
 from sklearn.metrics import average_precision_score
 
 from dataeval.interop import as_numpy
-from dataeval.output import OutputMetadata, set_metadata
+from dataeval.output import Output, set_metadata
 
 
 @dataclass(frozen=True)
-class UAPOutput(OutputMetadata):
+class UAPOutput(Output):
     """
     Output class for :func:`uap` estimator metric
 
@@ -31,7 +31,7 @@ class UAPOutput(OutputMetadata):
     uap: float
 
 
-@set_metadata
+@set_metadata
 def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
     """
     FR Test Statistic based estimate of the empirical mean precision for
dataeval/metrics/stats/base.py
CHANGED
@@ -15,7 +15,7 @@ import tqdm
 from numpy.typing import ArrayLike, NDArray
 
 from dataeval.interop import to_numpy_iter
-from dataeval.output import OutputMetadata
+from dataeval.output import Output
 from dataeval.utils.image import normalize_image_shape, rescale
 
 DTYPE_REGEX = re.compile(r"NDArray\[np\.(.*?)\]")
@@ -65,7 +65,7 @@ class SourceIndex(NamedTuple):
 
 
 @dataclass(frozen=True)
-class BaseStatsOutput(OutputMetadata):
+class BaseStatsOutput(Output):
     """
     Attributes
     ----------
dataeval/metrics/stats/datasetstats.py
CHANGED
@@ -15,11 +15,11 @@ from dataeval.metrics.stats.dimensionstats import (
 from dataeval.metrics.stats.labelstats import LabelStatsOutput, labelstats
 from dataeval.metrics.stats.pixelstats import PixelStatsOutput, PixelStatsProcessor
 from dataeval.metrics.stats.visualstats import VisualStatsOutput, VisualStatsProcessor
-from dataeval.output import OutputMetadata, set_metadata
+from dataeval.output import Output, set_metadata
 
 
 @dataclass(frozen=True)
-class DatasetStatsOutput(OutputMetadata):
+class DatasetStatsOutput(Output):
     """
     Output class for :func:`datasetstats` stats metric
 
@@ -41,7 +41,7 @@ class DatasetStatsOutput(OutputMetadata):
     visualstats: VisualStatsOutput
     labelstats: LabelStatsOutput | None = None
 
-    def _outputs(self) -> list[OutputMetadata]:
+    def _outputs(self) -> list[Output]:
         return [s for s in (self.dimensionstats, self.pixelstats, self.visualstats, self.labelstats) if s is not None]
 
     def dict(self) -> dict[str, Any]:
@@ -54,7 +54,7 @@ class DatasetStatsOutput(OutputMetadata):
 
 
 @dataclass(frozen=True)
-class ChannelStatsOutput(OutputMetadata):
+class ChannelStatsOutput(Output):
     """
     Output class for :func:`channelstats` stats metric
 
@@ -84,7 +84,7 @@ class ChannelStatsOutput(OutputMetadata):
     raise ValueError("All StatsOutput classes must contain the same number of image sources.")
 
 
-@set_metadata
+@set_metadata
 def datasetstats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None = None,
@@ -131,7 +131,7 @@ def datasetstats(
     return DatasetStatsOutput(*outputs, labelstats=labelstats(labels) if labels else None)  # type: ignore
 
 
-@set_metadata
+@set_metadata
 def channelstats(
     images: Iterable[ArrayLike],
     bboxes: Iterable[ArrayLike] | None = None,
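
Finally, a hedged sketch of calling datasetstats() after the rename. The hunk above truncates the signature after bboxes, so the remaining parameters are assumed to keep their defaults.

import numpy as np
from dataeval.metrics.stats.datasetstats import datasetstats

rng = np.random.default_rng(0)
images = [rng.random((3, 16, 16)) for _ in range(9)]  # CHW float images

out = datasetstats(images)  # bboxes/labels left unset
print(type(out).__name__)   # DatasetStatsOutput
print(sorted(out.dict()))   # aggregated dimension/pixel/visual stats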
|