dataeval 0.75.0__py3-none-any.whl → 0.76.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. dataeval/__init__.py +3 -3
  2. dataeval/detectors/drift/base.py +2 -2
  3. dataeval/detectors/drift/ks.py +2 -1
  4. dataeval/detectors/drift/mmd.py +3 -2
  5. dataeval/detectors/drift/uncertainty.py +2 -2
  6. dataeval/detectors/drift/updates.py +1 -1
  7. dataeval/detectors/linters/clusterer.py +3 -2
  8. dataeval/detectors/linters/duplicates.py +4 -4
  9. dataeval/detectors/linters/outliers.py +96 -3
  10. dataeval/detectors/ood/__init__.py +1 -1
  11. dataeval/detectors/ood/base.py +1 -17
  12. dataeval/detectors/ood/output.py +1 -1
  13. dataeval/interop.py +1 -1
  14. dataeval/metrics/__init__.py +1 -1
  15. dataeval/metrics/bias/__init__.py +1 -1
  16. dataeval/metrics/bias/balance.py +3 -3
  17. dataeval/metrics/bias/coverage.py +1 -1
  18. dataeval/metrics/bias/diversity.py +14 -10
  19. dataeval/metrics/bias/parity.py +5 -5
  20. dataeval/metrics/estimators/ber.py +4 -3
  21. dataeval/metrics/estimators/divergence.py +3 -3
  22. dataeval/metrics/estimators/uap.py +3 -3
  23. dataeval/metrics/stats/__init__.py +1 -1
  24. dataeval/metrics/stats/base.py +24 -8
  25. dataeval/metrics/stats/boxratiostats.py +5 -5
  26. dataeval/metrics/stats/datasetstats.py +39 -6
  27. dataeval/metrics/stats/dimensionstats.py +4 -4
  28. dataeval/metrics/stats/hashstats.py +2 -2
  29. dataeval/metrics/stats/labelstats.py +89 -6
  30. dataeval/metrics/stats/pixelstats.py +7 -5
  31. dataeval/metrics/stats/visualstats.py +6 -4
  32. dataeval/output.py +23 -14
  33. dataeval/utils/__init__.py +2 -2
  34. dataeval/utils/dataset/read.py +1 -1
  35. dataeval/utils/dataset/split.py +1 -1
  36. dataeval/utils/metadata.py +42 -44
  37. dataeval/utils/plot.py +129 -6
  38. dataeval/workflows/sufficiency.py +2 -2
  39. {dataeval-0.75.0.dist-info → dataeval-0.76.0.dist-info}/LICENSE.txt +2 -2
  40. {dataeval-0.75.0.dist-info → dataeval-0.76.0.dist-info}/METADATA +18 -17
  41. dataeval-0.76.0.dist-info/RECORD +67 -0
  42. dataeval-0.75.0.dist-info/RECORD +0 -67
  43. {dataeval-0.75.0.dist-info → dataeval-0.76.0.dist-info}/WHEEL +0 -0
dataeval/__init__.py CHANGED
@@ -8,7 +8,7 @@ shifts that impact performance of deployed models.
  from __future__ import annotations

  __all__ = ["detectors", "log", "metrics", "utils", "workflows"]
- __version__ = "0.75.0"
+ __version__ = "0.76.0"

  import logging

@@ -24,10 +24,10 @@ def log(level: int = logging.DEBUG, handler: logging.Handler | None = None) -> N
      Parameters
      ----------
      level : int, default logging.DEBUG(10)
-         Set the logging level for the logger
+         Set the logging level for the logger.
      handler : logging.Handler, optional
          Sets the logging handler for the logger if provided, otherwise logger will be
-         provided with a StreamHandler
+         provided with a StreamHandler.
      """
      import logging

dataeval/detectors/drift/base.py CHANGED
@@ -45,7 +45,7 @@ class UpdateStrategy(ABC):
  @dataclass(frozen=True)
  class DriftBaseOutput(Output):
      """
-     Base output class for Drift detector classes
+     Base output class for Drift Detector classes

      Attributes
      ----------
@@ -64,7 +64,7 @@ class DriftBaseOutput(Output):
  @dataclass(frozen=True)
  class DriftOutput(DriftBaseOutput):
      """
-     Output class for :class:`DriftCVM`, :class:`DriftKS`, and :class:`DriftUncertainty` drift detectors
+     Output class for :class:`DriftCVM`, :class:`DriftKS`, and :class:`DriftUncertainty` drift detectors.

      Attributes
      ----------
dataeval/detectors/drift/ks.py CHANGED
@@ -22,7 +22,8 @@ from dataeval.interop import to_numpy

  class DriftKS(BaseDriftUnivariate):
      """
-     :term:`Drift` detector employing the Kolmogorov-Smirnov (KS) distribution test.
+     :term:`Drift` detector employing the :term:`Kolmogorov-Smirnov (KS) \
+     distribution<Kolmogorov-Smirnov (K-S) test>` test.

      The KS test detects changes in the maximum distance between two data
      distributions with Bonferroni or :term:`False Discovery Rate (FDR)` correction
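Note: for orientation, a minimal usage sketch of the detector above. This is a hedged example; the p_val argument and predict method follow the library's general drift API and are not shown in this diff.

    import numpy as np
    from dataeval.detectors.drift import DriftKS

    x_ref = np.random.randn(1000, 32)      # reference (training-time) features
    detector = DriftKS(x_ref, p_val=0.05)  # per-feature KS test with multivariate correction
    result = detector.predict(np.random.randn(500, 32))
    print(result.drifted)                  # a DriftOutput field, per the classes in this diff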
dataeval/detectors/drift/mmd.py CHANGED
@@ -26,7 +26,7 @@ from dataeval.utils.torch.internal import get_device
  @dataclass(frozen=True)
  class DriftMMDOutput(DriftBaseOutput):
      """
-     Output class for :class:`DriftMMD` :term:`drift<Drift>` detector
+     Output class for :class:`DriftMMD` :term:`drift<Drift>` detector.

      Attributes
      ----------
@@ -51,7 +51,8 @@ class DriftMMDOutput(DriftBaseOutput):

  class DriftMMD(BaseDrift):
      """
-     :term:`Maximum Mean Discrepancy (MMD) Drift Detection` algorithm using a permutation test.
+     :term:`Maximum Mean Discrepancy (MMD) Drift Detection` algorithm \
+     using a permutation test.

      Parameters
      ----------
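Note: the permutation test named in the docstring above can be sketched generically. This is an illustrative implementation of the idea, not DataEval's internal code; mmd2_fn is a placeholder for any squared-MMD estimator.

    import numpy as np

    def permutation_pvalue(x, y, mmd2_fn, n_permutations=100, seed=0):
        """Estimate a p-value for the observed MMD^2 by shuffling the pooled samples."""
        rng = np.random.default_rng(seed)
        observed = mmd2_fn(x, y)
        pooled = np.concatenate([x, y])
        hits = 0
        for _ in range(n_permutations):
            perm = rng.permutation(len(pooled))
            x_p, y_p = pooled[perm[: len(x)]], pooled[perm[len(x):]]
            hits += mmd2_fn(x_p, y_p) >= observed  # permuted statistic at least as extreme
        return hits / n_permutations               # small p-value suggests drift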
dataeval/detectors/drift/uncertainty.py CHANGED
@@ -66,8 +66,8 @@ def classifier_uncertainty(

  class DriftUncertainty:
      """
-     Test for a change in the number of instances falling into regions on which the
-     model is uncertain.
+     Test for a change in the number of instances falling into regions on which \
+     the model is uncertain.

      Performs a K-S test on prediction entropies.

dataeval/detectors/drift/updates.py CHANGED
@@ -1,5 +1,5 @@
  """
- Update strategies inform how the :term:`drift<Drift>` detector classes update the reference data when monitoring
+ Update strategies inform how the :term:`drift<Drift>` detector classes update the reference data when monitoring.
  for drift.
  """

dataeval/detectors/linters/clusterer.py CHANGED
@@ -18,7 +18,7 @@ from dataeval.utils.shared import flatten
  @dataclass(frozen=True)
  class ClustererOutput(Output):
      """
-     Output class for :class:`Clusterer` lint detector
+     Output class for :class:`Clusterer` lint detector.

      Attributes
      ----------
@@ -131,7 +131,8 @@ class _ClusterMergeEntry:

  class Clusterer:
      """
-     Uses hierarchical clustering to flag dataset properties of interest like Outliers and :term:`duplicates<Duplicates>`
+     Uses hierarchical clustering to flag dataset properties of interest like outliers \
+     and :term:`duplicates<Duplicates>`.

      Parameters
      ----------
dataeval/detectors/linters/duplicates.py CHANGED
@@ -19,7 +19,7 @@ TIndexCollection = TypeVar("TIndexCollection", DuplicateGroup, DatasetDuplicateG
  @dataclass(frozen=True)
  class DuplicatesOutput(Generic[TIndexCollection], Output):
      """
-     Output class for :class:`Duplicates` lint detector
+     Output class for :class:`Duplicates` lint detector.

      Attributes
      ----------
@@ -39,8 +39,8 @@ class DuplicatesOutput(Generic[TIndexCollection], Output):

  class Duplicates:
      """
-     Finds the duplicate images in a dataset using xxhash for exact :term:`duplicates<Duplicates>`
-     and pchash for near duplicates
+     Finds the duplicate images in a dataset using xxhash for exact \
+     :term:`duplicates<Duplicates>` and pchash for near duplicates.

      Attributes
      ----------
@@ -92,7 +92,7 @@ class Duplicates:

          Parameters
          ----------
-         data : HashStatsOutput | Sequence[HashStatsOutput]
+         hashes : HashStatsOutput | Sequence[HashStatsOutput]
              The output(s) from a hashstats analysis

          Returns
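Note: the last hunk renames the from_stats parameter from data to hashes. A hedged sketch of the call site (the exact output fields are assumptions, not shown in this diff; images is a placeholder dataset):

    from dataeval.detectors.linters import Duplicates
    from dataeval.metrics.stats import hashstats

    hashes = hashstats(images)               # xxhash + pchash per image
    dupes = Duplicates().from_stats(hashes)  # parameter is now `hashes`, not `data`
    print(dupes.dict())                      # groups of exact and near duplicate indices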
dataeval/detectors/linters/outliers.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations

  __all__ = []

+ # import contextlib
  from dataclasses import dataclass
  from typing import Generic, Iterable, Literal, Sequence, TypeVar, Union, overload

@@ -12,19 +13,78 @@ from dataeval.detectors.linters.merged_stats import combine_stats, get_dataset_s
  from dataeval.metrics.stats.base import BOX_COUNT, SOURCE_INDEX
  from dataeval.metrics.stats.datasetstats import DatasetStatsOutput, datasetstats
  from dataeval.metrics.stats.dimensionstats import DimensionStatsOutput
+ from dataeval.metrics.stats.labelstats import LabelStatsOutput
  from dataeval.metrics.stats.pixelstats import PixelStatsOutput
  from dataeval.metrics.stats.visualstats import VisualStatsOutput
  from dataeval.output import Output, set_metadata

+ # with contextlib.suppress(ImportError):
+ #     import pandas as pd
+
+
  IndexIssueMap = dict[int, dict[str, float]]
  OutlierStatsOutput = Union[DimensionStatsOutput, PixelStatsOutput, VisualStatsOutput]
  TIndexIssueMap = TypeVar("TIndexIssueMap", IndexIssueMap, list[IndexIssueMap])


+ def _reorganize_by_class_and_metric(result, lstats):
+     """Flip result from grouping by image to grouping by class and metric"""
+     metrics = {}
+     class_wise = {label: {} for label in lstats.image_indices_per_label}
+
+     # Group metrics and calculate class-wise counts
+     for img, group in result.items():
+         for extreme in group:
+             metrics.setdefault(extreme, []).append(img)
+             for label, images in lstats.image_indices_per_label.items():
+                 if img in images:
+                     class_wise[label][extreme] = class_wise[label].get(extreme, 0) + 1
+
+     return metrics, class_wise
+
+
+ def _create_table(metrics, class_wise):
+     """Create table for displaying the results"""
+     max_class_length = max(len(str(label)) for label in class_wise) + 2
+     max_total = max(len(metrics[group]) for group in metrics) + 2
+
+     table_header = " | ".join(
+         [f"{'Class':>{max_class_length}}"]
+         + [f"{group:^{max(5, len(str(group))) + 2}}" for group in sorted(metrics.keys())]
+         + [f"{'Total':<{max_total}}"]
+     )
+     table_rows = []
+
+     for class_cat, results in class_wise.items():
+         table_value = [f"{class_cat:>{max_class_length}}"]
+         total = 0
+         for group in sorted(metrics.keys()):
+             count = results.get(group, 0)
+             table_value.append(f"{count:^{max(5, len(str(group))) + 2}}")
+             total += count
+         table_value.append(f"{total:^{max_total}}")
+         table_rows.append(" | ".join(table_value))
+
+     table = [table_header] + table_rows
+     return table
+
+
+ # def _create_pandas_dataframe(class_wise):
+ #     """Create data for pandas dataframe"""
+ #     data = []
+ #     for label, metrics_dict in class_wise.items():
+ #         row = {"Class": label}
+ #         total = sum(metrics_dict.values())
+ #         row.update(metrics_dict)  # Add metric counts
+ #         row["Total"] = total
+ #         data.append(row)
+ #     return data
+
+
  @dataclass(frozen=True)
  class OutliersOutput(Generic[TIndexIssueMap], Output):
      """
-     Output class for :class:`Outliers` lint detector
+     Output class for :class:`Outliers` lint detector.

      Attributes
      ----------
@@ -45,6 +105,39 @@ class OutliersOutput(Generic[TIndexIssueMap], Output):
          else:
              return sum(len(d) for d in self.issues)

+     def to_table(self, labelstats: LabelStatsOutput) -> str:
+         if isinstance(self.issues, dict):
+             metrics, classwise = _reorganize_by_class_and_metric(self.issues, labelstats)
+             listed_table = _create_table(metrics, classwise)
+             table = "\n".join(listed_table)
+         else:
+             outertable = []
+             for d in self.issues:
+                 metrics, classwise = _reorganize_by_class_and_metric(d, labelstats)
+                 listed_table = _create_table(metrics, classwise)
+                 str_table = "\n".join(listed_table)
+                 outertable.append(str_table)
+             table = "\n\n".join(outertable)
+         return table
+
+     # def to_dataframe(self, labelstats: LabelStatsOutput) -> pd.DataFrame:
+     #     import pandas as pd
+
+     #     if isinstance(self.issues, dict):
+     #         _, classwise = _reorganize_by_class_and_metric(self.issues, labelstats)
+     #         data = _create_pandas_dataframe(classwise)
+     #         df = pd.DataFrame(data)
+     #     else:
+     #         df_list = []
+     #         for i, d in enumerate(self.issues):
+     #             _, classwise = _reorganize_by_class_and_metric(d, labelstats)
+     #             data = _create_pandas_dataframe(classwise)
+     #             single_df = pd.DataFrame(data)
+     #             single_df["Dataset"] = i
+     #             df_list.append(single_df)
+     #         df = pd.concat(df_list)
+     #     return df
+

  def _get_outlier_mask(
      values: NDArray, method: Literal["zscore", "modzscore", "iqr"], threshold: float | None
@@ -71,7 +164,7 @@ def _get_outlier_mask(

  class Outliers:
      r"""
-     Calculates statistical Outliers of a dataset using various statistical tests applied to each image
+     Calculates statistical outliers of a dataset using various statistical tests applied to each image.

      Parameters
      ----------
@@ -164,7 +257,7 @@ class Outliers:
          self, stats: OutlierStatsOutput | DatasetStatsOutput | Sequence[OutlierStatsOutput]
      ) -> OutliersOutput[IndexIssueMap] | OutliersOutput[list[IndexIssueMap]]:
          """
-         Returns indices of Outliers with the issues identified for each
+         Returns indices of Outliers with the issues identified for each.

          Parameters
          ----------
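Note: a hedged sketch of the new to_table helper added above, wired together with the stats functions this diff imports (images and labels are placeholder inputs; the from_stats signature is the one shown in the last hunk):

    from dataeval.detectors.linters import Outliers
    from dataeval.metrics.stats import datasetstats, labelstats

    stats = datasetstats(images)            # DatasetStatsOutput accepted by from_stats
    results = Outliers().from_stats(stats)  # OutliersOutput keyed by image index
    lstats = labelstats(labels)             # exposes image_indices_per_label, used by to_table
    print(results.to_table(lstats))         # per-class counts for each flagged issue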
dataeval/detectors/ood/__init__.py CHANGED
@@ -1,5 +1,5 @@
  """
- Out-of-distribution (OOD)` detectors identify data that is different from the data used to train a particular model.
+ Out-of-distribution (OOD) detectors identify data that is different from the data used to train a particular model.
  """

  __all__ = ["OODOutput", "OODScoreOutput", "OOD_AE"]
dataeval/detectors/ood/base.py CHANGED
@@ -87,24 +87,8 @@ class OODBaseGMM(OODBase, OODGMMMixin[GaussianMixtureModelParams]):
          batch_size: int,
          verbose: bool,
      ) -> None:
-         # Train the model
-         trainer(
-             model=self.model,
-             x_train=to_numpy(x_ref),
-             y_train=None,
-             loss_fn=loss_fn,
-             optimizer=optimizer,
-             preprocess_fn=None,
-             epochs=epochs,
-             batch_size=batch_size,
-             device=self.device,
-             verbose=verbose,
-         )
+         super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)

          # Calculate the GMM parameters
          _, z, gamma = cast(tuple[torch.Tensor, torch.Tensor, torch.Tensor], self.model(x_ref))
          self._gmm_params = gmm_params(z, gamma)
-
-         # Infer the threshold values
-         self._ref_score = self.score(x_ref, batch_size)
-         self._threshold_perc = threshold_perc
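Note: reduced to a sketch, the shape of this refactor is a template method; only the structure below is asserted by the diff. Training and threshold inference now happen once in the parent fit, and the GMM subclass appends only parameter estimation.

    class OODBase:
        def fit(self, x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose) -> None:
            ...  # train self.model, then infer self._ref_score and self._threshold_perc

    class OODBaseGMM(OODBase):
        def fit(self, x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose) -> None:
            super().fit(x_ref, threshold_perc, loss_fn, optimizer, epochs, batch_size, verbose)
            _, z, gamma = self.model(x_ref)          # GMM-specific additions only
            self._gmm_params = gmm_params(z, gamma)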
dataeval/detectors/ood/output.py CHANGED
@@ -36,7 +36,7 @@ class OODScoreOutput(Output):
      """
      Output class for instance and feature scores from out-of-distribution detectors.

-     Parameters
+     Attributes
      ----------
      instance_score : NDArray
          Instance score of the evaluated dataset.
dataeval/interop.py CHANGED
@@ -46,7 +46,7 @@ def to_numpy(array: ArrayLike | None, copy: bool = True) -> NDArray[Any]:
      if isinstance(array, np.ndarray):
          return array.copy() if copy else array

-     if array.__class__.__module__.startswith("tensorflow"):
+     if array.__class__.__module__.startswith("tensorflow"):  # pragma: no cover - removed tf from deps
          tf = _try_import("tensorflow")
          if tf and tf.is_tensor(array):
              _logger.log(logging.INFO, "Converting Tensorflow array to NumPy array.")
dataeval/metrics/__init__.py CHANGED
@@ -1,5 +1,5 @@
  """
- Metrics are a way to measure the performance of your models or datasets that
+ Metrics are a way to measure the performance of your models or datasets that \
  can then be analyzed in the context of a given problem.
  """

dataeval/metrics/bias/__init__.py CHANGED
@@ -1,5 +1,5 @@
  """
- Bias metrics check for skewed or imbalanced datasets and incomplete feature
+ Bias metrics check for skewed or imbalanced datasets and incomplete feature \
  representation which may impact model performance.
  """

dataeval/metrics/bias/balance.py CHANGED
@@ -23,8 +23,8 @@ with contextlib.suppress(ImportError):
  @dataclass(frozen=True)
  class BalanceOutput(Output):
      """
-     Output class for :func:`balance` bias metric
-
+     Output class for :func:`balance` :term:`bias<Bias>` metric.
+
      Attributes
      ----------
      balance : NDArray[np.float64]
@@ -123,7 +123,7 @@ def balance(
      num_neighbors: int = 5,
  ) -> BalanceOutput:
      """
-     Mutual information (MI) between factors (class label, metadata, label/image properties)
+     Mutual information (MI) between factors (class label, metadata, label/image properties).

      Parameters
      ----------
dataeval/metrics/bias/coverage.py CHANGED
@@ -71,7 +71,7 @@ def _plot(images: NDArray[Any], num_images: int) -> Figure:
  @dataclass(frozen=True)
  class CoverageOutput(Output):
      """
-     Output class for :func:`coverage` :term:`bias<Bias>` metric
+     Output class for :func:`coverage` :term:`bias<Bias>` metric.

      Attributes
      ----------
dataeval/metrics/bias/diversity.py CHANGED
@@ -51,7 +51,7 @@ def _plot(labels: NDArray[Any], bar_heights: NDArray[Any]) -> Figure:
  @dataclass(frozen=True)
  class DiversityOutput(Output):
      """
-     Output class for :func:`diversity` :term:`bias<Bias>` metric
+     Output class for :func:`diversity` :term:`bias<Bias>` metric.

      Attributes
      ----------
@@ -197,10 +197,12 @@ def diversity(
      method: Literal["simpson", "shannon"] = "simpson",
  ) -> DiversityOutput:
      """
-     Compute :term:`diversity<Diversity>` and classwise diversity for discrete/categorical variables and,
-     through standard histogram binning, for continuous variables.
+     Compute :term:`diversity<Diversity>` and classwise diversity for \
+     discrete/categorical variables through standard histogram binning, \
+     for continuous variables.

-     We define diversity as a normalized form of the inverse Simpson diversity index.
+     The method specified defines diversity as the inverse Simpson diversity index linearly rescaled to
+     the unit interval, or the normalized form of the Shannon entropy.

      diversity = 1 implies that samples are evenly distributed across a particular factor
      diversity = 0 implies that all samples belong to one category/bin
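Note: for reference, the standard definitions behind the two methods, shown here as an illustration (the library's exact normalization should be checked against the source). With $p_i$ the fraction of samples in bin $i$ and $C$ the number of bins,

\[
D_{\text{Simpson}} = \frac{1/\sum_i p_i^2 - 1}{C - 1},
\qquad
D_{\text{Shannon}} = \frac{-\sum_i p_i \log p_i}{\log C}.
\]

Both equal 1 for a uniform distribution and 0 when every sample falls in a single bin, matching the endpoints stated in the docstring above.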
@@ -209,11 +211,8 @@ def diversity(
      ----------
      metadata : Metadata
          Preprocessed metadata from :func:`dataeval.utils.metadata.preprocess`
-
-     Note
-     ----
-     - The expression is undefined for q=1, but it approaches the Shannon entropy in the limit.
-     - If there is only one category, the diversity index takes a value of 0.
+     method : "simpson" or "shannon", default "simpson"
+         The methodology used for defining diversity

      Returns
      -------
@@ -221,9 +220,14 @@ def diversity(
          Diversity index per column of self.data or each factor in self.names and
          classwise diversity [n_class x n_factor]

+     Note
+     ----
+     - The expression is undefined for q=1, but it approaches the Shannon entropy in the limit.
+     - If there is only one category, the diversity index takes a value of 0.
+
      Example
      -------
-     Compute Simpson diversity index of metadata and class labels
+     Compute the diversity index of metadata and class labels

      >>> div_simp = diversity(metadata, method="simpson")
      >>> div_simp.diversity_index
dataeval/metrics/bias/parity.py CHANGED
@@ -21,7 +21,7 @@ TData = TypeVar("TData", np.float64, NDArray[np.float64])
  @dataclass(frozen=True)
  class ParityOutput(Generic[TData], Output):
      """
-     Output class for :func:`parity` and :func:`label_parity` :term:`bias<Bias>` metrics
+     Output class for :func:`parity` and :func:`label_parity` :term:`bias<Bias>` metrics.

      Attributes
      ----------
@@ -123,8 +123,8 @@ def label_parity(
      num_classes: int | None = None,
  ) -> ParityOutput[np.float64]:
      """
-     Calculate the chi-square statistic to assess the :term:`parity<Parity>` between expected and
-     observed label distributions.
+     Calculate the chi-square statistic to assess the :term:`parity<Parity>` \
+     between expected and observed label distributions.

      This function computes the frequency distribution of classes in both expected and observed labels, normalizes
      the expected distribution to match the total number of observed labels, and then calculates the chi-square
@@ -208,8 +208,8 @@ def label_parity(
  @set_metadata
  def parity(metadata: Metadata) -> ParityOutput[NDArray[np.float64]]:
      """
-     Calculate chi-square statistics to assess the linear relationship between multiple factors
-     and class labels.
+     Calculate chi-square statistics to assess the linear relationship \
+     between multiple factors and class labels.

      This function computes the chi-square statistic for each metadata factor to determine if there is
      a significant relationship between the factor values and class labels. The chi-square statistic is
dataeval/metrics/estimators/ber.py CHANGED
@@ -28,7 +28,7 @@ from dataeval.utils.shared import compute_neighbors, get_classes_counts, get_met
  @dataclass(frozen=True)
  class BEROutput(Output):
      """
-     Output class for :func:`ber` estimator metric
+     Output class for :func:`ber` estimator metric.

      Attributes
      ----------
@@ -44,7 +44,7 @@ class BEROutput(Output):


  def ber_mst(images: NDArray[np.float64], labels: NDArray[np.int_], k: int = 1) -> tuple[float, float]:
-     """Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree
+     """Calculates the :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree.

      Parameters
      ----------
@@ -119,7 +119,8 @@ def knn_lowerbound(value: float, classes: int, k: int) -> float:
  @set_metadata
  def ber(images: ArrayLike, labels: ArrayLike, k: int = 1, method: Literal["KNN", "MST"] = "KNN") -> BEROutput:
      """
-     An estimator for Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using FR or KNN test statistic basis
+     An estimator for Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` \
+     using FR or KNN test statistic basis.

      Parameters
      ----------
dataeval/metrics/estimators/divergence.py CHANGED
@@ -21,7 +21,7 @@ from dataeval.utils.shared import compute_neighbors, get_method, minimum_spannin
  @dataclass(frozen=True)
  class DivergenceOutput(Output):
      """
-     Output class for :func:`divergence` estimator metric
+     Output class for :func:`divergence` estimator metric.

      Attributes
      ----------
@@ -59,7 +59,7 @@ def divergence_mst(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:

  def divergence_fnn(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
      """
-     Calculates the estimated label errors based on their nearest neighbors
+     Calculates the estimated label errors based on their nearest neighbors.

      Parameters
      ----------
@@ -81,7 +81,7 @@ def divergence_fnn(data: NDArray[np.float64], labels: NDArray[np.int_]) -> int:
  @set_metadata
  def divergence(data_a: ArrayLike, data_b: ArrayLike, method: Literal["FNN", "MST"] = "FNN") -> DivergenceOutput:
      """
-     Calculates the :term`divergence` and any errors between the datasets
+     Calculates the :term:`divergence` and any errors between the datasets.

      Parameters
      ----------
dataeval/metrics/estimators/uap.py CHANGED
@@ -20,7 +20,7 @@ from dataeval.output import Output, set_metadata
  @dataclass(frozen=True)
  class UAPOutput(Output):
      """
-     Output class for :func:`uap` estimator metric
+     Output class for :func:`uap` estimator metric.

      Attributes
      ----------
@@ -34,8 +34,8 @@ class UAPOutput(Output):
  @set_metadata
  def uap(labels: ArrayLike, scores: ArrayLike) -> UAPOutput:
      """
-     FR Test Statistic based estimate of the empirical mean precision for
-     the upperbound average precision
+     FR Test Statistic based estimate of the empirical mean precision for the \
+     upperbound average precision.

      Parameters
      ----------
dataeval/metrics/stats/__init__.py CHANGED
@@ -1,5 +1,5 @@
  """
- Statistics metrics calculate a variety of image properties and pixel statistics
+ Statistics metrics calculate a variety of image properties and pixel statistics \
  and label statistics against the images and labels of a dataset.
  """

dataeval/metrics/stats/base.py CHANGED
@@ -1,5 +1,7 @@
  from __future__ import annotations

+ from dataeval.utils.plot import histogram_plot
+
  __all__ = []

  import re
@@ -100,19 +102,33 @@ class BaseStatsOutput(Output):
          for source_index in list(self.source_index) + [None]:
              if source_index is None or source_index.image > cur_image:
                  mask.extend(cur_mask if matches(cur_max_channel + 1, channel_count) else [False for _ in cur_mask])
-                 if source_index is None:
-                     break
-                 cur_image = source_index.image
-                 cur_max_channel = 0
-                 cur_mask.clear()
-             cur_mask.append(matches(source_index.channel, channel_index))
-             cur_max_channel = max(cur_max_channel, source_index.channel or 0)
+                 if source_index is not None:
+                     cur_image = source_index.image
+                     cur_max_channel = 0
+                     cur_mask.clear()
+             if source_index is not None:
+                 cur_mask.append(matches(source_index.channel, channel_index))
+                 cur_max_channel = max(cur_max_channel, source_index.channel or 0)
          return mask

      def __len__(self) -> int:
          return len(self.source_index)


+ def _is_plottable(k: str, v: Any, excluded_keys: Iterable[str]) -> bool:
+     return isinstance(v, np.ndarray) and v[v != 0].size > 0 and all(k != x for x in excluded_keys)
+
+
+ class HistogramPlotMixin:
+     _excluded_keys: Iterable[str] = []
+
+     def dict(self) -> dict[str, Any]: ...
+
+     def plot(self, log: bool) -> None:
+         data_dict = {k: v for k, v in self.dict().items() if _is_plottable(k, v, self._excluded_keys)}
+         histogram_plot(data_dict, log)
+
+
  TStatsOutput = TypeVar("TStatsOutput", bound=BaseStatsOutput, covariant=True)


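Note: a hedged sketch of how a stats output dataclass would consume the new mixin; the host class name and _excluded_keys value here are illustrative, not taken from this diff.

    @dataclass(frozen=True)
    class ExampleStatsOutput(BaseStatsOutput, HistogramPlotMixin):
        _excluded_keys = ["histogram"]  # hypothetical: arrays to keep out of the plot grid
        mean: np.ndarray
        std: np.ndarray

    # output.plot(log=True) then draws one histogram per remaining ndarray field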
@@ -126,7 +142,7 @@ class StatsProcessor(Generic[TStatsOutput]):
          self.raw = image
          self.width: int = image.shape[-1]
          self.height: int = image.shape[-2]
-         self.box: NDArray[Any] = np.array([0, 0, self.width, self.height]) if box is None else box
+         self.box: NDArray[np.int64] = np.array([0, 0, self.width, self.height]) if box is None else box.astype(np.int64)
          self._per_channel = per_channel
          self._image = None
          self._shape = None
dataeval/metrics/stats/boxratiostats.py CHANGED
@@ -26,7 +26,7 @@ class BoxImageStatsOutputSlice(Generic[TStatOutput]):
      def __getitem__(self, key: str) -> NDArray[np.float64]:
          _stat = cast(np.ndarray, getattr(self._stats, key)).astype(np.float64)
          _shape = _stat[0].shape
-         _slice = _stat[self._slice[0] : self._slice[1]]
+         _slice = _stat[int(self._slice[0]) : int(self._slice[1])]
          return _slice.reshape(-1, self._channels, *_shape) if self._channels else _slice.reshape(-1, *_shape)

      box: StatSlicer
@@ -102,7 +102,7 @@ def boxratiostats(
      imgstats: TStatOutput,
  ) -> TStatOutput:
      """
-     Calculates ratio :term:`statistics<Statistics>` of box outputs over image outputs
+     Calculates ratio :term:`statistics<Statistics>` of box outputs over image outputs.

      Parameters
      ----------
@@ -147,13 +147,13 @@ def boxratiostats(
      if boxstats.source_index[-1].image != imgstats.source_index[-1].image:
          raise ValueError("Stats index_map length mismatch. Check if the correct box and image stats were provided.")
      if all(count == 0 for count in boxstats.box_count):
-         raise TypeError("Input for boxstats must contain box information.")
+         raise ValueError("Input for boxstats must contain box information.")
      if any(count != 0 for count in imgstats.box_count):
-         raise TypeError("Input for imgstats must not contain box information.")
+         raise ValueError("Input for imgstats must not contain box information.")
      boxstats_has_channels = any(si.channel is None for si in boxstats.source_index)
      imgstats_has_channels = any(si.channel is None for si in imgstats.source_index)
      if boxstats_has_channels != imgstats_has_channels:
-         raise TypeError("Input for boxstats and imgstats must have matching channel information.")
+         raise ValueError("Input for boxstats and imgstats must have matching channel information.")

      output_dict = {}
      for key in boxstats.dict():
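Note: one practical consequence of the TypeError-to-ValueError change above is that callers guarding boxratiostats must update their except clause (sketch; boxstats and imgstats come from prior stats runs):

    try:
        ratios = boxratiostats(boxstats, imgstats)
    except ValueError as err:  # was TypeError before 0.76.0
        print(f"Incompatible stats inputs: {err}")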