dataeval 0.86.0__py3-none-any.whl → 0.86.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/_log.py +1 -1
- dataeval/config.py +21 -4
- dataeval/data/_embeddings.py +2 -2
- dataeval/data/_images.py +2 -3
- dataeval/data/_metadata.py +188 -178
- dataeval/data/_selection.py +1 -2
- dataeval/data/_split.py +4 -5
- dataeval/data/_targets.py +17 -13
- dataeval/data/selections/_classfilter.py +2 -5
- dataeval/data/selections/_prioritize.py +6 -9
- dataeval/data/selections/_shuffle.py +3 -1
- dataeval/detectors/drift/_base.py +4 -5
- dataeval/detectors/drift/_mmd.py +3 -6
- dataeval/detectors/drift/_nml/_base.py +4 -2
- dataeval/detectors/drift/_nml/_chunk.py +11 -19
- dataeval/detectors/drift/_nml/_domainclassifier.py +8 -19
- dataeval/detectors/drift/_nml/_result.py +8 -9
- dataeval/detectors/drift/_nml/_thresholds.py +66 -77
- dataeval/detectors/linters/outliers.py +7 -7
- dataeval/metadata/_distance.py +10 -7
- dataeval/metadata/_ood.py +11 -103
- dataeval/metrics/bias/_balance.py +23 -33
- dataeval/metrics/bias/_diversity.py +16 -14
- dataeval/metrics/bias/_parity.py +18 -18
- dataeval/metrics/estimators/_divergence.py +2 -4
- dataeval/metrics/stats/_base.py +103 -42
- dataeval/metrics/stats/_boxratiostats.py +21 -19
- dataeval/metrics/stats/_dimensionstats.py +14 -10
- dataeval/metrics/stats/_hashstats.py +1 -1
- dataeval/metrics/stats/_pixelstats.py +6 -6
- dataeval/metrics/stats/_visualstats.py +3 -3
- dataeval/outputs/_base.py +22 -7
- dataeval/outputs/_bias.py +24 -70
- dataeval/outputs/_drift.py +1 -9
- dataeval/outputs/_linters.py +11 -11
- dataeval/outputs/_stats.py +82 -23
- dataeval/outputs/_workflows.py +2 -2
- dataeval/utils/_array.py +6 -9
- dataeval/utils/_bin.py +1 -2
- dataeval/utils/_clusterer.py +7 -4
- dataeval/utils/_fast_mst.py +27 -13
- dataeval/utils/_image.py +65 -11
- dataeval/utils/_mst.py +1 -3
- dataeval/utils/_plot.py +15 -10
- dataeval/utils/data/_dataset.py +54 -28
- dataeval/utils/data/metadata.py +104 -82
- dataeval/utils/datasets/__init__.py +2 -0
- dataeval/utils/datasets/_antiuav.py +189 -0
- dataeval/utils/datasets/_base.py +11 -8
- dataeval/utils/datasets/_cifar10.py +104 -45
- dataeval/utils/datasets/_fileio.py +21 -47
- dataeval/utils/datasets/_milco.py +22 -12
- dataeval/utils/datasets/_mixin.py +2 -4
- dataeval/utils/datasets/_mnist.py +3 -4
- dataeval/utils/datasets/_ships.py +14 -7
- dataeval/utils/datasets/_voc.py +229 -42
- dataeval/utils/torch/models.py +5 -10
- dataeval/utils/torch/trainer.py +3 -3
- dataeval/workflows/sufficiency.py +2 -2
- {dataeval-0.86.0.dist-info → dataeval-0.86.2.dist-info}/METADATA +2 -1
- dataeval-0.86.2.dist-info/RECORD +114 -0
- dataeval/detectors/ood/vae.py +0 -74
- dataeval-0.86.0.dist-info/RECORD +0 -114
- {dataeval-0.86.0.dist-info → dataeval-0.86.2.dist-info}/LICENSE.txt +0 -0
- {dataeval-0.86.0.dist-info → dataeval-0.86.2.dist-info}/WHEEL +0 -0
dataeval/data/_targets.py
CHANGED
@@ -24,11 +24,13 @@ class Targets:
     labels : NDArray[np.intp]
         Labels (N,) for N images or objects
     scores : NDArray[np.float32]
-        Probability scores (N,M) for N images of M classes or confidence score (N,) of objects
+        Probability scores (N, M) for N images of M classes or confidence score (N,) of objects
     bboxes : NDArray[np.float32] | None
-        Bounding boxes (N,4) for N objects in (x0,y0,x1,y1) format
+        Bounding boxes (N, 4) for N objects in (x0, y0, x1, y1) format
     source : NDArray[np.intp] | None
         Source image index (N,) for N objects
+    size : int
+        Count of objects
     """
 
     labels: NDArray[np.intp]
@@ -55,13 +57,16 @@ class Targets:
             )
 
         if self.bboxes is not None and len(self.bboxes) > 0 and self.bboxes.shape[-1] != 4:
-            raise ValueError("Bounding boxes must be in (x0,y0,x1,y1) format.")
+            raise ValueError("Bounding boxes must be in (x0, y0, x1, y1) format.")
+
+    @property
+    def size(self) -> int:
+        return len(self.labels)
 
     def __len__(self) -> int:
         if self.source is None:
             return len(self.labels)
-        else:
-            return len(np.unique(self.source))
+        return len(np.unique(self.source))
 
     def __getitem__(self, idx: int, /) -> Targets:
         if self.source is None or self.bboxes is None:
@@ -71,14 +76,13 @@ class Targets:
                 None,
                 None,
             )
-        else:
-            mask = np.where(self.source == idx, True, False)
-            return Targets(
-                self.labels[mask],
-                self.scores[mask],
-                self.bboxes[mask],
-                self.source[mask],
-            )
+        mask = np.where(self.source == idx, True, False)
+        return Targets(
+            np.atleast_1d(self.labels[mask]),
+            np.atleast_1d(self.scores[mask]),
+            np.atleast_2d(self.bboxes[mask]),
+            np.atleast_1d(self.source[mask]),
+        )
 
     def __iter__(self) -> Iterator[Targets]:
         for i in range(len(self.labels)) if self.source is None else np.unique(self.source):
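Note: the new `size` property and the existing `__len__` now answer different questions for object detection data. A minimal sketch of the distinction, assuming `Targets` is importable from `dataeval.data`:

```python
import numpy as np
from dataeval.data import Targets  # import path assumed for this sketch

# Three detections spread across two source images.
targets = Targets(
    labels=np.array([0, 1, 1]),
    scores=np.array([0.9, 0.8, 0.7], dtype=np.float32),
    bboxes=np.array([[0, 0, 5, 5], [1, 1, 4, 4], [2, 2, 6, 6]], dtype=np.float32),
    source=np.array([0, 0, 1]),
)

print(targets.size)  # 3 -> count of objects (length of labels)
print(len(targets))  # 2 -> count of unique source images
```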
dataeval/data/selections/_classfilter.py
CHANGED
@@ -68,11 +68,8 @@ _TTarget = TypeVar("_TTarget", ObjectDetectionTarget, SegmentationTarget)
 
 
 def _try_mask_object(obj: _T, mask: NDArray[np.bool_]) -> _T:
-    if isinstance(obj, (Sequence, Array)) and len(obj) == len(mask):
-        if isinstance(obj, Array):
-            return obj[mask]
-        elif isinstance(obj, Sequence):
-            return cast(_T, [item for i, item in enumerate(obj) if mask[i]])
+    if not isinstance(obj, (str, bytes, bytearray)) and isinstance(obj, (Sequence, Array)) and len(obj) == len(mask):
+        return obj[mask] if isinstance(obj, Array) else cast(_T, [item for i, item in enumerate(obj) if mask[i]])
     return obj
 
 
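Note: the consolidated guard excludes text-like sequences explicitly, since `str` and `bytes` are themselves `Sequence` types and would otherwise be split character by character. A self-contained sketch of the same pattern (`try_mask` is a hypothetical stand-in for the private helper, handling plain sequences only):

```python
from typing import Any, Sequence
import numpy as np

def try_mask(obj: Any, mask: np.ndarray) -> Any:
    # Mask only non-text sequences whose length matches the mask.
    if not isinstance(obj, (str, bytes, bytearray)) and isinstance(obj, Sequence) and len(obj) == len(mask):
        return [item for i, item in enumerate(obj) if mask[i]]
    return obj

mask = np.array([True, False, True])
print(try_mask([10, 20, 30], mask))  # [10, 30] -> masked
print(try_mask("abc", mask))         # 'abc'    -> text passes through unchanged
print(try_mask([1, 2], mask))        # [1, 2]   -> length mismatch, left as-is
```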
dataeval/data/selections/_prioritize.py
CHANGED
@@ -99,8 +99,7 @@ class _KNNSorter(_Sorter):
             np.fill_diagonal(dists, np.inf)
         else:
             dists = pairwise_distances(embeddings, reference)
-        inds = np.argsort(np.sort(dists, axis=1)[:, self._k])
-        return inds
+        return np.argsort(np.sort(dists, axis=1)[:, self._k])
 
 
 class _KMeansSorter(_Sorter):
@@ -124,15 +123,13 @@ class _KMeansSorter(_Sorter):
 class _KMeansDistanceSorter(_KMeansSorter):
     def _sort(self, embeddings: NDArray[Any], reference: NDArray[Any] | None = None) -> NDArray[np.intp]:
         clst = self._get_clusters(embeddings if reference is None else reference)
-        inds = np.argsort(clst._dist2center(embeddings))
-        return inds
+        return np.argsort(clst._dist2center(embeddings))
 
 
 class _KMeansComplexitySorter(_KMeansSorter):
     def _sort(self, embeddings: NDArray[Any], reference: NDArray[Any] | None = None) -> NDArray[np.intp]:
         clst = self._get_clusters(embeddings if reference is None else reference)
-        inds = clst._sort_by_weights(embeddings)
-        return inds
+        return clst._sort_by_weights(embeddings)
 
 
 class Prioritize(Selection[Any]):
@@ -266,10 +263,10 @@ class Prioritize(Selection[Any]):
     def _get_sorter(self, samples: int) -> _Sorter:
         if self._method == "knn":
             return _KNNSorter(samples, self._k)
-        elif self._method == "kmeans_distance":
+        if self._method == "kmeans_distance":
            return _KMeansDistanceSorter(samples, self._c)
-        elif self._method == "kmeans_complexity":
-            return _KMeansComplexitySorter(samples, self._c)
+        # self._method == "kmeans_complexity"
+        return _KMeansComplexitySorter(samples, self._c)
 
     def _to_normalized_ndarray(self, embeddings: Embeddings, selection: list[int] | None = None) -> NDArray[Any]:
         emb: NDArray[Any] = embeddings.to_numpy(selection)
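Note: the sorter changes above are pure cleanups; each `_sort` now returns its expression directly. For readers unfamiliar with the k-NN variant, a standalone sketch of the ranking it computes (ascending by distance to the k-th nearest neighbor, using sklearn's `pairwise_distances` as the module does):

```python
import numpy as np
from sklearn.metrics import pairwise_distances

def knn_order(embeddings: np.ndarray, k: int) -> np.ndarray:
    dists = pairwise_distances(embeddings, embeddings)
    np.fill_diagonal(dists, np.inf)  # exclude self-distance
    # Sort each row, take the k-th nearest distance, then rank samples by it:
    # samples in densely covered regions come first, outliers last.
    return np.argsort(np.sort(dists, axis=1)[:, k])

rng = np.random.default_rng(0)
print(knn_order(rng.normal(size=(100, 8)), k=5)[:10])
```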
dataeval/data/selections/_shuffle.py
CHANGED
@@ -30,7 +30,9 @@ class Shuffle(Selection[Any]):
     seed: int | NDArray[Any] | SeedSequence | BitGenerator | Generator | None
     stage = SelectionStage.ORDER
 
-    def __init__(self, seed: int | Sequence[int] | Array | SeedSequence | BitGenerator | Generator | None = None) -> None:
+    def __init__(
+        self, seed: int | Sequence[int] | Array | SeedSequence | BitGenerator | Generator | None = None
+    ) -> None:
         self.seed = as_numpy(seed) if isinstance(seed, (Sequence, Array)) else seed
 
     def __call__(self, dataset: Select[Any]) -> None:
dataeval/detectors/drift/_base.py
CHANGED
@@ -13,7 +13,7 @@ __all__ = []
 import math
 from abc import abstractmethod
 from functools import wraps
-from typing import Callable, Literal, Protocol, TypeVar, runtime_checkable
+from typing import Any, Callable, Literal, Protocol, TypeVar, runtime_checkable
 
 import numpy as np
 from numpy.typing import NDArray
@@ -40,7 +40,7 @@ def update_strategy(fn: Callable[..., R]) -> Callable[..., R]:
     """Decorator to update x_ref with x using selected update methodology"""
 
     @wraps(fn)
-    def _(self: BaseDrift, data: Embeddings | Array, *args, **kwargs) -> R:
+    def _(self: BaseDrift, data: Embeddings | Array, *args: tuple[Any, ...], **kwargs: dict[str, Any]) -> R:
         output = fn(self, data, *args, **kwargs)
 
         # update reference dataset
@@ -184,7 +184,7 @@ class BaseDriftUnivariate(BaseDrift):
             threshold = self.p_val / self.n_features
             drift_pred = bool((p_vals < threshold).any())
             return drift_pred, threshold
-        elif self.correction == "fdr":
+        if self.correction == "fdr":
             n = p_vals.shape[0]
             i = np.arange(n) + np.int_(1)
             p_sorted = np.sort(p_vals)
@@ -195,8 +195,7 @@ class BaseDriftUnivariate(BaseDrift):
             except ValueError:  # sorted p-values not below thresholds
                 return bool(below_threshold.any()), q_threshold.min()
             return bool(below_threshold.any()), q_threshold[idx_threshold]
-        else:
-            raise ValueError("`correction` needs to be either `bonferroni` or `fdr`.")
+        raise ValueError("`correction` needs to be either `bonferroni` or `fdr`.")
 
     @set_metadata
     @update_strategy
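Note: the flattened control flow leaves the two corrections themselves untouched. For reference, a standalone sketch of what each branch tests (Bonferroni family-wise threshold vs. Benjamini-Hochberg FDR ramp):

```python
import numpy as np

def has_drift(p_vals: np.ndarray, alpha: float, correction: str) -> bool:
    if correction == "bonferroni":
        # Any p-value below the family-wise threshold alpha / n signals drift.
        return bool((p_vals < alpha / len(p_vals)).any())
    if correction == "fdr":
        # Benjamini-Hochberg: compare sorted p-values against the ramp i * alpha / n.
        n = len(p_vals)
        ramp = (np.arange(n) + 1) * alpha / n
        return bool((np.sort(p_vals) < ramp).any())
    raise ValueError("`correction` needs to be either `bonferroni` or `fdr`.")

print(has_drift(np.array([0.001, 0.2, 0.8]), alpha=0.05, correction="bonferroni"))  # True
```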
dataeval/detectors/drift/_mmd.py
CHANGED
@@ -95,8 +95,7 @@ class DriftMMD(BaseDrift):
         k_xy = self._kernel(x, y)
         k_xx = self._k_xx if self._k_xx is not None and self.update_strategy is None else self._kernel(x, x)
         k_yy = self._kernel(y, y)
-        kernel_mat = torch.cat([torch.cat([k_xx, k_xy], 1), torch.cat([k_xy.T, k_yy], 1)], 0)
-        return kernel_mat
+        return torch.cat([torch.cat([k_xx, k_xy], 1), torch.cat([k_xy.T, k_yy], 1)], 0)
 
     def score(self, data: Embeddings | Array) -> tuple[float, float, float]:
         """
@@ -205,8 +204,7 @@ def sigma_median(x: torch.Tensor, y: torch.Tensor, dist: torch.Tensor) -> torch.Tensor:
     n = min(x.shape[0], y.shape[0])
     n = n if (x[:n] == y[:n]).all() and x.shape == y.shape else 0
     n_median = n + (torch.prod(torch.as_tensor(dist.shape)) - n) // 2 - 1
-    sigma = (0.5 * dist.flatten().sort().values[int(n_median)].unsqueeze(dim=-1)) ** 0.5
-    return sigma
+    return (0.5 * dist.flatten().sort().values[int(n_median)].unsqueeze(dim=-1)) ** 0.5
 
 
 class GaussianRBF(torch.nn.Module):
@@ -310,5 +308,4 @@ def mmd2_from_kernel_matrix(
     kernel_mat = kernel_mat[idx][:, idx]
     k_xx, k_yy, k_xy = kernel_mat[:-m, :-m], kernel_mat[-m:, -m:], kernel_mat[-m:, :-m]
     c_xx, c_yy = 1 / (n * (n - 1)), 1 / (m * (m - 1))
-    mmd2 = c_xx * k_xx.sum() + c_yy * k_yy.sum() - 2.0 * k_xy.mean()
-    return mmd2
+    return c_xx * k_xx.sum() + c_yy * k_yy.sum() - 2.0 * k_xy.mean()
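Note: the inlined return in `mmd2_from_kernel_matrix` is the standard unbiased MMD² estimate, 1/(n(n-1)) Σ_{i≠j} k(x_i, x_j) + 1/(m(m-1)) Σ_{i≠j} k(y_i, y_j) − (2/(nm)) Σ_{i,j} k(x_i, y_j). A NumPy sketch with a simple linear kernel (not the package's GaussianRBF):

```python
import numpy as np

def mmd2(x: np.ndarray, y: np.ndarray) -> float:
    # Unbiased MMD^2 with the linear kernel k(a, b) = a . b
    k_xx, k_yy, k_xy = x @ x.T, y @ y.T, x @ y.T
    n, m = len(x), len(y)
    np.fill_diagonal(k_xx, 0.0)  # drop i == j terms for the unbiased estimate
    np.fill_diagonal(k_yy, 0.0)
    return float(k_xx.sum() / (n * (n - 1)) + k_yy.sum() / (m * (m - 1)) - 2.0 * k_xy.mean())

rng = np.random.default_rng(0)
print(mmd2(rng.normal(size=(50, 4)), rng.normal(size=(50, 4))))           # near 0: same distribution
print(mmd2(rng.normal(size=(50, 4)), rng.normal(loc=1.0, size=(50, 4))))  # clearly positive: drift
```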
dataeval/detectors/drift/_nml/_base.py
CHANGED
@@ -27,7 +27,9 @@ def _validate(data: pd.DataFrame, expected_features: int | None = None) -> int:
     return data.shape[-1]
 
 
-def _create_multilevel_index(chunks: Sequence[Chunk], result_group_name: str, result_column_names: Sequence[str]) -> pd.MultiIndex:
+def _create_multilevel_index(
+    chunks: Sequence[Chunk], result_group_name: str, result_column_names: Sequence[str]
+) -> pd.MultiIndex:
     chunk_column_names = (*chunks[0].KEYS, "period")
     chunk_tuples = [("chunk", chunk_column_name) for chunk_column_name in chunk_column_names]
     result_tuples = [(result_group_name, column_name) for column_name in result_column_names]
@@ -37,7 +39,7 @@ def _create_multilevel_index(chunks: Sequence[Chunk], result_group_name: str, result_column_names: Sequence[str]) -> pd.MultiIndex:
 class AbstractCalculator(ABC):
     """Base class for drift calculation."""
 
-    def __init__(self, chunker: Chunker | None = None, logger: Logger | None = None):
+    def __init__(self, chunker: Chunker | None = None, logger: Logger | None = None) -> None:
         self.chunker = chunker if isinstance(chunker, Chunker) else CountBasedChunker(10)
         self.result: DriftMVDCOutput | None = None
         self.n_features: int | None = None
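Note: `_create_multilevel_index` pairs chunk bookkeeping columns with one result group's metric columns. A toy sketch of the index it produces (column names illustrative):

```python
import pandas as pd

chunk_cols = ("key", "chunk_index", "start_index", "end_index", "period")
tuples = [("chunk", name) for name in chunk_cols]
tuples += [("domain_classifier_auroc", name) for name in ("value", "upper_threshold", "lower_threshold", "alert")]
print(pd.MultiIndex.from_tuples(tuples))
```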
dataeval/detectors/drift/_nml/_chunk.py
CHANGED
@@ -16,7 +16,6 @@ from abc import ABC, abstractmethod
 from typing import Any, Generic, Literal, Sequence, TypeVar, cast
 
 import pandas as pd
-from dateutil.parser import ParserError
 from pandas import Index, Period
 from typing_extensions import Self
 
@@ -31,7 +30,7 @@ class Chunk(ABC):
     def __init__(
         self,
         data: pd.DataFrame,
-    ):
+    ) -> None:
         self.key: str
         self.data = data
 
@@ -39,11 +38,11 @@ class Chunk(ABC):
         self.end_index: int = -1
         self.chunk_index: int = -1
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         attr_str = ", ".join([f"{k}={v}" for k, v in self.dict().items()])
         return f"{self.__class__.__name__}(data=pd.DataFrame(shape={self.data.shape}), {attr_str})"
 
-    def __len__(self):
+    def __len__(self) -> int:
         return self.data.shape[0]
 
     @abstractmethod
@@ -76,7 +75,7 @@ class IndexChunk(Chunk):
         data: pd.DataFrame,
         start_index: int,
         end_index: int,
-    ):
+    ) -> None:
         super().__init__(data)
         self.key = f"[{start_index}:{end_index}]"
         self.start_index: int = start_index
@@ -113,7 +112,7 @@ class PeriodChunk(Chunk):
 
     KEYS = ("key", "chunk_index", "start_date", "end_date", "chunk_size")
 
-    def __init__(self, data: pd.DataFrame, period: Period, chunk_size: int):
+    def __init__(self, data: pd.DataFrame, period: Period, chunk_size: int) -> None:
         super().__init__(data)
         self.key = str(period)
         self.start_datetime = period.start_time
@@ -127,6 +126,7 @@ class PeriodChunk(Chunk):
         a, b = (self, other) if self < other else (other, self)
         result = copy.deepcopy(a)
         result.data = pd.concat([a.data, b.data])
+        result.end_index = b.end_index
         result.end_datetime = b.end_datetime
         result.chunk_size += b.chunk_size
         return result
@@ -237,13 +237,7 @@ class PeriodBasedChunker(Chunker[PeriodChunk]):
         if self.timestamp_column_name not in data:
             raise ValueError(f"timestamp column '{self.timestamp_column_name}' not in columns")
 
-        try:
-            grouped = data.groupby(pd.to_datetime(data[self.timestamp_column_name]).dt.to_period(self.offset))
-        except ParserError:
-            raise ValueError(
-                f"could not parse date_column '{self.timestamp_column_name}' values as dates."
-                f"Please verify if you've specified the correct date column."
-            )
+        grouped = data.groupby(pd.to_datetime(data[self.timestamp_column_name]).dt.to_period(self.offset))
 
         for k, v in grouped.groups.items():
             period, index = cast(Period, k), cast(Index, v)
@@ -281,7 +275,7 @@ class SizeBasedChunker(Chunker[IndexChunk]):
         self,
         chunk_size: int,
         incomplete: Literal["append", "drop", "keep"] = "keep",
-    ):
+    ) -> None:
         """Create a new SizeBasedChunker.
 
         Parameters
@@ -314,12 +308,11 @@ class SizeBasedChunker(Chunker[IndexChunk]):
     def _split(self, data: pd.DataFrame) -> list[IndexChunk]:
         def _create_chunk(index: int, data: pd.DataFrame, chunk_size: int) -> IndexChunk:
             chunk_data = data.iloc[index : index + chunk_size]
-            chunk = IndexChunk(
+            return IndexChunk(
                 data=chunk_data,
                 start_index=index,
                 end_index=index + chunk_size - 1,
             )
-            return chunk
 
         chunks = [
             _create_chunk(index=i, data=data, chunk_size=self.chunk_size)
@@ -364,7 +357,7 @@ class CountBasedChunker(Chunker[IndexChunk]):
         self,
         chunk_number: int,
         incomplete: Literal["append", "drop", "keep"] = "keep",
-    ):
+    ) -> None:
         """Creates a new CountBasedChunker.
 
         It will calculate the amount of observations per chunk based on the given chunk count.
@@ -400,5 +393,4 @@ class CountBasedChunker(Chunker[IndexChunk]):
     def _split(self, data: pd.DataFrame) -> list[IndexChunk]:
         chunk_size = data.shape[0] // self.chunk_number
         chunker = SizeBasedChunker(chunk_size, self.incomplete)
-        chunks = chunker.split(data=data)
-        return chunks
+        return chunker.split(data=data)
dataeval/detectors/drift/_nml/_domainclassifier.py
CHANGED
@@ -20,7 +20,7 @@ from sklearn.model_selection import StratifiedKFold
 from dataeval.config import get_max_processes, get_seed
 from dataeval.detectors.drift._nml._base import AbstractCalculator, _create_multilevel_index
 from dataeval.detectors.drift._nml._chunk import Chunk, Chunker
-from dataeval.detectors.drift._nml._thresholds import ConstantThreshold, Threshold, calculate_threshold_values
+from dataeval.detectors.drift._nml._thresholds import ConstantThreshold, Threshold
 from dataeval.outputs._base import set_metadata
 from dataeval.outputs._drift import DriftMVDCOutput
 
@@ -38,10 +38,8 @@ DEFAULT_LGBM_HYPERPARAMS = {
     "min_child_weight": 0.001,
     "min_split_gain": 0.0,
     "n_estimators": 100,
-    "n_jobs": get_max_processes() or 0,
     "num_leaves": 31,
     "objective": None,
-    "random_state": get_seed(),
     "reg_alpha": 0.0,
     "reg_lambda": 0.0,
     "subsample": 1.0,
@@ -126,7 +124,7 @@ class DomainClassifierCalculator(AbstractCalculator):
         self.result._data = pd.concat([self.result._data, res], ignore_index=True)
         return self.result
 
-    def _calculate_chunk(self, chunk: Chunk):
+    def _calculate_chunk(self, chunk: Chunk) -> float:
         if self.result is None:
             # Use information from chunk indices to identify reference chunk's location. This is possible because
             # both the internal reference data copy and the chunk data were sorted by timestamp, so these
@@ -151,7 +149,7 @@ class DomainClassifierCalculator(AbstractCalculator):
             _try = y[train_index]
             _tsx = df_X.iloc[test_index]
             _tsy = y[test_index]
-            model = LGBMClassifier(**self.hyperparameters)
+            model = LGBMClassifier(**self.hyperparameters, n_jobs=get_max_processes(), random_state=get_seed())
             model.fit(_trx, _try)
             preds = np.asarray(model.predict_proba(_tsx), dtype=np.float32)[:, 1]
             all_preds.append(preds)
@@ -159,24 +157,15 @@ class DomainClassifierCalculator(AbstractCalculator):
 
         np_all_preds = np.concatenate(all_preds, axis=0)
         np_all_tgts = np.concatenate(all_tgts, axis=0)
-
-        try:
-            result = roc_auc_score(np_all_tgts, np_all_preds)
-        except ValueError as err:
-            if str(err) != "Only one class present in y_true. ROC AUC score is not defined in that case.":
-                raise
-            else:
-                # by definition if reference and chunk exactly match we can't discriminate
-                result = 0.5
-        return result
+        result = roc_auc_score(np_all_tgts, np_all_preds)
+        return 0.5 if result == np.nan else float(result)
 
     def _populate_alert_thresholds(self, result_data: pd.DataFrame) -> pd.DataFrame:
         if self.result is None:
-            self._threshold_values = calculate_threshold_values(
-                threshold=self.threshold,
+            self._threshold_values = self.threshold.calculate(
                 data=result_data.loc[:, ("domain_classifier_auroc", "value")],  # type: ignore | dataframe loc
-                lower_threshold_value_limit=0.0,
-                upper_threshold_value_limit=1.0,
+                lower_limit=0.0,
+                upper_limit=1.0,
                 logger=self._logger,
             )
 
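Note: the calculator's underlying idea is unchanged by this cleanup: label reference rows 0 and analysis rows 1, cross-validate a classifier, and read AUROC as the drift score (0.5 means the chunks are indistinguishable). A standalone sketch using sklearn in place of the package's LightGBM model:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_predict

def domain_classifier_auroc(reference: np.ndarray, chunk: np.ndarray) -> float:
    X = np.concatenate([reference, chunk])
    y = np.concatenate([np.zeros(len(reference)), np.ones(len(chunk))])
    preds = cross_val_predict(LogisticRegression(), X, y, cv=5, method="predict_proba")[:, 1]
    return float(roc_auc_score(y, preds))

rng = np.random.default_rng(0)
ref = rng.normal(size=(200, 4))
print(domain_classifier_auroc(ref, rng.normal(size=(200, 4))))           # ~0.5, no drift
print(domain_classifier_auroc(ref, rng.normal(loc=2.0, size=(200, 4))))  # ~1.0, drift
```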
dataeval/detectors/drift/_nml/_result.py
CHANGED
@@ -42,14 +42,13 @@ class AbstractResult(GenericOutput[pd.DataFrame]):
         """Export results to pandas dataframe."""
         if multilevel:
             return self._data
-        else:
-            column_names = [
-                "_".join(col).replace("chunk_chunk_chunk", "chunk").replace("chunk_chunk", "chunk")
-                for col in self._data.columns.values
-            ]
-            single_level_data = self._data.copy(deep=True)
-            single_level_data.columns = column_names
-            return single_level_data
+        column_names = [
+            "_".join(col).replace("chunk_chunk_chunk", "chunk").replace("chunk_chunk", "chunk")
+            for col in self._data.columns.values
+        ]
+        single_level_data = self._data.copy(deep=True)
+        single_level_data.columns = column_names
+        return single_level_data
 
     def filter(self, period: str = "all", metrics: str | Sequence[str] | None = None) -> Self:
         """Returns filtered result metric data."""
@@ -67,7 +66,7 @@ class Abstract1DResult(AbstractResult, ABC):
     def __init__(self, results_data: pd.DataFrame) -> None:
         super().__init__(results_data)
 
-    def _filter(self, period: str, metrics=None) -> Self:
+    def _filter(self, period: str, metrics: Sequence[str] | None = None) -> Self:
         data = self._data
         if period != "all":
             data = self._data.loc[self._data.loc[:, ("chunk", "period")] == period, :]  # type: ignore | dataframe loc
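Note: the rebuilt `to_df` branch flattens the two-level columns by underscore-joining and collapsing the duplicated `chunk` prefix. A toy equivalent:

```python
import pandas as pd

df = pd.DataFrame(
    [[0, "reference", 0.51]],
    columns=pd.MultiIndex.from_tuples(
        [("chunk", "chunk_index"), ("chunk", "period"), ("domain_classifier_auroc", "value")]
    ),
)
flat = df.copy(deep=True)
flat.columns = [
    "_".join(col).replace("chunk_chunk_chunk", "chunk").replace("chunk_chunk", "chunk")
    for col in df.columns.values
]
print(list(flat.columns))  # ['chunk_index', 'chunk_period', 'domain_classifier_auroc_value']
```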
dataeval/detectors/drift/_nml/_thresholds.py
CHANGED
@@ -29,10 +29,10 @@ class Threshold(ABC):
     """Class registry lookup to get threshold subclass from threshold_type string"""
 
     def __str__(self) -> str:
-        return self.
+        return f"{self.__class__.__name__}({str(vars(self))})"
 
     def __repr__(self) -> str:
-        return
+        return str(self)
 
     def __eq__(self, other: object) -> bool:
         return isinstance(other, self.__class__) and other.__dict__ == self.__dict__
@@ -41,7 +41,7 @@ class Threshold(ABC):
         Threshold._registry[threshold_type] = cls
 
     @abstractmethod
-    def thresholds(self, data: np.ndarray) -> tuple[float | None, float | None]:
+    def _thresholds(self, data: np.ndarray) -> tuple[float | None, float | None]:
         """Returns lower and upper threshold values when given one or more np.ndarray instances.
 
         Parameters:
@@ -69,6 +69,61 @@ class Threshold(ABC):
 
         return threshold_cls(**obj)
 
+    def calculate(
+        self,
+        data: np.ndarray,
+        lower_limit: float | None = None,
+        upper_limit: float | None = None,
+        override_using_none: bool = False,
+        logger: logging.Logger | None = None,
+    ) -> tuple[float | None, float | None]:
+        """
+        Calculate lower and upper threshold values with respect to the provided Threshold and value limits.
+
+        Parameters
+        ----------
+        data : np.ndarray
+            The data used by the Threshold instance to calculate the lower and upper threshold values.
+            This will often be the values of a drift detection method or performance metric on chunks of reference
+            data.
+        lower_limit : float or None, default None
+            An optional value that serves as a limit for the lower threshold value. Any calculated lower threshold
+            values that end up below this limit will be replaced by this limit value.
+            The limit is often a theoretical constraint enforced by a specific drift detection method or performance
+            metric.
+        upper_threshold_value_limit : float or None, default None
+            An optional value that serves as a limit for the lower threshold value. Any calculated lower threshold
+            values that end up below this limit will be replaced by this limit value.
+            The limit is often a theoretical constraint enforced by a specific drift detection method or performance
+            metric.
+        override_using_none: bool, default False
+            When set to True use None to override threshold values that exceed value limits.
+            This will prevent them from being rendered on plots.
+        logger: Optional[logging.Logger], default=None
+            An optional Logger instance. When provided a warning will be logged when a calculated threshold value
+            gets overridden by a threshold value limit.
+        """
+
+        lower_value, upper_value = self._thresholds(data)
+
+        if lower_limit is not None and lower_value is not None and lower_value <= lower_limit:
+            override_value = None if override_using_none else lower_limit
+            if logger:
+                logger.warning(
+                    f"lower threshold value {lower_value} overridden by lower threshold value limit {override_value}"
+                )
+            lower_value = override_value
+
+        if upper_limit is not None and upper_value is not None and upper_value >= upper_limit:
+            override_value = None if override_using_none else upper_limit
+            if logger:
+                logger.warning(
+                    f"upper threshold value {upper_value} overridden by upper threshold value limit {override_value}"
+                )
+            upper_value = override_value
+
+        return lower_value, upper_value
+
 
 class ConstantThreshold(Threshold, threshold_type="constant"):
     """A `Thresholder` implementation that returns a constant lower and or upper threshold value.
@@ -91,7 +146,7 @@ class ConstantThreshold(Threshold, threshold_type="constant"):
     None 0.1
     """
 
-    def __init__(self, lower: float | int | None = None, upper: float | int | None = None):
+    def __init__(self, lower: float | int | None = None, upper: float | int | None = None) -> None:
         """Creates a new ConstantThreshold instance.
 
         Args:
@@ -109,11 +164,11 @@ class ConstantThreshold(Threshold, threshold_type="constant"):
         self.lower = lower
         self.upper = upper
 
-    def thresholds(self, data: np.ndarray) -> tuple[float | None, float | None]:
+    def _thresholds(self, data: np.ndarray) -> tuple[float | None, float | None]:
         return self.lower, self.upper
 
     @staticmethod
-    def _validate_inputs(lower: float | int | None = None, upper: float | int | None = None):
+    def _validate_inputs(lower: float | int | None = None, upper: float | int | None = None) -> None:
         if lower is not None and not isinstance(lower, (float, int)) or isinstance(lower, bool):
             raise ValueError(f"expected type of 'lower' to be 'float', 'int' or None but got '{type(lower).__name__}'")
 
@@ -149,7 +204,7 @@ class StandardDeviationThreshold(Threshold, threshold_type="standard_deviation"):
         std_lower_multiplier: float | int | None = 3,
         std_upper_multiplier: float | int | None = 3,
         offset_from: Callable[[np.ndarray], Any] = np.nanmean,
-    ):
+    ) -> None:
         """Creates a new StandardDeviationThreshold instance.
 
         Args:
@@ -173,7 +228,7 @@ class StandardDeviationThreshold(Threshold, threshold_type="standard_deviation"):
         self.std_upper_multiplier = std_upper_multiplier
         self.offset_from = offset_from
 
-    def thresholds(self, data: np.ndarray) -> tuple[float | None, float | None]:
+    def _thresholds(self, data: np.ndarray) -> tuple[float | None, float | None]:
         aggregate = self.offset_from(data)
         std = np.nanstd(data)
 
@@ -184,7 +239,9 @@ class StandardDeviationThreshold(Threshold, threshold_type="standard_deviation"):
         return lower_threshold, upper_threshold
 
     @staticmethod
-    def _validate_inputs(std_lower_multiplier: float | int | None = 3, std_upper_multiplier: float | int | None = 3):
+    def _validate_inputs(
+        std_lower_multiplier: float | int | None = 3, std_upper_multiplier: float | int | None = 3
+    ) -> None:
         if (
             std_lower_multiplier is not None
             and not isinstance(std_lower_multiplier, (float, int))
@@ -210,71 +267,3 @@ class StandardDeviationThreshold(Threshold, threshold_type="standard_deviation"):
 
         if std_upper_multiplier and std_upper_multiplier < 0:
             raise ValueError(f"'std_upper_multiplier' should be greater than 0 but got value {std_upper_multiplier}")
-
-
-def calculate_threshold_values(
-    threshold: Threshold,
-    data: np.ndarray,
-    lower_threshold_value_limit: float | None = None,
-    upper_threshold_value_limit: float | None = None,
-    override_using_none: bool = False,
-    logger: logging.Logger | None = None,
-    metric_name: str | None = None,
-) -> tuple[float | None, float | None]:
-    """Calculate lower and upper threshold values with respect to the provided Threshold and value limits.
-
-    Parameters:
-        threshold: Threshold
-            The Threshold instance that determines how the lower and upper threshold values will be calculated.
-        data: np.ndarray
-            The data used by the Threshold instance to calculate the lower and upper threshold values.
-            This will often be the values of a drift detection method or performance metric on chunks of reference data.
-        lower_threshold_value_limit: Optional[float], default=None
-            An optional value that serves as a limit for the lower threshold value. Any calculated lower threshold
-            values that end up below this limit will be replaced by this limit value.
-            The limit is often a theoretical constraint enforced by a specific drift detection method or performance
-            metric.
-        upper_threshold_value_limit: Optional[float], default=None
-            An optional value that serves as a limit for the lower threshold value. Any calculated lower threshold
-            values that end up below this limit will be replaced by this limit value.
-            The limit is often a theoretical constraint enforced by a specific drift detection method or performance
-            metric.
-        override_using_none: bool, default=False
-            When set to True use None to override threshold values that exceed value limits.
-            This will prevent them from being rendered on plots.
-        logger: Optional[logging.Logger], default=None
-            An optional Logger instance. When provided a warning will be logged when a calculated threshold value
-            gets overridden by a threshold value limit.
-        metric_name: Optional[str], default=None
-            When provided the metric name will be included within any log messages for additional clarity.
-    """
-
-    lower_threshold_value, upper_threshold_value = threshold.thresholds(data)
-
-    if (
-        lower_threshold_value_limit is not None
-        and lower_threshold_value is not None
-        and lower_threshold_value <= lower_threshold_value_limit
-    ):
-        override_value = None if override_using_none else lower_threshold_value_limit
-        if logger:
-            logger.warning(
-                f"{metric_name + ' ' if metric_name else ''}lower threshold value {lower_threshold_value} "
-                f"overridden by lower threshold value limit {override_value}"
-            )
-        lower_threshold_value = override_value
-
-    if (
-        upper_threshold_value_limit is not None
-        and upper_threshold_value is not None
-        and upper_threshold_value >= upper_threshold_value_limit
-    ):
-        override_value = None if override_using_none else upper_threshold_value_limit
-        if logger:
-            logger.warning(
-                f"{metric_name + ' ' if metric_name else ''}upper threshold value {upper_threshold_value} "
-                f"overridden by upper threshold value limit {override_value}"
-            )
-        upper_threshold_value = override_value
-
-    return lower_threshold_value, upper_threshold_value
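Note: with the module-level `calculate_threshold_values` helper removed, limit clamping is now invoked on the threshold object itself. A hedged usage sketch (the `_nml` modules are private, so the import path below only mirrors the diff and is not a public API):

```python
import numpy as np
from dataeval.detectors.drift._nml._thresholds import ConstantThreshold, StandardDeviationThreshold

data = np.array([0.48, 0.50, 0.52, 0.49, 0.51])

# Constant bounds pass through unchanged.
print(ConstantThreshold(lower=0.45, upper=0.65).calculate(data))  # (0.45, 0.65)

# Mean +/- 3 standard deviations, clamped into the valid AUROC range [0.0, 1.0].
print(StandardDeviationThreshold().calculate(data, lower_limit=0.0, upper_limit=1.0))
```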