dataeval 0.73.0__tar.gz → 0.73.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {dataeval-0.73.0 → dataeval-0.73.1}/PKG-INFO +1 -1
  2. {dataeval-0.73.0 → dataeval-0.73.1}/pyproject.toml +6 -4
  3. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/__init__.py +3 -3
  4. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/__init__.py +1 -1
  5. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/drift/__init__.py +1 -1
  6. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/drift/base.py +2 -2
  7. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/linters/clusterer.py +1 -1
  8. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/ood/__init__.py +1 -1
  9. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/bias/balance.py +29 -19
  10. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/bias/coverage.py +11 -11
  11. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/bias/diversity.py +79 -50
  12. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/bias/metadata.py +133 -51
  13. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/bias/parity.py +30 -24
  14. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/__init__.py +2 -2
  15. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/shared.py +1 -1
  16. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/split_dataset.py +12 -6
  17. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/torch/datasets.py +2 -2
  18. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/workflows/__init__.py +1 -1
  19. {dataeval-0.73.0 → dataeval-0.73.1}/LICENSE.txt +0 -0
  20. {dataeval-0.73.0 → dataeval-0.73.1}/README.md +0 -0
  21. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/drift/cvm.py +0 -0
  22. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/drift/ks.py +0 -0
  23. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/drift/mmd.py +0 -0
  24. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/drift/torch.py +0 -0
  25. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/drift/uncertainty.py +0 -0
  26. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/drift/updates.py +0 -0
  27. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/linters/__init__.py +0 -0
  28. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/linters/duplicates.py +0 -0
  29. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/linters/merged_stats.py +0 -0
  30. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/linters/outliers.py +0 -0
  31. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/ood/ae.py +0 -0
  32. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/ood/aegmm.py +0 -0
  33. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/ood/base.py +0 -0
  34. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/ood/llr.py +0 -0
  35. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/ood/metadata_ks_compare.py +0 -0
  36. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/ood/metadata_least_likely.py +0 -0
  37. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/ood/metadata_ood_mi.py +0 -0
  38. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/ood/vae.py +0 -0
  39. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/detectors/ood/vaegmm.py +0 -0
  40. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/interop.py +0 -0
  41. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/__init__.py +0 -0
  42. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/bias/__init__.py +0 -0
  43. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/estimators/__init__.py +0 -0
  44. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/estimators/ber.py +0 -0
  45. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/estimators/divergence.py +0 -0
  46. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/estimators/uap.py +0 -0
  47. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/stats/__init__.py +0 -0
  48. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/stats/base.py +0 -0
  49. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/stats/boxratiostats.py +0 -0
  50. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/stats/datasetstats.py +0 -0
  51. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/stats/dimensionstats.py +0 -0
  52. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/stats/hashstats.py +0 -0
  53. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/stats/labelstats.py +0 -0
  54. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/stats/pixelstats.py +0 -0
  55. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/metrics/stats/visualstats.py +0 -0
  56. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/output.py +0 -0
  57. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/py.typed +0 -0
  58. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/image.py +0 -0
  59. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/lazy.py +0 -0
  60. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/metadata.py +0 -0
  61. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/tensorflow/__init__.py +0 -0
  62. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/tensorflow/_internal/gmm.py +0 -0
  63. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/tensorflow/_internal/loss.py +0 -0
  64. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/tensorflow/_internal/models.py +0 -0
  65. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/tensorflow/_internal/trainer.py +0 -0
  66. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/tensorflow/_internal/utils.py +0 -0
  67. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/tensorflow/loss/__init__.py +0 -0
  68. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/torch/__init__.py +0 -0
  69. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/torch/blocks.py +0 -0
  70. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/torch/models.py +0 -0
  71. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/torch/trainer.py +0 -0
  72. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/utils/torch/utils.py +0 -0
  73. {dataeval-0.73.0 → dataeval-0.73.1}/src/dataeval/workflows/sufficiency.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dataeval
- Version: 0.73.0
+ Version: 0.73.1
  Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
  Home-page: https://dataeval.ai/
  License: MIT

pyproject.toml
@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "dataeval"
- version = "0.73.0" # dynamic
+ version = "0.73.1" # dynamic
  description = "DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks"
  license = "MIT"
  readme = "README.md"
@@ -69,8 +69,7 @@ all = ["matplotlib", "markupsafe", "tensorflow", "tensorflow_probability", "tf-k
  optional = true

  [tool.poetry.group.dev.dependencies]
- tox = {version = "*"}
- tox-uv = {version = "*"}
+ nox = {version = "*", extras = ["uv"]}
  uv = {version = "*"}
  poetry = {version = "*"}
  poetry-lock-groups-plugin = {version = "*"}
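Note: the dev group above swaps tox and tox-uv for nox with its uv extra. A minimal noxfile.py in the spirit of this change might look like the sketch below; the session name and test command are illustrative assumptions, not taken from this package.

```python
# noxfile.py -- illustrative sketch only; session names and commands are assumptions.
import nox

# nox[uv] pulls in the uv backend, which creates session virtualenvs with uv.
nox.options.default_venv_backend = "uv"


@nox.session
def test(session: nox.Session) -> None:
    """Install the package plus pytest and run the test suite."""
    session.install(".")
    session.install("pytest")
    session.run("pytest")
```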
@@ -122,7 +121,6 @@ files = ["src/dataeval/__init__.py"]
  name = "dataeval"

  [tool.poetry2conda.dependencies]
- nvidia-cudnn-cu11 = { name = "cudnn" }
  tensorflow_probability = { name = "tensorflow-probability" }
  torch = { name = "pytorch" }
  xxhash = { name = "python-xxhash" }
@@ -145,6 +143,9 @@ parallel = true
  exclude_also = [
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
+   "if _IS_TENSORFLOW_AVAILABLE",
+   "if _IS_TORCH_AVAILABLE",
+   "if _IS_TORCHVISION_AVAILABLE",
  ]
  include = ["*/src/dataeval/*"]
  omit = [
@@ -164,6 +165,7 @@ exclude = [
    "*env*",
    "output",
    "_build",
+   ".nox",
    ".tox",
    "prototype",
  ]
src/dataeval/__init__.py
@@ -1,4 +1,4 @@
- __version__ = "0.73.0"
+ __version__ = "0.73.1"

  from importlib.util import find_spec

@@ -12,12 +12,12 @@ from dataeval import detectors, metrics  # noqa: E402

  __all__ = ["detectors", "metrics"]

- if _IS_TORCH_AVAILABLE:  # pragma: no cover
+ if _IS_TORCH_AVAILABLE:
      from dataeval import workflows

      __all__ += ["workflows"]

- if _IS_TENSORFLOW_AVAILABLE or _IS_TORCH_AVAILABLE:  # pragma: no cover
+ if _IS_TENSORFLOW_AVAILABLE or _IS_TORCH_AVAILABLE:
      from dataeval import utils

      __all__ += ["utils"]
src/dataeval/detectors/__init__.py
@@ -7,7 +7,7 @@ from dataeval.detectors import drift, linters

  __all__ = ["drift", "linters"]

- if _IS_TENSORFLOW_AVAILABLE:  # pragma: no cover
+ if _IS_TENSORFLOW_AVAILABLE:
      from dataeval.detectors import ood

      __all__ += ["ood"]

src/dataeval/detectors/drift/__init__.py
@@ -10,7 +10,7 @@ from dataeval.detectors.drift.ks import DriftKS

  __all__ = ["DriftCVM", "DriftKS", "DriftOutput", "updates"]

- if _IS_TORCH_AVAILABLE:  # pragma: no cover
+ if _IS_TORCH_AVAILABLE:
      from dataeval.detectors.drift.mmd import DriftMMD, DriftMMDOutput
      from dataeval.detectors.drift.torch import preprocess_drift
      from dataeval.detectors.drift.uncertainty import DriftUncertainty

src/dataeval/detectors/drift/base.py
@@ -18,7 +18,7 @@ from typing import Any, Callable, Literal, TypeVar
  import numpy as np
  from numpy.typing import ArrayLike, NDArray

- from dataeval.interop import as_numpy, to_numpy
+ from dataeval.interop import as_numpy
  from dataeval.output import OutputMetadata, set_metadata

  R = TypeVar("R")
@@ -196,7 +196,7 @@ class BaseDrift:
          if correction not in ["bonferroni", "fdr"]:
              raise ValueError("`correction` must be `bonferroni` or `fdr`.")

-         self._x_ref = to_numpy(x_ref)
+         self._x_ref = as_numpy(x_ref)
          self.x_ref_preprocessed: bool = x_ref_preprocessed

          # Other attributes

src/dataeval/detectors/linters/clusterer.py
@@ -480,7 +480,7 @@ class Clusterer:
              samples = self.clusters[level][cluster_id].samples
              if len(samples) >= self._min_num_samples_per_cluster:
                  duplicates_std.append(self.clusters[level][cluster_id].dist_std)
-         diag_mask = np.ones_like(self._sqdmat, dtype=bool)
+         diag_mask = np.ones_like(self._sqdmat, dtype=np.bool_)
          np.fill_diagonal(diag_mask, 0)
          diag_mask = np.triu(diag_mask)
src/dataeval/detectors/ood/__init__.py
@@ -4,7 +4,7 @@ Out-of-distribution (OOD)` detectors identify data that is different from the da

  from dataeval import _IS_TENSORFLOW_AVAILABLE

- if _IS_TENSORFLOW_AVAILABLE:  # pragma: no cover
+ if _IS_TENSORFLOW_AVAILABLE:
      from dataeval.detectors.ood.ae import OOD_AE
      from dataeval.detectors.ood.aegmm import OOD_AEGMM
      from dataeval.detectors.ood.base import OODOutput, OODScoreOutput

src/dataeval/metrics/bias/balance.py
@@ -11,7 +11,7 @@ import numpy as np
  from numpy.typing import ArrayLike, NDArray
  from sklearn.feature_selection import mutual_info_classif, mutual_info_regression

- from dataeval.metrics.bias.metadata import entropy, heatmap, preprocess_metadata
+ from dataeval.metrics.bias.metadata import CLASS_LABEL, entropy, heatmap, preprocess_metadata
  from dataeval.output import OutputMetadata, set_metadata

  with contextlib.suppress(ImportError):
@@ -31,9 +31,9 @@ class BalanceOutput(OutputMetadata):
          Estimate of inter/intra-factor mutual information
      classwise : NDArray[np.float64]
          Estimate of mutual information between metadata factors and individual class labels
-     class_list: NDArray
+     class_list : NDArray
          Array of the class labels present in the dataset
-     metadata_names: list[str]
+     metadata_names : list[str]
          Names of each metadata factor
      """
@@ -54,9 +54,9 @@

          Parameters
          ----------
-         row_labels : ArrayLike | None, default None
+         row_labels : ArrayLike or None, default None
              List/Array containing the labels for rows in the histogram
-         col_labels : ArrayLike | None, default None
+         col_labels : ArrayLike or None, default None
              List/Array containing the labels for columns in the histogram
          plot_classwise : bool, default False
              Whether to plot per-class balance instead of global balance
@@ -116,19 +116,29 @@ def validate_num_neighbors(num_neighbors: int) -> int:


  @set_metadata("dataeval.metrics")
- def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neighbors: int = 5) -> BalanceOutput:
+ def balance(
+     class_labels: ArrayLike,
+     metadata: Mapping[str, ArrayLike],
+     num_neighbors: int = 5,
+     continuous_factor_bincounts: Mapping[str, int] | None = None,
+ ) -> BalanceOutput:
      """
      Mutual information (MI) between factors (class label, metadata, label/image properties)

      Parameters
      ----------
-     class_labels: ArrayLike
+     class_labels : ArrayLike
          List of class labels for each image
-     metadata: Mapping[str, ArrayLike]
+     metadata : Mapping[str, ArrayLike]
          Dict of lists of metadata factors for each image
-     num_neighbors: int, default 5
+     num_neighbors : int, default 5
          Number of nearest neighbors to use for computing MI between discrete
          and continuous variables.
+     continuous_factor_bincounts : Mapping[str, int] or None, default None
+         The factors in metadata that have continuous values and the array of bin counts to
+         discretize values into. All factors are treated as having discrete values unless they
+         are specified as keys in this dictionary. Each element of this array must occur as a key
+         in metadata.

      Returns
      -------
@@ -148,7 +158,7 @@ def balance(class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], num_neig
      -------
      Return balance (mutual information) of factors with class_labels

-     >>> bal = balance(class_labels, metadata)
+     >>> bal = balance(class_labels, metadata, continuous_factor_bincounts=continuous_factor_bincounts)
      >>> bal.balance
      array([0.99999822, 0.13363788, 0.04505382, 0.02994455])

@@ -165,6 +175,7 @@
      array([[0.99999822, 0.13363788, 0.        , 0.        ],
             [0.99999822, 0.13363788, 0.        , 0.        ]])

+
      See Also
      --------
      sklearn.feature_selection.mutual_info_classif
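Taken together, the new signature lets callers opt individual factors into discretization. A hypothetical end-to-end call (the factor names and values below are invented for illustration):

```python
import numpy as np

from dataeval.metrics.bias import balance

# Invented example: 6 images, one discrete factor and one continuous factor.
class_labels = np.array([0, 0, 1, 1, 0, 1])
metadata = {
    "sensor": [0, 1, 1, 0, 1, 0],                # discrete, left as-is
    "altitude": [1.2, 0.4, 3.1, 2.8, 0.9, 2.2],  # continuous, binned below
}

# Only "altitude" is discretized (into 5 bins); "sensor" stays discrete.
bal = balance(
    class_labels,
    metadata,
    num_neighbors=5,
    continuous_factor_bincounts={"altitude": 5},
)
print(bal.balance)    # MI of the class label with each factor
print(bal.classwise)  # per-class MI estimates
```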
@@ -178,9 +189,9 @@
      mi[:] = np.nan

      for idx in range(num_factors):
-         tgt = data[:, idx].astype(int)
+         tgt = data[:, idx].astype(np.intp)

-         if is_categorical[idx]:
+         if continuous_factor_bincounts and names[idx] not in continuous_factor_bincounts:
              mi[idx, :] = mutual_info_classif(
                  data,
                  tgt,
@@ -197,7 +208,7 @@
                  random_state=0,
              )

-     ent_all = entropy(data, names, is_categorical, normalized=False)
+     ent_all = entropy(data, names, continuous_factor_bincounts, normalized=False)
      norm_factor = 0.5 * np.add.outer(ent_all, ent_all) + 1e-6
      # in principle MI should be symmetric, but it is not in practice.
      nmi = 0.5 * (mi + mi.T) / norm_factor
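In formula form, the three lines above symmetrize the MI matrix and normalize it by the arithmetic mean of the factor entropies, with a small ε guarding against division by zero:

```latex
\mathrm{NMI}_{ij} = \frac{\tfrac{1}{2}\left(\mathrm{MI}_{ij} + \mathrm{MI}_{ji}\right)}{\tfrac{1}{2}\left(H_i + H_j\right) + \varepsilon}, \qquad \varepsilon = 10^{-6}
```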
@@ -205,7 +216,7 @@
      factors = nmi[1:, 1:]

      # unique class labels
-     class_idx = names.index("class_label")
+     class_idx = names.index(CLASS_LABEL)
      u_cls = np.unique(data[:, class_idx])
      num_classes = len(u_cls)

@@ -214,12 +225,11 @@
      classwise_mi[:] = np.nan

      # categorical variables, excluding class label
-     cat_mask = np.concatenate((is_categorical[:class_idx], is_categorical[(class_idx + 1) :]), axis=0).astype(int)
+     cat_mask = np.concatenate((is_categorical[:class_idx], is_categorical[(class_idx + 1) :]), axis=0).astype(np.intp)

-     tgt_bin = np.stack([data[:, class_idx] == cls for cls in u_cls]).T.astype(int)
-     ent_tgt_bin = entropy(
-         tgt_bin, names=[str(idx) for idx in range(num_classes)], is_categorical=[True for idx in range(num_classes)]
-     )
+     tgt_bin = np.stack([data[:, class_idx] == cls for cls in u_cls]).T.astype(np.intp)
+     names = [str(idx) for idx in range(num_classes)]
+     ent_tgt_bin = entropy(tgt_bin, names, continuous_factor_bincounts)

      # classification MI for discrete/categorical features
      for idx in range(num_classes):
src/dataeval/metrics/bias/coverage.py
@@ -5,7 +5,7 @@ __all__ = ["CoverageOutput", "coverage"]
  import contextlib
  import math
  from dataclasses import dataclass
- from typing import Any, Literal
+ from typing import Literal

  import numpy as np
  from numpy.typing import ArrayLike, NDArray
@@ -27,9 +27,9 @@

      Attributes
      ----------
-     indices : NDArray
+     indices : NDArray[np.intp]
          Array of uncovered indices
-     radii : NDArray
+     radii : NDArray[np.float64]
          Array of critical value radii
      critical_value : float
          Radius for :term:`coverage<Coverage>`
@@ -39,11 +39,7 @@
      radii: NDArray[np.float64]
      critical_value: float

-     def plot(
-         self,
-         images: NDArray[Any],
-         top_k: int = 6,
-     ) -> Figure:
+     def plot(self, images: ArrayLike, top_k: int = 6) -> Figure:
          """
          Plot the top k images together for visualization

@@ -53,6 +49,10 @@
              Original images (not embeddings) in (N, C, H, W) or (N, H, W) format
          top_k : int, default 6
              Number of images to plot (plotting assumes groups of 3)
+
+         Returns
+         -------
+         matplotlib.figure.Figure
          """
          # Determine which images to plot
          highest_uncovered_indices = self.indices[:top_k]
@@ -82,12 +82,12 @@ def coverage(
      embeddings : ArrayLike, shape - (N, P)
          A dataset in an ArrayLike format.
          Function expects the data to have 2 dimensions, N number of observations in a P-dimesionial space.
-     radius_type : Literal["adaptive", "naive"], default "adaptive"
+     radius_type : {"adaptive", "naive"}, default "adaptive"
          The function used to determine radius.
-     k: int, default 20
+     k : int, default 20
          Number of observations required in order to be covered.
          [1] suggests that a minimum of 20-50 samples is necessary.
-     percent: float, default 0.01
+     percent : float, default 0.01
          Percent of observations to be considered uncovered. Only applies to adaptive radius.

      Returns
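For reference, a hypothetical call that matches the documented parameters (the embeddings are invented, and the import path assumes coverage is re-exported from dataeval.metrics.bias):

```python
import numpy as np

from dataeval.metrics.bias import coverage

# Invented embeddings: 500 observations in a 16-dimensional space.
embeddings = np.random.default_rng(0).uniform(size=(500, 16))

cvg = coverage(embeddings, radius_type="adaptive", k=20, percent=0.01)
print(cvg.indices[:6])     # least-covered observations
print(cvg.critical_value)  # radius used to decide coverage

# With matplotlib installed, plot() now accepts any ArrayLike of original images:
# fig = cvg.plot(images, top_k=6)
```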
src/dataeval/metrics/bias/diversity.py
@@ -10,6 +10,7 @@ import numpy as np
  from numpy.typing import ArrayLike, NDArray

  from dataeval.metrics.bias.metadata import (
+     CLASS_LABEL,
      diversity_bar_plot,
      entropy,
      get_counts,
@@ -35,9 +36,9 @@
          :term:`Diversity` index for classes and factors
      classwise : NDArray[np.float64]
          Classwise diversity index [n_class x n_factor]
-     class_list: NDArray[np.int64]
+     class_list : NDArray[np.int64]
          Class labels for each value in the dataset
-     metadata_names: list[str]
+     metadata_names : list[str]
          Names of each metadata factor
      """
@@ -45,12 +46,11 @@
      classwise: NDArray[np.float64]
      class_list: NDArray[Any]
      metadata_names: list[str]
-     method: Literal["shannon", "simpson"]

      def plot(
          self,
-         row_labels: list[Any] | NDArray[Any] | None = None,
-         col_labels: list[Any] | NDArray[Any] | None = None,
+         row_labels: ArrayLike | list[Any] | None = None,
+         col_labels: ArrayLike | list[Any] | None = None,
          plot_classwise: bool = False,
      ) -> Figure:
          """
@@ -58,9 +58,9 @@

          Parameters
          ----------
-         row_labels : ArrayLike | None, default None
+         row_labels : ArrayLike or None, default None
              List/Array containing the labels for rows in the histogram
-         col_labels : ArrayLike | None, default None
+         col_labels : ArrayLike or None, default None
              List/Array containing the labels for columns in the histogram
          plot_classwise : bool, default False
              Whether to plot per-class balance instead of global balance
@@ -77,7 +77,7 @@
                  col_labels,
                  xlabel="Factors",
                  ylabel="Class",
-                 cbarlabel=f"Normalized {self.method.title()} Index",
+                 cbarlabel=f"Normalized {self.meta()['arguments']['method'].title()} Index",
              )

          else:
@@ -92,7 +92,7 @@
  def diversity_shannon(
      data: NDArray[Any],
      names: list[str],
-     is_categorical: list[bool],
+     continuous_factor_bincounts: Mapping[str, int] | None = None,
      subset_mask: NDArray[np.bool_] | None = None,
  ) -> NDArray[np.float64]:
      """
@@ -106,14 +106,16 @@

      Parameters
      ----------
-     data: NDArray
+     data : NDArray
          Array containing numerical values for metadata factors
-     names: list[str]
+     names : list[str]
          Names of metadata factors -- keys of the metadata dictionary
-     is_categorical: list[bool]
-         List of flags to identify whether variables are categorical (True) or
-         continuous (False)
-     subset_mask: NDArray[np.bool_] | None
+     continuous_factor_bincounts : Mapping[str, int] or None, default None
+         The factors in names that have continuous values and the array of bin counts to
+         discretize values into. All factors are treated as having discrete values unless they
+         are specified as keys in this dictionary. Each element of this array must occur as a key
+         in names.
+     subset_mask : NDArray[np.bool_] or None, default None
          Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts

      Note
@@ -122,18 +124,32 @@

      Returns
      -------
-     diversity_index: NDArray
+     diversity_index : NDArray[np.float64]
          Diversity index per column of X

      See Also
      --------
      numpy.histogram
      """
+     hist_cache = {}

      # entropy computed using global auto bins so that we can properly normalize
-     ent_unnormalized = entropy(data, names, is_categorical, normalized=False, subset_mask=subset_mask)
+     ent_unnormalized = entropy(
+         data,
+         names,
+         continuous_factor_bincounts,
+         normalized=False,
+         subset_mask=subset_mask,
+         hist_cache=hist_cache,
+     )
      # normalize by global counts rather than classwise counts
-     num_bins = get_num_bins(data, names, is_categorical=is_categorical, subset_mask=subset_mask)
+     num_bins = get_num_bins(
+         data,
+         names,
+         continuous_factor_bincounts=continuous_factor_bincounts,
+         subset_mask=subset_mask,
+         hist_cache=hist_cache,
+     )
      ent_norm = np.empty(ent_unnormalized.shape)
      ent_norm[num_bins != 1] = ent_unnormalized[num_bins != 1] / np.log(num_bins[num_bins != 1])
      ent_norm[num_bins == 1] = 0
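The new hist_cache dict is threaded through both entropy and get_num_bins so each factor's histogram is computed once per diversity_shannon call rather than once per callee. Reduced to a self-contained sketch (cached_histogram is a hypothetical stand-in, not a dataeval function):

```python
from typing import Any

import numpy as np
from numpy.typing import NDArray


def cached_histogram(
    name: str,
    values: NDArray[Any],
    bins: int,
    hist_cache: dict[str, NDArray[Any]],
) -> NDArray[Any]:
    # Compute each factor's histogram once; later callers reuse the counts.
    if name not in hist_cache:
        hist_cache[name] = np.histogram(values, bins=bins)[0]
    return hist_cache[name]


hist_cache: dict[str, NDArray[Any]] = {}
values = np.random.default_rng(0).normal(size=100)
first = cached_histogram("altitude", values, bins=5, hist_cache=hist_cache)
again = cached_histogram("altitude", values, bins=5, hist_cache=hist_cache)
assert first is again  # the second call is a cache hit
```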
@@ -143,7 +159,7 @@
  def diversity_simpson(
      data: NDArray[Any],
      names: list[str],
-     is_categorical: list[bool],
+     continuous_factor_bincounts: Mapping[str, int] | None = None,
      subset_mask: NDArray[np.bool_] | None = None,
  ) -> NDArray[np.float64]:
      """
@@ -157,14 +173,16 @@

      Parameters
      ----------
-     data: NDArray
+     data : NDArray
          Array containing numerical values for metadata factors
-     names: list[str]
+     names : list[str]
          Names of metadata factors -- keys of the metadata dictionary
-     is_categorical: list[bool]
-         List of flags to identify whether variables are categorical (True) or
-         continuous (False)
-     subset_mask: NDArray[np.bool_] | None
+     continuous_factor_bincounts : Mapping[str, int] or None, default None
+         The factors in names that have continuous values and the array of bin counts to
+         discretize values into. All factors are treated as having discrete values unless they
+         are specified as keys in this dictionary. Each element of this array must occur as a key
+         in names.
+     subset_mask : NDArray[np.bool_] or None, default None
          Boolean mask of samples to bin (e.g. when computing per class). True -> include in histogram counts

      Note
@@ -175,35 +193,39 @@

      Returns
      -------
-     NDArray
+     diversity_index : NDArray[np.float64]
          Diversity index per column of X

      See Also
      --------
      numpy.histogram
      """
+     hist_cache = {}

-     hist_counts, _ = get_counts(data, names, is_categorical, subset_mask)
+     hist_counts = get_counts(data, names, continuous_factor_bincounts, subset_mask, hist_cache=hist_cache)
      # normalize by global counts, not classwise counts
-     num_bins = get_num_bins(data, names, is_categorical)
+     num_bins = get_num_bins(data, names, continuous_factor_bincounts, hist_cache=hist_cache)

      ev_index = np.empty(len(names))
      # loop over columns for convenience
      for col, cnts in enumerate(hist_counts.values()):
          # relative frequencies
-         p_i = cnts / cnts.sum()
+         p_i = cnts / np.sum(cnts)
          # inverse Simpson index normalized by (number of bins)
-         s_0 = 1 / np.sum(p_i**2) / num_bins[col]
+         s_0 = 1 / np.sum(p_i**2)  # / num_bins[col]
          if num_bins[col] == 1:
              ev_index[col] = 0
          else:
-             ev_index[col] = (s_0 * num_bins[col] - 1) / (num_bins[col] - 1)
+             ev_index[col] = (s_0 - 1) / (num_bins[col] - 1)
      return ev_index


  @set_metadata()
  def diversity(
-     class_labels: ArrayLike, metadata: Mapping[str, ArrayLike], method: Literal["shannon", "simpson"] = "simpson"
+     class_labels: ArrayLike,
+     metadata: Mapping[str, ArrayLike],
+     continuous_factor_bincounts: Mapping[str, int] | None = None,
+     method: Literal["simpson", "shannon"] = "simpson",
  ) -> DiversityOutput:
      """
      Compute :term:`diversity<Diversity>` and classwise diversity for discrete/categorical variables and,
@@ -216,11 +238,16 @@

      Parameters
      ----------
-     class_labels: ArrayLike
+     class_labels : ArrayLike
          List of class labels for each image
-     metadata: Mapping[str, ArrayLike]
+     metadata : Mapping[str, ArrayLike]
          Dict of list of metadata factors for each image
-     method: Literal["shannon", "simpson"], default "simpson"
+     continuous_factor_bincounts : Mapping[str, int] or None, default None
+         The factors in metadata that have continuous values and the array of bin counts to
+         discretize values into. All factors are treated as having discrete values unless they
+         are specified as keys in this dictionary. Each element of this array must occur as a key
+         in metadata.
+     method : {"simpson", "shannon"}, default "simpson"
          Indicates which diversity index should be computed

      Note
@@ -239,40 +266,42 @@
      -------
      Compute Simpson diversity index of metadata and class labels

-     >>> div_simp = diversity(class_labels, metadata, method="simpson")
+     >>> div_simp = diversity(class_labels, metadata, continuous_factor_bincounts, method="simpson")
      >>> div_simp.diversity_index
-     array([0.18103448, 0.18103448, 0.88636364])
+     array([0.72413793, 0.72413793, 0.88636364])

      >>> div_simp.classwise
-     array([[0.17241379, 0.39473684],
-            [0.2       , 0.2       ]])
+     array([[0.68965517, 0.69230769],
+            [0.8       , 1.        ]])

      Compute Shannon diversity index of metadata and class labels

-     >>> div_shan = diversity(class_labels, metadata, method="shannon")
+     >>> div_shan = diversity(class_labels, metadata, continuous_factor_bincounts, method="shannon")
      >>> div_shan.diversity_index
-     array([0.37955133, 0.37955133, 0.96748876])
+     array([0.8812909 , 0.8812909 , 0.96748876])

      >>> div_shan.classwise
-     array([[0.43156028, 0.83224889],
-            [0.57938016, 0.57938016]])
+     array([[0.86312057, 0.91651644],
+            [0.91829583, 1.        ]])

      See Also
      --------
      numpy.histogram
      """
      diversity_fn = get_method({"simpson": diversity_simpson, "shannon": diversity_shannon}, method)
-     data, names, is_categorical, unique_labels = preprocess_metadata(class_labels, metadata)
-     diversity_index = diversity_fn(data, names, is_categorical, None).astype(np.float64)
+     data, names, _, unique_labels = preprocess_metadata(class_labels, metadata)
+     diversity_index = diversity_fn(data, names, continuous_factor_bincounts)
+
+     class_idx = names.index(CLASS_LABEL)
+     class_lbl = data[:, class_idx]

-     class_idx = names.index("class_label")
-     u_classes = np.unique(data[:, class_idx])
+     u_classes = np.unique(class_lbl)
      num_factors = len(names)
      diversity = np.empty((len(u_classes), num_factors))
      diversity[:] = np.nan
      for idx, cls in enumerate(u_classes):
-         subset_mask = data[:, class_idx] == cls
-         diversity[idx, :] = diversity_fn(data, names, is_categorical, subset_mask)
+         subset_mask = class_lbl == cls
+         diversity[idx, :] = diversity_fn(data, names, continuous_factor_bincounts, subset_mask)
      div_no_class = np.concatenate((diversity[:, :class_idx], diversity[:, (class_idx + 1) :]), axis=1)

-     return DiversityOutput(diversity_index, div_no_class, unique_labels, list(metadata.keys()), method)
+     return DiversityOutput(diversity_index, div_no_class, unique_labels, list(metadata.keys()))
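A closing note on the evenness rewrite in diversity_simpson: with B bins and relative frequencies p_i, the old and new expressions are algebraically identical, because the old s_0 folded a 1/B factor in only to multiply it back out:

```latex
\frac{s_0^{\text{old}} B - 1}{B - 1}
= \frac{\frac{1}{B \sum_i p_i^2} \, B - 1}{B - 1}
= \frac{\frac{1}{\sum_i p_i^2} - 1}{B - 1}
= \frac{s_0^{\text{new}} - 1}{B - 1}
```

The changed doctest values above therefore come from the new continuous_factor_bincounts binning, not from this simplification.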