PyPI - dataeval - Versions diffs - 1.0.5__tar.gz → 1.0.6__tar.gz - Mend

dataeval 1.0.5tar.gz → 1.0.6tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

{dataeval-1.0.5 → dataeval-1.0.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataeval
-Version: 1.0.5
+Version: 1.0.6
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Project-URL: Homepage, https://dataeval.ai/
 Project-URL: Repository, https://github.com/aria-ml/dataeval/

{dataeval-1.0.5 → dataeval-1.0.6}/pyproject.toml RENAMED Viewed

@@ -114,17 +114,23 @@ docs = [
   "sphinx-tabs>=3.4.7",
   "Sphinx>=7.2.6,<9.0.0", # sphinx-immaterial <= 0.13.9 is not compatible with sphinx >=9.0
   "torchmetrics>=1.0.0",
-  "torchvision>=0.17.0",
   "markupsafe>=3,<3.0.2",
   "jupytext>=1.19.1",
 ]
 security = [  # keep in sync with [tool.uv.constraint-dependencies]
   "cryptography>=46.0.5",    # CVE-2026-26007: Missing Subgroup Validation for SECT Curves
   "filelock>=3.20.3",        # GHSA-w853-jp5j-5j7f, GHSA-qmgc-5h2g-mvrw
+  "onnx>=1.21.0",            # CVE-2026-28500: Untrusted Model Repository Warnings Suppressed by silent=True
+                             # CVE-2026-34445: Malicious ONNX models can crash servers by exploiting unprotected object settings
+                             # CVE-2026-27489: Vulnerable to Path Traversal via Symlink
+                             # GHSA-q56x-g2fj-4rj6: TOCTOU arbitrary file read/write in save_external_data
   "pillow>=12.1.1",          # CVE-2026-25990: OOB write via PSD image
+  "poetry>=2.3.3",           # CVE-2026-34591: Poetry Has Wheel Path Traversal Which Can Lead to Arbitrary File Write
   "protobuf>=6.33.5",        # GHSA-7gcm-g887-7qv7
   "setuptools>=82.0.0",      # CVE-2026-23949: (jaraco_context) path traversal in tarball()
                              # CVE-2026-24049: (wheel) privilege escalation via unpack
+  "tornado>=6.5.5",          # CVE-2026-31958: Tornado is vulnerable to DoS due to too many multipart parts
+                             # CVE-2026-35536: Tornado has cookie attribute injection via .RequestHandler.set_cookie
 ]
 dev = [
   { include-group = "base" },
@@ -150,10 +156,17 @@ conflicts = [
 constraint-dependencies = [
     "cryptography>=46.0.5",    # CVE-2026-26007: Missing Subgroup Validation for SECT Curves
     "filelock>=3.20.3",        # GHSA-w853-jp5j-5j7f, GHSA-qmgc-5h2g-mvrw
+    "onnx>=1.21.0",            # CVE-2026-28500: Untrusted Model Repository Warnings Suppressed by silent=True
+                               # CVE-2026-34445: Malicious ONNX models can crash servers by exploiting unprotected object settings
+                               # CVE-2026-27489: Vulnerable to Path Traversal via Symlink
+                               # GHSA-q56x-g2fj-4rj6: TOCTOU arbitrary file read/write in save_external_data
     "pillow>=12.1.1",          # CVE-2026-25990: OOB write via PSD image
+    "poetry>=2.3.3",           # CVE-2026-34591: Poetry Has Wheel Path Traversal Which Can Lead to Arbitrary File Write
     "protobuf>=6.33.5",        # GHSA-7gcm-g887-7qv7
     "setuptools>=82.0.0",      # CVE-2026-23949: (jaraco_context) path traversal in tarball()
                                # CVE-2026-24049: (wheel) privilege escalation via unpack
+    "tornado>=6.5.5",          # CVE-2026-31958: Tornado is vulnerable to DoS due to too many multipart parts
+                               # CVE-2026-35536: Tornado has cookie attribute injection via .RequestHandler.set_cookie
 ]
 [[tool.uv.index]]
@@ -211,6 +224,9 @@ version-file = "src/dataeval/_version.py"
 [tool.poetry]
 version = "0.0.0"  # unused
+[tool.poetry.dependencies]
+python = ">=3.10,<3.15"
 [tool.pyproject2conda.dependencies]
 numpy = { skip = true, packages = "numpy>=1.24.2" }
 scikit-learn = { skip = true, packages = "scikit-learn>=1.5.0" }
@@ -307,6 +323,7 @@ max-complexity = 5
 convention = "numpy"
 [tool.ruff.format]
+preview = true
 quote-style = "double"
 indent-style = "space"
 skip-magic-trailing-comma = false

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/_metadata.py RENAMED Viewed

@@ -650,7 +650,7 @@ class Metadata(Array, FeatureExtractor):
         -------
         Sequence[str]
             List of factor names that passed filtering and preprocessing steps.
-            Order matches columns in factor_data and binned_data.
+            Order matches columns in factor_data.
         Notes
         -----

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/_version.py RENAMED Viewed

@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
 commit_id: str | None
 __commit_id__: str | None
-__version__ = version = '1.0.5'
-__version_tuple__ = version_tuple = (1, 0, 5)
+__version__ = version = '1.0.6'
+__version_tuple__ = version_tuple = (1, 0, 6)
 __commit_id__ = commit_id = None

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/bias/_balance.py RENAMED Viewed

@@ -22,28 +22,30 @@ class BalanceOutput(DictOutput):
     """
     Output class for the :class:`.Balance` :term:`bias<Bias>` evaluator.
-    Contains three polars DataFrames with mutual information scores and threshold flags.
+    Contains three polars DataFrames with normalized mutual information scores and threshold flags.
     Attributes
     ----------
     balance : pl.DataFrame
-        DataFrame with global class-to-factor mutual information:
+        DataFrame with global class-to-factor normalized mutual information:
-        - factor_name: str - Name of the metadata factor
-        - mi_value: float - Mutual information value between this factor and class labels
+        - factor_name: str - Name of the metadata factor. Includes "class_label"
+          which represents the self-information (always 1.0).
+        - mi_value: float - Normalized mutual information value between this
+          factor and class labels
     factors : pl.DataFrame
-        DataFrame with inter-factor mutual information correlations:
+        DataFrame with inter-factor normalized mutual information correlations:
         - factor1: str - Name of the first factor
         - factor2: str - Name of the second factor
-        - mi_value: float - Mutual information value
+        - mi_value: float - Normalized mutual information value
         - is_correlated: bool - True if mi_value > factor_correlation_threshold
     classwise : pl.DataFrame
-        DataFrame with per-class-to-factor mutual information:
+        DataFrame with per-class-to-factor normalized mutual information:
         - class_name: str - Name of the class
         - factor_name: str - Name of the metadata factor
-        - mi_value: float - Mutual information value
+        - mi_value: float - Normalized mutual information value
         - is_imbalanced: bool - True if mi_value > class_imbalance_threshold
     """
@@ -58,21 +60,21 @@ class BalanceOutput(DictOutput):
 class Balance(Evaluator):
     """
-    Computes mutual information (MI) between factors (class label, metadata, label/image properties).
+    Computes normalized mutual information (NMI) between factors (class label, metadata, label/image properties).
     Identifies imbalanced classes and highly correlated metadata factors based on
-    mutual information thresholds.
+    NMI thresholds.
     Parameters
     ----------
     num_neighbors : int, default 5
         Number of points to consider as neighbors
     class_imbalance_threshold : float, default 0.3
-        Threshold for identifying imbalanced classes. Classes with MI above this
+        Threshold for identifying imbalanced classes. Classes with NMI above this
         threshold with any metadata factor are considered imbalanced.
     factor_correlation_threshold : float, default 0.5
         Threshold for identifying highly correlated metadata factors. Factor pairs
-        with MI above this threshold are considered highly correlated.
+        with NMI above this threshold are considered highly correlated.
     Attributes
     ----------
@@ -89,7 +91,8 @@ class Balance(Evaluator):
     -----
     We use `mutual_info_classif` from sklearn since class label is categorical.
     `mutual_info_classif` outputs are consistent up to O(1e-4) and depend on a random
-    seed. MI is computed differently for categorical and continuous variables.
+    seed. MI is computed differently for categorical and continuous variables, and
+    in all cases normalized or transformed to [0, 1] prior to being returned.
     Examples
     --------
@@ -149,7 +152,7 @@ class Balance(Evaluator):
     @set_metadata(state=["num_neighbors", "class_imbalance_threshold", "factor_correlation_threshold"])
     def evaluate(self, data: AnnotatedDataset[Any] | MetadataLike) -> BalanceOutput:  # noqa: C901
         """
-        Compute mutual information between factors and identify imbalanced classes.
+        Compute normalized mutual information between factors and identify imbalanced classes.
         Parameters
         ----------
@@ -160,7 +163,7 @@ class Balance(Evaluator):
         Returns
         -------
         BalanceOutput
-            Three DataFrames containing MI scores and threshold flags:
+            Three DataFrames containing NMI scores and threshold flags:
             - balance: Global class-to-factor mutual information
             - factors: Inter-factor mutual information
@@ -168,7 +171,7 @@ class Balance(Evaluator):
         Example
         -------
-        Return balance (mutual information) of factors with class_labels
+        Return balance (NMI) of factors with class_labels
         >>> from dataeval import Metadata
         >>> metadata = Metadata(dataset)

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/bias/_diversity.py RENAMED Viewed

@@ -56,7 +56,7 @@ class Diversity(Evaluator):
     Through standard histogram binning, for continuous variables.
     The method specified defines diversity as the inverse Simpson diversity index linearly rescaled to
-    the unit interval, or the normalized form of the Shannon entropy.
+    the unit interval [0, 1], or the normalized form of the Shannon entropy.
     diversity = 1 implies that samples are evenly distributed across a particular factor
     diversity = 0 implies that all samples belong to one category/bin
@@ -66,7 +66,9 @@ class Diversity(Evaluator):
     Parameters
     ----------
     method : "simpson" or "shannon", default "simpson"
-        The methodology used for defining diversity
+        The methodology used for defining diversity. When "simpson" is used,
+        the index is linearly rescaled so that 1.0 represents maximum diversity
+        (even distribution) and 0.0 represents minimum diversity (all samples in one bin).
     threshold : float, default 0.5
         Threshold for identifying low diversity. Factors with diversity values
         at or below this threshold are flagged as having low diversity.

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/bias/_parity.py RENAMED Viewed

@@ -118,8 +118,6 @@ class Parity(Evaluator):
     >>> config = Parity.Config(score_threshold=0.4, p_value_threshold=0.01)
     >>> parity = Parity(config=config)
-    output = parity(metadata.binned_data, metadata.class_labels.tolist())
     """
     class Config(EvaluatorConfig):

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_ber.py RENAMED Viewed

@@ -78,6 +78,8 @@ def ber_mst(embeddings: ArrayND[float], class_labels: Array1D[int]) -> BERResult
     """
     Estimate Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree.
+    BER bounds the irreducible classification error given the current feature
+    representation — the error attributable to class overlap in embedding space.
     Uses FR with a minimum spanning tree (MST) test statistic basis.
     Parameters
@@ -137,7 +139,13 @@ def ber_knn(embeddings: ArrayND[float], class_labels: Array1D[int], k: int) -> B
     """
     Estimate Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using KNN.
-    Uses KNN test statistic basis.
+    BER bounds the irreducible classification error given the current feature
+    representation — the error attributable to class overlap in embedding space.
+    Uses KNN test statistic basis. The estimator's behavior depends on the value of k:
+    - k=1: Uses 1-NN for the lower bound and 2-NN for the upper bound.
+    - k=2: Uses 2-NN for the lower bound and 3-NN for the upper bound.
+    - 2<k<=5: Uses k-NN for the lower bound and (k+1)-NN for the upper bound.
+    - k>5: Only available for binary classification; uses k-NN for both bounds with specialized asymptotic weights.
     Parameters
     ----------
@@ -146,7 +154,7 @@ def ber_knn(embeddings: ArrayND[float], class_labels: Array1D[int], k: int) -> B
     class_labels : Array1D[int]
         Array of class labels for each image. Can be a 1D list, or array-like object.
     k : int
-        Number of nearest neighbors for KNN estimator
+        Number of nearest neighbors for KNN estimator. Should be between 1 and the number of samples.
     Returns
     -------

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_clusterer.py RENAMED Viewed

@@ -241,7 +241,7 @@ class _HDBSCANSorter:
         n_samples_per_cluster = np.bincount(labels)
         _logger.debug(
             "HDBSCAN clustering complete: %d clusters, samples per cluster: min=%d, max=%d, mean=%.1f",
-            clst.unique_clusters,
+            len(clst.unique_clusters),
             np.min(n_samples_per_cluster),
             np.max(n_samples_per_cluster),
             np.mean(n_samples_per_cluster),

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_coverage.py RENAMED Viewed

@@ -6,8 +6,8 @@ from typing import TypedDict
 import numpy as np
 from numpy.typing import NDArray
-from scipy.spatial.distance import pdist, squareform
+from dataeval.core._mst import _compute_nearest_neighbors
 from dataeval.types import Array2D
 from dataeval.utils._internal import as_numpy, ensure_embeddings, flatten_samples
@@ -22,19 +22,24 @@ class CoverageResult(TypedDict):
     ----------
     uncovered_indices : NDArray[np.intp]
         Array of indices for uncovered observations
-    critical_value_radii : NDArray[np.float64]
+    critical_value_radii : NDArray[np.float32]
         Array of critical value radii for each observation
     coverage_radius : float
         The radius threshold for coverage
     """
     uncovered_indices: NDArray[np.intp]
-    critical_value_radii: NDArray[np.float64]
+    critical_value_radii: NDArray[np.float32]
     coverage_radius: float
-def _validate_inputs(embeddings: NDArray[np.float64], num_observations: int) -> NDArray[np.float64]:
-    embeddings = ensure_embeddings(embeddings, dtype=np.float64, unit_interval=True)
+def _validate_inputs(
+    embeddings: NDArray[np.float64],
+    num_observations: int,
+    force_unit_interval: bool = False,
+) -> NDArray[np.float64]:
+    unit_interval = "force" if force_unit_interval else True
+    embeddings = ensure_embeddings(embeddings, dtype=np.float64, unit_interval=unit_interval)
     if len(embeddings) <= num_observations:
         raise ValueError(
             f"Length of embeddings ({len(embeddings)}) is less than or equal to the specified number of \
@@ -43,15 +48,17 @@ def _validate_inputs(embeddings: NDArray[np.float64], num_observations: int) ->
     return embeddings
-def _calculate_critical_value_radii(embeddings: NDArray[np.float64], num_observations: int) -> NDArray[np.float64]:
-    embeddings_matrix = squareform(pdist(flatten_samples(embeddings))).astype(np.float64)
-    sorted_dists = np.sort(embeddings_matrix, axis=1)
-    return sorted_dists[:, num_observations]
+def _calculate_critical_value_radii(embeddings: NDArray[np.float64], num_observations: int) -> NDArray[np.float32]:
+    _, embeddings_matrix = _compute_nearest_neighbors(
+        flatten_samples(embeddings), None, num_observations, return_distances=True
+    )
+    return embeddings_matrix[:, -1]
 def coverage_naive(
     embeddings: Array2D[float],
     num_observations: int,
+    force_unit_interval: bool = False,
 ) -> CoverageResult:
     """
     Evaluate :term:`coverage<Coverage>` using a naive radius calculation method.
@@ -68,6 +75,9 @@ def coverage_naive(
     num_observations : int
         Number of observations required in order to be covered.
         [1] suggests that a minimum of 20-50 samples is necessary.
+    force_unit_interval : bool, default False
+        If True, embeddings will be automatically rescaled to the unit interval [0, 1].
+        If False, a ValueError is raised if embeddings are outside [0, 1].
     Returns
     -------
@@ -81,7 +91,7 @@ def coverage_naive(
     Raises
     ------
     ValueError
-        If embeddings are not unit interval [0-1]
+        If embeddings are not unit interval [0-1] and force_unit_interval is False
     ValueError
         If length of :term:`embeddings<Embeddings>` is less than or equal to num_observations
@@ -101,7 +111,9 @@ def coverage_naive(
     """
     _logger.info("Starting coverage_naive calculation with num_observations=%d", num_observations)
-    embeddings_np = _validate_inputs(as_numpy(embeddings, dtype=np.float64, required_ndim=2), num_observations)
+    embeddings_np = _validate_inputs(
+        as_numpy(embeddings, dtype=np.float64, required_ndim=2), num_observations, force_unit_interval
+    )
     _logger.debug("Embeddings shape: %s", embeddings_np.shape)
     critical_value_radii = _calculate_critical_value_radii(embeddings_np, num_observations)
@@ -132,6 +144,7 @@ def coverage_adaptive(
     embeddings: Array2D[float],
     num_observations: int,
     percent: float,
+    force_unit_interval: bool = False,
 ) -> CoverageResult:
     """
     Evaluate :term:`coverage<Coverage>` using an adaptive radius calculation method.
@@ -150,6 +163,9 @@ def coverage_adaptive(
         [1] suggests that a minimum of 20-50 samples is necessary.
     percent : float
         Percent of observations to be considered uncovered. Should be between 0 and 1.
+    force_unit_interval : bool, default False
+        If True, embeddings will be automatically rescaled to the unit interval [0, 1].
+        If False, a ValueError is raised if embeddings are outside [0, 1].
     Returns
     -------
@@ -163,7 +179,7 @@ def coverage_adaptive(
     Raises
     ------
     ValueError
-        If embeddings are not unit interval [0-1]
+        If embeddings are not unit interval [0-1] and force_unit_interval is False
     ValueError
         If length of :term:`embeddings<Embeddings>` is less than or equal to num_observations
@@ -188,7 +204,9 @@ def coverage_adaptive(
         percent,
     )
-    embeddings = _validate_inputs(as_numpy(embeddings, dtype=np.float64, required_ndim=2), num_observations)
+    embeddings = _validate_inputs(
+        as_numpy(embeddings, dtype=np.float64, required_ndim=2), num_observations, force_unit_interval
+    )
     _logger.debug("Embeddings shape: %s", embeddings.shape)
     critical_value_radii = _calculate_critical_value_radii(embeddings, num_observations)

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_feature_distance.py RENAMED Viewed

@@ -33,7 +33,8 @@ class FeatureDistanceResult(TypedDict):
     location : float
         The normalized location where the KS statistic was achieved
     dist : float
-        The Earth Mover's Distance (Wasserstein distance) between distributions
+        The Wasserstein distance between distributions, scaled by the
+        Interquartile Range (IQR) of the reference distribution.
     p_value : float
         The p-value from the KS test
     """
@@ -67,7 +68,7 @@ def feature_distance(
     Measure the feature-wise distance between two continuous distributions.
     Computes a p-value to evaluate its significance.
-    Uses the Earth Mover's Distance and the Kolmogorov-Smirnov two-sample test, featurewise.
+    Uses the Kolmogorov-Smirnov two-sample test and an IQR-scaled Wasserstein distance, featurewise.
     Parameters
     ----------
@@ -83,7 +84,7 @@ def feature_distance(
         - statistic: float - The Kolmogorov-Smirnov test statistic
         - location: float - The normalized location where the KS statistic was achieved
-        - dist: float - The Earth Mover's Distance between distributions
+        - dist: float - The IQR-scaled Wasserstein distance between distributions
         - p_value: float - The p-value from the KS test
     See Also

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_label_parity.py RENAMED Viewed

@@ -194,9 +194,9 @@ def label_parity(
             f"Found {len(observed_dist)} unique classes in observed label distribution, "
             f"but found {len(expected_dist)} unique classes in expected label distribution. "
             "This can happen when some class ids have zero instances in one dataset but "
-            "not in the other. When initializing Parity, try setting the num_classes "
-            "parameter to the known number of unique class ids, so that classes with "
-            "zero instances are still included in the distributions.",
+            "not in the other. Try setting the num_classes parameter to the known number "
+            "of unique class ids, so that classes with zero instances are still included "
+            "in the distributions.",
         )
     cs, p = chisquare(f_obs=observed_dist, f_exp=expected_dist)

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_metadata_insights.py RENAMED Viewed

@@ -16,12 +16,6 @@ from dataeval.protocols import SequenceLike
 _logger = logging.getLogger(__name__)
-_NATS2BITS = 1.442695
-"""
-_NATS2BITS is the reciprocal of natural log of 2. If you have an information/entropy-type quantity measured in nats,
-which is what many library functions return, multiply it by _NATS2BITS to get it in bits.
-"""
 def _calc_median_deviations(reference: NDArray[Any], test: NDArray[Any]) -> NDArray[Any]:
     """
@@ -189,12 +183,14 @@ def factor_predictors(  # noqa: C901
     discrete_features: list[bool] | None = None,
 ) -> Mapping[str, float]:
     """
-    Compute mutual information between metadata factors and flagged sample indices.
+    Compute a measure of mutual information between metadata factors and flagged sample indices.
     Given a set of metadata factors per sample and indices of flagged samples, this function
     calculates the mutual information between each factor and the flagged status.
     In other words, it finds which metadata factors most likely correlate to a
-    flagged sample (e.g., outliers, OOD samples, or other anomalies).
+    flagged sample (e.g., outliers, OOD samples, or other anomalies). The maximum possible MI
+    is equal to the entropy of the flagged indices, so we normalize by that entropy in order
+    to return a measure of association on a scale from 0 to 1.
     Parameters
     ----------
@@ -213,14 +209,15 @@ def factor_predictors(  # noqa: C901
     -------
     Mapping[str, float]
         A map with keys corresponding to factor names, and values indicating the strength of association
-        between each named factor and the flagged status, as mutual information measured in bits.
+        between each named factor and the flagged status, as normalized mutual information.
         Returns dict with 0.0 values for all factors if no indices are provided.
     Notes
     -----
     A high mutual information between a factor and flagged samples is an indication of correlation,
     but not causation. Additional analysis should be done to determine how to handle factors
-    with a high mutual information.
+    with a high mutual information. Note that "high" is always relative to the information
+    or entropy represented by the flagged indices, which is why we use that entropy to normalize.
     Examples
     --------
@@ -230,7 +227,7 @@ def factor_predictors(  # noqa: C901
     ... }
     >>> indices = [2, 3, 4]  # Flag last three samples
     >>> factor_predictors(factors, indices)
-    {'time': 0.8415720833333329, 'altitude': 0.0}
+    {'time': 0.866750699769533, 'altitude': 0.0}
     """
     if not factors:
         raise ValueError("factors dictionary cannot be empty")
@@ -266,15 +263,22 @@ def factor_predictors(  # noqa: C901
             f"discrete_features length ({len(discrete_features)}) must match number of factors ({len(factor_names)})",
         )
-    mutual_info_values = (
-        mutual_info_classif(
-            X=scaled_data,
-            y=sample_mask,
-            discrete_features=discrete_features,  # type: ignore - sklearn function not typed
-            random_state=get_seed(),
-            n_jobs=get_max_processes(),  # type: ignore
-        )
-        * _NATS2BITS
+    mutual_info_values = mutual_info_classif(
+        X=scaled_data,
+        y=sample_mask,
+        discrete_features=discrete_features,  # type: ignore - sklearn function not typed
+        random_state=get_seed(),
+        n_jobs=get_max_processes(),  # type: ignore
     )
+    # We normalize the mutual info by the entropy of the flag, i.e. by its maximal
+    #   information content. This yields a true measure of the strength of
+    #   association between metadata factors and the flag, from 0 to 1.
+    if 0 < (frac_flagged := len(indices) / n_samples) < 1:
+        flagged_entropy = -(frac_flagged * np.log(frac_flagged) + (1 - frac_flagged) * np.log(1 - frac_flagged))
+        mutual_info_values = np.clip(mutual_info_values / flagged_entropy, 0, 1)
+    else:
+        # all or none are flagged, no MI possible.
+        mutual_info_values = np.zeros_like(mutual_info_values)
     return {k: mutual_info_values[i] for i, k in enumerate(factor_names)}

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_mst.py RENAMED Viewed

@@ -100,7 +100,7 @@ def _compute_nearest_neighbors(
     distances, neighbors = nbrs.kneighbors(data_query, return_distance=True)
     if return_distances:
-        return neighbors, distances
+        return neighbors, distances.astype(np.float32)
     return neighbors

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_mutual_info.py RENAMED Viewed

@@ -19,7 +19,7 @@ _logger = logging.getLogger(__name__)
 class MutualInfoResult(TypedDict):
     """
-    Type definition for mutual information output.
+    Type definition for normalized mutual information output.
     Attributes
     ----------
@@ -60,18 +60,18 @@ def _merge_labels_and_factors(
     factor_data: NDArray[np.intp],
     discrete_features: Iterable[bool] | None,
 ) -> tuple[NDArray[np.intp], list[bool]]:
-    discrete_features = [True] + (
+    discrete_list = [True] + (
         [not is_continuous(d) for d in factor_data.T] if discrete_features is None else list(discrete_features)
     )
     # Use numeric data for MI
     data = np.hstack((class_labels[:, np.newaxis], factor_data))
     # Present discrete features composed of distinct values as continuous for `mutual_info_classif`
-    for i in range(len(discrete_features)):
+    for i in range(len(discrete_list)):
         if len(data) == len(np.unique(data[:, i])):
-            discrete_features[i] = False
+            discrete_list[i] = False
-    return data, discrete_features
+    return data, discrete_list
 def mutual_info(  # noqa: C901
@@ -81,7 +81,7 @@ def mutual_info(  # noqa: C901
     num_neighbors: int = 5,
 ) -> MutualInfoResult:
     """
-    Compute mutual information between factors, transformed to lie in [0, 1].
+    Compute normalized mutual information between factors, transformed to lie in [0, 1].
     Factors include class label, metadata, and label/image properties.
@@ -101,8 +101,8 @@ def mutual_info(  # noqa: C901
     MutualInfoResult
         TypedDict containing:
-        - class_to_factor: NDArray[np.float64] - 1D array of MI between class labels and each factor
-        - interfactor: NDArray[np.float64] - (num_factors) x (num_factors) matrix of MI between factors only
+        - class_to_factor: NDArray[np.float64] - 1D array of normalized MI between class labels and each factor
+        - interfactor: NDArray[np.float64] - (num_factors) x (num_factors) matrix of normalized MI between factors only
     Notes
     -----
@@ -120,7 +120,7 @@ def mutual_info(  # noqa: C901
     Example
     -------
-    Return balance (mutual information) of factors with class_labels
+    Return balance (normalized mutual information) of factors with class_labels
     >>> rng = np.random.default_rng(175)
     >>> class_labels = rng.choice([0, 1, 2], size=100)
@@ -155,7 +155,7 @@ def mutual_info(  # noqa: C901
     data, discrete_list = _merge_labels_and_factors(class_labels_np, factor_data_np, discrete_feat_np)
     num_factors = len(discrete_list)
-    _logger.debug("Computing MI for %d factors (%d discrete)", num_factors, sum(discrete_list))
+    _logger.debug("Computing NMI for %d factors (%d discrete)", num_factors, sum(discrete_list))
     # initialize output matrix
     mi = np.full((num_factors, num_factors), np.nan, dtype=np.float32)
@@ -195,7 +195,7 @@ def mutual_info(  # noqa: C901
     full_matrix = 0.5 * (mi + mi.T).astype(np.float64)
     _logger.info(
-        "Mutual info calculation complete: %d factors, mean class_to_factor MI=%.4f",
+        "Mutual info calculation complete: %d factors, mean class_to_factor NMI=%.4f",
         num_factors - 1,
         np.mean(full_matrix[0, 1:]),
     )
@@ -208,12 +208,12 @@ def mutual_info(  # noqa: C901
 def mutual_info_classwise(
     class_labels: Array1D[int],
-    factor_data: Array2D[int],
+    factor_data: Array2D[int | float],
     discrete_features: Array1D[bool] | None = None,
     num_neighbors: int = 5,
 ) -> NDArray[np.float64]:
     """
-    Compute mutual information (MI) between factors, transformed to lie in [0, 1].
+    Compute normalized mutual information (NMI) between factors.
     Factors include class label, metadata, and label/image properties.
@@ -221,7 +221,7 @@ def mutual_info_classwise(
     ----------
     class_labels : Array1D[int]
         Target class labels as integer indices. Can be a 1D list, or array-like object.
-    factor_data : Array2D[int]
+    factor_data : Array2D[int | float]
         Factor values after binning or digitization. Can be a 2D list, or array-like object.
     discrete_features : Array1D[bool] | None = None
         Boolean array or iterable defining whether or not the feature set is discretized.
@@ -232,19 +232,19 @@ def mutual_info_classwise(
     Returns
     -------
     NDArray[np.float64]
-        (num_factors+1) x (num_factors+1) estimate of mutual information
+        (num_classes) x (num_factors+1) estimate of normalized mutual information
         between num_factors metadata factors and class label. Symmetry is enforced.
     Notes
     -----
     We use `mutual_info_classif` from sklearn since class label is categorical.
     `mutual_info_classif` outputs are consistent up to O(1e-4) and depend on a random
-    seed. MI is computed differently for categorical and continuous variables. We
-    return a transformation of MI onto the interval [0, 1].
+    seed. MI is computed differently for categorical and continuous variables. In all cases,
+    we return either a normalization or transformation of MI onto the interval [0, 1].
     Example
     -------
-    Return classwise balance (mutual information) of factors with individual class_labels
+    Return classwise balance (normalized mutual information) of factors with individual class_labels
     >>> rng = np.random.default_rng(175)
     >>> class_labels = rng.choice([0, 1, 2], size=100)
@@ -267,7 +267,7 @@ def mutual_info_classwise(
     _logger.info("Starting mutual_info_classwise calculation with num_neighbors=%d", num_neighbors)
     class_labels_np = as_numpy(class_labels, dtype=np.intp, required_ndim=1)
-    factor_data_np = as_numpy(factor_data, dtype=np.intp, required_ndim=2)
+    factor_data_np = as_numpy(factor_data, required_ndim=2)
     discrete_feat_np = opt_as_numpy(discrete_features, dtype=np.bool_, required_ndim=1)
     num_neighbors = _validate_num_neighbors(num_neighbors)
@@ -276,7 +276,7 @@ def mutual_info_classwise(
     u_classes = np.unique(class_labels_np)
     num_classes = len(u_classes)
-    _logger.debug("Computing classwise MI for %d classes and %d factors", num_classes, num_factors)
+    _logger.debug("Computing classwise NMI for %d classes and %d factors", num_classes, num_factors)
     # classwise targets (binary indicators)
     tgt_bin = data[:, 0][:, None] == u_classes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_uap.py RENAMED Viewed

@@ -11,14 +11,14 @@ import logging
 from sklearn.metrics import average_precision_score
 from dataeval._experimental import experimental
-from dataeval.types import Array2D
+from dataeval.protocols import ArrayLike
 from dataeval.utils._internal import as_numpy
 _logger = logging.getLogger(__name__)
 @experimental
-def uap(labels: Array2D[int], scores: Array2D[float]) -> float:
+def uap(labels: ArrayLike, scores: ArrayLike) -> float:
     """
     Estimate the empirical mean precision for the upperbound average precision.

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/performance/_sufficiency.py RENAMED Viewed

@@ -112,7 +112,7 @@ class Sufficiency(Evaluator, Generic[T, M]):
         Raises
         ------
         ValueError
-            If runs or substeps is not greater than 1
+            If runs or substeps is not at least 1
         Examples
         --------

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/quality/_duplicates.py RENAMED Viewed

@@ -988,7 +988,7 @@ class Duplicates(Evaluator):
         Attributes
         ----------
-        flags : ImageStats, default ImageStats.HASH
+        flags : ImageStats, default ImageStats.HASH_DUPLICATES_BASIC
             Statistics to compute for hash-based duplicate detection.
         cluster_sensitivity : float or None, default None
             Distance factor for cluster-based near duplicate detection. Scales

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/quality/_outliers.py RENAMED Viewed

@@ -978,10 +978,10 @@ class Outliers(Evaluator):
     - ``AdaptiveThreshold`` (default): Uses tail-weighted Double-MAD (separate MAD for
       data below and above the median) with automatic multiplier scaling for heavy
-      tails to produce asymmetric bounds. Default multiplier: 3.0.
+      tails to produce asymmetric bounds. Default multiplier: 3.5.
     - ``ModifiedZScoreThreshold``: Based on median absolute deviation. Default multiplier: 3.5.
       Modified z score = :math:`0.6745 * |x_i - x̃| / MAD`
-    - ``ZScoreThreshold``: Based on standard deviation from mean. Default multiplier: 3.
+    - ``ZScoreThreshold``: Based on standard deviation from mean. Default multiplier: 3.0.
       Z score = :math:`|x_i - \mu| / \sigma`
     - ``IQRThreshold``: Based on interquartile range. Default multiplier: 1.5.
       Outliers are outside :math:`[Q_1 - 1.5 \cdot IQR, Q_3 + 1.5 \cdot IQR]`

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/scope/_prioritize.py RENAMED Viewed

@@ -364,6 +364,9 @@ class Prioritize(Evaluator):
     ----------
     extractor : FeatureExtractor
         Feature extractor instance to use for extracting embeddings from data.
+    batch_size : int or None, default None
+        Batch size for embedding computation. When None, uses the global
+        batch size from :func:`~dataeval.config.get_batch_size`.
     method : {"knn", "kmeans_distance", "kmeans_complexity", "hdbscan_distance", \
 "hdbscan_complexity"}, default "knn"
         Ranking method to use:
@@ -470,6 +473,9 @@ class Prioritize(Evaluator):
         extractor : FeatureExtractor or None
             Feature extractor instance to use for extracting embeddings
             from data.
+        batch_size : int or None, default None
+            Batch size for embedding computation. When None, uses the global
+            batch size from :func:`~dataeval.config.get_batch_size`.
         method : {"knn", "kmeans_distance", "kmeans_complexity", "hdbscan_distance", \
 "hdbscan_complexity"}, default "knn"
             Ranking method to use.
@@ -481,9 +487,6 @@ class Prioritize(Evaluator):
             Number of K-means initializations (kmeans methods only).
         max_cluster_size : int or None, default None
             Maximum cluster size for HDBSCAN methods.
-        batch_size : int or None, default None
-            Batch size for embedding computation. When None, uses the global
-            batch size from :func:`~dataeval.config.get_batch_size`.
         order : {"easy_first", "hard_first"}, default "easy_first"
             Sort direction for output indices.
         policy : {"difficulty", "stratified", "class_balanced"}, default "difficulty"

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_drift/_base.py RENAMED Viewed

@@ -43,7 +43,8 @@ class DriftOutput(DictOutput, Generic[TDetails]):
         For multivariate methods, this is the corrected threshold after
         Bonferroni or FDR correction.
     distance : float
-        Instance-level test statistic or distance metric, always >= 0.
+        Instance-level test statistic or distance metric. Typically >= 0, but can be
+        slightly negative for metrics like unbiased MMD².
         For univariate methods, this is the mean distance across all features.
         Higher values indicate greater deviation from reference distribution.
     metric_name : str

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_ood/_domain_classifier.py RENAMED Viewed

@@ -3,7 +3,7 @@
 __all__ = []
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, Literal
 import numpy as np
 from numpy.typing import NDArray
@@ -32,6 +32,11 @@ class OODDomainClassifier(ExtractorMixin, BaseOOD):
     reference as class 0, runs repeated k-fold CV, and returns per-point
     class-1 rates. Points with rates exceeding the threshold are flagged OOD.
+    Note: By default, this detector uses the ``n_std`` based threshold for
+    predictions. If a value for ``threshold_perc`` is provided (either directly
+    or via config), it will use percentile-based thresholding from reference
+    scores instead.
     Parameters
     ----------
     n_folds : int, default 5
@@ -40,11 +45,12 @@ class OODDomainClassifier(ExtractorMixin, BaseOOD):
         Number of times to repeat the k-fold split.
     n_std : float, default 2.0
         Number of standard deviations above the null mean for threshold.
+        Used when threshold_perc is not explicitly set.
     hyperparameters : dict or None, default None
         LightGBM hyperparameters.
     threshold_perc : float or None, default None
         Percentage of reference data considered normal (0-100).
-        If None, uses config.threshold_perc (default 95.0).
+        If provided, overrides ``n_std`` for percentile-based thresholding.
     extractor : FeatureExtractor or None, default None
         Feature extractor for transforming input data before scoring.
         When provided, raw data is passed through the extractor in both
@@ -59,7 +65,7 @@ class OODDomainClassifier(ExtractorMixin, BaseOOD):
     >>> test = np.random.randn(50, 8).astype(np.float32) + 3
     >>> detector = OODDomainClassifier(n_folds=3, n_repeats=3)
     >>> detector.fit(ref)
-    OODDomainClassifier(n_folds=3, n_repeats=3, n_std=2.0, threshold_perc=95.0, hyperparameters=None, extractor=None, fitted=True)
+    OODDomainClassifier(n_folds=3, n_repeats=3, n_std=2.0, threshold_perc=None, hyperparameters=None, extractor=None, fitted=True)
     >>> predictions = detector.predict(test)
     """  # noqa: E501
@@ -76,8 +82,9 @@ class OODDomainClassifier(ExtractorMixin, BaseOOD):
             Number of k-fold repeats.
         n_std : float, default 2.0
             Threshold multiplier for standard deviations above null mean.
-        threshold_perc : float, default 95.0
-            Percentile-based threshold (alternative to n_std).
+            Used when threshold_perc is None.
+        threshold_perc : float or None, default None
+            Percentile-based threshold. If provided, overrides n_std.
         hyperparameters : dict or None, default None
             LightGBM hyperparameters.
         extractor : FeatureExtractor or None, default None
@@ -87,7 +94,7 @@ class OODDomainClassifier(ExtractorMixin, BaseOOD):
         n_folds: int = 5
         n_repeats: int = 5
         n_std: float = 2.0
-        threshold_perc: float = 95.0
+        threshold_perc: float | None = None
         hyperparameters: dict[str, Any] | None = None
         extractor: FeatureExtractor | None = None
@@ -103,8 +110,11 @@ class OODDomainClassifier(ExtractorMixin, BaseOOD):
     ) -> None:
         base_config = config or OODDomainClassifier.Config()
-        threshold_perc = threshold_perc if threshold_perc is not None else base_config.threshold_perc
-        super().__init__(threshold_perc)
+        self._threshold_perc_set = threshold_perc is not None or (
+            config is not None and config.threshold_perc is not None
+        )
+        perc = threshold_perc if threshold_perc is not None else (base_config.threshold_perc or 95.0)
+        super().__init__(perc)
         self._n_folds = n_folds if n_folds is not None else base_config.n_folds
         self._n_repeats = n_repeats if n_repeats is not None else base_config.n_repeats
@@ -115,7 +125,7 @@ class OODDomainClassifier(ExtractorMixin, BaseOOD):
             n_folds=self._n_folds,
             n_repeats=self._n_repeats,
             n_std=self._n_std,
-            threshold_perc=threshold_perc,
+            threshold_perc=threshold_perc if threshold_perc is not None else base_config.threshold_perc,
             hyperparameters=self._hyperparameters,
             extractor=self._extractor,
         )
@@ -177,6 +187,12 @@ class OODDomainClassifier(ExtractorMixin, BaseOOD):
         self._ref_score = self.score(reference_data)
         return self
+    def _threshold_score(self, ood_type: Literal["feature", "instance"] = "instance") -> np.floating:
+        """Get the threshold score. Prefers n_std threshold unless threshold_perc was explicitly set."""
+        if not self._threshold_perc_set and ood_type == "instance":
+            return np.float64(self._threshold)
+        return super()._threshold_score(ood_type)
     def _score(self, x: NDArray[np.float32], batch_size: int | None = None) -> OODScoreOutput:  # noqa: ARG002
         """Compute per-point class-1 rates for test data vs reference."""
         x_ref = self._reference_data

{dataeval-1.0.5 → dataeval-1.0.6}/.gitignore RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/LICENSE RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/README.md RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/_embeddings.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/_experimental.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/_helpers.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/_log.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/_warm_cache.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/bias/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/config.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_bin.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_calculators/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_calculators/_base.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_calculators/_cache.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_calculators/_dimensionstats.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_calculators/_hashstats.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_calculators/_pixelstats.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_calculators/_register.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_calculators/_registry.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_calculators/_visualstats.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_completeness.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_compute_ratios.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_compute_stats.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_divergence.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_diversity.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_fast_hdbscan/_cluster_trees.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_fast_hdbscan/_disjoint_set.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_fast_hdbscan/_mst.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_hash.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_label_errors.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_label_stats.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_nullmodel.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_parity.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/core/_rank.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/exceptions.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/extractors/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/extractors/_bovw.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/extractors/_flatten.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/extractors/_onnx.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/extractors/_torch.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/extractors/_uncertainty.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/flags.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/performance/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/performance/_aggregator.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/performance/_output.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/performance/schedules.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/protocols.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/py.typed RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/quality/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/quality/_shared.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/scope/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/selection/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/selection/_classbalance.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/selection/_classfilter.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/selection/_indices.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/selection/_limit.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/selection/_reverse.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/selection/_select.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/selection/_shuffle.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_drift/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_drift/_chunk.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_drift/_domain_classifier.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_drift/_kneighbors.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_drift/_mmd.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_drift/_reconstruction.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_drift/_univariate.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_ood/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_ood/_base.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_ood/_kneighbors.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_ood/_reconstruction.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_shared/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_shared/_domain_classifier.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_shared/_kneighbors.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/_shared/_reconstruction.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/shift/update_strategies.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/types.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/utils/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/utils/_internal.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/utils/data.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/utils/losses.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/utils/models.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/utils/onnx.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/utils/preprocessing.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/utils/thresholds.py RENAMED Viewed

File without changes

{dataeval-1.0.5 → dataeval-1.0.6}/src/dataeval/utils/training.py RENAMED Viewed

File without changes

dataeval 1.0.5__tar.gz → 1.0.6__tar.gz

dataeval 1.0.5__tar.gz → 1.0.6__tar.gz