PyPI - dataeval - Versions diffs - 1.0.4__tar.gz → 1.0.6__tar.gz - Mend

dataeval 1.0.4__tar.gz → 1.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111) hide show

{dataeval-1.0.4 → dataeval-1.0.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataeval
-Version: 1.0.4
+Version: 1.0.6
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Project-URL: Homepage, https://dataeval.ai/
 Project-URL: Repository, https://github.com/aria-ml/dataeval/

{dataeval-1.0.4 → dataeval-1.0.6}/pyproject.toml RENAMED Viewed

@@ -114,10 +114,24 @@ docs = [
   "sphinx-tabs>=3.4.7",
   "Sphinx>=7.2.6,<9.0.0", # sphinx-immaterial <= 0.13.9 is not compatible with sphinx >=9.0
   "torchmetrics>=1.0.0",
-  "torchvision>=0.17.0",
   "markupsafe>=3,<3.0.2",
   "jupytext>=1.19.1",
 ]
+security = [  # keep in sync with [tool.uv.constraint-dependencies]
+  "cryptography>=46.0.5",    # CVE-2026-26007: Missing Subgroup Validation for SECT Curves
+  "filelock>=3.20.3",        # GHSA-w853-jp5j-5j7f, GHSA-qmgc-5h2g-mvrw
+  "onnx>=1.21.0",            # CVE-2026-28500: Untrusted Model Repository Warnings Suppressed by silent=True
+                             # CVE-2026-34445: Malicious ONNX models can crash servers by exploiting unprotected object settings
+                             # CVE-2026-27489: Vulnerable to Path Traversal via Symlink
+                             # GHSA-q56x-g2fj-4rj6: TOCTOU arbitrary file read/write in save_external_data
+  "pillow>=12.1.1",          # CVE-2026-25990: OOB write via PSD image
+  "poetry>=2.3.3",           # CVE-2026-34591: Poetry Has Wheel Path Traversal Which Can Lead to Arbitrary File Write
+  "protobuf>=6.33.5",        # GHSA-7gcm-g887-7qv7
+  "setuptools>=82.0.0",      # CVE-2026-23949: (jaraco_context) path traversal in tarball()
+                             # CVE-2026-24049: (wheel) privilege escalation via unpack
+  "tornado>=6.5.5",          # CVE-2026-31958: Tornado is vulnerable to DoS due to too many multipart parts
+                             # CVE-2026-35536: Tornado has cookie attribute injection via .RequestHandler.set_cookie
+]
 dev = [
   { include-group = "base" },
   { include-group = "lint" },
@@ -141,9 +155,18 @@ conflicts = [
 ]
 constraint-dependencies = [
     "cryptography>=46.0.5",    # CVE-2026-26007: Missing Subgroup Validation for SECT Curves
+    "filelock>=3.20.3",        # GHSA-w853-jp5j-5j7f, GHSA-qmgc-5h2g-mvrw
+    "onnx>=1.21.0",            # CVE-2026-28500: Untrusted Model Repository Warnings Suppressed by silent=True
+                               # CVE-2026-34445: Malicious ONNX models can crash servers by exploiting unprotected object settings
+                               # CVE-2026-27489: Vulnerable to Path Traversal via Symlink
+                               # GHSA-q56x-g2fj-4rj6: TOCTOU arbitrary file read/write in save_external_data
     "pillow>=12.1.1",          # CVE-2026-25990: OOB write via PSD image
+    "poetry>=2.3.3",           # CVE-2026-34591: Poetry Has Wheel Path Traversal Which Can Lead to Arbitrary File Write
+    "protobuf>=6.33.5",        # GHSA-7gcm-g887-7qv7
     "setuptools>=82.0.0",      # CVE-2026-23949: (jaraco_context) path traversal in tarball()
                                # CVE-2026-24049: (wheel) privilege escalation via unpack
+    "tornado>=6.5.5",          # CVE-2026-31958: Tornado is vulnerable to DoS due to too many multipart parts
+                               # CVE-2026-35536: Tornado has cookie attribute injection via .RequestHandler.set_cookie
 ]
 [[tool.uv.index]]
@@ -201,6 +224,9 @@ version-file = "src/dataeval/_version.py"
 [tool.poetry]
 version = "0.0.0"  # unused
+[tool.poetry.dependencies]
+python = ">=3.10,<3.15"
 [tool.pyproject2conda.dependencies]
 numpy = { skip = true, packages = "numpy>=1.24.2" }
 scikit-learn = { skip = true, packages = "scikit-learn>=1.5.0" }
@@ -262,20 +288,23 @@ exclude = [
   ".jupyter_cache",
   "*env*",
   "output",
+  "_build",
   "build",
   ".nox",
   ".tox",
+  "prototype",
   "src/dataeval/_version.py",
 ]
 line-length = 120
 indent-width = 4
 target-version = "py310"
+extend-include = ["*.ipynb"]
 [tool.ruff.lint]
 select = ["F", "E", "W", "C90", "I", "N", "D", "UP", "YTT", "ANN", "S", "BLE", "B", "A",
           "COM", "C4", "T10", "ISC", "ICN", "PYI", "PT", "Q", "RSE", "RET", "SLF", "SIM",
-          "TID252", "ARG", "FIX", "PD", "FLY", "NPY", "RUF100", "PERF"]
-ignore = ["ANN401", "COM812", "NPY002", "SLF001"]
+          "TID252", "ARG", "FIX", "PD", "FLY", "NPY", "RUF027", "RUF100", "PERF"]
+ignore = ["ANN101", "ANN102", "ANN401", "C408", "C416", "COM812", "NPY002", "SLF001"]
 fixable = ["ALL"]
 unfixable = []
 dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
@@ -287,10 +316,14 @@ builtins-strict-checking = false
 [tool.ruff.lint.isort]
 known-first-party = ["dataeval"]
+[tool.ruff.lint.mccabe]
+max-complexity = 5
 [tool.ruff.lint.pydocstyle]
 convention = "numpy"
 [tool.ruff.format]
+preview = true
 quote-style = "double"
 indent-style = "space"
 skip-magic-trailing-comma = false

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/_embeddings.py RENAMED Viewed

@@ -509,7 +509,7 @@ class Embeddings(Array, FeatureExtractor):
             images.append(image)
         return images
-    def _batch(self, indices: Sequence[int]) -> Iterator[NDArray[Any]]:
+    def _batch(self, indices: Sequence[int]) -> Iterator[NDArray[Any]]:  # noqa: C901
         """Process indices in batches using the extractor."""
         if self._dataset is None:
             raise NotFittedError("No dataset bound. Call bind() first.")
@@ -559,7 +559,7 @@ class Embeddings(Array, FeatureExtractor):
             batch_indices = list(indices[batch_start : batch_start + self._batch_size])
             yield self._embeddings[batch_indices]
-    def __getitem__(self, key: int | Iterable[int] | slice, /) -> NDArray[Any]:
+    def __getitem__(self, key: int | Iterable[int] | slice, /) -> NDArray[Any]:  # noqa: C901
         """
         Access embeddings by index, indices or slice.

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/_experimental.py RENAMED Viewed

@@ -13,7 +13,7 @@ from dataeval.exceptions import DeprecatedWarning, ExperimentalWarning
 F = TypeVar("F", bound=Callable[..., Any])
-def _make_warning_message(
+def _make_warning_message(  # noqa: C901
     name: str,
     kind: str,
     *,
@@ -51,7 +51,7 @@ def _prepend_doc_note(doc: str | None, note: str) -> str:
 def experimental(_target: F) -> F: ...
 @overload
 def experimental(*, alternative: str | None = None, details: str | None = None) -> Callable[[F], F]: ...
-def experimental(
+def experimental(  # noqa: C901
     _target: F | None = None,
     *,
     alternative: str | None = None,
@@ -72,7 +72,7 @@ def experimental(
         def my_func(): ...
     """
-    def decorator(target: F) -> F:
+    def decorator(target: F) -> F:  # noqa: C901
         name = getattr(target, "__qualname__", getattr(target, "__name__", str(target)))
         msg = _make_warning_message(name, "experimental", alternative=alternative, details=details)
         warned = False
@@ -118,7 +118,7 @@ def deprecated(
     alternative: str | None = None,
     details: str | None = None,
 ) -> Callable[[F], F]: ...
-def deprecated(
+def deprecated(  # noqa: C901
     _target: F | None = None,
     *,
     since: str | None = None,
@@ -141,7 +141,7 @@ def deprecated(
         def old_func(): ...
     """
-    def decorator(target: F) -> F:
+    def decorator(target: F) -> F:  # noqa: C901
         name = getattr(target, "__qualname__", getattr(target, "__name__", str(target)))
         msg = _make_warning_message(
             name,

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/_metadata.py RENAMED Viewed

@@ -282,7 +282,7 @@ class Metadata(Array, FeatureExtractor):
             raise NotFittedError("No dataset bound. Call bind() first.")
         yield from self.factor_data
-    def __getitem__(self, index: int | str | slice) -> Array:
+    def __getitem__(self, index: int | str | slice) -> Array:  # noqa: C901
         """Get binned metadata for specific indices or factors.
         Parameters
@@ -650,7 +650,7 @@ class Metadata(Array, FeatureExtractor):
         -------
         Sequence[str]
             List of factor names that passed filtering and preprocessing steps.
-            Order matches columns in factor_data and binned_data.
+            Order matches columns in factor_data.
         Notes
         -----
@@ -934,7 +934,7 @@ class Metadata(Array, FeatureExtractor):
         factor = factor[0] if isinstance(factor, tuple) else factor
         return factor in self.include if self.include else factor not in self.exclude
-    def _reset_bins(self, cols: Iterable[str] | None = None) -> None:
+    def _reset_bins(self, cols: Iterable[str] | None = None) -> None:  # noqa: C901
         if self._is_binned:
             columns = self._dataframe.columns
             for col in cols or columns:
@@ -1006,7 +1006,7 @@ class Metadata(Array, FeatureExtractor):
             )
         return target_rows
-    def _get_target_factor_values(
+    def _get_target_factor_values(  # noqa: C901
         self,
         factor_name: str,
         factor_values: Any,
@@ -1152,7 +1152,7 @@ class Metadata(Array, FeatureExtractor):
             self._structure()
         return bool(self._has_targets)
-    def _process_targets(
+    def _process_targets(  # noqa: C901
         self,
         raw: list,
         labels: list,
@@ -1284,7 +1284,7 @@ class Metadata(Array, FeatureExtractor):
         existing = self._factors if hasattr(self, "_factors") else {}
         self._factors = {k: existing.get(k) for k in usable_factors}
-    def _structure(
+    def _structure(  # noqa: C901
         self,
         *,
         progress_callback: ProgressCallback | None = None,
@@ -1478,7 +1478,7 @@ class Metadata(Array, FeatureExtractor):
         df = self._add_column_with_padding(df, col_dg, ordinal.astype(np.int64), is_od)
         return df, FactorInfo("discrete", is_digitized=True)
-    def _bin(
+    def _bin(  # noqa: C901
         self,
         *,
         progress_callback: ProgressCallback | None = None,
@@ -1523,7 +1523,7 @@ class Metadata(Array, FeatureExtractor):
         self._factors.update(factor_info)
         self._is_binned = True
-    def add_factors(
+    def add_factors(  # noqa: C901
         self,
         factors: Mapping[str, Array1D[Any]],
         level: Literal["image", "target", "auto"] = "auto",

dataeval-1.0.6/src/dataeval/_version.py ADDED Viewed

@@ -0,0 +1,24 @@
+# file generated by vcs-versioning
+# don't change, don't track in version control
+from __future__ import annotations
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+version: str
+__version__: str
+__version_tuple__: tuple[int | str, ...]
+version_tuple: tuple[int | str, ...]
+commit_id: str | None
+__commit_id__: str | None
+__version__ = version = '1.0.6'
+__version_tuple__ = version_tuple = (1, 0, 6)
+__commit_id__ = commit_id = None

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/bias/_balance.py RENAMED Viewed

@@ -22,28 +22,30 @@ class BalanceOutput(DictOutput):
     """
     Output class for the :class:`.Balance` :term:`bias<Bias>` evaluator.
-    Contains three polars DataFrames with mutual information scores and threshold flags.
+    Contains three polars DataFrames with normalized mutual information scores and threshold flags.
     Attributes
     ----------
     balance : pl.DataFrame
-        DataFrame with global class-to-factor mutual information:
+        DataFrame with global class-to-factor normalized mutual information:
-        - factor_name: str - Name of the metadata factor
-        - mi_value: float - Mutual information value between this factor and class labels
+        - factor_name: str - Name of the metadata factor. Includes "class_label"
+          which represents the self-information (always 1.0).
+        - mi_value: float - Normalized mutual information value between this
+          factor and class labels
     factors : pl.DataFrame
-        DataFrame with inter-factor mutual information correlations:
+        DataFrame with inter-factor normalized mutual information correlations:
         - factor1: str - Name of the first factor
         - factor2: str - Name of the second factor
-        - mi_value: float - Mutual information value
+        - mi_value: float - Normalized mutual information value
         - is_correlated: bool - True if mi_value > factor_correlation_threshold
     classwise : pl.DataFrame
-        DataFrame with per-class-to-factor mutual information:
+        DataFrame with per-class-to-factor normalized mutual information:
         - class_name: str - Name of the class
         - factor_name: str - Name of the metadata factor
-        - mi_value: float - Mutual information value
+        - mi_value: float - Normalized mutual information value
         - is_imbalanced: bool - True if mi_value > class_imbalance_threshold
     """
@@ -58,21 +60,21 @@ class BalanceOutput(DictOutput):
 class Balance(Evaluator):
     """
-    Computes mutual information (MI) between factors (class label, metadata, label/image properties).
+    Computes normalized mutual information (NMI) between factors (class label, metadata, label/image properties).
     Identifies imbalanced classes and highly correlated metadata factors based on
-    mutual information thresholds.
+    NMI thresholds.
     Parameters
     ----------
     num_neighbors : int, default 5
         Number of points to consider as neighbors
     class_imbalance_threshold : float, default 0.3
-        Threshold for identifying imbalanced classes. Classes with MI above this
+        Threshold for identifying imbalanced classes. Classes with NMI above this
         threshold with any metadata factor are considered imbalanced.
     factor_correlation_threshold : float, default 0.5
         Threshold for identifying highly correlated metadata factors. Factor pairs
-        with MI above this threshold are considered highly correlated.
+        with NMI above this threshold are considered highly correlated.
     Attributes
     ----------
@@ -89,7 +91,8 @@ class Balance(Evaluator):
     -----
     We use `mutual_info_classif` from sklearn since class label is categorical.
     `mutual_info_classif` outputs are consistent up to O(1e-4) and depend on a random
-    seed. MI is computed differently for categorical and continuous variables.
+    seed. MI is computed differently for categorical and continuous variables, and
+    in all cases normalized or transformed to [0, 1] prior to being returned.
     Examples
     --------
@@ -147,9 +150,9 @@ class Balance(Evaluator):
         super().__init__(locals())
     @set_metadata(state=["num_neighbors", "class_imbalance_threshold", "factor_correlation_threshold"])
-    def evaluate(self, data: AnnotatedDataset[Any] | MetadataLike) -> BalanceOutput:
+    def evaluate(self, data: AnnotatedDataset[Any] | MetadataLike) -> BalanceOutput:  # noqa: C901
         """
-        Compute mutual information between factors and identify imbalanced classes.
+        Compute normalized mutual information between factors and identify imbalanced classes.
         Parameters
         ----------
@@ -160,7 +163,7 @@ class Balance(Evaluator):
         Returns
         -------
         BalanceOutput
-            Three DataFrames containing MI scores and threshold flags:
+            Three DataFrames containing NMI scores and threshold flags:
             - balance: Global class-to-factor normalized mutual information
             - factors: Inter-factor normalized mutual information
@@ -168,7 +171,7 @@ class Balance(Evaluator):
         Example
         -------
-        Return balance (mutual information) of factors with class_labels
+        Return balance (NMI) of factors with class_labels
         >>> from dataeval import Metadata
         >>> metadata = Metadata(dataset)

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/bias/_diversity.py RENAMED Viewed

@@ -56,7 +56,7 @@ class Diversity(Evaluator):
     Through standard histogram binning, for continuous variables.
     The method specified defines diversity as the inverse Simpson diversity index linearly rescaled to
-    the unit interval, or the normalized form of the Shannon entropy.
+    the unit interval [0, 1], or the normalized form of the Shannon entropy.
     diversity = 1 implies that samples are evenly distributed across a particular factor
     diversity = 0 implies that all samples belong to one category/bin
@@ -66,7 +66,9 @@ class Diversity(Evaluator):
     Parameters
     ----------
     method : "simpson" or "shannon", default "simpson"
-        The methodology used for defining diversity
+        The methodology used for defining diversity. When "simpson" is used,
+        the index is linearly rescaled so that 1.0 represents maximum diversity
+        (even distribution) and 0.0 represents minimum diversity (all samples in one bin).
     threshold : float, default 0.5
         Threshold for identifying low diversity. Factors with diversity values
         at or below this threshold are flagged as having low diversity.
@@ -135,7 +137,7 @@ class Diversity(Evaluator):
         super().__init__(locals())
     @set_metadata(state=["method", "threshold"])
-    def evaluate(self, data: AnnotatedDataset[Any] | MetadataLike) -> DiversityOutput:
+    def evaluate(self, data: AnnotatedDataset[Any] | MetadataLike) -> DiversityOutput:  # noqa: C901
         """
         Compute diversity and classwise diversity for the dataset.

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/bias/_parity.py RENAMED Viewed

@@ -118,8 +118,6 @@ class Parity(Evaluator):
     >>> config = Parity.Config(score_threshold=0.4, p_value_threshold=0.01)
     >>> parity = Parity(config=config)
-    output = parity(metadata.binned_data, metadata.class_labels.tolist())
     """
     class Config(EvaluatorConfig):

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/config.py RENAMED Viewed

@@ -23,7 +23,7 @@ from pydantic import BaseModel, ConfigDict, field_validator
 from dataeval.protocols import DeviceLike
-### GLOBAL CONFIG ###
+# GLOBAL CONFIG ###
 class GlobalConfig(BaseModel):
@@ -77,7 +77,7 @@ class GlobalConfig(BaseModel):
 _config = GlobalConfig()
-### CONTEXT MANAGER ###
+# CONTEXT MANAGER ###
 class _ConfigContextManager:
@@ -96,7 +96,7 @@ class _ConfigContextManager:
         setattr(_config, self._attr_name, self._old)
-### FUNCS ###
+# FUNCS ###
 def _todevice(device: DeviceLike) -> torch.device:

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_ber.py RENAMED Viewed

@@ -78,6 +78,8 @@ def ber_mst(embeddings: ArrayND[float], class_labels: Array1D[int]) -> BERResult
     """
     Estimate Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using a minimum spanning tree.
+    BER bounds the irreducible classification error given the current feature
+    representation — the error attributable to class overlap in embedding space.
     Uses FR with a minimum spanning tree (MST) test statistic basis.
     Parameters
@@ -137,7 +139,13 @@ def ber_knn(embeddings: ArrayND[float], class_labels: Array1D[int], k: int) -> B
     """
     Estimate Multi-class :term:`Bayes error rate<Bayes Error Rate (BER)>` using KNN.
-    Uses KNN test statistic basis.
+    BER bounds the irreducible classification error given the current feature
+    representation — the error attributable to class overlap in embedding space.
+    Uses KNN test statistic basis. The estimator's behavior depends on the value of k:
+    - k=1: Uses 1-NN for the lower bound and 2-NN for the upper bound.
+    - k=2: Uses 2-NN for the lower bound and 3-NN for the upper bound.
+    - 2<k<=5: Uses k-NN for the lower bound and (k+1)-NN for the upper bound.
+    - k>5: Only available for binary classification; uses k-NN for both bounds with specialized asymptotic weights.
     Parameters
     ----------
@@ -146,7 +154,7 @@ def ber_knn(embeddings: ArrayND[float], class_labels: Array1D[int], k: int) -> B
     class_labels : Array1D[int]
         Array of class labels for each image. Can be a 1D list, or array-like object.
     k : int
-        Number of nearest neighbors for KNN estimator
+        Number of nearest neighbors for KNN estimator. Should be between 1 and the number of samples.
     Returns
     -------

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_bin.py RENAMED Viewed

@@ -92,7 +92,7 @@ def bin_data(data: NDArray[Any], bin_method: str) -> NDArray[np.intp]:
     return np.digitize(data, bin_edges)
-def is_continuous(data: NDArray[np.number[Any]], image_indices: NDArray[np.number[Any]] | None = None) -> bool:
+def is_continuous(data: NDArray[np.number[Any]], image_indices: NDArray[np.number[Any]] | None = None) -> bool:  # noqa: C901
     """
     Determine whether the data is continuous or discrete using the Wasserstein distance.
@@ -144,7 +144,7 @@ def is_continuous(data: NDArray[np.number[Any]], image_indices: NDArray[np.numbe
     return bool(shift < DISCRETE_MIN_WD)  # if NNN is close enough to uniform, consider the sample continuous.
-def _bin_by_clusters(data: NDArray[np.number[Any]]) -> NDArray[np.float64]:
+def _bin_by_clusters(data: NDArray[np.number[Any]]) -> NDArray[np.float64]:  # noqa: C901
     """
     Bin continuous data by using the Clusterer to identify clusters.

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_clusterer.py RENAMED Viewed

@@ -99,7 +99,7 @@ class _Clusters:
         prob: NDArray[np.float64] = exp / np.sum(exp)
         return prob
-    def _sort_by_weights(self, embeddings: NDArray[np.float64]) -> NDArray[np.intp]:
+    def _sort_by_weights(self, embeddings: NDArray[np.float64]) -> NDArray[np.intp]:  # noqa: C901
         """Sort samples using complexity-based weighted sampling."""
         labels = self._get_labels(embeddings)
         pr = self._complexity(embeddings)
@@ -241,7 +241,7 @@ class _HDBSCANSorter:
         n_samples_per_cluster = np.bincount(labels)
         _logger.debug(
             "HDBSCAN clustering complete: %d clusters, samples per cluster: min=%d, max=%d, mean=%.1f",
-            clst.unique_clusters,
+            len(clst.unique_clusters),
             np.min(n_samples_per_cluster),
             np.max(n_samples_per_cluster),
             np.mean(n_samples_per_cluster),
@@ -356,7 +356,7 @@ class _HDBSCAN:
         self.cluster_selection_epsilon = 0.0
         self.cluster_selection_method = "eom"
-    def fit(self, embeddings: NDArray[np.floating]) -> "_HDBSCAN":
+    def fit(self, embeddings: NDArray[np.floating]) -> "_HDBSCAN":  # noqa: C901
         """
         Find clusters based on hierarchical density-based clustering.
@@ -541,7 +541,7 @@ class ClusterStats(TypedDict):
     nearest_cluster_idx: NDArray[np.int64]
-def compute_cluster_stats(
+def compute_cluster_stats(  # noqa: C901
     embeddings: NDArray[np.floating],
     cluster_labels: _Clusters | NDArray[np.int64],
 ) -> ClusterStats:
@@ -642,7 +642,7 @@ def compute_cluster_stats(
     )
-def cluster(
+def cluster(  # noqa: C901
     embeddings: ArrayND[float],
     algorithm: Literal["kmeans", "hdbscan"] = "hdbscan",
     n_clusters: int | None = None,

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_compute_ratios.py RENAMED Viewed

@@ -101,7 +101,7 @@ def _build_image_lookup(source_indices: Sequence[SourceIndex]) -> dict[tuple[int
     return lookup
-def _calculate_ratio_for_stat(
+def _calculate_ratio_for_stat(  # noqa: C901
     stat_name: str,
     box_value: Any,
     img_value: Any,
@@ -160,7 +160,7 @@ def _calculate_ratio_for_stat(
         return box_value
-def _validate_separate_inputs(
+def _validate_separate_inputs(  # noqa: C901
     stats_output: StatsResult,
     box_stats_output: StatsResult,
 ) -> tuple[Sequence[SourceIndex], Sequence[SourceIndex]]:
@@ -241,7 +241,7 @@ def _validate_unified_input(source_indices: Sequence[SourceIndex]) -> None:
         )
-def compute_ratios(
+def compute_ratios(  # noqa: C901
     stats_output: StatsResult,
     *,
     target_stats_output: StatsResult | None = None,

{dataeval-1.0.4 → dataeval-1.0.6}/src/dataeval/core/_compute_stats.py RENAMED Viewed

@@ -158,7 +158,7 @@ def _determine_channel_indices(calculator_output: list[dict[str, list[Any]]], nu
     return sorted(channel_indices_needed, key=lambda x: -1 if x is None else x)
-def _reconcile_stats(
+def _reconcile_stats(  # noqa: C901
     calculator_output: list[dict[str, list[Any]]],
     sorted_channels: list[int | None],
     empty_values_map: dict[str, Any],
@@ -344,7 +344,7 @@ def _aggregate_batch(
 _UNSET = object()
-def compute_stats(
+def compute_stats(  # noqa: C901
     data: Iterable[ArrayLike] | Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
     *,
     boxes: Iterable[Iterable[BoxLike] | None] | None = None,
@@ -544,7 +544,7 @@ def compute_stats(
     )
-def combine_stats_results(
+def combine_stats_results(  # noqa: C901
     results: StatsResult | Sequence[StatsResult],
 ) -> tuple[StatsMap, list[SourceIndex], list[int]]:
     """Combine one or more StatsResults into unified stats, source_index, and dataset_steps.

dataeval 1.0.4__tar.gz → 1.0.6__tar.gz

dataeval 1.0.4__tar.gz → 1.0.6__tar.gz