PyPI - dataeval - Versions diffs - 1.0.3__tar.gz → 1.0.4__tar.gz - Mend

dataeval 1.0.3__tar.gz → 1.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

{dataeval-1.0.3 → dataeval-1.0.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataeval
-Version: 1.0.3
+Version: 1.0.4
 Summary: DataEval provides a simple interface to characterize image data and its impact on model performance across classification and object-detection tasks
 Project-URL: Homepage, https://dataeval.ai/
 Project-URL: Repository, https://github.com/aria-ml/dataeval/

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/_metadata.py RENAMED Viewed

@@ -39,6 +39,7 @@ class FactorInfo:
     factor_type: Literal["categorical", "continuous", "discrete"]
     is_binned: bool = False
     is_digitized: bool = False
+    level: Literal["image", "target"] = "image"
 def _to_col(name: str, info: FactorInfo, binned: bool = True) -> str:
@@ -110,8 +111,8 @@ class Metadata(Array, FeatureExtractor):
         *,
         continuous_factor_bins: Mapping[str, int | Sequence[float]] | None = None,
         auto_bin_method: Literal["uniform_width", "uniform_count", "clusters"] = "uniform_width",
-        exclude: Sequence[str] | None = None,
-        include: Sequence[str] | None = None,
+        exclude: str | Sequence[str] | None = None,
+        include: str | Sequence[str] | None = None,
     ) -> None:
         self._class_labels: NDArray[np.intp]
         self._item_indices: NDArray[np.intp]
@@ -132,8 +133,8 @@ class Metadata(Array, FeatureExtractor):
         if exclude is not None and include is not None:
             raise ValueError("Filters for `exclude` and `include` are mutually exclusive.")
-        self._exclude = set(exclude or ())
-        self._include = set(include or ())
+        self._exclude = {exclude} if isinstance(exclude, str) else set(exclude or ())
+        self._include = {include} if isinstance(include, str) else set(include or ())
         self._target_factors_only = False
     def __repr__(self) -> str:
@@ -423,17 +424,17 @@ class Metadata(Array, FeatureExtractor):
         return self._exclude
     @exclude.setter
-    def exclude(self, value: Sequence[str]) -> None:
+    def exclude(self, value: str | Sequence[str]) -> None:
         """Set factor names to exclude from processing.
         Automatically clears include filter and resets binning state when exclusion list changes.
         Parameters
         ----------
-        value : Sequence[str]
-            Factor names to exclude from metadata analysis.
+        value : str | Sequence[str]
+            Factor name or names to exclude from metadata analysis.
         """
-        exclude = set(value)
+        exclude = {value} if isinstance(value, str) else set(value)
         if self._exclude != exclude:
             self._exclude = exclude
             self._include = set()
@@ -451,7 +452,7 @@ class Metadata(Array, FeatureExtractor):
         return self._include
     @include.setter
-    def include(self, value: Sequence[str]) -> None:
+    def include(self, value: str | Sequence[str]) -> None:
         """Set factor names to include in processing.
         Automatically clears exclude filter and resets binning state when
@@ -459,10 +460,10 @@ class Metadata(Array, FeatureExtractor):
         Parameters
         ----------
-        value : Sequence[str]
-            Factor names to include in metadata analysis.
+        value : str | Sequence[str]
+            Factor name or names to include in metadata analysis.
         """
-        include = set(value)
+        include = {value} if isinstance(value, str) else set(value)
         if self._include != include:
             self._include = include
             self._exclude = set()
@@ -1115,13 +1116,26 @@ class Metadata(Array, FeatureExtractor):
             raise ValueError(f"Invalid level: {level}. Must be 'image', 'target', or 'auto'")
     def _create_factor_column(self, data_array: NDArray, level: str, num_image_rows: int) -> list:
-        """Create a factor column with values at the appropriate level."""
+        """Create a factor column with values at the appropriate level.
+        For OD datasets with image-level factors, values are stored in image rows
+        and replicated to target rows using item_index mapping, so that bias
+        evaluators can access them via target_data.
+        """
         if level == "image":
-            # Create column: image-level values in image rows, None in target rows
-            full_data = [None] * len(self.dataframe)
-            for idx, val in enumerate(data_array):
-                full_data[idx] = val  # Image rows come first in our structure
-            return full_data
+            # Image rows get the values directly
+            image_values: list = data_array.tolist()
+            if self.has_targets():
+                # For OD datasets, replicate image-level values to target rows
+                # using the item_index column which maps each target to its source image
+                target_df = self._dataframe.filter(pl.col("target_index").is_not_null())
+                target_image_indices = target_df["item_index"].to_numpy()
+                target_values = data_array[target_image_indices].tolist()
+            else:
+                target_values = []
+            return image_values + target_values
         # level == "target"
         # Create column: None in image rows, target-level values in target rows
         return [None] * num_image_rows + list(data_array)
@@ -1267,7 +1281,8 @@ class Metadata(Array, FeatureExtractor):
             k for k in factors if not isinstance(self._dataframe.schema.get(k), pl.List | pl.Struct | pl.Array)
         }
-        self._factors = dict.fromkeys(usable_factors, None)
+        existing = self._factors if hasattr(self, "_factors") else {}
+        self._factors = {k: existing.get(k) for k in usable_factors}
     def _structure(
         self,
@@ -1492,9 +1507,12 @@ class Metadata(Array, FeatureExtractor):
         factors_to_process = [col for col in self.factor_names if not {_binned(col), _digitized(col)} & column_set]
         total_factors = len(factors_to_process)
+        target_only = self._target_factors - self._image_factors if is_od else set()
         for i, col in enumerate(factors_to_process):
             data = data_df[col].to_numpy()
             df, info = self._process_factor(df, col, data, factor_bins, is_od)
+            if is_od and col in target_only:
+                info.level = "target"
             factor_info[col] = info
             if progress_callback:

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/_version.py RENAMED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '1.0.3'
-__version_tuple__ = version_tuple = (1, 0, 3)
+__version__ = version = '1.0.4'
+__version_tuple__ = version_tuple = (1, 0, 4)
 __commit_id__ = commit_id = None

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/bias/_balance.py RENAMED Viewed

@@ -3,6 +3,7 @@ __all__ = []
 from dataclasses import dataclass
 from typing import Any, Literal
+import numpy as np
 import polars as pl
 from dataeval import Metadata
@@ -269,8 +270,9 @@ class Balance(Evaluator):
         # Include class_label as the first factor (index 0), then all metadata factors
         all_factor_names = ["class_label"] + factor_names
+        u_classes = np.unique(self.metadata.class_labels)
         for class_idx in range(classwise.shape[0]):
-            class_name = index2label.get(class_idx, str(class_idx))
+            class_name = index2label.get(int(u_classes[class_idx]), str(u_classes[class_idx]))
             for factor_idx in range(classwise.shape[1]):
                 mi_value = classwise[class_idx, factor_idx]
                 class_name_col.append(class_name)

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/bias/_diversity.py RENAMED Viewed

@@ -251,7 +251,7 @@ class Diversity(Evaluator):
         is_low_diversity_col: list[bool] = []
         for class_idx in range(classwise_div.shape[0]):
-            class_name = index2label.get(class_idx, str(class_idx))
+            class_name = index2label.get(int(u_classes[class_idx]), str(u_classes[class_idx]))
             for factor_idx in range(num_factors):
                 div_value = classwise_div[class_idx, factor_idx]
                 if not np.isnan(div_value):

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_calculators/_cache.py RENAMED Viewed

@@ -22,7 +22,13 @@ class CalculatorCache:
     This class adapts based on the data type passed in.
     """
-    def __init__(self, datum: Any, box: BoundingBox | None = None, per_channel: bool = False) -> None:
+    def __init__(
+        self,
+        datum: Any,
+        box: BoundingBox | None = None,
+        per_channel: bool = False,
+        normalize_pixel_values: bool = False,
+    ) -> None:
         is_spatial = len(datum.shape) >= 2
         self.raw = datum
         # Assume image data for now (will be generic in future)
@@ -30,6 +36,7 @@ class CalculatorCache:
         self.height: int = datum.shape[-2] if is_spatial else 0
         self.shape: tuple[int, ...] = datum.shape
         self.per_channel_mode = per_channel
+        self.normalize_pixel_values = normalize_pixel_values
         self.has_box = box is not None
         # Ensure bounding box
@@ -54,7 +61,9 @@ class CalculatorCache:
     @cached_property
     def scaled(self) -> NDArray[Any]:
-        return rescale(self.image)
+        if self.normalize_pixel_values:
+            return rescale(self.image)
+        return self.image
     @cached_property
     def per_channel(self) -> NDArray[Any]:

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_calculators/_pixelstats.py RENAMED Viewed

@@ -39,11 +39,23 @@ class PixelStatCalculator(Calculator[ImageStats]):
     def _var_func(self, data: NDArray[Any], **kw: Any) -> Any:
         return np.nanvar(data, **kw) if self._has_nan else np.var(data, **kw)
+    @cached_property
+    def _histogram_range(self) -> tuple[float, float]:
+        if self.cache.normalize_pixel_values:
+            return (0.0, 1.0)
+        from dataeval.utils.preprocessing import get_bitdepth
+        bitdepth = get_bitdepth(self.cache.scaled)
+        if bitdepth.depth == 0:
+            return (0.0, 1.0)
+        return (0.0, float(bitdepth.pmax))
     @cached_property
     def histogram(self) -> NDArray[np.float64]:
+        r = self._histogram_range
         if self.per_channel_mode:
-            return np.apply_along_axis(lambda y: np.histogram(y, bins=256, range=(0, 1))[0], 1, self.cache.per_channel)
-        return np.histogram(self.cache.scaled, bins=256, range=(0, 1))[0]
+            return np.apply_along_axis(lambda y: np.histogram(y, bins=256, range=r)[0], 1, self.cache.per_channel)
+        return np.histogram(self.cache.scaled, bins=256, range=r)[0]
     def get_applicable_flags(self) -> ImageStats:
         """Return which flags this calculator handles."""

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_compute_stats.py RENAMED Viewed

@@ -1,6 +1,7 @@
 __all__ = []
 import logging
+import warnings
 from collections.abc import Iterable, Iterator, Mapping, Sequence, Sized
 from dataclasses import dataclass
 from enum import Flag
@@ -103,6 +104,7 @@ def _collect_calculator_stats(
     datum: NDArray[Any],
     box: BoundingBox | None,
     per_channel: bool,
+    normalize_pixel_values: bool = False,
 ) -> tuple[list[dict[str, list[Any]]], dict[str, Any], list[str]]:
     """
     Collect stats from all calculators.
@@ -118,7 +120,7 @@ def _collect_calculator_stats(
     stats_list = []
     empty_values_map: dict[str, Any] = {}
     warnings: list[str] = []
-    processor = CalculatorCache(datum, box, per_channel)
+    processor = CalculatorCache(datum, box, per_channel, normalize_pixel_values=normalize_pixel_values)
     for calculator_cls, flags in calculators:
         calculator = calculator_cls(datum, processor, per_channel)
         stats_list.append(calculator.compute(flags))
@@ -224,6 +226,7 @@ def _compute_batch(
     per_image: bool,
     per_target: bool,
     per_channel: bool,
+    normalize_pixel_values: bool = False,
 ) -> DatumBatchResult:
     i, datum, boxes = args
     results: list[DatumResult] = []
@@ -248,7 +251,7 @@ def _compute_batch(
         # Collect stats from all calculators
         calculator_stats, empty_values_map, calc_warnings = _collect_calculator_stats(
-            calculators, datum, box, per_channel
+            calculators, datum, box, per_channel, normalize_pixel_values=normalize_pixel_values
         )
         # Thread calculator warnings with index context
@@ -338,6 +341,9 @@ def _aggregate_batch(
     warning_list.extend(result.warnings_list)
+_UNSET = object()
 def compute_stats(
     data: Iterable[ArrayLike] | Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]],
     *,
@@ -346,16 +352,12 @@ def compute_stats(
     per_image: bool = True,
     per_target: bool = True,
     per_channel: bool = False,
+    normalize_pixel_values: bool = _UNSET,  # type: ignore
     progress_callback: ProgressCallback | None = None,
 ) -> StatsResult:
     """
     Compute specified statistics on a set of images, optionally within bounding boxes.
-    Mixed-bit-depth datasets can produce misleading statistics when raw pixel values are
-    compared directly. To avoid this, pixel values are normalized to [0, 1] based on each
-    image's bit depth before any statistic is computed, keeping results meaningful and
-    comparable across 8-bit, 16-bit, 32-bit, and other precision images.
     Parameters
     ----------
     data : Iterable[ArrayLike] | Dataset[ArrayLike] | Dataset[tuple[ArrayLike, Any, Any]]
@@ -376,6 +378,15 @@ def compute_stats(
     per_channel : bool, default False
         If True, compute per-channel statistics. If False, statistics are
         aggregated across all channels.
+    normalize_pixel_values : bool, default True
+        If True, pixel values are normalized to [0, 1] based on each image's
+        inferred bit depth before any statistic is computed. This makes results
+        comparable across images with different bit depths (8-bit, 16-bit, etc.).
+        If False, statistics are computed on raw pixel values.
+        .. deprecated::
+            The default will change to False in v1.1. Pass explicitly to silence
+            the deprecation warning.
     progress_callback : ProgressCallback or None, default None
         Callback to report progress during calculation. Called after each image is processed
         with the current image count and total number of images (if known).
@@ -422,6 +433,15 @@ def compute_stats(
     >>> stats = compute_stats(images, boxes=boxes, per_image=True, per_target=True, per_channel=True)
     """
+    if normalize_pixel_values is _UNSET:
+        warnings.warn(
+            "The default value of normalize_pixel_values will change from True to False in v1.1. "
+            "Pass normalize_pixel_values explicitly to silence this warning.",
+            FutureWarning,
+            stacklevel=2,
+        )
+        normalize_pixel_values = True
     source_indices: list[SourceIndex] = []
     aggregated_stats: dict[str, list[Any]] = {}
     object_count: dict[int, int] = {}
@@ -484,6 +504,7 @@ def compute_stats(
                 per_image=per_image,
                 per_target=per_target,
                 per_channel=per_channel,
+                normalize_pixel_values=normalize_pixel_values,
             ),
             _enumerate_datum(images, boxes),
         ):

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/quality/_duplicates.py RENAMED Viewed

@@ -2,6 +2,7 @@
 __all__ = []
+import warnings
 from collections.abc import Mapping, Sequence
 from typing import Any, Generic, Literal, TypeVar, overload
@@ -246,10 +247,10 @@ def _group_by_dataset(row: Mapping[str, Any], has_targets: bool) -> dict[int, li
     """Group a row's members by dataset index."""
     by_ds: dict[int, list[Any]] = {}
     if has_targets:
-        for item, target, ds in zip(row["item_indices"], row["target_indices"], row["dataset_index"], strict=True):
+        for item, target, ds in zip(row["item_indices"], row["target_indices"], row["dataset_indices"], strict=True):
             by_ds.setdefault(ds, []).append(SourceIndex(item=item, target=target))
     else:
-        for item, ds in zip(row["item_indices"], row["dataset_index"], strict=True):
+        for item, ds in zip(row["item_indices"], row["dataset_indices"], strict=True):
             by_ds.setdefault(ds, []).append(item)
     return by_ds
@@ -323,11 +324,11 @@ def _make_row(
         "dup_type": dup_type,
         "item_indices": item_ids,
         "target_indices": target_ids,
-        "methods": methods,
-        "orientation": orientation,
     }
     if ds_ids is not None:
-        row["dataset_index"] = ds_ids
+        row["dataset_indices"] = ds_ids
+    row["methods"] = methods
+    row["orientation"] = orientation
     return row
@@ -492,7 +493,7 @@ class DuplicatesOutput(DataFrameOutput, Generic[TExactDuplicatesGroup, TNearDupl
     - methods: list[str] - Detection method names (e.g., ``["phash", "dhash"]``)
     - orientation: str | None - ``"same"``, ``"rotated"``, or None (only present
       when both basic and D4 hashes were computed)
-    - dataset_index: list[int] - Dataset indices for cross-dataset results (only
+    - dataset_indices: list[int] - Dataset indices for cross-dataset results (only
       present for multi-dataset output, positionally aligned with item_indices)
     Attributes
@@ -529,6 +530,19 @@ class DuplicatesOutput(DataFrameOutput, Generic[TExactDuplicatesGroup, TNearDupl
         self.merge_near_duplicates = merge_near_duplicates
         self.flags = flags
+    _COLUMN_ALIASES = {"dataset_index": "dataset_indices"}
+    def __getitem__(self, item: Any) -> Any:
+        if isinstance(item, str) and item in self._COLUMN_ALIASES:
+            new_name = self._COLUMN_ALIASES[item]
+            warnings.warn(
+                f"Column '{item}' was renamed to '{new_name}'. Access via '{item}' will be removed in v1.1.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            item = new_name
+        return self.data()[item]
     def __len__(self) -> int:
         """Return the number of duplicate groups."""
         return self.data().shape[0]
@@ -569,7 +583,7 @@ class DuplicatesOutput(DataFrameOutput, Generic[TExactDuplicatesGroup, TNearDupl
           - Single-dataset with targets: ``list[tuple[list[SourceIndex], list[str]]]``
           - Cross-dataset: wraps the above in a ``dict`` keyed by dataset index.
         """
-        is_cross = "dataset_index" in self.data().columns
+        is_cross = "dataset_indices" in self.data().columns
         has_targets = "target_indices" in self.data().columns
         is_near = dup_type == "near"
@@ -654,7 +668,7 @@ class DuplicatesOutput(DataFrameOutput, Generic[TExactDuplicatesGroup, TNearDupl
             - dup_types: list[str] - Unique duplicate types for this image
             - methods: list[str] - All unique methods that detected this image
         """
-        if "dataset_index" in self.data().columns:
+        if "dataset_indices" in self.data().columns:
             raise ValueError("aggregate_by_image only works with output from a single dataset.")
         schema: Any = {
@@ -1074,7 +1088,7 @@ class Duplicates(Evaluator):
         -------
         DuplicatesOutput
             Duplicate detection results as a DataFrame of duplicate groups.
-            For cross-dataset detection, includes a dataset_index column.
+            For cross-dataset detection, includes a dataset_indices column.
         See Also
         --------
@@ -1232,7 +1246,7 @@ class Duplicates(Evaluator):
         -------
         SingleDuplicatesOutput or MultiDuplicatesOutput
             Duplicate detection results as a DataFrame of duplicate groups.
-            For multi-dataset input, includes a ``dataset_index`` column.
+            For multi-dataset input, includes a ``dataset_indices`` column.
         Raises
         ------
@@ -1244,24 +1258,32 @@ class Duplicates(Evaluator):
         Hash-based duplicates with merged near duplicates (default):
         >>> detector = Duplicates()
-        >>> result = detector.evaluate(images)
-        >>> result
-        shape: (4, 5)
-        ┌──────────┬───────┬──────────┬───────────────┬────────────────────┐
-        │ group_id ┆ level ┆ dup_type ┆ item_indices  ┆ methods            │
-        │ ---      ┆ ---   ┆ ---      ┆ ---           ┆ ---                │
-        │ i64      ┆ str   ┆ str      ┆ list[i64]     ┆ list[str]          │
-        ╞══════════╪═══════╪══════════╪═══════════════╪════════════════════╡
-        │ 0        ┆ item  ┆ exact    ┆ [3, 20]       ┆ ["xxhash"]         │
-        │ 1        ┆ item  ┆ exact    ┆ [7, 11, … 25] ┆ ["xxhash"]         │
-        │ 2        ┆ item  ┆ exact    ┆ [16, 37]      ┆ ["xxhash"]         │
-        │ 3        ┆ item  ┆ near     ┆ [0, 1, … 49]  ┆ ["dhash", "phash"] │
-        └──────────┴───────┴──────────┴───────────────┴────────────────────┘
+        >>> detector.evaluate(images)
+        shape: (3, 5)
+        ┌──────────┬───────┬──────────┬───────────────┬────────────┐
+        │ group_id ┆ level ┆ dup_type ┆ item_indices  ┆ methods    │
+        │ ---      ┆ ---   ┆ ---      ┆ ---           ┆ ---        │
+        │ i64      ┆ str   ┆ str      ┆ list[i64]     ┆ list[str]  │
+        ╞══════════╪═══════╪══════════╪═══════════════╪════════════╡
+        │ 0        ┆ item  ┆ exact    ┆ [3, 20]       ┆ ["xxhash"] │
+        │ 1        ┆ item  ┆ exact    ┆ [7, 11, … 25] ┆ ["xxhash"] │
+        │ 2        ┆ item  ┆ exact    ┆ [16, 37]      ┆ ["xxhash"] │
+        └──────────┴───────┴──────────┴───────────────┴────────────┘
         Cross-dataset detection:
         >>> detector = Duplicates()
-        >>> result = detector.evaluate(train_ds, test_ds)
+        >>> detector.evaluate(train_ds, test_ds)
+        shape: (3, 6)
+        ┌──────────┬───────┬──────────┬───────────────┬─────────────────┬────────────┐
+        │ group_id ┆ level ┆ dup_type ┆ item_indices  ┆ dataset_indices ┆ methods    │
+        │ ---      ┆ ---   ┆ ---      ┆ ---           ┆ ---             ┆ ---        │
+        │ i64      ┆ str   ┆ str      ┆ list[i64]     ┆ list[i64]       ┆ list[str]  │
+        ╞══════════╪═══════╪══════════╪═══════════════╪═════════════════╪════════════╡
+        │ 0        ┆ item  ┆ exact    ┆ [3, 20]       ┆ [0, 0]          ┆ ["xxhash"] │
+        │ 1        ┆ item  ┆ exact    ┆ [7, 11, … 25] ┆ [0, 0, … 0]     ┆ ["xxhash"] │
+        │ 2        ┆ item  ┆ exact    ┆ [16, 37]      ┆ [0, 0]          ┆ ["xxhash"] │
+        └──────────┴───────┴──────────┴───────────────┴─────────────────┴────────────┘
         """
         if other:
             return self._evaluate_multi([data, *other], per_image=per_image, per_target=per_target)
@@ -1296,7 +1318,11 @@ class Duplicates(Evaluator):
         # Hash-based duplicate detection
         if self.flags & ImageStats.HASH:
             self.stats = compute_stats(
-                data, stats=self.flags & ImageStats.HASH, per_image=per_image, per_target=per_target
+                data,
+                stats=self.flags & ImageStats.HASH,
+                per_image=per_image,
+                per_target=per_target,
+                normalize_pixel_values=False,
             )
             (item_exact, item_near), (target_exact, target_near) = _detect_hash_duplicates(
                 self.stats["stats"], self.stats["source_index"]
@@ -1358,7 +1384,13 @@ class Duplicates(Evaluator):
         calc_results: list[StatsResult] = []
         if has_hash_detection:
             calc_results = [
-                compute_stats(ds, stats=self.flags & ImageStats.HASH, per_image=per_image, per_target=per_target)
+                compute_stats(
+                    ds,
+                    stats=self.flags & ImageStats.HASH,
+                    per_image=per_image,
+                    per_target=per_target,
+                    normalize_pixel_values=False,
+                )
                 for ds in datasets
             ]
             self.stats = calc_results[-1]

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/quality/_outliers.py RENAMED Viewed

@@ -1623,7 +1623,9 @@ class Outliers(Evaluator):
         stored_cluster_stats: ClusterStats | None = None
         if self.flags != ImageStats.NONE:
-            self.stats = compute_stats(data, stats=self.flags, per_image=per_image, per_target=per_target)
+            self.stats = compute_stats(
+                data, stats=self.flags, per_image=per_image, per_target=per_target, normalize_pixel_values=True
+            )
             stats_result = self.stats
             class_ids: NDArray[np.intp] | None = None
@@ -1674,7 +1676,10 @@ class Outliers(Evaluator):
         stats_results: list[StatsResult] = []
         if self.flags != ImageStats.NONE:
             stats_results = [
-                compute_stats(ds, stats=self.flags, per_image=per_image, per_target=per_target) for ds in datasets
+                compute_stats(
+                    ds, stats=self.flags, per_image=per_image, per_target=per_target, normalize_pixel_values=True
+                )
+                for ds in datasets
             ]
             self.stats = stats_results[-1]

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/utils/preprocessing.py RENAMED Viewed

@@ -413,7 +413,7 @@ def rescale(image: NDArray[Any], depth: int = 1) -> NDArray[Any]:
     bitdepth = get_bitdepth(image)
     if bitdepth.depth == depth:
         return image
-    normalized = (image + bitdepth.pmin) / (bitdepth.pmax - bitdepth.pmin)
+    normalized = (image - bitdepth.pmin) / (bitdepth.pmax - bitdepth.pmin)
     return normalized * (2**depth - 1)
@@ -554,6 +554,12 @@ def to_canonical_grayscale(image: NDArray[Any]) -> NDArray[np.uint8]:
     NDArray[np.uint8]
         2D grayscale array (HW) of type np.uint8
     """
+    # Rescale normalized [0, 1] float images to [0, 255] range
+    if np.issubdtype(image.dtype, np.floating) and image.size > 0:
+        pmin, pmax = np.nanmin(image), np.nanmax(image)
+        if pmax <= 1.0 and pmin >= 0.0:
+            image = image * 255.0
     channels = image.shape[0]
     # --- Case 1: Single Channel (Already Grayscale) ---

{dataeval-1.0.3 → dataeval-1.0.4}/.gitignore RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/LICENSE RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/README.md RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/pyproject.toml RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/_embeddings.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/_experimental.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/_helpers.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/_log.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/_warm_cache.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/bias/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/bias/_parity.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/config.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_ber.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_bin.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_calculators/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_calculators/_base.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_calculators/_dimensionstats.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_calculators/_hashstats.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_calculators/_register.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_calculators/_registry.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_calculators/_visualstats.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_clusterer.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_completeness.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_compute_ratios.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_coverage.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_divergence.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_diversity.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_fast_hdbscan/_cluster_trees.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_fast_hdbscan/_disjoint_set.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_fast_hdbscan/_mst.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_feature_distance.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_hash.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_label_errors.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_label_parity.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_label_stats.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_metadata_insights.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_mst.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_mutual_info.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_nullmodel.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_parity.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_rank.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/core/_uap.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/exceptions.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/extractors/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/extractors/_bovw.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/extractors/_flatten.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/extractors/_onnx.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/extractors/_torch.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/extractors/_uncertainty.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/flags.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/performance/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/performance/_aggregator.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/performance/_output.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/performance/_sufficiency.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/performance/schedules.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/protocols.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/py.typed RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/quality/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/quality/_shared.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/scope/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/scope/_prioritize.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/selection/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/selection/_classbalance.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/selection/_classfilter.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/selection/_indices.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/selection/_limit.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/selection/_reverse.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/selection/_select.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/selection/_shuffle.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_drift/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_drift/_base.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_drift/_chunk.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_drift/_domain_classifier.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_drift/_kneighbors.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_drift/_mmd.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_drift/_reconstruction.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_drift/_univariate.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_ood/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_ood/_base.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_ood/_domain_classifier.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_ood/_kneighbors.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_ood/_reconstruction.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_shared/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_shared/_domain_classifier.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_shared/_kneighbors.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/_shared/_reconstruction.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/shift/update_strategies.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/types.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/utils/__init__.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/utils/_internal.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/utils/data.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/utils/losses.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/utils/models.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/utils/onnx.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/utils/thresholds.py RENAMED Viewed

File without changes

{dataeval-1.0.3 → dataeval-1.0.4}/src/dataeval/utils/training.py RENAMED Viewed

File without changes

dataeval 1.0.3__tar.gz → 1.0.4__tar.gz

dataeval 1.0.3tar.gz → 1.0.4tar.gz