dataeval 0.86.9__py3-none-any.whl → 0.88.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataeval/__init__.py +1 -1
- dataeval/_log.py +1 -1
- dataeval/_version.py +2 -2
- dataeval/config.py +4 -19
- dataeval/data/_embeddings.py +78 -35
- dataeval/data/_images.py +41 -8
- dataeval/data/_metadata.py +348 -66
- dataeval/data/_selection.py +22 -7
- dataeval/data/_split.py +3 -2
- dataeval/data/selections/_classbalance.py +4 -3
- dataeval/data/selections/_classfilter.py +9 -8
- dataeval/data/selections/_indices.py +4 -3
- dataeval/data/selections/_prioritize.py +249 -29
- dataeval/data/selections/_reverse.py +1 -1
- dataeval/data/selections/_shuffle.py +5 -4
- dataeval/detectors/drift/_base.py +2 -1
- dataeval/detectors/drift/_mmd.py +2 -1
- dataeval/detectors/drift/_nml/_base.py +1 -1
- dataeval/detectors/drift/_nml/_chunk.py +2 -1
- dataeval/detectors/drift/_nml/_result.py +3 -2
- dataeval/detectors/drift/_nml/_thresholds.py +6 -5
- dataeval/detectors/drift/_uncertainty.py +2 -1
- dataeval/detectors/linters/duplicates.py +2 -1
- dataeval/detectors/linters/outliers.py +4 -3
- dataeval/detectors/ood/__init__.py +2 -1
- dataeval/detectors/ood/ae.py +1 -1
- dataeval/detectors/ood/base.py +39 -1
- dataeval/detectors/ood/knn.py +95 -0
- dataeval/detectors/ood/mixin.py +2 -1
- dataeval/metadata/_utils.py +1 -1
- dataeval/metrics/bias/_balance.py +29 -22
- dataeval/metrics/bias/_diversity.py +4 -4
- dataeval/metrics/bias/_parity.py +2 -2
- dataeval/metrics/stats/_base.py +3 -29
- dataeval/metrics/stats/_boxratiostats.py +2 -1
- dataeval/metrics/stats/_dimensionstats.py +2 -1
- dataeval/metrics/stats/_hashstats.py +21 -3
- dataeval/metrics/stats/_pixelstats.py +2 -1
- dataeval/metrics/stats/_visualstats.py +2 -1
- dataeval/outputs/_base.py +2 -3
- dataeval/outputs/_bias.py +2 -1
- dataeval/outputs/_estimators.py +1 -1
- dataeval/outputs/_linters.py +3 -3
- dataeval/outputs/_stats.py +3 -3
- dataeval/outputs/_utils.py +1 -1
- dataeval/outputs/_workflows.py +49 -31
- dataeval/typing.py +23 -9
- dataeval/utils/__init__.py +2 -2
- dataeval/utils/_array.py +3 -2
- dataeval/utils/_bin.py +9 -7
- dataeval/utils/_method.py +2 -3
- dataeval/utils/_multiprocessing.py +34 -0
- dataeval/utils/_plot.py +2 -1
- dataeval/utils/data/__init__.py +6 -5
- dataeval/utils/data/{metadata.py → _merge.py} +3 -2
- dataeval/utils/data/_validate.py +170 -0
- dataeval/utils/data/collate.py +2 -1
- dataeval/utils/torch/_internal.py +2 -1
- dataeval/utils/torch/trainer.py +1 -1
- dataeval/workflows/sufficiency.py +13 -9
- {dataeval-0.86.9.dist-info → dataeval-0.88.0.dist-info}/METADATA +8 -21
- dataeval-0.88.0.dist-info/RECORD +105 -0
- dataeval/utils/data/_dataset.py +0 -246
- dataeval/utils/datasets/__init__.py +0 -21
- dataeval/utils/datasets/_antiuav.py +0 -189
- dataeval/utils/datasets/_base.py +0 -266
- dataeval/utils/datasets/_cifar10.py +0 -201
- dataeval/utils/datasets/_fileio.py +0 -142
- dataeval/utils/datasets/_milco.py +0 -197
- dataeval/utils/datasets/_mixin.py +0 -54
- dataeval/utils/datasets/_mnist.py +0 -202
- dataeval/utils/datasets/_seadrone.py +0 -512
- dataeval/utils/datasets/_ships.py +0 -144
- dataeval/utils/datasets/_types.py +0 -48
- dataeval/utils/datasets/_voc.py +0 -583
- dataeval-0.86.9.dist-info/RECORD +0 -115
- {dataeval-0.86.9.dist-info → dataeval-0.88.0.dist-info}/WHEEL +0 -0
- /dataeval-0.86.9.dist-info/licenses/LICENSE.txt → /dataeval-0.88.0.dist-info/licenses/LICENSE +0 -0
dataeval/data/_metadata.py
CHANGED
```diff
@@ -3,12 +3,14 @@ from __future__ import annotations
 __all__ = []
 
 import warnings
+from collections.abc import Callable, Iterable, Mapping, Sequence, Sized
 from dataclasses import dataclass
-from typing import Any,
+from typing import Any, Literal
 
 import numpy as np
 import polars as pl
 from numpy.typing import NDArray
+from tqdm.auto import tqdm
 
 from dataeval.typing import (
     AnnotatedDataset,
@@ -16,36 +18,60 @@ from dataeval.typing import (
     ObjectDetectionTarget,
 )
 from dataeval.utils._array import as_numpy
-from dataeval.utils._bin import bin_data, digitize_data
-from dataeval.utils.data.
+from dataeval.utils._bin import bin_data, digitize_data, is_continuous
+from dataeval.utils.data._merge import merge
 
 
 def _binned(name: str) -> str:
-    return f"{name}
+    return f"{name}↕"
+
+
+def _digitized(name: str) -> str:
+    return f"{name}#"
 
 
 @dataclass
 class FactorInfo:
-    factor_type: Literal["categorical", "continuous", "discrete"]
-
+    factor_type: Literal["categorical", "continuous", "discrete"]
+    is_binned: bool = False
+    is_digitized: bool = False
+
+
+def _to_col(name: str, info: FactorInfo, binned: bool = True) -> str:
+    if binned and info.is_binned:
+        return _binned(name)
+    if info.is_digitized:
+        return _digitized(name)
+    return name
 
 
 class Metadata:
-    """
-
+    """Collection of binned metadata using Polars DataFrames.
+
+    Processes dataset metadata by automatically binning continuous factors and digitizing
+    categorical factors for analysis and visualization workflows.
 
     Parameters
     ----------
     dataset : ImageClassificationDataset or ObjectDetectionDataset
-        Dataset
+        Dataset that provides original targets and metadata for processing.
     continuous_factor_bins : Mapping[str, int | Sequence[float]] | None, default None
-        Mapping from continuous factor
+        Mapping from continuous factor names to bin counts or explicit bin edges.
+        When None, uses automatic discretization.
     auto_bin_method : Literal["uniform_width", "uniform_count", "clusters"], default "uniform_width"
-
+        Binning strategy for continuous factors without explicit bins. Default "uniform_width"
+        provides intuitive equal-width intervals for most distributions.
     exclude : Sequence[str] | None, default None
-
+        Factor names to exclude from processing. Cannot be used with `include` parameter.
+        When None, processes all available factors.
     include : Sequence[str] | None, default None
-
+        Factor names to include in processing. Cannot be used with `exclude` parameter.
+        When None, processes all available factors.
+
+    Raises
+    ------
+    ValueError
+        When both exclude and include parameters are specified simultaneously.
     """
 
     def __init__(
@@ -60,7 +86,7 @@ class Metadata:
         self._class_labels: NDArray[np.intp]
         self._class_names: list[str]
         self._image_indices: NDArray[np.intp]
-        self._factors: dict[str, FactorInfo]
+        self._factors: dict[str, FactorInfo | None]
         self._dropped_factors: dict[str, list[str]]
         self._dataframe: pl.DataFrame
         self._raw: Sequence[Mapping[str, Any]]
@@ -81,17 +107,48 @@ class Metadata:
 
     @property
     def raw(self) -> Sequence[Mapping[str, Any]]:
-        """
+        """Original metadata dictionaries extracted from the dataset.
+
+        Access the unprocessed metadata as it was provided in the original dataset before
+        any binning, filtering, or transformation operations.
+
+        Returns
+        -------
+        Sequence[Mapping[str, Any]]
+            List of metadata dictionaries, one per dataset item, containing the original key-value
+            pairs as provided in the source data
+
+        Notes
+        -----
+        This property triggers dataset structure analysis on first access.
+        """
         self._structure()
         return self._raw
 
     @property
     def exclude(self) -> set[str]:
-        """
+        """Factor names excluded from metadata processing.
+
+        Returns
+        -------
+        set[str]
+            Set of factor names that are filtered out during processing.
+            Empty set when no exclusions are active.
+
+        """
         return self._exclude
 
     @exclude.setter
     def exclude(self, value: Sequence[str]) -> None:
+        """Set factor names to exclude from processing.
+
+        Automatically clears include filter and resets binning state when exclusion list changes.
+
+        Parameters
+        ----------
+        value : Sequence[str]
+            Factor names to exclude from metadata analysis.
+        """
         exclude = set(value)
         if self._exclude != exclude:
             self._exclude = exclude
@@ -100,11 +157,27 @@ class Metadata:
 
     @property
     def include(self) -> set[str]:
-        """
+        """Factor names included in metadata processing.
+
+        Returns
+        -------
+        set[str]
+            Set of factor names that are processed during analysis. Empty set when no inclusion filter is active.
+        """
         return self._include
 
     @include.setter
     def include(self, value: Sequence[str]) -> None:
+        """Set factor names to include in processing.
+
+        Automatically clears exclude filter and resets binning state when
+        inclusion list changes.
+
+        Parameters
+        ----------
+        value : Sequence[str]
+            Factor names to include in metadata analysis.
+        """
         include = set(value)
         if self._include != include:
             self._include = include
@@ -113,66 +186,214 @@ class Metadata:
 
     @property
     def continuous_factor_bins(self) -> Mapping[str, int | Sequence[float]]:
-        """
+        """Binning configuration for continuous factors.
+
+        Returns
+        -------
+        Mapping[str, int | Sequence[float]]
+            Dictionary mapping factor names to either the number of bins
+            (int) or explicit bin edges (sequence of floats).
+        """
         return self._continuous_factor_bins
 
     @continuous_factor_bins.setter
     def continuous_factor_bins(self, bins: Mapping[str, int | Sequence[float]]) -> None:
+        """Update binning configuration for continuous factors.
+
+        Triggers re-binning when configuration changes to ensure data
+        consistency with new bin specifications.
+
+        Parameters
+        ----------
+        bins : Mapping[str, int | Sequence[float]]
+            Dictionary mapping factor names to bin counts or explicit edges.
+        """
         if self._continuous_factor_bins != bins:
             self._continuous_factor_bins = dict(bins)
             self._reset_bins(bins)
 
     @property
     def auto_bin_method(self) -> Literal["uniform_width", "uniform_count", "clusters"]:
-        """
+        """Automatic binning strategy for continuous factors.
+
+        Returns
+        -------
+        {"uniform_width", "uniform_count", "clusters"}
+            Current method used for automatic discretization of continuous
+            factors that lack explicit bin specifications.
+        """
         return self._auto_bin_method
 
     @auto_bin_method.setter
     def auto_bin_method(self, method: Literal["uniform_width", "uniform_count", "clusters"]) -> None:
+        """Set automatic binning strategy for continuous factors.
+
+        Triggers re-binning with the new method when strategy changes to
+        ensure consistent discretization across all factors.
+
+        Parameters
+        ----------
+        method : {"uniform_width", "uniform_count", "clusters"}
+            Binning strategy to apply for continuous factors without
+            explicit bin configurations.
+        """
         if self._auto_bin_method != method:
             self._auto_bin_method = method
             self._reset_bins()
 
     @property
     def dataframe(self) -> pl.DataFrame:
-        """
+        """Processed DataFrame containing targets and metadata factors.
+
+        Access the main data structure with target information (class labels,
+        scores, bounding boxes) and processed metadata factors ready for analysis.
+
+        Returns
+        -------
+        pl.DataFrame
+            DataFrame with columns for image indices, class labels, scores,
+            bounding boxes (when applicable), and all processed metadata factors.
+
+        Notes
+        -----
+        This property triggers dataset structure analysis on first access.
+        Factor binning occurs automatically when accessing factor-related data.
+        """
         self._structure()
         return self._dataframe
 
     @property
     def dropped_factors(self) -> Mapping[str, Sequence[str]]:
-        """Factors
+        """Factors removed during preprocessing with removal reasons.
+
+        Returns
+        -------
+        Mapping[str, Sequence[str]]
+            Dictionary mapping dropped factor names to lists of reasons
+            why they were excluded from the final dataset.
+
+        Notes
+        -----
+        This property triggers dataset structure analysis on first access.
+        Common removal reasons include incompatible data types, excessive
+        missing values, or insufficient variation.
+        """
         self._structure()
         return self._dropped_factors
 
     @property
-    def
-        """Factor data with
+    def digitized_data(self) -> NDArray[np.int64]:
+        """Factor data with categorical values converted to integer codes.
+
+        Access processed factor data where categorical factors are digitized
+        to integer codes but continuous factors remain in their original form.
+
+        Returns
+        -------
+        NDArray[np.int64]
+            Array with shape (n_samples, n_factors) containing integer-coded
+            categorical data. Returns empty array when no factors are available.
+
+        Notes
+        -----
+        This property triggers factor binning analysis on first access.
+        Use this for algorithms that can handle mixed categorical and
+        continuous data types.
+        """
         if not self.factor_names:
             return np.array([], dtype=np.int64)
 
         self._bin()
         return (
-            self.dataframe.select([
+            self.dataframe.select([_to_col(k, v, False) for k, v in self.factor_info.items()])
+            .to_numpy()
+            .astype(np.int64)
+        )
+
+    @property
+    def binned_data(self) -> NDArray[np.int64]:
+        """Factor data with continuous values discretized into bins.
+
+        Access fully processed factor data where both categorical and
+        continuous factors are converted to integer bin indices.
+
+        Returns
+        -------
+        NDArray[np.int64]
+            Array with shape (n_samples, n_factors) containing binned integer
+            data ready for categorical analysis algorithms. Returns empty array
+            when no factors are available.
+
+        Notes
+        -----
+        This property triggers factor binning analysis on first access.
+        Use this for algorithms requiring purely discrete input data.
+        """
+        if not self.factor_names:
+            return np.array([], dtype=np.int64)
+
+        self._bin()
+        return (
+            self.dataframe.select([_to_col(k, v, True) for k, v in self.factor_info.items()])
             .to_numpy()
             .astype(np.int64)
         )
 
     @property
     def factor_names(self) -> Sequence[str]:
-        """
+        """Names of all processed metadata factors.
+
+        Returns
+        -------
+        Sequence[str]
+            List of factor names that passed filtering and preprocessing steps.
+            Order matches columns in factor_data, digitized_data, and binned_data.
+
+        Notes
+        -----
+        This property triggers dataset structure analysis on first access.
+        Factor names respect include/exclude filtering settings.
+        """
         self._structure()
         return list(filter(self._filter, self._factors))
 
     @property
     def factor_info(self) -> Mapping[str, FactorInfo]:
-        """
+        """Type information and processing status for each factor.
+
+        Returns
+        -------
+        Mapping[str, FactorInfo]
+            Dictionary mapping factor names to FactorInfo objects containing
+            data type classification and processing flags (binned, digitized).
+
+        Notes
+        -----
+        This property triggers factor binning analysis on first access.
+        Only includes factors that survived preprocessing and filtering.
+        """
         self._bin()
-        return dict(filter(self._filter, self._factors.items()))
+        return dict(filter(self._filter, ((k, v) for k, v in self._factors.items() if v is not None)))
 
     @property
     def factor_data(self) -> NDArray[Any]:
-        """
+        """Raw factor values before binning or digitization.
+
+        Access unprocessed factor data in its original numeric form before
+        any categorical encoding or binning transformations are applied.
+
+        Returns
+        -------
+        NDArray[Any]
+            Array with shape (n_samples, n_factors) containing original factor
+            values. Returns empty array when no factors are available.
+
+        Notes
+        -----
+        Use this for algorithms that can work with mixed data types or when
+        you need access to original continuous values. For analysis-ready
+        integer data, use binned_data or digitized_data instead.
+        """
         if not self.factor_names:
             return np.array([], dtype=np.float64)
 
@@ -181,24 +402,67 @@ class Metadata:
 
     @property
     def class_labels(self) -> NDArray[np.intp]:
-        """
+        """Target class labels as integer indices.
+
+        Returns
+        -------
+        NDArray[np.intp]
+            Array of class indices corresponding to dataset targets. For
+            object detection datasets, contains one label per detection.
+
+        Notes
+        -----
+        This property triggers dataset structure analysis on first access.
+        Use class_names property to get human-readable label names.
+        """
         self._structure()
         return self._class_labels
 
     @property
     def class_names(self) -> Sequence[str]:
-        """
+        """Human-readable names corresponding to class labels.
+
+        Returns
+        -------
+        Sequence[str]
+            List of class names where index corresponds to class label value.
+            Derived from dataset metadata or auto-generated from label indices.
+
+        Notes
+        -----
+        This property triggers dataset structure analysis on first access.
+        """
         self._structure()
         return self._class_names
 
     @property
     def image_indices(self) -> NDArray[np.intp]:
-        """
-
+        """Dataset indices linking targets back to source images.
+
+        Returns
+        -------
+        NDArray[np.intp]
+            Array mapping each target/detection back to its source image
+            index in the original dataset. Essential for object detection
+            datasets where multiple detections come from single images.
+
+        Notes
+        -----
+        This property triggers dataset structure analysis on first access.
+        """
+        self._structure()
         return self._image_indices
 
     @property
     def image_count(self) -> int:
+        """Total number of images in the dataset.
+
+        Returns
+        -------
+        int
+            Count of unique images in the source dataset, regardless of
+            how many targets/detections each image contains.
+        """
         if self._count == 0:
             self._structure()
         return self._count
@@ -212,7 +476,7 @@ class Metadata:
         columns = self._dataframe.columns
         for col in (col for col in cols or columns if _binned(col) in columns):
             self._dataframe.drop_in_place(_binned(col))
-            self._factors[col] =
+            self._factors[col] = None
         self._is_binned = False
 
     def _structure(self) -> None:
@@ -226,7 +490,7 @@ class Metadata:
         scores = []
         srcidx = []
         is_od = None
-        for i in range(len(self._dataset)):
+        for i in tqdm(range(len(self._dataset))):
             _, target, metadata = self._dataset[i]
 
             raw.append(metadata)
@@ -235,15 +499,15 @@ class Metadata:
                 target_labels = as_numpy(target.labels)
                 target_len = len(target_labels)
                 if target_len:
-                    labels.
-                    bboxes.
-                    scores.
+                    labels.append(target_labels)
+                    bboxes.append(as_numpy(target.boxes))
+                    scores.append(as_numpy(target.scores))
                     srcidx.extend([i] * target_len)
             elif isinstance(target, Array):
-
-
-                    labels.append(
-                    scores.append(
+                target_scores = as_numpy(target)
+                if len(target_scores):
+                    labels.append([np.argmax(target_scores)])
+                    scores.append([target_scores])
                     srcidx.append(i)
             else:
                 raise TypeError("Encountered unsupported target type in dataset")
@@ -252,10 +516,11 @@ class Metadata:
             if is_od != is_od_target:
                 raise ValueError("Encountered unexpected target type in dataset")
 
-
-
-
-
+        np_asarray: Callable[..., np.ndarray] = np.concatenate if srcidx else np.asarray
+        labels = np_asarray(labels, dtype=np.intp)
+        scores = np_asarray(scores, dtype=np.float32)
+        bboxes = np_asarray(bboxes, dtype=np.float32) if is_od else None
+        srcidx = np.asarray(srcidx, dtype=np.intp)
 
         index2label = self._dataset.metadata.get("index2label", {i: str(i) for i in np.unique(labels)})
 
@@ -277,7 +542,7 @@ class Metadata:
         self._class_labels = labels
         self._class_names = list(index2label.values())
         self._image_indices = target_dict["image_index"]
-        self._factors = dict.fromkeys(factor_dict,
+        self._factors = dict.fromkeys(factor_dict, None)
         self._dataframe = pl.DataFrame({**target_dict, **factor_dict})
         self._dropped_factors = merged[1]
         self._is_structured = True
@@ -303,24 +568,25 @@ class Metadata:
         )
 
         column_set = set(df.columns)
-        for col in (col for col in self.factor_names if _binned(col)
+        for col in (col for col in self.factor_names if not {_binned(col), _digitized(col)} & column_set):
             # Get data as numpy array for processing
             data = df[col].to_numpy()
-            col_dz = _binned(col)
             if col in factor_bins:
                 # User provided binning
                 bins = factor_bins[col]
-
-
+                col_bn = _binned(col)
+                df = df.with_columns(pl.Series(name=col_bn, values=digitize_data(data, bins).astype(np.int64)))
+                factor_info[col] = FactorInfo("continuous", is_binned=True)
             else:
                 # Check if data is numeric
-
-                if not np.issubdtype(data.dtype, np.number)
-                    # Non-numeric data
-
-
-
+                _, ordinal = np.unique(data, return_inverse=True)
+                if not np.issubdtype(data.dtype, np.number):
+                    # Non-numeric data - convert to categorical
+                    col_dg = _digitized(col)
+                    df = df.with_columns(pl.Series(name=col_dg, values=ordinal.astype(np.int64)))
+                    factor_info[col] = FactorInfo("categorical", is_digitized=True)
+                elif is_continuous(data, self.image_indices):
+                    # Continuous values - discretize by binning
                     warnings.warn(
                         f"A user defined binning was not provided for {col}. "
                         f"Using the {self.auto_bin_method} method to discretize the data. "
@@ -330,10 +596,12 @@ class Metadata:
                     )
                     # Create binned version
                     binned_data = bin_data(data, self.auto_bin_method)
-
-
+                    col_bn = _binned(col)
+                    df = df.with_columns(pl.Series(name=col_bn, values=binned_data.astype(np.int64)))
+                    factor_info[col] = FactorInfo("continuous", is_binned=True)
                 else:
-
+                    # Non-continuous values - treat as discrete
+                    factor_info[col] = FactorInfo("discrete")
 
         # Store the results
         self._dataframe = df
@@ -341,16 +609,30 @@ class Metadata:
         self._is_binned = True
 
     def add_factors(self, factors: Mapping[str, Array | Sequence[Any]]) -> None:
-        """
-        Add additional factors to the metadata.
+        """Add additional factors to metadata collection.
 
-
-
+        Extend the current metadata with new factors, automatically handling
+        length validation and integration with existing data structures.
 
         Parameters
         ----------
         factors : Mapping[str, Array | Sequence[Any]]
-            Dictionary
+            Dictionary mapping factor names to their values. Factor length must
+            match either the number of images or number of detections in the dataset.
+
+        Raises
+        ------
+        ValueError
+            When factor lengths do not match dataset dimensions.
+
+        Examples
+        --------
+        >>> metadata = Metadata(dataset)
+        >>> new_factors = {
+        ...     "brightness": [0.2, 0.8, 0.5, 0.3, 0.4, 0.1, 0.3, 0.2],
+        ...     "contrast": [1.1, 0.9, 1.0, 0.8, 1.2, 1.0, 0.7, 1.3],
+        ... }
+        >>> metadata.add_factors(new_factors)
         """
         self._structure()
 
@@ -367,7 +649,7 @@ class Metadata:
         for k, v in factors.items():
             data = as_numpy(v)[self.image_indices]
             new_columns.append(pl.Series(name=k, values=data))
-            self._factors[k] =
+            self._factors[k] = None
 
         if new_columns:
            self._dataframe = self.dataframe.with_columns(new_columns)
```