ionworks-api 0.1.0__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ionworks/validators.py CHANGED
@@ -1,18 +1,527 @@
- """
- Reusable validator functions and composable pipelines for inbound/outbound value
- normalization (e.g., converting between pandas DataFrames and dictionaries).
+ """Reusable validator functions and composable pipelines for value normalization.
+
+ Provides functions for composable inbound/outbound value normalization
+ (e.g., converting between pandas DataFrames and dictionaries).
  """

+ from collections.abc import Callable, Iterable
  import math
+ import os
  import pathlib
- from typing import Any, Callable, Iterable
+ from typing import Any

+ from dotenv import load_dotenv
  import numpy as np
  import pandas as pd
  import polars as pl
  import pybamm
  from pybamm.expression_tree.operations.serialise import convert_symbol_to_json

+ from .errors import IonworksError
+
+ # --- DataFrame Backend Configuration ---------------------------------------- #
+
+ # Load .env file before reading environment variables
+ load_dotenv()
+
+ # Type alias for DataFrame (pandas or polars)
+ DataFrame = pd.DataFrame | pl.DataFrame
+
+
+ def _get_default_backend() -> str:
+     """Get default backend from environment variable or fall back to 'polars'."""
+     env_val = os.getenv("IONWORKS_DATAFRAME_BACKEND", "polars").lower()
+     if env_val not in ("polars", "pandas"):
+         return "polars"
+     return env_val
+
+
+ # Module-level configuration for DataFrame return type
+ # Initialized from IONWORKS_DATAFRAME_BACKEND env var, defaults to "polars"
+ _dataframe_backend: str = _get_default_backend()
+
+
+ def set_dataframe_backend(backend: str) -> None:
+     """Set the default DataFrame backend for data fetching.
+
+     This overrides the IONWORKS_DATAFRAME_BACKEND environment variable.
+
+     Parameters
+     ----------
+     backend : str
+         DataFrame backend to use: "polars" or "pandas".
+
+     Raises
+     ------
+     ValueError
+         If backend is not "polars" or "pandas".
+     """
+     global _dataframe_backend
+     if backend not in ("polars", "pandas"):
+         raise ValueError(f"backend must be 'polars' or 'pandas', got '{backend}'")
+     _dataframe_backend = backend
+
+
+ def get_dataframe_backend() -> str:
+     """Get the current DataFrame backend setting.
+
+     Returns
+     -------
+     str
+         Current backend: "polars" or "pandas".
+     """
+     return _dataframe_backend
+
+
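Usage sketch for the new backend toggle (not part of the diff). The environment variable is read once at import time via load_dotenv()/os.getenv, so it must be set before the module is imported; set_dataframe_backend() overrides it afterwards. The import path below is assumed from the file location ionworks/validators.py.

import os

os.environ["IONWORKS_DATAFRAME_BACKEND"] = "pandas"  # must be set before import

from ionworks import validators  # assumed import path

validators.set_dataframe_backend("polars")  # runtime override of the env var
assert validators.get_dataframe_backend() == "polars"

try:
    validators.set_dataframe_backend("dask")
except ValueError as exc:
    print(exc)  # backend must be 'polars' or 'pandas', got 'dask'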
+ # --- Measurement Data Validators -------------------------------------------- #
+
+
+ class MeasurementValidationError(IonworksError):
+     """Exception raised when measurement data validation fails."""
+
+     def __init__(self, message: str, errors: list[str] | None = None) -> None:
+         super().__init__(message)
+         self.errors = errors or []
+
+
+ def _get_column(df: DataFrame, col: str) -> np.ndarray:
+     """
+     Extract a column as a numpy array from either pandas or polars DataFrame.
+
+     Parameters
+     ----------
+     df : DataFrame
+         pandas or polars DataFrame.
+     col : str
+         Column name.
+
+     Returns
+     -------
+     np.ndarray
+         Column values as numpy array.
+     """
+     if isinstance(df, pl.DataFrame):
+         return df.get_column(col).to_numpy()
+     return df[col].to_numpy()
+
+
+ def _has_column(df: DataFrame, col: str) -> bool:
+     """Check if a column exists in the DataFrame."""
+     return col in df.columns
+
+
+ def _get_step_group_indices(step_data: np.ndarray) -> np.ndarray:
+     """Compute step group indices for each row (0-indexed, based on contiguous groups).
+
+     Parameters
+     ----------
+     step_data : np.ndarray
+         Array of step numbers/identifiers.
+
+     Returns
+     -------
+     np.ndarray
+         Array where each element is the step group index (0, 1, 2, ...) for
+         that row.
+     """
+     changes = np.concatenate([[True], np.diff(step_data) != 0])
+     return np.cumsum(changes) - 1
+
+
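Worked example of the contiguous-grouping trick used by _get_step_group_indices (not part of the diff): a step number that reappears later starts a new group rather than rejoining its earlier one.

import numpy as np

step_data = np.array([1, 1, 2, 2, 2, 1, 1])  # step "1" recurs non-contiguously
changes = np.concatenate([[True], np.diff(step_data) != 0])
print(np.cumsum(changes) - 1)  # [0 0 1 1 1 2 2] -- the second run of step 1 is group 2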
+ def validate_positive_current_is_discharge(  # noqa: PLR0913
+     df: DataFrame,
+     current_col: str = "Current [A]",
+     voltage_col: str = "Voltage [V]",
+     step_col: str | None = None,
+     rest_tol: float = 1e-3,
+     current_std_tol: float = 0.01,
+ ) -> list[str]:
+     """
+     Validate that positive current corresponds to discharge.
+
+     Discharge should cause voltage to decrease. This function analyzes the
+     relationship between current direction and voltage change to verify the
+     sign convention is correct.
+
+     Parameters
+     ----------
+     df : DataFrame
+         Time series data with current and voltage columns (pandas or polars).
+     current_col : str
+         Name of the current column.
+     voltage_col : str
+         Name of the voltage column.
+     step_col : str, optional
+         Name of the step column. If provided, analyzes per-step. Otherwise,
+         infers steps from current sign changes.
+     rest_tol : float
+         Tolerance for considering current as zero (rest).
+     current_std_tol : float
+         Tolerance for standard deviation to consider constant current.
+
+     Returns
+     -------
+     list[str]
+         List of validation error messages. Empty if validation passes.
+     """
+     if not _has_column(df, current_col) or not _has_column(df, voltage_col):
+         return []
+
+     current = _get_column(df, current_col)
+     voltage = _get_column(df, voltage_col)
+
+     if len(current) == 0:
+         return []
+
+     # Determine step groups
+     if step_col and _has_column(df, step_col):
+         step_data = _get_column(df, step_col)
+     else:
+         # Infer steps from current sign changes
+         max_abs = np.max(np.abs(current))
+         if max_abs == 0:
+             return []
+         normalized = current / max_abs
+         step_data = np.sign(normalized * (np.abs(normalized) > rest_tol))
+
+     step_groups = _get_step_group_indices(step_data)
+     num_steps = step_groups[-1] + 1
+
+     # Vectorized computation of per-step statistics using bincount
+     # Mean current per step
+     step_current_sum = np.bincount(step_groups, weights=current, minlength=num_steps)
+     step_counts = np.bincount(step_groups, minlength=num_steps).astype(float)
+     step_counts[step_counts == 0] = 1  # Avoid division by zero
+     mean_current = step_current_sum / step_counts
+
+     # Std current per step: std = sqrt(E[x^2] - E[x]^2)
+     step_current_sq_sum = np.bincount(
+         step_groups, weights=current**2, minlength=num_steps
+     )
+     variance = step_current_sq_sum / step_counts - mean_current**2
+     variance = np.maximum(variance, 0)  # Numerical stability
+     std_current = np.sqrt(variance)
+
+     # First and last voltage per step
+     first_voltage = np.zeros(num_steps)
+     last_voltage = np.zeros(num_steps)
+     # Use searchsorted on step boundaries for vectorized first/last
+     step_boundaries = np.where(np.diff(step_groups, prepend=-1) != 0)[0]
+     step_end_boundaries = np.concatenate([step_boundaries[1:], [len(voltage)]])
+     first_voltage = voltage[step_boundaries]
+     last_voltage = voltage[step_end_boundaries - 1]
+
+     delta_v = last_voltage - first_voltage
+
+     # Filter: non-rest steps with constant current
+     is_non_rest = np.abs(mean_current) >= rest_tol
+     is_constant_current = std_current <= current_std_tol * np.abs(mean_current)
+     valid_mask = is_non_rest & is_constant_current
+
+     if not np.any(valid_mask):
+         return []
+
+     # Compute voltage response for valid steps
+     valid_mean_current = mean_current[valid_mask]
+     valid_delta_v = delta_v[valid_mask]
+     voltage_responses = valid_delta_v / valid_mean_current
+
+     mean_response = np.mean(voltage_responses)
+
+     if mean_response > 0:
+         return [
+             "Current sign convention error: positive current appears to be charge, "
+             "not discharge. Voltage increases when current is positive, but for "
+             "discharge, voltage should decrease. Please flip the sign of the "
+             "current data (multiply by -1)."
+         ]
+
+     return []
+
+
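A minimal sketch of the sign-convention check (not part of the diff), using a hypothetical two-step trace in which voltage rises while current is positive, so the validator flags the convention; the import path is assumed.

import pandas as pd
from ionworks.validators import validate_positive_current_is_discharge  # assumed path

df = pd.DataFrame(
    {
        "Current [A]": [1.0, 1.0, 1.0, -1.0, -1.0, -1.0],
        "Voltage [V]": [3.6, 3.7, 3.8, 3.8, 3.7, 3.6],  # rises under positive current
    }
)
print(validate_positive_current_is_discharge(df))
# ['Current sign convention error: positive current appears to be charge, ...']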
+ def validate_cumulative_values_reset_per_step(
+     df: DataFrame,
+     step_col: str = "Step count",
+     cumulative_cols: list[str] | None = None,
+     tolerance: float = 1e-6,
+ ) -> list[str]:
+     """Validate cumulative values reset to ~0 at each step and only increase.
+
+     Parameters
+     ----------
+     df : DataFrame
+         Time series data (pandas or polars).
+     step_col : str
+         Name of the column containing step numbers.
+     cumulative_cols : list[str], optional
+         List of cumulative column names to validate. If None, checks for common
+         capacity and energy columns.
+     tolerance : float
+         Tolerance for considering a value as "zero" at step start.
+
+     Returns
+     -------
+     list[str]
+         List of validation error messages. Empty if validation passes.
+     """
+     errors = []
+
+     if not _has_column(df, step_col):
+         return []
+
+     if cumulative_cols is None:
+         cumulative_cols = [
+             "Discharge capacity [A.h]",
+             "Charge capacity [A.h]",
+             "Discharge energy [W.h]",
+             "Charge energy [W.h]",
+         ]
+
+     cols_to_check = [col for col in cumulative_cols if _has_column(df, col)]
+     if not cols_to_check:
+         return []
+
+     step_data = _get_column(df, step_col)
+     if len(step_data) == 0:
+         return []
+     step_groups = _get_step_group_indices(step_data)
+
+     # Find step boundaries (first index of each step)
+     step_boundaries = np.where(np.diff(step_groups, prepend=-1) != 0)[0]
+
+     for col in cols_to_check:
+         values = _get_column(df, col)
+
+         # Check 1: Values at step starts should be ~0
+         start_values = values[step_boundaries]
+         non_zero_mask = np.abs(start_values) > tolerance
+         non_zero_steps = np.where(non_zero_mask)[0]
+
+         for step_idx in non_zero_steps:
+             errors.append(
+                 f"Column '{col}' does not reset at start of step {step_idx}: "
+                 f"expected ~0, got {start_values[step_idx]:.6f}. "
+                 f"Cumulative values should reset to 0 at the start of each step."
+             )
+
+         # Check 2: Values should be monotonically non-decreasing within each step
+         # Compute diff and check where it's negative within same step
+         value_diffs = np.diff(values, prepend=values[0])
+         step_diffs = np.diff(step_groups, prepend=step_groups[0])
+
+         # Mask: same step (diff == 0) and value decreased
+         within_step = step_diffs == 0
+         decreased = value_diffs < -tolerance
+
+         # Find first decrease per step
+         problem_indices = np.where(within_step & decreased)[0]
+         if len(problem_indices) > 0:
+             # Group by step and report first decrease per step
+             problem_steps = step_groups[problem_indices]
+             unique_problem_steps = np.unique(problem_steps)
+
+             for step_idx in unique_problem_steps:
+                 # Find first index in this step with decrease
+                 step_problem_indices = problem_indices[problem_steps == step_idx]
+                 first_idx = step_problem_indices[0]
+                 errors.append(
+                     f"Column '{col}' decreases within step {step_idx} at "
+                     f"index {first_idx}: "
+                     f"value went from {values[first_idx - 1]:.6f} to "
+                     f"{values[first_idx]:.6f}. "
+                     f"Cumulative values should only increase within a step."
+                 )
+
+     return errors
+
+
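Sketch of the cumulative-reset check (not part of the diff; import path assumed): the capacity column carries over into the second step instead of restarting near zero, so one message is returned.

import pandas as pd
from ionworks.validators import validate_cumulative_values_reset_per_step  # assumed path

df = pd.DataFrame(
    {
        "Step count": [1, 1, 2, 2],
        "Discharge capacity [A.h]": [0.0, 0.5, 0.5, 0.9],  # should restart at ~0
    }
)
print(validate_cumulative_values_reset_per_step(df))
# ["Column 'Discharge capacity [A.h]' does not reset at start of step 1: ..."]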
+ def validate_minimum_points_per_step(
+     df: DataFrame,
+     step_col: str = "Step count",
+     min_points: int = 2,
+ ) -> list[str]:
+     """
+     Validate that each step has at least a minimum number of data points.
+
+     Parameters
+     ----------
+     df : DataFrame
+         Time series data (pandas or polars).
+     step_col : str
+         Name of the column containing step numbers.
+     min_points : int
+         Minimum number of points required per step.
+
+     Returns
+     -------
+     list[str]
+         List of validation error messages. Empty if validation passes.
+     """
+     if not _has_column(df, step_col):
+         return []
+
+     step_data = _get_column(df, step_col)
+     if len(step_data) == 0:
+         return []
+
+     step_groups = _get_step_group_indices(step_data)
+     num_steps = step_groups[-1] + 1
+
+     # Vectorized count per step
+     step_counts = np.bincount(step_groups, minlength=num_steps)
+
+     # Find steps with insufficient points
+     insufficient_mask = step_counts < min_points
+     insufficient_steps = np.where(insufficient_mask)[0]
+
+     errors = []
+     for step_idx in insufficient_steps:
+         num_points = step_counts[step_idx]
+         errors.append(
+             f"Step {step_idx} has only {num_points} data point(s), "
+             f"but at least {min_points} are required."
+         )
+
+     return errors
+
+
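Sketch of the minimum-points check (not part of the diff; import path assumed); note that step indices in the messages refer to contiguous step groups, counted from 0.

import pandas as pd
from ionworks.validators import validate_minimum_points_per_step  # assumed path

df = pd.DataFrame({"Step count": [1, 1, 2, 3, 3]})
print(validate_minimum_points_per_step(df))
# ['Step 1 has only 1 data point(s), but at least 2 are required.']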
+ def validate_cycle_constant_within_step(
+     df: DataFrame,
+     step_col: str = "Step count",
+     cycle_col: str | None = None,
+ ) -> list[str]:
+     """
+     Validate that cycle number does not change within a step.
+
+     Parameters
+     ----------
+     df : DataFrame
+         Time series data (pandas or polars).
+     step_col : str
+         Name of the column containing step numbers.
+     cycle_col : str, optional
+         Name of the column containing cycle numbers. If None, tries common names.
+
+     Returns
+     -------
+     list[str]
+         List of validation error messages. Empty if validation passes.
+     """
+     if not _has_column(df, step_col):
+         return []
+
+     # Find cycle column
+     if cycle_col is None:
+         for col in ["Cycle count", "Cycle number", "Cycle from cycler"]:
+             if _has_column(df, col):
+                 cycle_col = col
+                 break
+
+     if cycle_col is None or not _has_column(df, cycle_col):
+         return []
+
+     step_data = _get_column(df, step_col)
+     if len(step_data) == 0:
+         return []
+
+     cycle_data = _get_column(df, cycle_col)
+     step_groups = _get_step_group_indices(step_data)
+
+     # Detect cycle changes within steps:
+     # A cycle change within a step occurs when:
+     # - The cycle value differs from the previous row
+     # - AND we're in the same step group
+     cycle_diffs = np.diff(cycle_data, prepend=cycle_data[0])
+     step_diffs = np.diff(step_groups, prepend=step_groups[0])
+
+     # Within-step cycle change: same step (step_diff == 0) but cycle changed
+     within_step_cycle_change = (step_diffs == 0) & (cycle_diffs != 0)
+
+     problem_indices = np.where(within_step_cycle_change)[0]
+     if len(problem_indices) == 0:
+         return []
+
+     # Group by step and report
+     problem_steps = step_groups[problem_indices]
+     unique_problem_steps = np.unique(problem_steps)
+
+     errors = []
+     for step_idx in unique_problem_steps:
+         # Find all unique cycles in this step
+         step_mask = step_groups == step_idx
+         unique_cycles = np.unique(cycle_data[step_mask])
+         errors.append(
+             f"Cycle number changes within step {step_idx}: "
+             f"found cycles {unique_cycles.tolist()}. "
+             f"Each step should belong to a single cycle."
+         )
+
+     return errors
+
+
+ def validate_measurement_data(
+     df: DataFrame,
+     strict: bool = True,
+ ) -> None:
+     """Validate measurement time series data before upload.
+
+     Performs the following checks:
+
+     1. Positive current should correspond to discharge (voltage decreases)
+     2. Cumulative values (capacity, energy) should reset at each step start
+        and only increase within steps
+     3. Each step has at least 2 data points (strict mode only)
+     4. Cycle number does not change within a step (strict mode only)
+
+     Parameters
+     ----------
+     df : DataFrame
+         Time series data to validate (pandas or polars DataFrame).
+     strict : bool
+         If True (default), run additional checks: minimum 2 points per step
+         and cycle number constant within each step.
+
+     Raises
+     ------
+     MeasurementValidationError
+         If any validation checks fail. The exception contains a list of all
+         errors found.
+     """
+     all_errors = []
+
+     # Try different possible step column names
+     step_col = None
+     for col in ["Step count", "Step number", "Step from cycler"]:
+         if _has_column(df, col):
+             step_col = col
+             break
+
+     # Check 1: Positive current should be discharge
+     current_errors = validate_positive_current_is_discharge(df, step_col=step_col)
+     all_errors.extend(current_errors)
+
+     if step_col:
+         # Check 2: Cumulative values should reset at each step
+         cumulative_errors = validate_cumulative_values_reset_per_step(df, step_col)
+         all_errors.extend(cumulative_errors)
+
+         if strict:
+             # Check 3: At least 2 points per step
+             points_errors = validate_minimum_points_per_step(df, step_col)
+             all_errors.extend(points_errors)
+
+             # Check 4: Cycle constant within step
+             cycle_errors = validate_cycle_constant_within_step(df, step_col)
+             all_errors.extend(cycle_errors)
+
+     if all_errors:
+         raise MeasurementValidationError(
+             f"Measurement data validation failed with {len(all_errors)} error(s):\n"
+             + "\n".join(f" - {err}" for err in all_errors),
+             errors=all_errors,
+         )
+
+
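End-to-end sketch of the new entry point (not part of the diff; import paths assumed): a polars frame with a single-point step and a mid-step cycle change fails both strict checks, and the individual messages are available on the exception.

import polars as pl
from ionworks.validators import (  # assumed paths
    MeasurementValidationError,
    validate_measurement_data,
)

df = pl.DataFrame(
    {
        "Step count": [1, 2, 2],
        "Cycle count": [1, 1, 2],        # cycle changes inside step group 1
        "Current [A]": [0.0, 1.0, 1.0],
        "Voltage [V]": [3.7, 3.7, 3.6],  # falls under positive current: OK
    }
)
try:
    validate_measurement_data(df, strict=True)
except MeasurementValidationError as exc:
    print(exc.errors)  # one message per failed check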
  # --- Atomic validators ------------------------------------------------------ #


@@ -21,23 +530,41 @@ def df_to_dict_validator(v: Any) -> Any:
      if isinstance(v, pd.DataFrame):
          # Replace NaN with None for JSON compatibility
          return v.replace(np.nan, None).to_dict(orient="list")
-     elif isinstance(v, pl.DataFrame):
+     if isinstance(v, pl.DataFrame):
          # Replace NaN with None for JSON compatibility, then convert to dict
          return v.fill_nan(None).to_dict(as_series=False)
      return v


- def dict_to_df_validator(v: Any) -> Any:
-     """Convert dict to DataFrame for data processing."""
+ def dict_to_df_validator(v: Any, return_type: str | None = None) -> Any:
+     """Convert dict to DataFrame for data processing.
+
+     Parameters
+     ----------
+     v : Any
+         Value to convert. If dict, converts to DataFrame.
+     return_type : str | None
+         Type of DataFrame to return: "polars" or "pandas".
+         If None, uses the global setting from set_dataframe_backend().
+
+     Returns
+     -------
+     Any
+         DataFrame if input was dict, otherwise unchanged.
+     """
      if isinstance(v, dict):
-         try:
-             return pd.DataFrame(v)
-         except ValueError as e:
-             if "If using all scalar values, you must pass an index" in str(e):
-                 # Handle case where all values are scalars by providing an index
+         backend = return_type if return_type is not None else _dataframe_backend
+         # Check if all values are scalars (not lists/arrays)
+         all_scalars = all(
+             not isinstance(val, list | tuple | np.ndarray) for val in v.values()
+         )
+         if backend == "pandas":
+             if all_scalars:
                  return pd.DataFrame(v, index=[0])
-             else:
-                 raise
+             return pd.DataFrame(v)
+         if all_scalars:
+             return pl.DataFrame({k: [val] for k, val in v.items()})
+         return pl.DataFrame(v)
      return v


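Round-trip sketch for the reworked converters (not part of the diff; import path assumed): an all-scalar dict now becomes a one-row frame directly, instead of relying on catching pandas' scalar-index ValueError, and the backend can be forced per call via return_type.

from ionworks.validators import df_to_dict_validator, dict_to_df_validator  # assumed path

row = {"Voltage [V]": 3.7, "Current [A]": 1.2}

pdf = dict_to_df_validator(row, return_type="pandas")  # 1-row pandas DataFrame
pldf = dict_to_df_validator(row)                       # backend from set_dataframe_backend()
print(df_to_dict_validator(pdf))  # {'Voltage [V]': [3.7], 'Current [A]': [1.2]}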
@@ -49,17 +576,19 @@ def parameter_validator(v: Any) -> Any:


  def float_sanitizer(v: Any) -> Any:
-     """Sanitize float values to JSON-compatible forms. Currently removes NaN and
-     infinity values."""
+     """Sanitize float values to JSON-compatible forms.
+
+     Currently removes NaN and infinity values.
+     """
      if isinstance(v, float):
          if math.isinf(v):
              return "Infinity" if v > 0 else "-Infinity"
-         elif np.isnan(v):
+         if np.isnan(v):
              return None
      elif isinstance(v, np.floating):
          if np.isinf(v):
              return "Infinity" if v > 0 else "-Infinity"
-         elif np.isnan(v):
+         if np.isnan(v):
              return None
      return v

@@ -67,10 +596,6 @@ def float_sanitizer(v: Any) -> Any:
  def bounds_tuple_validator(v: Any) -> Any:
      """Convert bounds 2-tuple to list for JSON serialization.

-     Converts tuples with exactly 2 elements to lists. This is useful for
-     bounds parameters that may be provided as tuples (lower, upper) but
-     need to be serialized as lists.
-
      Parameters
      ----------
      v : Any
@@ -87,27 +612,23 @@ def bounds_tuple_validator(v: Any) -> Any:


  def file_scheme_validator(v: Any) -> Any:
-     """
-     Convert file:// and folder:// scheme paths to serialized dicts.
+     """Convert file:// and folder:// scheme paths to serialized dicts.

-     Handles:
-     - "file:" prefixed paths: loads CSV file as dict (serialized)
-     - "folder:" prefixed paths: loads time_series.csv and steps.csv as dict
-     - All other values: returned unchanged
+     Handles ``file:`` prefixed paths (loads CSV as dict) and ``folder:``
+     prefixed paths (loads time_series.csv and steps.csv as dict).
+     All other values are returned unchanged.

      Raises
      ------
      FileNotFoundError
-         If the file or folder path doesn't exist
-     Exception
-         If reading the CSV file fails for any other reason
+         If the file or folder path doesn't exist.
      """
      if isinstance(v, str) and v.startswith("file:"):
          path = pathlib.Path(v.split(":")[1]).expanduser().resolve()
          if not path.exists() or not path.is_file():
              raise FileNotFoundError(f"CSV file not found: {v}")
          return df_to_dict_validator(pd.read_csv(path))
-     elif isinstance(v, str) and v.startswith("folder:"):
+     if isinstance(v, str) and v.startswith("folder:"):
          path = pathlib.Path(v.split(":")[1]).expanduser().resolve()
          if not path.exists() or not path.is_dir():
              raise FileNotFoundError(f"Folder not found: {v}")