fibphot-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fibphot/__init__.py +6 -0
- fibphot/analysis/__init__.py +0 -0
- fibphot/analysis/aggregate.py +257 -0
- fibphot/analysis/auc.py +354 -0
- fibphot/analysis/irls.py +350 -0
- fibphot/analysis/peaks.py +1163 -0
- fibphot/analysis/photobleaching.py +290 -0
- fibphot/analysis/plotting.py +105 -0
- fibphot/analysis/report.py +56 -0
- fibphot/collection.py +207 -0
- fibphot/fit/__init__.py +0 -0
- fibphot/fit/regression.py +269 -0
- fibphot/io/__init__.py +6 -0
- fibphot/io/doric.py +435 -0
- fibphot/io/excel.py +76 -0
- fibphot/io/h5.py +321 -0
- fibphot/misc.py +11 -0
- fibphot/peaks.py +628 -0
- fibphot/pipeline.py +14 -0
- fibphot/plotting.py +594 -0
- fibphot/stages/__init__.py +22 -0
- fibphot/stages/base.py +101 -0
- fibphot/stages/baseline.py +354 -0
- fibphot/stages/control_dff.py +214 -0
- fibphot/stages/filters.py +273 -0
- fibphot/stages/normalisation.py +260 -0
- fibphot/stages/regression.py +139 -0
- fibphot/stages/smooth.py +442 -0
- fibphot/stages/trim.py +141 -0
- fibphot/state.py +309 -0
- fibphot/tags.py +130 -0
- fibphot/types.py +6 -0
- fibphot-0.1.0.dist-info/METADATA +63 -0
- fibphot-0.1.0.dist-info/RECORD +37 -0
- fibphot-0.1.0.dist-info/WHEEL +5 -0
- fibphot-0.1.0.dist-info/licenses/LICENSE.md +21 -0
- fibphot-0.1.0.dist-info/top_level.txt +1 -0
fibphot/fit/regression.py
ADDED
@@ -0,0 +1,269 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Literal

import numpy as np

from ..types import FloatArray

RegressionMethod = Literal["ols", "irls_tukey", "irls_huber"]


@dataclass(frozen=True, slots=True)
class LinearFit:
    """
    Fit of y ≈ intercept + slope * x (or slope-only if include_intercept=False).
    """

    intercept: float
    slope: float
    fitted: FloatArray
    residuals: FloatArray
    r2: float
    method: RegressionMethod
    n_iter: int | None = None
    tuning_constant: float | None = None
    scale: float | None = None
    weights: FloatArray | None = None


def _r2_score(y: FloatArray, yhat: FloatArray) -> float:
    y = np.asarray(y, dtype=float)
    yhat = np.asarray(yhat, dtype=float)
    ss_res = float(np.sum((y - yhat) ** 2))
    ss_tot = float(np.sum((y - float(np.mean(y))) ** 2))
    if ss_tot <= 1e-20:
        return float("nan")
    return 1.0 - ss_res / ss_tot


def _mad_sigma(x: FloatArray) -> float:
    """Robust scale estimate using MAD, scaled for Normal data."""
    x = np.asarray(x, dtype=float)
    med = float(np.median(x))
    mad = float(np.median(np.abs(x - med)))
    return 1.4826 * mad
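The 1.4826 factor is 1/Φ⁻¹(3/4) ≈ 1/0.6745, the consistency constant that makes the MAD estimate the standard deviation under Gaussian noise. A quick numerical check (a sketch, not part of the file):

    rng = np.random.default_rng(1)
    _mad_sigma(rng.normal(scale=2.0, size=100_000))  # close to 2.0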
def _design_matrix(x: FloatArray, include_intercept: bool) -> FloatArray:
    x = np.asarray(x, dtype=float)
    if include_intercept:
        return np.column_stack([np.ones_like(x), x])
    return x[:, None]


def fit_ols(
    x: FloatArray,
    y: FloatArray,
    *,
    include_intercept: bool = True,
) -> LinearFit:
    """
    Ordinary least squares fit of y on x.

    Context
    -------
    Applied to motion correction in fiber photometry, x is the control signal
    (e.g., isosbestic channel) and y is the signal to be corrected. Hence, the
    estimated motion is given by:

        yhat = intercept + slope * x

    and the corrected signal is given by the residuals:

        corrected = y - yhat.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    mask = np.isfinite(x) & np.isfinite(y)
    x0 = x[mask]
    y0 = y[mask]

    X = _design_matrix(x0, include_intercept)
    beta, *_ = np.linalg.lstsq(X, y0, rcond=None)

    if include_intercept:
        intercept = float(beta[0])
        slope = float(beta[1])
        yhat0 = intercept + slope * x0
    else:
        intercept = 0.0
        slope = float(beta[0])
        yhat0 = slope * x0

    fitted = np.full_like(y, np.nan, dtype=float)
    fitted[mask] = yhat0
    residuals = y - fitted

    return LinearFit(
        intercept=intercept,
        slope=slope,
        fitted=fitted,
        residuals=residuals,
        r2=_r2_score(y0, yhat0),
        method="ols",
    )
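For orientation, a minimal usage sketch of fit_ols as a motion corrector; iso and gcamp are made-up synthetic traces, not names from the package:

    # Sketch only: a control/signal pair sharing a motion component.
    rng = np.random.default_rng(0)
    iso = rng.normal(size=1_000)                       # control (isosbestic) trace
    gcamp = 0.8 * iso + 0.1 * rng.normal(size=1_000)   # signal with shared motion

    fit = fit_ols(iso, gcamp)
    corrected = fit.residuals   # motion-corrected signal, per the docstring above
    print(round(fit.slope, 2), round(fit.r2, 2))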
def _weights_tukey(u: FloatArray) -> FloatArray:
    """Tukey's bisquare weights: w = (1 - u^2)^2 for |u| < 1, else 0."""
    u = np.asarray(u, dtype=float)
    w = np.zeros_like(u)
    inside = np.abs(u) < 1.0
    w[inside] = (1.0 - u[inside] ** 2) ** 2
    return w


def _weights_huber(u: FloatArray) -> FloatArray:
    """Huber weights: w = 1 for |u| <= 1, else 1/|u|."""
    u = np.asarray(u, dtype=float)
    au = np.abs(u)
    w = np.ones_like(u)
    outside = au > 1.0
    w[outside] = 1.0 / au[outside]
    return w
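The two schemes differ in how hard they reject outliers. Evaluating the formulas above at a few standardised residuals (a sketch):

    u = np.array([0.0, 0.5, 1.0, 2.0])
    _weights_tukey(u)  # [1.0, 0.5625, 0.0, 0.0]: zero weight once |u| >= 1
    _weights_huber(u)  # [1.0, 1.0, 1.0, 0.5]: kept but downweighted by 1/|u|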
def _wls_line(
    x: FloatArray,
    y: FloatArray,
    w: FloatArray,
    *,
    include_intercept: bool,
) -> tuple[float, float]:
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)

    w = np.clip(w, 0.0, np.inf)
    sw = float(np.sum(w))
    if not np.isfinite(sw) or sw <= 1e-15:
        return float("nan"), float("nan")

    if include_intercept:
        sx = float(np.sum(w * x))
        sy = float(np.sum(w * y))
        sxx = float(np.sum(w * x * x))
        sxy = float(np.sum(w * x * y))

        denom = sw * sxx - sx * sx
        if not np.isfinite(denom) or abs(denom) <= 1e-20:
            return float("nan"), float("nan")

        slope = (sw * sxy - sx * sy) / denom
        intercept = (sy - slope * sx) / sw
        return float(intercept), float(slope)

    # slope-only
    sxx = float(np.sum(w * x * x))
    if not np.isfinite(sxx) or sxx <= 1e-20:
        return 0.0, float("nan")

    sxy = float(np.sum(w * x * y))
    slope = sxy / sxx
    return 0.0, float(slope)


def fit_irls(
    x: FloatArray,
    y: FloatArray,
    *,
    include_intercept: bool = True,
    loss: Literal["tukey", "huber"] = "tukey",
    tuning_constant: float = 4.685,
    max_iter: int = 50,
    tol: float = 1e-10,
    store_weights: bool = False,
) -> LinearFit:
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    mask = np.isfinite(x) & np.isfinite(y)
    x0 = x[mask]
    y0 = y[mask]

    # initial OLS.
    if include_intercept:
        X = np.column_stack([np.ones_like(x0), x0])
        beta, *_ = np.linalg.lstsq(X, y0, rcond=None)
        intercept = float(beta[0])
        slope = float(beta[1])
    else:
        slope = float(np.dot(x0, y0) / (np.dot(x0, x0) + 1e-18))
        intercept = 0.0

    weight_fn = _weights_tukey if loss == "tukey" else _weights_huber

    w = np.ones_like(y0, dtype=float)
    scale: float | None = None

    n_iter = 0
    last_intercept = intercept
    last_slope = slope

    for _ in range(max_iter):
        n_iter += 1

        yhat = intercept + slope * x0 if include_intercept else slope * x0
        r = y0 - yhat

        scale = _mad_sigma(r)
        if not np.isfinite(scale) or scale <= 1e-15:
            break

        u = r / (tuning_constant * scale)
        w = weight_fn(u)

        if float(np.sum(w)) <= 1e-12:
            break

        intercept, slope = _wls_line(
            x0, y0, w, include_intercept=include_intercept
        )
        if not np.isfinite(slope) or (
            include_intercept and not np.isfinite(intercept)
        ):
            break

        # convergence
        di = abs(intercept - last_intercept) if include_intercept else 0.0
        ds = abs(slope - last_slope)
        denom = (
            abs(last_slope)
            + (abs(last_intercept) if include_intercept else 0.0)
            + 1e-18
        )
        if (di + ds) / denom < tol:
            break

        last_intercept = intercept
        last_slope = slope

    yhat0 = intercept + slope * x0 if include_intercept else slope * x0

    fitted = np.full_like(y, np.nan, dtype=float)
    fitted[mask] = yhat0
    residuals = y - fitted

    weights_out = None
    if store_weights:
        weights_out = np.full_like(y, np.nan, dtype=float)
        weights_out[mask] = w

    method: RegressionMethod = "irls_tukey" if loss == "tukey" else "irls_huber"

    return LinearFit(
        intercept=float(intercept),
        slope=float(slope),
        fitted=fitted,
        residuals=residuals,
        r2=_r2_score(y0, yhat0),
        method=method,
        n_iter=n_iter,
        tuning_constant=float(tuning_constant),
        scale=float(scale) if scale is not None else None,
        weights=weights_out,
    )
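Continuing the fit_ols sketch above, a hedged comparison of the two fitters on data with a gross artefact (all names synthetic):

    y_bad = gcamp.copy()
    y_bad[100:110] += 50.0                       # simulated fibre-bump artefact

    plain = fit_ols(iso, y_bad)
    robust = fit_irls(iso, y_bad, loss="tukey", store_weights=True)

    # The robust slope should stay near 0.8 while OLS is pulled by the artefact;
    # robust.weights is ~0 at the contaminated samples.
    print(plain.slope, robust.slope, robust.n_iter)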
fibphot/io/__init__.py
ADDED
fibphot/io/doric.py
ADDED
@@ -0,0 +1,435 @@
from __future__ import annotations

from collections.abc import Mapping, Sequence
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Literal

import h5py
import numpy as np

from ..state import PhotometryState
from ..types import FloatArray

AlignMode = Literal["truncate", "interp"]
Source = Literal[
    "lockin",      # demodulated photometry channels (GCaMP, Iso, etc.)
    "analog_in",   # raw detector voltages
    "analog_out",  # LED modulation outputs
]


@dataclass(frozen=True, slots=True)
class DoricChannel:
    name: str
    signal_path: str
    time_path: str
    attrs: dict[str, Any]


def _decode_attr(value: Any) -> Any:
    """Best-effort decoding for HDF5 attributes."""
    if isinstance(value, bytes):
        return value.decode(errors="replace")

    if isinstance(value, np.ndarray):
        if value.dtype.kind in {"S", "O"}:
            out: list[Any] = []
            for v in value.ravel().tolist():
                out.append(_decode_attr(v))
            return np.array(out, dtype=object).reshape(value.shape)
        if value.size == 1:
            return _decode_attr(value.item())
        return value

    if isinstance(value, np.generic):
        return value.item()

    return value


def _get_attrs(obj: h5py.Dataset | h5py.Group) -> dict[str, Any]:
    """Extract all attributes from an HDF5 group/dataset and decode them."""
    return {k: _decode_attr(v) for k, v in obj.attrs.items()}
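Behaviour on typical HDF5 attribute payloads, for illustration (a sketch):

    _decode_attr(b"GCaMP")                # -> "GCaMP"
    _decode_attr(np.float64(12.0))        # -> 12.0 (plain Python float)
    _decode_attr(np.array([b"a", b"b"]))  # -> object array(["a", "b"])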
def _normalise_name(name: str) -> str:
    """Normalise channel names so they’re consistent keys."""
    return name.strip().lower().replace(" ", "_").replace("-", "_")


def _read_1d(dataset: h5py.Dataset) -> FloatArray:
    """Read a dataset and ensure it’s a 1D float array."""
    arr = np.asarray(dataset[()], dtype=float)
    if arr.ndim != 1:
        raise ValueError(f"Expected 1D dataset, got shape {arr.shape}.")
    return arr


def _discover_series_names(f: h5py.File, fpconsole: str) -> list[str]:
    """Find which Doric “SeriesXXXX” groups exist in the file."""
    base = f"DataAcquisition/{fpconsole}/Signals"
    if base not in f:
        raise KeyError(f"Missing signals root: {base!r}")

    grp = f[base]
    series = [k for k in grp if str(k).lower().startswith("series")]
    if not series:
        raise ValueError(f"No series groups found under {base!r}")
    return sorted(series)


def _series_sort_key(name: str) -> tuple[int, str]:
    """Sort key for series names like 'Series0001'."""
    digits = "".join(ch for ch in name if ch.isdigit())
    return (int(digits) if digits else -1, name)


def _choose_series(series: str | None, available: list[str]) -> str:
    """Decide which series to use."""
    if series is not None:
        if series not in available:
            raise KeyError(
                f"Series {series!r} not found. Available: {available}"
            )
        return series

    if len(available) == 1:
        return available[0]

    # Choose highest numbered series by default.
    return sorted(available, key=_series_sort_key)[-1]
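The resulting selection rule is easy to sanity-check in isolation (a sketch calling the private helper directly):

    _choose_series(None, ["Series0001"])                         # -> "Series0001"
    _choose_series(None, ["Series0001", "Series0002"])           # -> "Series0002"
    _choose_series("Series0001", ["Series0001", "Series0002"])   # explicit choice wins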
def _find_series_root(f: h5py.File, fpconsole: str, series: str) -> str:
    """Construct and validate the HDF5 path to the chosen series."""
    root = f"DataAcquisition/{fpconsole}/Signals/{series}"
    if root not in f:
        raise KeyError(f"Could not find series root: {root!r}")
    return root


def _available_sources(f: h5py.File, series_root: str) -> list[Source]:
    """Discover which source types are available in the series."""
    out: list[Source] = []

    if f"{series_root}/AnalogIn" in f:
        out.append("analog_in")
    if f"{series_root}/AnalogOut" in f:
        out.append("analog_out")

    grp = f[series_root]
    if any(str(k).startswith("LockIn") for k in grp):
        out.append("lockin")

    order: list[Source] = ["lockin", "analog_in", "analog_out"]
    return [s for s in order if s in out]


def _choose_source(source: Source | None, available: list[Source]) -> Source:
    """
    Decide which source to use.

    Priority: lockin > analog_in > analog_out. For photometry, lockin is
    preferred as it contains demodulated signals.
    """
    if source is not None:
        if source not in available:
            raise KeyError(
                f"Source {source!r} not available. Available: {available}"
            )
        return source

    # Priority: lockin > analog_in > analog_out
    for preferred in ("lockin", "analog_in", "analog_out"):
        if preferred in available:
            return preferred  # type: ignore[return-value]

    raise ValueError("No supported sources found in file.")


def _discover_lockin_channels(
    f: h5py.File, series_root: str
) -> list[DoricChannel]:
    """Discover LockIn (demodulated) channels under the series root."""
    channels: list[DoricChannel] = []

    grp = f[series_root]
    lockin_keys = [k for k in grp if str(k).startswith("LockIn")]
    for key in lockin_keys:
        grp_path = f"{series_root}/{key}"
        lock_grp = f[grp_path]

        time_path = f"{grp_path}/Time"
        if time_path not in f:
            continue

        # Pick first non-Time dataset as signal (AIN01 is typical).
        dset_names = [k for k in lock_grp if str(k).lower() != "time"]
        if not dset_names:
            continue

        signal_path = (
            f"{grp_path}/AIN01"
            if f"{grp_path}/AIN01" in f
            else f"{grp_path}/{dset_names[0]}"
        )

        ds = f[signal_path]
        attrs = _get_attrs(ds)

        username = attrs.get("Username")
        name = (
            str(username)
            if username not in (None, "", "0")
            else ds.name.split("/")[-1]
        )
        channels.append(
            DoricChannel(
                name=_normalise_name(name),
                signal_path=signal_path,
                time_path=time_path,
                attrs=attrs,
            )
        )

    if not channels:
        raise ValueError(
            "No LockIn channels found. "
            f"Expected groups like 'LockInAOUT02' under {series_root!r}."
        )

    dedup: dict[str, DoricChannel] = {}
    for ch in channels:
        dedup.setdefault(ch.name, ch)

    return list(dedup.values())


def _discover_analog_in(f: h5py.File, series_root: str) -> list[DoricChannel]:
    """Discover analogue input channels."""
    grp_path = f"{series_root}/AnalogIn"
    if grp_path not in f:
        raise KeyError(f"Missing group: {grp_path!r}")

    time_path = f"{grp_path}/Time"
    if time_path not in f:
        raise KeyError(f"Missing time dataset: {time_path!r}")

    grp = f[grp_path]
    channels: list[DoricChannel] = []
    for k in grp:
        if str(k).lower() == "time":
            continue
        ds_path = f"{grp_path}/{k}"
        ds = f[ds_path]
        attrs = _get_attrs(ds)
        username = attrs.get("Username")
        name = str(username) if username not in (None, "", "0") else str(k)
        channels.append(
            DoricChannel(
                name=_normalise_name(name),
                signal_path=ds_path,
                time_path=time_path,
                attrs=attrs,
            )
        )

    if not channels:
        raise ValueError(f"No analogue-in datasets found under {grp_path!r}")

    return channels


def _discover_analog_out(f: h5py.File, series_root: str) -> list[DoricChannel]:
    """Discover analogue output channels (LED modulation outputs)."""
    grp_path = f"{series_root}/AnalogOut"
    if grp_path not in f:
        raise KeyError(f"Missing group: {grp_path!r}")

    time_path = f"{grp_path}/Time"
    if time_path not in f:
        raise KeyError(f"Missing time dataset: {time_path!r}")

    grp = f[grp_path]
    channels: list[DoricChannel] = []
    for k in grp:
        if str(k).lower() == "time":
            continue
        ds_path = f"{grp_path}/{k}"
        ds = f[ds_path]
        attrs = _get_attrs(ds)
        username = attrs.get("Username")
        name = str(username) if username not in (None, "", "0") else str(k)
        channels.append(
            DoricChannel(
                name=_normalise_name(name),
                signal_path=ds_path,
                time_path=time_path,
                attrs=attrs,
            )
        )

    if not channels:
        raise ValueError(f"No analogue-out datasets found under {grp_path!r}")

    return channels


def _align_to_reference(
    x_ref: FloatArray,
    x: FloatArray,
    y: FloatArray,
    mode: AlignMode,
) -> FloatArray:
    """
    Align y(x) to the reference timebase x_ref.

    - truncate: cut to the shortest length (no resampling)
    - interp: interpolate y onto x_ref
    """
    if mode == "truncate":
        n = min(x_ref.shape[0], x.shape[0], y.shape[0])
        return np.asarray(y[:n], dtype=float)

    if mode == "interp":
        n = min(x.shape[0], y.shape[0])
        return np.interp(x_ref, x[:n], y[:n]).astype(float)

    raise ValueError(f"Unknown align mode: {mode!r}")
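A small sketch of the two alignment modes on made-up timebases:

    t_ref = np.array([0.0, 1.0, 2.0, 3.0])  # reference timebase
    t = np.array([0.0, 2.0, 4.0])           # slower channel
    y = np.array([0.0, 4.0, 8.0])

    _align_to_reference(t_ref, t, y, mode="truncate")  # -> [0., 4., 8.] (cut, no resampling)
    _align_to_reference(t_ref, t, y, mode="interp")    # -> [0., 2., 4., 6.]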
def read_doric(
    filename: Path | str,
    *,
    fpconsole: str = "FPConsole",
    series: str | None = None,
    source: Source | None = None,
    channels: Sequence[str] | Mapping[str, str] | None = None,
    align: AlignMode = "truncate",
) -> PhotometryState:
    """
    Read a Doric .doric (HDF5) file into a PhotometryState.

    Automatic behaviour
    -------------------
    - If series is None: choose the only series if there is one, otherwise
      choose the highest-numbered series.
    - If source is None: prefer lockin > analog_in > analog_out.
    - Channel naming uses the dataset attribute 'Username' when available.

    Parameters
    ----------
    fpconsole:
        Device group under DataAcquisition (usually 'FPConsole').
    series:
        Series group name like 'Series0001'. If None, auto-selected.
    source:
        'lockin', 'analog_in', or 'analog_out'. If None, auto-selected.
    channels:
        - None: load all discovered channels for the selected source
        - Sequence[str]: select by discovered channel names (normalised)
        - Mapping[str, str]: rename channels: {new_name: existing_name}
    align:
        - 'truncate': truncate all channels to the shortest timebase length
        - 'interp': interpolate all channels onto a reference timebase
    """
    path = Path(filename)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    with h5py.File(path, "r") as f:
        series_names = _discover_series_names(f, fpconsole)
        chosen_series = _choose_series(series, series_names)
        series_root = _find_series_root(f, fpconsole, chosen_series)

        available = _available_sources(f, series_root)
        chosen_source = _choose_source(source, available)

        if chosen_source == "lockin":
            discovered = _discover_lockin_channels(f, series_root)
        elif chosen_source == "analog_in":
            discovered = _discover_analog_in(f, series_root)
        else:
            discovered = _discover_analog_out(f, series_root)

        by_name = {c.name: c for c in discovered}

        if channels is None:
            selected = discovered
            out_names = [c.name for c in selected]
        elif isinstance(channels, Mapping):
            selected = []
            out_names = []
            for new_name, old_name in channels.items():
                old_key = _normalise_name(str(old_name))
                if old_key not in by_name:
                    raise KeyError(
                        f"Unknown channel {old_name!r}. "
                        f"Available: {sorted(by_name)}"
                    )
                selected.append(by_name[old_key])
                out_names.append(_normalise_name(str(new_name)))
        else:
            wanted = [_normalise_name(str(c)) for c in channels]
            missing = [c for c in wanted if c not in by_name]
            if missing:
                raise KeyError(
                    f"Unknown channel(s): {missing}. "
                    f"Available: {sorted(by_name)}"
                )
            selected = [by_name[c] for c in wanted]
            out_names = wanted

        if not selected:
            raise ValueError("No channels selected to load.")

        # Reference timebase: first selected channel.
        t_ref = _read_1d(f[selected[0].time_path])

        signals: list[FloatArray] = []
        for ch in selected:
            t = _read_1d(f[ch.time_path])
            y = _read_1d(f[ch.signal_path])
            signals.append(_align_to_reference(t_ref, t, y, mode=align))

        if align == "truncate":
            n = min([t_ref.shape[0], *[s.shape[0] for s in signals]])
            t_ref = t_ref[:n]
            signals = [s[:n] for s in signals]

        stacked = np.stack(signals, axis=0)

        meta: dict[str, Any] = {
            "file": str(path),
            "fpconsole": fpconsole,
            "series": chosen_series,
            "available_series": series_names,
            "source": chosen_source,
            "available_sources": available,
            "align": align,
            "channels": {
                name: by_name.get(name).attrs if name in by_name else {}
                for name in out_names
            },
        }

        gs_path = f"Configurations/{fpconsole}/GlobalSettings"
        ss_path = f"Configurations/{fpconsole}/SavingSettings"
        if gs_path in f:
            meta["global_settings"] = _get_attrs(f[gs_path])
        if ss_path in f:
            meta["saving_settings"] = _get_attrs(f[ss_path])

        return PhotometryState(
            time_seconds=t_ref,
            signals=stacked,
            channel_names=tuple(out_names),
            metadata=meta,
        )
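Putting the loader together, a hedged usage sketch; the file path and channel names are hypothetical, since real names come from each dataset's 'Username' attribute:

    state = read_doric(
        "session.doric",                                   # hypothetical path
        channels={"signal": "gcamp", "control": "iso"},    # {new_name: existing_name}
        align="interp",
    )
    print(state.channel_names)   # ("signal", "control")
    print(state.signals.shape)   # (n_channels, n_samples)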