PyPI - bidsreader - Versions diffs - 0.1.0__py3-none-any.whl - Mend

bidsreader 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

bidsreader/__init__.py +15 -0
bidsreader/_errorwrap.py +50 -0
bidsreader/basereader.py +208 -0
bidsreader/cmlbidsreader.py +269 -0
bidsreader/convert.py +57 -0
bidsreader/exc.py +23 -0
bidsreader/filtering.py +178 -0
bidsreader/helpers.py +148 -0
bidsreader/units.py +287 -0
bidsreader-0.1.0.dist-info/METADATA +494 -0
bidsreader-0.1.0.dist-info/RECORD +14 -0
bidsreader-0.1.0.dist-info/WHEEL +5 -0
bidsreader-0.1.0.dist-info/licenses/LICENSE +21 -0
bidsreader-0.1.0.dist-info/top_level.txt +1 -0

bidsreader/filtering.py ADDED Viewed

@@ -0,0 +1,178 @@
+import mne
+import numpy as np
+import pandas as pd
+from typing import Iterable, Optional, Dict
+from ._errorwrap import public_api
+def _label_has_trial_type(label: str, trial_types: list[str]) -> bool:
+    # exact token match within merged labels like "WORD/STIM"
+    tokens = label.split("/")
+    return any(t in tokens for t in trial_types)
+def _ensure_list(trial_types: Iterable[str] | str) -> list[str]:
+    return [trial_types] if isinstance(trial_types, str) else list(trial_types)
+@public_api
+def filter_events_df_by_trial_types(
+    events_df: pd.DataFrame,
+    trial_types: Iterable[str] | str,
+) -> tuple[pd.DataFrame, np.ndarray]:
+    tt = _ensure_list(trial_types)
+    mask = events_df["trial_type"].isin(tt).to_numpy()
+    filtered_df = events_df.loc[mask].copy()
+    # integer positions (0..n-1) into the *original* events_df rows
+    df_idx = np.flatnonzero(mask)
+    return filtered_df, df_idx
+@public_api
+def filter_raw_events_by_trial_types(
+    raw: mne.io.BaseRaw,
+    trial_types: Iterable[str] | str,
+) -> tuple[np.ndarray, Dict[str, int], np.ndarray]:
+    tt = _ensure_list(trial_types)
+    events_raw, event_id = mne.events_from_annotations(raw)
+    filtered_event_id = {
+        k: v for k, v in event_id.items()
+        if _label_has_trial_type(k, tt)
+    }
+    if filtered_event_id:
+        codes = np.fromiter(filtered_event_id.values(), dtype=int)
+        mask = np.isin(events_raw[:, 2], codes)
+        filtered_events = events_raw[mask]
+        raw_idx = np.flatnonzero(mask)  # indices into events_raw
+    else:
+        filtered_events = events_raw[:0].copy()
+        filtered_event_id = {}
+        raw_idx = np.array([], dtype=int)
+    return filtered_events, filtered_event_id, raw_idx
+@public_api
+def filter_epochs_by_trial_types(
+    epochs: mne.Epochs,
+    trial_types: Iterable[str] | str,
+) -> tuple[mne.Epochs, Dict[str, int], np.ndarray]:
+    tt = _ensure_list(trial_types)
+    keys = [
+        k for k in epochs.event_id.keys()
+        if _label_has_trial_type(k, tt)
+    ]
+    filtered_event_id = {k: epochs.event_id[k] for k in keys}
+    if keys:
+        filtered_epochs = epochs[keys]
+        codes = np.fromiter(filtered_event_id.values(), dtype=int)
+        mask = np.isin(epochs.events[:, 2], codes)
+        ep_idx = np.flatnonzero(mask)  # indices into original epochs
+    else:
+        filtered_epochs = epochs.copy()[[]]
+        ep_idx = np.array([], dtype=int)
+    return filtered_epochs, filtered_event_id, ep_idx
+@public_api
+def filter_by_trial_types(
+    trial_types: Iterable[str] | str,
+    *,
+    events_df: Optional[pd.DataFrame] = None,
+    raw: Optional[mne.io.BaseRaw] = None,
+    epochs: Optional[mne.Epochs] = None,
+) -> tuple[
+    Optional[pd.DataFrame],
+    Optional[np.ndarray],     # filtered_events (from raw)
+    Optional[mne.Epochs],
+    Dict[str, int],
+    np.ndarray,               # filtered_event_idx (0..n-1)
+]:
+    tt = _ensure_list(trial_types)
+    filtered_df: Optional[pd.DataFrame] = None
+    filtered_events: Optional[np.ndarray] = None
+    filtered_epochs: Optional[mne.Epochs] = None
+    df_idx: Optional[np.ndarray] = None
+    raw_idx: Optional[np.ndarray] = None
+    ep_idx: Optional[np.ndarray] = None
+    event_id_raw: Optional[Dict[str, int]] = None
+    event_id_epochs: Optional[Dict[str, int]] = None
+    n_df = None
+    n_raw = None
+    n_ep = None
+    # ---- DF ----
+    if events_df is not None:
+        filtered_df, df_idx = filter_events_df_by_trial_types(events_df, tt)
+        n_df = len(filtered_df)
+    # ---- RAW ----
+    raw_onsets = None
+    if raw is not None:
+        filtered_events, event_id_raw, raw_idx = filter_raw_events_by_trial_types(raw, tt)
+        n_raw = int(filtered_events.shape[0])
+        raw_onsets = filtered_events[:, 0].astype(int)
+    # ---- EPOCHS ----
+    ep_onsets = None
+    if epochs is not None:
+        filtered_epochs, event_id_epochs, ep_idx = filter_epochs_by_trial_types(epochs, tt)
+        n_ep = len(filtered_epochs)
+        if event_id_epochs:
+            codes = np.fromiter(event_id_epochs.values(), dtype=int)
+            mask = np.isin(epochs.events[:, 2], codes)
+            ep_onsets = epochs.events[mask, 0].astype(int)
+        else:
+            ep_onsets = np.array([], dtype=int)
+    # ---- Check event_id consistency (keys) ----
+    if event_id_raw is not None and event_id_epochs is not None:
+        if set(event_id_raw.keys()) != set(event_id_epochs.keys()):
+            raise ValueError(
+                "filtered_event_id key mismatch between raw and epochs.\n"
+                f"raw keys={sorted(event_id_raw.keys())}\n"
+                f"epochs keys={sorted(event_id_epochs.keys())}"
+            )
+        # Strong alignment check: same event onsets
+        if raw_onsets is not None and ep_onsets is not None:
+            if raw_onsets.shape != ep_onsets.shape or not np.array_equal(raw_onsets, ep_onsets):
+                raise ValueError(
+                    "raw/epochs trial alignment mismatch: filtered event sample onsets differ.\n"
+                    f"n_raw={len(raw_onsets)} n_epochs={len(ep_onsets)}"
+                )
+        filtered_event_id = event_id_raw  # choose one
+    elif event_id_raw is not None:
+        filtered_event_id = event_id_raw
+    elif event_id_epochs is not None:
+        filtered_event_id = event_id_epochs
+    else:
+        filtered_event_id = {}
+    # ---- Trial count consistency (for whichever inputs are provided) ----
+    ns = [n for n in (n_df, n_raw, n_ep) if n is not None]
+    n = ns[0] if ns else 0
+    if any(x != n for x in ns):
+        raise ValueError(
+            "Trial count mismatch across provided inputs.\n"
+            f"n_df={n_df} n_raw={n_raw} n_epochs={n_ep}"
+        )
+    filtered_event_idx = np.arange(n, dtype=int)
+    return filtered_df, filtered_events, filtered_epochs, filtered_event_id, filtered_event_idx

bidsreader/helpers.py ADDED Viewed

@@ -0,0 +1,148 @@
+import numpy as np
+import pandas as pd
+from typing import Iterable, Any, Tuple, Sequence, Optional, Dict
+import re
+from .exc import InvalidOptionError
+def validate_option(name: str, value: Any, allowed: Iterable[Any]) -> Any:
+    if value is None:
+        return None
+    if value not in allowed:
+        raise InvalidOptionError(f"{name} must be one of: {allowed}. Got {value!r}")
+    return value
+def space_from_coordsystem_fname(fname: str) -> Optional[str]:
+    if "_space-" not in fname:
+        return None
+    return fname.split("_space-")[1].split("_coordsystem.json")[0]
+def add_prefix(value: Optional[str], prefix: str) -> Optional[str]:
+    if value is None:
+        return None
+    value = str(value)
+    if value.startswith(prefix):
+        return value
+    return f"{prefix}{value}"
+def merge_duplicate_sample_events(evs: pd.DataFrame, sample_col: str = "sample") -> pd.DataFrame:
+    df = evs.copy()
+    # Ensure stable ordering so "first" is well-defined.
+    df["_orig_order"] = np.arange(len(df))
+    def first_non_nan(s: pd.Series):
+        s2 = s.dropna()
+        return s2.iloc[0] if len(s2) else np.nan
+    def merge_series(s: pd.Series):
+        # General "take the first non-NaN; if only one non-NaN, that's what it is" behavior
+        return first_non_nan(s)
+    def merge_trial_type(s: pd.Series):
+        vals = [v for v in s.tolist() if pd.notna(v)]
+        # preserve order but avoid duplicates like A/A
+        uniq = []
+        for v in vals:
+            if v not in uniq:
+                uniq.append(v)
+        if not uniq:
+            return np.nan
+        return "/".join(map(str, uniq))
+    merged_rows = []
+    for sample_val, g in df.sort_values("_orig_order").groupby(sample_col, sort=False):
+        out = {}
+        for col in df.columns:
+            if col in ("_orig_order",):
+                continue
+            if col == "trial_type":
+                out[col] = merge_trial_type(g[col])
+            else:
+                out[col] = merge_series(g[col])
+        merged_rows.append(out)
+    out_df = pd.DataFrame(merged_rows)
+    # If you want to preserve original column order (minus helper col)
+    out_df = out_df[[c for c in evs.columns if c in out_df.columns]]
+    return out_df
+def find_coord_triplets(columns: Sequence[str]) -> Dict[str, Tuple[str, str, str]]:
+        cols = set(columns)
+        triplets = {}
+        if {"x", "y", "z"} <= cols:
+            triplets[""] = ("x", "y", "z")
+        prefixed = [c for c in cols if re.match(r"^.+\.(x|y|z)$", c)]
+        prefixes = set(c.rsplit(".", 1)[0] for c in prefixed)
+        for p in prefixes:
+            x, y, z = f"{p}.x", f"{p}.y", f"{p}.z"
+            if {x, y, z} <= cols:
+                triplets[p] = (x, y, z)
+        return triplets
+def combine_bipolar_electrodes(
+        pairs_df: pd.DataFrame,
+        elec_df: pd.DataFrame,
+        pair_col: str = "name",
+        elec_name_col: str = "name",
+        region_cols: Sequence[str] = ("wb.region", "ind.region", "stein.region"),
+    ) -> pd.DataFrame:
+    sep = "-"
+    out = pairs_df.copy()
+    # Split bipolar pair
+    ch = out[pair_col].astype(str).str.split(sep, n=1, expand=True)
+    out["ch1"] = ch[0].str.strip()
+    out["ch2"] = ch[1].str.strip()
+    # Detect all coordinate triplets present in electrodes df
+    coord_triplets = find_coord_triplets(elec_df.columns)
+    # Keep electrode name + region cols + all coordinate columns we found
+    coord_cols = [c for trip in coord_triplets.values() for c in trip]
+    keep_cols = [elec_name_col, *region_cols, *coord_cols]
+    keep_cols = [c for c in keep_cols if c in elec_df.columns]  # safety
+    look = elec_df[keep_cols].copy()
+    # Merge ch1 metadata
+    look1 = look.add_suffix("_ch1").rename(columns={f"{elec_name_col}_ch1": "ch1"})
+    out = out.merge(look1, on="ch1", how="left")
+    # Merge ch2 metadata
+    look2 = look.add_suffix("_ch2").rename(columns={f"{elec_name_col}_ch2": "ch2"})
+    out = out.merge(look2, on="ch2", how="left")
+    # Region agreement
+    for rc in region_cols:
+        if f"{rc}_ch1" in out.columns and f"{rc}_ch2" in out.columns:
+            a = out[f"{rc}_ch1"]
+            b = out[f"{rc}_ch2"]
+            out[f"{rc}_pair"] = np.where(a.notna() & (a == b), a, np.nan)
+    # Midpoints for every detected coordinate triplet
+    for prefix, (xcol, ycol, zcol) in coord_triplets.items():
+        for col in (xcol, ycol, zcol):
+            a = out[f"{col}_ch1"]
+            b = out[f"{col}_ch2"]
+            mid_name = f"{col}_mid"  # e.g., "x_mid" or "tal.x_mid"
+            out[mid_name] = np.where(a.notna() & b.notna(), (a + b) / 2.0, np.nan)
+    return out
+def normalize_trial_types(trial_types: Iterable[str]) -> set[str]:
+    return {str(t) for t in trial_types}
+def match_event_label(label: str, trial_types: list[str]) -> bool:
+    # exact token match within merged labels like "WORD/STIM"
+    tokens = label.split("/")
+    return any(t in tokens for t in trial_types)

bidsreader/units.py ADDED Viewed

@@ -0,0 +1,287 @@
+from __future__ import annotations
+import mne
+import numpy as np
+from typing import Optional, Union, TYPE_CHECKING
+from ._errorwrap import public_api
+if TYPE_CHECKING:
+    from ptsa.data.timeseries import TimeSeries
+# ---------- unit constants ----------
+# Power-of-ten exponent -> unit prefix, used to build a unit string from a
+# channel's unit multiplier. Exponents without a prefix in this table's
+# scheme get a placeholder of the form "e<exp>_" (e.g. "e-4_"); the resulting
+# strings (e.g. "e-4_V") are not keys of _UNIT_EXPONENTS below.
+_EXP_TO_PREFIX = {
+    15: "P",   14: "e14_",  13: "e13_",  12: "T",   11: "e11_",  10: "e10_",
+    9: "G",    8: "e8_",    7: "e7_",    6: "M",    5: "e5_",    4: "e4_",
+    3: "k",    2: "h",      1: "da",
+    0: "",
+    -1: "d",   -2: "c",
+    -3: "m",   -4: "e-4_",  -5: "e-5_",  -6: "u",   -7: "e-7_",  -8: "e-8_",
+    -9: "n",   -10: "e-10_", -11: "e-11_", -12: "p", -13: "e-13_", -14: "e-14_",
+    -15: "f",
+}
+# Recognised unit string -> power-of-ten exponent relative to its base unit
+# (the last character of the key: "V" or "T"). Membership in this table is
+# what the module treats as a "known" unit.
+_UNIT_EXPONENTS = {
+    # Volts
+    "PV": 15, "TV": 12, "GV": 9, "MV": 6, "kV": 3,
+    "hV": 2, "daV": 1,
+    "V": 0,
+    "dV": -1, "cV": -2,
+    "mV": -3, "uV": -6, "nV": -9, "pV": -12, "fV": -15,
+    # Tesla
+    "PT": 15, "TT": 12, "GT": 9, "MT": 6, "kT": 3,
+    "hT": 2, "daT": 1,
+    "T": 0,
+    "dT": -1, "cT": -2,
+    "mT": -3, "uT": -6, "nT": -9, "pT": -12, "fT": -15,
+}
+# FIFF unit code -> base unit symbol; None marks a code with no usable base.
+# NOTE(review): MNE's FIFF constants appear to define tesla as 112 and 201 as
+# tesla per metre (T/m) - confirm that 201 -> "T" is intended.
+_FIFF_UNIT_TO_BASE = {107: "V", 201: "T", 0: None}
+# ---------- internal helpers ----------
+def _normalize_unit(unit: str) -> str:
+    return (
+        unit
+        .replace("\u00b5", "u")   # micro sign
+        .replace("\u03bc", "u")   # greek mu
+        .replace("\u03a9", "Ohm") # greek omega
+        .replace("\u2126", "Ohm") # ohm sign
+        .replace("\u00b0", "deg") # degree sign
+    )
+def _detect_unit_mne(inst: Union[mne.io.BaseRaw, mne.Epochs]) -> str:
+    """Detect unit string from an MNE Raw or Epochs object."""
+    eeg_types = {"eeg", "seeg", "ecog", "ieeg", "dbs"}
+    for ch_info in inst.info["chs"]:
+        ch_kind = mne.channel_type(
+            inst.info, inst.ch_names.index(ch_info["ch_name"]),
+        )
+        if ch_kind not in eeg_types:
+            continue
+        fiff_unit = ch_info.get("unit", 0)
+        fiff_mul = ch_info.get("unit_mul", 0)
+        base = _FIFF_UNIT_TO_BASE.get(fiff_unit)
+        if base is None:
+            raise ValueError(
+                f"Unknown FIFF unit code {fiff_unit} on channel "
+                f"'{ch_info['ch_name']}'. Pass current_unit= explicitly."
+            )
+        exp = fiff_mul
+        prefix = _EXP_TO_PREFIX.get(exp, "")
+        return f"{prefix}{base}"
+    raise ValueError(
+        "No EEG/iEEG/SEEG/ECoG channel found. Cannot detect unit."
+    )
+def _detect_unit_ptsa(ts: TimeSeries) -> str:
+    """Detect unit string from a PTSA TimeSeries."""
+    for key in ("units", "unit"):
+        val = ts.attrs.get(key)
+        if val is not None and str(val).strip():
+            unit_str = _normalize_unit(str(val).strip())
+            if unit_str in _UNIT_EXPONENTS:
+                return unit_str
+            raise ValueError(
+                f"TimeSeries has unit '{val}' which is not recognized. "
+                f"Known: {sorted(_UNIT_EXPONENTS.keys())}"
+            )
+    raise ValueError(
+        "TimeSeries has no 'units' or 'unit' attribute. "
+        "Pass current_unit= explicitly."
+    )
+def _convert_mne(
+    inst: Union[mne.io.BaseRaw, mne.Epochs],
+    factor: float,
+    target_unit: str,
+    copy: bool,
+) -> Union[mne.io.BaseRaw, mne.Epochs]:
+    """Scale MNE data and update FIFF unit metadata."""
+    if copy:
+        inst = inst.copy()
+    inst.apply_function(lambda x: x * factor, picks="all", channel_wise=False)
+    base_char = target_unit[-1]
+    target_exp = _UNIT_EXPONENTS[target_unit]
+    fiff_unit_code = {"V": 107, "T": 201}.get(base_char, 0)
+    fiff_mul = max(-15, min(15, target_exp))
+    eeg_kinds = {2, 302, 802, 803}
+    for ch in inst.info["chs"]:
+        if ch.get("kind", 0) in eeg_kinds or ch.get("unit", 0) in (107, 201):
+            ch["unit"] = fiff_unit_code
+            ch["unit_mul"] = fiff_mul
+    return inst
+def _convert_ptsa(
+    ts: TimeSeries,
+    factor: float,
+    target_unit: str,
+    copy: bool,
+) -> TimeSeries:
+    """Scale PTSA TimeSeries data and update attrs."""
+    if copy:
+        result = ts * factor
+    else:
+        ts.values[:] *= factor
+        result = ts
+    result.attrs["units"] = target_unit
+    result.attrs["unit"] = target_unit
+    return result
+def _is_timeseries(obj) -> bool:
+    """Check if obj is a PTSA TimeSeries without requiring PTSA at import time."""
+    try:
+        from ptsa.data.timeseries import TimeSeries
+        return isinstance(obj, TimeSeries)
+    except ImportError:
+        return False
+# ---------- public API ----------
+@public_api
+def detect_unit(
+    data: Union[mne.io.BaseRaw, mne.Epochs, TimeSeries],
+    current_unit: Optional[str] = None,
+) -> str:
+    """Detect or validate the unit of EEG data.
+    Parameters
+    ----------
+    data : mne.io.BaseRaw, mne.Epochs, or PTSA TimeSeries
+        The data object to inspect.
+    current_unit : str, optional
+        If provided, overrides auto-detection. Validated against
+        known units and returned directly.
+    Returns
+    -------
+    str
+        Unit string like "V", "mV", "uV", "nV", "T", etc.
+    Raises
+    ------
+    ValueError
+        If unit cannot be detected and current_unit is not provided.
+    """
+    if current_unit is not None:
+        normalized = _normalize_unit(current_unit)
+        if normalized not in _UNIT_EXPONENTS:
+            raise ValueError(
+                f"Unknown unit '{current_unit}'. "
+                f"Known: {sorted(_UNIT_EXPONENTS.keys())}"
+            )
+        return normalized
+    if isinstance(data, (mne.io.BaseRaw, mne.Epochs)):
+        return _detect_unit_mne(data)
+    if _is_timeseries(data):
+        return _detect_unit_ptsa(data)
+    raise TypeError(
+        f"Cannot detect unit from {type(data).__name__}. "
+        f"Expected mne.io.BaseRaw, mne.Epochs, or TimeSeries."
+    )
+@public_api
+def get_scale_factor(from_unit: str, to_unit: str) -> float:
+    """Compute multiplicative factor to convert between units.
+    Parameters
+    ----------
+    from_unit : str
+        Current unit (e.g. "V").
+    to_unit : str
+        Target unit (e.g. "uV").
+    Returns
+    -------
+    float
+        Multiply data by this value to convert.
+    Examples
+    --------
+    >>> get_scale_factor("V", "uV")
+    1000000.0
+    >>> get_scale_factor("uV", "V")
+    1e-06
+    """
+    from_u = _normalize_unit(from_unit)
+    to_u = _normalize_unit(to_unit)
+    if from_u not in _UNIT_EXPONENTS:
+        raise ValueError(f"Unknown source unit '{from_unit}'")
+    if to_u not in _UNIT_EXPONENTS:
+        raise ValueError(f"Unknown target unit '{to_unit}'")
+    from_base = from_u[-1]
+    to_base = to_u[-1]
+    if from_base != to_base:
+        raise ValueError(
+            f"Cannot convert between different base units: "
+            f"'{from_unit}' ({from_base}) -> '{to_unit}' ({to_base})"
+        )
+    from_exp = _UNIT_EXPONENTS[from_u]
+    to_exp = _UNIT_EXPONENTS[to_u]
+    return 10.0 ** (from_exp - to_exp)
+@public_api
+def convert_unit(
+    data: Union[mne.io.BaseRaw, mne.Epochs, TimeSeries],
+    target: str,
+    *,
+    current_unit: Optional[str] = None,
+    copy: bool = True,
+) -> Union[mne.io.BaseRaw, mne.Epochs, TimeSeries]:
+    """Convert EEG data to a target unit.
+    Parameters
+    ----------
+    data : mne.io.BaseRaw, mne.Epochs, or PTSA TimeSeries
+        The data to convert.
+    target : str
+        Target unit string (e.g. "uV", "mV", "V").
+    current_unit : str, optional
+        Override auto-detection of the current unit. Required if
+        the data object doesn't store unit metadata.
+    copy : bool
+        If True (default), return a copy. If False, modify in place.
+    Returns
+    -------
+    Same type as input, with data scaled to the target unit.
+    """
+    detected = detect_unit(data, current_unit=current_unit)
+    target_normalized = _normalize_unit(target)
+    factor = get_scale_factor(detected, target_normalized)
+    if factor == 1.0:
+        return data.copy() if copy else data
+    if isinstance(data, (mne.io.BaseRaw, mne.Epochs)):
+        return _convert_mne(data, factor, target_normalized, copy)
+    if _is_timeseries(data):
+        return _convert_ptsa(data, factor, target_normalized, copy)
+    raise TypeError(f"Cannot convert type {type(data).__name__}")