PyPI - bidsreader - Versions diffs - 0.2.0__tar.gz → 0.3.0__tar.gz - Mend

bidsreader 0.2.0tar.gz → 0.3.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

{bidsreader-0.2.0 → bidsreader-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bidsreader
-Version: 0.2.0
+Version: 0.3.0
 Summary: Data loader and file reader for the OpenBIDS format
 Author: Computational Memory Lab
 License-Expression: MIT
@@ -19,8 +19,8 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: mne>=1.0
-Requires-Dist: mne-bids>=0.14
-Requires-Dist: numpy>=1.23
+Requires-Dist: mne-bids>=0.12
+Requires-Dist: numpy<2.0,>=1.23
 Requires-Dist: pandas>=1.5
 Provides-Extra: ptsa
 Requires-Dist: ptsa; extra == "ptsa"

{bidsreader-0.2.0 → bidsreader-0.3.0}/bidsreader/__init__.py RENAMED Viewed

@@ -8,9 +8,8 @@ from .filtering import (
 )
 from .convert import mne_epochs_to_ptsa, mne_raw_to_ptsa
 from .units import detect_unit, get_scale_factor, convert_unit
-from .public_helpers import get_data_index
 from collections import namedtuple
-__version__ = "0.2.0"
+__version__ = "0.3.0"
 version_info = namedtuple("VersionInfo", "major,minor,patch")(
     *__version__.split('.'))

bidsreader-0.3.0/bidsreader/_neurorad_algo.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Pair-location algorithms copied verbatim from the CML neurorad pipeline.
+Source of truth:
+    github.com/pennmem/neurorad_pipeline
+    File: localization.py :: Localization.get_pair_coordinate
+    Pinned revision (on rhino2): cf563def3087126b1aedb1dea274172200f3fa75
+    Path checked: /home2/iped/neurorad_pipeline/localization.py
+We copy rather than import to keep neurorad_pipeline off the runtime
+dependency list. A test in tests/test_pair_midpoint_parity.py imports
+neurorad_pipeline as a dev-only dependency and pins this copy against
+the upstream implementation.
+When the upstream algorithm changes, update this file AND the pinned
+revision in the parity test's docstring in the same commit.
+"""
+from __future__ import annotations
+from typing import Dict, Tuple
+import numpy as np
+# Mirrors Localization.VALID_COORDINATE_SPACES (localization.py:13-19).
+VALID_COORDINATE_SPACES: Tuple[str, ...] = (
+    "ct_voxel",
+    "fs",
+    "t1_mri",
+    "t2_mri",
+    "mni",
+)
+# Mirrors Localization.VALID_COORDINATE_TYPES (localization.py:21-24).
+VALID_COORDINATE_TYPES: Tuple[str, ...] = (
+    "raw",
+    "corrected",
+)
+# Mirrors Localization.VALID_ATLASES (localization.py:35-39).
+VALID_ATLASES: Tuple[str, ...] = (
+    "dk",
+    "whole_brain",
+    "mtl",
+)
+# BIDS space label -> (neurorad coordinate_space, coordinate_type).
+# Synthesized from event_creation/submission/neurorad_tasks.py
+# FIELD_NAMES_TABLE (lines 190-198) and the bids-convert CML_TO_BIDS_SPACE
+# map. Spaces not present here (Talairach, t1MRI, fsnativeDural) are
+# valid midpoint targets but have no neurorad atlas lookup.
+BIDS_SPACE_TO_NEURORAD: Dict[str, Tuple[str, str]] = {
+    "fsnative":            ("fs",        "raw"),
+    "fsnativeBrainshift":  ("fs",        "corrected"),
+    "fsaverage":           ("fsaverage", "raw"),
+    "fsaverageBrainshift": ("fsaverage", "corrected"),
+    "MNI152NLin6ASym":     ("mni",       "raw"),
+    "Pixels":              ("ct_voxel",  "raw"),
+}
+def pair_coordinate(coord1, coord2):
+    """Midpoint of two contact coordinates.
+    Verbatim behavior of
+    neurorad_pipeline.localization.Localization.get_pair_coordinate
+    (localization.py:274-289), stripped of its Localization-object
+    plumbing so callers can pass raw arrays.
+    """
+    if coord1 is None or coord2 is None:
+        return None
+    return (np.array(coord1) + np.array(coord2)) / 2
+def pair_coordinate_axis(a, b):
+    """Vectorized midpoint for one coordinate axis across N pairs.
+    NaN on either side propagates, matching the None-returning behavior
+    of scalar pair_coordinate. Intended for pandas/numpy column math.
+    """
+    return (np.asarray(a, dtype=float) + np.asarray(b, dtype=float)) / 2.0

{bidsreader-0.2.0 → bidsreader-0.3.0}/bidsreader/basereader.py RENAMED Viewed

@@ -1,3 +1,5 @@
+import re
 import pandas as pd
 from mne_bids import BIDSPath, get_entity_vals
 from pathlib import Path
@@ -206,3 +208,48 @@ class BaseReader:
                     max_ses = si if max_ses is None else max(max_ses, si)
         return max_ses
+    # ---------- data index ----------
+    def get_data_index(self, root: Union[str, Path] = None, task: str = None) -> pd.DataFrame:
+        """Scan a BIDS root for sessions matching *task* and return a
+        DataFrame with ``subject``, ``task``, ``session`` columns.
+        Parameters default to ``self.root`` and ``self.task`` when called
+        on an instance, but can be overridden with explicit values::
+            # instance — uses reader's own root / task
+            reader = BaseReader(root="/data/BIDS", task="FR1")
+            df = reader.get_data_index()
+            # explicit — useful without a fully configured reader
+            df = reader.get_data_index(root="/other/root", task="catFR1")
+        Subclasses can override to add file-path columns for each
+        expected BIDS output (see ``CMLBIDSReader.get_data_index``).
+        """
+        root = Path(root) if root is not None else self.root
+        task = task if task is not None else self.task
+        if root is None:
+            raise ValueError("root must be provided either on the instance or as an argument")
+        if task is None:
+            raise ValueError("task must be provided either on the instance or as an argument")
+        pat = re.compile(
+            r"sub-(?P<sub>[^_]+)_ses-(?P<ses>[^_]+)_task-" + re.escape(task),
+            re.IGNORECASE,
+        )
+        rows = []
+        for f in root.rglob(f"*task-{task}*beh.tsv"):
+            m = pat.search(f.name)
+            if m:
+                rows.append({
+                    "subject": m.group("sub"),
+                    "task": task,
+                    "session": m.group("ses"),
+                })
+        return (
+            pd.DataFrame(rows)
+            .drop_duplicates()
+            .sort_values(["subject", "session"])
+            .reset_index(drop=True)
+        )

{bidsreader-0.2.0 → bidsreader-0.3.0}/bidsreader/cmlbidsreader.py RENAMED Viewed

@@ -54,14 +54,32 @@ class CMLBIDSReader(BaseReader):
             return "ieeg"
         return None
-    def _determine_space(self) -> str:
+    DEFAULT_SPACE = "MNI152NLin6ASym"
+    SPACE_PREFERENCE = ("MNI152NLin6ASym", "Talairach", "fsaverage", "Pixels", "fsaverageBrainshift", "fsnative", "fsnativeBrainshift", "fsnativeDural", "t1MRI")
+    def _coordsystem_dir(self) -> Path:
         subject_root = self._subject_root()
-        data_dir = subject_root / self._add_bids_prefix("session", self.session) / self.device
+        return subject_root / self._add_bids_prefix("session", self.session) / self.device
+    def list_available_spaces(self) -> list:
+        """Return the sorted list of BIDS space names present as
+        *_coordsystem.json files for this session."""
+        data_dir = self._coordsystem_dir()
+        if not data_dir.exists():
+            return []
+        spaces = []
+        for m in data_dir.glob("*_coordsystem.json"):
+            space = space_from_coordsystem_fname(m.name)
+            if space is not None:
+                spaces.append(space)
+        return sorted(set(spaces))
+    def _determine_space(self) -> str:
+        data_dir = self._coordsystem_dir()
         if not data_dir.exists():
             raise FileNotFoundBIDSError(
                 f"determine_space: data directory does not exist.\n"
-                f"subject_root={subject_root}\n"
                 f"data_dir={data_dir}"
             )
@@ -72,22 +90,38 @@ class CMLBIDSReader(BaseReader):
                 f"data_dir={data_dir}"
             )
-        if len(matches) > 1:
-            raise AmbiguousMatchError(
-                f"determine_space: multiple coordsystem files found.\n"
-                f"files={[m.name for m in matches]}"
-            )
-        fname = matches[0].name
-        space = space_from_coordsystem_fname(fname)
-        if space is None:
-            raise DataParseError(
-                f"determine_space: could not parse space from filename.\n"
-                f"filename={fname}"
-            )
-        return space
+        spaces = []
+        for m in matches:
+            space = space_from_coordsystem_fname(m.name)
+            if space is None:
+                raise DataParseError(
+                    f"determine_space: could not parse space from filename.\n"
+                    f"filename={m.name}"
+                )
+            spaces.append(space)
+        spaces = sorted(set(spaces))
+        if len(spaces) == 1:
+            return spaces[0]
+        # Multiple spaces present: pick the highest-priority preferred space
+        # that is available.
+        for preferred in self.SPACE_PREFERENCE:
+            if preferred in spaces:
+                return preferred
+        # None of the preferred defaults is present. Rather than make the
+        # caller pass space= explicitly, fall back to a default: DEFAULT_SPACE
+        # if it happens to be available, otherwise the first space (sorted).
+        chosen = self.DEFAULT_SPACE if self.DEFAULT_SPACE in spaces else spaces[0]
+        warnings.warn(
+            f"determine_space: multiple spaces found and none of the "
+            f"preferred defaults {self.SPACE_PREFERENCE} present. "
+            f"Available spaces: {spaces}. Defaulting to {chosen!r}. "
+            f"Pass space=<one of these> to choose explicitly.",
+            RuntimeWarning,
+        )
+        return chosen
     def _validate_acq(self, acquisition: Optional[str]) -> Optional[str]:
         if not self.is_intracranial():
@@ -99,10 +133,10 @@ class CMLBIDSReader(BaseReader):
     def _get_needed_fields(self):
         return self.INTRACRANIAL_FIELDS if self.is_intracranial() else self.SCALP_FIELDS
-    def _attach_bipolar_midpoint_montage(self, raw: mne.io.BaseRaw) -> None:
+    def _attach_bipolar_midpoint_montage(self, raw: mne.io.BaseRaw, space: Optional[str] = None) -> None:
         pairs_df = self.load_channels("bipolar")
-        elec_df = self.load_electrodes()
-        combo = combine_bipolar_electrodes(pairs_df, elec_df)
+        elec_df = self.load_electrodes(space=space)
+        combo = combine_bipolar_electrodes(pairs_df, elec_df, space=self.space)
         if not {"name", "x_mid", "y_mid", "z_mid"}.issubset(combo.columns):
             return
@@ -145,13 +179,25 @@ class CMLBIDSReader(BaseReader):
         return pd.read_csv(matches[0].fpath, sep="\t")
+    def _space_file(self, space: str, suffix: str, extension: str) -> Path:
+        """Build a space-<label>_<suffix>.<ext> path manually, bypassing
+        mne_bids.BIDSPath validation (which rejects non-standard space
+        labels like 'fsnative', 'fsnativeDural', 'fsaverageBrainshift')."""
+        data_dir = self._coordsystem_dir()
+        task_part = f"_task-{self.task}" if self.is_intracranial() and self.task else ""
+        fname = (
+            f"sub-{self.subject}_"
+            f"ses-{self.session}"
+            f"{task_part}"
+            f"_space-{space}_{suffix}{extension}"
+        )
+        return data_dir / fname
     @public_api
-    def load_electrodes(self) -> pd.DataFrame:
+    def load_electrodes(self, space: Optional[str] = None) -> pd.DataFrame:
         self._require(self._get_needed_fields(), context="load_electrodes")
-        _task = self.task if self.is_intracranial() else None
-        bp = self._bp(datatype=self.device, suffix="electrodes", space=self.space, task=_task, extension=".tsv")
-        return pd.read_csv(bp.fpath, sep="\t")
+        _space = space if space is not None else self.space
+        return pd.read_csv(self._space_file(_space, "electrodes", ".tsv"), sep="\t")
     @public_api
     def load_channels(self, acquisition: Optional[str] = None) -> pd.DataFrame:
@@ -162,28 +208,25 @@ class CMLBIDSReader(BaseReader):
         return pd.read_csv(bp.fpath, sep="\t")
     @public_api
-    def load_combined_channels(self, acquisition: Optional[str] = None) -> pd.DataFrame:
+    def load_combined_channels(self, acquisition: Optional[str] = None, space: Optional[str] = None) -> pd.DataFrame:
         self._require(self._get_needed_fields(), context="load_combined_channels")
         channel_df = self.load_channels(acquisition)
-        elec_df = self.load_electrodes()
+        elec_df = self.load_electrodes(space=space)
         if acquisition == "monopolar" or acquisition is None:
             return channel_df.merge(elec_df, on="name", how="left", suffixes=("", "_elec"))
         if acquisition == "bipolar":
-            return combine_bipolar_electrodes(channel_df, elec_df)
+            return combine_bipolar_electrodes(channel_df, elec_df, space=self.space)
     @public_api
-    def load_coordsystem_desc(self) -> Dict:
+    def load_coordsystem_desc(self, space: Optional[str] = None) -> Dict:
         self._require(self._get_needed_fields(), context="load_coordsystem")
-        _task = self.task if self.is_intracranial() else None
-        bp = self._bp(datatype=self.device, suffix="coordsystem", space=self.space, task=_task, extension=".json")
-        with open(bp.fpath, "r") as f:
+        _space = space if space is not None else self.space
+        with open(self._space_file(_space, "coordsystem", ".json"), "r") as f:
             return json.load(f)
     @public_api
-    def load_raw(self, acquisition: Optional[str] = None) -> mne.io.BaseRaw:
+    def load_raw(self, acquisition: Optional[str] = None, extension: Optional[str] = None) -> mne.io.BaseRaw:
         self._require(self._get_needed_fields(), context="load_raw")
         acq = self._validate_acq(acquisition)
@@ -193,6 +236,15 @@ class CMLBIDSReader(BaseReader):
             bp_kwargs["acquisition"] = acq
         bp = self._bp(**bp_kwargs)
+        if extension is None:
+            for ext in (".bdf", ".edf", ".vhdr", ".set", ".nwb", ".fif"):
+                candidate = bp.copy().update(suffix=self.device, extension=ext).fpath
+                if candidate.exists():
+                    bp = bp.copy().update(suffix=self.device, extension=ext)
+                    break
+        else :
+            bp = bp.copy().update(suffix=self.device, extension=extension)
         with warnings.catch_warnings():
             warnings.filterwarnings(
                 "ignore",
@@ -204,6 +256,16 @@ class CMLBIDSReader(BaseReader):
                 message=r".*is not an MNE-Python coordinate frame.*",
                 category=RuntimeWarning,
             )
+            warnings.filterwarnings(
+                "ignore",
+                message=r"Expected to find a single (electrodes\.tsv|coordsystem\.json) file.*",
+                category=RuntimeWarning,
+            )
+            warnings.filterwarnings(
+                "ignore",
+                message=r"participants\.tsv file not found.*",
+                category=RuntimeWarning,
+            )
             raw = read_raw_bids(bp)
         if self.is_intracranial() and acq == "bipolar":
@@ -222,9 +284,10 @@ class CMLBIDSReader(BaseReader):
         event_repeated: str = "merge",
         channels: Optional[Iterable[str]] = None,
         preload: bool = False,
+        extension: Optional[str] = None
     ) -> mne.Epochs:
         self._require(self._get_needed_fields(), context="load_epochs")
-        raw = self.load_raw(acquisition=acquisition)
+        raw = self.load_raw(acquisition=acquisition, extension=extension)
         all_events_raw, all_event_id = mne.events_from_annotations(raw)
@@ -267,3 +330,69 @@ class CMLBIDSReader(BaseReader):
             event_repeated=event_repeated,
             picks=picks,
         )
+    # ---------- data index ----------
+    def get_data_index(self, root: Union[str, Path] = None, task: str = None) -> pd.DataFrame:
+        """Scan a BIDS root and return a session-level DataFrame with
+        file-path columns for each major BIDS output.
+        Parameters default to ``self.root`` and ``self.task`` when called
+        on an instance, but can be overridden with explicit values.
+        Extends ``BaseReader.get_data_index`` (subject / task / session)
+        with columns whose values are the file path if the file exists,
+        or ``None`` if it doesn't:
+        - ``beh`` — behavioral events TSV
+        - ``eeg`` — scalp EEG recording (edf/bdf/set/vhdr)
+        - ``mono_ieeg`` — monopolar iEEG recording (edf/bdf)
+        - ``bi_ieeg`` — bipolar iEEG recording (edf/bdf)
+        - ``mono_channels`` — monopolar channels TSV
+        - ``bi_channels`` — bipolar channels TSV
+        - ``eeg_events`` / ``ieeg_events`` — device-level events TSV
+        - ``electrodes`` — electrodes TSV
+        - ``coordsystem`` — coordinate system JSON
+        """
+        root = root if root is not None else self.root
+        task = task if task is not None else self.task
+        df = super().get_data_index(root, task)
+        if df.empty:
+            return df
+        root = Path(root)
+        # File patterns to search for. Each entry is (column_name, subdir,
+        # glob_pattern). Patterns use {pfx} for the BIDS filename prefix.
+        _PATTERNS = [
+            ("beh",            "beh",  "{pfx}_beh.tsv"),
+            ("eeg",            "eeg",  "{pfx}_eeg.*"),
+            ("mono_ieeg",      "ieeg", "{pfx}_acq-monopolar_ieeg.*"),
+            ("bi_ieeg",        "ieeg", "{pfx}_acq-bipolar_ieeg.*"),
+            ("mono_channels",  "ieeg", "{pfx}_acq-monopolar_channels.tsv"),
+            ("bi_channels",    "ieeg", "{pfx}_acq-bipolar_channels.tsv"),
+            ("eeg_channels",   "eeg",  "{pfx}_channels.tsv"),
+            ("ieeg_events",    "ieeg", "{pfx}_events.tsv"),
+            ("eeg_events",     "eeg",  "{pfx}_events.tsv"),
+            ("electrodes",     "ieeg", "{pfx}_*electrodes.tsv"),
+            ("coordsystem",    "ieeg", "{pfx}_*coordsystem.json"),
+        ]
+        # Data-file extensions to accept (skip sidecars).
+        _DATA_EXTS = {".edf", ".bdf", ".set", ".vhdr", ".fif", ".nwb", ".mff"}
+        new_cols = {col: [None] * len(df) for col, _, _ in _PATTERNS}
+        for idx, row in df.iterrows():
+            sub, ses = row["subject"], row["session"]
+            pfx = f"sub-{sub}_ses-{ses}_task-{task}"
+            sess_dir = root / f"sub-{sub}" / f"ses-{ses}"
+            for col, subdir, pat in _PATTERNS:
+                glob_pat = pat.format(pfx=pfx)
+                matches = list((sess_dir / subdir).glob(glob_pat))
+                # For recording files (eeg/ieeg), keep only data files.
+                if col in ("eeg", "mono_ieeg", "bi_ieeg"):
+                    matches = [m for m in matches if m.suffix in _DATA_EXTS]
+                if matches:
+                    new_cols[col][idx] = str(matches[0])
+        for col, _, _ in _PATTERNS:
+            df[col] = new_cols[col]
+        return df

{bidsreader-0.2.0 → bidsreader-0.3.0}/bidsreader/helpers.py RENAMED Viewed

@@ -1,8 +1,27 @@
+"""Utility helpers for bidsreader.
+Pair-coordinate math (``combine_bipolar_electrodes``) uses
+``pair_coordinate_axis`` from :mod:`._neurorad_algo`, which is a
+verbatim copy of the CML neurorad pipeline's
+``Localization.get_pair_coordinate``. The rule is the same in every
+BIDS coordinate space we support — ``MNI152NLin6ASym``, ``Talairach``,
+``fsaverage``, ``fsaverageBrainshift``, ``fsnative``,
+``fsnativeBrainshift``, ``fsnativeDural``, ``t1MRI``, ``Pixels`` —
+because those per-space contact coordinates have already been
+produced by the pipeline's space-specific transforms before BIDS
+export.
+For the BIDS space ↔ CML ``coordinate_space`` / ``coordinate_type``
+mapping see ``eeg_validation.preparers.montage.CML_TO_BIDS_SPACE`` or
+:attr:`bidsreader._neurorad_algo.BIDS_SPACE_TO_NEURORAD`.
+"""
 import numpy as np
 import pandas as pd
 from typing import Iterable, Any, Tuple, Sequence, Optional, Dict
 import re
 from .exc import InvalidOptionError
+from ._neurorad_algo import pair_coordinate_axis
 def validate_option(name: str, value: Any, allowed: Iterable[Any]) -> Any:
     if value is None:
@@ -95,7 +114,23 @@ def combine_bipolar_electrodes(
         pair_col: str = "name",
         elec_name_col: str = "name",
         region_cols: Sequence[str] = ("wb.region", "ind.region", "stein.region"),
+        space: Optional[str] = None,
     ) -> pd.DataFrame:
+    """Join bipolar pairs with electrode metadata and compute per-pair
+    coordinate midpoints, matching the neurorad pipeline's pair-location
+    algorithm (``Localization.get_pair_coordinate`` — see
+    :mod:`bidsreader._neurorad_algo`). Midpoint is taken in whatever BIDS
+    space ``elec_df`` carries; upstream brainshift / nonlinear-warp
+    corrections are expected to have already been applied per-contact.
+    Region columns from ``region_cols`` are carried through per-contact
+    (as ``{col}_ch1`` / ``{col}_ch2``) but no ``{col}_pair`` column is
+    synthesized. Neurorad assigns pair-region labels by independently
+    looking up the atlas at each pair's midpoint voxel, which cannot be
+    reproduced from contact-level agreement. Callers that need
+    pair-region labels should pull them from the upstream
+    ``pairs.json`` via ``enrich_pairs_with_cml_regions``.
+    """
     sep = "-"
     out = pairs_df.copy()
@@ -122,20 +157,18 @@ def combine_bipolar_electrodes(
     look2 = look.add_suffix("_ch2").rename(columns={f"{elec_name_col}_ch2": "ch2"})
     out = out.merge(look2, on="ch2", how="left")
-    # Region agreement
-    for rc in region_cols:
-        if f"{rc}_ch1" in out.columns and f"{rc}_ch2" in out.columns:
-            a = out[f"{rc}_ch1"]
-            b = out[f"{rc}_ch2"]
-            out[f"{rc}_pair"] = np.where(a.notna() & (a == b), a, np.nan)
-    # Midpoints for every detected coordinate triplet
+    # Midpoints for every detected coordinate triplet. pair_coordinate_axis
+    # is a verbatim copy of neurorad_pipeline.localization.Localization
+    # .get_pair_coordinate reduced to a single axis.
     for prefix, (xcol, ycol, zcol) in coord_triplets.items():
         for col in (xcol, ycol, zcol):
             a = out[f"{col}_ch1"]
             b = out[f"{col}_ch2"]
             mid_name = f"{col}_mid"  # e.g., "x_mid" or "tal.x_mid"
-            out[mid_name] = np.where(a.notna() & b.notna(), (a + b) / 2.0, np.nan)
+            mid = pair_coordinate_axis(a, b)
+            if space == "Pixels":
+                mid = np.floor(mid)
+            out[mid_name] = np.where(a.notna() & b.notna(), mid, np.nan)
     return out

bidsreader-0.3.0/bidsreader/region_enrichment.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""Opt-in enrichment that attaches pair-level region labels from
+the upstream CML ``pairs.json`` to a bidsreader pairs DataFrame.
+Pure-BIDS users don't need this module. It exists only for the hybrid
+case where a subject's BIDS export sits alongside the original
+neurorad-pipeline output on rhino and the caller wants the pair-region
+labels that neurorad computed (via independent atlas lookup at each
+pair's midpoint voxel). The BIDS side can't reproduce those labels
+from contact-level data, so we pull them from the upstream artifact.
+See :mod:`._neurorad_algo` for why pair region labels differ from the
+contact-level agreement heuristic.
+"""
+from __future__ import annotations
+from typing import Iterable, Optional
+import pandas as pd
+_DEFAULT_REGION_COLS: tuple[str, ...] = (
+    "ind.region",
+    "ind.corrected.region",
+    "avg.region",
+    "avg.corrected.region",
+    "mni.region",
+    "hcp.region",
+    "stein.region",
+    "das.region",
+)
+def enrich_pairs_with_cml_regions(
+    pairs_df: pd.DataFrame,
+    *,
+    subject: Optional[str] = None,
+    experiment: Optional[str] = None,
+    session: Optional[int] = None,
+    localization: Optional[int] = 0,
+    montage: Optional[int] = 0,
+    reader=None,
+    label_col: str = "name",
+    region_cols: Optional[Iterable[str]] = None,
+) -> pd.DataFrame:
+    """Return a copy of ``pairs_df`` with CML ``*.region`` columns joined on.
+    ``pairs_df`` is the output of
+    :func:`bidsreader.helpers.combine_bipolar_electrodes`, which uses
+    ``name`` as the pair-label column by default.
+    """
+    try:
+        from cmlreaders import CMLReader
+    except ImportError as exc:
+        raise ImportError(
+            "enrich_pairs_with_cml_regions requires cmlreaders. Install "
+            "it (pip install cmlreaders) or load pairs.json manually."
+        ) from exc
+    if reader is None:
+        if subject is None or experiment is None or session is None:
+            raise ValueError(
+                "Provide either a ready CMLReader or "
+                "(subject, experiment, session)."
+            )
+        reader = CMLReader(
+            subject=subject,
+            experiment=experiment,
+            session=session,
+            localization=localization,
+            montage=montage,
+        )
+    cml_pairs = reader.load("pairs")
+    wanted = tuple(region_cols) if region_cols is not None else _DEFAULT_REGION_COLS
+    available = [c for c in wanted if c in cml_pairs.columns]
+    if "label" not in cml_pairs.columns:
+        raise KeyError(
+            "cml_pairs is missing the 'label' column — can't join on pair "
+            "name. Got columns: %r" % (list(cml_pairs.columns),)
+        )
+    right = cml_pairs[["label", *available]].copy()
+    right["label"] = right["label"].astype("string").str.strip()
+    if label_col != "label":
+        right = right.rename(columns={"label": label_col})
+    out = pairs_df.copy()
+    out[label_col] = out[label_col].astype("string").str.strip()
+    return out.merge(right, how="left", on=label_col)

{bidsreader-0.2.0 → bidsreader-0.3.0}/bidsreader.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bidsreader
-Version: 0.2.0
+Version: 0.3.0
 Summary: Data loader and file reader for the OpenBIDS format
 Author: Computational Memory Lab
 License-Expression: MIT
@@ -19,8 +19,8 @@ Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: mne>=1.0
-Requires-Dist: mne-bids>=0.14
-Requires-Dist: numpy>=1.23
+Requires-Dist: mne-bids>=0.12
+Requires-Dist: numpy<2.0,>=1.23
 Requires-Dist: pandas>=1.5
 Provides-Extra: ptsa
 Requires-Dist: ptsa; extra == "ptsa"

{bidsreader-0.2.0 → bidsreader-0.3.0}/bidsreader.egg-info/SOURCES.txt RENAMED Viewed

@@ -6,13 +6,14 @@ pyproject.toml
 .github/workflows/test.yml
 bidsreader/__init__.py
 bidsreader/_errorwrap.py
+bidsreader/_neurorad_algo.py
 bidsreader/basereader.py
 bidsreader/cmlbidsreader.py
 bidsreader/convert.py
 bidsreader/exc.py
 bidsreader/filtering.py
 bidsreader/helpers.py
-bidsreader/public_helpers.py
+bidsreader/region_enrichment.py
 bidsreader/units.py
 bidsreader.egg-info/PKG-INFO
 bidsreader.egg-info/SOURCES.txt
@@ -28,5 +29,8 @@ tests/test_errorwrap.py
 tests/test_exc.py
 tests/test_filtering.py
 tests/test_helpers.py
+tests/test_neurorad_algo.py
+tests/test_pair_midpoint_parity.py
+tests/test_region_enrichment.py
 tests/test_units.py
 tutorials/bidsreader_tutorial.ipynb

{bidsreader-0.2.0 → bidsreader-0.3.0}/bidsreader.egg-info/requires.txt RENAMED Viewed

@@ -1,6 +1,6 @@
 mne>=1.0
-mne-bids>=0.14
-numpy>=1.23
+mne-bids>=0.12
+numpy<2.0,>=1.23
 pandas>=1.5
 [all]

{bidsreader-0.2.0 → bidsreader-0.3.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "bidsreader"
-version = "0.2.0"
+version = "0.3.0"
 description = "Data loader and file reader for the OpenBIDS format"
 readme = "README.md"
 license = "MIT"
@@ -24,8 +24,8 @@ classifiers = [
 ]
 dependencies = [
     "mne>=1.0",
-    "mne-bids>=0.14",
-    "numpy>=1.23",
+    "mne-bids>=0.12",
+    "numpy>=1.23,<2.0",
     "pandas>=1.5",
 ]

{bidsreader-0.2.0 → bidsreader-0.3.0}/tests/test_cmlbidsreader.py RENAMED Viewed

@@ -142,13 +142,27 @@ class TestDetermineSpace:
         with pytest.raises(FileNotFoundBIDSError, match="no.*coordsystem.json"):
             r._determine_space()
-    def test_multiple_coordsystem_files_raises(self, tmp_root):
+    def test_multiple_spaces_prefers_preferred_default(self, tmp_root):
+        """When several spaces are present, the highest-priority space in
+        SPACE_PREFERENCE wins (here MNI152NLin6ASym over fsnative)."""
+        data_dir = self._make_data_dir(tmp_root)
+        (data_dir / "sub-R1001P_ses-0_space-fsnative_coordsystem.json").touch()
+        (data_dir / "sub-R1001P_ses-0_space-MNI152NLin6ASym_coordsystem.json").touch()
+        r = CMLBIDSReader(root=tmp_root, subject="R1001P", task="FR1", session="0", device="ieeg")
+        assert r._determine_space() == "MNI152NLin6ASym"
+    def test_multiple_spaces_none_preferred_falls_back_with_warning(self, tmp_root):
+        """When multiple spaces are present and none is in SPACE_PREFERENCE,
+        _determine_space falls back to a default (warning) instead of raising.
+        Neither MNI nor TAL is a canonical preferred label, so it picks the
+        first available (sorted)."""
         data_dir = self._make_data_dir(tmp_root)
         (data_dir / "sub-R1001P_ses-0_space-MNI_coordsystem.json").touch()
         (data_dir / "sub-R1001P_ses-0_space-TAL_coordsystem.json").touch()
         r = CMLBIDSReader(root=tmp_root, subject="R1001P", task="FR1", session="0", device="ieeg")
-        with pytest.raises(AmbiguousMatchError, match="multiple coordsystem"):
-            r._determine_space()
+        with pytest.warns(RuntimeWarning, match="Defaulting to"):
+            chosen = r._determine_space()
+        assert chosen == "MNI"  # sorted(['MNI', 'TAL'])[0]
     def test_single_valid_match(self, tmp_root):
         data_dir = self._make_data_dir(tmp_root)

{bidsreader-0.2.0 → bidsreader-0.3.0}/tests/test_helpers.py RENAMED Viewed

@@ -7,7 +7,7 @@ What is tested:
   - add_prefix: None input, already-prefixed value, value needing prefix
   - merge_duplicate_sample_events: no duplicates, duplicate trial_type merge, all-NaN
   - find_coord_triplets: bare xyz, prefixed xyz, mixed columns, no triplets
-  - combine_bipolar_electrodes: pair splitting, midpoint computation, region agreement
+  - combine_bipolar_electrodes: pair splitting, midpoint computation, no synthetic pair region
   - normalize_trial_types: converts iterable of strings to set
   - match_event_label: exact match, slash-separated label, no match
 """
@@ -197,7 +197,12 @@ class TestCombineBipolarElectrodes:
         assert result.iloc[0]["y_mid"] == pytest.approx(5.5)
         assert result.iloc[0]["z_mid"] == pytest.approx(9.5)
-    def test_region_agreement(self):
+    def test_no_synthetic_pair_region_column(self):
+        """Neurorad assigns pair-region labels via atlas lookup at the
+        midpoint voxel, not by contact-level agreement. The combiner
+        must therefore NOT synthesize a ``{col}_pair`` column. Per-contact
+        region columns are still present as ``{col}_ch1`` / ``{col}_ch2``.
+        """
         pairs = pd.DataFrame({"name": ["A1-A2"]})
         elecs = pd.DataFrame({
             "name": ["A1", "A2"],
@@ -205,17 +210,74 @@ class TestCombineBipolarElectrodes:
             "wb.region": ["hippocampus", "hippocampus"],
         })
         result = combine_bipolar_electrodes(pairs, elecs)
-        assert result.iloc[0]["wb.region_pair"] == "hippocampus"
+        assert "wb.region_pair" not in result.columns
+        assert result.iloc[0]["wb.region_ch1"] == "hippocampus"
+        assert result.iloc[0]["wb.region_ch2"] == "hippocampus"
-    def test_region_disagreement(self):
+    def test_pixels_space_floors_midpoint(self):
+        """In Pixels (CT voxel) space the midpoint is floored to an integer
+        voxel index, matching the neurorad pipeline's voxel handling."""
         pairs = pd.DataFrame({"name": ["A1-A2"]})
         elecs = pd.DataFrame({
             "name": ["A1", "A2"],
-            "x": [1.0, 2.0], "y": [3.0, 4.0], "z": [5.0, 6.0],
-            "wb.region": ["hippocampus", "amygdala"],
+            "x": [1.0, 2.0], "y": [5.0, 8.0], "z": [9.0, 10.0],
+        })
+        # raw midpoint = (1.5, 6.5, 9.5) -> floored = (1.0, 6.0, 9.0)
+        result = combine_bipolar_electrodes(pairs, elecs, space="Pixels")
+        assert result.iloc[0]["x_mid"] == pytest.approx(1.0)
+        assert result.iloc[0]["y_mid"] == pytest.approx(6.0)
+        assert result.iloc[0]["z_mid"] == pytest.approx(9.0)
+    def test_pixels_space_floors_multiple_pairs(self):
+        """Regression: flooring must work across many pairs at once. A scalar
+        math.floor here raised 'only length-1 arrays can be converted to
+        Python scalars' for any subject with >1 bipolar pair."""
+        pairs = pd.DataFrame({"name": ["A1-A2", "B1-B2"]})
+        elecs = pd.DataFrame({
+            "name": ["A1", "A2", "B1", "B2"],
+            "x": [1.2, 2.9, 3.1, 4.4],
+            "y": [5.0, 6.0, 7.0, 8.0],
+            "z": [9.0, 10.0, 11.0, 12.0],
+        })
+        result = combine_bipolar_electrodes(pairs, elecs, space="Pixels")
+        # A1-A2 raw mid x = 2.05 -> 2.0 ; B1-B2 raw mid x = 3.75 -> 3.0
+        assert list(result["x_mid"]) == pytest.approx([2.0, 3.0])
+    def test_non_pixels_space_does_not_floor(self):
+        """Default / non-Pixels spaces keep the fractional midpoint."""
+        pairs = pd.DataFrame({"name": ["A1-A2"]})
+        elecs = pd.DataFrame({
+            "name": ["A1", "A2"],
+            "x": [1.0, 2.0], "y": [5.0, 8.0], "z": [9.0, 10.0],
+        })
+        result = combine_bipolar_electrodes(pairs, elecs, space="MNI152NLin6ASym")
+        assert result.iloc[0]["x_mid"] == pytest.approx(1.5)
+        assert result.iloc[0]["y_mid"] == pytest.approx(6.5)
+    def test_missing_contact_coords_yield_nan_midpoint(self):
+        """If a pair references a contact missing from elec_df, its midpoint
+        columns are NaN rather than raising."""
+        pairs = pd.DataFrame({"name": ["A1-A2", "A1-GHOST"]})
+        elecs = pd.DataFrame({
+            "name": ["A1", "A2"],
+            "x": [1.0, 3.0], "y": [5.0, 7.0], "z": [9.0, 11.0],
+        })
+        result = combine_bipolar_electrodes(pairs, elecs)
+        assert result.iloc[0]["x_mid"] == pytest.approx(2.0)
+        assert pd.isna(result.iloc[1]["x_mid"])
+    def test_prefixed_coord_triplet_midpoint(self):
+        """Prefixed coordinate triplets (e.g. tal.x/tal.y/tal.z) also get a
+        {prefix}.{axis}_mid column."""
+        pairs = pd.DataFrame({"name": ["A1-A2"]})
+        elecs = pd.DataFrame({
+            "name": ["A1", "A2"],
+            "tal.x": [0.0, 4.0], "tal.y": [0.0, 2.0], "tal.z": [0.0, 6.0],
         })
         result = combine_bipolar_electrodes(pairs, elecs)
-        assert pd.isna(result.iloc[0]["wb.region_pair"])
+        assert result.iloc[0]["tal.x_mid"] == pytest.approx(2.0)
+        assert result.iloc[0]["tal.y_mid"] == pytest.approx(1.0)
+        assert result.iloc[0]["tal.z_mid"] == pytest.approx(3.0)
 # ---------------------------------------------------------------------------

bidsreader-0.3.0/tests/test_neurorad_algo.py ADDED Viewed

@@ -0,0 +1,113 @@
+"""
+Tests for bidsreader._neurorad_algo
+These are upstream-free unit tests for the copied neurorad pair-location
+algorithm. The rhino-only parity test in test_pair_midpoint_parity.py pins
+this implementation against the real neurorad_pipeline; here we exercise the
+copy's behavior directly so it is covered even off rhino.
+What is tested:
+  - pair_coordinate: midpoint, None-in/None-out short-circuit, return type
+  - pair_coordinate_axis: vectorized midpoint, NaN propagation, scalar input
+  - constant maps: shape/content sanity for the mirrored neurorad constants
+"""
+import numpy as np
+import pytest
+from bidsreader._neurorad_algo import (
+    pair_coordinate,
+    pair_coordinate_axis,
+    VALID_COORDINATE_SPACES,
+    VALID_COORDINATE_TYPES,
+    VALID_ATLASES,
+    BIDS_SPACE_TO_NEURORAD,
+)
+# ---------------------------------------------------------------------------
+# pair_coordinate
+# ---------------------------------------------------------------------------
+class TestPairCoordinate:
+    """Tests for pair_coordinate (scalar / single-triplet midpoint)."""
+    def test_midpoint(self):
+        result = pair_coordinate([0.0, 0.0, 0.0], [2.0, 4.0, 6.0])
+        np.testing.assert_allclose(result, [1.0, 2.0, 3.0])
+    def test_negative_and_fractional(self):
+        result = pair_coordinate([-1.5, 0.0, 10.25], [1.5, 0.0, -10.25])
+        np.testing.assert_allclose(result, [0.0, 0.0, 0.0])
+    def test_none_first_returns_none(self):
+        assert pair_coordinate(None, [1.0, 2.0, 3.0]) is None
+    def test_none_second_returns_none(self):
+        assert pair_coordinate([1.0, 2.0, 3.0], None) is None
+    def test_returns_ndarray(self):
+        result = pair_coordinate([1.0, 2.0, 3.0], [3.0, 4.0, 5.0])
+        assert isinstance(result, np.ndarray)
+    def test_accepts_ndarray_input(self):
+        result = pair_coordinate(np.array([0.0, 0.0, 0.0]), np.array([4.0, 8.0, 2.0]))
+        np.testing.assert_allclose(result, [2.0, 4.0, 1.0])
+# ---------------------------------------------------------------------------
+# pair_coordinate_axis
+# ---------------------------------------------------------------------------
+class TestPairCoordinateAxis:
+    """Tests for pair_coordinate_axis (vectorized single-axis midpoint)."""
+    def test_vectorized_midpoint(self):
+        a = [0.0, 10.0, -4.0]
+        b = [2.0, 20.0, 4.0]
+        np.testing.assert_allclose(pair_coordinate_axis(a, b), [1.0, 15.0, 0.0])
+    def test_nan_propagates(self):
+        a = [1.0, np.nan, 3.0]
+        b = [3.0, 5.0, np.nan]
+        result = pair_coordinate_axis(a, b)
+        assert result[0] == pytest.approx(2.0)
+        assert np.isnan(result[1])
+        assert np.isnan(result[2])
+    def test_scalar_input(self):
+        # Single-pair case: must not crash and must return the midpoint.
+        result = pair_coordinate_axis([2.0], [5.0])
+        np.testing.assert_allclose(result, [3.5])
+    def test_returns_float_dtype(self):
+        # Integer input must be promoted so /2 does not truncate.
+        result = pair_coordinate_axis([1, 2], [2, 3])
+        assert result.dtype == np.float64
+        np.testing.assert_allclose(result, [1.5, 2.5])
+# ---------------------------------------------------------------------------
+# Mirrored neurorad constants
+# ---------------------------------------------------------------------------
+class TestNeuroradConstants:
+    """Light sanity checks on the constants copied from localization.py."""
+    def test_valid_spaces(self):
+        assert VALID_COORDINATE_SPACES == ("ct_voxel", "fs", "t1_mri", "t2_mri", "mni")
+    def test_valid_types(self):
+        assert VALID_COORDINATE_TYPES == ("raw", "corrected")
+    def test_valid_atlases(self):
+        assert VALID_ATLASES == ("dk", "whole_brain", "mtl")
+    def test_bids_space_map_values_are_pairs(self):
+        # Every value is a (coordinate_space, coordinate_type) tuple, and the
+        # coordinate_type is one of the valid neurorad types.
+        for space, mapped in BIDS_SPACE_TO_NEURORAD.items():
+            assert len(mapped) == 2
+            assert mapped[1] in VALID_COORDINATE_TYPES
+    def test_pixels_maps_to_ct_voxel(self):
+        assert BIDS_SPACE_TO_NEURORAD["Pixels"] == ("ct_voxel", "raw")
+    def test_brainshift_maps_to_corrected(self):
+        assert BIDS_SPACE_TO_NEURORAD["fsnativeBrainshift"][1] == "corrected"

bidsreader-0.3.0/tests/test_pair_midpoint_parity.py ADDED Viewed

@@ -0,0 +1,137 @@
+"""Parity test: bidsreader._neurorad_algo.pair_coordinate vs. the
+upstream neurorad pipeline's Localization.get_pair_coordinate.
+neurorad_pipeline is NOT a runtime dependency of bidsreader. It is
+loaded here dynamically from its rhino install (default:
+/home2/iped/neurorad_pipeline/) for test purposes only. If the
+directory isn't present (e.g. we're off rhino), the whole module is
+skipped — the copied algorithm in _neurorad_algo.py is still
+exercised by unit tests that don't require upstream.
+When the upstream get_pair_coordinate changes, this test fails. Update
+_neurorad_algo.pair_coordinate AND the pinned revision recorded in
+_neurorad_algo.py's top-of-file docstring in the same commit.
+"""
+from __future__ import annotations
+import importlib.util
+import os
+import sys
+from pathlib import Path
+import numpy as np
+import pytest
+NEURORAD_DIR_CANDIDATES = (
+    Path("/home2/iped/neurorad_pipeline"),
+    Path("/home2/iped/event_creation/neurorad"),
+)
+def _load_upstream_localization():
+    for d in NEURORAD_DIR_CANDIDATES:
+        loc_py = d / "localization.py"
+        if not loc_py.exists():
+            continue
+        # localization.py does `from json_cleaner import ...` so the dir
+        # must be on sys.path before import.
+        sys.path.insert(0, str(d))
+        try:
+            spec = importlib.util.spec_from_file_location(
+                f"_upstream_localization_{d.name}", str(loc_py)
+            )
+            mod = importlib.util.module_from_spec(spec)
+            spec.loader.exec_module(mod)
+            return mod.Localization
+        except Exception:
+            sys.path.pop(0)
+            continue
+    return None
+UpstreamLocalization = _load_upstream_localization()
+pytestmark = pytest.mark.skipif(
+    UpstreamLocalization is None,
+    reason="upstream neurorad_pipeline not available (off rhino?)",
+)
+def _make_loc_with_two_contacts(space, c1_xyz, c2_xyz, ctype="raw"):
+    loc = UpstreamLocalization()
+    loc._contact_dict = {
+        "leads": {
+            "LA": {
+                "type": "D",
+                "n_groups": 1,
+                "contacts": [
+                    {
+                        "name": "LA1",
+                        "lead_group": 0,
+                        "lead_loc": 0,
+                        "grid_group": 0,
+                        "grid_loc": [0, 0],
+                        "atlases": {},
+                        "info": {},
+                        "coordinate_spaces": {space: {ctype: list(c1_xyz)}},
+                    },
+                    {
+                        "name": "LA2",
+                        "lead_group": 0,
+                        "lead_loc": 1,
+                        "grid_group": 0,
+                        "grid_loc": [1, 0],
+                        "atlases": {},
+                        "info": {},
+                        "coordinate_spaces": {space: {ctype: list(c2_xyz)}},
+                    },
+                ],
+                "pairs": [],
+            }
+        }
+    }
+    return loc
+SPACE_TYPE_CASES = [
+    ("ct_voxel", "raw"),
+    ("fs", "raw"),
+    ("fs", "corrected"),
+    ("t1_mri", "raw"),
+    ("t2_mri", "raw"),
+    ("mni", "raw"),
+    ("mni", "corrected"),
+]
+COORD_CASES = [
+    ([0.0, 0.0, 0.0], [2.0, 4.0, 6.0]),
+    ([-1.5, 0.0, 10.25], [1.5, 0.0, -10.25]),
+    ([100.0, 200.0, -50.0], [101.0, 201.0, -49.0]),
+]
+@pytest.mark.parametrize("space,ctype", SPACE_TYPE_CASES)
+@pytest.mark.parametrize("c1,c2", COORD_CASES)
+def test_pair_coordinate_matches_upstream(space, ctype, c1, c2):
+    """pair_coordinate(c1, c2) == Localization.get_pair_coordinate
+    across every (space, coordinate_type) combination."""
+    from bidsreader._neurorad_algo import pair_coordinate
+    loc = _make_loc_with_two_contacts(space, c1, c2, ctype=ctype)
+    upstream = loc.get_pair_coordinate(space, ["LA1", "LA2"], ctype)
+    ours = pair_coordinate(c1, c2)
+    # Upstream returns shape (1, 3); ours returns shape (3,). Flatten.
+    np.testing.assert_allclose(
+        np.asarray(upstream).ravel(), np.asarray(ours).ravel()
+    )
+def test_pair_coordinate_none_in_none_out():
+    """Matches upstream short-circuit behavior (localization.py:286-287)."""
+    from bidsreader._neurorad_algo import pair_coordinate
+    assert pair_coordinate(None, [1.0, 2.0, 3.0]) is None
+    assert pair_coordinate([1.0, 2.0, 3.0], None) is None

bidsreader-0.3.0/tests/test_region_enrichment.py ADDED Viewed

@@ -0,0 +1,111 @@
+"""
+Tests for bidsreader.region_enrichment.enrich_pairs_with_cml_regions
+enrich_pairs_with_cml_regions joins authoritative pair-level region labels
+from the upstream CML pairs.json onto a bidsreader pairs DataFrame. It loads
+those labels via a cmlreaders.CMLReader, but accepts a ready reader object so
+the join logic can be tested without cmlreaders installed or rhino access.
+What is tested:
+  - region columns from pairs.json are joined onto matching pair names
+  - whitespace on both join keys is stripped before merging
+  - only the requested / available region columns are pulled
+  - left-join semantics: unmatched pairs survive with NaN region labels
+  - missing 'label' column in cml_pairs raises KeyError
+  - reader=None with incomplete (subject, experiment, session) raises ValueError
+"""
+import importlib.util
+import pandas as pd
+import pytest
+from bidsreader.region_enrichment import enrich_pairs_with_cml_regions
+# enrich_pairs_with_cml_regions imports cmlreaders unconditionally (even when a
+# reader is injected), so skip the whole module where cmlreaders is unavailable.
+pytestmark = pytest.mark.skipif(
+    importlib.util.find_spec("cmlreaders") is None,
+    reason="cmlreaders not installed",
+)
+class FakeReader:
+    """Stand-in for cmlreaders.CMLReader.load('pairs')."""
+    def __init__(self, pairs_df):
+        self._pairs_df = pairs_df
+    def load(self, what):
+        assert what == "pairs"
+        return self._pairs_df
+def _cml_pairs():
+    return pd.DataFrame({
+        "label": ["A1-A2", "B1-B2"],
+        "ind.region": ["hippocampus", "amygdala"],
+        "stein.region": ["CA1", "BLA"],
+        # a column not in the default wanted-set, to confirm filtering
+        "contact_1": [1, 3],
+    })
+class TestEnrichPairsWithCmlRegions:
+    """Tests for enrich_pairs_with_cml_regions."""
+    def test_joins_region_columns(self):
+        pairs = pd.DataFrame({"name": ["A1-A2", "B1-B2"]})
+        result = enrich_pairs_with_cml_regions(pairs, reader=FakeReader(_cml_pairs()))
+        assert list(result["ind.region"]) == ["hippocampus", "amygdala"]
+        assert list(result["stein.region"]) == ["CA1", "BLA"]
+    def test_only_requested_region_cols_pulled(self):
+        pairs = pd.DataFrame({"name": ["A1-A2"]})
+        result = enrich_pairs_with_cml_regions(
+            pairs, reader=FakeReader(_cml_pairs()), region_cols=["ind.region"]
+        )
+        assert "ind.region" in result.columns
+        assert "stein.region" not in result.columns
+    def test_non_region_columns_not_pulled(self):
+        """contact_1 exists in cml_pairs but is not a region col -> dropped."""
+        pairs = pd.DataFrame({"name": ["A1-A2"]})
+        result = enrich_pairs_with_cml_regions(pairs, reader=FakeReader(_cml_pairs()))
+        assert "contact_1" not in result.columns
+    def test_strips_whitespace_on_join_keys(self):
+        pairs = pd.DataFrame({"name": ["  A1-A2  "]})
+        cml = _cml_pairs()
+        cml.loc[0, "label"] = " A1-A2 "
+        result = enrich_pairs_with_cml_regions(pairs, reader=FakeReader(cml))
+        assert result.iloc[0]["ind.region"] == "hippocampus"
+    def test_left_join_keeps_unmatched_pairs(self):
+        pairs = pd.DataFrame({"name": ["A1-A2", "Z9-Z10"]})
+        result = enrich_pairs_with_cml_regions(pairs, reader=FakeReader(_cml_pairs()))
+        assert len(result) == 2
+        assert result.iloc[0]["ind.region"] == "hippocampus"
+        assert pd.isna(result.iloc[1]["ind.region"])
+    def test_does_not_mutate_input(self):
+        pairs = pd.DataFrame({"name": ["A1-A2"]})
+        enrich_pairs_with_cml_regions(pairs, reader=FakeReader(_cml_pairs()))
+        assert "ind.region" not in pairs.columns
+    def test_custom_label_col(self):
+        pairs = pd.DataFrame({"pair": ["A1-A2"]})
+        result = enrich_pairs_with_cml_regions(
+            pairs, reader=FakeReader(_cml_pairs()), label_col="pair"
+        )
+        assert result.iloc[0]["ind.region"] == "hippocampus"
+    def test_missing_label_column_raises(self):
+        bad = _cml_pairs().drop(columns=["label"])
+        pairs = pd.DataFrame({"name": ["A1-A2"]})
+        with pytest.raises(KeyError):
+            enrich_pairs_with_cml_regions(pairs, reader=FakeReader(bad))
+    def test_no_reader_and_incomplete_args_raises(self):
+        pairs = pd.DataFrame({"name": ["A1-A2"]})
+        with pytest.raises(ValueError):
+            enrich_pairs_with_cml_regions(pairs, subject="R1001P")  # no exp/session

bidsreader-0.2.0/bidsreader/public_helpers.py DELETED Viewed

@@ -1,17 +0,0 @@
-# imports
-import pandas as pd
-from pathlib import Path
-import re
-def get_data_index(root, task):
-    root = Path(root)
-    rows = []
-    pattern = re.compile(r"sub-(?P<sub>[^_]+)_ses-(?P<ses>[^_]+)_task-" + re.escape(task), re.IGNORECASE)
-    for f in root.rglob(f"*task-{task}*beh.tsv"):
-        m = pattern.search(f.name)
-        if m:
-            rows.append({"subject": m.group("sub"), "task": task, "session": m.group("ses")})
-    return pd.DataFrame(rows).drop_duplicates().sort_values(["subject", "session"]).reset_index(drop=True)