bidsreader 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bidsreader/__init__.py ADDED
@@ -0,0 +1,15 @@
1
# bidsreader/__init__.py -- public package surface: re-export the reader
# classes, filtering/conversion helpers, and version metadata.
from collections import namedtuple

from .basereader import BaseReader
from .cmlbidsreader import CMLBIDSReader
from .filtering import (
    filter_by_trial_types,
    filter_epochs_by_trial_types,
    filter_events_df_by_trial_types,
    filter_raw_events_by_trial_types,
)
from .convert import mne_epochs_to_ptsa, mne_raw_to_ptsa
from .units import convert_unit, detect_unit, get_scale_factor

# Single source of truth for the package version.
__version__ = "0.1.0"

# Expose the version as a named triple; components remain strings.
version_info = namedtuple("VersionInfo", ["major", "minor", "patch"])(
    *__version__.split(".")
)
@@ -0,0 +1,50 @@
1
+ # _errorwrap.py
2
+ from __future__ import annotations
3
+ from functools import wraps
4
+ import json
5
+ import pandas as pd
6
+
7
+ from .exc import (
8
+ BIDSReaderError,
9
+ InvalidOptionError,
10
+ MissingRequiredFieldError,
11
+ FileNotFoundBIDSError,
12
+ AmbiguousMatchError,
13
+ DataParseError,
14
+ ExternalLibraryError,
15
+ )
16
+
17
def public_api(func):
    """Decorator for public entry points: guarantee the BIDSReaderError hierarchy.

    Wraps *func* so that any exception escaping it is either already a
    BIDSReaderError (re-raised untouched) or is translated into the package's
    exception hierarchy, with the original exception chained via ``from e``.
    The order of the except clauses matters: most specific mappings first,
    with ExternalLibraryError as the catch-all.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)

        # If it's already one of yours, don't touch it.
        except BIDSReaderError:
            raise

        # Map common “expected” external exceptions to your hierarchy.
        except FileNotFoundError as e:
            raise FileNotFoundBIDSError(str(e)) from e

        except json.JSONDecodeError as e:
            raise DataParseError(f"Invalid JSON: {e}") from e

        except pd.errors.ParserError as e:
            raise DataParseError(f"Could not parse TSV/CSV: {e}") from e

        except KeyError as e:
            # Often means missing expected column like "trial_type"
            raise DataParseError(f"Missing expected key/column: {e}") from e

        except ValueError as e:
            # Be careful: ValueError is broad. Only map if you know it's "yours".
            # Otherwise wrap as ExternalLibraryError.
            raise ExternalLibraryError(str(e)) from e

        except Exception as e:
            # Last resort: you still guarantee your hierarchy.
            raise ExternalLibraryError(f"{type(e).__name__}: {e}") from e

    return wrapper
@@ -0,0 +1,208 @@
1
+ import pandas as pd
2
+ from mne_bids import BIDSPath, get_entity_vals
3
+ from pathlib import Path
4
+ from typing import Iterable, Optional, Union, List
5
+ import warnings
6
+ from ._errorwrap import public_api
7
+ from .helpers import add_prefix
8
+ from .exc import InvalidOptionError, MissingRequiredFieldError
9
+
10
+
11
class BaseReader:
    """Base reader for a BIDS dataset.

    Holds the BIDS entities (root, subject, session, task, acquisition,
    device, space) needed to construct ``BIDSPath`` objects and exposes
    simple metadata queries.  ``device`` and ``space`` are resolved lazily
    via ``_determine_device`` / ``_determine_space``, which subclasses
    should override to provide dataset-specific auto-detection.
    """

    # Whitelist of public attribute names accepted by __setattr__;
    # underscore-prefixed names bypass the check entirely.
    _FIELDS = {"root", "subject", "session", "task", "acquisition", "_device", "_space"}
    # REQUIRED_FIELDS = ("subject", "task", "session", "device")

    def __init__(
        self,
        root: Optional[Union[str, Path]] = None,
        subject: Optional[str] = None,
        task: Optional[str] = None,
        session: Optional[str | int] = None,
        space: Optional[str] = None,
        acquisition: Optional[str] = None,
        device: Optional[str] = None,
    ):
        """Create a reader rooted at *root*.

        Raises:
            ValueError: if *root* is not provided.
        """
        if root is None:
            raise ValueError("root must be provided")
        self.root = Path(root)
        self.subject = subject
        self.session = session
        # FIX: the original applied str() unconditionally, so task=None became
        # the truthy string "None" (breaking `if self.task:` checks and BIDSPath
        # construction). Preserve None as None.
        self.task = str(task) if task is not None else None

        self.acquisition = acquisition

        # Backing fields for the lazily-resolved `device` / `space` properties.
        self._device = device

        self._space = space

    # ---------- magic functions ----------
    def __str__(self) -> str:
        # NOTE(review): self.device / self.space are lazy properties, so str()
        # may trigger auto-detection (and its warnings) as a side effect.
        parts = [
            f"root={self.root}",
            f"subject={self.subject}",
        ]

        if self.session:
            parts.append(f"session={self.session}")
        if self.task:
            parts.append(f"task={self.task}")
        if self.device:
            parts.append(f"type={self.device}")
        if self.space:
            parts.append(f"space={self.space}")

        cls = type(self).__name__
        return f"{cls}({', '.join(parts)})"

    def __repr__(self) -> str:
        cls = type(self).__name__
        return (
            f"{cls}(root={self.root!r}, subject={self.subject!r}, "
            f"session={self.session!r}, task={self.task!r}, "
            f"device={self.device!r}, space={self.space!r})"
        )

    def __setattr__(self, name, value):
        """Reject assignment to attributes outside _FIELDS (typo protection)."""
        # Private attributes are always allowed (needed for _device/_space).
        if name.startswith("_"):
            object.__setattr__(self, name, value)
            return
        if name not in self._FIELDS:
            raise AttributeError(f"Unknown field: {name}")
        object.__setattr__(self, name, value)

    # ---------- property ----------
    @property
    def space(self) -> Optional[str]:
        """Coordinate space, auto-detected on first access when unset.

        Detection failures are downgraded to a RuntimeWarning and None is
        returned, so metadata queries keep working on incomplete datasets.
        """
        if self._space is not None:
            return self._space

        try:
            self._space = self._determine_space()
        except Exception as e:
            warnings.warn(
                f"Could not determine space automatically: {e}",
                RuntimeWarning,
            )
            return None

        return self._space

    @property
    def device(self) -> Optional[str]:
        """Recording datatype (e.g. "eeg"/"ieeg"), auto-detected on first access."""
        if self._device is not None:
            return self._device

        try:
            self._device = self._determine_device()
        except Exception as e:
            warnings.warn(
                f"Could not determine device automatically: {e}",
                RuntimeWarning,
            )
            return None

        # Detection ran but produced nothing -- warn so the caller knows why
        # downstream BIDSPath queries may come back empty.
        if self._device is None:
            warnings.warn(
                "device could not be inferred from subject.",
                RuntimeWarning,
            )

        return self._device

    # ---------- internal helpers ----------

    def _bp(self, **kwargs) -> BIDSPath:
        """Build a BIDSPath from the current fields, then apply **kwargs overrides."""
        bp = BIDSPath(
            root=self.root,
            subject=self.subject,
            session=str(self.session) if self.session is not None else None,
            task=self.task,
            datatype=self.device,
        )
        bp.update(**kwargs)
        return bp

    def _subject_root(self) -> Path:
        """Return the subject directory, i.e. root / "sub-<subject>"."""
        p = self.root / self._add_bids_prefix("subject", self.subject)
        return p

    def _add_bids_prefix(self, field: str, value: Optional[str]) -> Optional[str]:
        """Prefix *value* with the BIDS entity prefix for *field* (e.g. "sub-").

        Raises:
            InvalidOptionError: if *field* is not a known BIDS entity.
        """
        prefix_map = {
            "subject": "sub-",
            "session": "ses-",
            "acquisition": "acq-",
            "task": "task-",
            "space": "space-",
        }

        if field not in prefix_map:
            raise InvalidOptionError(f"Unknown BIDS field: {field}")

        return add_prefix(value, prefix_map[field])

    def _require(self, fields: Iterable[str], context: str = "") -> None:
        """Raise MissingRequiredFieldError if any of *fields* is None or ""."""
        missing = [f for f in fields if getattr(self, f, None) in (None, "")]
        if missing:
            raise MissingRequiredFieldError(
                f"{context}: missing required fields: {', '.join(missing)}"
            )

    # idk if this is useful for anyone, should override for proper checking
    # def _get_needed_fields(self):
    #     return self.REQUIRED_FIELDS

    def _determine_space(self) -> Optional[str]:
        """Override in subclasses to provide automatic space detection."""
        return None

    def _determine_device(self) -> Optional[str]:
        """Override in subclasses to provide automatic device detection."""
        return None

    # ---------- public API ----------
    @public_api
    def set_fields(self, **kwargs):
        """Set several fields at once; returns self so calls can be chained."""
        for k, v in kwargs.items():
            setattr(self, k, v)  # validated by __setattr__
        return self

    # ---- simple metadata queries ----
    @public_api
    def get_subject_tasks(self) -> List[str]:
        """List task entity values present under the current subject."""
        subject_root = self._subject_root()
        return get_entity_vals(subject_root, "task")

    @public_api
    def get_subject_sessions(self) -> List[str]:
        """List session entity values present under the current subject."""
        subject_root = self._subject_root()
        return get_entity_vals(subject_root, "session")

    @public_api
    def get_dataset_subjects(self) -> List[str]:
        """List all subject entity values in the dataset."""
        return get_entity_vals(self.root, "subject")

    @public_api
    def get_dataset_tasks(self) -> List[str]:
        """List all task entity values in the dataset."""
        return get_entity_vals(self.root, "task")

    @public_api
    def get_dataset_max_sessions(self, outlier_thresh: Optional[int] = None) -> Optional[int]:
        """Return the highest numeric session index across all subjects.

        Non-numeric session labels are skipped.  Sessions above
        *outlier_thresh* (when given) emit a warning and are excluded from
        the maximum.  Returns None when no numeric session is found.
        """
        subs = self.get_dataset_subjects()
        max_ses: Optional[int] = None

        for sub in subs:
            # get_entity_vals may return values with or without the "sub-"
            # prefix; normalize before building the path.
            subject_root = self.root / f"sub-{str(sub).replace('sub-', '')}"
            sessions = get_entity_vals(subject_root, "session") or []

            for s in sessions:
                try:
                    si = int(str(s).replace("ses-", ""))
                except ValueError:
                    continue

                if outlier_thresh is not None and si > outlier_thresh:
                    warnings.warn(f"Session number is over {outlier_thresh}. Double check dataset.")
                else:
                    max_ses = si if max_ses is None else max(max_ses, si)

        return max_ses
@@ -0,0 +1,269 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import mne
4
+ from mne_bids import read_raw_bids
5
+ from pathlib import Path
6
+ from typing import Iterable, Tuple, Optional, Union, Dict
7
+ import warnings
8
+ import json
9
+ from .basereader import BaseReader
10
+ from ._errorwrap import public_api
11
+ from .helpers import validate_option, space_from_coordsystem_fname, combine_bipolar_electrodes
12
+ from .exc import InvalidOptionError, FileNotFoundBIDSError, AmbiguousMatchError, DataParseError
13
+
14
+ CML_ROOT = "/data/LTP_BIDS"
15
+
16
+
17
class CMLBIDSReader(BaseReader):
    """Reader for the CML LTP BIDS dataset (default root /data/LTP_BIDS).

    Adds CML-specific behavior on top of BaseReader: device inference from
    the subject code, coordinate-space discovery from coordsystem filenames,
    and bipolar/monopolar acquisition handling for intracranial recordings.
    """

    VALID_ACQ = ("bipolar", "monopolar")
    VALID_DEVICES = ("eeg", "ieeg")
    INTRACRANIAL_FIELDS = ("subject", "task", "session", "device")
    SCALP_FIELDS = ("subject", "task", "session", "device")

    def __init__(
        self,
        root: Optional[Union[str, Path]] = CML_ROOT,
        subject: Optional[str] = None,
        task: Optional[str] = None,
        session: Optional[str | int] = None,
        space: Optional[str] = None,
        acquisition: Optional[str] = None,
        device: Optional[str] = None,
    ):
        """Validate *device* against VALID_DEVICES, then defer to BaseReader."""
        # NOTE(review): presumably validate_option passes None through so
        # device can still be inferred lazily -- confirm against helpers.
        device = validate_option(
            "device", device, self.VALID_DEVICES
        )
        super().__init__(
            root=root,
            subject=subject,
            task=task,
            session=session,
            space=space,
            acquisition=acquisition,
            device=device,
        )

    # ---------- internal helpers ----------

    def _determine_device(self) -> Optional[str]:
        """Infer the datatype from the CML subject naming convention."""
        if self.subject is None:
            return None
        if self.subject.startswith("LTP"):
            return "eeg"
        if self.subject.startswith("R"):
            return "ieeg"
        return None

    def _determine_space(self) -> str:
        """Derive the coordinate space from the single *_coordsystem.json file.

        Raises:
            FileNotFoundBIDSError: data directory or coordsystem file missing.
            AmbiguousMatchError: more than one coordsystem file found.
            DataParseError: filename did not encode a space entity.
        """
        subject_root = self._subject_root()
        data_dir = subject_root / self._add_bids_prefix("session", self.session) / self.device

        if not data_dir.exists():
            raise FileNotFoundBIDSError(
                f"determine_space: data directory does not exist.\n"
                f"subject_root={subject_root}\n"
                f"data_dir={data_dir}"
            )

        matches = list(data_dir.glob("*_coordsystem.json"))
        if not matches:
            raise FileNotFoundBIDSError(
                f"determine_space: no *_coordsystem.json file found.\n"
                f"data_dir={data_dir}"
            )

        if len(matches) > 1:
            raise AmbiguousMatchError(
                f"determine_space: multiple coordsystem files found.\n"
                f"files={[m.name for m in matches]}"
            )

        fname = matches[0].name
        space = space_from_coordsystem_fname(fname)

        if space is None:
            raise DataParseError(
                f"determine_space: could not parse space from filename.\n"
                f"filename={fname}"
            )

        return space

    def _validate_acq(self, acquisition: Optional[str]) -> Optional[str]:
        """Validate acquisition for iEEG; scalp recordings never use one."""
        if not self.is_intracranial():
            return None
        if acquisition is None:
            raise InvalidOptionError("acquisition is set to None")
        return validate_option("acquisition", acquisition, self.VALID_ACQ)

    def _get_needed_fields(self):
        """Fields required before any loader may run (same for both devices)."""
        return self.INTRACRANIAL_FIELDS if self.is_intracranial() else self.SCALP_FIELDS

    def _attach_bipolar_midpoint_montage(self, raw: mne.io.BaseRaw) -> None:
        """Best-effort: set a montage of bipolar-pair midpoints on *raw*.

        Silently returns when midpoint columns are missing or no pair has
        finite coordinates, so loading never fails on incomplete electrode
        metadata.
        """
        pairs_df = self.load_channels("bipolar")
        elec_df = self.load_electrodes()
        combo = combine_bipolar_electrodes(pairs_df, elec_df)

        if not {"name", "x_mid", "y_mid", "z_mid"}.issubset(combo.columns):
            return

        # Keep only pairs whose midpoint coordinates are all finite.
        ch_pos = {
            str(r["name"]): (float(r["x_mid"]), float(r["y_mid"]), float(r["z_mid"]))
            for _, r in combo.iterrows()
            if np.isfinite(r["x_mid"]) and np.isfinite(r["y_mid"]) and np.isfinite(r["z_mid"])
        }
        if not ch_pos:
            return

        montage = mne.channels.make_dig_montage(ch_pos=ch_pos, coord_frame="mni_tal")
        raw.set_montage(montage, on_missing="ignore")

    # ---------- public API ----------

    @public_api
    def is_intracranial(self) -> bool:
        """True when the (possibly inferred) device is intracranial EEG."""
        return self.device == "ieeg"

    # ---------- loaders ----------

    @public_api
    def load_events(self, event_type: str = "beh") -> pd.DataFrame:
        """Load the events TSV as a DataFrame.

        Args:
            event_type: "beh" for behavioral events, or the device datatype
                (e.g. "eeg"/"ieeg") for recording-aligned events.

        Raises:
            FileNotFoundBIDSError: no file matched the constructed BIDSPath.
        """
        self._require(self._get_needed_fields(), context="load_events")
        allowed = ["beh", self.device]
        event_type = validate_option("event_type", event_type, allowed)
        suffix = "beh" if event_type == "beh" else "events"

        bp = self._bp(
            datatype=event_type,
            suffix=suffix,
            extension=".tsv",
        )

        matches = bp.match()
        if not matches:
            raise FileNotFoundBIDSError(f"load_events: no file matched for {bp}")

        return pd.read_csv(matches[0].fpath, sep="\t")

    @public_api
    def load_electrodes(self) -> pd.DataFrame:
        """Load the electrodes TSV (monopolar contact coordinates)."""
        self._require(self._get_needed_fields(), context="load_electrodes")

        # Scalp electrode files carry no task entity; iEEG files do.
        _task = self.task if self.is_intracranial() else None
        bp = self._bp(datatype=self.device, suffix="electrodes", space=self.space, task=_task, extension=".tsv")
        return pd.read_csv(bp.fpath, sep="\t")

    @public_api
    def load_channels(self, acquisition: Optional[str] = None) -> pd.DataFrame:
        """Load the channels TSV for the given acquisition (iEEG only)."""
        self._require(self._get_needed_fields(), context="load_channels")

        acq = self._validate_acq(acquisition)
        bp = self._bp(datatype=self.device, suffix="channels", acquisition=acq, extension=".tsv")
        return pd.read_csv(bp.fpath, sep="\t")

    @public_api
    def load_combined_channels(self, acquisition: Optional[str] = None) -> pd.DataFrame:
        """Load channels joined with electrode coordinates.

        Monopolar (or unspecified) channels are left-merged on "name";
        bipolar pairs are combined via combine_bipolar_electrodes.

        Raises:
            InvalidOptionError: *acquisition* is neither None, "monopolar",
                nor "bipolar".
        """
        self._require(self._get_needed_fields(), context="load_combined_channels")

        channel_df = self.load_channels(acquisition)
        elec_df = self.load_electrodes()
        if acquisition == "monopolar" or acquisition is None:
            return channel_df.merge(elec_df, on="name", how="left", suffixes=("", "_elec"))
        if acquisition == "bipolar":
            return combine_bipolar_electrodes(channel_df, elec_df)
        # FIX: previously fell through and implicitly returned None for any
        # other value; fail loudly instead.
        raise InvalidOptionError(f"Unknown acquisition: {acquisition!r}")

    @public_api
    def load_coordsystem_desc(self) -> Dict:
        """Load the coordsystem JSON sidecar as a dict."""
        self._require(self._get_needed_fields(), context="load_coordsystem")

        _task = self.task if self.is_intracranial() else None
        bp = self._bp(datatype=self.device, suffix="coordsystem", space=self.space, task=_task, extension=".json")

        with open(bp.fpath, "r") as f:
            return json.load(f)

    @public_api
    def load_raw(self, acquisition: Optional[str] = None) -> mne.io.BaseRaw:
        """Load the continuous recording via mne_bids.read_raw_bids.

        For bipolar iEEG, a midpoint montage is attached after loading.
        Known-noisy MNE coordinate-frame warnings are suppressed.
        """
        self._require(self._get_needed_fields(), context="load_raw")

        acq = self._validate_acq(acquisition)

        bp_kwargs = {"datatype": self.device}
        if acq is not None:
            bp_kwargs["acquisition"] = acq
        bp = self._bp(**bp_kwargs)

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message=r"DigMontage is only a subset of info\.",
                category=RuntimeWarning,
            )
            warnings.filterwarnings(
                "ignore",
                message=r".*is not an MNE-Python coordinate frame.*",
                category=RuntimeWarning,
            )
            raw = read_raw_bids(bp)

        if self.is_intracranial() and acq == "bipolar":
            self._attach_bipolar_midpoint_montage(raw)

        return raw

    @public_api
    def load_epochs(
        self,
        tmin: float,
        tmax: float,
        events: Optional[pd.DataFrame] = None,
        baseline: Optional[Tuple[float | None, float | None]] = None,
        acquisition: Optional[str] = None,
        event_repeated: str = "merge",
        channels: Optional[Iterable[str]] = None,
        preload: bool = False,
    ) -> mne.Epochs:
        """Epoch the recording around events.

        Args:
            tmin/tmax: epoch window in seconds relative to each event.
            events: optional DataFrame with a required "sample" column and an
                optional "trial_type" column; when omitted, all annotation
                events from the raw recording are used.
            baseline: passed through to mne.Epochs.
            acquisition: forwarded to load_raw (iEEG only).
            event_repeated: mne.Epochs strategy for duplicate event samples.
            channels: channel names to pick; None keeps all.
            preload: load data into memory immediately.

        Raises:
            ValueError: *events* lacks a "sample" column, or contains
                trial_type values absent from the raw annotations.
        """
        self._require(self._get_needed_fields(), context="load_epochs")
        raw = self.load_raw(acquisition=acquisition)

        all_events_raw, all_event_id = mne.events_from_annotations(raw)

        if events is not None:
            if "sample" not in events.columns:
                raise ValueError("Events DataFrame must contain a 'sample' column")

            if "trial_type" in events.columns:
                # Map trial_type labels to the integer codes MNE derived
                # from the raw annotations; unknown labels are an error.
                codes = events["trial_type"].map(all_event_id)
                if codes.isna().any():
                    missing = set(events.loc[codes.isna(), "trial_type"].unique())
                    raise ValueError(
                        f"trial_type values not found in raw annotations: {missing}"
                    )
                codes = codes.values.astype(int)
                present_types = set(events["trial_type"].unique())
                event_id = {k: v for k, v in all_event_id.items() if k in present_types}
            else:
                # No labels: collapse everything onto one synthetic event code.
                codes = np.ones(len(events), dtype=int)
                event_id = {"event": 1}

            # MNE event array: (sample, previous value, event code).
            events_raw = np.column_stack([
                events["sample"].values.astype(int),
                np.zeros(len(events), dtype=int),
                codes,
            ])
        else:
            events_raw = all_events_raw
            event_id = all_event_id

        picks = list(channels) if channels is not None else None
        return mne.Epochs(
            raw,
            events=events_raw,
            event_id=event_id,
            tmin=tmin,
            tmax=tmax,
            baseline=baseline,
            preload=preload,
            event_repeated=event_repeated,
            picks=picks,
        )
bidsreader/convert.py ADDED
@@ -0,0 +1,57 @@
1
+ from __future__ import annotations
2
+
3
+ import mne
4
+ import numpy as np
5
+ import pandas as pd
6
+ from typing import Iterable, Optional, TYPE_CHECKING
7
+ from ._errorwrap import public_api
8
+ from .helpers import merge_duplicate_sample_events
9
+
10
+ if TYPE_CHECKING:
11
+ from ptsa.data.timeseries import TimeSeries
12
+
13
+
14
@public_api
def mne_epochs_to_ptsa(epochs: mne.Epochs, events: pd.DataFrame) -> TimeSeries:
    """Convert an mne.Epochs plus its events table into a PTSA TimeSeries.

    Duplicate-sample events are merged first so the event rows line up
    one-to-one with the epochs.
    """
    # Deferred import: ptsa is an optional dependency.
    from ptsa.data.timeseries import TimeSeries

    deduped = merge_duplicate_sample_events(events)
    return TimeSeries.from_mne_epochs(epochs, deduped)
19
+
20
+
21
@public_api
def mne_raw_to_ptsa(
    raw: mne.io.BaseRaw,
    picks: Optional[Iterable[str]] = None,
    tmin: Optional[float] = None,
    tmax: Optional[float] = None,
) -> TimeSeries:
    """Convert a continuous MNE Raw recording into a PTSA TimeSeries.

    Args:
        raw: source recording; never modified (a copy is cropped/picked).
        picks: channel names (or integer indices) to keep; None keeps all.
        tmin: optional crop start in seconds.  (FIX: annotated
            Optional[float] -- the original declared ``float`` with a
            None default.)
        tmax: optional crop end in seconds.

    Returns:
        TimeSeries with dims ("channel", "time"), the sampling rate, and
        measurement-date / first-sample metadata in attrs.
    """
    from ptsa.data.timeseries import TimeSeries

    inst = raw.copy()
    if tmin is not None or tmax is not None:
        inst.crop(tmin=tmin, tmax=tmax)

    if picks is not None:
        # FIX: materialize first -- the original iterated `picks` twice
        # (once in all(), once in the comprehension), so a generator
        # argument was exhausted and every channel silently dropped.
        picks = list(picks)
        if all(isinstance(p, str) for p in picks):
            pick_idx = [inst.ch_names.index(ch) for ch in picks]
        else:
            # Assume integer channel indices.
            pick_idx = list(picks)

        data = inst.get_data(picks=pick_idx)
        ch_names = [inst.ch_names[i] for i in pick_idx]
    else:
        data = inst.get_data()
        ch_names = inst.ch_names

    sfreq = float(inst.info["sfreq"])
    times = inst.times

    ts = TimeSeries.create(
        data,
        samplerate=sfreq,
        dims=("channel", "time"),
        coords={
            "channel": np.asarray(ch_names, dtype=object),
            "time": np.asarray(times, dtype=float),
        },
        attrs={
            "mne_meas_date": str(inst.info.get("meas_date")),
            "mne_first_samp": int(inst.first_samp),
        },
    )
    return ts
bidsreader/exc.py ADDED
@@ -0,0 +1,23 @@
1
class BIDSReaderError(Exception):
    """Root of the bidsreader exception hierarchy; catch this for any package error."""


class InvalidOptionError(BIDSReaderError, ValueError):
    """Raised when an input is not among the allowed options."""


class MissingRequiredFieldError(BIDSReaderError, ValueError):
    """Raised when a required field is missing when loading file using BIDSPath."""


class FileNotFoundBIDSError(BIDSReaderError, FileNotFoundError):
    """Raised when a BIDS file is not found."""


# FIX: dropped the redundant explicit `Exception` base -- BIDSReaderError
# already derives from Exception, so the MRO is unchanged.
class AmbiguousMatchError(BIDSReaderError):
    """Raised when multiple files are returned when searching."""


class DataParseError(BIDSReaderError):
    """TSV/JSON parsing, schema issues, etc."""


class DependencyError(BIDSReaderError):
    """Errors originating from optional deps or incompatible versions."""


class ExternalLibraryError(BIDSReaderError):
    """Fallback wrapper when MNE/pandas/etc. throws something unexpected."""