eegdash 0.3.3.dev61__py3-none-any.whl → 0.5.0.dev180784713__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eegdash/__init__.py +19 -6
- eegdash/api.py +336 -539
- eegdash/bids_eeg_metadata.py +495 -0
- eegdash/const.py +349 -0
- eegdash/dataset/__init__.py +28 -0
- eegdash/dataset/base.py +311 -0
- eegdash/dataset/bids_dataset.py +641 -0
- eegdash/dataset/dataset.py +692 -0
- eegdash/dataset/dataset_summary.csv +255 -0
- eegdash/dataset/registry.py +287 -0
- eegdash/downloader.py +197 -0
- eegdash/features/__init__.py +15 -13
- eegdash/features/datasets.py +329 -138
- eegdash/features/decorators.py +105 -13
- eegdash/features/extractors.py +233 -63
- eegdash/features/feature_bank/__init__.py +12 -12
- eegdash/features/feature_bank/complexity.py +22 -20
- eegdash/features/feature_bank/connectivity.py +27 -28
- eegdash/features/feature_bank/csp.py +3 -1
- eegdash/features/feature_bank/dimensionality.py +6 -6
- eegdash/features/feature_bank/signal.py +29 -30
- eegdash/features/feature_bank/spectral.py +40 -44
- eegdash/features/feature_bank/utils.py +8 -0
- eegdash/features/inspect.py +126 -15
- eegdash/features/serialization.py +58 -17
- eegdash/features/utils.py +90 -16
- eegdash/hbn/__init__.py +28 -0
- eegdash/hbn/preprocessing.py +105 -0
- eegdash/hbn/windows.py +428 -0
- eegdash/logging.py +54 -0
- eegdash/mongodb.py +55 -24
- eegdash/paths.py +52 -0
- eegdash/utils.py +29 -1
- eegdash-0.5.0.dev180784713.dist-info/METADATA +121 -0
- eegdash-0.5.0.dev180784713.dist-info/RECORD +38 -0
- eegdash-0.5.0.dev180784713.dist-info/licenses/LICENSE +29 -0
- eegdash/data_config.py +0 -34
- eegdash/data_utils.py +0 -687
- eegdash/dataset.py +0 -69
- eegdash/preprocessing.py +0 -63
- eegdash-0.3.3.dev61.dist-info/METADATA +0 -192
- eegdash-0.3.3.dev61.dist-info/RECORD +0 -28
- eegdash-0.3.3.dev61.dist-info/licenses/LICENSE +0 -23
- {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/WHEEL +0 -0
- {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/top_level.txt +0 -0
eegdash/dataset/bids_dataset.py (new file)
@@ -0,0 +1,641 @@
# Authors: The EEGDash contributors.
# License: BSD-3-Clause
# Copyright the EEGDash contributors.

"""Local BIDS dataset interface for EEGDash.

This module provides the EEGBIDSDataset class for interfacing with local BIDS
datasets on the filesystem, parsing metadata, and retrieving BIDS-related information.
"""

import json
import os
import re
from pathlib import Path
from typing import Any

import pandas as pd
from mne_bids import BIDSPath, find_matching_paths
from mne_bids.config import ALLOWED_DATATYPE_EXTENSIONS, EPHY_ALLOWED_DATATYPES, reader

# Known companion/sidecar files for specific formats (BIDS spec requirement)
# These files must be downloaded together with the primary file
_COMPANION_FILES = {
    ".set": [".fdt"],  # EEGLAB: data file
    ".vhdr": [".eeg", ".vmrk"],  # BrainVision: data + marker files
}


class EEGBIDSDataset:
    """An interface to a local BIDS dataset containing electrophysiology recordings.

    This class centralizes interactions with a BIDS dataset on the local
    filesystem, providing methods to parse metadata, find files, and
    retrieve BIDS-related information. Supports multiple modalities including
    EEG, MEG, iEEG, and NIRS.

    The class uses MNE-BIDS constants to stay synchronized with the BIDS
    specification and automatically supports all file formats recognized by MNE.

    Parameters
    ----------
    data_dir : str or Path
        The path to the local BIDS dataset directory.
    dataset : str
        A name for the dataset (e.g., "ds002718").
    allow_symlinks : bool, default False
        If True, accept broken symlinks (e.g., git-annex) for metadata extraction.
        If False, require actual readable files for data loading.
        Set to True when doing metadata digestion without loading raw data.
    modalities : list of str or None, default None
        List of modalities to search for (e.g., ["eeg", "meg"]).
        If None, defaults to all electrophysiology modalities from MNE-BIDS:
        ['meg', 'eeg', 'ieeg', 'nirs'].

    Attributes
    ----------
    RAW_EXTENSIONS : dict
        Mapping of file extensions to their companion files, dynamically
        built from mne_bids.config.reader.
    files : list of str
        List of all recording file paths found in the dataset.
    detected_modality : str
        The modality of the first file found (e.g., 'eeg', 'meg').

    Examples
    --------
    >>> # Load an EEG-only dataset
    >>> dataset = EEGBIDSDataset(
    ...     data_dir="/path/to/ds002718",
    ...     dataset="ds002718",
    ...     modalities=["eeg"]
    ... )

    >>> # Load a dataset with multiple modalities
    >>> dataset = EEGBIDSDataset(
    ...     data_dir="/path/to/ds005810",
    ...     dataset="ds005810",
    ...     modalities=["meg", "eeg"]
    ... )

    >>> # Metadata extraction from git-annex (symlinks)
    >>> dataset = EEGBIDSDataset(
    ...     data_dir="/path/to/dataset",
    ...     dataset="ds000001",
    ...     allow_symlinks=True
    ... )

    """

    # Dynamically built from MNE-BIDS constants (mne_bids.config.reader).
    # The reader dict maps file extensions to MNE read functions, which
    # keeps this class compatible with the latest BIDS specification.
    # Primary extension + companions = files that must be downloaded together.
    RAW_EXTENSIONS = {
        ext: [ext] + _COMPANION_FILES.get(ext, []) for ext in reader.keys()
    }

    def __init__(
        self,
        data_dir=None,  # location of the BIDS dataset
        dataset="",  # dataset name
        allow_symlinks=False,  # allow broken symlinks for metadata digestion
        modalities=None,
    ):
        if data_dir is None or not os.path.exists(data_dir):
            raise ValueError("data_dir must be specified and must exist")

        self.bidsdir = Path(data_dir)
        self.dataset = dataset
        self.data_dir = data_dir
        self.allow_symlinks = allow_symlinks

        # Set modalities to search for (default: all electrophysiology modalities from MNE-BIDS)
        if modalities is None:
            self.modalities = EPHY_ALLOWED_DATATYPES  # ['meg', 'eeg', 'ieeg', 'nirs']
        else:
            self.modalities = (
                modalities if isinstance(modalities, list) else [modalities]
            )

        # Accept the exact dataset folder or a variant with informative suffixes
        # (e.g., dsXXXXX-bdf, dsXXXXX-bdf-mini) to avoid collisions.
        dir_name = self.bidsdir.name
        if not (dir_name == self.dataset or dir_name.startswith(self.dataset + "-")):
            raise AssertionError(
                f"BIDS directory '{dir_name}' does not correspond to dataset '{self.dataset}'"
            )

        # Initialize BIDS paths using the fast mne_bids approach instead of pybids
        self._init_bids_paths()

        # Get all recording files in the BIDS directory
        assert len(self.files) > 0, ValueError(
            f"Unable to construct dataset. No recordings found for modalities: {self.modalities}"
        )
        # Store the detected modality for later use
        self.detected_modality = self.get_bids_file_attribute(
            "modality", self.files[0]
        ).lower()

    def check_eeg_dataset(self) -> bool:
        """Check if the BIDS dataset contains EEG data.

        Returns
        -------
        bool
            True if the dataset's modality is EEG, False otherwise.

        """
        return self.detected_modality == "eeg"

    def _init_bids_paths(self) -> None:
        """Initialize BIDS file paths using mne_bids for fast discovery.

        Uses mne_bids.find_matching_paths() for efficient pattern-based file
        discovery. Falls back to a manual glob search if needed.

        When allow_symlinks=True, includes broken symlinks (e.g., git-annex)
        for metadata extraction without requiring actual data files.

        Searches across multiple modalities (eeg, meg, ieeg) based on self.modalities.
        """
        # Initialize cache for BIDSPath objects
        self._bids_path_cache = {}

        # Find all recordings across the specified modalities.
        # Use MNE-BIDS constants to get valid extensions per modality.
        self.files = []
        for modality in self.modalities:
            for ext in ALLOWED_DATATYPE_EXTENSIONS.get(modality, []):
                found_files = _find_bids_files(
                    self.bidsdir,
                    ext,
                    modalities=[modality],
                    allow_symlinks=self.allow_symlinks,
                )
                if found_files:
                    self.files = found_files
                    break
            if self.files:
                break

    def _get_bids_path_from_file(self, data_filepath: str):
        """Get a BIDSPath object for a data file with caching.

        Parameters
        ----------
        data_filepath : str
            The path to the data file.

        Returns
        -------
        BIDSPath
            The BIDSPath object for the file.

        """
        if data_filepath not in self._bids_path_cache:
            # Parse the filename to extract BIDS entities
            filepath = Path(data_filepath)
            filename = filepath.name

            # Detect modality from the directory path
            # BIDS structure: .../sub-XX/[ses-YY/]<modality>/sub-XX_...
            path_parts = filepath.parts
            modality = "eeg"  # default
            for part in path_parts:
                if part in ["eeg", "meg", "ieeg", "emg"]:
                    modality = part
                    break

            # Extract entities from the filename using the BIDS pattern
            # Expected format: sub-<label>[_ses-<label>][_task-<label>][_run-<label>]_<modality>.<ext>
            subject = re.search(r"sub-([^_]*)", filename)
            session = re.search(r"ses-([^_]*)", filename)
            task = re.search(r"task-([^_]*)", filename)
            run = re.search(r"run-([^_]*)", filename)

            bids_path = BIDSPath(
                subject=subject.group(1) if subject else None,
                session=session.group(1) if session else None,
                task=task.group(1) if task else None,
                run=int(run.group(1)) if run else None,
                datatype=modality,
                extension=filepath.suffix,
                root=self.bidsdir,
            )
            self._bids_path_cache[data_filepath] = bids_path

        return self._bids_path_cache[data_filepath]

    def _get_json_with_inheritance(
        self, data_filepath: str, json_filename: str
    ) -> dict:
        """Get JSON metadata with BIDS inheritance handling.

        Walks up the directory tree to find and merge JSON files following
        BIDS inheritance principles.

        Parameters
        ----------
        data_filepath : str
            The path to the data file.
        json_filename : str
            The name of the JSON file to find (e.g., "eeg.json").

        Returns
        -------
        dict
            The merged JSON metadata.

        """
        json_dict = {}
        current_dir = Path(data_filepath).parent
        root_dir = self.bidsdir

        # Walk up from the file directory to the root, collecting JSON files
        while current_dir >= root_dir:
            # Try an exact match first (e.g., "eeg.json" at root level)
            json_path = current_dir / json_filename
            if json_path.exists():
                with open(json_path) as f:
                    json_dict.update(json.load(f))
            else:
                # Look for BIDS-specific JSON files (e.g., "sub-001_task-rest_eeg.json")
                # Match files ending with the json_filename pattern
                for json_file in current_dir.glob(f"*_{json_filename}"):
                    # Check if this JSON corresponds to the data file
                    data_basename = Path(data_filepath).stem
                    json_basename = json_file.stem
                    # They should share the same BIDS entities prefix
                    if data_basename.split("_eeg")[0] == json_basename.split("_eeg")[0]:
                        with open(json_file) as f:
                            json_dict.update(json.load(f))
                        break

            # Stop at the BIDS root (contains dataset_description.json)
            if (current_dir / "dataset_description.json").exists():
                break

            current_dir = current_dir.parent

        return json_dict

    def _merge_json_inheritance(self, json_files: list[str | Path]) -> dict:
        """Merge a list of JSON files according to BIDS inheritance."""
        json_files.reverse()
        json_dict = {}
        for f in json_files:
            with open(f) as fp:
                json_dict.update(json.load(fp))
        return json_dict

    def _get_bids_file_inheritance(
        self, path: str | Path, basename: str, extension: str
    ) -> list[Path]:
        """Find all applicable metadata files using BIDS inheritance."""
        top_level_files = ["README", "dataset_description.json", "participants.tsv"]
        bids_files = []

        if isinstance(path, str):
            path = Path(path)
        if not path.exists():
            raise ValueError(f"path {path} does not exist")

        for file in os.listdir(path):
            if os.path.isfile(path / file) and file.endswith(extension):
                bids_files.append(path / file)

        if any(file in os.listdir(path) for file in top_level_files):
            return bids_files
        else:
            bids_files.extend(
                self._get_bids_file_inheritance(path.parent, basename, extension)
            )
            return bids_files

    def get_bids_metadata_files(
        self, filepath: str | Path, metadata_file_extension: str
    ) -> list[Path]:
        """Retrieve all metadata files that apply to a given data file.

        Follows the BIDS inheritance principle to find all relevant metadata
        files (e.g., ``channels.tsv``, ``eeg.json``) for a specific recording.

        Parameters
        ----------
        filepath : str or Path
            The path to the data file.
        metadata_file_extension : str
            The extension of the metadata file to search for (e.g., "channels.tsv").

        Returns
        -------
        list of Path
            A list of paths to the matching metadata files.

        """
        if isinstance(filepath, str):
            filepath = Path(filepath)

        # Validate the file based on the current mode
        if not _is_valid_eeg_file(filepath, allow_symlinks=self.allow_symlinks):
            raise ValueError(
                f"filepath {filepath} does not exist. "
                f"If doing metadata extraction from git-annex, set allow_symlinks=True"
            )

        path, filename = os.path.split(filepath)
        basename = filename[: filename.rfind("_")]
        meta_files = self._get_bids_file_inheritance(
            path, basename, metadata_file_extension
        )
        return meta_files

    def get_files(self) -> list[str]:
        """Get all EEG recording file paths in the BIDS dataset.

        Returns
        -------
        list of str
            A list of file paths for all valid EEG recordings.

        """
        return self.files

    def get_bids_file_attribute(self, attribute: str, data_filepath: str) -> Any:
        """Retrieve a specific attribute from BIDS metadata.

        Parameters
        ----------
        attribute : str
            The name of the attribute to retrieve (e.g., "sfreq", "subject").
        data_filepath : str
            The path to the data file.

        Returns
        -------
        Any
            The value of the requested attribute, or None if not found.

        """
        bids_path = self._get_bids_path_from_file(data_filepath)

        # Direct BIDSPath properties for entities
        direct_attrs = {
            "subject": bids_path.subject,
            "session": bids_path.session,
            "task": bids_path.task,
            "run": bids_path.run,
            "modality": bids_path.datatype,
        }

        if attribute in direct_attrs:
            return direct_attrs[attribute]

        # For JSON-based attributes, read the modality-specific JSON file
        # (eeg.json for EEG, meg.json for MEG, ieeg.json for iEEG)
        modality = bids_path.datatype or "eeg"
        json_filename = f"{modality}.json"
        modality_json = self._get_json_with_inheritance(data_filepath, json_filename)

        json_attrs = {
            "sfreq": modality_json.get("SamplingFrequency"),
            "ntimes": modality_json.get("RecordingDuration"),
            "nchans": modality_json.get("EEGChannelCount")
            or modality_json.get("MEGChannelCount")
            or modality_json.get("iEEGChannelCount"),
        }

        return json_attrs.get(attribute)

    def channel_labels(self, data_filepath: str) -> list[str]:
        """Get a list of channel labels from channels.tsv.

        Parameters
        ----------
        data_filepath : str
            The path to the data file.

        Returns
        -------
        list of str
            A list of channel names.

        """
        # Find channels.tsv in the same directory as the data file.
        # It can be named either "channels.tsv" or "*_channels.tsv".
        filepath = Path(data_filepath)
        parent_dir = filepath.parent

        # Try the standard channels.tsv first
        channels_tsv_path = parent_dir / "channels.tsv"
        if not channels_tsv_path.exists():
            # Try to find *_channels.tsv matching the filename prefix
            base_name = filepath.stem  # filename without extension
            for tsv_file in parent_dir.glob("*_channels.tsv"):
                # Check if it matches by looking at task/run components
                tsv_name = tsv_file.stem.replace("_channels", "")
                if base_name.startswith(tsv_name):
                    channels_tsv_path = tsv_file
                    break

        if not channels_tsv_path.exists():
            raise FileNotFoundError(f"No channels.tsv found for {data_filepath}")

        channels_tsv = pd.read_csv(channels_tsv_path, sep="\t")
        return channels_tsv["name"].tolist()

    def channel_types(self, data_filepath: str) -> list[str]:
        """Get a list of channel types from channels.tsv.

        Parameters
        ----------
        data_filepath : str
            The path to the data file.

        Returns
        -------
        list of str
            A list of channel types.

        """
        # Find channels.tsv in the same directory as the data file.
        # It can be named either "channels.tsv" or "*_channels.tsv".
        filepath = Path(data_filepath)
        parent_dir = filepath.parent

        # Try the standard channels.tsv first
        channels_tsv_path = parent_dir / "channels.tsv"
        if not channels_tsv_path.exists():
            # Try to find *_channels.tsv matching the filename prefix
            base_name = filepath.stem  # filename without extension
            for tsv_file in parent_dir.glob("*_channels.tsv"):
                # Check if it matches by looking at task/run components
                tsv_name = tsv_file.stem.replace("_channels", "")
                if base_name.startswith(tsv_name):
                    channels_tsv_path = tsv_file
                    break

        if not channels_tsv_path.exists():
            raise FileNotFoundError(f"No channels.tsv found for {data_filepath}")

        channels_tsv = pd.read_csv(channels_tsv_path, sep="\t")
        return channels_tsv["type"].tolist()

    def num_times(self, data_filepath: str) -> int:
        """Get the number of time points in the recording.

        Calculated from ``SamplingFrequency`` and ``RecordingDuration`` in eeg.json.

        Parameters
        ----------
        data_filepath : str
            The path to the data file.

        Returns
        -------
        int
            The approximate number of time points.

        """
        eeg_json_dict = self._get_json_with_inheritance(data_filepath, "eeg.json")
        return int(
            eeg_json_dict.get("SamplingFrequency", 0)
            * eeg_json_dict.get("RecordingDuration", 0)
        )

    def subject_participant_tsv(self, data_filepath: str) -> dict[str, Any]:
        """Get the participants.tsv record for a subject.

        Parameters
        ----------
        data_filepath : str
            The path to a data file belonging to the subject.

        Returns
        -------
        dict
            A dictionary of the subject's information from participants.tsv.

        """
        participants_tsv_path = self.get_bids_metadata_files(
            data_filepath, "participants.tsv"
        )[0]
        participants_tsv = pd.read_csv(participants_tsv_path, sep="\t")
        if participants_tsv.empty:
            return {}
        participants_tsv.set_index("participant_id", inplace=True)
        subject = f"sub-{self.get_bids_file_attribute('subject', data_filepath)}"
        return participants_tsv.loc[subject].to_dict()

    def eeg_json(self, data_filepath: str) -> dict[str, Any]:
        """Get the merged eeg.json metadata for a data file.

        Parameters
        ----------
        data_filepath : str
            The path to the data file.

        Returns
        -------
        dict
            The merged eeg.json metadata.

        """
        return self._get_json_with_inheritance(data_filepath, "eeg.json")


def _is_valid_eeg_file(filepath: Path, allow_symlinks: bool = False) -> bool:
    """Check if a file path is valid for EEG processing.

    Parameters
    ----------
    filepath : Path
        The file path to check.
    allow_symlinks : bool, default False
        If True, accept broken symlinks (e.g., git-annex pointers).
        If False, only accept files that actually exist and can be read.

    Returns
    -------
    bool
        True if the file is valid for the current mode.

    """
    if filepath.exists():
        return True
    if allow_symlinks and filepath.is_symlink():
        return True
    return False


def _find_bids_files(
    bidsdir: Path,
    extension: str,
    modalities: list[str] = None,
    allow_symlinks: bool = False,
) -> list[str]:
    """Find BIDS files in a BIDS directory across multiple modalities.

    Parameters
    ----------
    bidsdir : Path
        The BIDS dataset root directory.
    extension : str
        File extension to search for (e.g., '.set', '.bdf', '.fif').
    modalities : list of str, optional
        List of modalities to search (e.g., ["eeg", "meg", "ieeg"]).
        If None, defaults to EPHY_ALLOWED_DATATYPES from mne_bids.config.
    allow_symlinks : bool, default False
        If True, include broken symlinks in results (for metadata extraction).
        If False, only return files that can be read (for data loading).

    Returns
    -------
    list of str
        List of file paths found.

    """
    if modalities is None:
        modalities = EPHY_ALLOWED_DATATYPES

    all_files = []

    for modality in modalities:
        # First try mne_bids (fast, but skips broken symlinks)
        if not allow_symlinks:
            try:
                paths = find_matching_paths(
                    bidsdir, datatypes=modality, extensions=extension
                )
                if paths:
                    all_files.extend([str(p.fpath) for p in paths])
            except Exception:
                pass  # Continue to fallback search

        # Fallback: manual glob search (finds symlinks too)
        pattern = f"**/{modality}/*{extension}"
        found = list(bidsdir.glob(pattern))

        # Filter based on the validation mode
        valid_files = [
            str(f)
            for f in found
            if _is_valid_eeg_file(f, allow_symlinks=allow_symlinks)
        ]
        all_files.extend(valid_files)

    # Remove duplicates while preserving order
    seen = set()
    unique_files = []
    for f in all_files:
        if f not in seen:
            seen.add(f)
            unique_files.append(f)

    return unique_files


__all__ = ["EEGBIDSDataset"]
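For orientation, here is a minimal usage sketch of the new module. It assumes a local BIDS dataset at the hypothetical path /path/to/ds002718 with EEG recordings and the usual channels.tsv/eeg.json sidecars, imports EEGBIDSDataset directly from eegdash.dataset.bids_dataset as laid out in this diff, and only exercises the metadata helpers shown above without loading raw data.

from eegdash.dataset.bids_dataset import EEGBIDSDataset

# Hypothetical local copy of an OpenNeuro dataset; adjust the path and name for your setup.
bids = EEGBIDSDataset(
    data_dir="/path/to/ds002718",
    dataset="ds002718",
    modalities=["eeg"],
)

first_file = bids.get_files()[0]  # path to the first recording found
sfreq = bids.get_bids_file_attribute("sfreq", first_file)  # SamplingFrequency from eeg.json
labels = bids.channel_labels(first_file)  # channel names from channels.tsv
print(first_file, sfreq, len(labels))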