eegdash 0.4.0.dev150__py3-none-any.whl → 0.4.0.dev162__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic; see the registry's advisory page for this release for more details.
- eegdash/__init__.py +1 -1
- eegdash/api.py +180 -86
- eegdash/bids_eeg_metadata.py +139 -39
- eegdash/const.py +25 -0
- eegdash/data_utils.py +239 -173
- eegdash/dataset/dataset.py +35 -13
- eegdash/dataset/dataset_summary.csv +1 -1
- eegdash/dataset/registry.py +69 -4
- eegdash/downloader.py +95 -9
- eegdash/features/datasets.py +320 -136
- eegdash/features/decorators.py +88 -3
- eegdash/features/extractors.py +201 -55
- eegdash/features/inspect.py +78 -5
- eegdash/features/serialization.py +45 -19
- eegdash/features/utils.py +75 -8
- eegdash/hbn/preprocessing.py +50 -17
- eegdash/hbn/windows.py +145 -32
- eegdash/logging.py +19 -0
- eegdash/mongodb.py +44 -27
- eegdash/paths.py +14 -5
- eegdash/utils.py +16 -1
- {eegdash-0.4.0.dev150.dist-info → eegdash-0.4.0.dev162.dist-info}/METADATA +1 -1
- eegdash-0.4.0.dev162.dist-info/RECORD +37 -0
- eegdash-0.4.0.dev150.dist-info/RECORD +0 -37
- {eegdash-0.4.0.dev150.dist-info → eegdash-0.4.0.dev162.dist-info}/WHEEL +0 -0
- {eegdash-0.4.0.dev150.dist-info → eegdash-0.4.0.dev162.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.4.0.dev150.dist-info → eegdash-0.4.0.dev162.dist-info}/top_level.txt +0 -0
eegdash/data_utils.py
CHANGED
|
@@ -37,10 +37,26 @@ from .paths import get_default_cache_dir
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class EEGDashBaseDataset(BaseDataset):
|
|
40
|
-
"""A single EEG recording
|
|
40
|
+
"""A single EEG recording dataset.
|
|
41
|
+
|
|
42
|
+
Represents a single EEG recording, typically hosted on a remote server (like AWS S3)
|
|
43
|
+
and cached locally upon first access. This class is a subclass of
|
|
44
|
+
:class:`braindecode.datasets.BaseDataset` and can be used with braindecode's
|
|
45
|
+
preprocessing and training pipelines.
|
|
46
|
+
|
|
47
|
+
Parameters
|
|
48
|
+
----------
|
|
49
|
+
record : dict
|
|
50
|
+
A fully resolved metadata record for the data to load.
|
|
51
|
+
cache_dir : str
|
|
52
|
+
The local directory where the data will be cached.
|
|
53
|
+
s3_bucket : str, optional
|
|
54
|
+
The S3 bucket to download data from. If not provided, defaults to the
|
|
55
|
+
OpenNeuro bucket.
|
|
56
|
+
**kwargs
|
|
57
|
+
Additional keyword arguments passed to the
|
|
58
|
+
:class:`braindecode.datasets.BaseDataset` constructor.
|
|
41
59
|
|
|
42
|
-
This is a subclass of braindecode's BaseDataset, which can consequently be used in
|
|
43
|
-
conjunction with the preprocessing and training pipelines of braindecode.
|
|
44
60
|
"""
|
|
45
61
|
|
|
46
62
|
_AWS_BUCKET = "s3://openneuro.org"
|
|
@@ -52,20 +68,6 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
52
68
|
s3_bucket: str | None = None,
|
|
53
69
|
**kwargs,
|
|
54
70
|
):
|
|
55
|
-
"""Create a new EEGDashBaseDataset instance. Users do not usually need to call this
|
|
56
|
-
directly -- instead use the EEGDashDataset class to load a collection of these
|
|
57
|
-
recordings from a local BIDS folder or using a database query.
|
|
58
|
-
|
|
59
|
-
Parameters
|
|
60
|
-
----------
|
|
61
|
-
record : dict
|
|
62
|
-
A fully resolved metadata record for the data to load.
|
|
63
|
-
cache_dir : str
|
|
64
|
-
A local directory where the data will be cached.
|
|
65
|
-
kwargs : dict
|
|
66
|
-
Additional keyword arguments to pass to the BaseDataset constructor.
|
|
67
|
-
|
|
68
|
-
"""
|
|
69
71
|
super().__init__(None, **kwargs)
|
|
70
72
|
self.record = record
|
|
71
73
|
self.cache_dir = Path(cache_dir)
|
|
@@ -121,14 +123,12 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
121
123
|
self._raw = None
|
|
122
124
|
|
|
123
125
|
def _get_raw_bids_args(self) -> dict[str, Any]:
|
|
124
|
-
"""
|
|
125
|
-
recording.
|
|
126
|
-
"""
|
|
126
|
+
"""Extract BIDS-related arguments from the metadata record."""
|
|
127
127
|
desired_fields = ["subject", "session", "task", "run"]
|
|
128
128
|
return {k: self.record[k] for k in desired_fields if self.record[k]}
|
|
129
129
|
|
|
130
130
|
def _ensure_raw(self) -> None:
|
|
131
|
-
"""
|
|
131
|
+
"""Ensure the raw data file and its dependencies are cached locally."""
|
|
132
132
|
# TO-DO: remove this once is fixed on the our side
|
|
133
133
|
# for the competition
|
|
134
134
|
if not self.s3_open_neuro:
|
|
@@ -190,42 +190,53 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
190
190
|
return len(self._raw)
|
|
191
191
|
|
|
192
192
|
@property
|
|
193
|
-
def raw(self):
|
|
194
|
-
"""
|
|
195
|
-
|
|
193
|
+
def raw(self) -> BaseRaw:
|
|
194
|
+
"""The MNE Raw object for this recording.
|
|
195
|
+
|
|
196
|
+
Accessing this property triggers the download and caching of the data
|
|
197
|
+
if it has not been accessed before.
|
|
198
|
+
|
|
199
|
+
Returns
|
|
200
|
+
-------
|
|
201
|
+
mne.io.BaseRaw
|
|
202
|
+
The loaded MNE Raw object.
|
|
203
|
+
|
|
196
204
|
"""
|
|
197
205
|
if self._raw is None:
|
|
198
206
|
self._ensure_raw()
|
|
199
207
|
return self._raw
|
|
200
208
|
|
|
201
209
|
@raw.setter
|
|
202
|
-
def raw(self, raw):
|
|
210
|
+
def raw(self, raw: BaseRaw):
|
|
203
211
|
self._raw = raw
|
|
204
212
|
|
|
205
213
|
|
|
206
214
|
class EEGDashBaseRaw(BaseRaw):
|
|
207
|
-
"""
|
|
208
|
-
|
|
215
|
+
"""MNE BaseRaw wrapper for automatic S3 data fetching.
|
|
216
|
+
|
|
217
|
+
This class extends :class:`mne.io.BaseRaw` to automatically fetch data
|
|
218
|
+
from an S3 bucket and cache it locally when data is first accessed.
|
|
219
|
+
It is intended for internal use within the EEGDash ecosystem.
|
|
209
220
|
|
|
210
221
|
Parameters
|
|
211
222
|
----------
|
|
212
|
-
input_fname :
|
|
213
|
-
|
|
223
|
+
input_fname : str
|
|
224
|
+
The path to the file on the S3 bucket (relative to the bucket root).
|
|
214
225
|
metadata : dict
|
|
215
|
-
The metadata record for the recording
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
alongside the main recording
|
|
223
|
-
verbose : str
|
|
224
|
-
|
|
226
|
+
The metadata record for the recording, containing information like
|
|
227
|
+
sampling frequency, channel names, etc.
|
|
228
|
+
preload : bool, default False
|
|
229
|
+
If True, preload the data into memory.
|
|
230
|
+
cache_dir : str, optional
|
|
231
|
+
Local directory for caching data. If None, a default directory is used.
|
|
232
|
+
bids_dependencies : list of str, default []
|
|
233
|
+
A list of BIDS metadata files to download alongside the main recording.
|
|
234
|
+
verbose : str, int, or None, default None
|
|
235
|
+
The MNE verbosity level.
|
|
225
236
|
|
|
226
237
|
See Also
|
|
227
238
|
--------
|
|
228
|
-
mne.io.Raw :
|
|
239
|
+
mne.io.Raw : The base class for Raw objects in MNE.
|
|
229
240
|
|
|
230
241
|
"""
|
|
231
242
|
|
|
@@ -241,7 +252,6 @@ class EEGDashBaseRaw(BaseRaw):
|
|
|
241
252
|
bids_dependencies: list[str] = [],
|
|
242
253
|
verbose: Any = None,
|
|
243
254
|
):
|
|
244
|
-
"""Get to work with S3 endpoint first, no caching"""
|
|
245
255
|
# Create a simple RawArray
|
|
246
256
|
sfreq = metadata["sfreq"] # Sampling frequency
|
|
247
257
|
n_times = metadata["n_times"]
|
|
@@ -277,6 +287,7 @@ class EEGDashBaseRaw(BaseRaw):
|
|
|
277
287
|
def _read_segment(
|
|
278
288
|
self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
|
|
279
289
|
):
|
|
290
|
+
"""Read a segment of data, downloading if necessary."""
|
|
280
291
|
if not os.path.exists(self.filecache): # not preload
|
|
281
292
|
if self.bids_dependencies: # this is use only to sidecars for now
|
|
282
293
|
downloader.download_dependencies(
|
|
@@ -297,22 +308,23 @@ class EEGDashBaseRaw(BaseRaw):
|
|
|
297
308
|
return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
|
|
298
309
|
|
|
299
310
|
def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
|
|
300
|
-
"""Read a chunk of data from
|
|
311
|
+
"""Read a chunk of data from a local file."""
|
|
301
312
|
_read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
|
|
302
313
|
|
|
303
314
|
|
|
304
315
|
class EEGBIDSDataset:
|
|
305
|
-
"""
|
|
316
|
+
"""An interface to a local BIDS dataset containing EEG recordings.
|
|
306
317
|
|
|
307
|
-
This
|
|
308
|
-
|
|
318
|
+
This class centralizes interactions with a BIDS dataset on the local
|
|
319
|
+
filesystem, providing methods to parse metadata, find files, and
|
|
320
|
+
retrieve BIDS-related information.
|
|
309
321
|
|
|
310
322
|
Parameters
|
|
311
323
|
----------
|
|
312
|
-
data_dir : str
|
|
324
|
+
data_dir : str or Path
|
|
313
325
|
The path to the local BIDS dataset directory.
|
|
314
326
|
dataset : str
|
|
315
|
-
A name for the dataset.
|
|
327
|
+
A name for the dataset (e.g., "ds002718").
|
|
316
328
|
|
|
317
329
|
"""
|
|
318
330
|
|
|
@@ -357,7 +369,14 @@ class EEGBIDSDataset:
|
|
|
357
369
|
assert self.check_eeg_dataset(), ValueError("Dataset is not an EEG dataset.")
|
|
358
370
|
|
|
359
371
|
def check_eeg_dataset(self) -> bool:
|
|
360
|
-
"""Check if the dataset
|
|
372
|
+
"""Check if the BIDS dataset contains EEG data.
|
|
373
|
+
|
|
374
|
+
Returns
|
|
375
|
+
-------
|
|
376
|
+
bool
|
|
377
|
+
True if the dataset's modality is EEG, False otherwise.
|
|
378
|
+
|
|
379
|
+
"""
|
|
361
380
|
return self.get_bids_file_attribute("modality", self.files[0]).lower() == "eeg"
|
|
362
381
|
|
|
363
382
|
def _get_recordings(self, layout: BIDSLayout) -> list[str]:
|
|
@@ -370,14 +389,12 @@ class EEGBIDSDataset:
|
|
|
370
389
|
return files
|
|
371
390
|
|
|
372
391
|
def _get_relative_bidspath(self, filename: str) -> str:
|
|
373
|
-
"""Make
|
|
392
|
+
"""Make a file path relative to the BIDS parent directory."""
|
|
374
393
|
bids_parent_dir = self.bidsdir.parent.absolute()
|
|
375
394
|
return str(Path(filename).relative_to(bids_parent_dir))
|
|
376
395
|
|
|
377
396
|
def _get_property_from_filename(self, property: str, filename: str) -> str:
|
|
378
|
-
"""Parse a
|
|
379
|
-
if not found.
|
|
380
|
-
"""
|
|
397
|
+
"""Parse a BIDS entity from a filename."""
|
|
381
398
|
import platform
|
|
382
399
|
|
|
383
400
|
if platform.system() == "Windows":
|
|
@@ -387,159 +404,106 @@ class EEGBIDSDataset:
|
|
|
387
404
|
return lookup.group(1) if lookup else ""
|
|
388
405
|
|
|
389
406
|
def _merge_json_inheritance(self, json_files: list[str | Path]) -> dict:
|
|
390
|
-
"""
|
|
391
|
-
expecting the order (from left to right) is from lowest
|
|
392
|
-
level to highest level, and return a merged dictionary
|
|
393
|
-
"""
|
|
407
|
+
"""Merge a list of JSON files according to BIDS inheritance."""
|
|
394
408
|
json_files.reverse()
|
|
395
409
|
json_dict = {}
|
|
396
410
|
for f in json_files:
|
|
397
|
-
|
|
411
|
+
with open(f) as fp:
|
|
412
|
+
json_dict.update(json.load(fp))
|
|
398
413
|
return json_dict
|
|
399
414
|
|
|
400
415
|
def _get_bids_file_inheritance(
|
|
401
416
|
self, path: str | Path, basename: str, extension: str
|
|
402
417
|
) -> list[Path]:
|
|
403
|
-
"""
|
|
404
|
-
and that end with the specified suffix, recursively searching parent directories
|
|
405
|
-
(following the BIDS inheritance principle in the order of lowest level first).
|
|
406
|
-
|
|
407
|
-
Parameters
|
|
408
|
-
----------
|
|
409
|
-
path : str | Path
|
|
410
|
-
The directory path to search for files.
|
|
411
|
-
basename : str
|
|
412
|
-
BIDS file basename without _eeg.set extension for example
|
|
413
|
-
extension : str
|
|
414
|
-
Only consider files that end with the specified suffix; e.g. channels.tsv
|
|
415
|
-
|
|
416
|
-
Returns
|
|
417
|
-
-------
|
|
418
|
-
list[Path]
|
|
419
|
-
A list of file paths that match the given basename and extension.
|
|
420
|
-
|
|
421
|
-
"""
|
|
418
|
+
"""Find all applicable metadata files using BIDS inheritance."""
|
|
422
419
|
top_level_files = ["README", "dataset_description.json", "participants.tsv"]
|
|
423
420
|
bids_files = []
|
|
424
421
|
|
|
425
|
-
# check if path is str object
|
|
426
422
|
if isinstance(path, str):
|
|
427
423
|
path = Path(path)
|
|
428
|
-
if not path.exists:
|
|
429
|
-
raise ValueError("path {path} does not exist")
|
|
424
|
+
if not path.exists():
|
|
425
|
+
raise ValueError(f"path {path} does not exist")
|
|
430
426
|
|
|
431
|
-
# check if file is in current path
|
|
432
427
|
for file in os.listdir(path):
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
# check if file basename has extension
|
|
437
|
-
if file.endswith(extension):
|
|
438
|
-
filepath = path / file
|
|
439
|
-
bids_files.append(filepath)
|
|
440
|
-
|
|
441
|
-
# check if file is in top level directory
|
|
428
|
+
if os.path.isfile(path / file) and file.endswith(extension):
|
|
429
|
+
bids_files.append(path / file)
|
|
430
|
+
|
|
442
431
|
if any(file in os.listdir(path) for file in top_level_files):
|
|
443
432
|
return bids_files
|
|
444
433
|
else:
|
|
445
|
-
# call get_bids_file_inheritance recursively with parent directory
|
|
446
434
|
bids_files.extend(
|
|
447
435
|
self._get_bids_file_inheritance(path.parent, basename, extension)
|
|
448
436
|
)
|
|
449
437
|
return bids_files
|
|
450
438
|
|
|
451
439
|
def get_bids_metadata_files(
|
|
452
|
-
self, filepath: str | Path, metadata_file_extension:
|
|
440
|
+
self, filepath: str | Path, metadata_file_extension: str
|
|
453
441
|
) -> list[Path]:
|
|
454
|
-
"""Retrieve all metadata
|
|
455
|
-
|
|
442
|
+
"""Retrieve all metadata files that apply to a given data file.
|
|
443
|
+
|
|
444
|
+
Follows the BIDS inheritance principle to find all relevant metadata
|
|
445
|
+
files (e.g., ``channels.tsv``, ``eeg.json``) for a specific recording.
|
|
456
446
|
|
|
457
447
|
Parameters
|
|
458
448
|
----------
|
|
459
|
-
filepath: str
|
|
460
|
-
The
|
|
449
|
+
filepath : str or Path
|
|
450
|
+
The path to the data file.
|
|
461
451
|
metadata_file_extension : str
|
|
462
|
-
|
|
463
|
-
e.g., channels.tsv or eeg.json
|
|
452
|
+
The extension of the metadata file to search for (e.g., "channels.tsv").
|
|
464
453
|
|
|
465
454
|
Returns
|
|
466
455
|
-------
|
|
467
|
-
list
|
|
468
|
-
A list of
|
|
456
|
+
list of Path
|
|
457
|
+
A list of paths to the matching metadata files.
|
|
469
458
|
|
|
470
459
|
"""
|
|
471
460
|
if isinstance(filepath, str):
|
|
472
461
|
filepath = Path(filepath)
|
|
473
|
-
if not filepath.exists:
|
|
474
|
-
raise ValueError("filepath {filepath} does not exist")
|
|
462
|
+
if not filepath.exists():
|
|
463
|
+
raise ValueError(f"filepath {filepath} does not exist")
|
|
475
464
|
path, filename = os.path.split(filepath)
|
|
476
465
|
basename = filename[: filename.rfind("_")]
|
|
477
|
-
# metadata files
|
|
478
466
|
meta_files = self._get_bids_file_inheritance(
|
|
479
467
|
path, basename, metadata_file_extension
|
|
480
468
|
)
|
|
481
469
|
return meta_files
|
|
482
470
|
|
|
483
471
|
def _scan_directory(self, directory: str, extension: str) -> list[Path]:
|
|
484
|
-
"""
|
|
485
|
-
directory. Ignores certain special directories like .git, .datalad, derivatives,
|
|
486
|
-
and code.
|
|
487
|
-
"""
|
|
472
|
+
"""Scan a directory for files with a given extension."""
|
|
488
473
|
result_files = []
|
|
489
474
|
directory_to_ignore = [".git", ".datalad", "derivatives", "code"]
|
|
490
475
|
with os.scandir(directory) as entries:
|
|
491
476
|
for entry in entries:
|
|
492
477
|
if entry.is_file() and entry.name.endswith(extension):
|
|
493
|
-
result_files.append(entry.path)
|
|
494
|
-
elif entry.is_dir()
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
478
|
+
result_files.append(Path(entry.path))
|
|
479
|
+
elif entry.is_dir() and not any(
|
|
480
|
+
name in entry.name for name in directory_to_ignore
|
|
481
|
+
):
|
|
482
|
+
result_files.append(Path(entry.path))
|
|
498
483
|
return result_files
|
|
499
484
|
|
|
500
485
|
def _get_files_with_extension_parallel(
|
|
501
486
|
self, directory: str, extension: str = ".set", max_workers: int = -1
|
|
502
487
|
) -> list[Path]:
|
|
503
|
-
"""
|
|
504
|
-
the given extension.
|
|
505
|
-
|
|
506
|
-
Parameters
|
|
507
|
-
----------
|
|
508
|
-
directory : str
|
|
509
|
-
The root directory to scan for files.
|
|
510
|
-
extension : str
|
|
511
|
-
Only consider files that end with this suffix, e.g. '.set'.
|
|
512
|
-
max_workers : int
|
|
513
|
-
Optionally specify the maximum number of worker threads to use for parallel scanning.
|
|
514
|
-
Defaults to all available CPU cores if set to -1.
|
|
515
|
-
|
|
516
|
-
Returns
|
|
517
|
-
-------
|
|
518
|
-
list[Path]:
|
|
519
|
-
A list of filepaths for all matching metadata files
|
|
520
|
-
|
|
521
|
-
"""
|
|
488
|
+
"""Scan a directory tree in parallel for files with a given extension."""
|
|
522
489
|
result_files = []
|
|
523
490
|
dirs_to_scan = [directory]
|
|
524
491
|
|
|
525
|
-
# Use joblib.Parallel and delayed to parallelize directory scanning
|
|
526
492
|
while dirs_to_scan:
|
|
527
493
|
logger.info(
|
|
528
494
|
f"Directories to scan: {len(dirs_to_scan)}, files: {dirs_to_scan}"
|
|
529
495
|
)
|
|
530
|
-
# Run the scan_directory function in parallel across directories
|
|
531
496
|
results = Parallel(n_jobs=max_workers, prefer="threads", verbose=1)(
|
|
532
497
|
delayed(self._scan_directory)(d, extension) for d in dirs_to_scan
|
|
533
498
|
)
|
|
534
499
|
|
|
535
|
-
# Reset the directories to scan and process the results
|
|
536
500
|
dirs_to_scan = []
|
|
537
501
|
for res in results:
|
|
538
502
|
for path in res:
|
|
539
503
|
if os.path.isdir(path):
|
|
540
|
-
dirs_to_scan.append(path)
|
|
504
|
+
dirs_to_scan.append(path)
|
|
541
505
|
else:
|
|
542
|
-
result_files.append(path)
|
|
506
|
+
result_files.append(path)
|
|
543
507
|
logger.info(f"Found {len(result_files)} files.")
|
|
544
508
|
|
|
545
509
|
return result_files
|
|
@@ -547,19 +511,29 @@ class EEGBIDSDataset:
|
|
|
547
511
|
def load_and_preprocess_raw(
|
|
548
512
|
self, raw_file: str, preprocess: bool = False
|
|
549
513
|
) -> np.ndarray:
|
|
550
|
-
"""
|
|
551
|
-
|
|
552
|
-
|
|
514
|
+
"""Load and optionally preprocess a raw data file.
|
|
515
|
+
|
|
516
|
+
This is a utility function for testing or debugging, not for general use.
|
|
517
|
+
|
|
518
|
+
Parameters
|
|
519
|
+
----------
|
|
520
|
+
raw_file : str
|
|
521
|
+
Path to the raw EEGLAB file (.set).
|
|
522
|
+
preprocess : bool, default False
|
|
523
|
+
If True, apply a high-pass filter, notch filter, and resample the data.
|
|
524
|
+
|
|
525
|
+
Returns
|
|
526
|
+
-------
|
|
527
|
+
numpy.ndarray
|
|
528
|
+
The loaded and processed data as a NumPy array.
|
|
529
|
+
|
|
553
530
|
"""
|
|
554
531
|
logger.info(f"Loading raw data from {raw_file}")
|
|
555
532
|
EEG = mne.io.read_raw_eeglab(raw_file, preload=True, verbose="error")
|
|
556
533
|
|
|
557
534
|
if preprocess:
|
|
558
|
-
# highpass filter
|
|
559
535
|
EEG = EEG.filter(l_freq=0.25, h_freq=25, verbose=False)
|
|
560
|
-
# remove 60Hz line noise
|
|
561
536
|
EEG = EEG.notch_filter(freqs=(60), verbose=False)
|
|
562
|
-
# bring to common sampling rate
|
|
563
537
|
sfreq = 128
|
|
564
538
|
if EEG.info["sfreq"] != sfreq:
|
|
565
539
|
EEG = EEG.resample(sfreq)
|
|
@@ -570,26 +544,35 @@ class EEGBIDSDataset:
|
|
|
570
544
|
raise ValueError("Expect raw data to be CxT dimension")
|
|
571
545
|
return mat_data
|
|
572
546
|
|
|
573
|
-
def get_files(self) -> list[
|
|
574
|
-
"""Get all EEG recording file paths
|
|
547
|
+
def get_files(self) -> list[str]:
|
|
548
|
+
"""Get all EEG recording file paths in the BIDS dataset.
|
|
549
|
+
|
|
550
|
+
Returns
|
|
551
|
+
-------
|
|
552
|
+
list of str
|
|
553
|
+
A list of file paths for all valid EEG recordings.
|
|
554
|
+
|
|
555
|
+
"""
|
|
575
556
|
return self.files
|
|
576
557
|
|
|
577
558
|
def resolve_bids_json(self, json_files: list[str]) -> dict:
|
|
578
|
-
"""Resolve
|
|
559
|
+
"""Resolve BIDS JSON inheritance and merge files.
|
|
579
560
|
|
|
580
561
|
Parameters
|
|
581
562
|
----------
|
|
582
|
-
json_files : list
|
|
583
|
-
A list of JSON file paths
|
|
563
|
+
json_files : list of str
|
|
564
|
+
A list of JSON file paths, ordered from the lowest (most specific)
|
|
565
|
+
to highest level of the BIDS hierarchy.
|
|
584
566
|
|
|
585
567
|
Returns
|
|
586
568
|
-------
|
|
587
|
-
|
|
569
|
+
dict
|
|
570
|
+
A dictionary containing the merged JSON data.
|
|
588
571
|
|
|
589
572
|
"""
|
|
590
|
-
if
|
|
573
|
+
if not json_files:
|
|
591
574
|
raise ValueError("No JSON files provided")
|
|
592
|
-
json_files.reverse()
|
|
575
|
+
json_files.reverse()
|
|
593
576
|
|
|
594
577
|
json_dict = {}
|
|
595
578
|
for json_file in json_files:
|
|
@@ -598,8 +581,20 @@ class EEGBIDSDataset:
|
|
|
598
581
|
return json_dict
|
|
599
582
|
|
|
600
583
|
def get_bids_file_attribute(self, attribute: str, data_filepath: str) -> Any:
|
|
601
|
-
"""Retrieve a specific attribute from
|
|
602
|
-
|
|
584
|
+
"""Retrieve a specific attribute from BIDS metadata.
|
|
585
|
+
|
|
586
|
+
Parameters
|
|
587
|
+
----------
|
|
588
|
+
attribute : str
|
|
589
|
+
The name of the attribute to retrieve (e.g., "sfreq", "subject").
|
|
590
|
+
data_filepath : str
|
|
591
|
+
The path to the data file.
|
|
592
|
+
|
|
593
|
+
Returns
|
|
594
|
+
-------
|
|
595
|
+
Any
|
|
596
|
+
The value of the requested attribute, or None if not found.
|
|
597
|
+
|
|
603
598
|
"""
|
|
604
599
|
entities = self.layout.parse_file_entities(data_filepath)
|
|
605
600
|
bidsfile = self.layout.get(**entities)[0]
|
|
@@ -618,21 +613,59 @@ class EEGBIDSDataset:
|
|
|
618
613
|
return attribute_value
|
|
619
614
|
|
|
620
615
|
def channel_labels(self, data_filepath: str) -> list[str]:
|
|
621
|
-
"""Get a list of channel labels
|
|
616
|
+
"""Get a list of channel labels from channels.tsv.
|
|
617
|
+
|
|
618
|
+
Parameters
|
|
619
|
+
----------
|
|
620
|
+
data_filepath : str
|
|
621
|
+
The path to the data file.
|
|
622
|
+
|
|
623
|
+
Returns
|
|
624
|
+
-------
|
|
625
|
+
list of str
|
|
626
|
+
A list of channel names.
|
|
627
|
+
|
|
628
|
+
"""
|
|
622
629
|
channels_tsv = pd.read_csv(
|
|
623
630
|
self.get_bids_metadata_files(data_filepath, "channels.tsv")[0], sep="\t"
|
|
624
631
|
)
|
|
625
632
|
return channels_tsv["name"].tolist()
|
|
626
633
|
|
|
627
634
|
def channel_types(self, data_filepath: str) -> list[str]:
|
|
628
|
-
"""Get a list of channel types
|
|
635
|
+
"""Get a list of channel types from channels.tsv.
|
|
636
|
+
|
|
637
|
+
Parameters
|
|
638
|
+
----------
|
|
639
|
+
data_filepath : str
|
|
640
|
+
The path to the data file.
|
|
641
|
+
|
|
642
|
+
Returns
|
|
643
|
+
-------
|
|
644
|
+
list of str
|
|
645
|
+
A list of channel types.
|
|
646
|
+
|
|
647
|
+
"""
|
|
629
648
|
channels_tsv = pd.read_csv(
|
|
630
649
|
self.get_bids_metadata_files(data_filepath, "channels.tsv")[0], sep="\t"
|
|
631
650
|
)
|
|
632
651
|
return channels_tsv["type"].tolist()
|
|
633
652
|
|
|
634
653
|
def num_times(self, data_filepath: str) -> int:
|
|
635
|
-
"""Get the
|
|
654
|
+
"""Get the number of time points in the recording.
|
|
655
|
+
|
|
656
|
+
Calculated from ``SamplingFrequency`` and ``RecordingDuration`` in eeg.json.
|
|
657
|
+
|
|
658
|
+
Parameters
|
|
659
|
+
----------
|
|
660
|
+
data_filepath : str
|
|
661
|
+
The path to the data file.
|
|
662
|
+
|
|
663
|
+
Returns
|
|
664
|
+
-------
|
|
665
|
+
int
|
|
666
|
+
The approximate number of time points.
|
|
667
|
+
|
|
668
|
+
"""
|
|
636
669
|
eeg_jsons = self.get_bids_metadata_files(data_filepath, "eeg.json")
|
|
637
670
|
eeg_json_dict = self._merge_json_inheritance(eeg_jsons)
|
|
638
671
|
return int(
|
|
@@ -640,38 +673,71 @@ class EEGBIDSDataset:
|
|
|
640
673
|
)
|
|
641
674
|
|
|
642
675
|
def subject_participant_tsv(self, data_filepath: str) -> dict[str, Any]:
|
|
643
|
-
"""Get
|
|
644
|
-
|
|
676
|
+
"""Get the participants.tsv record for a subject.
|
|
677
|
+
|
|
678
|
+
Parameters
|
|
679
|
+
----------
|
|
680
|
+
data_filepath : str
|
|
681
|
+
The path to a data file belonging to the subject.
|
|
682
|
+
|
|
683
|
+
Returns
|
|
684
|
+
-------
|
|
685
|
+
dict
|
|
686
|
+
A dictionary of the subject's information from participants.tsv.
|
|
687
|
+
|
|
645
688
|
"""
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
)
|
|
649
|
-
|
|
689
|
+
participants_tsv_path = self.get_bids_metadata_files(
|
|
690
|
+
data_filepath, "participants.tsv"
|
|
691
|
+
)[0]
|
|
692
|
+
participants_tsv = pd.read_csv(participants_tsv_path, sep="\t")
|
|
650
693
|
if participants_tsv.empty:
|
|
651
694
|
return {}
|
|
652
|
-
# set 'participant_id' as index
|
|
653
695
|
participants_tsv.set_index("participant_id", inplace=True)
|
|
654
696
|
subject = f"sub-{self.get_bids_file_attribute('subject', data_filepath)}"
|
|
655
697
|
return participants_tsv.loc[subject].to_dict()
|
|
656
698
|
|
|
657
699
|
def eeg_json(self, data_filepath: str) -> dict[str, Any]:
|
|
658
|
-
"""Get
|
|
700
|
+
"""Get the merged eeg.json metadata for a data file.
|
|
701
|
+
|
|
702
|
+
Parameters
|
|
703
|
+
----------
|
|
704
|
+
data_filepath : str
|
|
705
|
+
The path to the data file.
|
|
706
|
+
|
|
707
|
+
Returns
|
|
708
|
+
-------
|
|
709
|
+
dict
|
|
710
|
+
The merged eeg.json metadata.
|
|
711
|
+
|
|
712
|
+
"""
|
|
659
713
|
eeg_jsons = self.get_bids_metadata_files(data_filepath, "eeg.json")
|
|
660
|
-
|
|
661
|
-
return eeg_json_dict
|
|
714
|
+
return self._merge_json_inheritance(eeg_jsons)
|
|
662
715
|
|
|
663
716
|
def channel_tsv(self, data_filepath: str) -> dict[str, Any]:
|
|
664
|
-
"""Get
|
|
665
|
-
|
|
717
|
+
"""Get the channels.tsv metadata as a dictionary.
|
|
718
|
+
|
|
719
|
+
Parameters
|
|
720
|
+
----------
|
|
721
|
+
data_filepath : str
|
|
722
|
+
The path to the data file.
|
|
723
|
+
|
|
724
|
+
Returns
|
|
725
|
+
-------
|
|
726
|
+
dict
|
|
727
|
+
The channels.tsv data, with columns as keys.
|
|
728
|
+
|
|
666
729
|
"""
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
730
|
+
channels_tsv_path = self.get_bids_metadata_files(data_filepath, "channels.tsv")[
|
|
731
|
+
0
|
|
732
|
+
]
|
|
733
|
+
channels_tsv = pd.read_csv(channels_tsv_path, sep="\t")
|
|
734
|
+
channel_tsv_dict = channels_tsv.to_dict()
|
|
672
735
|
for list_field in ["name", "type", "units"]:
|
|
673
|
-
|
|
674
|
-
|
|
736
|
+
if list_field in channel_tsv_dict:
|
|
737
|
+
channel_tsv_dict[list_field] = list(
|
|
738
|
+
channel_tsv_dict[list_field].values()
|
|
739
|
+
)
|
|
740
|
+
return channel_tsv_dict
|
|
675
741
|
|
|
676
742
|
|
|
677
743
|
__all__ = ["EEGDashBaseDataset", "EEGBIDSDataset", "EEGDashBaseRaw"]
|