eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of eegdash might be problematic.

eegdash/data_utils.py DELETED
@@ -1,677 +0,0 @@
- # Authors: The EEGDash contributors.
- # License: GNU General Public License
- # Copyright the EEGDash contributors.
-
- """Data utilities and dataset classes for EEG data handling.
-
- This module provides core dataset classes for working with EEG data in the EEGDash ecosystem,
- including classes for individual recordings and collections of datasets. It integrates with
- braindecode for machine learning workflows and handles data loading from both local and remote sources.
- """
-
- import io
- import json
- import os
- import re
- import traceback
- from contextlib import redirect_stderr
- from pathlib import Path
- from typing import Any
-
- import mne
- import mne_bids
- import numpy as np
- import pandas as pd
- from bids import BIDSLayout
- from joblib import Parallel, delayed
- from mne._fiff.utils import _read_segments_file
- from mne.io import BaseRaw
- from mne_bids import BIDSPath
-
- from braindecode.datasets import BaseDataset
-
- from . import downloader
- from .bids_eeg_metadata import enrich_from_participants
- from .logging import logger
- from .paths import get_default_cache_dir
-
-
- class EEGDashBaseDataset(BaseDataset):
-     """A single EEG recording hosted on AWS S3 and cached locally upon first access.
-
-     This is a subclass of braindecode's BaseDataset and can consequently be used in
-     conjunction with the preprocessing and training pipelines of braindecode.
-     """
-
-     _AWS_BUCKET = "s3://openneuro.org"
-
-     def __init__(
-         self,
-         record: dict[str, Any],
-         cache_dir: str,
-         s3_bucket: str | None = None,
-         **kwargs,
-     ):
-         """Create a new EEGDashBaseDataset instance. Users do not usually need to call this
-         directly -- instead use the EEGDashDataset class to load a collection of these
-         recordings from a local BIDS folder or using a database query.
-
-         Parameters
-         ----------
-         record : dict
-             A fully resolved metadata record for the data to load.
-         cache_dir : str
-             A local directory where the data will be cached.
-         s3_bucket : str | None
-             Optionally an alternative S3 bucket to fetch the data from.
-         kwargs : dict
-             Additional keyword arguments to pass to the BaseDataset constructor.
-
-         """
-         super().__init__(None, **kwargs)
-         self.record = record
-         self.cache_dir = Path(cache_dir)
-         self.bids_kwargs = self._get_raw_bids_args()
-
-         if s3_bucket:
-             self.s3_bucket = s3_bucket
-             self.s3_open_neuro = False
-         else:
-             self.s3_bucket = self._AWS_BUCKET
-             self.s3_open_neuro = True
-
-         # Compute a dataset folder name under cache_dir that encodes preprocessing
-         # (e.g., bdf, mini) to avoid overlapping with the original dataset cache.
-         self.dataset_folder = record.get("dataset", "")
-         # TODO: remove this hack when the competition is over
-         if s3_bucket:
-             suffixes: list[str] = []
-             bucket_lower = str(s3_bucket).lower()
-             if "bdf" in bucket_lower:
-                 suffixes.append("bdf")
-             if "mini" in bucket_lower:
-                 suffixes.append("mini")
-             if suffixes:
-                 self.dataset_folder = f"{self.dataset_folder}-{'-'.join(suffixes)}"
-
-         # Place files under the dataset-specific folder (with suffix if any)
-         rel = Path(record["bidspath"])  # usually starts with the dataset id
-         if rel.parts and rel.parts[0] == record.get("dataset"):
-             rel = Path(self.dataset_folder, *rel.parts[1:])
-         else:
-             rel = Path(self.dataset_folder) / rel
-         self.filecache = self.cache_dir / rel
-         self.bids_root = self.cache_dir / self.dataset_folder
-
-         self.bidspath = BIDSPath(
-             root=self.bids_root,
-             datatype="eeg",
-             suffix="eeg",
-             **self.bids_kwargs,
-         )
-
-         self.s3file = downloader.get_s3path(self.s3_bucket, record["bidspath"])
-         self.bids_dependencies = record["bidsdependencies"]
-         self.bids_dependencies_original = record["bidsdependencies"]
-         # TODO: remove this temporary fix for the BIDS dependencies path
-         # once the competition is over and the dataset is ingested properly
-         if not self.s3_open_neuro:
-             self.bids_dependencies = [
-                 dep.split("/", 1)[1] for dep in self.bids_dependencies
-             ]
-
-         self._raw = None
-
-     def _get_raw_bids_args(self) -> dict[str, Any]:
-         """Helper to restrict the metadata record to the fields needed to locate a BIDS
-         recording.
-         """
-         desired_fields = ["subject", "session", "task", "run"]
-         return {k: self.record[k] for k in desired_fields if self.record[k]}
-
-     def _ensure_raw(self) -> None:
-         """Download the S3 file and BIDS dependencies if not already cached."""
-         # TODO: remove this once it is fixed on our side
-         # (for the competition)
-         if not self.s3_open_neuro:
-             self.bidspath = self.bidspath.update(extension=".bdf")
-             self.filecache = self.filecache.with_suffix(".bdf")
-
-         if not os.path.exists(self.filecache):  # not preloaded
-             if self.bids_dependencies:
-                 downloader.download_dependencies(
-                     s3_bucket=self.s3_bucket,
-                     bids_dependencies=self.bids_dependencies,
-                     bids_dependencies_original=self.bids_dependencies_original,
-                     cache_dir=self.cache_dir,
-                     dataset_folder=self.dataset_folder,
-                     record=self.record,
-                     s3_open_neuro=self.s3_open_neuro,
-                 )
-             self.filecache = downloader.download_s3_file(
-                 self.s3file, self.filecache, self.s3_open_neuro
-             )
-             self.filenames = [self.filecache]
-         if self._raw is None:
-             try:
-                 # mne-bids can emit noisy warnings to stderr; keep user logs clean
-                 _stderr_buffer = io.StringIO()
-                 with redirect_stderr(_stderr_buffer):
-                     self._raw = mne_bids.read_raw_bids(
-                         bids_path=self.bidspath, verbose="ERROR"
-                     )
-                 # Enrich Raw.info and description with participants.tsv extras
-                 enrich_from_participants(
-                     self.bids_root, self.bidspath, self._raw, self.description
-                 )
-
-             except Exception as e:
-                 logger.error(
-                     f"Error while reading BIDS file: {self.bidspath}\n"
-                     "This may be due to a missing or corrupted file.\n"
-                     "Please check the file and try again.\n"
-                     "Usually erasing the local cache and re-downloading helps:\n"
-                     f"`rm {self.bidspath}`"
-                 )
-                 logger.error(f"Exception: {e}")
-                 logger.error(traceback.format_exc())
-                 raise e
-
-     def __len__(self) -> int:
-         """Return the number of samples in the dataset."""
-         if self._raw is None:
-             if (
-                 self.record["ntimes"] is None
-                 or self.record["sampling_frequency"] is None
-             ):
-                 self._ensure_raw()
-             else:
-                 # FIXME: this is a bit strange and should definitely not change as a side effect
-                 # of accessing the data (which it will, since ntimes is the actual length but rounded down)
-                 return int(self.record["ntimes"] * self.record["sampling_frequency"])
-         return len(self._raw)
-
-     @property
-     def raw(self):
-         """Return the MNE Raw object for this recording, performing the actual
-         retrieval if not yet done.
-         """
-         if self._raw is None:
-             self._ensure_raw()
-         return self._raw
-
-     @raw.setter
-     def raw(self, raw):
-         self._raw = raw
-
-
- class EEGDashBaseRaw(BaseRaw):
-     """Wrapper around the MNE BaseRaw class that automatically fetches the data from S3
-     (when _read_segment is called) and caches it locally. Currently for internal use.
-
-     Parameters
-     ----------
-     input_fname : path-like
-         Path to the S3 file.
-     metadata : dict
-         The metadata record for the recording (e.g., from the database).
-     preload : bool
-         Whether to pre-load the data before the first access.
-     cache_dir : str
-         Local path under which the data will be cached.
-     bids_dependencies : list
-         List of additional BIDS metadata files that should be downloaded and cached
-         alongside the main recording file.
-     verbose : str | int | None
-         Optionally the verbosity level for MNE logging (see the MNE documentation for possible values).
-
-     See Also
-     --------
-     mne.io.Raw : Documentation of attributes and methods.
-
-     """
-
-     _AWS_BUCKET = "s3://openneuro.org"
-
-     def __init__(
-         self,
-         input_fname: str,
-         metadata: dict[str, Any],
-         preload: bool = False,
-         *,
-         cache_dir: str | None = None,
-         bids_dependencies: list[str] = [],
-         verbose: Any = None,
-     ):
-         """Build the Raw metadata up front; the data themselves are fetched from S3 on demand."""
-         # Create a simple RawArray
-         sfreq = metadata["sfreq"]  # Sampling frequency
-         n_times = metadata["n_times"]
-         ch_names = metadata["ch_names"]
-         ch_types = []
-         for ch in metadata["ch_types"]:
-             chtype = ch.lower()
-             if chtype == "heog" or chtype == "veog":
-                 chtype = "eog"
-             ch_types.append(chtype)
-         info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
-
-         self.s3file = downloader.get_s3path(self._AWS_BUCKET, input_fname)
-         self.cache_dir = Path(cache_dir) if cache_dir else get_default_cache_dir()
-         self.filecache = self.cache_dir / input_fname
-         self.bids_dependencies = bids_dependencies
-         self.s3_open_neuro = True  # this class always reads from the public OpenNeuro bucket
-
-         if preload and not os.path.exists(self.filecache):
-             self.filecache = downloader.download_s3_file(
-                 self.s3file, self.filecache, self.s3_open_neuro
-             )
-             self.filenames = [self.filecache]
-             preload = self.filecache
-
-         super().__init__(
-             info,
-             preload,
-             last_samps=[n_times - 1],
-             orig_format="single",
-             verbose=verbose,
-         )
-
-     def _read_segment(
-         self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
-     ):
-         if not os.path.exists(self.filecache):  # not preloaded and not yet cached
-             if self.bids_dependencies:  # currently used only for sidecar files
-                 downloader.download_dependencies(
-                     s3_bucket=self._AWS_BUCKET,
-                     bids_dependencies=self.bids_dependencies,
-                     bids_dependencies_original=None,
-                     cache_dir=self.cache_dir,
-                     dataset_folder=self.filecache,
-                     record={},
-                     s3_open_neuro=self.s3_open_neuro,
-                 )
-             self.filecache = downloader.download_s3_file(
-                 self.s3file, self.filecache, self.s3_open_neuro
-             )
-             self.filenames = [self.filecache]
-         else:  # not preloaded, but the file is already cached
-             self.filenames = [self.filecache]
-         return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
-
-     def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
-         """Read a chunk of data from the file."""
-         _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
-
-
- class EEGBIDSDataset:
-     """A one-stop-shop interface to a local BIDS dataset containing EEG recordings.
-
-     This is mainly tailored to the needs of the EEGDash application and is used to centralize
-     interactions with the BIDS dataset, such as parsing the metadata.
-
-     Parameters
-     ----------
-     data_dir : str | Path
-         The path to the local BIDS dataset directory.
-     dataset : str
-         A name for the dataset.
-
-     """
-
-     ALLOWED_FILE_FORMAT = ["eeglab", "brainvision", "biosemi", "european"]
-     RAW_EXTENSIONS = {
-         ".set": [".set", ".fdt"],  # eeglab
-         ".edf": [".edf"],  # european
-         ".vhdr": [".eeg", ".vhdr", ".vmrk", ".dat", ".raw"],  # brainvision
-         ".bdf": [".bdf"],  # biosemi
-     }
-     METADATA_FILE_EXTENSIONS = [
-         "eeg.json",
-         "channels.tsv",
-         "electrodes.tsv",
-         "events.tsv",
-         "events.json",
-     ]
-
-     def __init__(
-         self,
-         data_dir=None,  # location of the BIDS dataset
-         dataset="",  # dataset name
-     ):
-         if data_dir is None or not os.path.exists(data_dir):
-             raise ValueError("data_dir must be specified and must exist")
-         self.bidsdir = Path(data_dir)
-         self.dataset = dataset
-         # Accept the exact dataset folder or a variant with informative suffixes
-         # (e.g., dsXXXXX-bdf, dsXXXXX-bdf-mini) to avoid collisions.
-         dir_name = self.bidsdir.name
-         if not (dir_name == self.dataset or dir_name.startswith(self.dataset + "-")):
-             raise AssertionError(
-                 f"BIDS directory '{dir_name}' does not correspond to dataset '{self.dataset}'"
-             )
-         self.layout = BIDSLayout(data_dir)
-
-         # get all recording files in the BIDS directory
-         self.files = self._get_recordings(self.layout)
-         assert len(self.files) > 0, ValueError(
-             "Unable to construct EEG dataset. No EEG recordings found."
-         )
-         assert self.check_eeg_dataset(), ValueError("Dataset is not an EEG dataset.")
-
-     def check_eeg_dataset(self) -> bool:
-         """Check whether the dataset is an EEG dataset."""
-         return self.get_bids_file_attribute("modality", self.files[0]).lower() == "eeg"
-
-     def _get_recordings(self, layout: BIDSLayout) -> list[str]:
-         """Get a list of all EEG recording files in the BIDS layout."""
-         files = []
-         for ext in self.RAW_EXTENSIONS:
-             files = layout.get(extension=ext, return_type="filename")
-             if files:
-                 break
-         return files
-
-     def _get_relative_bidspath(self, filename: str) -> str:
-         """Make the given file path relative to the parent of the BIDS directory."""
-         bids_parent_dir = self.bidsdir.parent.absolute()
-         return str(Path(filename).relative_to(bids_parent_dir))
-
-     def _get_property_from_filename(self, property: str, filename: str) -> str:
-         """Parse a property out of a BIDS-compliant filename. Returns an empty string
-         if not found.
-         """
-         import platform
-
-         if platform.system() == "Windows":
-             lookup = re.search(rf"{property}-(.*?)[_\\]", filename)
-         else:
-             lookup = re.search(rf"{property}-(.*?)[_\/]", filename)
-         return lookup.group(1) if lookup else ""
-
-     def _merge_json_inheritance(self, json_files: list[str | Path]) -> dict:
-         """Internal helper to merge the list of JSON files found by
-         _get_bids_file_inheritance, expecting the order (from left to right) to be
-         from lowest level to highest level, and return a merged dictionary.
-         """
-         json_files.reverse()
-         json_dict = {}
-         for f in json_files:
-             with open(f) as fp:
-                 json_dict.update(json.load(fp))
-         return json_dict
-
-     def _get_bids_file_inheritance(
-         self, path: str | Path, basename: str, extension: str
-     ) -> list[Path]:
-         """Get all file paths that apply to the basename file in the specified directory
-         and that end with the specified suffix, recursively searching parent directories
-         (following the BIDS inheritance principle in the order of lowest level first).
-
-         Parameters
-         ----------
-         path : str | Path
-             The directory path to search for files.
-         basename : str
-             BIDS file basename, e.g. without the _eeg.set extension.
-         extension : str
-             Only consider files that end with the specified suffix; e.g. channels.tsv
-
-         Returns
-         -------
-         list[Path]
-             A list of file paths that match the given basename and extension.
-
-         """
-         top_level_files = ["README", "dataset_description.json", "participants.tsv"]
-         bids_files = []
-
-         # normalize path to a Path object
-         if isinstance(path, str):
-             path = Path(path)
-         if not path.exists():
-             raise ValueError(f"path {path} does not exist")
-
-         # collect matching files in the current directory
-         for file in os.listdir(path):
-             # target_file = path / f"{cur_file_basename}_{extension}"
-             if os.path.isfile(path / file):
-                 # check whether the file ends with the requested extension
-                 if file.endswith(extension):
-                     filepath = path / file
-                     bids_files.append(filepath)
-
-         # stop once we reach the top-level (dataset root) directory
-         if any(file in os.listdir(path) for file in top_level_files):
-             return bids_files
-         else:
-             # call _get_bids_file_inheritance recursively on the parent directory
-             bids_files.extend(
-                 self._get_bids_file_inheritance(path.parent, basename, extension)
-             )
-             return bids_files
-
-     def get_bids_metadata_files(
-         self, filepath: str | Path, metadata_file_extension: str
-     ) -> list[Path]:
-         """Retrieve all metadata file paths that apply to a given data file path and that
-         end with a specific suffix (following the BIDS inheritance principle).
-
-         Parameters
-         ----------
-         filepath : str | Path
-             The filepath to get the associated metadata files for.
-         metadata_file_extension : str
-             Consider only metadata files that end with the specified suffix,
-             e.g., channels.tsv or eeg.json
-
-         Returns
-         -------
-         list[Path]
-             A list of filepaths for all matching metadata files.
-
-         """
-         if isinstance(filepath, str):
-             filepath = Path(filepath)
-         if not filepath.exists():
-             raise ValueError(f"filepath {filepath} does not exist")
-         path, filename = os.path.split(filepath)
-         basename = filename[: filename.rfind("_")]
-         # metadata files
-         meta_files = self._get_bids_file_inheritance(
-             path, basename, metadata_file_extension
-         )
-         return meta_files
-
-     def _scan_directory(self, directory: str, extension: str) -> list[str]:
-         """Return a list of file paths that end with the given extension in the specified
-         directory. Ignores certain special directories like .git, .datalad, derivatives,
-         and code.
-         """
-         result_files = []
-         directory_to_ignore = [".git", ".datalad", "derivatives", "code"]
-         with os.scandir(directory) as entries:
-             for entry in entries:
-                 if entry.is_file() and entry.name.endswith(extension):
-                     result_files.append(entry.path)
-                 elif entry.is_dir():
-                     # skip directories whose name matches the ignore list
-                     if not any(name in entry.name for name in directory_to_ignore):
-                         result_files.append(entry.path)  # Add directory to scan later
-         return result_files
-
-     def _get_files_with_extension_parallel(
-         self, directory: str, extension: str = ".set", max_workers: int = -1
-     ) -> list[str]:
-         """Efficiently scan a directory and its subdirectories for files that end with
-         the given extension.
-
-         Parameters
-         ----------
-         directory : str
-             The root directory to scan for files.
-         extension : str
-             Only consider files that end with this suffix, e.g. '.set'.
-         max_workers : int
-             Optionally specify the maximum number of worker threads to use for parallel scanning.
-             Defaults to all available CPU cores if set to -1.
-
-         Returns
-         -------
-         list[str]
-             A list of filepaths for all matching files.
-
-         """
-         result_files = []
-         dirs_to_scan = [directory]
-
-         # Use joblib.Parallel and delayed to parallelize directory scanning
-         while dirs_to_scan:
-             logger.info(
-                 f"Directories to scan: {len(dirs_to_scan)}: {dirs_to_scan}"
-             )
-             # Run the _scan_directory function in parallel across directories
-             results = Parallel(n_jobs=max_workers, prefer="threads", verbose=1)(
-                 delayed(self._scan_directory)(d, extension) for d in dirs_to_scan
-             )
-
-             # Reset the directories to scan and process the results
-             dirs_to_scan = []
-             for res in results:
-                 for path in res:
-                     if os.path.isdir(path):
-                         dirs_to_scan.append(path)  # Queue up subdirectories to scan
-                     else:
-                         result_files.append(path)  # Add files to the final result
-         logger.info(f"Found {len(result_files)} files.")
-
-         return result_files
-
-     def load_and_preprocess_raw(
-         self, raw_file: str, preprocess: bool = False
-     ) -> np.ndarray:
-         """Utility function to load a raw data file with MNE, optionally apply some simple
-         (hardcoded) preprocessing, and return the data as a numpy array. Not meant for
-         purposes other than testing or debugging.
-         """
-         logger.info(f"Loading raw data from {raw_file}")
-         EEG = mne.io.read_raw_eeglab(raw_file, preload=True, verbose="error")
-
-         if preprocess:
-             # bandpass filter
-             EEG = EEG.filter(l_freq=0.25, h_freq=25, verbose=False)
-             # remove 60 Hz line noise
-             EEG = EEG.notch_filter(freqs=(60), verbose=False)
-             # bring to a common sampling rate
-             sfreq = 128
-             if EEG.info["sfreq"] != sfreq:
-                 EEG = EEG.resample(sfreq)
-
-         mat_data = EEG.get_data()
-
-         if len(mat_data.shape) > 2:
-             raise ValueError("Expect raw data to be CxT dimension")
-         return mat_data
-
-     def get_files(self) -> list[Path]:
-         """Get all EEG recording file paths (with valid extensions) in the BIDS folder."""
-         return self.files
-
-     def resolve_bids_json(self, json_files: list[str]) -> dict:
-         """Resolve the BIDS JSON files and return a dictionary of the resolved values.
-
-         Parameters
-         ----------
-         json_files : list
-             A list of JSON file paths to resolve, in order of leaf level first.
-
-         Returns
-         -------
-         dict: A dictionary of the resolved values.
-
-         """
-         if len(json_files) == 0:
-             raise ValueError("No JSON files provided")
-         json_files.reverse()  # TODO: nondeterministic
-
-         json_dict = {}
-         for json_file in json_files:
-             with open(json_file) as f:
-                 json_dict.update(json.load(f))
-         return json_dict
-
-     def get_bids_file_attribute(self, attribute: str, data_filepath: str) -> Any:
-         """Retrieve a specific attribute from the BIDS file metadata applicable
-         to the provided recording file path.
-         """
-         entities = self.layout.parse_file_entities(data_filepath)
-         bidsfile = self.layout.get(**entities)[0]
-         attributes = bidsfile.get_entities(metadata="all")
-         attribute_mapping = {
-             "sfreq": "SamplingFrequency",
-             "modality": "datatype",
-             "task": "task",
-             "session": "session",
-             "run": "run",
-             "subject": "subject",
-             "ntimes": "RecordingDuration",
-             "nchans": "EEGChannelCount",
-         }
-         attribute_value = attributes.get(attribute_mapping.get(attribute), None)
-         return attribute_value
-
-     def channel_labels(self, data_filepath: str) -> list[str]:
-         """Get a list of channel labels for the given data file path."""
-         channels_tsv = pd.read_csv(
-             self.get_bids_metadata_files(data_filepath, "channels.tsv")[0], sep="\t"
-         )
-         return channels_tsv["name"].tolist()
-
-     def channel_types(self, data_filepath: str) -> list[str]:
-         """Get a list of channel types for the given data file path."""
-         channels_tsv = pd.read_csv(
-             self.get_bids_metadata_files(data_filepath, "channels.tsv")[0], sep="\t"
-         )
-         return channels_tsv["type"].tolist()
-
-     def num_times(self, data_filepath: str) -> int:
-         """Get the approximate number of time points in the EEG recording based on the BIDS metadata."""
-         eeg_jsons = self.get_bids_metadata_files(data_filepath, "eeg.json")
-         eeg_json_dict = self._merge_json_inheritance(eeg_jsons)
-         return int(
-             eeg_json_dict["SamplingFrequency"] * eeg_json_dict["RecordingDuration"]
-         )
-
-     def subject_participant_tsv(self, data_filepath: str) -> dict[str, Any]:
-         """Get the BIDS participants.tsv record for the subject to which the given file
-         path corresponds, as a dictionary.
-         """
-         participants_tsv = pd.read_csv(
-             self.get_bids_metadata_files(data_filepath, "participants.tsv")[0], sep="\t"
-         )
-         # return an empty dict if participants.tsv has no rows
-         if participants_tsv.empty:
-             return {}
-         # set 'participant_id' as index
-         participants_tsv.set_index("participant_id", inplace=True)
-         subject = f"sub-{self.get_bids_file_attribute('subject', data_filepath)}"
-         return participants_tsv.loc[subject].to_dict()
-
-     def eeg_json(self, data_filepath: str) -> dict[str, Any]:
-         """Get the merged BIDS eeg.json metadata for the given data file path."""
-         eeg_jsons = self.get_bids_metadata_files(data_filepath, "eeg.json")
-         eeg_json_dict = self._merge_json_inheritance(eeg_jsons)
-         return eeg_json_dict
-
-     def channel_tsv(self, data_filepath: str) -> dict[str, Any]:
-         """Get the BIDS channels.tsv metadata for the given data file path, as a dictionary
-         of lists and/or single values.
-         """
-         channels_tsv = pd.read_csv(
-             self.get_bids_metadata_files(data_filepath, "channels.tsv")[0], sep="\t"
-         )
-         channel_tsv = channels_tsv.to_dict()
-         # 'name', 'type', and 'units' now map to index-value dictionaries; convert them to lists
-         for list_field in ["name", "type", "units"]:
-             channel_tsv[list_field] = list(channel_tsv[list_field].values())
-         return channel_tsv
-
-
- __all__ = ["EEGDashBaseDataset", "EEGBIDSDataset", "EEGDashBaseRaw"]
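
For reference, the classes removed here formed the loading API of the 0.4.0 data_utils module. A minimal usage sketch of EEGDashBaseDataset, assuming a fully resolved metadata record with the fields the class reads; all record values below are hypothetical:

from eegdash.data_utils import EEGDashBaseDataset

# Hypothetical metadata record; the field names match what the class accesses.
record = {
    "dataset": "ds002718",
    "bidspath": "ds002718/sub-012/eeg/sub-012_task-FaceRecognition_eeg.set",
    "bidsdependencies": [],       # sidecar files fetched alongside the recording
    "subject": "012",
    "session": None,
    "task": "FaceRecognition",
    "run": None,
    "ntimes": 454,                # recording duration in seconds
    "sampling_frequency": 250.0,
}

ds = EEGDashBaseDataset(record, cache_dir="./eegdash_cache")
print(len(ds))  # ntimes * sampling_frequency, computed without touching the network
raw = ds.raw    # first access downloads the file from S3 and caches it locally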
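
EEGDashBaseRaw (internal use) builds its mne.Info from a plain metadata dictionary rather than from BIDS sidecars. A sketch of the expected dictionary shape, again with hypothetical values:

from eegdash.data_utils import EEGDashBaseRaw

metadata = {
    "sfreq": 250.0,                      # sampling frequency in Hz
    "n_times": 113500,                   # number of samples in the recording
    "ch_names": ["Cz", "Pz", "HEOG"],
    "ch_types": ["EEG", "EEG", "HEOG"],  # HEOG/VEOG are remapped to MNE's "eog" type
}

raw = EEGDashBaseRaw(
    "ds002718/sub-012/eeg/sub-012_task-FaceRecognition_eeg.set",
    metadata,
    preload=False,  # with preload=False, data are fetched from S3 on first read
)
data = raw.get_data()  # triggers the download and local caching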
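
The inheritance helpers in EEGBIDSDataset collect sidecar files from the recording's directory up to the dataset root (lowest level first) and merge them so that the most specific file wins. A standalone sketch of that merge rule, with hypothetical sidecar contents:

# Sidecars ordered lowest (most specific) level first, as returned by
# get_bids_metadata_files for an eeg.json query.
sidecars = [
    {"SamplingFrequency": 512},                            # sub-012/eeg/..._eeg.json
    {"SamplingFrequency": 256, "PowerLineFrequency": 60},  # dataset-root ..._eeg.json
]

merged = {}
for sidecar in reversed(sidecars):  # apply root-level defaults first ...
    merged.update(sidecar)          # ... then let more specific files override them

assert merged == {"SamplingFrequency": 512, "PowerLineFrequency": 60}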
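
Finally, a usage sketch of EEGBIDSDataset against a local BIDS folder; the directory and dataset name are placeholders:

from eegdash.data_utils import EEGBIDSDataset

bids = EEGBIDSDataset(data_dir="./ds002718", dataset="ds002718")
for f in bids.get_files():
    print(
        bids.get_bids_file_attribute("subject", f),
        bids.get_bids_file_attribute("sfreq", f),
        bids.channel_labels(f)[:5],  # first few channel names from channels.tsv
    )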