eegdash 0.3.3.dev61__py3-none-any.whl → 0.5.0.dev180784713__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. eegdash/__init__.py +19 -6
  2. eegdash/api.py +336 -539
  3. eegdash/bids_eeg_metadata.py +495 -0
  4. eegdash/const.py +349 -0
  5. eegdash/dataset/__init__.py +28 -0
  6. eegdash/dataset/base.py +311 -0
  7. eegdash/dataset/bids_dataset.py +641 -0
  8. eegdash/dataset/dataset.py +692 -0
  9. eegdash/dataset/dataset_summary.csv +255 -0
  10. eegdash/dataset/registry.py +287 -0
  11. eegdash/downloader.py +197 -0
  12. eegdash/features/__init__.py +15 -13
  13. eegdash/features/datasets.py +329 -138
  14. eegdash/features/decorators.py +105 -13
  15. eegdash/features/extractors.py +233 -63
  16. eegdash/features/feature_bank/__init__.py +12 -12
  17. eegdash/features/feature_bank/complexity.py +22 -20
  18. eegdash/features/feature_bank/connectivity.py +27 -28
  19. eegdash/features/feature_bank/csp.py +3 -1
  20. eegdash/features/feature_bank/dimensionality.py +6 -6
  21. eegdash/features/feature_bank/signal.py +29 -30
  22. eegdash/features/feature_bank/spectral.py +40 -44
  23. eegdash/features/feature_bank/utils.py +8 -0
  24. eegdash/features/inspect.py +126 -15
  25. eegdash/features/serialization.py +58 -17
  26. eegdash/features/utils.py +90 -16
  27. eegdash/hbn/__init__.py +28 -0
  28. eegdash/hbn/preprocessing.py +105 -0
  29. eegdash/hbn/windows.py +428 -0
  30. eegdash/logging.py +54 -0
  31. eegdash/mongodb.py +55 -24
  32. eegdash/paths.py +52 -0
  33. eegdash/utils.py +29 -1
  34. eegdash-0.5.0.dev180784713.dist-info/METADATA +121 -0
  35. eegdash-0.5.0.dev180784713.dist-info/RECORD +38 -0
  36. eegdash-0.5.0.dev180784713.dist-info/licenses/LICENSE +29 -0
  37. eegdash/data_config.py +0 -34
  38. eegdash/data_utils.py +0 -687
  39. eegdash/dataset.py +0 -69
  40. eegdash/preprocessing.py +0 -63
  41. eegdash-0.3.3.dev61.dist-info/METADATA +0 -192
  42. eegdash-0.3.3.dev61.dist-info/RECORD +0 -28
  43. eegdash-0.3.3.dev61.dist-info/licenses/LICENSE +0 -23
  44. {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/WHEEL +0 -0
  45. {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/top_level.txt +0 -0
eegdash/const.py ADDED
@@ -0,0 +1,349 @@
+ # Authors: The EEGDash contributors.
+ # License: BSD-3-Clause
+ # Copyright the EEGDash contributors.
+
+ """Configuration constants and mappings for EEGDash.
+
+ This module contains global configuration settings, allowed query fields, and mapping
+ constants used throughout the EEGDash package. It defines the mapping between Healthy
+ Brain Network (HBN) releases and OpenNeuro dataset identifiers, as well as validation
+ rules for database queries.
+ """
+
+ __all__ = [
+     "config",
+     "ALLOWED_QUERY_FIELDS",
+     "RELEASE_TO_OPENNEURO_DATASET_MAP",
+     "SUBJECT_MINI_RELEASE_MAP",
+ ]
+
+ ALLOWED_QUERY_FIELDS = {
+     "data_name",
+     "dataset",
+     "subject",
+     "task",
+     "session",
+     "run",
+     "modality",
+     "sampling_frequency",
+     "nchans",
+     "ntimes",
+ }
+ """set: A set of field names that are permitted in database queries constructed
+ via :func:`~eegdash.api.EEGDash.find` with keyword arguments."""
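The allowed-fields set enables cheap client-side validation before a query ever reaches the database. A minimal sketch, assuming only the documented constant (the validate_query helper and the field values are illustrative, not part of the package):

    from eegdash.const import ALLOWED_QUERY_FIELDS

    def validate_query(**kwargs) -> dict:
        # Reject anything the database is not prepared to filter on.
        unknown = set(kwargs) - ALLOWED_QUERY_FIELDS
        if unknown:
            raise ValueError(f"Unsupported query fields: {sorted(unknown)}")
        return kwargs

    query = validate_query(dataset="ds005505", subject="NDARAC904DMU", task="RestingState")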
33
+
+ RELEASE_TO_OPENNEURO_DATASET_MAP = {
+     "R11": "ds005516",
+     "R10": "ds005515",
+     "R9": "ds005514",
+     "R8": "ds005512",
+     "R7": "ds005511",
+     "R6": "ds005510",
+     "R4": "ds005508",
+     "R5": "ds005509",
+     "R3": "ds005507",
+     "R2": "ds005506",
+     "R1": "ds005505",
+ }
+ """dict: A mapping from Healthy Brain Network (HBN) release identifiers (e.g., "R11")
+ to their corresponding OpenNeuro dataset identifiers (e.g., "ds005516")."""
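Lookups in either direction are one-liners; a small sketch (the map ships with the package, the reverse lookup is illustrative):

    from eegdash.const import RELEASE_TO_OPENNEURO_DATASET_MAP

    dataset_id = RELEASE_TO_OPENNEURO_DATASET_MAP["R5"]  # "ds005509"

    # Reverse lookup: recover the HBN release behind a known OpenNeuro id.
    release = next(
        rel for rel, ds in RELEASE_TO_OPENNEURO_DATASET_MAP.items() if ds == "ds005509"
    )  # "R5"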
49
+
+ SUBJECT_MINI_RELEASE_MAP = {
+     "R11": [
+         "NDARAB678VYW",
+         "NDARAG788YV9",
+         "NDARAM946HJE",
+         "NDARAY977BZT",
+         "NDARAZ532KK0",
+         "NDARCE912ZXW",
+         "NDARCM214WFE",
+         "NDARDL033XRG",
+         "NDARDT889RT9",
+         "NDARDZ794ZVP",
+         "NDAREV869CPW",
+         "NDARFN221WW5",
+         "NDARFV289RKB",
+         "NDARFY623ZTE",
+         "NDARGA890MKA",
+         "NDARHN206XY3",
+         "NDARHP518FUR",
+         "NDARJL292RYV",
+         "NDARKM199DXW",
+         "NDARKW236TN7",
+     ],
+     "R10": [
+         "NDARAR935TGZ",
+         "NDARAV474ADJ",
+         "NDARCB869VM8",
+         "NDARCJ667UPL",
+         "NDARCM677TC1",
+         "NDARET671FTC",
+         "NDARKM061NHZ",
+         "NDARLD501HDK",
+         "NDARLL176DJR",
+         "NDARMT791WDH",
+         "NDARMW299ZAB",
+         "NDARNC405WJA",
+         "NDARNP962TJK",
+         "NDARPB967KU7",
+         "NDARRU560AGK",
+         "NDARTB173LY2",
+         "NDARUW377KAE",
+         "NDARVH565FX9",
+         "NDARVP799KGY",
+         "NDARVY962GB5",
+     ],
+     "R9": [
+         "NDARAC589YMB",
+         "NDARAC853CR6",
+         "NDARAH239PGG",
+         "NDARAL897CYV",
+         "NDARAN160GUF",
+         "NDARAP049KXJ",
+         "NDARAP457WB5",
+         "NDARAW216PM7",
+         "NDARBA004KBT",
+         "NDARBD328NUQ",
+         "NDARBF042LDM",
+         "NDARBH019KPD",
+         "NDARBH728DFK",
+         "NDARBM370JCB",
+         "NDARBU183TDJ",
+         "NDARBW971DCW",
+         "NDARBZ444ZHK",
+         "NDARCC620ZFT",
+         "NDARCD182XT1",
+         "NDARCK113CJM",
+     ],
+     "R8": [
+         "NDARAB514MAJ",
+         "NDARAD571FLB",
+         "NDARAF003VCL",
+         "NDARAG191AE8",
+         "NDARAJ977PRJ",
+         "NDARAP912JK3",
+         "NDARAV454VF0",
+         "NDARAY298THW",
+         "NDARBJ375VP4",
+         "NDARBT436PMT",
+         "NDARBV630BK6",
+         "NDARCB627KDN",
+         "NDARCC059WTH",
+         "NDARCM953HKD",
+         "NDARCN681CXW",
+         "NDARCT889DMB",
+         "NDARDJ204EPU",
+         "NDARDJ544BU5",
+         "NDARDP292DVC",
+         "NDARDW178AC6",
+     ],
+     "R7": [
+         "NDARAY475AKD",
+         "NDARBW026UGE",
+         "NDARCK162REX",
+         "NDARCK481KRH",
+         "NDARCV378MMX",
+         "NDARCX462NVA",
+         "NDARDJ970ELG",
+         "NDARDU617ZW1",
+         "NDAREM609ZXW",
+         "NDAREW074ZM2",
+         "NDARFE555KXB",
+         "NDARFT176NJP",
+         "NDARGK442YHH",
+         "NDARGM439FZD",
+         "NDARGT634DUJ",
+         "NDARHE283KZN",
+         "NDARHG260BM9",
+         "NDARHL684WYU",
+         "NDARHN224TPA",
+         "NDARHP841RMR",
+     ],
+     "R6": [
+         "NDARAD224CRB",
+         "NDARAE301XTM",
+         "NDARAT680GJA",
+         "NDARCA578CEB",
+         "NDARDZ147ETZ",
+         "NDARFL793LDE",
+         "NDARFX710UZA",
+         "NDARGE994BMX",
+         "NDARGP191YHN",
+         "NDARGV436PFT",
+         "NDARHF545HFW",
+         "NDARHP039DBU",
+         "NDARHT774ZK1",
+         "NDARJA830BYV",
+         "NDARKB614KGY",
+         "NDARKM250ET5",
+         "NDARKZ085UKQ",
+         "NDARLB581AXF",
+         "NDARNJ899HW7",
+         "NDARRZ606EDP",
+     ],
+     "R4": [
+         "NDARAC350BZ0",
+         "NDARAD615WLJ",
+         "NDARAG584XLU",
+         "NDARAH503YG1",
+         "NDARAX272ZJL",
+         "NDARAY461TZZ",
+         "NDARBC734UVY",
+         "NDARBL444FBA",
+         "NDARBT640EBN",
+         "NDARBU098PJT",
+         "NDARBU928LV0",
+         "NDARBV059CGE",
+         "NDARCG037CX4",
+         "NDARCG947ZC0",
+         "NDARCH001CN2",
+         "NDARCU001ZN7",
+         "NDARCW497XW2",
+         "NDARCX053GU5",
+         "NDARDF568GL5",
+         "NDARDJ092YKH",
+     ],
+     "R5": [
+         "NDARAH793FBF",
+         "NDARAJ689BVN",
+         "NDARAP785CTE",
+         "NDARAU708TL8",
+         "NDARBE091BGD",
+         "NDARBE103DHM",
+         "NDARBF851NH6",
+         "NDARBH228RDW",
+         "NDARBJ674TVU",
+         "NDARBM433VER",
+         "NDARCA740UC8",
+         "NDARCU633GCZ",
+         "NDARCU736GZ1",
+         "NDARCU744XWL",
+         "NDARDC843HHM",
+         "NDARDH086ZKK",
+         "NDARDL305BT8",
+         "NDARDU853XZ6",
+         "NDARDV245WJG",
+         "NDAREC480KFA",
+     ],
+     "R3": [
+         "NDARAA948VFH",
+         "NDARAD774HAZ",
+         "NDARAE828CML",
+         "NDARAG340ERT",
+         "NDARBA839HLG",
+         "NDARBE641DGZ",
+         "NDARBG574KF4",
+         "NDARBM642JFT",
+         "NDARCL016NHB",
+         "NDARCV944JA6",
+         "NDARCY178KJP",
+         "NDARDY150ZP9",
+         "NDAREC542MH3",
+         "NDAREK549XUQ",
+         "NDAREM887YY8",
+         "NDARFA815FXE",
+         "NDARFF644ZGD",
+         "NDARFV557XAA",
+         "NDARFV780ABD",
+         "NDARGB102NWJ",
+     ],
+     "R2": [
+         "NDARAB793GL3",
+         "NDARAM675UR8",
+         "NDARBM839WR5",
+         "NDARBU730PN8",
+         "NDARCT974NAJ",
+         "NDARCW933FD5",
+         "NDARCZ770BRG",
+         "NDARDW741HCF",
+         "NDARDZ058NZN",
+         "NDAREC377AU2",
+         "NDAREM500WWH",
+         "NDAREV527ZRF",
+         "NDAREV601CE7",
+         "NDARFF070XHV",
+         "NDARFR108JNB",
+         "NDARFT305CG1",
+         "NDARGA056TMW",
+         "NDARGH775KF5",
+         "NDARGJ878ZP4",
+         "NDARHA387FPM",
+     ],
+     "R1": [
+         "NDARAC904DMU",
+         "NDARAM704GKZ",
+         "NDARAP359UM6",
+         "NDARBD879MBX",
+         "NDARBH024NH2",
+         "NDARBK082PDD",
+         "NDARCA153NKE",
+         "NDARCE721YB5",
+         "NDARCJ594BWQ",
+         "NDARCN669XPR",
+         "NDARCW094JCG",
+         "NDARCZ947WU5",
+         "NDARDH670PXH",
+         "NDARDL511UND",
+         "NDARDU986RBM",
+         "NDAREM731BYM",
+         "NDAREN519BLJ",
+         "NDARFK610GY5",
+         "NDARFT581ZW5",
+         "NDARFW972KFQ",
+     ],
+ }
+ """dict: A mapping from HBN release identifiers to a list of subject IDs.
+ This is used to select a small, representative subset of subjects for creating
+ "mini" datasets for testing and demonstration purposes."""
297
+
+ config = {
+     "required_fields": ["data_name"],
+     # Default set of user-facing primary record attributes expected in the database. Records
+     # where any of these are missing will be loaded with the respective attribute set to None.
+     # Additional fields may be returned if they are present in the database, notably bidsdependencies.
+     "attributes": {
+         "data_name": "str",
+         "dataset": "str",
+         "bidspath": "str",
+         "subject": "str",
+         "task": "str",
+         "session": "str",
+         "run": "str",
+         "sampling_frequency": "float",
+         "modality": "str",
+         "nchans": "int",
+         "ntimes": "int",  # note: this is really the number of seconds in the data, rounded down
+     },
+     # Queryable descriptive fields for a given recording.
+     "description_fields": ["subject", "session", "run", "task", "age", "gender", "sex"],
+     # Filenames that may be present in the BIDS dataset directory and that are used
+     # to load and interpret a given BIDS recording.
+     "bids_dependencies_files": [
+         "dataset_description.json",
+         "participants.tsv",
+         "events.tsv",
+         "events.json",
+         "eeg.json",
+         "electrodes.tsv",
+         "channels.tsv",
+         "coordsystem.json",
+     ],
+     "accepted_query_fields": ["data_name", "dataset"],
+ }
+ """dict: A global configuration dictionary for the EEGDash package.
+
+ Keys
+ ----
+ required_fields : list
+     Fields that must be present in every database record.
+ attributes : dict
+     A schema defining the expected primary attributes and their types for a
+     database record.
+ description_fields : list
+     A list of fields considered to be descriptive metadata for a recording,
+     which can be used for filtering and display.
+ bids_dependencies_files : list
+     A list of BIDS metadata filenames that are relevant for interpreting an
+     EEG recording.
+ accepted_query_fields : list
+     Fields that are accepted for lightweight existence checks in the database.
+ """
@@ -0,0 +1,28 @@
+ """Public API for dataset helpers and dynamically generated datasets."""
+
+ from . import dataset as _dataset_mod  # triggers dynamic class registration
+ from .bids_dataset import EEGBIDSDataset
+ from .dataset import EEGChallengeDataset, EEGDashDataset
+ from .registry import register_openneuro_datasets
+
+ # Re-export dynamically generated dataset classes at the package level so that
+ # ``eegdash.dataset`` shows them in the API docs and users can import as
+ # ``from eegdash.dataset import DSXXXXX``.
+ _dyn_names = []
+ for _name in getattr(_dataset_mod, "__all__", []):
+     if _name == "EEGChallengeDataset":
+         # Already imported explicitly above
+         continue
+     _obj = getattr(_dataset_mod, _name, None)
+     if _obj is not None:
+         globals()[_name] = _obj
+         _dyn_names.append(_name)
+
+ __all__ = [
+     "EEGBIDSDataset",
+     "EEGDashDataset",
+     "EEGChallengeDataset",
+     "register_openneuro_datasets",
+ ] + _dyn_names
+
+ del _dataset_mod, _name, _obj, _dyn_names
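In practice the re-export means static and dynamically generated classes are imported the same way. A sketch (the concrete DSXXXXX name below is illustrative; the available names depend on what register_openneuro_datasets generated at import time):

    # Explicitly exported helpers and dataset classes:
    from eegdash.dataset import EEGBIDSDataset, EEGChallengeDataset, EEGDashDataset

    # Dynamically generated per-dataset classes, one per registered OpenNeuro
    # dataset, are importable the same way (class name is illustrative):
    from eegdash.dataset import DS005505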
@@ -0,0 +1,311 @@
+ # Authors: The EEGDash contributors.
+ # License: BSD-3-Clause
+ # Copyright the EEGDash contributors.
+
+ """Data utilities and dataset classes for EEG data handling.
+
+ This module provides core dataset classes for working with EEG data in the EEGDash ecosystem,
+ including classes for individual recordings and collections of datasets. It integrates with
+ braindecode for machine learning workflows and handles data loading from both local and remote sources.
+ """
+
+ import io
+ import os
+ import traceback
+ from contextlib import redirect_stderr
+ from pathlib import Path
+ from typing import Any
+
+ import mne
+ import mne_bids
+ from mne._fiff.utils import _read_segments_file
+ from mne.io import BaseRaw
+ from mne_bids import BIDSPath
+
+ from braindecode.datasets.base import BaseDataset
+
+ from .. import downloader
+ from ..bids_eeg_metadata import enrich_from_participants
+ from ..logging import logger
+ from ..paths import get_default_cache_dir
+
+
+ class EEGDashBaseDataset(BaseDataset):
+     """A single EEG recording dataset.
+
+     Represents a single EEG recording, typically hosted on a remote server (like AWS S3)
+     and cached locally upon first access. This class is a subclass of
+     :class:`braindecode.datasets.BaseDataset` and can be used with braindecode's
+     preprocessing and training pipelines.
+
+     Parameters
+     ----------
+     record : dict
+         A fully resolved metadata record for the data to load.
+     cache_dir : str
+         The local directory where the data will be cached.
+     s3_bucket : str, optional
+         The S3 bucket to download data from. If not provided, defaults to the
+         OpenNeuro bucket.
+     **kwargs
+         Additional keyword arguments passed to the
+         :class:`braindecode.datasets.BaseDataset` constructor.
+
+     """
+
+     _AWS_BUCKET = "s3://openneuro.org"
+
+     def __init__(
+         self,
+         record: dict[str, Any],
+         cache_dir: str,
+         s3_bucket: str | None = None,
+         **kwargs,
+     ):
+         super().__init__(None, **kwargs)
+         self.record = record
+         self.cache_dir = Path(cache_dir)
+         self.bids_kwargs = self._get_raw_bids_args()
+
+         if s3_bucket:
+             self.s3_bucket = s3_bucket
+             self.s3_open_neuro = False
+         else:
+             self.s3_bucket = self._AWS_BUCKET
+             self.s3_open_neuro = True
+
+         # Compute a dataset folder name under cache_dir that encodes preprocessing
+         # (e.g., bdf, mini) to avoid overlapping with the original dataset cache.
+         self.dataset_folder = record.get("dataset", "")
+         # TODO: remove this hack when the competition is over
+         if s3_bucket:
+             suffixes: list[str] = []
+             bucket_lower = str(s3_bucket).lower()
+             if "bdf" in bucket_lower:
+                 suffixes.append("bdf")
+             if "mini" in bucket_lower:
+                 suffixes.append("mini")
+             if suffixes:
+                 self.dataset_folder = f"{self.dataset_folder}-{'-'.join(suffixes)}"
+
+         # Place files under the dataset-specific folder (with suffix if any)
+         rel = Path(record["bidspath"])  # usually starts with the dataset id
+         if rel.parts and rel.parts[0] == record.get("dataset"):
+             rel = Path(self.dataset_folder, *rel.parts[1:])
+         else:
+             rel = Path(self.dataset_folder) / rel
+         self.filecache = self.cache_dir / rel
+         self.bids_root = self.cache_dir / self.dataset_folder
+
+         self.bidspath = BIDSPath(
+             root=self.bids_root,
+             datatype="eeg",
+             suffix="eeg",
+             **self.bids_kwargs,
+         )
+
+         self.s3file = downloader.get_s3path(self.s3_bucket, record["bidspath"])
+         self.bids_dependencies = record["bidsdependencies"]
+         self.bids_dependencies_original = record["bidsdependencies"]
+         # TODO: remove this temporary fix for the BIDS dependencies path
+         # when the competition is over and the dataset is ingested properly
+         if not self.s3_open_neuro:
+             self.bids_dependencies = [
+                 dep.split("/", 1)[1] for dep in self.bids_dependencies
+             ]
+
+         self._raw = None
+
+     def _get_raw_bids_args(self) -> dict[str, Any]:
+         """Extract BIDS-related arguments from the metadata record."""
+         desired_fields = ["subject", "session", "task", "run"]
+         return {k: self.record[k] for k in desired_fields if self.record[k]}
+
+     def _ensure_raw(self) -> None:
+         """Ensure the raw data file and its dependencies are cached locally."""
+         # TODO: remove this once it is fixed on our side (for the competition)
+         if not self.s3_open_neuro:
+             self.bidspath = self.bidspath.update(extension=".bdf")
+             self.filecache = self.filecache.with_suffix(".bdf")
+
+         if not os.path.exists(self.filecache):  # not preloaded
+             if self.bids_dependencies:
+                 downloader.download_dependencies(
+                     s3_bucket=self.s3_bucket,
+                     bids_dependencies=self.bids_dependencies,
+                     bids_dependencies_original=self.bids_dependencies_original,
+                     cache_dir=self.cache_dir,
+                     dataset_folder=self.dataset_folder,
+                     record=self.record,
+                     s3_open_neuro=self.s3_open_neuro,
+                 )
+             self.filecache = downloader.download_s3_file(
+                 self.s3file, self.filecache, self.s3_open_neuro
+             )
+             self.filenames = [self.filecache]
+         if self._raw is None:
+             try:
+                 # mne-bids can emit noisy warnings to stderr; keep user logs clean
+                 _stderr_buffer = io.StringIO()
+                 with redirect_stderr(_stderr_buffer):
+                     self._raw = mne_bids.read_raw_bids(
+                         bids_path=self.bidspath, verbose="ERROR"
+                     )
+                 # Enrich Raw.info and description with participants.tsv extras
+                 enrich_from_participants(
+                     self.bids_root, self.bidspath, self._raw, self.description
+                 )
+
+             except Exception as e:
+                 logger.error(
+                     f"Error while reading BIDS file: {self.bidspath}\n"
+                     "This may be due to a missing or corrupted file.\n"
+                     "Please check the file and try again.\n"
+                     "Usually erasing the local cache and re-downloading helps.\n"
+                     f"`rm {self.bidspath}`"
+                 )
+                 logger.error(f"Exception: {e}")
+                 logger.error(traceback.format_exc())
+                 raise e
+
+     def __len__(self) -> int:
+         """Return the number of samples in the dataset."""
+         if self._raw is None:
+             if (
+                 self.record["ntimes"] is None
+                 or self.record["sampling_frequency"] is None
+             ):
+                 self._ensure_raw()
+             else:
+                 # FIXME: the reported length should not change as a side effect of
+                 # accessing the data, but it will, since ntimes is the true length
+                 # in seconds, rounded down
+                 return int(self.record["ntimes"] * self.record["sampling_frequency"])
+         return len(self._raw)
+
+     @property
+     def raw(self) -> BaseRaw:
+         """The MNE Raw object for this recording.
+
+         Accessing this property triggers the download and caching of the data
+         if it has not been accessed before.
+
+         Returns
+         -------
+         mne.io.BaseRaw
+             The loaded MNE Raw object.
+
+         """
+         if self._raw is None:
+             self._ensure_raw()
+         return self._raw
+
+     @raw.setter
+     def raw(self, raw: BaseRaw):
+         self._raw = raw
+
+
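A usage sketch for the class above: the record fields follow the schema in eegdash.const.config, all values here are illustrative, and no network access happens until .raw is touched (len() uses the cached ntimes and sampling_frequency when available):

    from pathlib import Path

    from eegdash.dataset.base import EEGDashBaseDataset

    record = {
        "data_name": "ds005505_sub-NDARAC904DMU_task-RestingState_eeg",
        "dataset": "ds005505",
        "bidspath": "ds005505/sub-NDARAC904DMU/eeg/sub-NDARAC904DMU_task-RestingState_eeg.set",
        "subject": "NDARAC904DMU",
        "session": None,
        "run": None,
        "task": "RestingState",
        "sampling_frequency": 500.0,
        "modality": "eeg",
        "nchans": 129,
        "ntimes": 300,  # seconds, rounded down
        "bidsdependencies": ["ds005505/dataset_description.json"],
    }

    ds = EEGDashBaseDataset(record, cache_dir=Path.home() / "eegdash_cache")
    print(len(ds))  # 300 s * 500 Hz samples, computed without touching the network
    raw = ds.raw  # first access downloads from S3 and reads the file via mne-bids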
208
+ class EEGDashBaseRaw(BaseRaw):
+     """MNE BaseRaw wrapper for automatic S3 data fetching.
+
+     This class extends :class:`mne.io.BaseRaw` to automatically fetch data
+     from an S3 bucket and cache it locally when data is first accessed.
+     It is intended for internal use within the EEGDash ecosystem.
+
+     Parameters
+     ----------
+     input_fname : str
+         The path to the file on the S3 bucket (relative to the bucket root).
+     metadata : dict
+         The metadata record for the recording, containing information like
+         sampling frequency, channel names, etc.
+     preload : bool, default False
+         If True, preload the data into memory.
+     cache_dir : str, optional
+         Local directory for caching data. If None, a default directory is used.
+     bids_dependencies : list of str, optional
+         A list of BIDS metadata files to download alongside the main recording.
+         Defaults to an empty list.
+     verbose : str, int, or None, default None
+         The MNE verbosity level.
+
+     See Also
+     --------
+     mne.io.Raw : The base class for Raw objects in MNE.
+
+     """
+
+     _AWS_BUCKET = "s3://openneuro.org"
+
+     def __init__(
+         self,
+         input_fname: str,
+         metadata: dict[str, Any],
+         preload: bool = False,
+         *,
+         cache_dir: str | None = None,
+         bids_dependencies: list[str] | None = None,
+         verbose: Any = None,
+     ):
+         # Build the MNE Info from the metadata record
+         sfreq = metadata["sfreq"]  # Sampling frequency
+         n_times = metadata["n_times"]
+         ch_names = metadata["ch_names"]
+         ch_types = []
+         for ch in metadata["ch_types"]:
+             chtype = ch.lower()
+             if chtype == "heog" or chtype == "veog":
+                 chtype = "eog"
+             ch_types.append(chtype)
+         info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
+
+         self.s3file = downloader.get_s3path(self._AWS_BUCKET, input_fname)
+         # This class always reads from the public OpenNeuro bucket.
+         self.s3_open_neuro = True
+         self.cache_dir = Path(cache_dir) if cache_dir else get_default_cache_dir()
+         self.filecache = self.cache_dir / input_fname
+         if bids_dependencies is None:
+             bids_dependencies = []
+         self.bids_dependencies = bids_dependencies
+
+         if preload and not os.path.exists(self.filecache):
+             self.filecache = downloader.download_s3_file(
+                 self.s3file, self.filecache, self.s3_open_neuro
+             )
+             self.filenames = [self.filecache]
+             preload = self.filecache
+
+         super().__init__(
+             info,
+             preload,
+             last_samps=[n_times - 1],
+             orig_format="single",
+             verbose=verbose,
+         )
+
+     def _read_segment(
+         self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
+     ):
+         """Read a segment of data, downloading if necessary."""
+         if not os.path.exists(self.filecache):  # not preloaded
+             if self.bids_dependencies:  # for now this is used only for sidecar files
+                 downloader.download_dependencies(
+                     s3_bucket=self._AWS_BUCKET,
+                     bids_dependencies=self.bids_dependencies,
+                     bids_dependencies_original=None,
+                     cache_dir=self.cache_dir,
+                     dataset_folder=self.filecache,
+                     record={},
+                     s3_open_neuro=self.s3_open_neuro,
+                 )
+             self.filecache = downloader.download_s3_file(
+                 self.s3file, self.filecache, self.s3_open_neuro
+             )
+             self.filenames = [self.filecache]
+         else:  # file is already in the local cache
+             self.filenames = [self.filecache]
+         return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
+
+     def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
+         """Read a chunk of data from a local file."""
+         _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
+
+
+ __all__ = ["EEGDashBaseDataset", "EEGDashBaseRaw"]