eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -33,12 +33,30 @@ __all__ = [
 
 
 def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
-    """Build and validate a MongoDB query from user-friendly keyword arguments.
+    """Build and validate a MongoDB query from keyword arguments.
+
+    This function converts user-friendly keyword arguments into a valid
+    MongoDB query dictionary. It handles scalar values as exact matches and
+    list-like values as ``$in`` queries. It also performs validation to
+    reject unsupported fields and empty values.
+
+    Parameters
+    ----------
+    **kwargs
+        Keyword arguments representing query filters. Allowed keys are defined
+        in ``eegdash.const.ALLOWED_QUERY_FIELDS``.
+
+    Returns
+    -------
+    dict
+        A MongoDB query dictionary.
+
+    Raises
+    ------
+    ValueError
+        If an unsupported query field is provided, or if a value is None or
+        an empty string/list.
 
-    Improvements:
-    - Reject None values and empty/whitespace-only strings
-    - For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
-    - Preserve scalars as exact matches
     """
     # 1. Validate that all provided keys are allowed for querying
     unknown_fields = set(kwargs.keys()) - ALLOWED_QUERY_FIELDS
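
For orientation, the rewritten docstring implies usage along these lines. This is a minimal sketch: the import path is inferred from the surrounding diff context (this hunk's file name is not shown), and "dataset"/"subject" are used purely as illustrative keys; the authoritative list is eegdash.const.ALLOWED_QUERY_FIELDS.

    # Import path assumed from the surrounding diff context.
    from eegdash.bids_eeg_metadata import build_query_from_kwargs

    # Scalars become exact matches; list-like values become $in queries.
    query = build_query_from_kwargs(dataset="ds005505", subject=["01", "02"])
    # query == {"dataset": "ds005505", "subject": {"$in": ["01", "02"]}}

    # Unknown fields, None values, and empty strings/lists raise ValueError.
    build_query_from_kwargs(not_a_field=1)
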
@@ -89,24 +107,29 @@ def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
 
 
 def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any]:
-    """Build the metadata record for a given BIDS file (single recording) in a BIDS dataset.
+    """Build a metadata record for a BIDS file.
 
-    Attributes are at least the ones defined in data_config attributes (set to None if missing),
-    but are typically a superset, and include, among others, the paths to relevant
-    meta-data files needed to load and interpret the file in question.
+    Extracts metadata attributes from a single BIDS EEG file within a given
+    BIDS dataset. The extracted attributes include BIDS entities, file paths,
+    and technical metadata required for database indexing.
 
     Parameters
     ----------
     bids_dataset : EEGBIDSDataset
         The BIDS dataset object containing the file.
     bids_file : str
-        The path to the BIDS file within the dataset.
+        The path to the BIDS file to process.
 
     Returns
     -------
-    dict:
-        A dictionary representing the metadata record for the given file. This is the
-        same format as the records stored in the database.
+    dict
+        A dictionary of metadata attributes for the file, suitable for
+        insertion into the database.
+
+    Raises
+    ------
+    ValueError
+        If ``bids_file`` is not found in the ``bids_dataset``.
 
     """
     if bids_file not in bids_dataset.files:
@@ -198,11 +221,23 @@ def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any
 
 
 def normalize_key(key: str) -> str:
-    """Normalize a metadata key for robust matching.
+    """Normalize a string key for robust matching.
+
+    Converts the key to lowercase, replaces non-alphanumeric characters with
+    underscores, and removes leading/trailing underscores. This allows for
+    tolerant matching of keys that may have different capitalization or
+    separators (e.g., "p-factor" becomes "p_factor").
+
+    Parameters
+    ----------
+    key : str
+        The key to normalize.
+
+    Returns
+    -------
+    str
+        The normalized key.
 
-    Lowercase and replace non-alphanumeric characters with underscores, then strip
-    leading/trailing underscores. This allows tolerant matching such as
-    "p-factor" ≈ "p_factor" ≈ "P Factor".
     """
     return re.sub(r"[^a-z0-9]+", "_", str(key).lower()).strip("_")
 
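
The normalization is a single regular-expression pass, so the documented examples can be checked in isolation (self-contained sketch reusing the expression from the hunk above):

    import re

    def normalize_key(key: str) -> str:
        # Lowercase, collapse runs of non-alphanumerics to "_", trim the edges.
        return re.sub(r"[^a-z0-9]+", "_", str(key).lower()).strip("_")

    assert normalize_key("p-factor") == "p_factor"
    assert normalize_key("P Factor") == "p_factor"
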
@@ -212,27 +247,27 @@ def merge_participants_fields(
     participants_row: dict[str, Any] | None,
     description_fields: list[str] | None = None,
 ) -> dict[str, Any]:
-    """Merge participants.tsv fields into a dataset description dictionary.
+    """Merge fields from a participants.tsv row into a description dict.
 
-    - Preserves existing entries in ``description`` (no overwrites).
-    - Fills requested ``description_fields`` first, preserving their original names.
-    - Adds all remaining participants columns generically using normalized keys
-      unless a matching requested field already captured them.
+    Enriches a description dictionary with data from a subject's row in
+    ``participants.tsv``. It avoids overwriting existing keys in the
+    description.
 
     Parameters
     ----------
     description : dict
-        Current description to be enriched in-place and returned.
-    participants_row : dict | None
-        A mapping of participants.tsv columns for the current subject.
-    description_fields : list[str] | None
-        Optional list of requested description fields. When provided, matching is
-        performed by normalized names; the original requested field names are kept.
+        The description dictionary to enrich.
+    participants_row : dict or None
+        A dictionary representing a row from ``participants.tsv``. If None,
+        the original description is returned unchanged.
+    description_fields : list of str, optional
+        A list of specific fields to include in the description. Matching is
+        done using normalized keys.
 
     Returns
     -------
     dict
-        The enriched description (same object as input for convenience).
+        The enriched description dictionary.
 
     """
     if not isinstance(description, dict) or not isinstance(participants_row, dict):
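
A behavioral sketch of what the new docstring describes, with purely illustrative values (the exact treatment of identifier columns and of unrequested columns is defined by the implementation, which this hunk does not show):

    description = {"age": 23}
    row = {"participant_id": "sub-01", "p-factor": "0.7"}
    merged = merge_participants_fields(description, row, description_fields=["p_factor"])
    # Expected per the docstring: the existing "age" entry is preserved, and the
    # "p-factor" column is matched to the requested "p_factor" field via
    # normalized keys.
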
@@ -272,10 +307,26 @@ def participants_row_for_subject(
     subject: str,
     id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
 ) -> pd.Series | None:
-    """Load participants.tsv and return the row for a subject.
+    """Load participants.tsv and return the row for a specific subject.
+
+    Searches for a subject's data in the ``participants.tsv`` file within a
+    BIDS dataset. It can identify the subject with or without the "sub-"
+    prefix.
+
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    subject : str
+        The subject identifier (e.g., "01" or "sub-01").
+    id_columns : tuple of str, default ("participant_id", "participant", "subject")
+        A tuple of column names to search for the subject identifier.
+
+    Returns
+    -------
+    pandas.Series or None
+        A pandas Series containing the subject's data if found, otherwise None.
 
-    - Accepts either "01" or "sub-01" as the subject identifier.
-    - Returns a pandas Series for the first matching row, or None if not found.
     """
     try:
         participants_tsv = Path(bids_root) / "participants.tsv"
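
Sketch of the documented call pattern (the dataset path is hypothetical):

    row = participants_row_for_subject("/data/ds005505", "01")       # bare label
    row = participants_row_for_subject("/data/ds005505", "sub-01")   # "sub-" prefix also accepted
    if row is not None:
        print(row.to_dict())
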
@@ -311,9 +362,28 @@ def participants_extras_from_tsv(
     id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
     na_like: tuple[str, ...] = ("", "n/a", "na", "nan", "unknown", "none"),
 ) -> dict[str, Any]:
-    """Return non-identifier, non-empty participants.tsv fields for a subject.
+    """Extract additional participant information from participants.tsv.
+
+    Retrieves all non-identifier and non-empty fields for a subject from
+    the ``participants.tsv`` file.
+
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    subject : str
+        The subject identifier.
+    id_columns : tuple of str, default ("participant_id", "participant", "subject")
+        Column names to be treated as identifiers and excluded from the
+        output.
+    na_like : tuple of str, default ("", "n/a", "na", "nan", "unknown", "none")
+        Values to be considered as "Not Available" and excluded.
+
+    Returns
+    -------
+    dict
+        A dictionary of extra participant information.
 
-    Uses vectorized pandas operations to drop id columns and NA-like values.
     """
     row = participants_row_for_subject(bids_root, subject, id_columns=id_columns)
     if row is None:
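
Sketch following the documented filtering rules (the returned keys depend entirely on the columns present in the dataset's participants.tsv; the values shown are hypothetical):

    extras = participants_extras_from_tsv("/data/ds005505", "sub-01")
    # Identifier columns ("participant_id", "participant", "subject") and NA-like
    # values ("", "n/a", "na", "nan", "unknown", "none") are dropped; everything
    # else is returned, e.g. {"age": 12.0, "sex": "F"}.
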
@@ -331,10 +401,21 @@ def attach_participants_extras(
     description: Any,
     extras: dict[str, Any],
 ) -> None:
-    """Attach extras to Raw.info and dataset description without overwriting.
+    """Attach extra participant data to a raw object and its description.
+
+    Updates the ``raw.info['subject_info']`` and the description object
+    (dict or pandas Series) with extra data from ``participants.tsv``.
+    It does not overwrite existing keys.
+
+    Parameters
+    ----------
+    raw : mne.io.Raw
+        The MNE Raw object to be updated.
+    description : dict or pandas.Series
+        The description object to be updated.
+    extras : dict
+        A dictionary of extra participant information to attach.
 
-    - Adds to ``raw.info['subject_info']['participants_extras']``.
-    - Adds to ``description`` if dict or pandas Series (only missing keys).
     """
     if not extras:
         return
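
Sketch of the documented no-overwrite update (the "handedness" key is hypothetical):

    attach_participants_extras(raw, description, {"handedness": "R"})
    # Per the docstring, raw.info["subject_info"] and the description (dict or
    # pandas Series) gain the new entries, while keys that already exist are
    # left untouched.
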
@@ -375,9 +456,28 @@ def enrich_from_participants(
     raw: Any,
     description: Any,
 ) -> dict[str, Any]:
-    """Convenience wrapper: read participants.tsv and attach extras for this subject.
+    """Read participants.tsv and attach extra info for the subject.
+
+    This is a convenience function that finds the subject from the
+    ``bidspath``, retrieves extra information from ``participants.tsv``,
+    and attaches it to the raw object and its description.
+
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    bidspath : mne_bids.BIDSPath
+        The BIDSPath object for the current data file.
+    raw : mne.io.Raw
+        The MNE Raw object to be updated.
+    description : dict or pandas.Series
+        The description object to be updated.
+
+    Returns
+    -------
+    dict
+        The dictionary of extras that were attached.
 
-    Returns the extras dictionary for further use if needed.
     """
     subject = getattr(bidspath, "subject", None)
     if not subject:
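
This wrapper ties the helpers above together, and a call site for it appears in the new dataset module added later in this diff (EEGDashBaseDataset._ensure_raw). Minimal sketch of its use:

    extras = enrich_from_participants(bids_root, bidspath, raw, description)
    # The subject is taken from bidspath; the returned dict is what was attached.
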
eegdash/const.py CHANGED
@@ -28,6 +28,8 @@ ALLOWED_QUERY_FIELDS = {
     "nchans",
     "ntimes",
 }
+"""set: A set of field names that are permitted in database queries constructed
+via :func:`~eegdash.api.EEGDash.find` with keyword arguments."""
 
 RELEASE_TO_OPENNEURO_DATASET_MAP = {
     "R11": "ds005516",
@@ -42,6 +44,8 @@ RELEASE_TO_OPENNEURO_DATASET_MAP = {
     "R2": "ds005506",
     "R1": "ds005505",
 }
+"""dict: A mapping from Healthy Brain Network (HBN) release identifiers (e.g., "R11")
+to their corresponding OpenNeuro dataset identifiers (e.g., "ds005516")."""
 
 SUBJECT_MINI_RELEASE_MAP = {
     "R11": [
@@ -287,6 +291,9 @@ SUBJECT_MINI_RELEASE_MAP = {
         "NDARFW972KFQ",
     ],
 }
+"""dict: A mapping from HBN release identifiers to a list of subject IDs.
+This is used to select a small, representative subset of subjects for creating
+"mini" datasets for testing and demonstration purposes."""
 
 config = {
     "required_fields": ["data_name"],
@@ -322,3 +329,21 @@ config = {
     ],
     "accepted_query_fields": ["data_name", "dataset"],
 }
+"""dict: A global configuration dictionary for the EEGDash package.
+
+Keys
+----
+required_fields : list
+    Fields that must be present in every database record.
+attributes : dict
+    A schema defining the expected primary attributes and their types for a
+    database record.
+description_fields : list
+    A list of fields considered to be descriptive metadata for a recording,
+    which can be used for filtering and display.
+bids_dependencies_files : list
+    A list of BIDS metadata filenames that are relevant for interpreting an
+    EEG recording.
+accepted_query_fields : list
+    Fields that are accepted for lightweight existence checks in the database.
+"""
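
With the documented keys, the configuration is read as a plain dictionary. A minimal sketch; only the "required_fields" and "accepted_query_fields" values are visible in this diff, the other entries are truncated above:

    from eegdash.const import config

    config["required_fields"]        # ["data_name"]
    config["accepted_query_fields"]  # ["data_name", "dataset"]
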
@@ -1,7 +1,8 @@
 """Public API for dataset helpers and dynamically generated datasets."""
 
 from . import dataset as _dataset_mod  # triggers dynamic class registration
-from .dataset import EEGChallengeDataset
+from .bids_dataset import EEGBIDSDataset
+from .dataset import EEGChallengeDataset, EEGDashDataset
 from .registry import register_openneuro_datasets
 
 # Re-export dynamically generated dataset classes at the package level so that
@@ -17,6 +18,11 @@ for _name in getattr(_dataset_mod, "__all__", []):
     globals()[_name] = _obj
     _dyn_names.append(_name)
 
-__all__ = ["EEGChallengeDataset", "register_openneuro_datasets"] + _dyn_names
+__all__ = [
+    "EEGBIDSDataset",
+    "EEGDashDataset",
+    "EEGChallengeDataset",
+    "register_openneuro_datasets",
+] + _dyn_names
 
 del _dataset_mod, _name, _obj, _dyn_names
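
With the expanded __all__, the subpackage now re-exports the BIDS helper and the core dataset class alongside the challenge dataset. A sketch, assuming this hunk belongs to the __init__ of the eegdash dataset subpackage (the diff does not show the file name):

    # Package path assumed; adjust to wherever this __init__ actually lives.
    from eegdash.dataset import EEGBIDSDataset, EEGChallengeDataset, EEGDashDataset
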
@@ -0,0 +1,311 @@
+# Authors: The EEGDash contributors.
+# License: GNU General Public License
+# Copyright the EEGDash contributors.
+
+"""Data utilities and dataset classes for EEG data handling.
+
+This module provides core dataset classes for working with EEG data in the EEGDash ecosystem,
+including classes for individual recordings and collections of datasets. It integrates with
+braindecode for machine learning workflows and handles data loading from both local and remote sources.
+"""
+
+import io
+import os
+import traceback
+from contextlib import redirect_stderr
+from pathlib import Path
+from typing import Any
+
+import mne
+import mne_bids
+from mne._fiff.utils import _read_segments_file
+from mne.io import BaseRaw
+from mne_bids import BIDSPath
+
+from braindecode.datasets import BaseDataset
+
+from .. import downloader
+from ..bids_eeg_metadata import enrich_from_participants
+from ..logging import logger
+from ..paths import get_default_cache_dir
+
+
+class EEGDashBaseDataset(BaseDataset):
+    """A single EEG recording dataset.
+
+    Represents a single EEG recording, typically hosted on a remote server (like AWS S3)
+    and cached locally upon first access. This class is a subclass of
+    :class:`braindecode.datasets.BaseDataset` and can be used with braindecode's
+    preprocessing and training pipelines.
+
+    Parameters
+    ----------
+    record : dict
+        A fully resolved metadata record for the data to load.
+    cache_dir : str
+        The local directory where the data will be cached.
+    s3_bucket : str, optional
+        The S3 bucket to download data from. If not provided, defaults to the
+        OpenNeuro bucket.
+    **kwargs
+        Additional keyword arguments passed to the
+        :class:`braindecode.datasets.BaseDataset` constructor.
+
+    """
+
+    _AWS_BUCKET = "s3://openneuro.org"
+
+    def __init__(
+        self,
+        record: dict[str, Any],
+        cache_dir: str,
+        s3_bucket: str | None = None,
+        **kwargs,
+    ):
+        super().__init__(None, **kwargs)
+        self.record = record
+        self.cache_dir = Path(cache_dir)
+        self.bids_kwargs = self._get_raw_bids_args()
+
+        if s3_bucket:
+            self.s3_bucket = s3_bucket
+            self.s3_open_neuro = False
+        else:
+            self.s3_bucket = self._AWS_BUCKET
+            self.s3_open_neuro = True
+
+        # Compute a dataset folder name under cache_dir that encodes preprocessing
+        # (e.g., bdf, mini) to avoid overlapping with the original dataset cache.
+        self.dataset_folder = record.get("dataset", "")
+        # TODO: remove this hack when competition is over
+        if s3_bucket:
+            suffixes: list[str] = []
+            bucket_lower = str(s3_bucket).lower()
+            if "bdf" in bucket_lower:
+                suffixes.append("bdf")
+            if "mini" in bucket_lower:
+                suffixes.append("mini")
+            if suffixes:
+                self.dataset_folder = f"{self.dataset_folder}-{'-'.join(suffixes)}"
+
+        # Place files under the dataset-specific folder (with suffix if any)
+        rel = Path(record["bidspath"])  # usually starts with dataset id
+        if rel.parts and rel.parts[0] == record.get("dataset"):
+            rel = Path(self.dataset_folder, *rel.parts[1:])
+        else:
+            rel = Path(self.dataset_folder) / rel
+        self.filecache = self.cache_dir / rel
+        self.bids_root = self.cache_dir / self.dataset_folder
+
+        self.bidspath = BIDSPath(
+            root=self.bids_root,
+            datatype="eeg",
+            suffix="eeg",
+            **self.bids_kwargs,
+        )
+
+        self.s3file = downloader.get_s3path(self.s3_bucket, record["bidspath"])
+        self.bids_dependencies = record["bidsdependencies"]
+        self.bids_dependencies_original = record["bidsdependencies"]
+        # TODO: removing temporary fix for BIDS dependencies path
+        # when the competition is over and dataset is digested properly
+        if not self.s3_open_neuro:
+            self.bids_dependencies = [
+                dep.split("/", 1)[1] for dep in self.bids_dependencies
+            ]
+
+        self._raw = None
+
+    def _get_raw_bids_args(self) -> dict[str, Any]:
+        """Extract BIDS-related arguments from the metadata record."""
+        desired_fields = ["subject", "session", "task", "run"]
+        return {k: self.record[k] for k in desired_fields if self.record[k]}
+
+    def _ensure_raw(self) -> None:
+        """Ensure the raw data file and its dependencies are cached locally."""
+        # TO-DO: remove this once is fixed on the our side
+        # for the competition
+        if not self.s3_open_neuro:
+            self.bidspath = self.bidspath.update(extension=".bdf")
+            self.filecache = self.filecache.with_suffix(".bdf")
+
+        if not os.path.exists(self.filecache):  # not preload
+            if self.bids_dependencies:
+                downloader.download_dependencies(
+                    s3_bucket=self.s3_bucket,
+                    bids_dependencies=self.bids_dependencies,
+                    bids_dependencies_original=self.bids_dependencies_original,
+                    cache_dir=self.cache_dir,
+                    dataset_folder=self.dataset_folder,
+                    record=self.record,
+                    s3_open_neuro=self.s3_open_neuro,
+                )
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+        if self._raw is None:
+            try:
+                # mne-bids can emit noisy warnings to stderr; keep user logs clean
+                _stderr_buffer = io.StringIO()
+                with redirect_stderr(_stderr_buffer):
+                    self._raw = mne_bids.read_raw_bids(
+                        bids_path=self.bidspath, verbose="ERROR"
+                    )
+                # Enrich Raw.info and description with participants.tsv extras
+                enrich_from_participants(
+                    self.bids_root, self.bidspath, self._raw, self.description
+                )
+
+            except Exception as e:
+                logger.error(
+                    f"Error while reading BIDS file: {self.bidspath}\n"
+                    "This may be due to a missing or corrupted file.\n"
+                    "Please check the file and try again.\n"
+                    "Usually erasing the local cache and re-downloading helps.\n"
+                    f"`rm {self.bidspath}`"
+                )
+                logger.error(f"Exception: {e}")
+                logger.error(traceback.format_exc())
+                raise e
+
+    def __len__(self) -> int:
+        """Return the number of samples in the dataset."""
+        if self._raw is None:
+            if (
+                self.record["ntimes"] is None
+                or self.record["sampling_frequency"] is None
+            ):
+                self._ensure_raw()
+            else:
+                # FIXME: this is a bit strange and should definitely not change as a side effect
+                # of accessing the data (which it will, since ntimes is the actual length but rounded down)
+                return int(self.record["ntimes"] * self.record["sampling_frequency"])
+        return len(self._raw)
+
+    @property
+    def raw(self) -> BaseRaw:
+        """The MNE Raw object for this recording.
+
+        Accessing this property triggers the download and caching of the data
+        if it has not been accessed before.
+
+        Returns
+        -------
+        mne.io.BaseRaw
+            The loaded MNE Raw object.
+
+        """
+        if self._raw is None:
+            self._ensure_raw()
+        return self._raw
+
+    @raw.setter
+    def raw(self, raw: BaseRaw):
+        self._raw = raw
+
+
+class EEGDashBaseRaw(BaseRaw):
+    """MNE BaseRaw wrapper for automatic S3 data fetching.
+
+    This class extends :class:`mne.io.BaseRaw` to automatically fetch data
+    from an S3 bucket and cache it locally when data is first accessed.
+    It is intended for internal use within the EEGDash ecosystem.
+
+    Parameters
+    ----------
+    input_fname : str
+        The path to the file on the S3 bucket (relative to the bucket root).
+    metadata : dict
+        The metadata record for the recording, containing information like
+        sampling frequency, channel names, etc.
+    preload : bool, default False
+        If True, preload the data into memory.
+    cache_dir : str, optional
+        Local directory for caching data. If None, a default directory is used.
+    bids_dependencies : list of str, default []
+        A list of BIDS metadata files to download alongside the main recording.
+    verbose : str, int, or None, default None
+        The MNE verbosity level.
+
+    See Also
+    --------
+    mne.io.Raw : The base class for Raw objects in MNE.
+
+    """
+
+    _AWS_BUCKET = "s3://openneuro.org"
+
+    def __init__(
+        self,
+        input_fname: str,
+        metadata: dict[str, Any],
+        preload: bool = False,
+        *,
+        cache_dir: str | None = None,
+        bids_dependencies: list[str] | None = None,
+        verbose: Any = None,
+    ):
+        # Create a simple RawArray
+        sfreq = metadata["sfreq"]  # Sampling frequency
+        n_times = metadata["n_times"]
+        ch_names = metadata["ch_names"]
+        ch_types = []
+        for ch in metadata["ch_types"]:
+            chtype = ch.lower()
+            if chtype == "heog" or chtype == "veog":
+                chtype = "eog"
+            ch_types.append(chtype)
+        info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
+
+        self.s3file = downloader.get_s3path(self._AWS_BUCKET, input_fname)
+        self.cache_dir = Path(cache_dir) if cache_dir else get_default_cache_dir()
+        self.filecache = self.cache_dir / input_fname
+        if bids_dependencies is None:
+            bids_dependencies = []
+        self.bids_dependencies = bids_dependencies
+
+        if preload and not os.path.exists(self.filecache):
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+            preload = self.filecache
+
+        super().__init__(
+            info,
+            preload,
+            last_samps=[n_times - 1],
+            orig_format="single",
+            verbose=verbose,
+        )
+
+    def _read_segment(
+        self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
+    ):
+        """Read a segment of data, downloading if necessary."""
+        if not os.path.exists(self.filecache):  # not preload
+            if self.bids_dependencies:  # this is use only to sidecars for now
+                downloader.download_dependencies(
+                    s3_bucket=self._AWS_BUCKET,
+                    bids_dependencies=self.bids_dependencies,
+                    bids_dependencies_original=None,
+                    cache_dir=self.cache_dir,
+                    dataset_folder=self.filecache,
+                    record={},
+                    s3_open_neuro=self.s3_open_neuro,
+                )
+            self.filecache = downloader.download_s3_file(
+                self.s3file, self.filecache, self.s3_open_neuro
+            )
+            self.filenames = [self.filecache]
+        else:  # not preload and file is not cached
+            self.filenames = [self.filecache]
+        return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
+
+    def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
+        """Read a chunk of data from a local file."""
+        _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
+
+
+__all__ = ["EEGDashBaseDataset", "EEGDashBaseRaw"]
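
Taken together, the new module defers all downloading to first access: constructing an EEGDashBaseDataset only stores the record and computes cache paths, and the S3 fetch happens when .raw is touched (or when __len__ cannot be answered from the record alone). A minimal sketch with a hypothetical record; real records come from the EEGDash database and contain at least the fields the constructor reads ("dataset", "bidspath", "bidsdependencies", "subject", "session", "task", "run", "ntimes", "sampling_frequency"):

    record = {
        "dataset": "ds005505",
        "bidspath": "ds005505/sub-01/eeg/sub-01_task-rest_eeg.set",
        "bidsdependencies": [],
        "subject": "01", "session": None, "task": "rest", "run": None,
        "ntimes": 1000, "sampling_frequency": 500,
    }
    ds = EEGDashBaseDataset(record, cache_dir="/tmp/eegdash_cache")
    len(ds)   # answered from ntimes * sampling_frequency, no download
    ds.raw    # downloads and caches the recording, returns an mne.io.BaseRaw
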