eegdash 0.3.9.dev182388821__py3-none-any.whl → 0.4.0.dev144__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

eegdash/data_utils.py CHANGED
@@ -1,10 +1,19 @@
1
+ # Authors: The EEGDash contributors.
2
+ # License: GNU General Public License
3
+ # Copyright the EEGDash contributors.
4
+
5
+ """Data utilities and dataset classes for EEG data handling.
6
+
7
+ This module provides core dataset classes for working with EEG data in the EEGDash ecosystem,
8
+ including classes for individual recordings and collections of datasets. It integrates with
9
+ braindecode for machine learning workflows and handles data loading from both local and remote sources.
10
+ """
11
+
1
12
  import io
2
13
  import json
3
- import logging
4
14
  import os
5
15
  import re
6
16
  import traceback
7
- import warnings
8
17
  from contextlib import redirect_stderr
9
18
  from pathlib import Path
10
19
  from typing import Any
@@ -13,9 +22,7 @@ import mne
13
22
  import mne_bids
14
23
  import numpy as np
15
24
  import pandas as pd
16
- import s3fs
17
25
  from bids import BIDSLayout
18
- from fsspec.callbacks import TqdmCallback
19
26
  from joblib import Parallel, delayed
20
27
  from mne._fiff.utils import _read_segments_file
21
28
  from mne.io import BaseRaw
@@ -23,10 +30,11 @@ from mne_bids import BIDSPath
23
30
 
24
31
  from braindecode.datasets import BaseDataset
25
32
 
33
+ from . import downloader
34
+ from .bids_eeg_metadata import enrich_from_participants
35
+ from .logging import logger
26
36
  from .paths import get_default_cache_dir
27
37
 
28
- logger = logging.getLogger("eegdash")
29
-
30
38
 
31
39
  class EEGDashBaseDataset(BaseDataset):
32
40
  """A single EEG recording hosted on AWS S3 and cached locally upon first access.
@@ -73,6 +81,7 @@ class EEGDashBaseDataset(BaseDataset):
73
81
  # Compute a dataset folder name under cache_dir that encodes preprocessing
74
82
  # (e.g., bdf, mini) to avoid overlapping with the original dataset cache.
75
83
  self.dataset_folder = record.get("dataset", "")
84
+ # TODO: remove this hack when the competition is over
76
85
  if s3_bucket:
77
86
  suffixes: list[str] = []
78
87
  bucket_lower = str(s3_bucket).lower()
@@ -91,6 +100,7 @@ class EEGDashBaseDataset(BaseDataset):
91
100
  rel = Path(self.dataset_folder) / rel
92
101
  self.filecache = self.cache_dir / rel
93
102
  self.bids_root = self.cache_dir / self.dataset_folder
103
+
94
104
  self.bidspath = BIDSPath(
95
105
  root=self.bids_root,
96
106
  datatype="eeg",
@@ -98,113 +108,18 @@ class EEGDashBaseDataset(BaseDataset):
98
108
  **self.bids_kwargs,
99
109
  )
100
110
 
101
- self.s3file = self._get_s3path(record["bidspath"])
111
+ self.s3file = downloader.get_s3path(self.s3_bucket, record["bidspath"])
102
112
  self.bids_dependencies = record["bidsdependencies"]
103
- # Temporary fix for BIDS dependencies path
104
- # just to release to the competition
113
+ self.bids_dependencies_original = record["bidsdependencies"]
114
+ # TODO: remove this temporary fix for the BIDS dependencies path
115
+ # once the competition is over and the dataset is digested properly
105
116
  if not self.s3_open_neuro:
106
- self.bids_dependencies_original = self.bids_dependencies
107
117
  self.bids_dependencies = [
108
118
  dep.split("/", 1)[1] for dep in self.bids_dependencies
109
119
  ]
110
120
 
111
121
  self._raw = None
112
122
 
113
- def _get_s3path(self, filepath: str) -> str:
114
- """Helper to form an AWS S3 URI for the given relative filepath."""
115
- return f"{self.s3_bucket}/{filepath}"
116
-
117
- def _download_s3(self) -> None:
118
- """Download function that gets the raw EEG data from S3."""
119
- filesystem = s3fs.S3FileSystem(
120
- anon=True, client_kwargs={"region_name": "us-east-2"}
121
- )
122
- if not self.s3_open_neuro:
123
- self.s3file = re.sub(r"(^|/)ds\d{6}/", r"\1", self.s3file, count=1)
124
- if self.s3file.endswith(".set"):
125
- self.s3file = self.s3file[:-4] + ".bdf"
126
- self.filecache = self.filecache.with_suffix(".bdf")
127
-
128
- self.filecache.parent.mkdir(parents=True, exist_ok=True)
129
- info = filesystem.info(self.s3file)
130
- size = info.get("size") or info.get("Size")
131
-
132
- callback = TqdmCallback(
133
- size=size,
134
- tqdm_kwargs=dict(
135
- desc=f"Downloading {Path(self.s3file).name}",
136
- unit="B",
137
- unit_scale=True,
138
- unit_divisor=1024,
139
- dynamic_ncols=True,
140
- leave=True,
141
- mininterval=0.2,
142
- smoothing=0.1,
143
- miniters=1,
144
- bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} "
145
- "[{elapsed}<{remaining}, {rate_fmt}]",
146
- ),
147
- )
148
- filesystem.get(self.s3file, self.filecache, callback=callback)
149
-
150
- self.filenames = [self.filecache]
151
-
152
- def _download_dependencies(self) -> None:
153
- """Download all BIDS dependency files (metadata files, recording sidecar files)
154
- from S3 and cache them locally.
155
- """
156
- filesystem = s3fs.S3FileSystem(
157
- anon=True, client_kwargs={"region_name": "us-east-2"}
158
- )
159
- for i, dep in enumerate(self.bids_dependencies):
160
- if not self.s3_open_neuro:
161
- # fix this when our bucket is integrated into the
162
- # mongodb
163
- # if the file have ".set" replace to ".bdf"
164
- if dep.endswith(".set"):
165
- dep = dep[:-4] + ".bdf"
166
-
167
- s3path = self._get_s3path(dep)
168
- if not self.s3_open_neuro:
169
- dep = self.bids_dependencies_original[i]
170
-
171
- dep_path = Path(dep)
172
- if dep_path.parts and dep_path.parts[0] == self.record.get("dataset"):
173
- dep_local = Path(self.dataset_folder, *dep_path.parts[1:])
174
- else:
175
- dep_local = Path(self.dataset_folder) / dep_path
176
- filepath = self.cache_dir / dep_local
177
- if not self.s3_open_neuro:
178
- if filepath.suffix == ".set":
179
- filepath = filepath.with_suffix(".bdf")
180
- if self.filecache.suffix == ".set":
181
- self.filecache = self.filecache.with_suffix(".bdf")
182
-
183
- # here, we download the dependency and it is fine
184
- # in the case of the competition.
185
- if not filepath.exists():
186
- filepath.parent.mkdir(parents=True, exist_ok=True)
187
- info = filesystem.info(s3path)
188
- size = info.get("size") or info.get("Size")
189
-
190
- callback = TqdmCallback(
191
- size=size,
192
- tqdm_kwargs=dict(
193
- desc=f"Downloading {Path(s3path).name}",
194
- unit="B",
195
- unit_scale=True,
196
- unit_divisor=1024,
197
- dynamic_ncols=True,
198
- leave=True,
199
- mininterval=0.2,
200
- smoothing=0.1,
201
- miniters=1,
202
- bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} "
203
- "[{elapsed}<{remaining}, {rate_fmt}]",
204
- ),
205
- )
206
- filesystem.get(s3path, filepath, callback=callback)
207
-
208
123
  def _get_raw_bids_args(self) -> dict[str, Any]:
209
124
  """Helper to restrict the metadata record to the fields needed to locate a BIDS
210
125
  recording.
@@ -222,130 +137,43 @@ class EEGDashBaseDataset(BaseDataset):
222
137
 
223
138
  if not os.path.exists(self.filecache): # not preload
224
139
  if self.bids_dependencies:
225
- self._download_dependencies()
226
- self._download_s3()
140
+ downloader.download_dependencies(
141
+ s3_bucket=self.s3_bucket,
142
+ bids_dependencies=self.bids_dependencies,
143
+ bids_dependencies_original=self.bids_dependencies_original,
144
+ cache_dir=self.cache_dir,
145
+ dataset_folder=self.dataset_folder,
146
+ record=self.record,
147
+ s3_open_neuro=self.s3_open_neuro,
148
+ )
149
+ self.filecache = downloader.download_s3_file(
150
+ self.s3file, self.filecache, self.s3_open_neuro
151
+ )
152
+ self.filenames = [self.filecache]
227
153
  if self._raw is None:
228
- # capturing any warnings
229
- # to-do: remove this once is fixed on the mne-bids side.
230
- with warnings.catch_warnings(record=True) as w:
231
- # Ensure all warnings are captured into 'w' and not shown to users
232
- warnings.simplefilter("always")
233
- try:
234
- # mne-bids emits RuntimeWarnings to stderr; silence stderr during read
235
- _stderr_buffer = io.StringIO()
236
- with redirect_stderr(_stderr_buffer):
237
- self._raw = mne_bids.read_raw_bids(
238
- bids_path=self.bidspath, verbose="ERROR"
239
- )
240
- # Parse unmapped participants.tsv fields reported by mne-bids and
241
- # inject them into Raw.info and the dataset description generically.
242
- extras = self._extract_unmapped_participants_from_warnings(w)
243
- if extras:
244
- # 1) Attach to Raw.info under subject_info.participants_extras
245
- try:
246
- subject_info = self._raw.info.get("subject_info") or {}
247
- if not isinstance(subject_info, dict):
248
- subject_info = {}
249
- pe = subject_info.get("participants_extras") or {}
250
- if not isinstance(pe, dict):
251
- pe = {}
252
- # Merge without overwriting
253
- for k, v in extras.items():
254
- pe.setdefault(k, v)
255
- subject_info["participants_extras"] = pe
256
- self._raw.info["subject_info"] = subject_info
257
- except Exception:
258
- # Non-fatal; continue
259
- pass
260
-
261
- # 2) Also add to this dataset's description, if possible, so
262
- # targets can be selected later without naming specifics.
263
- try:
264
- if isinstance(self.description, dict):
265
- for k, v in extras.items():
266
- self.description.setdefault(k, v)
267
- elif isinstance(self.description, pd.Series):
268
- for k, v in extras.items():
269
- if k not in self.description.index:
270
- self.description.loc[k] = v
271
- except Exception:
272
- pass
273
- except Exception as e:
274
- logger.error(
275
- f"Error while reading BIDS file: {self.bidspath}\n"
276
- "This may be due to a missing or corrupted file.\n"
277
- "Please check the file and try again."
278
- )
279
- logger.error(f"Exception: {e}")
280
- logger.error(traceback.format_exc())
281
- raise e
282
- # Filter noisy mapping notices from mne-bids; surface others
283
- for captured_warning in w:
284
- try:
285
- msg = str(captured_warning.message)
286
- except Exception:
287
- continue
288
- # Suppress verbose participants mapping messages
289
- if "Unable to map the following column" in msg and "MNE" in msg:
290
- logger.debug(
291
- "Suppressed mne-bids mapping warning while reading BIDS file: %s",
292
- msg,
293
- )
294
- continue
295
-
296
- def _extract_unmapped_participants_from_warnings(
297
- self, warnings_list: list[Any]
298
- ) -> dict[str, Any]:
299
- """Scan captured warnings from mne-bids and extract unmapped participants.tsv
300
- entries in a generic way.
301
-
302
- Optionally, the column name can carry a note in parentheses that we ignore
303
- for key/value extraction. Returns a mapping of column name -> raw value.
304
- """
305
- extras: dict[str, Any] = {}
306
- header = "Unable to map the following column(s) to MNE:"
307
- for wr in warnings_list:
308
- try:
309
- msg = str(wr.message)
310
- except Exception:
311
- continue
312
- if header not in msg:
313
- continue
314
- lines = msg.splitlines()
315
- # Find the header line, then parse subsequent lines as entries
316
154
  try:
317
- idx = next(i for i, ln in enumerate(lines) if header in ln)
318
- except StopIteration:
319
- idx = -1
320
- for line in lines[idx + 1 :]:
321
- line = line.strip()
322
- if not line:
323
- continue
324
- # Pattern: <col>(optional note): <value>
325
- # Examples: "gender: F", "Ethnicity: Indian", "foo (ignored): bar"
326
- m = re.match(r"^([^:]+?)(?:\s*\([^)]*\))?\s*:\s*(.*)$", line)
327
- if not m:
328
- continue
329
- col = m.group(1).strip()
330
- val = m.group(2).strip()
331
- # Keep original column names as provided to stay agnostic
332
- if col and col not in extras:
333
- extras[col] = val
334
- return extras
335
-
336
- # === BaseDataset and PyTorch Dataset interface ===
337
-
338
- def __getitem__(self, index):
339
- """Main function to access a sample from the dataset."""
340
- X = self.raw[:, index][0]
341
- y = None
342
- if self.target_name is not None:
343
- y = self.description[self.target_name]
344
- if isinstance(y, pd.Series):
345
- y = y.to_list()
346
- if self.transform is not None:
347
- X = self.transform(X)
348
- return X, y
155
+ # mne-bids can emit noisy warnings to stderr; keep user logs clean
156
+ _stderr_buffer = io.StringIO()
157
+ with redirect_stderr(_stderr_buffer):
158
+ self._raw = mne_bids.read_raw_bids(
159
+ bids_path=self.bidspath, verbose="ERROR"
160
+ )
161
+ # Enrich Raw.info and description with participants.tsv extras
162
+ enrich_from_participants(
163
+ self.bids_root, self.bidspath, self._raw, self.description
164
+ )
165
+
166
+ except Exception as e:
167
+ logger.error(
168
+ f"Error while reading BIDS file: {self.bidspath}\n"
169
+ "This may be due to a missing or corrupted file.\n"
170
+ "Please check the file and try again.\n"
171
+ "Usually erasing the local cache and re-downloading helps.\n"
172
+ f"`rm {self.bidspath}`"
173
+ )
174
+ logger.error(f"Exception: {e}")
175
+ logger.error(traceback.format_exc())
176
+ raise e
349
177
 
350
178
  def __len__(self) -> int:
351
179
  """Return the number of samples in the dataset."""
@@ -426,13 +254,16 @@ class EEGDashBaseRaw(BaseRaw):
426
254
  ch_types.append(chtype)
427
255
  info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
428
256
 
429
- self.s3file = self._get_s3path(input_fname)
257
+ self.s3file = downloader.get_s3path(self._AWS_BUCKET, input_fname)
430
258
  self.cache_dir = Path(cache_dir) if cache_dir else get_default_cache_dir()
431
259
  self.filecache = self.cache_dir / input_fname
432
260
  self.bids_dependencies = bids_dependencies
433
261
 
434
262
  if preload and not os.path.exists(self.filecache):
435
- self._download_s3()
263
+ self.filecache = downloader.download_s3_file(
264
+ self.s3file, self.filecache, self.s3_open_neuro
265
+ )
266
+ self.filenames = [self.filecache]
436
267
  preload = self.filecache
437
268
 
438
269
  super().__init__(
@@ -443,35 +274,24 @@ class EEGDashBaseRaw(BaseRaw):
443
274
  verbose=verbose,
444
275
  )
445
276
 
446
- def _get_s3path(self, filepath):
447
- return f"{self._AWS_BUCKET}/{filepath}"
448
-
449
- def _download_s3(self) -> None:
450
- self.filecache.parent.mkdir(parents=True, exist_ok=True)
451
- filesystem = s3fs.S3FileSystem(
452
- anon=True, client_kwargs={"region_name": "us-east-2"}
453
- )
454
- filesystem.download(self.s3file, self.filecache)
455
- self.filenames = [self.filecache]
456
-
457
- def _download_dependencies(self):
458
- filesystem = s3fs.S3FileSystem(
459
- anon=True, client_kwargs={"region_name": "us-east-2"}
460
- )
461
- for dep in self.bids_dependencies:
462
- s3path = self._get_s3path(dep)
463
- filepath = self.cache_dir / dep
464
- if not filepath.exists():
465
- filepath.parent.mkdir(parents=True, exist_ok=True)
466
- filesystem.download(s3path, filepath)
467
-
468
277
  def _read_segment(
469
278
  self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
470
279
  ):
471
280
  if not os.path.exists(self.filecache): # not preload
472
- if self.bids_dependencies:
473
- self._download_dependencies()
474
- self._download_s3()
281
+ if self.bids_dependencies: # this is used only for sidecars for now
282
+ downloader.download_dependencies(
283
+ s3_bucket=self._AWS_BUCKET,
284
+ bids_dependencies=self.bids_dependencies,
285
+ bids_dependencies_original=None,
286
+ cache_dir=self.cache_dir,
287
+ dataset_folder=self.filecache,
288
+ record={},
289
+ s3_open_neuro=self.s3_open_neuro,
290
+ )
291
+ self.filecache = downloader.download_s3_file(
292
+ self.s3file, self.filecache, self.s3_open_neuro
293
+ )
294
+ self.filenames = [self.filecache]
475
295
  else: # not preload and file is not cached
476
296
  self.filenames = [self.filecache]
477
297
  return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
@@ -1,4 +1,22 @@
1
+ """Public API for dataset helpers and dynamically generated datasets."""
2
+
3
+ from . import dataset as _dataset_mod # triggers dynamic class registration
1
4
  from .dataset import EEGChallengeDataset
2
5
  from .registry import register_openneuro_datasets
3
6
 
4
- __all__ = ["EEGChallengeDataset", "register_openneuro_datasets"]
7
+ # Re-export dynamically generated dataset classes at the package level so that
8
+ # ``eegdash.dataset`` shows them in the API docs and users can import as
9
+ # ``from eegdash.dataset import DSXXXXX``.
10
+ _dyn_names = []
11
+ for _name in getattr(_dataset_mod, "__all__", []):
12
+ if _name == "EEGChallengeDataset":
13
+ # Already imported explicitly above
14
+ continue
15
+ _obj = getattr(_dataset_mod, _name, None)
16
+ if _obj is not None:
17
+ globals()[_name] = _obj
18
+ _dyn_names.append(_name)
19
+
20
+ __all__ = ["EEGChallengeDataset", "register_openneuro_datasets"] + _dyn_names
21
+
22
+ del _dataset_mod, _name, _obj, _dyn_names
@@ -1,15 +1,15 @@
1
- import logging
2
1
  from pathlib import Path
3
2
 
4
- from mne.utils import warn
3
+ from rich.console import Console
4
+ from rich.panel import Panel
5
+ from rich.text import Text
5
6
 
6
7
  from ..api import EEGDashDataset
7
8
  from ..bids_eeg_metadata import build_query_from_kwargs
8
9
  from ..const import RELEASE_TO_OPENNEURO_DATASET_MAP, SUBJECT_MINI_RELEASE_MAP
10
+ from ..logging import logger
9
11
  from .registry import register_openneuro_datasets
10
12
 
11
- logger = logging.getLogger("eegdash")
12
-
13
13
 
14
14
  class EEGChallengeDataset(EEGDashDataset):
15
15
  """EEG 2025 Challenge dataset helper.
@@ -23,8 +23,6 @@ class EEGChallengeDataset(EEGDashDataset):
23
23
  ----------
24
24
  release : str
25
25
  Release name. One of ["R1", ..., "R11"].
26
- cache_dir : str
27
- Local cache directory for data files.
28
26
  mini : bool, default True
29
27
  If True, restrict subjects to the challenge mini subset.
30
28
  query : dict | None
@@ -123,24 +121,32 @@ class EEGChallengeDataset(EEGDashDataset):
123
121
  else:
124
122
  s3_bucket = f"{s3_bucket}/{release}_L100_bdf"
125
123
 
126
- warn(
127
- "\n\n"
128
- "[EEGChallengeDataset] EEG 2025 Competition Data Notice:\n"
129
- "-------------------------------------------------------\n"
124
+ message_text = Text.from_markup(
130
125
  "This object loads the HBN dataset that has been preprocessed for the EEG Challenge:\n"
131
- " - Downsampled from 500Hz to 100Hz\n"
132
- " - Bandpass filtered (0.550 Hz)\n"
133
- "\n"
134
- "For full preprocessing details, see:\n"
135
- " https://github.com/eeg2025/downsample-datasets\n"
136
- "\n"
137
- "IMPORTANT: The data accessed via `EEGChallengeDataset` is NOT identical to what you get from `EEGDashDataset` directly.\n"
138
- "If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data.\n"
139
- "\n",
140
- UserWarning,
141
- module="eegdash",
126
+ " * Downsampled from 500Hz to 100Hz\n"
127
+ " * Bandpass filtered (0.5-50 Hz)\n\n"
128
+ "For full preprocessing applied for competition details, see:\n"
129
+ " [link=https://github.com/eeg2025/downsample-datasets]https://github.com/eeg2025/downsample-datasets[/link]\n\n"
130
+ "The HBN dataset have some preprocessing applied by the HBN team:\n"
131
+ " * Re-reference (Cz Channel)\n\n"
132
+ "[bold red]IMPORTANT[/bold red]: The data accessed via `EEGChallengeDataset` is [u]NOT[/u] identical to what you get from [link=https://github.com/sccn/EEGDash/blob/develop/eegdash/api.py]EEGDashDataset[/link] directly.\n"
133
+ "If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data."
134
+ )
135
+
136
+ warning_panel = Panel(
137
+ message_text,
138
+ title="[yellow]EEG 2025 Competition Data Notice[/yellow]",
139
+ subtitle="[cyan]Source: EEGChallengeDataset[/cyan]",
140
+ border_style="yellow",
142
141
  )
143
142
 
143
+ # Render the panel directly to the console so it displays in IPython/terminals
144
+ try:
145
+ Console().print(warning_panel)
146
+ except Exception:
147
+ warning_message = str(message_text)
148
+ logger.warning(warning_message)
149
+
144
150
  super().__init__(
145
151
  dataset=RELEASE_TO_OPENNEURO_DATASET_MAP[release],
146
152
  query=query,
@@ -10,7 +10,6 @@
10
10
  8,ds005508,3342,324,10,129,500,269.281,229.81 GB,246753736933,0,,,,,
11
11
  9,ds005507,1812,184,10,129,500,168.649,139.37 GB,149646718160,0,,,,,
12
12
  10,ds005506,1405,150,10,129,500,127.896,111.88 GB,120126449650,0,,,,,
13
- 11,test,2,1,1,64,500,20.556,0 B,0,0,,,,,
14
13
  12,ds004854,1,1,1,64,128,0.535,79.21 MB,83057080,0,,,,,
15
14
  13,ds004853,1,1,1,64,128,0.535,79.21 MB,83057080,0,,,,,
16
15
  14,ds004844,68,17,1,64,1024,21.252,22.33 GB,23976121966,0,ds004844,,,Multisensory,Decision-making
@@ -57,14 +57,8 @@ def register_openneuro_datasets(
57
57
 
58
58
  init = make_init(dataset_id)
59
59
 
60
- doc = f"""OpenNeuro dataset ``{dataset_id}``.
61
-
62
- {_markdown_table(row_series)}
63
-
64
- This class is a thin convenience wrapper for the dataset ``{dataset_id}``.
65
- Constructor arguments are forwarded to :class:`{base_class.__name__}`; see the
66
- base class documentation for parameter details and examples.
67
- """
60
+ # Generate rich docstring with dataset metadata
61
+ doc = _generate_rich_docstring(dataset_id, row_series, base_class)
68
62
 
69
63
  # init.__doc__ = doc
70
64
 
@@ -90,6 +84,94 @@ def register_openneuro_datasets(
90
84
  return registered
91
85
 
92
86
 
87
+ def _generate_rich_docstring(dataset_id: str, row_series: pd.Series, base_class) -> str:
88
+ """Generate a comprehensive docstring for a dataset class."""
89
+ # Extract metadata with safe defaults
90
+ n_subjects = row_series.get("n_subjects", "Unknown")
91
+ n_records = row_series.get("n_records", "Unknown")
92
+ n_tasks = row_series.get("n_tasks", "Unknown")
93
+ modality = row_series.get("modality of exp", "")
94
+ exp_type = row_series.get("type of exp", "")
95
+ subject_type = row_series.get("Type Subject", "")
96
+ duration = row_series.get("duration_hours_total", "Unknown")
97
+ size = row_series.get("size", "Unknown")
98
+
99
+ # Create description based on available metadata
100
+ description_parts = []
101
+ if modality and str(modality).strip():
102
+ description_parts.append(f"**Modality**: {modality}")
103
+ if exp_type and str(exp_type).strip():
104
+ description_parts.append(f"**Type**: {exp_type}")
105
+ if subject_type and str(subject_type).strip():
106
+ description_parts.append(f"**Subjects**: {subject_type}")
107
+
108
+ description = (
109
+ " | ".join(description_parts)
110
+ if description_parts
111
+ else "EEG dataset from OpenNeuro"
112
+ )
113
+
114
+ # Generate the docstring
115
+ docstring = f"""OpenNeuro dataset ``{dataset_id}``.
116
+
117
+ {description}
118
+
119
+ This dataset contains {n_subjects} subjects with {n_records} recordings across {n_tasks} tasks.
120
+ Total duration: {duration} hours. Dataset size: {size}.
121
+
122
+ {_markdown_table(row_series)}
123
+
124
+ This dataset class provides convenient access to the ``{dataset_id}`` dataset through the EEGDash interface.
125
+ It inherits all functionality from :class:`~{base_class.__module__}.{base_class.__name__}` with the dataset filter pre-configured.
126
+
127
+ Parameters
128
+ ----------
129
+ cache_dir : str
130
+ Directory to cache downloaded data.
131
+ query : dict, optional
132
+ Additional MongoDB-style filters to AND with the dataset selection.
133
+ Must not contain the key ``dataset``.
134
+ s3_bucket : str, optional
135
+ Base S3 bucket used to locate the data.
136
+ **kwargs
137
+ Additional arguments passed to the base dataset class.
138
+
139
+ Examples
140
+ --------
141
+ Basic usage:
142
+
143
+ >>> from eegdash.dataset import {dataset_id.upper()}
144
+ >>> dataset = {dataset_id.upper()}(cache_dir="./data")
145
+ >>> print(f"Number of recordings: {{len(dataset)}}")
146
+
147
+ Load a specific recording:
148
+
149
+ >>> if len(dataset) > 0:
150
+ ... recording = dataset[0]
151
+ ... raw = recording.load()
152
+ ... print(f"Sampling rate: {{raw.info['sfreq']}} Hz")
153
+ ... print(f"Number of channels: {{len(raw.ch_names)}}")
154
+
155
+ Filter by additional criteria:
156
+
157
+ >>> # Get subset with specific task or subject
158
+ >>> filtered_dataset = {dataset_id.upper()}(
159
+ ... cache_dir="./data",
160
+ ... query={{"task": "RestingState"}} # if applicable
161
+ ... )
162
+
163
+ Notes
164
+ -----
165
+ More details available in the `NEMAR documentation <https://nemar.org/dataexplorer/detail?dataset_id={dataset_id}>`__.
166
+
167
+ See Also
168
+ --------
169
+ {base_class.__name__} : Base dataset class with full API documentation
170
+ """
171
+
172
+ return docstring
173
+
174
+
93
175
  def _markdown_table(row_series: pd.Series) -> str:
94
176
  """Create a reStructuredText table from a pandas Series."""
95
177
  if row_series.empty:
@@ -128,7 +210,12 @@ def _markdown_table(row_series: pd.Series) -> str:
128
210
  table = tabulate(df, headers="keys", tablefmt="rst", showindex=False)
129
211
 
130
212
  # Add a caption for the table
131
- caption = f"Short overview of dataset {dataset_id} more details in the `Nemar documentation <https://nemar.org/dataexplorer/detail?dataset_id={dataset_id}>`_."
213
+ # Use an anonymous external link (double underscore) to avoid duplicate
214
+ # target warnings when this docstring is repeated across many classes.
215
+ caption = (
216
+ f"Short overview of dataset {dataset_id} more details in the "
217
+ f"`NeMAR documentation <https://nemar.org/dataexplorer/detail?dataset_id={dataset_id}>`__."
218
+ )
132
219
  # adding caption below the table
133
220
  # Indent the table to fit within the admonition block
134
221
  indented_table = "\n".join(" " + line for line in table.split("\n"))