eegdash 0.4.0.dev153__py3-none-any.whl → 0.4.0.dev162__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of eegdash might be problematic.
- eegdash/__init__.py +1 -1
- eegdash/api.py +180 -86
- eegdash/bids_eeg_metadata.py +139 -39
- eegdash/const.py +25 -0
- eegdash/data_utils.py +239 -173
- eegdash/dataset/dataset.py +35 -13
- eegdash/dataset/dataset_summary.csv +1 -1
- eegdash/dataset/registry.py +69 -4
- eegdash/downloader.py +95 -9
- eegdash/features/datasets.py +320 -136
- eegdash/features/decorators.py +88 -3
- eegdash/features/extractors.py +201 -55
- eegdash/features/inspect.py +78 -5
- eegdash/features/serialization.py +45 -19
- eegdash/features/utils.py +75 -8
- eegdash/hbn/preprocessing.py +50 -17
- eegdash/hbn/windows.py +145 -32
- eegdash/logging.py +19 -0
- eegdash/mongodb.py +44 -27
- eegdash/paths.py +14 -5
- eegdash/utils.py +16 -1
- {eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev162.dist-info}/METADATA +1 -1
- eegdash-0.4.0.dev162.dist-info/RECORD +37 -0
- eegdash-0.4.0.dev153.dist-info/RECORD +0 -37
- {eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev162.dist-info}/WHEEL +0 -0
- {eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev162.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev162.dist-info}/top_level.txt +0 -0
eegdash/dataset/dataset.py
CHANGED
@@ -12,26 +12,48 @@ from .registry import register_openneuro_datasets
 
 
 class EEGChallengeDataset(EEGDashDataset):
-    """EEG 2025 Challenge
+    """A dataset helper for the EEG 2025 Challenge.
 
-    This class
-
-
-
+    This class simplifies access to the EEG 2025 Challenge datasets. It is a
+    specialized version of :class:`~eegdash.api.EEGDashDataset` that is
+    pre-configured for the challenge's data releases. It automatically maps a
+    release name (e.g., "R1") to the corresponding OpenNeuro dataset and handles
+    the selection of subject subsets (e.g., "mini" release).
 
     Parameters
     ----------
     release : str
-
+        The name of the challenge release to load. Must be one of the keys in
+        :const:`~eegdash.const.RELEASE_TO_OPENNEURO_DATASET_MAP`
+        (e.g., "R1", "R2", ..., "R11").
+    cache_dir : str
+        The local directory where the dataset will be downloaded and cached.
     mini : bool, default True
-        If True,
-
-
-
-
-
+        If True, the dataset is restricted to the official "mini" subset of
+        subjects for the specified release. If False, all subjects for the
+        release are included.
+    query : dict, optional
+        An additional MongoDB-style query to apply as a filter. This query is
+        combined with the release and subject filters using a logical AND.
+        The query must not contain the ``dataset`` key, as this is determined
+        by the ``release`` parameter.
+    s3_bucket : str, optional
+        The base S3 bucket URI where the challenge data is stored. Defaults to
+        the official challenge bucket.
    **kwargs
-
+        Additional keyword arguments that are passed directly to the
+        :class:`~eegdash.api.EEGDashDataset` constructor.
+
+    Raises
+    ------
+    ValueError
+        If the specified ``release`` is unknown, or if the ``query`` argument
+        contains a ``dataset`` key. Also raised if ``mini`` is True and a
+        requested subject is not part of the official mini-release subset.
+
+    See Also
+    --------
+    EEGDashDataset : The base class for creating datasets from queries.
 
     """
 
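The documented parameters map directly onto a constructor call. A minimal usage sketch, assuming the class is re-exported from eegdash.dataset and using a hypothetical cache directory and task filter:

from eegdash.dataset import EEGChallengeDataset

# Mini subset of release R1; cache_dir is a hypothetical local path.
ds = EEGChallengeDataset(release="R1", cache_dir="eeg_cache", mini=True)

# Extra MongoDB-style filters are ANDed with the release/subject filters;
# including a "dataset" key here would raise ValueError.
full = EEGChallengeDataset(
    release="R1",
    cache_dir="eeg_cache",
    mini=False,
    query={"task": "RestingState"},  # hypothetical task name
)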
eegdash/dataset/dataset_summary.csv
CHANGED

@@ -198,7 +198,7 @@
 197,ds003751,38,38,1,128,250,19.95,4.71 GB,5057922307,0,ds003751,Healthy,other,Multisensory,Affect
 198,ds003421,80,20,1,257,1000,11.604,76.77 GB,82433418198,0,ds003421,Healthy,10-20,Multisensory,Decision-making
 199,ds002158,117,20,1,,,0.0,428.59 GB,460190030981,0,ds002158,Healthy,10-20,Visual,Affect
-200,ds004951,23,11,1,63,1000,29.563,22.00 GB,23627352274,0,ds004951
+200,ds004951,23,11,1,63,1000,29.563,22.00 GB,23627352274,0,ds004951,,,Tactile,Learning
 201,ds004802,38,38,1,65,"2048,512",0.0,29.34 GB,31504070800,0,ds004802,Other,,Visual,Affect
 202,ds004816,20,20,1,63,1000,0.0,23.31 GB,25028989553,0,ds004816,Healthy,,Visual,Attention
 203,ds005873,2850,125,1,2,256,11935.09,117.21 GB,125851664268,0,,,,,
eegdash/dataset/registry.py
CHANGED
@@ -14,7 +14,35 @@ def register_openneuro_datasets(
     namespace: Dict[str, Any] | None = None,
     add_to_all: bool = True,
 ) -> Dict[str, type]:
-    """Dynamically create dataset classes from a summary file.
+    """Dynamically create and register dataset classes from a summary file.
+
+    This function reads a CSV file containing summaries of OpenNeuro datasets
+    and dynamically creates a Python class for each dataset. These classes
+    inherit from a specified base class and are pre-configured with the
+    dataset's ID.
+
+    Parameters
+    ----------
+    summary_file : str or pathlib.Path
+        The path to the CSV file containing the dataset summaries.
+    base_class : type, optional
+        The base class from which the new dataset classes will inherit. If not
+        provided, :class:`eegdash.api.EEGDashDataset` is used.
+    namespace : dict, optional
+        The namespace (e.g., `globals()`) into which the newly created classes
+        will be injected. Defaults to the local `globals()` of this module.
+    add_to_all : bool, default True
+        If True, the names of the newly created classes are added to the
+        `__all__` list of the target namespace, making them importable with
+        `from ... import *`.
+
+    Returns
+    -------
+    dict[str, type]
+        A dictionary mapping the names of the registered classes to the class
+        types themselves.
+
+    """
     if base_class is None:
         from ..api import EEGDashDataset as base_class  # lazy import
 
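Per the new docstring, the registrar reads the summary CSV and injects one pre-configured class per dataset into a namespace. A minimal usage sketch, assuming the bundled dataset_summary.csv ships next to the module and that the first parameter is named summary_file as documented:

from pathlib import Path
from eegdash.dataset import registry

# Register classes into this module's namespace; with add_to_all=True the
# names are also appended to __all__ for star-imports.
classes = registry.register_openneuro_datasets(
    summary_file=Path("eegdash/dataset/dataset_summary.csv"),  # assumed path
    namespace=globals(),
)
print(len(classes), "dataset classes registered")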
@@ -84,8 +112,28 @@ def register_openneuro_datasets(
     return registered
 
 
-def _generate_rich_docstring(
-
+def _generate_rich_docstring(
+    dataset_id: str, row_series: pd.Series, base_class: type
+) -> str:
+    """Generate a comprehensive, well-formatted docstring for a dataset class.
+
+    Parameters
+    ----------
+    dataset_id : str
+        The identifier of the dataset (e.g., "ds002718").
+    row_series : pandas.Series
+        A pandas Series containing the metadata for the dataset, extracted
+        from the summary CSV file.
+    base_class : type
+        The base class from which the new dataset class inherits. Used to
+        generate the "See Also" section of the docstring.
+
+    Returns
+    -------
+    str
+        A formatted docstring.
+
+    """
     # Extract metadata with safe defaults
     n_subjects = row_series.get("n_subjects", "Unknown")
     n_records = row_series.get("n_records", "Unknown")
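_generate_rich_docstring documents the inputs of the class factory; the factory pattern itself is the standard three-argument type() call. A sketch of the general technique under that assumption (the diff does not show the actual factory body, and the attribute name below is illustrative):

import pandas as pd

def make_dataset_class(dataset_id: str, row_series: pd.Series, base_class: type) -> type:
    # Subclass the base, pin the dataset ID as a class attribute, and attach
    # a generated docstring built from the summary-row metadata.
    doc = (
        f"OpenNeuro dataset {dataset_id} with "
        f"{row_series.get('n_subjects', 'Unknown')} subjects."
    )
    return type(dataset_id.upper(), (base_class,), {"_dataset": dataset_id, "__doc__": doc})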
@@ -173,7 +221,24 @@ See Also
 
 
 def _markdown_table(row_series: pd.Series) -> str:
-    """Create a reStructuredText grid table from a pandas Series.
+    """Create a reStructuredText grid table from a pandas Series.
+
+    This helper function takes a pandas Series containing dataset metadata
+    and formats it into a reStructuredText grid table for inclusion in
+    docstrings.
+
+    Parameters
+    ----------
+    row_series : pandas.Series
+        A Series where each index is a metadata field and each value is the
+        corresponding metadata value.
+
+    Returns
+    -------
+    str
+        A string containing the formatted reStructuredText table.
+
+    """
     if row_series.empty:
         return ""
     dataset_id = row_series["dataset"]
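For reference, the grid-table layout _markdown_table targets uses +---+ borders with an = rule under the header row. A minimal two-column sketch (the headers and widths are assumptions, not the function's actual output):

import pandas as pd

def grid_table(row_series: pd.Series) -> str:
    # Field/value pairs, padded to the widest entry in each column.
    rows = [(str(k), str(v)) for k, v in row_series.items()]
    w1 = max(len(k) for k, _ in rows + [("Field", "")])
    w2 = max(len(v) for _, v in rows + [("", "Value")])
    sep = f"+{'-' * (w1 + 2)}+{'-' * (w2 + 2)}+"
    out = [sep, f"| {'Field'.ljust(w1)} | {'Value'.ljust(w2)} |", sep.replace('-', '=')]
    for k, v in rows:
        out.append(f"| {k.ljust(w1)} | {v.ljust(w2)} |")
        out.append(sep)
    return "\n".join(out)

print(grid_table(pd.Series({"dataset": "ds003751", "n_subjects": 38})))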
eegdash/downloader.py
CHANGED
@@ -17,18 +17,62 @@ import s3fs
 from fsspec.callbacks import TqdmCallback
 
 
-def get_s3_filesystem():
-    """
+def get_s3_filesystem() -> s3fs.S3FileSystem:
+    """Get an anonymous S3 filesystem object.
+
+    Initializes and returns an ``s3fs.S3FileSystem`` for anonymous access
+    to public S3 buckets, configured for the 'us-east-2' region.
+
+    Returns
+    -------
+    s3fs.S3FileSystem
+        An S3 filesystem object.
+
+    """
     return s3fs.S3FileSystem(anon=True, client_kwargs={"region_name": "us-east-2"})
 
 
 def get_s3path(s3_bucket: str, filepath: str) -> str:
-    """
+    """Construct an S3 URI from a bucket and file path.
+
+    Parameters
+    ----------
+    s3_bucket : str
+        The S3 bucket name (e.g., "s3://my-bucket").
+    filepath : str
+        The path to the file within the bucket.
+
+    Returns
+    -------
+    str
+        The full S3 URI (e.g., "s3://my-bucket/path/to/file").
+
+    """
     return f"{s3_bucket}/{filepath}"
 
 
-def download_s3_file(s3_path: str, local_path: Path, s3_open_neuro: bool):
-    """Download
+def download_s3_file(s3_path: str, local_path: Path, s3_open_neuro: bool) -> Path:
+    """Download a single file from S3 to a local path.
+
+    Handles the download of a raw EEG data file from an S3 bucket, caching it
+    at the specified local path. Creates parent directories if they do not exist.
+
+    Parameters
+    ----------
+    s3_path : str
+        The full S3 URI of the file to download.
+    local_path : pathlib.Path
+        The local file path where the downloaded file will be saved.
+    s3_open_neuro : bool
+        A flag indicating if the S3 bucket is the OpenNeuro main bucket, which
+        may affect path handling.
+
+    Returns
+    -------
+    pathlib.Path
+        The local path to the downloaded file.
+
+    """
     filesystem = get_s3_filesystem()
     if not s3_open_neuro:
         s3_path = re.sub(r"(^|/)ds\d{6}/", r"\1", s3_path, count=1)
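Taken together, the annotated helpers chain naturally. A usage sketch with an illustrative OpenNeuro file key and a hypothetical cache location:

from pathlib import Path
from eegdash.downloader import get_s3path, download_s3_file

# Illustrative key inside an OpenNeuro dataset.
s3_path = get_s3path("s3://openneuro.org", "ds002718/sub-01/eeg/sub-01_task-rest_eeg.set")
local = Path("cache/ds002718/sub-01/eeg/sub-01_task-rest_eeg.set")

# s3_open_neuro=True keeps the dsXXXXXX/ prefix in the key;
# False strips it, per the re.sub shown above.
download_s3_file(s3_path, local, s3_open_neuro=True)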
@@ -51,8 +95,31 @@ def download_dependencies(
     dataset_folder: Path,
     record: dict[str, Any],
     s3_open_neuro: bool,
-):
-    """Download all BIDS dependency files from S3
+) -> None:
+    """Download all BIDS dependency files from S3.
+
+    Iterates through a list of BIDS dependency files, downloads each from the
+    specified S3 bucket, and caches them in the appropriate local directory
+    structure.
+
+    Parameters
+    ----------
+    s3_bucket : str
+        The S3 bucket to download from.
+    bids_dependencies : list of str
+        A list of dependency file paths relative to the S3 bucket root.
+    bids_dependencies_original : list of str
+        The original dependency paths, used for resolving local cache paths.
+    cache_dir : pathlib.Path
+        The root directory for caching.
+    dataset_folder : pathlib.Path
+        The specific folder for the dataset within the cache directory.
+    record : dict
+        The metadata record for the main data file, used to resolve paths.
+    s3_open_neuro : bool
+        Flag for OpenNeuro-specific path handling.
+
+    """
     filesystem = get_s3_filesystem()
     for i, dep in enumerate(bids_dependencies):
         if not s3_open_neuro:

@@ -78,8 +145,27 @@ def download_dependencies(
         _filesystem_get(filesystem=filesystem, s3path=s3path, filepath=filepath)
 
 
-def _filesystem_get(filesystem: s3fs.S3FileSystem, s3path: str, filepath: Path):
-    """
+def _filesystem_get(filesystem: s3fs.S3FileSystem, s3path: str, filepath: Path) -> Path:
+    """Perform the file download using fsspec with a progress bar.
+
+    Internal helper function that wraps the ``filesystem.get`` call to include
+    a TQDM progress bar.
+
+    Parameters
+    ----------
+    filesystem : s3fs.S3FileSystem
+        The filesystem object to use for the download.
+    s3path : str
+        The full S3 URI of the source file.
+    filepath : pathlib.Path
+        The local destination path.
+
+    Returns
+    -------
+    pathlib.Path
+        The local path to the downloaded file.
+
+    """
     info = filesystem.info(s3path)
     size = info.get("size") or info.get("Size")
 
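The pattern in _filesystem_get — look up the object size, then route fsspec's transfer progress through TqdmCallback — can be reproduced with public s3fs/fsspec APIs alone. A minimal sketch, not the helper's actual body (the object key and destination are illustrative):

from pathlib import Path
import s3fs
from fsspec.callbacks import TqdmCallback

fs = s3fs.S3FileSystem(anon=True, client_kwargs={"region_name": "us-east-2"})
dest = Path("cache/README")  # hypothetical destination
dest.parent.mkdir(parents=True, exist_ok=True)

# Mirror the helper: query the object's size, then download with a tqdm
# progress callback attached to the fsspec transfer.
info = fs.info("s3://openneuro.org/ds002718/README")  # illustrative key
size = info.get("size") or info.get("Size")
fs.get(
    "s3://openneuro.org/ds002718/README",
    str(dest),
    callback=TqdmCallback(tqdm_kwargs={"unit": "B", "unit_scale": True}),
)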