eegdash 0.3.3.dev61__py3-none-any.whl → 0.5.0.dev180784713__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eegdash/__init__.py +19 -6
- eegdash/api.py +336 -539
- eegdash/bids_eeg_metadata.py +495 -0
- eegdash/const.py +349 -0
- eegdash/dataset/__init__.py +28 -0
- eegdash/dataset/base.py +311 -0
- eegdash/dataset/bids_dataset.py +641 -0
- eegdash/dataset/dataset.py +692 -0
- eegdash/dataset/dataset_summary.csv +255 -0
- eegdash/dataset/registry.py +287 -0
- eegdash/downloader.py +197 -0
- eegdash/features/__init__.py +15 -13
- eegdash/features/datasets.py +329 -138
- eegdash/features/decorators.py +105 -13
- eegdash/features/extractors.py +233 -63
- eegdash/features/feature_bank/__init__.py +12 -12
- eegdash/features/feature_bank/complexity.py +22 -20
- eegdash/features/feature_bank/connectivity.py +27 -28
- eegdash/features/feature_bank/csp.py +3 -1
- eegdash/features/feature_bank/dimensionality.py +6 -6
- eegdash/features/feature_bank/signal.py +29 -30
- eegdash/features/feature_bank/spectral.py +40 -44
- eegdash/features/feature_bank/utils.py +8 -0
- eegdash/features/inspect.py +126 -15
- eegdash/features/serialization.py +58 -17
- eegdash/features/utils.py +90 -16
- eegdash/hbn/__init__.py +28 -0
- eegdash/hbn/preprocessing.py +105 -0
- eegdash/hbn/windows.py +428 -0
- eegdash/logging.py +54 -0
- eegdash/mongodb.py +55 -24
- eegdash/paths.py +52 -0
- eegdash/utils.py +29 -1
- eegdash-0.5.0.dev180784713.dist-info/METADATA +121 -0
- eegdash-0.5.0.dev180784713.dist-info/RECORD +38 -0
- eegdash-0.5.0.dev180784713.dist-info/licenses/LICENSE +29 -0
- eegdash/data_config.py +0 -34
- eegdash/data_utils.py +0 -687
- eegdash/dataset.py +0 -69
- eegdash/preprocessing.py +0 -63
- eegdash-0.3.3.dev61.dist-info/METADATA +0 -192
- eegdash-0.3.3.dev61.dist-info/RECORD +0 -28
- eegdash-0.3.3.dev61.dist-info/licenses/LICENSE +0 -23
- {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/WHEEL +0 -0
- {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/top_level.txt +0 -0
eegdash/downloader.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# Authors: The EEGDash contributors.
|
|
2
|
+
# License: BSD-3-Clause
|
|
3
|
+
# Copyright the EEGDash contributors.
|
|
4
|
+
|
|
5
|
+
"""File downloading utilities for EEG data from cloud storage.
|
|
6
|
+
|
|
7
|
+
This module provides functions for downloading EEG data files and BIDS dependencies from
|
|
8
|
+
AWS S3 storage, with support for caching and progress tracking. It handles the communication
|
|
9
|
+
between the EEGDash metadata database and the actual EEG data stored in the cloud.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import s3fs
|
|
17
|
+
from fsspec.callbacks import TqdmCallback
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_s3_filesystem() -> s3fs.S3FileSystem:
    """Build an S3 filesystem handle that uses anonymous credentials.

    The handle is created with ``anon=True`` and its client is pinned to the
    ``us-east-2`` region.

    Returns
    -------
    s3fs.S3FileSystem
        The configured filesystem object.

    """
    client_options = {"region_name": "us-east-2"}
    return s3fs.S3FileSystem(anon=True, client_kwargs=client_options)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_s3path(s3_bucket: str, filepath: str) -> str:
    """Join a bucket URI and an object key into a single S3 URI.

    Exactly one ``/`` separates the two parts: trailing slashes on
    ``s3_bucket`` and leading slashes on ``filepath`` are dropped, so a stray
    slash on either side cannot produce an empty path segment such as
    ``s3://my-bucket//path``.

    Parameters
    ----------
    s3_bucket : str
        The bucket URI, optionally with a key prefix
        (e.g., "s3://my-bucket" or "s3://my-bucket/prefix").
    filepath : str
        The path to the file within the bucket.

    Returns
    -------
    str
        The full S3 URI (e.g., "s3://my-bucket/path/to/file").

    """
    # ``str()`` keeps the function as permissive as the plain f-string it
    # replaces (anything formattable is still accepted).
    bucket = str(s3_bucket).rstrip("/")
    key = str(filepath).lstrip("/")
    return f"{bucket}/{key}"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def download_s3_file(s3_path: str, local_path: Path, s3_open_neuro: bool) -> Path:
    """Fetch one file from S3 and store it at ``local_path``.

    When ``s3_open_neuro`` is false the remote path is rewritten before the
    transfer: the first ``dsNNNNNN/`` segment (``ds`` followed by six digits)
    is removed, and a path ending in ``.set`` is redirected to ``.bdf`` both
    remotely and locally. Missing parent directories of the destination are
    created.

    Parameters
    ----------
    s3_path : str
        The full S3 URI of the file to download.
    local_path : pathlib.Path
        The local file path where the downloaded file will be saved.
    s3_open_neuro : bool
        Whether the source is the OpenNeuro main bucket. When true, the
        paths are used exactly as given.

    Returns
    -------
    pathlib.Path
        The local path that was written. It differs from ``local_path`` when
        the ``.set`` to ``.bdf`` redirection was applied.

    """
    fs = get_s3_filesystem()
    remote, destination = s3_path, local_path

    if not s3_open_neuro:
        remote = re.sub(r"(^|/)ds\d{6}/", r"\1", remote, count=1)
        # TODO: remove this hack when competition is over
        if remote.endswith(".set"):
            remote = f"{remote[:-4]}.bdf"
            destination = destination.with_suffix(".bdf")

    destination.parent.mkdir(parents=True, exist_ok=True)
    _filesystem_get(filesystem=fs, s3path=remote, filepath=destination)
    return destination
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def download_dependencies(
    s3_bucket: str,
    bids_dependencies: list[str],
    bids_dependencies_original: list[str],
    cache_dir: Path,
    dataset_folder: Path,
    record: dict[str, Any],
    s3_open_neuro: bool,
) -> None:
    """Fetch every BIDS dependency file that is not cached yet.

    For each entry of ``bids_dependencies`` the remote URI is built from
    ``s3_bucket`` and the local destination is placed under
    ``cache_dir / dataset_folder``. A leading path component equal to
    ``record["dataset"]`` is dropped from the local path. Files that already
    exist locally are skipped.

    When ``s3_open_neuro`` is false, a ``.set`` dependency is requested as
    ``.bdf`` remotely, the local path is derived from the entry at the same
    index in ``bids_dependencies_original``, and a local ``.set`` suffix is
    likewise replaced with ``.bdf``.

    Parameters
    ----------
    s3_bucket : str
        The S3 bucket to download from.
    bids_dependencies : list of str
        Dependency file paths relative to the S3 bucket root.
    bids_dependencies_original : list of str
        The original dependency paths, index-aligned with
        ``bids_dependencies``; only read when ``s3_open_neuro`` is false.
    cache_dir : pathlib.Path
        The root directory for caching.
    dataset_folder : pathlib.Path
        The folder for the dataset within the cache directory.
    record : dict
        The metadata record for the main data file; its ``"dataset"`` value
        is compared against the first component of each dependency path.
    s3_open_neuro : bool
        Whether the source is the OpenNeuro main bucket. When true, the
        dependency paths are used exactly as given.

    """
    fs = get_s3_filesystem()
    rewrite_set_files = not s3_open_neuro

    for index, remote_dep in enumerate(bids_dependencies):
        # Remote side: ``.set`` files are requested as ``.bdf`` outside
        # OpenNeuro.
        if rewrite_set_files and remote_dep.endswith(".set"):
            remote_dep = f"{remote_dep[:-4]}.bdf"
        source_uri = get_s3path(s3_bucket, remote_dep)

        # Local side: outside OpenNeuro the cache layout follows the original
        # dependency path rather than the remote one.
        local_dep = Path(
            bids_dependencies_original[index] if rewrite_set_files else remote_dep
        )
        parts = local_dep.parts
        if parts and parts[0] == record.get("dataset"):
            relative = Path(dataset_folder, *parts[1:])
        else:
            relative = Path(dataset_folder) / local_dep

        target = cache_dir / relative
        if rewrite_set_files and target.suffix == ".set":
            target = target.with_suffix(".bdf")

        if target.exists():
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        _filesystem_get(filesystem=fs, s3path=source_uri, filepath=target)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _filesystem_get(filesystem: s3fs.S3FileSystem, s3path: str, filepath: Path) -> Path:
    """Copy one remote file to disk while showing a TQDM progress bar.

    Internal helper: looks up the remote object's size via
    ``filesystem.info`` (key ``"size"``, falling back to ``"Size"``), then
    calls ``filesystem.get`` with a ``TqdmCallback`` sized accordingly.

    Parameters
    ----------
    filesystem : s3fs.S3FileSystem
        The filesystem object to use for the download.
    s3path : str
        The full S3 URI of the source file.
    filepath : pathlib.Path
        The local destination path.

    Returns
    -------
    pathlib.Path
        ``filepath``, unchanged.

    """
    remote_info = filesystem.info(s3path)
    total_bytes = remote_info.get("size") or remote_info.get("Size")

    progress_options = {
        "desc": f"Downloading {Path(s3path).name}",
        "unit": "B",
        "unit_scale": True,
        "unit_divisor": 1024,
        "dynamic_ncols": True,
        "leave": True,
        "mininterval": 0.2,
        "smoothing": 0.1,
        "miniters": 1,
        "bar_format": (
            "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} "
            "[{elapsed}<{remaining}, {rate_fmt}]"
        ),
    }
    progress = TqdmCallback(size=total_bytes, tqdm_kwargs=progress_options)

    filesystem.get(s3path, str(filepath), callback=progress)
    return filepath
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# Public API of this module; ``_filesystem_get`` is intentionally not exported.
__all__ = [
    "download_s3_file",
    "download_dependencies",
    "get_s3path",
    "get_s3_filesystem",
]
|
eegdash/features/__init__.py
CHANGED
|
@@ -15,17 +15,13 @@ from .extractors import (
|
|
|
15
15
|
UnivariateFeature,
|
|
16
16
|
)
|
|
17
17
|
from .feature_bank import ( # Complexity; Connectivity; CSP; Dimensionality; Signal; Spectral
|
|
18
|
-
CoherenceFeatureExtractor,
|
|
19
18
|
CommonSpatialPattern,
|
|
20
|
-
DBSpectralFeatureExtractor,
|
|
21
|
-
EntropyFeatureExtractor,
|
|
22
|
-
HilbertFeatureExtractor,
|
|
23
|
-
NormalizedSpectralFeatureExtractor,
|
|
24
|
-
SpectralFeatureExtractor,
|
|
25
19
|
complexity_approx_entropy,
|
|
20
|
+
complexity_entropy_preprocessor,
|
|
26
21
|
complexity_lempel_ziv,
|
|
27
22
|
complexity_sample_entropy,
|
|
28
23
|
complexity_svd_entropy,
|
|
24
|
+
connectivity_coherency_preprocessor,
|
|
29
25
|
connectivity_imaginary_coherence,
|
|
30
26
|
connectivity_lagged_coherence,
|
|
31
27
|
connectivity_magnitude_square_coherence,
|
|
@@ -35,6 +31,7 @@ from .feature_bank import ( # Complexity; Connectivity; CSP; Dimensionality; Si
|
|
|
35
31
|
dimensionality_katz_fractal_dim,
|
|
36
32
|
dimensionality_petrosian_fractal_dim,
|
|
37
33
|
signal_decorrelation_time,
|
|
34
|
+
signal_hilbert_preprocessor,
|
|
38
35
|
signal_hjorth_activity,
|
|
39
36
|
signal_hjorth_complexity,
|
|
40
37
|
signal_hjorth_mobility,
|
|
@@ -49,18 +46,22 @@ from .feature_bank import ( # Complexity; Connectivity; CSP; Dimensionality; Si
|
|
|
49
46
|
signal_variance,
|
|
50
47
|
signal_zero_crossings,
|
|
51
48
|
spectral_bands_power,
|
|
49
|
+
spectral_db_preprocessor,
|
|
52
50
|
spectral_edge,
|
|
53
51
|
spectral_entropy,
|
|
54
52
|
spectral_hjorth_activity,
|
|
55
53
|
spectral_hjorth_complexity,
|
|
56
54
|
spectral_hjorth_mobility,
|
|
57
55
|
spectral_moment,
|
|
56
|
+
spectral_normalized_preprocessor,
|
|
57
|
+
spectral_preprocessor,
|
|
58
58
|
spectral_root_total_power,
|
|
59
59
|
spectral_slope,
|
|
60
60
|
)
|
|
61
61
|
from .inspect import (
|
|
62
62
|
get_all_feature_extractors,
|
|
63
63
|
get_all_feature_kinds,
|
|
64
|
+
get_all_feature_preprocessors,
|
|
64
65
|
get_all_features,
|
|
65
66
|
get_feature_kind,
|
|
66
67
|
get_feature_predecessors,
|
|
@@ -82,9 +83,10 @@ __all__ = [
|
|
|
82
83
|
"MultivariateFeature",
|
|
83
84
|
"TrainableFeature",
|
|
84
85
|
"UnivariateFeature",
|
|
85
|
-
"
|
|
86
|
+
"get_all_feature_preprocessors",
|
|
86
87
|
"get_all_feature_kinds",
|
|
87
88
|
"get_all_features",
|
|
89
|
+
"get_all_feature_extractors",
|
|
88
90
|
"get_feature_kind",
|
|
89
91
|
"get_feature_predecessors",
|
|
90
92
|
"load_features_concat_dataset",
|
|
@@ -92,13 +94,13 @@ __all__ = [
|
|
|
92
94
|
"fit_feature_extractors",
|
|
93
95
|
# Feature part
|
|
94
96
|
# Complexity
|
|
95
|
-
"
|
|
97
|
+
"complexity_entropy_preprocessor",
|
|
96
98
|
"complexity_approx_entropy",
|
|
97
99
|
"complexity_sample_entropy",
|
|
98
100
|
"complexity_svd_entropy",
|
|
99
101
|
"complexity_lempel_ziv",
|
|
100
102
|
# Connectivity
|
|
101
|
-
"
|
|
103
|
+
"connectivity_coherency_preprocessor",
|
|
102
104
|
"connectivity_magnitude_square_coherence",
|
|
103
105
|
"connectivity_imaginary_coherence",
|
|
104
106
|
"connectivity_lagged_coherence",
|
|
@@ -111,7 +113,7 @@ __all__ = [
|
|
|
111
113
|
"dimensionality_hurst_exp",
|
|
112
114
|
"dimensionality_detrended_fluctuation_analysis",
|
|
113
115
|
# Signal
|
|
114
|
-
"
|
|
116
|
+
"signal_hilbert_preprocessor",
|
|
115
117
|
"signal_mean",
|
|
116
118
|
"signal_variance",
|
|
117
119
|
"signal_skewness",
|
|
@@ -127,9 +129,9 @@ __all__ = [
|
|
|
127
129
|
"signal_hjorth_complexity",
|
|
128
130
|
"signal_decorrelation_time",
|
|
129
131
|
# Spectral
|
|
130
|
-
"
|
|
131
|
-
"
|
|
132
|
-
"
|
|
132
|
+
"spectral_preprocessor",
|
|
133
|
+
"spectral_normalized_preprocessor",
|
|
134
|
+
"spectral_db_preprocessor",
|
|
133
135
|
"spectral_root_total_power",
|
|
134
136
|
"spectral_moment",
|
|
135
137
|
"spectral_entropy",
|