eegdash 0.3.3.dev61__py3-none-any.whl → 0.5.0.dev180784713__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. eegdash/__init__.py +19 -6
  2. eegdash/api.py +336 -539
  3. eegdash/bids_eeg_metadata.py +495 -0
  4. eegdash/const.py +349 -0
  5. eegdash/dataset/__init__.py +28 -0
  6. eegdash/dataset/base.py +311 -0
  7. eegdash/dataset/bids_dataset.py +641 -0
  8. eegdash/dataset/dataset.py +692 -0
  9. eegdash/dataset/dataset_summary.csv +255 -0
  10. eegdash/dataset/registry.py +287 -0
  11. eegdash/downloader.py +197 -0
  12. eegdash/features/__init__.py +15 -13
  13. eegdash/features/datasets.py +329 -138
  14. eegdash/features/decorators.py +105 -13
  15. eegdash/features/extractors.py +233 -63
  16. eegdash/features/feature_bank/__init__.py +12 -12
  17. eegdash/features/feature_bank/complexity.py +22 -20
  18. eegdash/features/feature_bank/connectivity.py +27 -28
  19. eegdash/features/feature_bank/csp.py +3 -1
  20. eegdash/features/feature_bank/dimensionality.py +6 -6
  21. eegdash/features/feature_bank/signal.py +29 -30
  22. eegdash/features/feature_bank/spectral.py +40 -44
  23. eegdash/features/feature_bank/utils.py +8 -0
  24. eegdash/features/inspect.py +126 -15
  25. eegdash/features/serialization.py +58 -17
  26. eegdash/features/utils.py +90 -16
  27. eegdash/hbn/__init__.py +28 -0
  28. eegdash/hbn/preprocessing.py +105 -0
  29. eegdash/hbn/windows.py +428 -0
  30. eegdash/logging.py +54 -0
  31. eegdash/mongodb.py +55 -24
  32. eegdash/paths.py +52 -0
  33. eegdash/utils.py +29 -1
  34. eegdash-0.5.0.dev180784713.dist-info/METADATA +121 -0
  35. eegdash-0.5.0.dev180784713.dist-info/RECORD +38 -0
  36. eegdash-0.5.0.dev180784713.dist-info/licenses/LICENSE +29 -0
  37. eegdash/data_config.py +0 -34
  38. eegdash/data_utils.py +0 -687
  39. eegdash/dataset.py +0 -69
  40. eegdash/preprocessing.py +0 -63
  41. eegdash-0.3.3.dev61.dist-info/METADATA +0 -192
  42. eegdash-0.3.3.dev61.dist-info/RECORD +0 -28
  43. eegdash-0.3.3.dev61.dist-info/licenses/LICENSE +0 -23
  44. {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/WHEEL +0 -0
  45. {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/top_level.txt +0 -0
eegdash/downloader.py ADDED
@@ -0,0 +1,197 @@
1
+ # Authors: The EEGDash contributors.
2
+ # License: BSD-3-Clause
3
+ # Copyright the EEGDash contributors.
4
+
5
+ """File downloading utilities for EEG data from cloud storage.
6
+
7
+ This module provides functions for downloading EEG data files and BIDS dependencies from
8
+ AWS S3 storage, with support for caching and progress tracking. It handles the communication
9
+ between the EEGDash metadata database and the actual EEG data stored in the cloud.
10
+ """
11
+
12
+ import re
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ import s3fs
17
+ from fsspec.callbacks import TqdmCallback
18
+
19
+
20
def get_s3_filesystem() -> s3fs.S3FileSystem:
    """Build an anonymous S3 filesystem handle.

    The handle uses unauthenticated (``anon=True``) access and pins the
    underlying client to the ``us-east-2`` region.

    Returns
    -------
    s3fs.S3FileSystem
        A newly constructed filesystem object; a fresh instance is requested
        on every call.

    """
    client_options = {"region_name": "us-east-2"}
    return s3fs.S3FileSystem(anon=True, client_kwargs=client_options)
33
+
34
+
35
def get_s3path(s3_bucket: str, filepath: str) -> str:
    """Join a bucket prefix and an object path into one S3 URI.

    The two pieces are concatenated with a single ``/`` between them; no
    normalisation (stripping or collapsing of slashes) is performed.

    Parameters
    ----------
    s3_bucket : str
        The bucket prefix (e.g., "s3://my-bucket").
    filepath : str
        The object path inside the bucket.

    Returns
    -------
    str
        ``s3_bucket`` and ``filepath`` separated by ``/``
        (e.g., "s3://my-bucket/path/to/file").

    """
    return "{}/{}".format(s3_bucket, filepath)
52
+
53
+
54
def download_s3_file(s3_path: str, local_path: Path, s3_open_neuro: bool) -> Path:
    """Fetch one file from S3 and store it at ``local_path``.

    Missing parent directories of the destination are created before the
    transfer starts.

    Parameters
    ----------
    s3_path : str
        The full S3 URI of the file to download.
    local_path : pathlib.Path
        The local destination for the downloaded file.
    s3_open_neuro : bool
        When ``False``, the first ``dsNNNNNN/`` path component (``ds``
        followed by six digits) is removed from ``s3_path``, and a ``.set``
        source is swapped for ``.bdf`` in both the S3 URI and the local path.
        When ``True``, both paths are used as given.

    Returns
    -------
    pathlib.Path
        The local path the file was written to; this differs from the
        ``local_path`` argument when the ``.set`` -> ``.bdf`` swap applied.

    """
    fs = get_s3_filesystem()

    if not s3_open_neuro:
        # Drop only the first dataset-id component from the URI.
        s3_path = re.sub(r"(^|/)ds\d{6}/", r"\1", s3_path, count=1)
        # TODO: remove this hack when competition is over
        if s3_path.endswith(".set"):
            s3_path = f"{s3_path[:-4]}.bdf"
            local_path = local_path.with_suffix(".bdf")

    destination_dir = local_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    _filesystem_get(filesystem=fs, s3path=s3_path, filepath=local_path)
    return local_path
88
+
89
+
90
def download_dependencies(
    s3_bucket: str,
    bids_dependencies: list[str],
    bids_dependencies_original: list[str],
    cache_dir: Path,
    dataset_folder: Path,
    record: dict[str, Any],
    s3_open_neuro: bool,
) -> None:
    """Fetch every BIDS dependency file that is not yet cached locally.

    For each entry of ``bids_dependencies`` the S3 URI and the local cache
    location are derived, and the file is downloaded only when nothing exists
    at the local location yet.

    Parameters
    ----------
    s3_bucket : str
        The S3 bucket prefix the dependency paths are appended to.
    bids_dependencies : list of str
        Dependency paths used to build the S3 URIs.
    bids_dependencies_original : list of str
        Paths used to build the local cache location when ``s3_open_neuro``
        is ``False``; indexed in parallel with ``bids_dependencies``.
    cache_dir : pathlib.Path
        The root directory for caching.
    dataset_folder : pathlib.Path
        The dataset folder, placed under ``cache_dir``.
    record : dict
        Metadata record; its ``"dataset"`` value is compared with the first
        component of each dependency path so that component is not repeated
        below ``dataset_folder``.
    s3_open_neuro : bool
        When ``False``, ``.set`` names are swapped for ``.bdf`` on both the
        S3 side and the local side.

    """
    fs = get_s3_filesystem()
    swap_set_for_bdf = not s3_open_neuro

    for index, remote_dep in enumerate(bids_dependencies):
        if swap_set_for_bdf and remote_dep.endswith(".set"):
            remote_dep = remote_dep[:-4] + ".bdf"
        source_uri = get_s3path(s3_bucket, remote_dep)

        # The local layout follows the original path list for non-OpenNeuro buckets.
        local_dep = bids_dependencies_original[index] if swap_set_for_bdf else remote_dep
        local_dep_path = Path(local_dep)
        components = local_dep_path.parts
        if components and components[0] == record.get("dataset"):
            relative_target = Path(dataset_folder, *components[1:])
        else:
            relative_target = Path(dataset_folder) / local_dep_path

        target = cache_dir / relative_target
        if swap_set_for_bdf and target.suffix == ".set":
            target = target.with_suffix(".bdf")

        # Anything already present at the target is treated as cached.
        if target.exists():
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        _filesystem_get(filesystem=fs, s3path=source_uri, filepath=target)
146
+
147
+
148
def _filesystem_get(filesystem: s3fs.S3FileSystem, s3path: str, filepath: Path) -> Path:
    """Download one S3 object to ``filepath`` with a TQDM progress bar.

    The object is first written to a sibling ``<name>.part`` file and is only
    renamed to ``filepath`` once ``filesystem.get`` has returned. An
    interrupted or failed transfer therefore never leaves a truncated file at
    the final location, where an ``exists()`` check would mistake it for a
    complete cached copy.

    Parameters
    ----------
    filesystem : s3fs.S3FileSystem
        The filesystem object to use for the download.
    s3path : str
        The full S3 URI of the source file.
    filepath : pathlib.Path
        The local destination path.

    Returns
    -------
    pathlib.Path
        The ``filepath`` argument, unchanged.

    Raises
    ------
    Exception
        Whatever ``filesystem.info`` or ``filesystem.get`` raises is
        propagated after the partial file has been removed.

    """
    info = filesystem.info(s3path)
    # Look the size up under either key spelling for the progress-bar total.
    size = info.get("size") or info.get("Size")

    callback = TqdmCallback(
        size=size,
        tqdm_kwargs=dict(
            desc=f"Downloading {Path(s3path).name}",
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            dynamic_ncols=True,
            leave=True,
            mininterval=0.2,
            smoothing=0.1,
            miniters=1,
            bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} "
            "[{elapsed}<{remaining}, {rate_fmt}]",
        ),
    )

    target = Path(filepath)
    # Same directory as the target, so the final rename stays on one filesystem.
    partial_path = target.with_name(target.name + ".part")
    try:
        filesystem.get(s3path, str(partial_path), callback=callback)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a half-written file behind.
        partial_path.unlink(missing_ok=True)
        raise
    partial_path.replace(target)
    return filepath
190
+
191
+
192
+ __all__ = [
193
+ "download_s3_file",
194
+ "download_dependencies",
195
+ "get_s3path",
196
+ "get_s3_filesystem",
197
+ ]
@@ -15,17 +15,13 @@ from .extractors import (
15
15
  UnivariateFeature,
16
16
  )
17
17
  from .feature_bank import ( # Complexity; Connectivity; CSP; Dimensionality; Signal; Spectral
18
- CoherenceFeatureExtractor,
19
18
  CommonSpatialPattern,
20
- DBSpectralFeatureExtractor,
21
- EntropyFeatureExtractor,
22
- HilbertFeatureExtractor,
23
- NormalizedSpectralFeatureExtractor,
24
- SpectralFeatureExtractor,
25
19
  complexity_approx_entropy,
20
+ complexity_entropy_preprocessor,
26
21
  complexity_lempel_ziv,
27
22
  complexity_sample_entropy,
28
23
  complexity_svd_entropy,
24
+ connectivity_coherency_preprocessor,
29
25
  connectivity_imaginary_coherence,
30
26
  connectivity_lagged_coherence,
31
27
  connectivity_magnitude_square_coherence,
@@ -35,6 +31,7 @@ from .feature_bank import ( # Complexity; Connectivity; CSP; Dimensionality; Si
35
31
  dimensionality_katz_fractal_dim,
36
32
  dimensionality_petrosian_fractal_dim,
37
33
  signal_decorrelation_time,
34
+ signal_hilbert_preprocessor,
38
35
  signal_hjorth_activity,
39
36
  signal_hjorth_complexity,
40
37
  signal_hjorth_mobility,
@@ -49,18 +46,22 @@ from .feature_bank import ( # Complexity; Connectivity; CSP; Dimensionality; Si
49
46
  signal_variance,
50
47
  signal_zero_crossings,
51
48
  spectral_bands_power,
49
+ spectral_db_preprocessor,
52
50
  spectral_edge,
53
51
  spectral_entropy,
54
52
  spectral_hjorth_activity,
55
53
  spectral_hjorth_complexity,
56
54
  spectral_hjorth_mobility,
57
55
  spectral_moment,
56
+ spectral_normalized_preprocessor,
57
+ spectral_preprocessor,
58
58
  spectral_root_total_power,
59
59
  spectral_slope,
60
60
  )
61
61
  from .inspect import (
62
62
  get_all_feature_extractors,
63
63
  get_all_feature_kinds,
64
+ get_all_feature_preprocessors,
64
65
  get_all_features,
65
66
  get_feature_kind,
66
67
  get_feature_predecessors,
@@ -82,9 +83,10 @@ __all__ = [
82
83
  "MultivariateFeature",
83
84
  "TrainableFeature",
84
85
  "UnivariateFeature",
85
- "get_all_feature_extractors",
86
+ "get_all_feature_preprocessors",
86
87
  "get_all_feature_kinds",
87
88
  "get_all_features",
89
+ "get_all_feature_extractors",
88
90
  "get_feature_kind",
89
91
  "get_feature_predecessors",
90
92
  "load_features_concat_dataset",
@@ -92,13 +94,13 @@ __all__ = [
92
94
  "fit_feature_extractors",
93
95
  # Feature part
94
96
  # Complexity
95
- "EntropyFeatureExtractor",
97
+ "complexity_entropy_preprocessor",
96
98
  "complexity_approx_entropy",
97
99
  "complexity_sample_entropy",
98
100
  "complexity_svd_entropy",
99
101
  "complexity_lempel_ziv",
100
102
  # Connectivity
101
- "CoherenceFeatureExtractor",
103
+ "connectivity_coherency_preprocessor",
102
104
  "connectivity_magnitude_square_coherence",
103
105
  "connectivity_imaginary_coherence",
104
106
  "connectivity_lagged_coherence",
@@ -111,7 +113,7 @@ __all__ = [
111
113
  "dimensionality_hurst_exp",
112
114
  "dimensionality_detrended_fluctuation_analysis",
113
115
  # Signal
114
- "HilbertFeatureExtractor",
116
+ "signal_hilbert_preprocessor",
115
117
  "signal_mean",
116
118
  "signal_variance",
117
119
  "signal_skewness",
@@ -127,9 +129,9 @@ __all__ = [
127
129
  "signal_hjorth_complexity",
128
130
  "signal_decorrelation_time",
129
131
  # Spectral
130
- "SpectralFeatureExtractor",
131
- "NormalizedSpectralFeatureExtractor",
132
- "DBSpectralFeatureExtractor",
132
+ "spectral_preprocessor",
133
+ "spectral_normalized_preprocessor",
134
+ "spectral_db_preprocessor",
133
135
  "spectral_root_total_power",
134
136
  "spectral_moment",
135
137
  "spectral_entropy",