eegdash 0.3.8__py3-none-any.whl → 0.3.9.dev129__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. Click here for more details.
- eegdash/__init__.py +1 -1
- eegdash/api.py +72 -157
- eegdash/bids_eeg_metadata.py +149 -27
- eegdash/data_utils.py +63 -254
- eegdash/dataset/dataset.py +50 -44
- eegdash/dataset/registry.py +5 -16
- eegdash/downloader.py +176 -0
- eegdash/features/datasets.py +4 -3
- eegdash/hbn/preprocessing.py +1 -3
- eegdash/hbn/windows.py +0 -2
- eegdash/logging.py +23 -0
- {eegdash-0.3.8.dist-info → eegdash-0.3.9.dev129.dist-info}/METADATA +4 -2
- {eegdash-0.3.8.dist-info → eegdash-0.3.9.dev129.dist-info}/RECORD +16 -14
- {eegdash-0.3.8.dist-info → eegdash-0.3.9.dev129.dist-info}/WHEEL +0 -0
- {eegdash-0.3.8.dist-info → eegdash-0.3.9.dev129.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.3.8.dist-info → eegdash-0.3.9.dev129.dist-info}/top_level.txt +0 -0
eegdash/downloader.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import tempfile
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
from urllib.parse import urlsplit
|
|
6
|
+
|
|
7
|
+
import mne
|
|
8
|
+
import numpy as np
|
|
9
|
+
import s3fs
|
|
10
|
+
import xarray as xr
|
|
11
|
+
from fsspec.callbacks import TqdmCallback
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_s3_filesystem():
    """Build the S3 filesystem client used by the download helpers.

    Returns
    -------
    s3fs.S3FileSystem
        A filesystem created with ``anon=True`` and the client region set
        to ``us-east-2``.

    """
    client_options = {"region_name": "us-east-2"}
    return s3fs.S3FileSystem(anon=True, client_kwargs=client_options)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_s3path(s3_bucket: str, filepath: str) -> str:
    """Concatenate a bucket prefix and a relative file path with ``/``.

    Parameters
    ----------
    s3_bucket : str
        Bucket prefix placed before the separator.
    filepath : str
        Path placed after the separator.

    Returns
    -------
    str
        ``s3_bucket`` and ``filepath`` joined by exactly one inserted ``/``;
        neither argument is normalised.

    """
    return "{}/{}".format(s3_bucket, filepath)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def download_s3_file(s3_path: str, local_path: Path, s3_open_neuro: bool):
    """Download a single recording from S3 to ``local_path``.

    Parameters
    ----------
    s3_path : str
        Location of the object on S3.
    local_path : Path
        Where the file should be written; parent directories are created.
    s3_open_neuro : bool
        When false, the first ``dsNNNNNN/`` segment is removed from
        ``s3_path`` and a ``.set`` target is swapped for ``.bdf`` (both
        remotely and locally).

    Returns
    -------
    Path
        The path the file was written to (may differ from ``local_path``
        in its suffix, see above).

    """
    fs = get_s3_filesystem()

    remote = s3_path
    destination = local_path
    if not s3_open_neuro:
        # Drop the first six-digit dataset folder from the key.
        remote = re.sub(r"(^|/)ds\d{6}/", r"\1", remote, count=1)
        # TODO: remove this hack when competition is over
        if remote.endswith(".set"):
            remote = remote[: -len(".set")] + ".bdf"
            destination = destination.with_suffix(".bdf")

    destination.parent.mkdir(parents=True, exist_ok=True)
    _filesystem_get(filesystem=fs, s3path=remote, filepath=destination)
    return destination
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def download_dependencies(
    s3_bucket: str,
    bids_dependencies: list[str],
    bids_dependencies_original: list[str],
    cache_dir: Path,
    dataset_folder: Path,
    record: dict[str, Any],
    s3_open_neuro: bool,
):
    """Fetch every BIDS dependency that is not yet present in the cache.

    Parameters
    ----------
    s3_bucket : str
        Bucket prefix passed to :func:`get_s3path` for each dependency.
    bids_dependencies : list of str
        Dependency paths used to build the remote location.
    bids_dependencies_original : list of str
        Paths used to build the local location when ``s3_open_neuro`` is
        false; indexed in parallel with ``bids_dependencies``.
    cache_dir : Path
        Root directory of the local cache.
    dataset_folder : Path
        Folder, relative to ``cache_dir``, that holds this dataset.
    record : dict
        Record whose ``"dataset"`` value is compared against the first path
        component of each dependency.
    s3_open_neuro : bool
        When false, ``.set`` dependencies are fetched and stored as ``.bdf``.

    """
    filesystem = get_s3_filesystem()

    for position, listed in enumerate(bids_dependencies):
        # Remote side: optionally swap the .set suffix for .bdf.
        remote_name = listed
        if not s3_open_neuro and remote_name.endswith(".set"):
            remote_name = remote_name[:-4] + ".bdf"
        s3path = get_s3path(s3_bucket, remote_name)

        # Local side: the original listing is used outside OpenNeuro.
        local_name = listed if s3_open_neuro else bids_dependencies_original[position]
        local_rel = Path(local_name)
        components = local_rel.parts
        if components and components[0] == record.get("dataset"):
            # Leading dataset component is replaced by dataset_folder.
            inside_cache = Path(dataset_folder, *components[1:])
        else:
            inside_cache = Path(dataset_folder) / local_rel

        target = cache_dir / inside_cache
        if not s3_open_neuro and target.suffix == ".set":
            target = target.with_suffix(".bdf")

        if target.exists():
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        _filesystem_get(filesystem=filesystem, s3path=s3path, filepath=target)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _filesystem_get(filesystem: s3fs.S3FileSystem, s3path: str, filepath: Path):
    """Copy ``s3path`` to ``filepath`` while showing a tqdm progress bar.

    Parameters
    ----------
    filesystem : s3fs.S3FileSystem
        Filesystem used for both the size lookup and the transfer.
    s3path : str
        Remote object to download.
    filepath : Path
        Local destination.

    Returns
    -------
    Path
        ``filepath``, unchanged.

    """
    metadata = filesystem.info(s3path)
    # Accept either spelling of the size key in the info mapping.
    total_bytes = metadata.get("size") or metadata.get("Size")

    bar_options = {
        "desc": f"Downloading {Path(s3path).name}",
        "unit": "B",
        "unit_scale": True,
        "unit_divisor": 1024,
        "dynamic_ncols": True,
        "leave": True,
        "mininterval": 0.2,
        "smoothing": 0.1,
        "miniters": 1,
        "bar_format": "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} "
        "[{elapsed}<{remaining}, {rate_fmt}]",
    }
    progress = TqdmCallback(size=total_bytes, tqdm_kwargs=bar_options)

    filesystem.get(s3path, str(filepath), callback=progress)
    return filepath
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _copy_s3_object(filesystem, s3_uri: str, destination: Path) -> None:
    """Stream one S3 object to ``destination`` in chunks."""
    import shutil  # function-scope import: not part of the module's imports

    with (
        filesystem.open(s3_uri, mode="rb") as fsrc,
        open(destination, "wb") as fdst,
    ):
        # copyfileobj copies chunk by chunk instead of holding the whole
        # recording in memory at once.
        shutil.copyfileobj(fsrc, fdst)


def load_eeg_from_s3(s3path: str):
    """Load EEG data from an S3 URI into an ``xarray.DataArray``.

    Preserves the original filename, downloads sidecar files when applicable
    (e.g., ``.fdt`` for EEGLAB, ``.vmrk``/``.eeg`` for BrainVision), and uses
    MNE's direct readers.

    Parameters
    ----------
    s3path : str
        An S3 URI (should start with "s3://").

    Returns
    -------
    xr.DataArray
        EEG data with dimensions ``("channel", "time")``. The ``time``
        coordinate holds ``sample_index / sfreq`` for every sample, so
        consecutive samples are exactly ``1 / sfreq`` apart.

    Raises
    ------
    ValueError
        If the file extension is unsupported (presumably raised by
        ``mne.io.read_raw`` -- confirm against the MNE version in use).
    OSError
        If the main file, or a sidecar that exists, cannot be downloaded.

    """
    filesystem = get_s3_filesystem()
    parsed = urlsplit(s3path)
    s3_key = parsed.path  # e.g., "/dsXXXX/sub-.../..._eeg.set"
    basename = Path(s3_key).name
    ext = Path(basename).suffix.lower()

    # Use a temp dir so sidecars can be colocated with the main file.
    with tempfile.TemporaryDirectory() as tmpdir:
        # Keep the S3 base name so the reader can find matching sidecars.
        local_main = Path(tmpdir) / basename
        _copy_s3_object(filesystem, s3path, local_main)

        # Determine and fetch any required sidecars
        sidecars: list[str] = []
        if ext == ".set":  # EEGLAB
            sidecars = [".fdt"]
        elif ext == ".vhdr":  # BrainVision
            sidecars = [".vmrk", ".eeg", ".dat", ".raw"]

        for sc_ext in sidecars:
            sc_key = s3_key[: -len(ext)] + sc_ext
            sc_uri = f"s3://{parsed.netloc}{sc_key}"
            try:
                sidecar_found = bool(filesystem.info(sc_uri))
            except OSError:
                # Sidecar not present (or not readable); skip it. Only the
                # existence probe is guarded, so a failed download of a
                # sidecar that does exist is not silently ignored.
                continue
            if sidecar_found:
                _copy_s3_object(filesystem, sc_uri, Path(tmpdir) / Path(sc_key).name)

        # Read using appropriate MNE reader
        raw = mne.io.read_raw(str(local_main), preload=True, verbose=False)

        data = raw.get_data()
        fs = raw.info["sfreq"]
        # Sample i is taken at i / fs seconds. An endpoint-inclusive
        # linspace over [0, n / fs] would stretch the spacing to
        # n / (fs * (n - 1)) and mislabel every sample after the first.
        time_steps = np.arange(data.shape[1]) / fs
        channel_names = raw.ch_names

        return xr.DataArray(
            data=data,
            dims=["channel", "time"],
            coords={"time": time_steps, "channel": channel_names},
        )
|
eegdash/features/datasets.py
CHANGED
|
@@ -3,7 +3,6 @@ from __future__ import annotations
|
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
5
|
import shutil
|
|
6
|
-
import warnings
|
|
7
6
|
from collections.abc import Callable
|
|
8
7
|
from typing import Dict, List
|
|
9
8
|
|
|
@@ -17,6 +16,8 @@ from braindecode.datasets.base import (
|
|
|
17
16
|
_create_description,
|
|
18
17
|
)
|
|
19
18
|
|
|
19
|
+
from ..logging import logger
|
|
20
|
+
|
|
20
21
|
|
|
21
22
|
class FeaturesDataset(EEGWindowsDataset):
|
|
22
23
|
"""Returns samples from a pandas DataFrame object along with a target.
|
|
@@ -283,7 +284,7 @@ class FeaturesConcatDataset(BaseConcatDataset):
|
|
|
283
284
|
# the following will be True for all datasets preprocessed and
|
|
284
285
|
# stored in parallel with braindecode.preprocessing.preprocess
|
|
285
286
|
if i_ds + 1 + offset < n_sub_dirs:
|
|
286
|
-
|
|
287
|
+
logger.warning(
|
|
287
288
|
f"The number of saved datasets ({i_ds + 1 + offset}) "
|
|
288
289
|
f"does not match the number of existing "
|
|
289
290
|
f"subdirectories ({n_sub_dirs}). You may now "
|
|
@@ -294,7 +295,7 @@ class FeaturesConcatDataset(BaseConcatDataset):
|
|
|
294
295
|
# if path contains files or directories that were not touched, raise
|
|
295
296
|
# warning
|
|
296
297
|
if path_contents:
|
|
297
|
-
|
|
298
|
+
logger.warning(
|
|
298
299
|
f"Chosen directory {path} contains other "
|
|
299
300
|
f"subdirectories or files {path_contents}."
|
|
300
301
|
)
|
eegdash/hbn/preprocessing.py
CHANGED
eegdash/hbn/windows.py
CHANGED
|
@@ -7,8 +7,6 @@ from mne_bids import get_bids_path_from_fname
|
|
|
7
7
|
|
|
8
8
|
from braindecode.datasets.base import BaseConcatDataset
|
|
9
9
|
|
|
10
|
-
logger = logging.getLogger("eegdash")
|
|
11
|
-
|
|
12
10
|
|
|
13
11
|
def build_trial_table(events_df: pd.DataFrame) -> pd.DataFrame:
|
|
14
12
|
"""One row per contrast trial with stimulus/response metrics."""
|
eegdash/logging.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from rich.logging import RichHandler
|
|
4
|
+
|
|
5
|
+
# Handler that renders log records (and tracebacks) through rich.
rich_handler = RichHandler(rich_tracebacks=True, markup=True)

# Reconfigure the process-wide root logger: discard every handler attached
# before this module was imported and route everything through rich instead.
# NOTE(review): this runs at import time and affects the host application's
# logging as well -- confirm that is intended for a library.
root_logger = logging.getLogger()
root_logger.handlers = []
root_logger.addHandler(rich_handler)
root_logger.setLevel(logging.INFO)

# Package-level logger shared across eegdash. It has no handler of its own;
# its records reach the RichHandler installed on the root logger above.
logger = logging.getLogger("eegdash")
logger.setLevel(logging.INFO)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.3.8
|
|
3
|
+
Version: 0.3.9.dev129
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
6
6
|
License-Expression: GPL-3.0-only
|
|
@@ -25,7 +25,7 @@ Requires-Python: >=3.10
|
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
License-File: LICENSE
|
|
27
27
|
Requires-Dist: braindecode>=1.0
|
|
28
|
-
Requires-Dist: mne_bids>=0.
|
|
28
|
+
Requires-Dist: mne_bids>=0.17.0
|
|
29
29
|
Requires-Dist: numba
|
|
30
30
|
Requires-Dist: numpy
|
|
31
31
|
Requires-Dist: pandas
|
|
@@ -41,6 +41,7 @@ Requires-Dist: pymatreader
|
|
|
41
41
|
Requires-Dist: eeglabio
|
|
42
42
|
Requires-Dist: tabulate
|
|
43
43
|
Requires-Dist: docstring_inheritance
|
|
44
|
+
Requires-Dist: rich
|
|
44
45
|
Provides-Extra: tests
|
|
45
46
|
Requires-Dist: pytest; extra == "tests"
|
|
46
47
|
Requires-Dist: pytest-cov; extra == "tests"
|
|
@@ -63,6 +64,7 @@ Requires-Dist: memory_profiler; extra == "docs"
|
|
|
63
64
|
Requires-Dist: ipython; extra == "docs"
|
|
64
65
|
Requires-Dist: lightgbm; extra == "docs"
|
|
65
66
|
Requires-Dist: plotly; extra == "docs"
|
|
67
|
+
Requires-Dist: nbformat; extra == "docs"
|
|
66
68
|
Provides-Extra: all
|
|
67
69
|
Requires-Dist: eegdash[docs]; extra == "all"
|
|
68
70
|
Requires-Dist: eegdash[dev]; extra == "all"
|
|
@@ -1,17 +1,19 @@
|
|
|
1
|
-
eegdash/__init__.py,sha256=
|
|
2
|
-
eegdash/api.py,sha256=
|
|
3
|
-
eegdash/bids_eeg_metadata.py,sha256=
|
|
1
|
+
eegdash/__init__.py,sha256=WLMYpZuTIJ5DnzhwE40o4aqt8iwVpxdmOn9ezY7hw60,284
|
|
2
|
+
eegdash/api.py,sha256=Eh-CxLOhDthmVeAubezXPu0eSOhRuYBN23iGIV75EVM,37675
|
|
3
|
+
eegdash/bids_eeg_metadata.py,sha256=XUkQp2M8zQ_wH5JC8lQiVR0TWssOjSdrdEWkgwGjiZ8,13699
|
|
4
4
|
eegdash/const.py,sha256=qdFBEL9kIrsj9CdxbXhBkR61R3CrTGSaj5Iq0YOACIs,7313
|
|
5
|
-
eegdash/data_utils.py,sha256=
|
|
5
|
+
eegdash/data_utils.py,sha256=dePEXcJefo3gmD534bb576p8v9jErDNLDHQoDGHqH-g,26006
|
|
6
|
+
eegdash/downloader.py,sha256=B-8u0c39F4inV-v_WgYZrUKAGPQOdfqC2RX1qNrRQYM,5808
|
|
7
|
+
eegdash/logging.py,sha256=SZdB7WLT5b2okecWpvLx4UWUxg3DiA11Z5d9lhYdDyc,616
|
|
6
8
|
eegdash/mongodb.py,sha256=GD3WgA253oFgpzOHrYaj4P1mRjNtDMT5Oj4kVvHswjI,2006
|
|
7
9
|
eegdash/paths.py,sha256=246xkectTxDAYcREs1Qma_F1Y-oSmLlb0hn0F2Za5Ss,866
|
|
8
10
|
eegdash/utils.py,sha256=7TfQ9D0LrAJ7FgnSXEvWgeHWK2QqaqS-_WcWXD86ObQ,408
|
|
9
11
|
eegdash/dataset/__init__.py,sha256=Qmzki5G8GaFlzTb10e4SmC3WkKuJyo1Ckii15tCEHAo,157
|
|
10
|
-
eegdash/dataset/dataset.py,sha256=
|
|
12
|
+
eegdash/dataset/dataset.py,sha256=DbQX_ajPExWE-6DmqCHWEDyJV-6eZI-dY7gi8y125BQ,7070
|
|
11
13
|
eegdash/dataset/dataset_summary.csv,sha256=XF0vdHz77DFyVLTaET8lL5gQQ4r-q1xAfSDWH5GTPLA,23655
|
|
12
|
-
eegdash/dataset/registry.py,sha256=
|
|
14
|
+
eegdash/dataset/registry.py,sha256=genOqAuf9cQBnHhPqRwfLP7S1XsnkLot6sLyJozPtf4,4150
|
|
13
15
|
eegdash/features/__init__.py,sha256=BXNhjvL4_SSFAY1lcP9nyGpkbJNtoOMH4AHlF6OyABo,4078
|
|
14
|
-
eegdash/features/datasets.py,sha256=
|
|
16
|
+
eegdash/features/datasets.py,sha256=eV4d86EU4fu1yoIMdPQnot6YZDRGG4qE9h77lk7iVhU,18317
|
|
15
17
|
eegdash/features/decorators.py,sha256=v0qaJz_dcX703p1fvFYbAIXmwK3d8naYGlq7fRVKn_w,1313
|
|
16
18
|
eegdash/features/extractors.py,sha256=H7h6tP3dKoRcjDJpWWAo0ppmokCq5QlhqMcehYwYV9s,6845
|
|
17
19
|
eegdash/features/inspect.py,sha256=PmbWhx5H_WqpnorUpWONUSkUtaIHkZblRa_Xyk7Szyc,1569
|
|
@@ -26,10 +28,10 @@ eegdash/features/feature_bank/signal.py,sha256=3Tb8z9gX7iZipxQJ9DSyy30JfdmW58kgv
|
|
|
26
28
|
eegdash/features/feature_bank/spectral.py,sha256=bNB7skusePs1gX7NOU6yRlw_Gr4UOCkO_ylkCgybzug,3319
|
|
27
29
|
eegdash/features/feature_bank/utils.py,sha256=DGh-Q7-XFIittP7iBBxvsJaZrlVvuY5mw-G7q6C-PCI,1237
|
|
28
30
|
eegdash/hbn/__init__.py,sha256=U8mK64napnKU746C5DOwkX7W7sg3iW5kb_cVv2pfFq0,394
|
|
29
|
-
eegdash/hbn/preprocessing.py,sha256=
|
|
30
|
-
eegdash/hbn/windows.py,sha256=
|
|
31
|
-
eegdash-0.3.
|
|
32
|
-
eegdash-0.3.
|
|
33
|
-
eegdash-0.3.
|
|
34
|
-
eegdash-0.3.
|
|
35
|
-
eegdash-0.3.
|
|
31
|
+
eegdash/hbn/preprocessing.py,sha256=zMkDFXQxWLn-Iy0cHRk-JL5c479HpclgIAGm-xuKPFg,2196
|
|
32
|
+
eegdash/hbn/windows.py,sha256=Obv4L2hP8ACancUawkMYbiusfvhAw-mG_vqyDDMwOJ8,9890
|
|
33
|
+
eegdash-0.3.9.dev129.dist-info/licenses/LICENSE,sha256=asisR-xupy_NrQBFXnx6yqXeZcYWLvbAaiETl25iXT0,931
|
|
34
|
+
eegdash-0.3.9.dev129.dist-info/METADATA,sha256=_G43SOhR91fq4TW8pEem03XOxPDerz57mH0CejyMtvc,10362
|
|
35
|
+
eegdash-0.3.9.dev129.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
36
|
+
eegdash-0.3.9.dev129.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
|
|
37
|
+
eegdash-0.3.9.dev129.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|