eegdash 0.3.2.dev52__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. Click here for more details.
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/PKG-INFO +16 -28
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/README.md +12 -2
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/__init__.py +3 -1
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/api.py +71 -59
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/data_utils.py +41 -2
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/dataset.py +13 -0
- eegdash-0.3.3/eegdash/registry.py +72 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash.egg-info/PKG-INFO +16 -28
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash.egg-info/SOURCES.txt +3 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash.egg-info/requires.txt +2 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/pyproject.toml +4 -3
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/tests/test_dataset.py +13 -1
- eegdash-0.3.3/tests/test_dataset_registration.py +33 -0
- eegdash-0.3.3/tests/test_functional.py +28 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/LICENSE +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/data_config.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/__init__.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/datasets.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/decorators.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/extractors.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/feature_bank/__init__.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/feature_bank/complexity.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/feature_bank/connectivity.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/feature_bank/csp.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/feature_bank/dimensionality.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/feature_bank/signal.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/feature_bank/spectral.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/feature_bank/utils.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/inspect.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/serialization.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/features/utils.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/mongodb.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/preprocessing.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash/utils.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash.egg-info/dependency_links.txt +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/eegdash.egg-info/top_level.txt +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/setup.cfg +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/tests/test_correctness.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/tests/test_eegdash.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/tests/test_init.py +0 -0
- {eegdash-0.3.2.dev52 → eegdash-0.3.3}/tests/test_mongo_connection.py +0 -0
|
@@ -1,35 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.3.2.dev52
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
6
|
-
License:
|
|
7
|
-
|
|
8
|
-
Copyright (C) 2024-2025
|
|
9
|
-
|
|
10
|
-
Young Truong, UCSD, dt.young112@gmail.com
|
|
11
|
-
Arnaud Delorme, UCSD, adelorme@ucsd.edu
|
|
12
|
-
Aviv Dotan, BGU, avivdot@bgu.post.ac.il
|
|
13
|
-
Oren Shriki, BGU, shrikio@bgu.ac.il
|
|
14
|
-
Bruno Aristimunha, b.aristimunha@gmail.com
|
|
15
|
-
|
|
16
|
-
This program is free software; you can redistribute it and/or modify
|
|
17
|
-
it under the terms of the GNU General Public License as published by
|
|
18
|
-
the Free Software Foundation; either version 2 of the License, or
|
|
19
|
-
(at your option) any later version.
|
|
20
|
-
|
|
21
|
-
This program is distributed in the hope that it will be useful,
|
|
22
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
23
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
24
|
-
GNU General Public License for more details.
|
|
25
|
-
|
|
26
|
-
You should have received a copy of the GNU General Public License
|
|
27
|
-
along with this program; if not, write to the Free Software
|
|
28
|
-
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1.07 USA
|
|
29
|
-
|
|
6
|
+
License-Expression: GPL-3.0-only
|
|
30
7
|
Project-URL: Homepage, https://github.com/sccn/EEG-Dash-Data
|
|
31
8
|
Project-URL: Issues, https://github.com/sccn/EEG-Dash-Data/issues
|
|
32
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
33
9
|
Classifier: Operating System :: OS Independent
|
|
34
10
|
Classifier: Intended Audience :: Science/Research
|
|
35
11
|
Classifier: Intended Audience :: Developers
|
|
@@ -60,6 +36,8 @@ Requires-Dist: s3fs
|
|
|
60
36
|
Requires-Dist: scipy
|
|
61
37
|
Requires-Dist: tqdm
|
|
62
38
|
Requires-Dist: xarray
|
|
39
|
+
Requires-Dist: h5io>=0.2.4
|
|
40
|
+
Requires-Dist: pymatreader
|
|
63
41
|
Provides-Extra: tests
|
|
64
42
|
Requires-Dist: pytest; extra == "tests"
|
|
65
43
|
Requires-Dist: pytest-cov; extra == "tests"
|
|
@@ -98,6 +76,14 @@ Dynamic: license-file
|
|
|
98
76
|
|
|
99
77
|
# EEG-Dash
|
|
100
78
|
|
|
79
|
+
[](https://pypi.org/project/eegdash/)
|
|
80
|
+
[](https://sccn.github.io/eegdash)
|
|
81
|
+
|
|
82
|
+
[](LICENSE)
|
|
83
|
+
[](https://pypi.org/project/eegdash/)
|
|
84
|
+
[](https://pepy.tech/project/eegdash)
|
|
85
|
+
<!-- [](https://codecov.io/gh/sccn/eegdash) -->
|
|
86
|
+
|
|
101
87
|
To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
|
|
102
88
|
|
|
103
89
|
## Data source
|
|
@@ -143,7 +129,9 @@ To use the data from a single subject, enter:
|
|
|
143
129
|
from eegdash import EEGDashDataset
|
|
144
130
|
|
|
145
131
|
ds_NDARDB033FW5 = EEGDashDataset(
|
|
146
|
-
{"dataset": "ds005514", "task":
|
|
132
|
+
{"dataset": "ds005514", "task":
|
|
133
|
+
"RestingState", "subject": "NDARDB033FW5"},
|
|
134
|
+
cache_dir="."
|
|
147
135
|
)
|
|
148
136
|
```
|
|
149
137
|
|
|
@@ -155,7 +143,7 @@ To use the data from multiple subjects, enter:
|
|
|
155
143
|
from eegdash import EEGDashDataset
|
|
156
144
|
|
|
157
145
|
ds_ds005505rest = EEGDashDataset(
|
|
158
|
-
{"dataset": "ds005505", "task": "RestingState"}, target_name="sex"
|
|
146
|
+
{"dataset": "ds005505", "task": "RestingState"}, target_name="sex", cache_dir="."
|
|
159
147
|
)
|
|
160
148
|
```
|
|
161
149
|
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# EEG-Dash
|
|
2
2
|
|
|
3
|
+
[](https://pypi.org/project/eegdash/)
|
|
4
|
+
[](https://sccn.github.io/eegdash)
|
|
5
|
+
|
|
6
|
+
[](LICENSE)
|
|
7
|
+
[](https://pypi.org/project/eegdash/)
|
|
8
|
+
[](https://pepy.tech/project/eegdash)
|
|
9
|
+
<!-- [](https://codecov.io/gh/sccn/eegdash) -->
|
|
10
|
+
|
|
3
11
|
To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
|
|
4
12
|
|
|
5
13
|
## Data source
|
|
@@ -45,7 +53,9 @@ To use the data from a single subject, enter:
|
|
|
45
53
|
from eegdash import EEGDashDataset
|
|
46
54
|
|
|
47
55
|
ds_NDARDB033FW5 = EEGDashDataset(
|
|
48
|
-
{"dataset": "ds005514", "task":
|
|
56
|
+
{"dataset": "ds005514", "task":
|
|
57
|
+
"RestingState", "subject": "NDARDB033FW5"},
|
|
58
|
+
cache_dir="."
|
|
49
59
|
)
|
|
50
60
|
```
|
|
51
61
|
|
|
@@ -57,7 +67,7 @@ To use the data from multiple subjects, enter:
|
|
|
57
67
|
from eegdash import EEGDashDataset
|
|
58
68
|
|
|
59
69
|
ds_ds005505rest = EEGDashDataset(
|
|
60
|
-
{"dataset": "ds005505", "task": "RestingState"}, target_name="sex"
|
|
70
|
+
{"dataset": "ds005505", "task": "RestingState"}, target_name="sex", cache_dir="."
|
|
61
71
|
)
|
|
62
72
|
```
|
|
63
73
|
|
|
@@ -534,6 +534,7 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
534
534
|
],
|
|
535
535
|
cache_dir: str = "~/eegdash_cache",
|
|
536
536
|
s3_bucket: str | None = None,
|
|
537
|
+
eeg_dash_instance=None,
|
|
537
538
|
**kwargs,
|
|
538
539
|
):
|
|
539
540
|
"""Create a new EEGDashDataset from a given query or local BIDS dataset directory
|
|
@@ -568,29 +569,51 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
568
569
|
"""
|
|
569
570
|
self.cache_dir = cache_dir
|
|
570
571
|
self.s3_bucket = s3_bucket
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
572
|
+
self.eeg_dash = eeg_dash_instance or EEGDash()
|
|
573
|
+
_owns_client = eeg_dash_instance is None
|
|
574
|
+
|
|
575
|
+
try:
|
|
576
|
+
if query:
|
|
577
|
+
datasets = self.find_datasets(query, description_fields, **kwargs)
|
|
578
|
+
elif data_dir:
|
|
579
|
+
if isinstance(data_dir, str):
|
|
580
|
+
datasets = self.load_bids_dataset(
|
|
581
|
+
dataset, data_dir, description_fields, s3_bucket, **kwargs
|
|
582
|
+
)
|
|
583
|
+
else:
|
|
584
|
+
assert len(data_dir) == len(dataset), (
|
|
585
|
+
"Number of datasets and their directories must match"
|
|
586
|
+
)
|
|
587
|
+
datasets = []
|
|
588
|
+
for i, _ in enumerate(data_dir):
|
|
589
|
+
datasets.extend(
|
|
590
|
+
self.load_bids_dataset(
|
|
591
|
+
dataset[i],
|
|
592
|
+
data_dir[i],
|
|
593
|
+
description_fields,
|
|
594
|
+
s3_bucket,
|
|
595
|
+
**kwargs,
|
|
596
|
+
)
|
|
597
|
+
)
|
|
578
598
|
else:
|
|
579
|
-
|
|
580
|
-
"
|
|
599
|
+
raise ValueError(
|
|
600
|
+
"Exactly one of 'query' or 'data_dir' must be provided."
|
|
581
601
|
)
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
602
|
+
finally:
|
|
603
|
+
# If we created the client, close it now that construction is done.
|
|
604
|
+
if _owns_client:
|
|
605
|
+
try:
|
|
606
|
+
self.eeg_dash.close()
|
|
607
|
+
except Exception:
|
|
608
|
+
# Don't let close errors break construction
|
|
609
|
+
pass
|
|
589
610
|
|
|
590
611
|
self.filesystem = S3FileSystem(
|
|
591
612
|
anon=True, client_kwargs={"region_name": "us-east-2"}
|
|
592
613
|
)
|
|
593
614
|
|
|
615
|
+
self.eeg_dash.close()
|
|
616
|
+
|
|
594
617
|
super().__init__(datasets)
|
|
595
618
|
|
|
596
619
|
def find_key_in_nested_dict(self, data: Any, target_key: str) -> Any:
|
|
@@ -628,27 +651,23 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
628
651
|
A list of EEGDashBaseDataset objects that match the query.
|
|
629
652
|
|
|
630
653
|
"""
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
for
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
description=description,
|
|
646
|
-
**kwargs,
|
|
647
|
-
)
|
|
654
|
+
datasets: list[EEGDashBaseDataset] = []
|
|
655
|
+
for record in self.eeg_dash.find(query):
|
|
656
|
+
description = {}
|
|
657
|
+
for field in description_fields:
|
|
658
|
+
value = self.find_key_in_nested_dict(record, field)
|
|
659
|
+
if value is not None:
|
|
660
|
+
description[field] = value
|
|
661
|
+
datasets.append(
|
|
662
|
+
EEGDashBaseDataset(
|
|
663
|
+
record,
|
|
664
|
+
self.cache_dir,
|
|
665
|
+
self.s3_bucket,
|
|
666
|
+
description=description,
|
|
667
|
+
**kwargs,
|
|
648
668
|
)
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
eeg_dash_instance.close()
|
|
669
|
+
)
|
|
670
|
+
return datasets
|
|
652
671
|
|
|
653
672
|
def load_bids_dataset(
|
|
654
673
|
self,
|
|
@@ -676,36 +695,28 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
676
695
|
data_dir=data_dir,
|
|
677
696
|
dataset=dataset,
|
|
678
697
|
)
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
s3_bucket=s3_bucket,
|
|
687
|
-
description_fields=description_fields,
|
|
688
|
-
)
|
|
689
|
-
for bids_file in bids_dataset.get_files()
|
|
698
|
+
datasets = Parallel(n_jobs=-1, prefer="threads", verbose=1)(
|
|
699
|
+
delayed(self.get_base_dataset_from_bids_file)(
|
|
700
|
+
bids_dataset=bids_dataset,
|
|
701
|
+
bids_file=bids_file,
|
|
702
|
+
s3_bucket=s3_bucket,
|
|
703
|
+
description_fields=description_fields,
|
|
704
|
+
**kwargs,
|
|
690
705
|
)
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
706
|
+
for bids_file in bids_dataset.get_files()
|
|
707
|
+
)
|
|
708
|
+
return datasets
|
|
694
709
|
|
|
695
710
|
def get_base_dataset_from_bids_file(
|
|
696
711
|
self,
|
|
697
|
-
bids_dataset: EEGBIDSDataset,
|
|
712
|
+
bids_dataset: "EEGBIDSDataset",
|
|
698
713
|
bids_file: str,
|
|
699
|
-
eeg_dash_instance: EEGDash,
|
|
700
714
|
s3_bucket: str | None,
|
|
701
715
|
description_fields: list[str],
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
record = eeg_dash_instance.load_eeg_attrs_from_bids_file(
|
|
707
|
-
bids_dataset, bids_file
|
|
708
|
-
)
|
|
716
|
+
**kwargs,
|
|
717
|
+
) -> "EEGDashBaseDataset":
|
|
718
|
+
"""Instantiate a single EEGDashBaseDataset given a local BIDS file (metadata only)."""
|
|
719
|
+
record = self.eeg_dash.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
|
|
709
720
|
description = {}
|
|
710
721
|
for field in description_fields:
|
|
711
722
|
value = self.find_key_in_nested_dict(record, field)
|
|
@@ -716,4 +727,5 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
716
727
|
self.cache_dir,
|
|
717
728
|
s3_bucket,
|
|
718
729
|
description=description,
|
|
730
|
+
**kwargs,
|
|
719
731
|
)
|
|
@@ -10,6 +10,7 @@ import numpy as np
|
|
|
10
10
|
import pandas as pd
|
|
11
11
|
import s3fs
|
|
12
12
|
from bids import BIDSLayout
|
|
13
|
+
from fsspec.callbacks import TqdmCallback
|
|
13
14
|
from joblib import Parallel, delayed
|
|
14
15
|
from mne._fiff.utils import _read_segments_file
|
|
15
16
|
from mne.io import BaseRaw
|
|
@@ -98,8 +99,27 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
98
99
|
self.s3file = re.sub(r"(^|/)ds\d{6}/", r"\1", self.s3file, count=1)
|
|
99
100
|
|
|
100
101
|
self.filecache.parent.mkdir(parents=True, exist_ok=True)
|
|
102
|
+
info = filesystem.info(self.s3file)
|
|
103
|
+
size = info.get("size") or info.get("Size")
|
|
104
|
+
|
|
105
|
+
callback = TqdmCallback(
|
|
106
|
+
size=size,
|
|
107
|
+
tqdm_kwargs=dict(
|
|
108
|
+
desc=f"Downloading {Path(self.s3file).name}",
|
|
109
|
+
unit="B",
|
|
110
|
+
unit_scale=True,
|
|
111
|
+
unit_divisor=1024,
|
|
112
|
+
dynamic_ncols=True,
|
|
113
|
+
leave=True,
|
|
114
|
+
mininterval=0.2,
|
|
115
|
+
smoothing=0.1,
|
|
116
|
+
miniters=1,
|
|
117
|
+
bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} "
|
|
118
|
+
"[{elapsed}<{remaining}, {rate_fmt}]",
|
|
119
|
+
),
|
|
120
|
+
)
|
|
121
|
+
filesystem.get(self.s3file, self.filecache, callback=callback)
|
|
101
122
|
|
|
102
|
-
filesystem.download(self.s3file, self.filecache)
|
|
103
123
|
self.filenames = [self.filecache]
|
|
104
124
|
|
|
105
125
|
def _download_dependencies(self) -> None:
|
|
@@ -119,7 +139,26 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
119
139
|
# in the case of the competition.
|
|
120
140
|
if not filepath.exists():
|
|
121
141
|
filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
122
|
-
filesystem.
|
|
142
|
+
info = filesystem.info(s3path)
|
|
143
|
+
size = info.get("size") or info.get("Size")
|
|
144
|
+
|
|
145
|
+
callback = TqdmCallback(
|
|
146
|
+
size=size,
|
|
147
|
+
tqdm_kwargs=dict(
|
|
148
|
+
desc=f"Downloading {Path(s3path).name}",
|
|
149
|
+
unit="B",
|
|
150
|
+
unit_scale=True,
|
|
151
|
+
unit_divisor=1024,
|
|
152
|
+
dynamic_ncols=True,
|
|
153
|
+
leave=True,
|
|
154
|
+
mininterval=0.2,
|
|
155
|
+
smoothing=0.1,
|
|
156
|
+
miniters=1,
|
|
157
|
+
bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} "
|
|
158
|
+
"[{elapsed}<{remaining}, {rate_fmt}]",
|
|
159
|
+
),
|
|
160
|
+
)
|
|
161
|
+
filesystem.get(s3path, filepath, callback=callback)
|
|
123
162
|
|
|
124
163
|
def get_raw_bids_args(self) -> dict[str, Any]:
|
|
125
164
|
"""Helper to restrict the metadata record to the fields needed to locate a BIDS
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
1
3
|
from .api import EEGDashDataset
|
|
4
|
+
from .registry import register_openneuro_datasets
|
|
2
5
|
|
|
3
6
|
RELEASE_TO_OPENNEURO_DATASET_MAP = {
|
|
4
7
|
"R11": "ds005516",
|
|
@@ -67,3 +70,13 @@ class EEGChallengeDataset(EEGDashDataset):
|
|
|
67
70
|
s3_bucket=f"{s3_bucket}/{release}_L100",
|
|
68
71
|
**kwargs,
|
|
69
72
|
)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
registered_classes = register_openneuro_datasets(
|
|
76
|
+
summary_file=Path(__file__).with_name("dataset_summary.csv"),
|
|
77
|
+
base_class=EEGDashDataset,
|
|
78
|
+
namespace=globals(),
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
__all__ = ["EEGChallengeDataset"] + list(registered_classes.keys())
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _make_dataset_class(class_name: str, dataset_id: str, base_class: type) -> type:
    """Build one subclass of ``base_class`` bound to ``dataset_id``.

    The class is created with a real ``class`` statement (instead of
    ``type(name, bases, {"__init__": func})``) because zero-argument
    ``super()`` only works in functions that are lexically defined inside a
    class body; otherwise it raises
    ``RuntimeError: super(): __class__ cell not found`` on instantiation.
    """

    class _RegisteredDataset(base_class):
        # Dataset identifier injected into every query made by this class.
        _dataset = dataset_id

        def __init__(
            self,
            cache_dir: str,
            query: dict | None = None,
            s3_bucket: str | None = None,
            **kwargs,
        ):
            # Start from the bound dataset id; user-supplied keys are merged
            # on top (and therefore take precedence on conflicts).
            q = {"dataset": self._dataset}
            if query:
                q.update(query)
            super().__init__(
                query=q, cache_dir=cache_dir, s3_bucket=s3_bucket, **kwargs
            )

    # Expose the class under the public name rather than the helper's name.
    _RegisteredDataset.__name__ = class_name
    _RegisteredDataset.__qualname__ = class_name
    return _RegisteredDataset


def register_openneuro_datasets(
    summary_file: str | Path,
    *,
    base_class=None,
    namespace: Dict[str, Any] | None = None,
) -> Dict[str, type]:
    """Dynamically create dataset classes from a summary file.

    Parameters
    ----------
    summary_file : str | Path
        Path to a CSV file where each line starts with the dataset identifier.
        Empty rows and rows whose first cell is empty or starts with ``#``
        are skipped.
    base_class : type | None
        Base class for the generated datasets. If ``None``, defaults to
        :class:`eegdash.api.EEGDashDataset`.
    namespace : dict | None
        Mapping where the new classes will be registered. Defaults to the
        module's global namespace.

    Returns
    -------
    dict
        Mapping from class names (upper-cased dataset identifiers) to the
        generated classes.

    Raises
    ------
    OSError
        If ``summary_file`` cannot be opened (e.g. ``FileNotFoundError``).

    """
    if base_class is None:
        from .api import EEGDashDataset as base_class  # lazy import

    summary_path = Path(summary_file)
    namespace = namespace if namespace is not None else globals()
    # When the target namespace is a module's globals, record that module on
    # the generated classes so they can be located by qualified name (needed
    # by pickle, which looks classes up as ``module.qualname``).
    module_name = namespace.get("__name__")
    registered: Dict[str, type] = {}

    # newline="" is the csv-module recommended way to open files for reading.
    with summary_path.open(newline="", encoding="utf-8") as f:
        for row in csv.reader(f):
            if not row:
                continue
            dataset_id = row[0].strip()
            if not dataset_id or dataset_id.startswith("#"):
                continue
            class_name = dataset_id.upper()

            cls = _make_dataset_class(class_name, dataset_id, base_class)
            if isinstance(module_name, str):
                cls.__module__ = module_name
            namespace[class_name] = cls
            registered[class_name] = cls

    return registered
|
|
@@ -1,35 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.3.2.dev52
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
6
|
-
License:
|
|
7
|
-
|
|
8
|
-
Copyright (C) 2024-2025
|
|
9
|
-
|
|
10
|
-
Young Truong, UCSD, dt.young112@gmail.com
|
|
11
|
-
Arnaud Delorme, UCSD, adelorme@ucsd.edu
|
|
12
|
-
Aviv Dotan, BGU, avivdot@bgu.post.ac.il
|
|
13
|
-
Oren Shriki, BGU, shrikio@bgu.ac.il
|
|
14
|
-
Bruno Aristimunha, b.aristimunha@gmail.com
|
|
15
|
-
|
|
16
|
-
This program is free software; you can redistribute it and/or modify
|
|
17
|
-
it under the terms of the GNU General Public License as published by
|
|
18
|
-
the Free Software Foundation; either version 2 of the License, or
|
|
19
|
-
(at your option) any later version.
|
|
20
|
-
|
|
21
|
-
This program is distributed in the hope that it will be useful,
|
|
22
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
23
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
24
|
-
GNU General Public License for more details.
|
|
25
|
-
|
|
26
|
-
You should have received a copy of the GNU General Public License
|
|
27
|
-
along with this program; if not, write to the Free Software
|
|
28
|
-
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1.07 USA
|
|
29
|
-
|
|
6
|
+
License-Expression: GPL-3.0-only
|
|
30
7
|
Project-URL: Homepage, https://github.com/sccn/EEG-Dash-Data
|
|
31
8
|
Project-URL: Issues, https://github.com/sccn/EEG-Dash-Data/issues
|
|
32
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
33
9
|
Classifier: Operating System :: OS Independent
|
|
34
10
|
Classifier: Intended Audience :: Science/Research
|
|
35
11
|
Classifier: Intended Audience :: Developers
|
|
@@ -60,6 +36,8 @@ Requires-Dist: s3fs
|
|
|
60
36
|
Requires-Dist: scipy
|
|
61
37
|
Requires-Dist: tqdm
|
|
62
38
|
Requires-Dist: xarray
|
|
39
|
+
Requires-Dist: h5io>=0.2.4
|
|
40
|
+
Requires-Dist: pymatreader
|
|
63
41
|
Provides-Extra: tests
|
|
64
42
|
Requires-Dist: pytest; extra == "tests"
|
|
65
43
|
Requires-Dist: pytest-cov; extra == "tests"
|
|
@@ -98,6 +76,14 @@ Dynamic: license-file
|
|
|
98
76
|
|
|
99
77
|
# EEG-Dash
|
|
100
78
|
|
|
79
|
+
[](https://pypi.org/project/eegdash/)
|
|
80
|
+
[](https://sccn.github.io/eegdash)
|
|
81
|
+
|
|
82
|
+
[](LICENSE)
|
|
83
|
+
[](https://pypi.org/project/eegdash/)
|
|
84
|
+
[](https://pepy.tech/project/eegdash)
|
|
85
|
+
<!-- [](https://codecov.io/gh/sccn/eegdash) -->
|
|
86
|
+
|
|
101
87
|
To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
|
|
102
88
|
|
|
103
89
|
## Data source
|
|
@@ -143,7 +129,9 @@ To use the data from a single subject, enter:
|
|
|
143
129
|
from eegdash import EEGDashDataset
|
|
144
130
|
|
|
145
131
|
ds_NDARDB033FW5 = EEGDashDataset(
|
|
146
|
-
{"dataset": "ds005514", "task":
|
|
132
|
+
{"dataset": "ds005514", "task":
|
|
133
|
+
"RestingState", "subject": "NDARDB033FW5"},
|
|
134
|
+
cache_dir="."
|
|
147
135
|
)
|
|
148
136
|
```
|
|
149
137
|
|
|
@@ -155,7 +143,7 @@ To use the data from multiple subjects, enter:
|
|
|
155
143
|
from eegdash import EEGDashDataset
|
|
156
144
|
|
|
157
145
|
ds_ds005505rest = EEGDashDataset(
|
|
158
|
-
{"dataset": "ds005505", "task": "RestingState"}, target_name="sex"
|
|
146
|
+
{"dataset": "ds005505", "task": "RestingState"}, target_name="sex", cache_dir="."
|
|
159
147
|
)
|
|
160
148
|
```
|
|
161
149
|
|
|
@@ -8,6 +8,7 @@ eegdash/data_utils.py
|
|
|
8
8
|
eegdash/dataset.py
|
|
9
9
|
eegdash/mongodb.py
|
|
10
10
|
eegdash/preprocessing.py
|
|
11
|
+
eegdash/registry.py
|
|
11
12
|
eegdash/utils.py
|
|
12
13
|
eegdash.egg-info/PKG-INFO
|
|
13
14
|
eegdash.egg-info/SOURCES.txt
|
|
@@ -31,6 +32,8 @@ eegdash/features/feature_bank/spectral.py
|
|
|
31
32
|
eegdash/features/feature_bank/utils.py
|
|
32
33
|
tests/test_correctness.py
|
|
33
34
|
tests/test_dataset.py
|
|
35
|
+
tests/test_dataset_registration.py
|
|
34
36
|
tests/test_eegdash.py
|
|
37
|
+
tests/test_functional.py
|
|
35
38
|
tests/test_init.py
|
|
36
39
|
tests/test_mongo_connection.py
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
[build-system]
|
|
2
|
-
requires = ["setuptools>=
|
|
2
|
+
requires = ["setuptools>=77", "wheel"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
|
4
4
|
|
|
5
5
|
[project]
|
|
@@ -14,11 +14,10 @@ authors = [
|
|
|
14
14
|
]
|
|
15
15
|
description = "EEG data for machine learning"
|
|
16
16
|
readme = "README.md"
|
|
17
|
-
license =
|
|
17
|
+
license = "GPL-3.0-only"
|
|
18
18
|
requires-python = ">=3.10"
|
|
19
19
|
|
|
20
20
|
classifiers = [
|
|
21
|
-
"License :: OSI Approved :: MIT License",
|
|
22
21
|
"Operating System :: OS Independent",
|
|
23
22
|
"Intended Audience :: Science/Research",
|
|
24
23
|
"Intended Audience :: Developers",
|
|
@@ -48,6 +47,8 @@ dependencies = [
|
|
|
48
47
|
"scipy",
|
|
49
48
|
"tqdm",
|
|
50
49
|
"xarray",
|
|
50
|
+
"h5io >= 0.2.4",
|
|
51
|
+
"pymatreader",
|
|
51
52
|
]
|
|
52
53
|
|
|
53
54
|
[project.urls]
|
|
@@ -11,7 +11,7 @@ FILES_PER_RELEASE = [1342, 1405, 1812, 3342, 3326, 1227, 3100, 2320, 2885, 2516,
|
|
|
11
11
|
|
|
12
12
|
RELEASE_FILES = list(zip(RELEASES, FILES_PER_RELEASE))
|
|
13
13
|
|
|
14
|
-
CACHE_DIR = Path("
|
|
14
|
+
CACHE_DIR = (Path.home() / "mne_data" / "eeg_challenge_cache").resolve()
|
|
15
15
|
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
16
16
|
|
|
17
17
|
|
|
@@ -96,3 +96,15 @@ def test_consuming_data_r5():
|
|
|
96
96
|
)
|
|
97
97
|
raw = dataset_obj.datasets[0].raw
|
|
98
98
|
assert raw is not None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@pytest.mark.parametrize("eeg_dash_instance", [None, EEGDash()])
def test_eeg_dash_integration(eeg_dash_instance):
    """Load one R5 resting-state recording with and without a caller-supplied client.

    Parametrized over ``None`` and an ``EEGDash()`` instance for
    ``eeg_dash_instance``; in both cases the first dataset's raw object
    must be available.
    """
    # NOTE(review): the EEGDash() in the parametrize list is evaluated when
    # this module is imported/collected, not when the test runs.
    subject_query = {"task": "RestingState", "subject": "NDARAC350XUM"}
    challenge_dataset = EEGChallengeDataset(
        release="R5",
        query=subject_query,
        cache_dir=CACHE_DIR,
        eeg_dash_instance=eeg_dash_instance,
    )
    first_raw = challenge_dataset.datasets[0].raw
    assert first_raw is not None
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class DummyBase:
    """Empty stand-in used as the ``base_class`` for generated dataset classes."""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_register_openneuro_datasets(tmp_path: Path):
    """Check that one class per CSV row is generated and registered.

    Writes a two-row summary CSV into ``tmp_path``, registers classes from it
    with ``DummyBase`` as the base, and verifies names, namespace
    registration, inheritance and the bound ``_dataset`` identifier.
    """
    # Load registry.py directly from its file path (presumably so the test
    # does not need to import the full eegdash package -- confirm).
    registry_file = Path(__file__).resolve().parents[1] / "eegdash" / "registry.py"
    module_spec = importlib.util.spec_from_file_location("registry", registry_file)
    registry = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(registry)

    csv_rows = [
        "ds002718,18,18,1,74,250,14.844",
        "ds000001,1,1,1,1,1,1",
    ]
    summary = tmp_path / "dataset_summary.csv"
    summary.write_text("\n".join(csv_rows))

    namespace = {}
    registered = registry.register_openneuro_datasets(
        summary, namespace=namespace, base_class=DummyBase
    )

    # Class names are the upper-cased dataset identifiers.
    assert set(registered) == {"DS002718", "DS000001"}
    generated = registered["DS002718"]
    assert generated is namespace["DS002718"]
    assert issubclass(generated, DummyBase)
    assert generated._dataset == "ds002718"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from eegdash.dataset import EEGDashDataset
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_progress_bar_output(capsys, tmp_path):
    """Check that loading a recording prints a "Downloading" progress bar on stderr.

    The cache directory is wiped first so that accessing ``.raw`` cannot be
    served from a previous run's files.
    """
    # NOTE(review): the ``tmp_path`` fixture is requested but unused; the
    # cache lives under the user's home directory instead.
    temporary = Path.home().joinpath("mne_data", "temp").resolve()

    # Start from an empty cache directory.
    if temporary.exists():
        shutil.rmtree(temporary)
    temporary.mkdir(parents=True, exist_ok=True)

    subject_query = {
        "dataset": "ds005514",
        "task": "RestingState",
        "subject": "NDARDB033FW5",
    }
    eegdash_dataset = EEGDashDataset(query=subject_query, cache_dir=temporary)
    # Accessing ``raw`` is what triggers loading of the recording.
    _ = eegdash_dataset.datasets[0].raw

    captured = capsys.readouterr()
    # tqdm uses carriage returns; just assert a stable fragment:
    assert "Downloading" in captured.err
    assert captured.err  # non-empty
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|