eegdash 0.3.5.dev87__tar.gz → 0.3.5.dev89__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. Click here for more details.
- {eegdash-0.3.5.dev87/eegdash.egg-info → eegdash-0.3.5.dev89}/PKG-INFO +4 -17
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/__init__.py +1 -1
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/api.py +16 -4
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/data_utils.py +31 -7
- eegdash-0.3.5.dev89/eegdash/dataset.py +351 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89/eegdash.egg-info}/PKG-INFO +4 -17
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash.egg-info/SOURCES.txt +1 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash.egg-info/requires.txt +3 -16
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/pyproject.toml +3 -16
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_dataset.py +20 -11
- eegdash-0.3.5.dev89/tests/test_minirelease.py +112 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_offline.py +1 -1
- eegdash-0.3.5.dev87/eegdash/dataset.py +0 -82
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/LICENSE +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/MANIFEST.in +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/README.md +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/Makefile +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/conf.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/dataset_summary.rst +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/index.rst +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/install/install.rst +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/install/install_pip.rst +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/install/install_source.rst +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/overview.rst +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/data_config.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/dataset_summary.csv +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/__init__.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/datasets.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/decorators.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/extractors.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/__init__.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/complexity.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/connectivity.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/csp.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/dimensionality.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/signal.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/spectral.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/utils.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/inspect.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/serialization.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/utils.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/mongodb.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/preprocessing.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/registry.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/utils.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash.egg-info/dependency_links.txt +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash.egg-info/top_level.txt +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/setup.cfg +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_api.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_correctness.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_dataset_registration.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_eegdash.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_functional.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_init.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_mongo_connection.py +0 -0
- {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_query.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.3.5.
|
|
3
|
+
Version: 0.3.5.dev89
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
6
6
|
License-Expression: GPL-3.0-only
|
|
@@ -60,22 +60,9 @@ Requires-Dist: memory_profiler; extra == "docs"
|
|
|
60
60
|
Requires-Dist: ipython; extra == "docs"
|
|
61
61
|
Requires-Dist: lightgbm; extra == "docs"
|
|
62
62
|
Provides-Extra: all
|
|
63
|
-
Requires-Dist:
|
|
64
|
-
Requires-Dist:
|
|
65
|
-
Requires-Dist:
|
|
66
|
-
Requires-Dist: codecov; extra == "all"
|
|
67
|
-
Requires-Dist: pytest_cases; extra == "all"
|
|
68
|
-
Requires-Dist: pytest-benchmark; extra == "all"
|
|
69
|
-
Requires-Dist: sphinx; extra == "all"
|
|
70
|
-
Requires-Dist: sphinx_design; extra == "all"
|
|
71
|
-
Requires-Dist: sphinx_gallery; extra == "all"
|
|
72
|
-
Requires-Dist: sphinx_rtd_theme; extra == "all"
|
|
73
|
-
Requires-Dist: pydata-sphinx-theme; extra == "all"
|
|
74
|
-
Requires-Dist: sphinx-autobuild; extra == "all"
|
|
75
|
-
Requires-Dist: numpydoc; extra == "all"
|
|
76
|
-
Requires-Dist: memory_profiler; extra == "all"
|
|
77
|
-
Requires-Dist: ipython; extra == "all"
|
|
78
|
-
Requires-Dist: lightgbm; extra == "all"
|
|
63
|
+
Requires-Dist: eegdash[docs]; extra == "all"
|
|
64
|
+
Requires-Dist: eegdash[dev]; extra == "all"
|
|
65
|
+
Requires-Dist: eegdash[tests]; extra == "all"
|
|
79
66
|
Dynamic: license-file
|
|
80
67
|
|
|
81
68
|
# EEG-Dash
|
|
@@ -591,7 +591,7 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
591
591
|
self,
|
|
592
592
|
query: dict | None = None,
|
|
593
593
|
cache_dir: str = "~/eegdash_cache",
|
|
594
|
-
dataset: str | None = None,
|
|
594
|
+
dataset: str | list[str] | None = None,
|
|
595
595
|
description_fields: list[str] = [
|
|
596
596
|
"subject",
|
|
597
597
|
"session",
|
|
@@ -669,6 +669,13 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
669
669
|
}
|
|
670
670
|
base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in query_kwargs}
|
|
671
671
|
|
|
672
|
+
# If user provided a dataset name via the dedicated parameter (and we're not
|
|
673
|
+
# loading from a local directory), treat it as a query filter. Accept str or list.
|
|
674
|
+
if data_dir is None and dataset is not None:
|
|
675
|
+
# Allow callers to pass a single dataset id (str) or a list of them.
|
|
676
|
+
# If list is provided, let _build_query_from_kwargs turn it into $in later.
|
|
677
|
+
query_kwargs.setdefault("dataset", dataset)
|
|
678
|
+
|
|
672
679
|
if query and query_kwargs:
|
|
673
680
|
raise ValueError(
|
|
674
681
|
"Provide either a 'query' dictionary or keyword arguments for filtering, not both."
|
|
@@ -688,15 +695,20 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
688
695
|
]
|
|
689
696
|
elif data_dir:
|
|
690
697
|
# This path loads from a local directory and is not affected by DB query logic
|
|
691
|
-
if isinstance(data_dir, str
|
|
698
|
+
if isinstance(data_dir, (str, Path)):
|
|
692
699
|
datasets = self.load_bids_dataset(
|
|
693
|
-
dataset=dataset
|
|
700
|
+
dataset=dataset
|
|
701
|
+
if isinstance(dataset, str)
|
|
702
|
+
else (dataset[0] if dataset else None),
|
|
694
703
|
data_dir=data_dir,
|
|
695
704
|
description_fields=description_fields,
|
|
696
705
|
s3_bucket=s3_bucket,
|
|
697
706
|
**base_dataset_kwargs,
|
|
698
707
|
)
|
|
699
708
|
else:
|
|
709
|
+
assert dataset is not None, (
|
|
710
|
+
"dataset must be provided when passing multiple data_dir"
|
|
711
|
+
)
|
|
700
712
|
assert len(data_dir) == len(dataset), (
|
|
701
713
|
"Number of datasets and directories must match"
|
|
702
714
|
)
|
|
@@ -748,7 +760,7 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
748
760
|
|
|
749
761
|
def find_datasets(
|
|
750
762
|
self,
|
|
751
|
-
query: dict[str, Any],
|
|
763
|
+
query: dict[str, Any] | None,
|
|
752
764
|
description_fields: list[str],
|
|
753
765
|
query_kwargs: dict,
|
|
754
766
|
base_dataset_kwargs: dict,
|
|
@@ -2,6 +2,7 @@ import json
|
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
4
|
import re
|
|
5
|
+
import traceback
|
|
5
6
|
import warnings
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
from typing import Any
|
|
@@ -66,9 +67,7 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
66
67
|
self.s3_open_neuro = True
|
|
67
68
|
|
|
68
69
|
self.filecache = self.cache_dir / record["bidspath"]
|
|
69
|
-
|
|
70
70
|
self.bids_root = self.cache_dir / record["dataset"]
|
|
71
|
-
|
|
72
71
|
self.bidspath = BIDSPath(
|
|
73
72
|
root=self.bids_root,
|
|
74
73
|
datatype="eeg",
|
|
@@ -99,6 +98,9 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
99
98
|
)
|
|
100
99
|
if not self.s3_open_neuro:
|
|
101
100
|
self.s3file = re.sub(r"(^|/)ds\d{6}/", r"\1", self.s3file, count=1)
|
|
101
|
+
if self.s3file.endswith(".set"):
|
|
102
|
+
self.s3file = self.s3file[:-4] + ".bdf"
|
|
103
|
+
self.filecache = self.filecache.with_suffix(".bdf")
|
|
102
104
|
|
|
103
105
|
self.filecache.parent.mkdir(parents=True, exist_ok=True)
|
|
104
106
|
info = filesystem.info(self.s3file)
|
|
@@ -132,11 +134,21 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
132
134
|
anon=True, client_kwargs={"region_name": "us-east-2"}
|
|
133
135
|
)
|
|
134
136
|
for i, dep in enumerate(self.bids_dependencies):
|
|
137
|
+
if not self.s3_open_neuro:
|
|
138
|
+
# fix this when our bucket is integrated into the
|
|
139
|
+
# mongodb
|
|
140
|
+
# if the file have ".set" replace to ".bdf"
|
|
141
|
+
if dep.endswith(".set"):
|
|
142
|
+
dep = dep[:-4] + ".bdf"
|
|
143
|
+
|
|
135
144
|
s3path = self.get_s3path(dep)
|
|
136
145
|
if not self.s3_open_neuro:
|
|
137
146
|
dep = self.bids_dependencies_original[i]
|
|
138
147
|
|
|
139
148
|
filepath = self.cache_dir / dep
|
|
149
|
+
if not self.s3_open_neuro:
|
|
150
|
+
if self.filecache.suffix == ".set":
|
|
151
|
+
self.filecache = self.filecache.with_suffix(".bdf")
|
|
140
152
|
# here, we download the dependency and it is fine
|
|
141
153
|
# in the case of the competition.
|
|
142
154
|
if not filepath.exists():
|
|
@@ -179,9 +191,23 @@ class EEGDashBaseDataset(BaseDataset):
|
|
|
179
191
|
# capturing any warnings
|
|
180
192
|
# to-do: remove this once is fixed on the mne-bids side.
|
|
181
193
|
with warnings.catch_warnings(record=True) as w:
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
194
|
+
try:
|
|
195
|
+
# TO-DO: remove this once is fixed on the our side
|
|
196
|
+
if not self.s3_open_neuro:
|
|
197
|
+
self.bidspath = self.bidspath.update(extension=".bdf")
|
|
198
|
+
|
|
199
|
+
self._raw = mne_bids.read_raw_bids(
|
|
200
|
+
bids_path=self.bidspath, verbose="ERROR"
|
|
201
|
+
)
|
|
202
|
+
except Exception as e:
|
|
203
|
+
logger.error(
|
|
204
|
+
f"Error while reading BIDS file: {self.bidspath}\n"
|
|
205
|
+
"This may be due to a missing or corrupted file.\n"
|
|
206
|
+
"Please check the file and try again."
|
|
207
|
+
)
|
|
208
|
+
logger.error(f"Exception: {e}")
|
|
209
|
+
logger.error(traceback.format_exc())
|
|
210
|
+
raise e
|
|
185
211
|
for warning in w:
|
|
186
212
|
logger.warning(
|
|
187
213
|
f"Warning while reading BIDS file: {warning.message}"
|
|
@@ -292,7 +318,6 @@ class EEGDashBaseRaw(BaseRaw):
|
|
|
292
318
|
)
|
|
293
319
|
|
|
294
320
|
def get_s3path(self, filepath):
|
|
295
|
-
print(f"Getting S3 path for {filepath}")
|
|
296
321
|
return f"{self._AWS_BUCKET}/{filepath}"
|
|
297
322
|
|
|
298
323
|
def _download_s3(self) -> None:
|
|
@@ -513,7 +538,6 @@ class EEGBIDSDataset:
|
|
|
513
538
|
with os.scandir(directory) as entries:
|
|
514
539
|
for entry in entries:
|
|
515
540
|
if entry.is_file() and entry.name.endswith(extension):
|
|
516
|
-
print("Adding ", entry.path)
|
|
517
541
|
result_files.append(entry.path)
|
|
518
542
|
elif entry.is_dir():
|
|
519
543
|
# check that entry path doesn't contain any name in ignore list
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from .api import EEGDashDataset
|
|
4
|
+
from .registry import register_openneuro_datasets
|
|
5
|
+
|
|
6
|
+
RELEASE_TO_OPENNEURO_DATASET_MAP = {
|
|
7
|
+
"R11": "ds005516",
|
|
8
|
+
"R10": "ds005515",
|
|
9
|
+
"R9": "ds005514",
|
|
10
|
+
"R8": "ds005512",
|
|
11
|
+
"R7": "ds005511",
|
|
12
|
+
"R6": "ds005510",
|
|
13
|
+
"R4": "ds005508",
|
|
14
|
+
"R5": "ds005509",
|
|
15
|
+
"R3": "ds005507",
|
|
16
|
+
"R2": "ds005506",
|
|
17
|
+
"R1": "ds005505",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
SUBJECT_MINI_RELEASE_MAP = {
|
|
21
|
+
"R11": [
|
|
22
|
+
"NDARAB678VYW",
|
|
23
|
+
"NDARAG788YV9",
|
|
24
|
+
"NDARAM946HJE",
|
|
25
|
+
"NDARAY977BZT",
|
|
26
|
+
"NDARAZ532KK0",
|
|
27
|
+
"NDARCE912ZXW",
|
|
28
|
+
"NDARCM214WFE",
|
|
29
|
+
"NDARDL033XRG",
|
|
30
|
+
"NDARDT889RT9",
|
|
31
|
+
"NDARDZ794ZVP",
|
|
32
|
+
"NDAREV869CPW",
|
|
33
|
+
"NDARFN221WW5",
|
|
34
|
+
"NDARFV289RKB",
|
|
35
|
+
"NDARFY623ZTE",
|
|
36
|
+
"NDARGA890MKA",
|
|
37
|
+
"NDARHN206XY3",
|
|
38
|
+
"NDARHP518FUR",
|
|
39
|
+
"NDARJL292RYV",
|
|
40
|
+
"NDARKM199DXW",
|
|
41
|
+
"NDARKW236TN7",
|
|
42
|
+
],
|
|
43
|
+
"R10": [
|
|
44
|
+
"NDARAR935TGZ",
|
|
45
|
+
"NDARAV474ADJ",
|
|
46
|
+
"NDARCB869VM8",
|
|
47
|
+
"NDARCJ667UPL",
|
|
48
|
+
"NDARCM677TC1",
|
|
49
|
+
"NDARET671FTC",
|
|
50
|
+
"NDARKM061NHZ",
|
|
51
|
+
"NDARLD501HDK",
|
|
52
|
+
"NDARLL176DJR",
|
|
53
|
+
"NDARMT791WDH",
|
|
54
|
+
"NDARMW299ZAB",
|
|
55
|
+
"NDARNC405WJA",
|
|
56
|
+
"NDARNP962TJK",
|
|
57
|
+
"NDARPB967KU7",
|
|
58
|
+
"NDARRU560AGK",
|
|
59
|
+
"NDARTB173LY2",
|
|
60
|
+
"NDARUW377KAE",
|
|
61
|
+
"NDARVH565FX9",
|
|
62
|
+
"NDARVP799KGY",
|
|
63
|
+
"NDARVY962GB5",
|
|
64
|
+
],
|
|
65
|
+
"R9": [
|
|
66
|
+
"NDARAC589YMB",
|
|
67
|
+
"NDARAC853CR6",
|
|
68
|
+
"NDARAH239PGG",
|
|
69
|
+
"NDARAL897CYV",
|
|
70
|
+
"NDARAN160GUF",
|
|
71
|
+
"NDARAP049KXJ",
|
|
72
|
+
"NDARAP457WB5",
|
|
73
|
+
"NDARAW216PM7",
|
|
74
|
+
"NDARBA004KBT",
|
|
75
|
+
"NDARBD328NUQ",
|
|
76
|
+
"NDARBF042LDM",
|
|
77
|
+
"NDARBH019KPD",
|
|
78
|
+
"NDARBH728DFK",
|
|
79
|
+
"NDARBM370JCB",
|
|
80
|
+
"NDARBU183TDJ",
|
|
81
|
+
"NDARBW971DCW",
|
|
82
|
+
"NDARBZ444ZHK",
|
|
83
|
+
"NDARCC620ZFT",
|
|
84
|
+
"NDARCD182XT1",
|
|
85
|
+
"NDARCK113CJM",
|
|
86
|
+
],
|
|
87
|
+
"R8": [
|
|
88
|
+
"NDARAB514MAJ",
|
|
89
|
+
"NDARAD571FLB",
|
|
90
|
+
"NDARAF003VCL",
|
|
91
|
+
"NDARAG191AE8",
|
|
92
|
+
"NDARAJ977PRJ",
|
|
93
|
+
"NDARAP912JK3",
|
|
94
|
+
"NDARAV454VF0",
|
|
95
|
+
"NDARAY298THW",
|
|
96
|
+
"NDARBJ375VP4",
|
|
97
|
+
"NDARBT436PMT",
|
|
98
|
+
"NDARBV630BK6",
|
|
99
|
+
"NDARCB627KDN",
|
|
100
|
+
"NDARCC059WTH",
|
|
101
|
+
"NDARCM953HKD",
|
|
102
|
+
"NDARCN681CXW",
|
|
103
|
+
"NDARCT889DMB",
|
|
104
|
+
"NDARDJ204EPU",
|
|
105
|
+
"NDARDJ544BU5",
|
|
106
|
+
"NDARDP292DVC",
|
|
107
|
+
"NDARDW178AC6",
|
|
108
|
+
],
|
|
109
|
+
"R7": [
|
|
110
|
+
"NDARAY475AKD",
|
|
111
|
+
"NDARBW026UGE",
|
|
112
|
+
"NDARCK162REX",
|
|
113
|
+
"NDARCK481KRH",
|
|
114
|
+
"NDARCV378MMX",
|
|
115
|
+
"NDARCX462NVA",
|
|
116
|
+
"NDARDJ970ELG",
|
|
117
|
+
"NDARDU617ZW1",
|
|
118
|
+
"NDAREM609ZXW",
|
|
119
|
+
"NDAREW074ZM2",
|
|
120
|
+
"NDARFE555KXB",
|
|
121
|
+
"NDARFT176NJP",
|
|
122
|
+
"NDARGK442YHH",
|
|
123
|
+
"NDARGM439FZD",
|
|
124
|
+
"NDARGT634DUJ",
|
|
125
|
+
"NDARHE283KZN",
|
|
126
|
+
"NDARHG260BM9",
|
|
127
|
+
"NDARHL684WYU",
|
|
128
|
+
"NDARHN224TPA",
|
|
129
|
+
"NDARHP841RMR",
|
|
130
|
+
],
|
|
131
|
+
"R6": [
|
|
132
|
+
"NDARAD224CRB",
|
|
133
|
+
"NDARAE301XTM",
|
|
134
|
+
"NDARAT680GJA",
|
|
135
|
+
"NDARCA578CEB",
|
|
136
|
+
"NDARDZ147ETZ",
|
|
137
|
+
"NDARFL793LDE",
|
|
138
|
+
"NDARFX710UZA",
|
|
139
|
+
"NDARGE994BMX",
|
|
140
|
+
"NDARGP191YHN",
|
|
141
|
+
"NDARGV436PFT",
|
|
142
|
+
"NDARHF545HFW",
|
|
143
|
+
"NDARHP039DBU",
|
|
144
|
+
"NDARHT774ZK1",
|
|
145
|
+
"NDARJA830BYV",
|
|
146
|
+
"NDARKB614KGY",
|
|
147
|
+
"NDARKM250ET5",
|
|
148
|
+
"NDARKZ085UKQ",
|
|
149
|
+
"NDARLB581AXF",
|
|
150
|
+
"NDARNJ899HW7",
|
|
151
|
+
"NDARRZ606EDP",
|
|
152
|
+
],
|
|
153
|
+
"R4": [
|
|
154
|
+
"NDARAC350BZ0",
|
|
155
|
+
"NDARAD615WLJ",
|
|
156
|
+
"NDARAG584XLU",
|
|
157
|
+
"NDARAH503YG1",
|
|
158
|
+
"NDARAX272ZJL",
|
|
159
|
+
"NDARAY461TZZ",
|
|
160
|
+
"NDARBC734UVY",
|
|
161
|
+
"NDARBL444FBA",
|
|
162
|
+
"NDARBT640EBN",
|
|
163
|
+
"NDARBU098PJT",
|
|
164
|
+
"NDARBU928LV0",
|
|
165
|
+
"NDARBV059CGE",
|
|
166
|
+
"NDARCG037CX4",
|
|
167
|
+
"NDARCG947ZC0",
|
|
168
|
+
"NDARCH001CN2",
|
|
169
|
+
"NDARCU001ZN7",
|
|
170
|
+
"NDARCW497XW2",
|
|
171
|
+
"NDARCX053GU5",
|
|
172
|
+
"NDARDF568GL5",
|
|
173
|
+
"NDARDJ092YKH",
|
|
174
|
+
],
|
|
175
|
+
"R5": [
|
|
176
|
+
"NDARAH793FBF",
|
|
177
|
+
"NDARAJ689BVN",
|
|
178
|
+
"NDARAP785CTE",
|
|
179
|
+
"NDARAU708TL8",
|
|
180
|
+
"NDARBE091BGD",
|
|
181
|
+
"NDARBE103DHM",
|
|
182
|
+
"NDARBF851NH6",
|
|
183
|
+
"NDARBH228RDW",
|
|
184
|
+
"NDARBJ674TVU",
|
|
185
|
+
"NDARBM433VER",
|
|
186
|
+
"NDARCA740UC8",
|
|
187
|
+
"NDARCU633GCZ",
|
|
188
|
+
"NDARCU736GZ1",
|
|
189
|
+
"NDARCU744XWL",
|
|
190
|
+
"NDARDC843HHM",
|
|
191
|
+
"NDARDH086ZKK",
|
|
192
|
+
"NDARDL305BT8",
|
|
193
|
+
"NDARDU853XZ6",
|
|
194
|
+
"NDARDV245WJG",
|
|
195
|
+
"NDAREC480KFA",
|
|
196
|
+
],
|
|
197
|
+
"R3": [
|
|
198
|
+
"NDARAA948VFH",
|
|
199
|
+
"NDARAD774HAZ",
|
|
200
|
+
"NDARAE828CML",
|
|
201
|
+
"NDARAG340ERT",
|
|
202
|
+
"NDARBA839HLG",
|
|
203
|
+
"NDARBE641DGZ",
|
|
204
|
+
"NDARBG574KF4",
|
|
205
|
+
"NDARBM642JFT",
|
|
206
|
+
"NDARCL016NHB",
|
|
207
|
+
"NDARCV944JA6",
|
|
208
|
+
"NDARCY178KJP",
|
|
209
|
+
"NDARDY150ZP9",
|
|
210
|
+
"NDAREC542MH3",
|
|
211
|
+
"NDAREK549XUQ",
|
|
212
|
+
"NDAREM887YY8",
|
|
213
|
+
"NDARFA815FXE",
|
|
214
|
+
"NDARFF644ZGD",
|
|
215
|
+
"NDARFV557XAA",
|
|
216
|
+
"NDARFV780ABD",
|
|
217
|
+
"NDARGB102NWJ",
|
|
218
|
+
],
|
|
219
|
+
"R2": [
|
|
220
|
+
"NDARAB793GL3",
|
|
221
|
+
"NDARAM675UR8",
|
|
222
|
+
"NDARBM839WR5",
|
|
223
|
+
"NDARBU730PN8",
|
|
224
|
+
"NDARCT974NAJ",
|
|
225
|
+
"NDARCW933FD5",
|
|
226
|
+
"NDARCZ770BRG",
|
|
227
|
+
"NDARDW741HCF",
|
|
228
|
+
"NDARDZ058NZN",
|
|
229
|
+
"NDAREC377AU2",
|
|
230
|
+
"NDAREM500WWH",
|
|
231
|
+
"NDAREV527ZRF",
|
|
232
|
+
"NDAREV601CE7",
|
|
233
|
+
"NDARFF070XHV",
|
|
234
|
+
"NDARFR108JNB",
|
|
235
|
+
"NDARFT305CG1",
|
|
236
|
+
"NDARGA056TMW",
|
|
237
|
+
"NDARGH775KF5",
|
|
238
|
+
"NDARGJ878ZP4",
|
|
239
|
+
"NDARHA387FPM",
|
|
240
|
+
],
|
|
241
|
+
"R1": [
|
|
242
|
+
"NDARAC904DMU",
|
|
243
|
+
"NDARAM704GKZ",
|
|
244
|
+
"NDARAP359UM6",
|
|
245
|
+
"NDARBD879MBX",
|
|
246
|
+
"NDARBH024NH2",
|
|
247
|
+
"NDARBK082PDD",
|
|
248
|
+
"NDARCA153NKE",
|
|
249
|
+
"NDARCE721YB5",
|
|
250
|
+
"NDARCJ594BWQ",
|
|
251
|
+
"NDARCN669XPR",
|
|
252
|
+
"NDARCW094JCG",
|
|
253
|
+
"NDARCZ947WU5",
|
|
254
|
+
"NDARDH670PXH",
|
|
255
|
+
"NDARDL511UND",
|
|
256
|
+
"NDARDU986RBM",
|
|
257
|
+
"NDAREM731BYM",
|
|
258
|
+
"NDAREN519BLJ",
|
|
259
|
+
"NDARFK610GY5",
|
|
260
|
+
"NDARFT581ZW5",
|
|
261
|
+
"NDARFW972KFQ",
|
|
262
|
+
],
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
class EEGChallengeDataset(EEGDashDataset):
|
|
267
|
+
def __init__(
|
|
268
|
+
self,
|
|
269
|
+
release: str,
|
|
270
|
+
cache_dir: str,
|
|
271
|
+
mini: bool = True,
|
|
272
|
+
query: dict | None = None,
|
|
273
|
+
s3_bucket: str | None = "s3://nmdatasets/NeurIPS25",
|
|
274
|
+
**kwargs,
|
|
275
|
+
):
|
|
276
|
+
"""Create a new EEGDashDataset from a given query or local BIDS dataset directory
|
|
277
|
+
and dataset name. An EEGDashDataset is pooled collection of EEGDashBaseDataset
|
|
278
|
+
instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.
|
|
279
|
+
|
|
280
|
+
Parameters
|
|
281
|
+
----------
|
|
282
|
+
release: str
|
|
283
|
+
Release name. Can be one of ["R1", ..., "R11"]
|
|
284
|
+
mini: bool, default True
|
|
285
|
+
Whether to use the mini-release version of the dataset. It is recommended
|
|
286
|
+
to use the mini version for faster training and evaluation.
|
|
287
|
+
query : dict | None
|
|
288
|
+
Optionally a dictionary that specifies a query to be executed,
|
|
289
|
+
in addition to the dataset (automatically inferred from the release argument).
|
|
290
|
+
See EEGDash.find() for details on the query format.
|
|
291
|
+
cache_dir : str
|
|
292
|
+
A directory where the dataset will be cached locally.
|
|
293
|
+
s3_bucket : str | None
|
|
294
|
+
An optional S3 bucket URI to use instead of the
|
|
295
|
+
default OpenNeuro bucket for loading data files.
|
|
296
|
+
kwargs : dict
|
|
297
|
+
Additional keyword arguments to be passed to the EEGDashDataset
|
|
298
|
+
constructor.
|
|
299
|
+
|
|
300
|
+
"""
|
|
301
|
+
self.release = release
|
|
302
|
+
self.mini = mini
|
|
303
|
+
|
|
304
|
+
if release not in RELEASE_TO_OPENNEURO_DATASET_MAP:
|
|
305
|
+
raise ValueError(
|
|
306
|
+
f"Unknown release: {release}, expected one of {list(RELEASE_TO_OPENNEURO_DATASET_MAP.keys())}"
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
dataset_parameters = []
|
|
310
|
+
if isinstance(release, str):
|
|
311
|
+
dataset_parameters.append(RELEASE_TO_OPENNEURO_DATASET_MAP[release])
|
|
312
|
+
else:
|
|
313
|
+
raise ValueError(
|
|
314
|
+
f"Unknown release type: {type(release)}, the expected type is str."
|
|
315
|
+
)
|
|
316
|
+
|
|
317
|
+
if query and "dataset" in query:
|
|
318
|
+
raise ValueError(
|
|
319
|
+
"Query using the parameters `dataset` with the class EEGChallengeDataset is not possible."
|
|
320
|
+
"Please use the release argument instead, or the object EEGDashDataset instead."
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
if self.mini:
|
|
324
|
+
if query and "subject" in query:
|
|
325
|
+
raise ValueError(
|
|
326
|
+
"Query using the parameters `subject` with the class EEGChallengeDataset and `mini==True` is not possible."
|
|
327
|
+
"Please don't use the `subject` selection twice."
|
|
328
|
+
"Set `mini=False` to use the `subject` selection."
|
|
329
|
+
)
|
|
330
|
+
kwargs["subject"] = SUBJECT_MINI_RELEASE_MAP[release]
|
|
331
|
+
s3_bucket = f"{s3_bucket}/{release}_mini_L100_bdf"
|
|
332
|
+
else:
|
|
333
|
+
s3_bucket = f"{s3_bucket}/{release}_L100_bdf"
|
|
334
|
+
|
|
335
|
+
super().__init__(
|
|
336
|
+
dataset=dataset_parameters,
|
|
337
|
+
query=query,
|
|
338
|
+
cache_dir=cache_dir,
|
|
339
|
+
s3_bucket=s3_bucket,
|
|
340
|
+
**kwargs,
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
registered_classes = register_openneuro_datasets(
|
|
345
|
+
summary_file=Path(__file__).with_name("dataset_summary.csv"),
|
|
346
|
+
base_class=EEGDashDataset,
|
|
347
|
+
namespace=globals(),
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
__all__ = ["EEGChallengeDataset"] + list(registered_classes.keys())
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.3.5.
|
|
3
|
+
Version: 0.3.5.dev89
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
6
6
|
License-Expression: GPL-3.0-only
|
|
@@ -60,22 +60,9 @@ Requires-Dist: memory_profiler; extra == "docs"
|
|
|
60
60
|
Requires-Dist: ipython; extra == "docs"
|
|
61
61
|
Requires-Dist: lightgbm; extra == "docs"
|
|
62
62
|
Provides-Extra: all
|
|
63
|
-
Requires-Dist:
|
|
64
|
-
Requires-Dist:
|
|
65
|
-
Requires-Dist:
|
|
66
|
-
Requires-Dist: codecov; extra == "all"
|
|
67
|
-
Requires-Dist: pytest_cases; extra == "all"
|
|
68
|
-
Requires-Dist: pytest-benchmark; extra == "all"
|
|
69
|
-
Requires-Dist: sphinx; extra == "all"
|
|
70
|
-
Requires-Dist: sphinx_design; extra == "all"
|
|
71
|
-
Requires-Dist: sphinx_gallery; extra == "all"
|
|
72
|
-
Requires-Dist: sphinx_rtd_theme; extra == "all"
|
|
73
|
-
Requires-Dist: pydata-sphinx-theme; extra == "all"
|
|
74
|
-
Requires-Dist: sphinx-autobuild; extra == "all"
|
|
75
|
-
Requires-Dist: numpydoc; extra == "all"
|
|
76
|
-
Requires-Dist: memory_profiler; extra == "all"
|
|
77
|
-
Requires-Dist: ipython; extra == "all"
|
|
78
|
-
Requires-Dist: lightgbm; extra == "all"
|
|
63
|
+
Requires-Dist: eegdash[docs]; extra == "all"
|
|
64
|
+
Requires-Dist: eegdash[dev]; extra == "all"
|
|
65
|
+
Requires-Dist: eegdash[tests]; extra == "all"
|
|
79
66
|
Dynamic: license-file
|
|
80
67
|
|
|
81
68
|
# EEG-Dash
|
|
@@ -16,22 +16,9 @@ eeglabio
|
|
|
16
16
|
tabulate
|
|
17
17
|
|
|
18
18
|
[all]
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
codecov
|
|
23
|
-
pytest_cases
|
|
24
|
-
pytest-benchmark
|
|
25
|
-
sphinx
|
|
26
|
-
sphinx_design
|
|
27
|
-
sphinx_gallery
|
|
28
|
-
sphinx_rtd_theme
|
|
29
|
-
pydata-sphinx-theme
|
|
30
|
-
sphinx-autobuild
|
|
31
|
-
numpydoc
|
|
32
|
-
memory_profiler
|
|
33
|
-
ipython
|
|
34
|
-
lightgbm
|
|
19
|
+
eegdash[docs]
|
|
20
|
+
eegdash[dev]
|
|
21
|
+
eegdash[tests]
|
|
35
22
|
|
|
36
23
|
[dev]
|
|
37
24
|
pre-commit
|
|
@@ -84,22 +84,9 @@ docs = [
|
|
|
84
84
|
]
|
|
85
85
|
|
|
86
86
|
all = [
|
|
87
|
-
"
|
|
88
|
-
"
|
|
89
|
-
"
|
|
90
|
-
"codecov",
|
|
91
|
-
"pytest_cases",
|
|
92
|
-
"pytest-benchmark",
|
|
93
|
-
"sphinx",
|
|
94
|
-
"sphinx_design",
|
|
95
|
-
"sphinx_gallery",
|
|
96
|
-
"sphinx_rtd_theme",
|
|
97
|
-
"pydata-sphinx-theme",
|
|
98
|
-
"sphinx-autobuild",
|
|
99
|
-
"numpydoc",
|
|
100
|
-
"memory_profiler",
|
|
101
|
-
"ipython",
|
|
102
|
-
"lightgbm",
|
|
87
|
+
"eegdash[docs]",
|
|
88
|
+
"eegdash[dev]",
|
|
89
|
+
"eegdash[tests]",
|
|
103
90
|
]
|
|
104
91
|
|
|
105
92
|
[tool.setuptools]
|
|
@@ -16,7 +16,7 @@ CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def _load_release(release):
|
|
19
|
-
ds = EEGChallengeDataset(release=release, cache_dir=CACHE_DIR)
|
|
19
|
+
ds = EEGChallengeDataset(release=release, mini=False, cache_dir=CACHE_DIR)
|
|
20
20
|
getattr(ds, "description", None)
|
|
21
21
|
return ds
|
|
22
22
|
|
|
@@ -31,10 +31,10 @@ def warmed_mongo():
|
|
|
31
31
|
|
|
32
32
|
def test_eeg_challenge_dataset_initialization():
|
|
33
33
|
"""Test the initialization of EEGChallengeDataset."""
|
|
34
|
-
dataset = EEGChallengeDataset(release="R5", cache_dir=CACHE_DIR)
|
|
34
|
+
dataset = EEGChallengeDataset(release="R5", mini=False, cache_dir=CACHE_DIR)
|
|
35
35
|
|
|
36
36
|
release = "R5"
|
|
37
|
-
expected_bucket_prefix = f"s3://nmdatasets/NeurIPS25/{release}
|
|
37
|
+
expected_bucket_prefix = f"s3://nmdatasets/NeurIPS25/{release}_L100_bdf"
|
|
38
38
|
assert dataset.s3_bucket == expected_bucket_prefix, (
|
|
39
39
|
f"Unexpected s3_bucket: {dataset.s3_bucket} (expected {expected_bucket_prefix})"
|
|
40
40
|
)
|
|
@@ -60,7 +60,7 @@ def test_eeg_challenge_dataset_initialization():
|
|
|
60
60
|
|
|
61
61
|
@pytest.mark.parametrize("release, number_files", RELEASE_FILES)
|
|
62
62
|
def test_eeg_challenge_dataset_amount_files(release, number_files):
|
|
63
|
-
dataset = EEGChallengeDataset(release=release, cache_dir=CACHE_DIR)
|
|
63
|
+
dataset = EEGChallengeDataset(release=release, mini=False, cache_dir=CACHE_DIR)
|
|
64
64
|
assert len(dataset.datasets) == number_files
|
|
65
65
|
|
|
66
66
|
|
|
@@ -88,22 +88,31 @@ def test_mongodb_load_under_sometime(release):
|
|
|
88
88
|
assert duration < 30, f"{release} took {duration:.2f}s"
|
|
89
89
|
|
|
90
90
|
|
|
91
|
-
|
|
91
|
+
@pytest.mark.parametrize("mini", [True, False])
|
|
92
|
+
@pytest.mark.parametrize("release", RELEASES)
|
|
93
|
+
def test_consuming_one_raw(release, mini):
|
|
94
|
+
if mini:
|
|
95
|
+
cache_dir = CACHE_DIR / "mini"
|
|
96
|
+
cache_dir.mkdir(parents=True, exist_ok=True)
|
|
97
|
+
else:
|
|
98
|
+
cache_dir = CACHE_DIR
|
|
92
99
|
dataset_obj = EEGChallengeDataset(
|
|
93
|
-
release=
|
|
94
|
-
|
|
95
|
-
cache_dir=
|
|
100
|
+
release=release,
|
|
101
|
+
task="RestingState",
|
|
102
|
+
cache_dir=cache_dir,
|
|
103
|
+
mini=mini,
|
|
96
104
|
)
|
|
97
105
|
raw = dataset_obj.datasets[0].raw
|
|
98
106
|
assert raw is not None
|
|
99
107
|
|
|
100
108
|
|
|
101
109
|
@pytest.mark.parametrize("eeg_dash_instance", [None, EEGDash()])
|
|
102
|
-
def test_eeg_dash_integration(eeg_dash_instance):
|
|
110
|
+
def test_eeg_dash_integration(eeg_dash_instance, release="R5", mini=True):
|
|
103
111
|
dataset_obj = EEGChallengeDataset(
|
|
104
|
-
release=
|
|
105
|
-
|
|
112
|
+
release=release,
|
|
113
|
+
task="RestingState",
|
|
106
114
|
cache_dir=CACHE_DIR,
|
|
115
|
+
mini=mini,
|
|
107
116
|
eeg_dash_instance=eeg_dash_instance,
|
|
108
117
|
)
|
|
109
118
|
raw = dataset_obj.datasets[0].raw
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from eegdash.dataset import EEGChallengeDataset
|
|
7
|
+
|
|
8
|
+
# Shared cache directory constant for all tests in the suite.
|
|
9
|
+
EEG_CHALLENGE_CACHE_DIR = (
|
|
10
|
+
Path.home() / "mne_data" / "eeg_challenge_cache" / "mini"
|
|
11
|
+
).resolve()
|
|
12
|
+
EEG_CHALLENGE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@pytest.fixture(scope="session")
|
|
16
|
+
def warmed_mongo():
|
|
17
|
+
"""Skip tests gracefully if Mongo is not reachable."""
|
|
18
|
+
try:
|
|
19
|
+
# Lazy import to avoid circulars; constructing EEGChallengeDataset will touch DB
|
|
20
|
+
_ = EEGChallengeDataset(
|
|
21
|
+
release="R5", mini=True, cache_dir=EEG_CHALLENGE_CACHE_DIR
|
|
22
|
+
)
|
|
23
|
+
except Exception:
|
|
24
|
+
pytest.skip("Mongo not reachable")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_minirelease_vs_full_counts_and_subjects(warmed_mongo):
|
|
28
|
+
"""Mini release should have fewer files and (typically) fewer subjects than full release."""
|
|
29
|
+
release = "R5"
|
|
30
|
+
|
|
31
|
+
ds_mini = EEGChallengeDataset(
|
|
32
|
+
release=release, mini=True, cache_dir=EEG_CHALLENGE_CACHE_DIR
|
|
33
|
+
)
|
|
34
|
+
ds_full = EEGChallengeDataset(
|
|
35
|
+
release=release, mini=False, cache_dir=EEG_CHALLENGE_CACHE_DIR
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
# File count: mini must be strictly smaller than full
|
|
39
|
+
assert len(ds_mini.datasets) < len(ds_full.datasets)
|
|
40
|
+
|
|
41
|
+
# Subject cardinality: mini should be strictly less than full, and > 0
|
|
42
|
+
subj_mini = ds_mini.description["subject"].nunique()
|
|
43
|
+
subj_full = ds_full.description["subject"].nunique()
|
|
44
|
+
assert subj_mini > 0
|
|
45
|
+
assert subj_mini < subj_full
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_minirelease_subject_raw_equivalence(warmed_mongo):
    """For a subject present in the mini set, loading that subject in mini vs full yields identical raw data."""
    release = "R5"

    # Choose a subject that is guaranteed to exist in the mini release rather
    # than guessing an identifier.
    mini_all = EEGChallengeDataset(
        release=release, mini=True, cache_dir=EEG_CHALLENGE_CACHE_DIR
    )
    assert len(mini_all.datasets) > 0
    subject = mini_all.description["subject"].iloc[0]

    mini_ds = EEGChallengeDataset(
        release=release, mini=True, cache_dir=EEG_CHALLENGE_CACHE_DIR, subject=subject
    )
    full_ds = EEGChallengeDataset(
        release=release, mini=False, cache_dir=EEG_CHALLENGE_CACHE_DIR, subject=subject
    )

    assert len(mini_ds.datasets) > 0
    assert len(full_ds.datasets) > 0

    # Index both collections by BIDS path; bucket prefixes differ between
    # mini and full, so the bidspath is the stable join key.
    mini_by_path = {d.record["bidspath"]: d for d in mini_ds.datasets}
    full_by_path = {d.record["bidspath"]: d for d in full_ds.datasets}
    shared_paths = mini_by_path.keys() & full_by_path.keys()
    assert shared_paths, "No common recordings found for the chosen subject"

    common_path = next(iter(shared_paths))
    raw_mini = mini_by_path[common_path].raw
    raw_full = full_by_path[common_path].raw

    # Metadata must agree before comparing samples.
    assert raw_mini.info["sfreq"] == raw_full.info["sfreq"]
    assert raw_mini.info["nchan"] == raw_full.info["nchan"]
    assert raw_mini.ch_names == raw_full.ch_names

    # Compare a short prefix of one channel to keep memory usage low while
    # still catching content divergence.
    n_samples = min(1000, raw_mini.n_times, raw_full.n_times)
    assert n_samples > 0
    slice_mini = raw_mini.get_data(picks=[0], start=0, stop=n_samples)
    slice_full = raw_full.get_data(picks=[0], start=0, stop=n_samples)
    assert np.allclose(slice_mini, slice_full, rtol=1e-6, atol=0), (
        "Raw data mismatch between mini and full"
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_minirelease_consume_everything(warmed_mongo):
    """Simply try to load all data in the mini release to catch any errors."""
    mini_ds = EEGChallengeDataset(
        release="R5", mini=True, cache_dir=EEG_CHALLENGE_CACHE_DIR
    )

    # Touch every recording's raw object and description so that any loader
    # failure surfaces here as a test failure.
    for recording in mini_ds.datasets:
        assert recording.raw is not None
        assert recording.description is not None
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
from .api import EEGDashDataset
|
|
4
|
-
from .registry import register_openneuro_datasets
|
|
5
|
-
|
|
6
|
-
# Maps an EEG Challenge release name ("R1".."R11") to its OpenNeuro dataset ID.
# NOTE: the IDs are not perfectly sequential (R9 is ds005514, not ds005513),
# so the mapping is spelled out explicitly rather than generated from a formula.
RELEASE_TO_OPENNEURO_DATASET_MAP = {
    "R1": "ds005505",
    "R2": "ds005506",
    "R3": "ds005507",
    "R4": "ds005508",
    "R5": "ds005509",
    "R6": "ds005510",
    "R7": "ds005511",
    "R8": "ds005512",
    "R9": "ds005514",
    "R10": "ds005515",
    "R11": "ds005516",
}
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class EEGChallengeDataset(EEGDashDataset):
    """An :class:`EEGDashDataset` restricted to one EEG 2025 Challenge release,
    loaded from the challenge S3 bucket instead of the OpenNeuro default.
    """

    def __init__(
        self,
        release: str,
        cache_dir: str,
        query: dict | None = None,
        s3_bucket: str | None = "s3://nmdatasets/NeurIPS25",
        **kwargs,
    ):
        """Create a new EEGDashDataset from a given query or local BIDS dataset directory
        and dataset name. An EEGDashDataset is pooled collection of EEGDashBaseDataset
        instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.

        Parameters
        ----------
        release: str
            Release name. Can be one of ["R1", ..., "R11"]
        query : dict | None
            Optionally a dictionary that specifies a query to be executed,
            in addition to the dataset (automatically inferred from the release argument).
            See EEGDash.find() for details on the query format.
        cache_dir : str
            A directory where the dataset will be cached locally.
        s3_bucket : str | None
            An optional S3 bucket URI to use instead of the
            default OpenNeuro bucket for loading data files.
        kwargs : dict
            Additional keyword arguments to be passed to the EEGDashDataset
            constructor.

        Raises
        ------
        ValueError
            If ``release`` is unknown, or ``query`` pins a dataset that does
            not match the one implied by ``release``.

        """
        # Validate before storing any state so a bad release leaves no
        # half-initialized attributes behind.
        if release not in RELEASE_TO_OPENNEURO_DATASET_MAP:
            raise ValueError(f"Unknown release: {release}")
        self.release = release

        dataset = RELEASE_TO_OPENNEURO_DATASET_MAP[release]
        if query is None:
            query = {"dataset": dataset}
        elif "dataset" not in query:
            # Copy instead of mutating the caller's dict in place.
            query = {**query, "dataset": dataset}
        elif query["dataset"] != dataset:
            raise ValueError(
                f"Query dataset {query['dataset']} does not match the release {release} "
                f"which corresponds to dataset {dataset}."
            )

        # A None bucket means "use the default bucket" — do not interpolate it,
        # which would produce the literal prefix "None/<release>_L100".
        bucket = f"{s3_bucket}/{release}_L100" if s3_bucket is not None else None

        super().__init__(
            query=query,
            cache_dir=cache_dir,
            s3_bucket=bucket,
            **kwargs,
        )
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
# Dynamically register dataset classes from the bundled summary CSV, injecting
# them into this module's namespace (namespace=globals()). Presumably one
# EEGDashDataset subclass per summary row — confirm against register_openneuro_datasets.
registered_classes = register_openneuro_datasets(
    summary_file=Path(__file__).with_name("dataset_summary.csv"),
    base_class=EEGDashDataset,
    namespace=globals(),
)


# Public API: the hand-written challenge class plus every generated class.
__all__ = ["EEGChallengeDataset"] + list(registered_classes.keys())
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|