eegdash 0.3.7.dev105__tar.gz → 0.3.7.dev107__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic.
- {eegdash-0.3.7.dev105/eegdash.egg-info → eegdash-0.3.7.dev107}/PKG-INFO +1 -1
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/__init__.py +1 -1
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/api.py +88 -41
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/bids_eeg_metadata.py +75 -5
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/data_utils.py +118 -26
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107/eegdash.egg-info}/PKG-INFO +1 -1
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash.egg-info/SOURCES.txt +0 -1
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/pyproject.toml +7 -6
- eegdash-0.3.7.dev107/tests/test_offline.py +157 -0
- eegdash-0.3.7.dev105/tests/test_offline.py +0 -51
- eegdash-0.3.7.dev105/tests/test_offline_bids_matching.py +0 -119
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/LICENSE +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/MANIFEST.in +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/README.md +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/docs/Makefile +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/docs/source/conf.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/docs/source/dataset_summary.rst +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/docs/source/index.rst +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/docs/source/install/install.rst +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/docs/source/install/install_pip.rst +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/docs/source/install/install_source.rst +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/docs/source/overview.rst +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/const.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/dataset/__init__.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/dataset/dataset.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/dataset/registry.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/__init__.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/datasets.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/decorators.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/extractors.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/feature_bank/__init__.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/feature_bank/complexity.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/feature_bank/connectivity.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/feature_bank/csp.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/feature_bank/dimensionality.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/feature_bank/signal.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/feature_bank/spectral.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/feature_bank/utils.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/inspect.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/serialization.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/features/utils.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/mongodb.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash/utils.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash.egg-info/dependency_links.txt +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash.egg-info/requires.txt +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/eegdash.egg-info/top_level.txt +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/setup.cfg +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_api.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_cache_folder_suffix.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_challenge_kwargs.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_correctness.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_dataset.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_dataset_registration.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_eegdash.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_functional.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_init.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_minirelease.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_mongo_connection.py +0 -0
- {eegdash-0.3.7.dev105 → eegdash-0.3.7.dev107}/tests/test_query.py +0 -0
--- eegdash-0.3.7.dev105/eegdash.egg-info/PKG-INFO
+++ eegdash-0.3.7.dev107/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: eegdash
-Version: 0.3.7.dev105
+Version: 0.3.7.dev107
 Summary: EEG data for machine learning
 Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
 License-Expression: GPL-3.0-only
--- eegdash-0.3.7.dev105/eegdash/api.py
+++ eegdash-0.3.7.dev107/eegdash/api.py
@@ -18,13 +18,21 @@ from s3fs import S3FileSystem
 
 from braindecode.datasets import BaseConcatDataset
 
-from .bids_eeg_metadata import build_query_from_kwargs, load_eeg_attrs_from_bids_file
+from .bids_eeg_metadata import (
+    build_query_from_kwargs,
+    load_eeg_attrs_from_bids_file,
+    merge_participants_fields,
+    normalize_key,
+)
 from .const import (
     ALLOWED_QUERY_FIELDS,
     RELEASE_TO_OPENNEURO_DATASET_MAP,
 )
 from .const import config as data_config
-from .data_utils import EEGDashBaseDataset, EEGBIDSDataset
+from .data_utils import (
+    EEGBIDSDataset,
+    EEGDashBaseDataset,
+)
 from .mongodb import MongoConnectionManager
 
 logger = logging.getLogger("eegdash")
@@ -784,20 +792,49 @@ class EEGDashDataset(BaseConcatDataset):
                     f"Offline mode is enabled, but local data_dir {self.data_dir} does not exist."
                 )
             records = self._find_local_bids_records(self.data_dir, self.query)
-            datasets = [
-                EEGDashBaseDataset(
-                    record=record,
-                    cache_dir=self.cache_dir,
-                    s3_bucket=self.s3_bucket,
-                    description={
-                        k: record.get(k)
-                        for k in ("subject", "session", "run", "task")
-                        if record.get(k) is not None
-                    },
-                    **base_dataset_kwargs,
+            # Try to enrich from local participants.tsv to restore requested fields
+            try:
+                bids_ds = EEGBIDSDataset(
+                    data_dir=str(self.data_dir), dataset=self.query["dataset"]
+                )  # type: ignore[index]
+            except Exception:
+                bids_ds = None
+
+            datasets = []
+            for record in records:
+                # Start with entity values from filename
+                desc: dict[str, Any] = {
+                    k: record.get(k)
+                    for k in ("subject", "session", "run", "task")
+                    if record.get(k) is not None
+                }
+
+                if bids_ds is not None:
+                    try:
+                        rel_from_dataset = Path(record["bidspath"]).relative_to(
+                            record["dataset"]
+                        )  # type: ignore[index]
+                        local_file = (self.data_dir / rel_from_dataset).as_posix()
+                        part_row = bids_ds.subject_participant_tsv(local_file)
+                        desc = merge_participants_fields(
+                            description=desc,
+                            participants_row=part_row
+                            if isinstance(part_row, dict)
+                            else None,
+                            description_fields=description_fields,
+                        )
+                    except Exception:
+                        pass
+
+                datasets.append(
+                    EEGDashBaseDataset(
+                        record=record,
+                        cache_dir=self.cache_dir,
+                        s3_bucket=self.s3_bucket,
+                        description=desc,
+                        **base_dataset_kwargs,
+                    )
                 )
-                for record in records
-            ]
         elif self.query:
             # This is the DB query path that we are improving
             datasets = self._find_datasets(
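The offline path above builds a minimal description from the filename entities first and only then overlays participants.tsv columns, never overwriting what is already there. A minimal sketch of that merge order, using plain dicts and a hypothetical participants row rather than the library's own classes:

    # Sketch: filename entities first, participants.tsv second (no overwrites).
    record = {"subject": "01", "session": None, "run": "1", "task": "rest"}
    participants_row = {"age": 12, "subject": "should-not-overwrite"}  # hypothetical row

    desc = {k: v for k, v in record.items() if v is not None}
    for col, val in participants_row.items():
        desc.setdefault(col, val)  # existing entity values win

    print(desc)  # {'subject': '01', 'run': '1', 'task': 'rest', 'age': 12}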
@@ -882,23 +919,16 @@ class EEGDashDataset(BaseConcatDataset):
             else:
                 matching_args[finder_key] = [entity_val]
 
-        paths = find_matching_paths(
+        matched_paths = find_matching_paths(
             root=str(dataset_root),
             datatypes=["eeg"],
             suffixes=["eeg"],
             ignore_json=True,
             **matching_args,
         )
+        records_out: list[dict] = []
 
-        records: list[dict] = []
-        seen_files: set[str] = set()
-
-        for bids_path in paths:
-            fpath = str(Path(bids_path.fpath).resolve())
-            if fpath in seen_files:
-                continue
-            seen_files.add(fpath)
-
+        for bids_path in matched_paths:
             # Build bidspath as dataset_id / relative_path_from_dataset_root (POSIX)
             rel_from_root = (
                 Path(bids_path.fpath)
@@ -915,29 +945,37 @@ class EEGDashDataset(BaseConcatDataset):
                 "session": (bids_path.session or None),
                 "task": (bids_path.task or None),
                 "run": (bids_path.run or None),
-                # minimal fields to satisfy BaseDataset
+                # minimal fields to satisfy BaseDataset from eegdash
                 "bidsdependencies": [],  # not needed to just run.
                 "modality": "eeg",
-                #
-                "sampling_frequency":
-                "nchans":
-                "ntimes":
+                # minimal numeric defaults for offline length calculation
+                "sampling_frequency": None,
+                "nchans": None,
+                "ntimes": None,
             }
-            records.append(rec)
+            records_out.append(rec)
 
-        return records
+        return records_out
 
     def _find_key_in_nested_dict(self, data: Any, target_key: str) -> Any:
-        """
-        Recursively search for target_key in a nested dictionary.
+        """Recursively search for target_key in nested dicts/lists with normalized matching.
+
+        This makes lookups tolerant to naming differences like "p-factor" vs "p_factor".
+        Returns the first match or None.
         """
+        norm_target = normalize_key(target_key)
         if isinstance(data, dict):
-            if target_key in data:
-                return data[target_key]
-            for value in data.values():
-                result = self._find_key_in_nested_dict(value, target_key)
-                if result is not None:
-                    return result
+            for k, v in data.items():
+                if normalize_key(k) == norm_target:
+                    return v
+                res = self._find_key_in_nested_dict(v, target_key)
+                if res is not None:
+                    return res
+        elif isinstance(data, list):
+            for item in data:
+                res = self._find_key_in_nested_dict(item, target_key)
+                if res is not None:
+                    return res
         return None
 
     def _find_datasets(
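The rewritten _find_key_in_nested_dict is a depth-first search keyed on normalized names. The same idea as a self-contained sketch (a standalone function, not the class method):

    import re

    def normalize_key(key: str) -> str:
        return re.sub(r"[^a-z0-9]+", "_", str(key).lower()).strip("_")

    def find_key(data, target):
        """Depth-first search over dicts/lists; keys match after normalization."""
        norm_target = normalize_key(target)
        if isinstance(data, dict):
            for k, v in data.items():
                if normalize_key(k) == norm_target:
                    return v
                res = find_key(v, target)
                if res is not None:
                    return res
        elif isinstance(data, list):
            for item in data:
                res = find_key(item, target)
                if res is not None:
                    return res
        return None

    record = {"participant_tsv": {"p-factor": 2.5}}  # hypothetical record
    print(find_key(record, "p_factor"))  # 2.5 -- "p-factor" matches "p_factor"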
@@ -969,11 +1007,20 @@ class EEGDashDataset(BaseConcatDataset):
         self.records = self.eeg_dash_instance.find(query)
 
         for record in self.records:
-            description = {}
+            description: dict[str, Any] = {}
+            # Requested fields first (normalized matching)
             for field in description_fields:
                 value = self._find_key_in_nested_dict(record, field)
                 if value is not None:
                     description[field] = value
+            # Merge all participants.tsv columns generically
+            part = self._find_key_in_nested_dict(record, "participant_tsv")
+            if isinstance(part, dict):
+                description = merge_participants_fields(
+                    description=description,
+                    participants_row=part,
+                    description_fields=description_fields,
+                )
             datasets.append(
                 EEGDashBaseDataset(
                     record,
--- eegdash-0.3.7.dev105/eegdash/bids_eeg_metadata.py
+++ eegdash-0.3.7.dev107/eegdash/bids_eeg_metadata.py
@@ -1,16 +1,18 @@
 import logging
+import re
 from pathlib import Path
 from typing import Any
 
 from .const import ALLOWED_QUERY_FIELDS
 from .const import config as data_config
-from .data_utils import EEGBIDSDataset
 
 logger = logging.getLogger("eegdash")
 
 __all__ = [
     "build_query_from_kwargs",
     "load_eeg_attrs_from_bids_file",
+    "merge_participants_fields",
+    "normalize_key",
 ]
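Dropping `from .data_utils import EEGBIDSDataset` here (and untyping the two signatures below) looks like a fix for an import cycle, since data_utils and api already import from this module. If the annotation were still wanted, a common alternative is a type-checking-only import, sketched here under that assumption:

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:  # evaluated by static type checkers, never at runtime
        from .data_utils import EEGBIDSDataset

    def _get_raw_extensions(bids_file: str, bids_dataset: "EEGBIDSDataset") -> list[str]:
        ...  # annotation survives without creating a runtime import cycle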
@@ -70,7 +72,7 @@ def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
     return query
 
 
-def _get_raw_extensions(bids_file: str, bids_dataset: EEGBIDSDataset) -> list[str]:
+def _get_raw_extensions(bids_file: str, bids_dataset) -> list[str]:
     """Helper to find paths to additional "sidecar" files that may be associated
     with a given main data file in a BIDS dataset; paths are returned as relative to
     the parent dataset path.
@@ -92,9 +94,7 @@ def _get_raw_extensions(bids_file: str, bids_dataset: EEGBIDSDataset) -> list[str]:
     ]
 
 
-def load_eeg_attrs_from_bids_file(
-    bids_dataset: EEGBIDSDataset, bids_file: str
-) -> dict[str, Any]:
+def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any]:
     """Build the metadata record for a given BIDS file (single recording) in a BIDS dataset.
 
     Attributes are at least the ones defined in data_config attributes (set to None if missing),
@@ -182,3 +182,73 @@ def load_eeg_attrs_from_bids_file(
             attrs[field] = None
 
     return attrs
+
+
+def normalize_key(key: str) -> str:
+    """Normalize a metadata key for robust matching.
+
+    Lowercase and replace non-alphanumeric characters with underscores, then strip
+    leading/trailing underscores. This allows tolerant matching such as
+    "p-factor" ≈ "p_factor" ≈ "P Factor".
+    """
+    return re.sub(r"[^a-z0-9]+", "_", str(key).lower()).strip("_")
+
+
+def merge_participants_fields(
+    description: dict[str, Any],
+    participants_row: dict[str, Any] | None,
+    description_fields: list[str] | None = None,
+) -> dict[str, Any]:
+    """Merge participants.tsv fields into a dataset description dictionary.
+
+    - Preserves existing entries in ``description`` (no overwrites).
+    - Fills requested ``description_fields`` first, preserving their original names.
+    - Adds all remaining participants columns generically using normalized keys
+      unless a matching requested field already captured them.
+
+    Parameters
+    ----------
+    description : dict
+        Current description to be enriched in-place and returned.
+    participants_row : dict | None
+        A mapping of participants.tsv columns for the current subject.
+    description_fields : list[str] | None
+        Optional list of requested description fields. When provided, matching is
+        performed by normalized names; the original requested field names are kept.
+
+    Returns
+    -------
+    dict
+        The enriched description (same object as input for convenience).
+
+    """
+    if not isinstance(description, dict) or not isinstance(participants_row, dict):
+        return description
+
+    # Normalize participants keys and keep first non-None value per normalized key
+    norm_map: dict[str, Any] = {}
+    for part_key, part_value in participants_row.items():
+        norm_key = normalize_key(part_key)
+        if norm_key not in norm_map and part_value is not None:
+            norm_map[norm_key] = part_value
+
+    # Ensure description_fields is a list for matching
+    requested = list(description_fields or [])
+
+    # 1) Fill requested fields first using normalized matching, preserving names
+    for key in requested:
+        if key in description:
+            continue
+        requested_norm_key = normalize_key(key)
+        if requested_norm_key in norm_map:
+            description[key] = norm_map[requested_norm_key]
+
+    # 2) Add remaining participants columns generically under normalized names,
+    #    unless a requested field already captured them
+    requested_norm = {normalize_key(k) for k in requested}
+    for norm_key, part_value in norm_map.items():
+        if norm_key in requested_norm:
+            continue
+        if norm_key not in description:
+            description[norm_key] = part_value
+    return description
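Taken together, the two new helpers behave as in this illustrative run (the row values are made up):

    from eegdash.bids_eeg_metadata import merge_participants_fields, normalize_key

    assert normalize_key("P Factor") == "p_factor"
    assert normalize_key("p-factor") == "p_factor"

    desc = {"subject": "01"}
    row = {"p-factor": 2.5, "Age": 12, "subject": "ignored"}  # hypothetical participants.tsv row
    merged = merge_participants_fields(desc, row, description_fields=["p_factor"])
    # The requested field keeps its requested spelling, the remaining columns
    # arrive under normalized names, and existing keys are never overwritten.
    assert merged == {"subject": "01", "p_factor": 2.5, "age": 12}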
--- eegdash-0.3.7.dev105/eegdash/data_utils.py
+++ eegdash-0.3.7.dev107/eegdash/data_utils.py
@@ -1,9 +1,11 @@
+import io
 import json
 import logging
 import os
 import re
 import traceback
 import warnings
+from contextlib import redirect_stderr
 from pathlib import Path
 from typing import Any
 
@@ -91,19 +93,8 @@ class EEGDashBaseDataset(BaseDataset):
             root=self.bids_root,
             datatype="eeg",
             suffix="eeg",
-            # extension='.bdf',
             **self.bids_kwargs,
         )
-        # TO-DO: remove this once find a better solution using mne-bids or update competition dataset
-        try:
-            _ = str(self.bidspath)
-        except RuntimeError:
-            try:
-                self.bidspath = self.bidspath.update(extension=".bdf")
-                self.filecache = self.filecache.with_suffix(".bdf")
-            except Exception as e:
-                logger.error(f"Error while updating BIDS path: {e}")
-                raise e
 
         self.s3file = self._get_s3path(record["bidspath"])
         self.bids_dependencies = record["bidsdependencies"]
@@ -182,8 +173,11 @@ class EEGDashBaseDataset(BaseDataset):
             dep_local = Path(self.dataset_folder) / dep_path
             filepath = self.cache_dir / dep_local
             if not self.s3_open_neuro:
+                if filepath.suffix == ".set":
+                    filepath = filepath.with_suffix(".bdf")
                 if self.filecache.suffix == ".set":
                     self.filecache = self.filecache.with_suffix(".bdf")
+
             # here, we download the dependency and it is fine
             # in the case of the competition.
             if not filepath.exists():
@@ -218,6 +212,12 @@ class EEGDashBaseDataset(BaseDataset):
 
     def _ensure_raw(self) -> None:
         """Download the S3 file and BIDS dependencies if not already cached."""
+        # TO-DO: remove this once is fixed on the our side
+        # for the competition
+        if not self.s3_open_neuro:
+            self.bidspath = self.bidspath.update(extension=".bdf")
+            self.filecache = self.filecache.with_suffix(".bdf")
+
         if not os.path.exists(self.filecache):  # not preload
             if self.bids_dependencies:
                 self._download_dependencies()
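The `.set` to `.bdf` rewrite in these two hunks leans on `pathlib.Path.with_suffix`, which swaps only the final extension; a quick illustration with a made-up path:

    from pathlib import Path

    f = Path("ds005509/sub-01/eeg/sub-01_task-rest_eeg.set")  # hypothetical cache path
    print(f.with_suffix(".bdf"))  # ds005509/sub-01/eeg/sub-01_task-rest_eeg.bdf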
@@ -226,13 +226,50 @@ class EEGDashBaseDataset(BaseDataset):
         # capturing any warnings
         # to-do: remove this once is fixed on the mne-bids side.
         with warnings.catch_warnings(record=True) as w:
+            # Ensure all warnings are captured into 'w' and not shown to users
+            warnings.simplefilter("always")
             try:
-                # read the raw BIDS recording
-                self._raw = mne_bids.read_raw_bids(
-                    bids_path=self.bidspath,
-                    verbose="ERROR",
-                )
-
+                # mne-bids emits RuntimeWarnings to stderr; silence stderr during read
+                _stderr_buffer = io.StringIO()
+                with redirect_stderr(_stderr_buffer):
+                    self._raw = mne_bids.read_raw_bids(
+                        bids_path=self.bidspath, verbose="ERROR"
+                    )
+                # Parse unmapped participants.tsv fields reported by mne-bids and
+                # inject them into Raw.info and the dataset description generically.
+                extras = self._extract_unmapped_participants_from_warnings(w)
+                if extras:
+                    # 1) Attach to Raw.info under subject_info.participants_extras
+                    try:
+                        subject_info = self._raw.info.get("subject_info") or {}
+                        if not isinstance(subject_info, dict):
+                            subject_info = {}
+                        pe = subject_info.get("participants_extras") or {}
+                        if not isinstance(pe, dict):
+                            pe = {}
+                        # Merge without overwriting
+                        for k, v in extras.items():
+                            pe.setdefault(k, v)
+                        subject_info["participants_extras"] = pe
+                        self._raw.info["subject_info"] = subject_info
+                    except Exception:
+                        # Non-fatal; continue
+                        pass
+
+                    # 2) Also add to this dataset's description, if possible, so
+                    #    targets can be selected later without naming specifics.
+                    try:
+                        import pandas as _pd  # local import to avoid top-level cost
+
+                        if isinstance(self.description, dict):
+                            for k, v in extras.items():
+                                self.description.setdefault(k, v)
+                        elif isinstance(self.description, _pd.Series):
+                            for k, v in extras.items():
+                                if k not in self.description.index:
+                                    self.description.loc[k] = v
+                    except Exception:
+                        pass
             except Exception as e:
                 logger.error(
                     f"Error while reading BIDS file: {self.bidspath}\n"
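The capture pattern added above combines three standard-library pieces; here is the same shape in isolation, with a synthetic warning standing in for mne-bids:

    import io
    import warnings
    from contextlib import redirect_stderr

    buf = io.StringIO()
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")  # record everything, even once-filtered warnings
        with redirect_stderr(buf):       # anything printed to stderr lands in buf
            warnings.warn("Unable to map the following column(s) to MNE:\ngender: F")

    assert "Unable to map" in str(w[0].message)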
@@ -242,10 +279,60 @@ class EEGDashBaseDataset(BaseDataset):
                 logger.error(f"Exception: {e}")
                 logger.error(traceback.format_exc())
                 raise e
-            for warning in w:
-                logger.warning(
-                    f"Warning while reading BIDS file: {warning.message}"
-                )
+            # Filter noisy mapping notices from mne-bids; surface others
+            for captured_warning in w:
+                try:
+                    msg = str(captured_warning.message)
+                except Exception:
+                    continue
+                # Suppress verbose participants mapping messages
+                if "Unable to map the following column" in msg and "MNE" in msg:
+                    logger.debug(
+                        "Suppressed mne-bids mapping warning while reading BIDS file: %s",
+                        msg,
+                    )
+                    continue
+                logger.warning("Warning while reading BIDS file: %s", msg)
+
+    def _extract_unmapped_participants_from_warnings(
+        self, warnings_list: list[Any]
+    ) -> dict[str, Any]:
+        """Scan captured warnings from mne-bids and extract unmapped participants.tsv
+        entries in a generic way.
+
+        Optionally, the column name can carry a note in parentheses that we ignore
+        for key/value extraction. Returns a mapping of column name -> raw value.
+        """
+        extras: dict[str, Any] = {}
+        header = "Unable to map the following column(s) to MNE:"
+        for wr in warnings_list:
+            try:
+                msg = str(wr.message)
+            except Exception:
+                continue
+            if header not in msg:
+                continue
+            lines = msg.splitlines()
+            # Find the header line, then parse subsequent lines as entries
+            try:
+                idx = next(i for i, ln in enumerate(lines) if header in ln)
+            except StopIteration:
+                idx = -1
+            for line in lines[idx + 1 :]:
+                line = line.strip()
+                if not line:
+                    continue
+                # Pattern: <col>(optional note): <value>
+                # Examples: "gender: F", "Ethnicity: Indian", "foo (ignored): bar"
+                m = re.match(r"^([^:]+?)(?:\s*\([^)]*\))?\s*:\s*(.*)$", line)
+                if not m:
+                    continue
+                col = m.group(1).strip()
+                val = m.group(2).strip()
+                # Keep original column names as provided to stay agnostic
+                if col and col not in extras:
+                    extras[col] = val
+        return extras
 
     # === BaseDataset and PyTorch Dataset interface ===
 
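The `<col>(optional note): <value>` regex in `_extract_unmapped_participants_from_warnings` can be exercised in isolation; the sample lines mirror the comment in the diff:

    import re

    pattern = re.compile(r"^([^:]+?)(?:\s*\([^)]*\))?\s*:\s*(.*)$")

    for line in ["gender: F", "Ethnicity: Indian", "foo (ignored): bar"]:
        m = pattern.match(line)
        print(m.group(1).strip(), "->", m.group(2).strip())
    # gender -> F
    # Ethnicity -> Indian
    # foo -> bar   (the parenthesized note is dropped)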
@@ -264,11 +351,16 @@ class EEGDashBaseDataset(BaseDataset):
     def __len__(self) -> int:
         """Return the number of samples in the dataset."""
         if self._raw is None:
-            return int(
-                self.record["ntimes"] * self.record["sampling_frequency"]
-            )
-        else:
-            return len(self._raw)
+            if (
+                self.record["ntimes"] is None
+                or self.record["sampling_frequency"] is None
+            ):
+                self._ensure_raw()
+            else:
+                # FIXME: this is a bit strange and should definitely not change as a side effect
+                # of accessing the data (which it will, since ntimes is the actual length but rounded down)
+                return int(self.record["ntimes"] * self.record["sampling_frequency"])
+        return len(self._raw)
 
     @property
     def raw(self):
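The new `__len__` branch derives the sample count from metadata instead of loading the recording: with `ntimes` stored as a duration in seconds, 162.4 s at a 500 Hz sampling frequency gives `int(162.4 * 500)` = 81200 samples. A minimal check with hypothetical metadata values:

    record = {"ntimes": 162.4, "sampling_frequency": 500}  # hypothetical metadata
    n_samples = int(record["ntimes"] * record["sampling_frequency"])
    assert n_samples == 81200  # computed without touching the raw file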
--- eegdash-0.3.7.dev105/PKG-INFO
+++ eegdash-0.3.7.dev107/eegdash.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: eegdash
-Version: 0.3.7.dev105
+Version: 0.3.7.dev107
 Summary: EEG data for machine learning
 Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
 License-Expression: GPL-3.0-only
--- eegdash-0.3.7.dev105/pyproject.toml
+++ eegdash-0.3.7.dev107/pyproject.toml
@@ -114,13 +114,14 @@ line-length = 88
 target-version = "py311"
 
 [tool.isort]
-skip
-skip_glob = ["examples/*"]
-py_version
-profile
-sections
-known_first_party
+skip = [".gitignore"]
+# Format examples too; pre-commit selects files, no need for skip_glob
+py_version = 312
+profile = "black"
+sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
+known_first_party = ["eegdash", "braindecode"]
 lines_between_sections = 1
+atomic = false
 
 [pytest]
 testpaths = ["tests"]
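Under this isort configuration (profile "black", explicit sections, `braindecode` treated as first-party), imports in the package would be grouped roughly like this illustrative block:

    from __future__ import annotations  # FUTURE

    import logging                      # STDLIB
    from pathlib import Path

    import numpy as np                  # THIRDPARTY

    from braindecode.datasets import BaseConcatDataset  # FIRSTPARTY (known_first_party)
    from eegdash.const import ALLOWED_QUERY_FIELDS

    from .mongodb import MongoConnectionManager         # LOCALFOLDER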
--- /dev/null
+++ eegdash-0.3.7.dev107/tests/test_offline.py
@@ -0,0 +1,157 @@
+from pathlib import Path
+
+import platformdirs
+
+from eegdash.const import RELEASE_TO_OPENNEURO_DATASET_MAP
+from eegdash.dataset.dataset import EEGChallengeDataset
+
+
+def test_offline_real_data_end_to_end():
+    """Use real data like in the tutorial: prefetch (online) then go offline.
+
+    - Prefetch via EEGChallengeDataset (mini release) to the user cache
+    - Instantiate an offline EEGChallengeDataset pointing at the cache
+    - Compare raw shapes for one subject and basic description columns
+    """
+    release = "R2"
+    _ = RELEASE_TO_OPENNEURO_DATASET_MAP[release]
+    cache_dir = Path(platformdirs.user_cache_dir("EEGDash"))
+    cache_dir.mkdir(parents=True, exist_ok=True)
+
+    # Online: construct challenge dataset (mini) and prefetch first subject
+    # Limit to a single subject to keep the test lean
+    subject_id = "NDARAB793GL3"  # part of R2 mini set
+    ds_online = EEGChallengeDataset(
+        release=release,
+        cache_dir=cache_dir,
+        task="RestingState",
+        mini=True,
+        subject=subject_id,
+    )
+    assert len(ds_online.datasets) > 0
+    first_online = ds_online.datasets[0]
+    # Trigger download of this subject's files (raw + sidecars)
+    _ = first_online.raw
+
+    # Offline: enumerate locally cached data
+    ds_offline = EEGChallengeDataset(
+        release=release,
+        cache_dir=cache_dir,
+        task="RestingState",
+        download=False,
+        subject=subject_id,
+    )
+    assert len(ds_offline.datasets) == 1
+    first_offline = ds_offline.datasets[0]
+
+    # Compare raw shapes for the same subject online vs offline
+    shape_online = first_online.raw.get_data().shape
+    shape_offline = first_offline.raw.get_data().shape
+    assert shape_online == shape_offline
+
+    # Basic description columns present
+    for col in ("subject", "task"):
+        assert col in ds_offline.description.columns
+
+
+def test_offline_real_bidspath_and_cache_suffix():
+    """Verify bidspath root and local cache folder for real data (tutorial style)."""
+    release = "R2"
+    dataset_id = RELEASE_TO_OPENNEURO_DATASET_MAP[release]
+    cache_dir = Path(platformdirs.user_cache_dir("EEGDash"))
+    cache_dir.mkdir(parents=True, exist_ok=True)
+
+    subject_id = "NDARAB793GL3"
+    ds_offline = EEGChallengeDataset(
+        release=release,
+        cache_dir=cache_dir,
+        task="RestingState",
+        download=False,
+        subject=subject_id,
+    )
+    assert len(ds_offline.datasets) == 1
+    base = ds_offline.datasets[0]
+    # bidspath must start with dataset id (not suffixed cache folder)
+    assert base.record["bidspath"].split("/")[0] == dataset_id
+    # local BIDS root points to suffixed folder used by the challenge
+    assert (cache_dir / f"{dataset_id}-bdf-mini").exists()
+    assert base.bids_root == cache_dir / f"{dataset_id}-bdf-mini"
+
+
+def test_offline_real_records_description_shape():
+    """Reconstruct from records and compare description row counts (tutorial-like)."""
+    release = "R2"
+    cache_dir = Path(platformdirs.user_cache_dir("EEGDash"))
+    cache_dir.mkdir(parents=True, exist_ok=True)
+
+    subject_id = "NDARAB793GL3"
+    ds_offline = EEGChallengeDataset(
+        release=release,
+        cache_dir=cache_dir,
+        task="RestingState",
+        download=False,
+        subject=subject_id,
+    )
+    assert len(ds_offline.datasets) == 1
+
+    # Recreate a dataset from the exact offline records
+    records = [bd.record for bd in ds_offline.datasets]
+    ds_from_records = EEGChallengeDataset(
+        release=release, cache_dir=cache_dir, task="RestingState", records=records
+    )
+
+    assert ds_offline.description.shape[0] == ds_from_records.description.shape[0]
+
+
+def test_online_vs_records_vs_offline_single_subject():
+    """Compare online vs records-injection vs offline for a single subject.
+
+    Ensures consistent row counts and identical raw data shapes across modes.
+    """
+    release = "R2"
+    subject_id = "NDARAB793GL3"
+    cache_dir = Path(platformdirs.user_cache_dir("EEGDash"))
+    cache_dir.mkdir(parents=True, exist_ok=True)
+
+    # Online for a single subject, and prefetch raw
+    ds_online = EEGChallengeDataset(
+        release=release,
+        cache_dir=cache_dir,
+        task="RestingState",
+        mini=True,
+        subject=subject_id,
+    )
+    assert len(ds_online.datasets) == 1
+    online_base = ds_online.datasets[0]
+    _ = online_base.raw
+
+    # From records (inject the online records directly)
+    records = [d.record for d in ds_online.datasets]
+    ds_records = EEGChallengeDataset(
+        release=release,
+        cache_dir=cache_dir,
+        task="RestingState",
+        records=records,
+    )
+    assert len(ds_records.datasets) == 1
+
+    # Offline: enumerate from cache for same subject
+    ds_offline = EEGChallengeDataset(
+        release=release,
+        cache_dir=cache_dir,
+        task="RestingState",
+        download=False,
+        subject=subject_id,
+    )
+    assert len(ds_offline.datasets) == 1
+
+    # Compare row counts in description
+    assert ds_online.description.shape[0] == 1
+    assert ds_records.description.shape[0] == 1
+    assert ds_offline.description.shape[0] == 1
+
+    # Compare raw shapes across modes
+    shape_online = ds_online.datasets[0].raw.get_data().shape
+    shape_records = ds_records.datasets[0].raw.get_data().shape
+    shape_offline = ds_offline.datasets[0].raw.get_data().shape
+    assert shape_online == shape_records == shape_offline
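Per its docstrings, the rewritten offline suite hits the real R2 mini release, so it needs one online pass to warm the cache; after that it can be run directly with pytest:

    pytest tests/test_offline.py -v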
--- eegdash-0.3.7.dev105/tests/test_offline.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from pathlib import Path
-
-import pytest
-
-from eegdash import EEGDash, EEGDashDataset
-
-CACHE_DIR = (Path.home() / "mne_data" / "openneuro").resolve()
-CACHE_DIR.mkdir(parents=True, exist_ok=True)
-
-
-def test_dataset_loads_without_eegdash(monkeypatch):
-    """Dataset should load from records without contacting network resources."""
-    eeg_dash = EEGDash()
-
-    records = eeg_dash.find(
-        dataset="ds005509", subject="NDARAC350XUM", task="RestingState"
-    )
-
-    # test with internet
-    dataset_internet = EEGDashDataset(
-        query=dict(task="RestingState", subject="NDARAC350XUM", dataset="ds005509"),
-        cache_dir=CACHE_DIR,
-        eeg_dash_instance=eeg_dash,
-    )
-
-    # Monkeypatch any network calls inside EEGDashDataset to raise if called
-    monkeypatch.setattr(
-        EEGDashDataset,
-        "_find_datasets",
-        lambda *args, **kwargs: pytest.skip(
-            "Skipping network download in offline test"
-        ),
-    )
-    monkeypatch.setattr(
-        EEGDashDataset,
-        "_find_datasets",
-        lambda *args, **kwargs: pytest.skip(
-            "Skipping network download in offline test"
-        ),
-    )
-    # TO-DO: discover way to do this pytest
-
-    dataset_without_internet = EEGDashDataset(
-        dataset="ds005509", records=records, cache_dir=CACHE_DIR, eeg_dash_instance=None
-    )
-
-    assert dataset_internet.datasets[0].raw == dataset_without_internet.datasets[0].raw
-    assert (
-        dataset_internet.datasets[0].record
-        == dataset_without_internet.datasets[0].record
-    )
--- eegdash-0.3.7.dev105/tests/test_offline_bids_matching.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from pathlib import Path
-
-import pytest
-
-from eegdash.api import EEGDashDataset
-
-
-def _touch(p: Path):
-    p.parent.mkdir(parents=True, exist_ok=True)
-    p.touch()
-
-
-def make_minimal_bids(tmp_path: Path, dataset_id: str, folder_name: str | None = None):
-    """Create minimal BIDS-like structure under tmp_path/folder_name or dataset_id.
-
-    The filenames will always embed the dataset_id in bidspath semantics; the folder
-    name can include suffixes to simulate cache suffixing (e.g., ds-xxx-bdf-mini).
-    """
-    root = tmp_path / (folder_name or dataset_id)
-    # Create a few EEG files with different entities
-    _touch(
-        root / "sub-01" / "ses-01" / "eeg" / "sub-01_ses-01_task-rest_run-01_eeg.edf"
-    )
-    _touch(
-        root / "sub-02" / "ses-01" / "eeg" / "sub-02_ses-01_task-rest_run-01_eeg.edf"
-    )
-    _touch(root / "sub-02" / "ses-02" / "eeg" / "sub-02_ses-02_task-eo_run-01_eeg.bdf")
-    return root
-
-
-def test_offline_match_all(tmp_path: Path):
-    dataset_id = "ds-local"
-    make_minimal_bids(tmp_path, dataset_id)
-    ds = EEGDashDataset(cache_dir=tmp_path, dataset=dataset_id, download=False)
-    assert len(ds.datasets) == 3
-
-
-def test_offline_filter_subject(tmp_path: Path):
-    dataset_id = "ds-local"
-    make_minimal_bids(tmp_path, dataset_id)
-    ds = EEGDashDataset(
-        cache_dir=tmp_path, dataset=dataset_id, subject="01", download=False
-    )
-    assert len(ds.datasets) == 1
-    rec = ds.datasets[0].record
-    assert rec["subject"] == "01"
-    assert rec["task"] == "rest"
-
-
-def test_offline_filter_lists(tmp_path: Path):
-    dataset_id = "ds-local"
-    make_minimal_bids(tmp_path, dataset_id)
-    ds = EEGDashDataset(
-        cache_dir=tmp_path,
-        dataset=dataset_id,
-        subject=["01", "02"],
-        task=["rest"],
-        download=False,
-    )
-    # two rest recordings across subjects
-    assert len(ds.datasets) == 2
-    tasks = sorted([d.record["task"] for d in ds.datasets])
-    assert tasks == ["rest", "rest"]
-
-
-def test_offline_filter_session(tmp_path: Path):
-    dataset_id = "ds-local"
-    make_minimal_bids(tmp_path, dataset_id)
-    ds = EEGDashDataset(
-        cache_dir=tmp_path, dataset=dataset_id, session="02", download=False
-    )
-    assert len(ds.datasets) == 1
-    rec = ds.datasets[0].record
-    assert rec["session"] == "02"
-    assert rec["task"] == "eo"
-
-
-def test_offline_bidspath_and_suffix_rewrite(tmp_path: Path, monkeypatch):
-    """Bidspath should start with dataset id (no suffix) while files are stored
-    under suffixed cache root when s3_bucket indicates preprocessing.
-    Also ensure no S3 is touched in offline path.
-    """
-    dataset_id = "ds-local"
-    folder_name = f"{dataset_id}-bdf-mini"
-    make_minimal_bids(tmp_path, dataset_id, folder_name=folder_name)
-
-    # Make S3 usage explode if called; offline should not call it
-    import eegdash.api as api_mod
-
-    class Boom:
-        def __init__(self, *a, **k):
-            raise AssertionError(
-                "S3FileSystem should not be instantiated in offline mode"
-            )
-
-    monkeypatch.setattr(api_mod, "S3FileSystem", Boom)
-
-    ds = EEGDashDataset(
-        cache_dir=tmp_path,
-        dataset=dataset_id,
-        download=False,
-        s3_bucket="s3://example/some_bdf_mini_bucket",
-        eeg_dash_instance=object(),  # prevent constructing real EEGDash (which touches S3FileSystem)
-    )
-    assert len(ds.datasets) == 3
-    base = ds.datasets[0]
-
-    # Records should keep bidspath starting with dataset id (no suffix)
-    assert base.record["bidspath"].split("/")[0] == dataset_id
-
-    # Local writes should target suffixed folder
-    assert base.bids_root == tmp_path / folder_name
-    assert str(base.filecache).startswith(str((tmp_path / folder_name).resolve()))
-
-
-def test_offline_missing_dir_raises(tmp_path: Path):
-    dataset_id = "ds-does-not-exist"
-    with pytest.raises(ValueError, match="Offline mode is enabled, but local data_dir"):
-        EEGDashDataset(cache_dir=tmp_path, dataset=dataset_id, download=False)