eegdash 0.3.5.dev87__tar.gz → 0.3.5.dev89__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic.

Files changed (56)
  1. {eegdash-0.3.5.dev87/eegdash.egg-info → eegdash-0.3.5.dev89}/PKG-INFO +4 -17
  2. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/__init__.py +1 -1
  3. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/api.py +16 -4
  4. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/data_utils.py +31 -7
  5. eegdash-0.3.5.dev89/eegdash/dataset.py +351 -0
  6. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89/eegdash.egg-info}/PKG-INFO +4 -17
  7. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash.egg-info/SOURCES.txt +1 -0
  8. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash.egg-info/requires.txt +3 -16
  9. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/pyproject.toml +3 -16
  10. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_dataset.py +20 -11
  11. eegdash-0.3.5.dev89/tests/test_minirelease.py +112 -0
  12. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_offline.py +1 -1
  13. eegdash-0.3.5.dev87/eegdash/dataset.py +0 -82
  14. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/LICENSE +0 -0
  15. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/MANIFEST.in +0 -0
  16. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/README.md +0 -0
  17. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/Makefile +0 -0
  18. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/conf.py +0 -0
  19. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/dataset_summary.rst +0 -0
  20. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/index.rst +0 -0
  21. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/install/install.rst +0 -0
  22. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/install/install_pip.rst +0 -0
  23. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/install/install_source.rst +0 -0
  24. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/docs/source/overview.rst +0 -0
  25. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/data_config.py +0 -0
  26. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/dataset_summary.csv +0 -0
  27. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/__init__.py +0 -0
  28. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/datasets.py +0 -0
  29. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/decorators.py +0 -0
  30. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/extractors.py +0 -0
  31. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/__init__.py +0 -0
  32. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/complexity.py +0 -0
  33. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/connectivity.py +0 -0
  34. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/csp.py +0 -0
  35. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/dimensionality.py +0 -0
  36. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/signal.py +0 -0
  37. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/spectral.py +0 -0
  38. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/feature_bank/utils.py +0 -0
  39. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/inspect.py +0 -0
  40. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/serialization.py +0 -0
  41. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/features/utils.py +0 -0
  42. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/mongodb.py +0 -0
  43. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/preprocessing.py +0 -0
  44. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/registry.py +0 -0
  45. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/utils.py +0 -0
  46. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash.egg-info/dependency_links.txt +0 -0
  47. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash.egg-info/top_level.txt +0 -0
  48. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/setup.cfg +0 -0
  49. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_api.py +0 -0
  50. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_correctness.py +0 -0
  51. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_dataset_registration.py +0 -0
  52. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_eegdash.py +0 -0
  53. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_functional.py +0 -0
  54. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_init.py +0 -0
  55. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_mongo_connection.py +0 -0
  56. {eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_query.py +0 -0
{eegdash-0.3.5.dev87/eegdash.egg-info → eegdash-0.3.5.dev89}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: eegdash
-Version: 0.3.5.dev87
+Version: 0.3.5.dev89
 Summary: EEG data for machine learning
 Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
 License-Expression: GPL-3.0-only
@@ -60,22 +60,9 @@ Requires-Dist: memory_profiler; extra == "docs"
 Requires-Dist: ipython; extra == "docs"
 Requires-Dist: lightgbm; extra == "docs"
 Provides-Extra: all
-Requires-Dist: pre-commit; extra == "all"
-Requires-Dist: pytest; extra == "all"
-Requires-Dist: pytest-cov; extra == "all"
-Requires-Dist: codecov; extra == "all"
-Requires-Dist: pytest_cases; extra == "all"
-Requires-Dist: pytest-benchmark; extra == "all"
-Requires-Dist: sphinx; extra == "all"
-Requires-Dist: sphinx_design; extra == "all"
-Requires-Dist: sphinx_gallery; extra == "all"
-Requires-Dist: sphinx_rtd_theme; extra == "all"
-Requires-Dist: pydata-sphinx-theme; extra == "all"
-Requires-Dist: sphinx-autobuild; extra == "all"
-Requires-Dist: numpydoc; extra == "all"
-Requires-Dist: memory_profiler; extra == "all"
-Requires-Dist: ipython; extra == "all"
-Requires-Dist: lightgbm; extra == "all"
+Requires-Dist: eegdash[docs]; extra == "all"
+Requires-Dist: eegdash[dev]; extra == "all"
+Requires-Dist: eegdash[tests]; extra == "all"
 Dynamic: license-file
 
 # EEG-Dash

{eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/__init__.py
@@ -7,4 +7,4 @@ __init__mongo_client()
 
 __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset"]
 
-__version__ = "0.3.5.dev87"
+__version__ = "0.3.5.dev89"

{eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/api.py
@@ -591,7 +591,7 @@ class EEGDashDataset(BaseConcatDataset):
         self,
         query: dict | None = None,
         cache_dir: str = "~/eegdash_cache",
-        dataset: str | None = None,
+        dataset: str | list[str] | None = None,
         description_fields: list[str] = [
             "subject",
             "session",
@@ -669,6 +669,13 @@ class EEGDashDataset(BaseConcatDataset):
         }
         base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in query_kwargs}
 
+        # If user provided a dataset name via the dedicated parameter (and we're not
+        # loading from a local directory), treat it as a query filter. Accept str or list.
+        if data_dir is None and dataset is not None:
+            # Allow callers to pass a single dataset id (str) or a list of them.
+            # If list is provided, let _build_query_from_kwargs turn it into $in later.
+            query_kwargs.setdefault("dataset", dataset)
+
         if query and query_kwargs:
             raise ValueError(
                 "Provide either a 'query' dictionary or keyword arguments for filtering, not both."
@@ -688,15 +695,20 @@ class EEGDashDataset(BaseConcatDataset):
             ]
         elif data_dir:
             # This path loads from a local directory and is not affected by DB query logic
-            if isinstance(data_dir, str) or isinstance(data_dir, Path):
+            if isinstance(data_dir, (str, Path)):
                 datasets = self.load_bids_dataset(
-                    dataset=dataset,
+                    dataset=dataset
+                    if isinstance(dataset, str)
+                    else (dataset[0] if dataset else None),
                     data_dir=data_dir,
                     description_fields=description_fields,
                     s3_bucket=s3_bucket,
                     **base_dataset_kwargs,
                 )
             else:
+                assert dataset is not None, (
+                    "dataset must be provided when passing multiple data_dir"
+                )
                 assert len(data_dir) == len(dataset), (
                     "Number of datasets and directories must match"
                 )
@@ -748,7 +760,7 @@ class EEGDashDataset(BaseConcatDataset):
 
     def find_datasets(
         self,
-        query: dict[str, Any],
+        query: dict[str, Any] | None,
         description_fields: list[str],
         query_kwargs: dict,
         base_dataset_kwargs: dict,
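
The `api.py` changes above make the dedicated `dataset` parameter double as a query filter and let it accept either a single dataset id or a list of ids. A minimal usage sketch, assuming the metadata database is reachable (per the inline comment, a list is expected to be turned into a MongoDB `$in` filter by `_build_query_from_kwargs`):

```python
from eegdash import EEGDashDataset

# Single dataset id: behaves like query={"dataset": "ds005505"}.
ds_one = EEGDashDataset(dataset="ds005505", cache_dir="~/eegdash_cache")

# A list of ids should become a $in filter over the `dataset` field.
ds_many = EEGDashDataset(
    dataset=["ds005505", "ds005506"],
    cache_dir="~/eegdash_cache",
)
```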
{eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash/data_utils.py
@@ -2,6 +2,7 @@ import json
 import logging
 import os
 import re
+import traceback
 import warnings
 from pathlib import Path
 from typing import Any
@@ -66,9 +67,7 @@ class EEGDashBaseDataset(BaseDataset):
             self.s3_open_neuro = True
 
         self.filecache = self.cache_dir / record["bidspath"]
-
         self.bids_root = self.cache_dir / record["dataset"]
-
         self.bidspath = BIDSPath(
             root=self.bids_root,
             datatype="eeg",
@@ -99,6 +98,9 @@ class EEGDashBaseDataset(BaseDataset):
             )
         if not self.s3_open_neuro:
             self.s3file = re.sub(r"(^|/)ds\d{6}/", r"\1", self.s3file, count=1)
+            if self.s3file.endswith(".set"):
+                self.s3file = self.s3file[:-4] + ".bdf"
+                self.filecache = self.filecache.with_suffix(".bdf")
 
         self.filecache.parent.mkdir(parents=True, exist_ok=True)
         info = filesystem.info(self.s3file)
@@ -132,11 +134,21 @@ class EEGDashBaseDataset(BaseDataset):
             anon=True, client_kwargs={"region_name": "us-east-2"}
         )
         for i, dep in enumerate(self.bids_dependencies):
+            if not self.s3_open_neuro:
+                # Fix this when our bucket is integrated into the mongodb:
+                # if the file has a ".set" extension, replace it with ".bdf".
+                if dep.endswith(".set"):
+                    dep = dep[:-4] + ".bdf"
+
             s3path = self.get_s3path(dep)
             if not self.s3_open_neuro:
                 dep = self.bids_dependencies_original[i]
 
             filepath = self.cache_dir / dep
+            if not self.s3_open_neuro:
+                if self.filecache.suffix == ".set":
+                    self.filecache = self.filecache.with_suffix(".bdf")
             # here, we download the dependency and it is fine
             # in the case of the competition.
             if not filepath.exists():
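
The two added blocks implement the same workaround: the competition bucket stores recordings as `.bdf`, while the database records (and the BIDS dependency lists) still name `.set` files, so both the S3 key and the local cache path get their extension swapped. A standalone sketch of that mapping, with a hypothetical key and path for illustration:

```python
from pathlib import Path

def swap_set_for_bdf(s3_key: str, cache_path: Path) -> tuple[str, Path]:
    """Mirror the workaround above: a `.set` record maps to a `.bdf` object."""
    if s3_key.endswith(".set"):
        # String slice for the S3 key, matching `dep[:-4] + ".bdf"` above.
        s3_key = s3_key[:-4] + ".bdf"
        # pathlib handles the local cache path.
        cache_path = cache_path.with_suffix(".bdf")
    return s3_key, cache_path

# Hypothetical key and path, for illustration only.
key, cached = swap_set_for_bdf(
    "R5_mini_L100_bdf/sub-X/eeg/sub-X_task-RestingState_eeg.set",
    Path("~/eegdash_cache/ds005509/sub-X/eeg/sub-X_task-RestingState_eeg.set"),
)
assert key.endswith(".bdf") and cached.suffix == ".bdf"
```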
@@ -179,9 +191,23 @@ class EEGDashBaseDataset(BaseDataset):
             # capturing any warnings
             # to-do: remove this once is fixed on the mne-bids side.
             with warnings.catch_warnings(record=True) as w:
-                self._raw = mne_bids.read_raw_bids(
-                    bids_path=self.bidspath, verbose="ERROR"
-                )
+                try:
+                    # TO-DO: remove this once this is fixed on our side.
+                    if not self.s3_open_neuro:
+                        self.bidspath = self.bidspath.update(extension=".bdf")
+
+                    self._raw = mne_bids.read_raw_bids(
+                        bids_path=self.bidspath, verbose="ERROR"
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"Error while reading BIDS file: {self.bidspath}\n"
+                        "This may be due to a missing or corrupted file.\n"
+                        "Please check the file and try again."
+                    )
+                    logger.error(f"Exception: {e}")
+                    logger.error(traceback.format_exc())
+                    raise e
             for warning in w:
                 logger.warning(
                     f"Warning while reading BIDS file: {warning.message}"
@@ -292,7 +318,6 @@ class EEGDashBaseRaw(BaseRaw):
         )
 
     def get_s3path(self, filepath):
-        print(f"Getting S3 path for {filepath}")
         return f"{self._AWS_BUCKET}/{filepath}"
 
     def _download_s3(self) -> None:
@@ -513,7 +538,6 @@ class EEGBIDSDataset:
         with os.scandir(directory) as entries:
             for entry in entries:
                 if entry.is_file() and entry.name.endswith(extension):
-                    print("Adding ", entry.path)
                     result_files.append(entry.path)
                 elif entry.is_dir():
                     # check that entry path doesn't contain any name in ignore list
eegdash-0.3.5.dev89/eegdash/dataset.py
@@ -0,0 +1,351 @@
+from pathlib import Path
+
+from .api import EEGDashDataset
+from .registry import register_openneuro_datasets
+
+RELEASE_TO_OPENNEURO_DATASET_MAP = {
+    "R11": "ds005516",
+    "R10": "ds005515",
+    "R9": "ds005514",
+    "R8": "ds005512",
+    "R7": "ds005511",
+    "R6": "ds005510",
+    "R4": "ds005508",
+    "R5": "ds005509",
+    "R3": "ds005507",
+    "R2": "ds005506",
+    "R1": "ds005505",
+}
+
+SUBJECT_MINI_RELEASE_MAP = {
+    "R11": [
+        "NDARAB678VYW",
+        "NDARAG788YV9",
+        "NDARAM946HJE",
+        "NDARAY977BZT",
+        "NDARAZ532KK0",
+        "NDARCE912ZXW",
+        "NDARCM214WFE",
+        "NDARDL033XRG",
+        "NDARDT889RT9",
+        "NDARDZ794ZVP",
+        "NDAREV869CPW",
+        "NDARFN221WW5",
+        "NDARFV289RKB",
+        "NDARFY623ZTE",
+        "NDARGA890MKA",
+        "NDARHN206XY3",
+        "NDARHP518FUR",
+        "NDARJL292RYV",
+        "NDARKM199DXW",
+        "NDARKW236TN7",
+    ],
+    "R10": [
+        "NDARAR935TGZ",
+        "NDARAV474ADJ",
+        "NDARCB869VM8",
+        "NDARCJ667UPL",
+        "NDARCM677TC1",
+        "NDARET671FTC",
+        "NDARKM061NHZ",
+        "NDARLD501HDK",
+        "NDARLL176DJR",
+        "NDARMT791WDH",
+        "NDARMW299ZAB",
+        "NDARNC405WJA",
+        "NDARNP962TJK",
+        "NDARPB967KU7",
+        "NDARRU560AGK",
+        "NDARTB173LY2",
+        "NDARUW377KAE",
+        "NDARVH565FX9",
+        "NDARVP799KGY",
+        "NDARVY962GB5",
+    ],
+    "R9": [
+        "NDARAC589YMB",
+        "NDARAC853CR6",
+        "NDARAH239PGG",
+        "NDARAL897CYV",
+        "NDARAN160GUF",
+        "NDARAP049KXJ",
+        "NDARAP457WB5",
+        "NDARAW216PM7",
+        "NDARBA004KBT",
+        "NDARBD328NUQ",
+        "NDARBF042LDM",
+        "NDARBH019KPD",
+        "NDARBH728DFK",
+        "NDARBM370JCB",
+        "NDARBU183TDJ",
+        "NDARBW971DCW",
+        "NDARBZ444ZHK",
+        "NDARCC620ZFT",
+        "NDARCD182XT1",
+        "NDARCK113CJM",
+    ],
+    "R8": [
+        "NDARAB514MAJ",
+        "NDARAD571FLB",
+        "NDARAF003VCL",
+        "NDARAG191AE8",
+        "NDARAJ977PRJ",
+        "NDARAP912JK3",
+        "NDARAV454VF0",
+        "NDARAY298THW",
+        "NDARBJ375VP4",
+        "NDARBT436PMT",
+        "NDARBV630BK6",
+        "NDARCB627KDN",
+        "NDARCC059WTH",
+        "NDARCM953HKD",
+        "NDARCN681CXW",
+        "NDARCT889DMB",
+        "NDARDJ204EPU",
+        "NDARDJ544BU5",
+        "NDARDP292DVC",
+        "NDARDW178AC6",
+    ],
+    "R7": [
+        "NDARAY475AKD",
+        "NDARBW026UGE",
+        "NDARCK162REX",
+        "NDARCK481KRH",
+        "NDARCV378MMX",
+        "NDARCX462NVA",
+        "NDARDJ970ELG",
+        "NDARDU617ZW1",
+        "NDAREM609ZXW",
+        "NDAREW074ZM2",
+        "NDARFE555KXB",
+        "NDARFT176NJP",
+        "NDARGK442YHH",
+        "NDARGM439FZD",
+        "NDARGT634DUJ",
+        "NDARHE283KZN",
+        "NDARHG260BM9",
+        "NDARHL684WYU",
+        "NDARHN224TPA",
+        "NDARHP841RMR",
+    ],
+    "R6": [
+        "NDARAD224CRB",
+        "NDARAE301XTM",
+        "NDARAT680GJA",
+        "NDARCA578CEB",
+        "NDARDZ147ETZ",
+        "NDARFL793LDE",
+        "NDARFX710UZA",
+        "NDARGE994BMX",
+        "NDARGP191YHN",
+        "NDARGV436PFT",
+        "NDARHF545HFW",
+        "NDARHP039DBU",
+        "NDARHT774ZK1",
+        "NDARJA830BYV",
+        "NDARKB614KGY",
+        "NDARKM250ET5",
+        "NDARKZ085UKQ",
+        "NDARLB581AXF",
+        "NDARNJ899HW7",
+        "NDARRZ606EDP",
+    ],
+    "R4": [
+        "NDARAC350BZ0",
+        "NDARAD615WLJ",
+        "NDARAG584XLU",
+        "NDARAH503YG1",
+        "NDARAX272ZJL",
+        "NDARAY461TZZ",
+        "NDARBC734UVY",
+        "NDARBL444FBA",
+        "NDARBT640EBN",
+        "NDARBU098PJT",
+        "NDARBU928LV0",
+        "NDARBV059CGE",
+        "NDARCG037CX4",
+        "NDARCG947ZC0",
+        "NDARCH001CN2",
+        "NDARCU001ZN7",
+        "NDARCW497XW2",
+        "NDARCX053GU5",
+        "NDARDF568GL5",
+        "NDARDJ092YKH",
+    ],
+    "R5": [
+        "NDARAH793FBF",
+        "NDARAJ689BVN",
+        "NDARAP785CTE",
+        "NDARAU708TL8",
+        "NDARBE091BGD",
+        "NDARBE103DHM",
+        "NDARBF851NH6",
+        "NDARBH228RDW",
+        "NDARBJ674TVU",
+        "NDARBM433VER",
+        "NDARCA740UC8",
+        "NDARCU633GCZ",
+        "NDARCU736GZ1",
+        "NDARCU744XWL",
+        "NDARDC843HHM",
+        "NDARDH086ZKK",
+        "NDARDL305BT8",
+        "NDARDU853XZ6",
+        "NDARDV245WJG",
+        "NDAREC480KFA",
+    ],
+    "R3": [
+        "NDARAA948VFH",
+        "NDARAD774HAZ",
+        "NDARAE828CML",
+        "NDARAG340ERT",
+        "NDARBA839HLG",
+        "NDARBE641DGZ",
+        "NDARBG574KF4",
+        "NDARBM642JFT",
+        "NDARCL016NHB",
+        "NDARCV944JA6",
+        "NDARCY178KJP",
+        "NDARDY150ZP9",
+        "NDAREC542MH3",
+        "NDAREK549XUQ",
+        "NDAREM887YY8",
+        "NDARFA815FXE",
+        "NDARFF644ZGD",
+        "NDARFV557XAA",
+        "NDARFV780ABD",
+        "NDARGB102NWJ",
+    ],
+    "R2": [
+        "NDARAB793GL3",
+        "NDARAM675UR8",
+        "NDARBM839WR5",
+        "NDARBU730PN8",
+        "NDARCT974NAJ",
+        "NDARCW933FD5",
+        "NDARCZ770BRG",
+        "NDARDW741HCF",
+        "NDARDZ058NZN",
+        "NDAREC377AU2",
+        "NDAREM500WWH",
+        "NDAREV527ZRF",
+        "NDAREV601CE7",
+        "NDARFF070XHV",
+        "NDARFR108JNB",
+        "NDARFT305CG1",
+        "NDARGA056TMW",
+        "NDARGH775KF5",
+        "NDARGJ878ZP4",
+        "NDARHA387FPM",
+    ],
+    "R1": [
+        "NDARAC904DMU",
+        "NDARAM704GKZ",
+        "NDARAP359UM6",
+        "NDARBD879MBX",
+        "NDARBH024NH2",
+        "NDARBK082PDD",
+        "NDARCA153NKE",
+        "NDARCE721YB5",
+        "NDARCJ594BWQ",
+        "NDARCN669XPR",
+        "NDARCW094JCG",
+        "NDARCZ947WU5",
+        "NDARDH670PXH",
+        "NDARDL511UND",
+        "NDARDU986RBM",
+        "NDAREM731BYM",
+        "NDAREN519BLJ",
+        "NDARFK610GY5",
+        "NDARFT581ZW5",
+        "NDARFW972KFQ",
+    ],
+}
+
+
+class EEGChallengeDataset(EEGDashDataset):
+    def __init__(
+        self,
+        release: str,
+        cache_dir: str,
+        mini: bool = True,
+        query: dict | None = None,
+        s3_bucket: str | None = "s3://nmdatasets/NeurIPS25",
+        **kwargs,
+    ):
+        """Create a new EEGDashDataset from a given query or local BIDS dataset
+        directory and dataset name. An EEGDashDataset is a pooled collection of
+        EEGDashBaseDataset instances (individual recordings) and is a subclass
+        of braindecode's BaseConcatDataset.
+
+        Parameters
+        ----------
+        release : str
+            Release name. Can be one of ["R1", ..., "R11"].
+        mini : bool, default True
+            Whether to use the mini-release version of the dataset. It is
+            recommended to use the mini version for faster training and
+            evaluation.
+        query : dict | None
+            Optionally a dictionary that specifies a query to be executed,
+            in addition to the dataset (automatically inferred from the release
+            argument). See EEGDash.find() for details on the query format.
+        cache_dir : str
+            A directory where the dataset will be cached locally.
+        s3_bucket : str | None
+            An optional S3 bucket URI to use instead of the
+            default OpenNeuro bucket for loading data files.
+        kwargs : dict
+            Additional keyword arguments to be passed to the EEGDashDataset
+            constructor.
+
+        """
+        self.release = release
+        self.mini = mini
+
+        if release not in RELEASE_TO_OPENNEURO_DATASET_MAP:
+            raise ValueError(
+                f"Unknown release: {release}, expected one of "
+                f"{list(RELEASE_TO_OPENNEURO_DATASET_MAP.keys())}"
+            )
+
+        dataset_parameters = []
+        if isinstance(release, str):
+            dataset_parameters.append(RELEASE_TO_OPENNEURO_DATASET_MAP[release])
+        else:
+            raise ValueError(
+                f"Unknown release type: {type(release)}, the expected type is str."
+            )
+
+        if query and "dataset" in query:
+            raise ValueError(
+                "Querying by `dataset` is not possible with the EEGChallengeDataset "
+                "class. Please use the `release` argument instead, or use "
+                "EEGDashDataset directly."
+            )
+
+        if self.mini:
+            if query and "subject" in query:
+                raise ValueError(
+                    "Querying by `subject` is not possible with EEGChallengeDataset "
+                    "when `mini=True`, since the mini release already fixes the "
+                    "subject selection. Set `mini=False` to use the `subject` "
+                    "selection."
+                )
+            kwargs["subject"] = SUBJECT_MINI_RELEASE_MAP[release]
+            s3_bucket = f"{s3_bucket}/{release}_mini_L100_bdf"
+        else:
+            s3_bucket = f"{s3_bucket}/{release}_L100_bdf"
+
+        super().__init__(
+            dataset=dataset_parameters,
+            query=query,
+            cache_dir=cache_dir,
+            s3_bucket=s3_bucket,
+            **kwargs,
+        )
+
+
+registered_classes = register_openneuro_datasets(
+    summary_file=Path(__file__).with_name("dataset_summary.csv"),
+    base_class=EEGDashDataset,
+    namespace=globals(),
+)
+
+
+__all__ = ["EEGChallengeDataset"] + list(registered_classes.keys())
{eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89/eegdash.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: eegdash
-Version: 0.3.5.dev87
+Version: 0.3.5.dev89
 Summary: EEG data for machine learning
 Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
 License-Expression: GPL-3.0-only
@@ -60,22 +60,9 @@ Requires-Dist: memory_profiler; extra == "docs"
 Requires-Dist: ipython; extra == "docs"
 Requires-Dist: lightgbm; extra == "docs"
 Provides-Extra: all
-Requires-Dist: pre-commit; extra == "all"
-Requires-Dist: pytest; extra == "all"
-Requires-Dist: pytest-cov; extra == "all"
-Requires-Dist: codecov; extra == "all"
-Requires-Dist: pytest_cases; extra == "all"
-Requires-Dist: pytest-benchmark; extra == "all"
-Requires-Dist: sphinx; extra == "all"
-Requires-Dist: sphinx_design; extra == "all"
-Requires-Dist: sphinx_gallery; extra == "all"
-Requires-Dist: sphinx_rtd_theme; extra == "all"
-Requires-Dist: pydata-sphinx-theme; extra == "all"
-Requires-Dist: sphinx-autobuild; extra == "all"
-Requires-Dist: numpydoc; extra == "all"
-Requires-Dist: memory_profiler; extra == "all"
-Requires-Dist: ipython; extra == "all"
-Requires-Dist: lightgbm; extra == "all"
+Requires-Dist: eegdash[docs]; extra == "all"
+Requires-Dist: eegdash[dev]; extra == "all"
+Requires-Dist: eegdash[tests]; extra == "all"
 Dynamic: license-file
 
 # EEG-Dash

{eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash.egg-info/SOURCES.txt
@@ -47,6 +47,7 @@ tests/test_dataset_registration.py
 tests/test_eegdash.py
 tests/test_functional.py
 tests/test_init.py
+tests/test_minirelease.py
 tests/test_mongo_connection.py
 tests/test_offline.py
 tests/test_query.py

{eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/eegdash.egg-info/requires.txt
@@ -16,22 +16,9 @@ eeglabio
 tabulate
 
 [all]
-pre-commit
-pytest
-pytest-cov
-codecov
-pytest_cases
-pytest-benchmark
-sphinx
-sphinx_design
-sphinx_gallery
-sphinx_rtd_theme
-pydata-sphinx-theme
-sphinx-autobuild
-numpydoc
-memory_profiler
-ipython
-lightgbm
+eegdash[docs]
+eegdash[dev]
+eegdash[tests]
 
 [dev]
 pre-commit

{eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/pyproject.toml
@@ -84,22 +84,9 @@ docs = [
 ]
 
 all = [
-    "pre-commit",
-    "pytest",
-    "pytest-cov",
-    "codecov",
-    "pytest_cases",
-    "pytest-benchmark",
-    "sphinx",
-    "sphinx_design",
-    "sphinx_gallery",
-    "sphinx_rtd_theme",
-    "pydata-sphinx-theme",
-    "sphinx-autobuild",
-    "numpydoc",
-    "memory_profiler",
-    "ipython",
-    "lightgbm",
+    "eegdash[docs]",
+    "eegdash[dev]",
+    "eegdash[tests]",
 ]
 
 [tool.setuptools]
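
With this change the `all` extra is defined in terms of the package's own `docs`, `dev`, and `tests` extras instead of repeating every dependency, so the three lists can no longer drift out of sync. From the user's side nothing changes: `pip install "eegdash[all]"` still pulls in the documentation, development, and test dependencies, now resolved transitively through the self-referencing extras (supported by current pip and setuptools).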
{eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_dataset.py
@@ -16,7 +16,7 @@ CACHE_DIR.mkdir(parents=True, exist_ok=True)
 
 
 def _load_release(release):
-    ds = EEGChallengeDataset(release=release, cache_dir=CACHE_DIR)
+    ds = EEGChallengeDataset(release=release, mini=False, cache_dir=CACHE_DIR)
     getattr(ds, "description", None)
     return ds
 
@@ -31,10 +31,10 @@ def warmed_mongo():
 
 def test_eeg_challenge_dataset_initialization():
     """Test the initialization of EEGChallengeDataset."""
-    dataset = EEGChallengeDataset(release="R5", cache_dir=CACHE_DIR)
+    dataset = EEGChallengeDataset(release="R5", mini=False, cache_dir=CACHE_DIR)
 
     release = "R5"
-    expected_bucket_prefix = f"s3://nmdatasets/NeurIPS25/{release}_L100"
+    expected_bucket_prefix = f"s3://nmdatasets/NeurIPS25/{release}_L100_bdf"
     assert dataset.s3_bucket == expected_bucket_prefix, (
         f"Unexpected s3_bucket: {dataset.s3_bucket} (expected {expected_bucket_prefix})"
     )
@@ -60,7 +60,7 @@ def test_eeg_challenge_dataset_initialization():
 
 @pytest.mark.parametrize("release, number_files", RELEASE_FILES)
 def test_eeg_challenge_dataset_amount_files(release, number_files):
-    dataset = EEGChallengeDataset(release=release, cache_dir=CACHE_DIR)
+    dataset = EEGChallengeDataset(release=release, mini=False, cache_dir=CACHE_DIR)
     assert len(dataset.datasets) == number_files
 
 
@@ -88,22 +88,31 @@ def test_mongodb_load_under_sometime(release):
     assert duration < 30, f"{release} took {duration:.2f}s"
 
 
-def test_consuming_data_r5():
+@pytest.mark.parametrize("mini", [True, False])
+@pytest.mark.parametrize("release", RELEASES)
+def test_consuming_one_raw(release, mini):
+    if mini:
+        cache_dir = CACHE_DIR / "mini"
+        cache_dir.mkdir(parents=True, exist_ok=True)
+    else:
+        cache_dir = CACHE_DIR
     dataset_obj = EEGChallengeDataset(
-        release="R5",
-        query=dict(task="RestingState", subject="NDARAC350XUM"),
-        cache_dir=CACHE_DIR,
+        release=release,
+        task="RestingState",
+        cache_dir=cache_dir,
+        mini=mini,
     )
     raw = dataset_obj.datasets[0].raw
     assert raw is not None
 
 
 @pytest.mark.parametrize("eeg_dash_instance", [None, EEGDash()])
-def test_eeg_dash_integration(eeg_dash_instance):
+def test_eeg_dash_integration(eeg_dash_instance, release="R5", mini=True):
     dataset_obj = EEGChallengeDataset(
-        release="R5",
-        query=dict(task="RestingState", subject="NDARAC350XUM"),
+        release=release,
+        task="RestingState",
         cache_dir=CACHE_DIR,
+        mini=mini,
         eeg_dash_instance=eeg_dash_instance,
     )
     raw = dataset_obj.datasets[0].raw
eegdash-0.3.5.dev89/tests/test_minirelease.py
@@ -0,0 +1,112 @@
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+from eegdash.dataset import EEGChallengeDataset
+
+# Shared cache directory constant for all tests in the suite.
+EEG_CHALLENGE_CACHE_DIR = (
+    Path.home() / "mne_data" / "eeg_challenge_cache" / "mini"
+).resolve()
+EEG_CHALLENGE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
+
+@pytest.fixture(scope="session")
+def warmed_mongo():
+    """Skip tests gracefully if Mongo is not reachable."""
+    try:
+        # Lazy import to avoid circulars; constructing EEGChallengeDataset will touch DB
+        _ = EEGChallengeDataset(
+            release="R5", mini=True, cache_dir=EEG_CHALLENGE_CACHE_DIR
+        )
+    except Exception:
+        pytest.skip("Mongo not reachable")
+
+
+def test_minirelease_vs_full_counts_and_subjects(warmed_mongo):
+    """Mini release should have fewer files and (typically) fewer subjects than full release."""
+    release = "R5"
+
+    ds_mini = EEGChallengeDataset(
+        release=release, mini=True, cache_dir=EEG_CHALLENGE_CACHE_DIR
+    )
+    ds_full = EEGChallengeDataset(
+        release=release, mini=False, cache_dir=EEG_CHALLENGE_CACHE_DIR
+    )
+
+    # File count: mini must be strictly smaller than full
+    assert len(ds_mini.datasets) < len(ds_full.datasets)
+
+    # Subject cardinality: mini should be strictly less than full, and > 0
+    subj_mini = ds_mini.description["subject"].nunique()
+    subj_full = ds_full.description["subject"].nunique()
+    assert subj_mini > 0
+    assert subj_mini < subj_full
+
+
+def test_minirelease_subject_raw_equivalence(warmed_mongo):
+    """For a subject present in the mini set, loading that subject in mini vs full yields identical raw data."""
+    release = "R5"
+
+    # Pick a concrete subject from the mini set to avoid guessing
+    ds_mini_all = EEGChallengeDataset(
+        release=release, mini=True, cache_dir=EEG_CHALLENGE_CACHE_DIR
+    )
+    assert len(ds_mini_all.datasets) > 0
+    subject = ds_mini_all.description["subject"].iloc[0]
+
+    ds_mini = EEGChallengeDataset(
+        release=release, mini=True, cache_dir=EEG_CHALLENGE_CACHE_DIR, subject=subject
+    )
+    ds_full = EEGChallengeDataset(
+        release=release, mini=False, cache_dir=EEG_CHALLENGE_CACHE_DIR, subject=subject
+    )
+
+    assert len(ds_mini.datasets) > 0
+    assert len(ds_full.datasets) > 0
+
+    # Identify a common BIDS file (bidspath) present in both (bucket prefixes differ between mini/full)
+    mini_paths = {d.record["bidspath"] for d in ds_mini.datasets}
+    full_paths = {d.record["bidspath"] for d in ds_full.datasets}
+    intersection = mini_paths & full_paths
+    assert intersection, "No common recordings found for the chosen subject"
+
+    common_path = next(iter(intersection))
+    mini_idx = next(
+        i for i, d in enumerate(ds_mini.datasets) if d.record["bidspath"] == common_path
+    )
+    full_idx = next(
+        i for i, d in enumerate(ds_full.datasets) if d.record["bidspath"] == common_path
+    )
+
+    raw_mini = ds_mini.datasets[mini_idx].raw
+    raw_full = ds_full.datasets[full_idx].raw
+
+    # Basic metadata equivalence
+    assert raw_mini.info["sfreq"] == raw_full.info["sfreq"]
+    assert raw_mini.info["nchan"] == raw_full.info["nchan"]
+    assert raw_mini.ch_names == raw_full.ch_names
+
+    # Compare a small data slice to ensure content equality (avoid loading entire arrays into memory)
+    n_samples = min(1000, raw_mini.n_times, raw_full.n_times)
+    assert n_samples > 0
+    data_mini = raw_mini.get_data(picks=[0], start=0, stop=n_samples)
+    data_full = raw_full.get_data(picks=[0], start=0, stop=n_samples)
+    assert np.allclose(data_mini, data_full, rtol=1e-6, atol=0), (
+        "Raw data mismatch between mini and full"
+    )
+
+
+def test_minirelease_consume_everything(warmed_mongo):
+    """Simply try to load all data in the mini release to catch any errors."""
+    release = "R5"
+    ds_mini = EEGChallengeDataset(
+        release=release, mini=True, cache_dir=EEG_CHALLENGE_CACHE_DIR
+    )
+
+    for dataset in ds_mini.datasets:
+        raw = dataset.raw  # noqa: F841
+        description = dataset.description  # noqa: F841
+        assert raw is not None
+        assert description is not None
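
The new suite can be run on its own with `pytest tests/test_minirelease.py`. Note that the `warmed_mongo` fixture skips any test that requests it when the metadata database is unreachable, so a failure here should point to a real data or loader problem rather than connectivity.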
{eegdash-0.3.5.dev87 → eegdash-0.3.5.dev89}/tests/test_offline.py
@@ -4,7 +4,7 @@ import pytest
 
 from eegdash import EEGDash, EEGDashDataset
 
-CACHE_DIR = (Path.home() / "mne_data" / "eeg_challenge_cache").resolve()
+CACHE_DIR = (Path.home() / "mne_data" / "openneuro").resolve()
 CACHE_DIR.mkdir(parents=True, exist_ok=True)
 
 

eegdash-0.3.5.dev87/eegdash/dataset.py
@@ -1,82 +0,0 @@
-from pathlib import Path
-
-from .api import EEGDashDataset
-from .registry import register_openneuro_datasets
-
-RELEASE_TO_OPENNEURO_DATASET_MAP = {
-    "R11": "ds005516",
-    "R10": "ds005515",
-    "R9": "ds005514",
-    "R8": "ds005512",
-    "R7": "ds005511",
-    "R6": "ds005510",
-    "R4": "ds005508",
-    "R5": "ds005509",
-    "R3": "ds005507",
-    "R2": "ds005506",
-    "R1": "ds005505",
-}
-
-
-class EEGChallengeDataset(EEGDashDataset):
-    def __init__(
-        self,
-        release: str,
-        cache_dir: str,
-        query: dict | None = None,
-        s3_bucket: str | None = "s3://nmdatasets/NeurIPS25",
-        **kwargs,
-    ):
-        """Create a new EEGDashDataset from a given query or local BIDS dataset directory
-        and dataset name. An EEGDashDataset is pooled collection of EEGDashBaseDataset
-        instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.
-
-        Parameters
-        ----------
-        release: str
-            Release name. Can be one of ["R1", ..., "R11"]
-        query : dict | None
-            Optionally a dictionary that specifies a query to be executed,
-            in addition to the dataset (automatically inferred from the release argument).
-            See EEGDash.find() for details on the query format.
-        cache_dir : str
-            A directory where the dataset will be cached locally.
-        s3_bucket : str | None
-            An optional S3 bucket URI to use instead of the
-            default OpenNeuro bucket for loading data files.
-        kwargs : dict
-            Additional keyword arguments to be passed to the EEGDashDataset
-            constructor.
-
-        """
-        self.release = release
-        if release not in RELEASE_TO_OPENNEURO_DATASET_MAP:
-            raise ValueError(f"Unknown release: {release}")
-
-        dataset = RELEASE_TO_OPENNEURO_DATASET_MAP[release]
-        if query is None:
-            query = {"dataset": dataset}
-        elif "dataset" not in query:
-            query["dataset"] = dataset
-        elif query["dataset"] != dataset:
-            raise ValueError(
-                f"Query dataset {query['dataset']} does not match the release {release} "
-                f"which corresponds to dataset {dataset}."
-            )
-
-        super().__init__(
-            query=query,
-            cache_dir=cache_dir,
-            s3_bucket=f"{s3_bucket}/{release}_L100",
-            **kwargs,
-        )
-
-
-registered_classes = register_openneuro_datasets(
-    summary_file=Path(__file__).with_name("dataset_summary.csv"),
-    base_class=EEGDashDataset,
-    namespace=globals(),
-)
-
-
-__all__ = ["EEGChallengeDataset"] + list(registered_classes.keys())