eegdash 0.3.6.dev182011805__py3-none-any.whl → 0.3.7.dev104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


eegdash/__init__.py CHANGED
@@ -1,10 +1,11 @@
  from .api import EEGDash, EEGDashDataset
  from .dataset import EEGChallengeDataset
+ from .hbn import preprocessing  # noqa: F401
  from .utils import __init__mongo_client

  __init__mongo_client()


- __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset"]
+ __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset", "preprocessing"]

- __version__ = "0.3.6.dev182011805"
+ __version__ = "0.3.7.dev104"
eegdash/api.py CHANGED
@@ -6,15 +6,18 @@ from typing import Any, Mapping

  import mne
  import numpy as np
+ import platformdirs
  import xarray as xr
  from dotenv import load_dotenv
  from joblib import Parallel, delayed
+ from mne.utils import warn
  from mne_bids import get_bids_path_from_fname, read_raw_bids
  from pymongo import InsertOne, UpdateOne
  from s3fs import S3FileSystem

  from braindecode.datasets import BaseConcatDataset

+ from .const import RELEASE_TO_OPENNEURO_DATASET_MAP
  from .data_config import config as data_config
  from .data_utils import EEGBIDSDataset, EEGDashBaseDataset
  from .mongodb import MongoConnectionManager
@@ -693,9 +696,8 @@ class EEGDash:
  class EEGDashDataset(BaseConcatDataset):
      def __init__(
          self,
-         query: dict | None = None,
-         cache_dir: str = "~/eegdash_cache",
-         dataset: str | list[str] | None = None,
+         cache_dir: str | Path,
+         query: dict[str, Any] = None,
          description_fields: list[str] = [
              "subject",
              "session",
@@ -706,9 +708,10 @@ class EEGDashDataset(BaseConcatDataset):
              "sex",
          ],
          s3_bucket: str | None = None,
-         data_dir: str | None = None,
          eeg_dash_instance=None,
          records: list[dict] | None = None,
+         offline_mode: bool = False,
+         n_jobs: int = -1,
          **kwargs,
      ):
          """Create a new EEGDashDataset from a given query or local BIDS dataset directory
@@ -754,35 +757,54 @@ class EEGDashDataset(BaseConcatDataset):
          records : list[dict] | None
              Optional list of pre-fetched metadata records. If provided, the dataset is
              constructed directly from these records without querying MongoDB.
+         offline_mode : bool
+             If True, do not attempt to query MongoDB at all. This is useful if you want to
+             work with a local cache only, or if you are offline.
+         n_jobs : int
+             The number of jobs to run in parallel (default is -1, meaning using all processors).
          kwargs : dict
              Additional keyword arguments to be passed to the EEGDashBaseDataset
              constructor.

          """
-         self.cache_dir = cache_dir
+         self.cache_dir = Path(cache_dir or platformdirs.user_cache_dir("EEGDash"))
+         if not self.cache_dir.exists():
+             warn(f"Cache directory does not exist, creating it: {self.cache_dir}")
+             self.cache_dir.mkdir(exist_ok=True, parents=True)
          self.s3_bucket = s3_bucket
          self.eeg_dash = eeg_dash_instance
+
+         # Separate query kwargs from other kwargs passed to the BaseDataset constructor
+         self.query = query or {}
+         self.query.update(
+             {k: v for k, v in kwargs.items() if k in EEGDash._ALLOWED_QUERY_FIELDS}
+         )
+         base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in self.query}
+         if "dataset" not in self.query:
+             raise ValueError("You must provide a 'dataset' argument")
+
+         self.data_dir = self.cache_dir / self.query["dataset"]
+         if self.query["dataset"] in RELEASE_TO_OPENNEURO_DATASET_MAP.values():
+             warn(
+                 "If you are not participating in the competition, you can ignore this warning!"
+                 "\n\n"
+                 "EEG 2025 Competition Data Notice:\n"
+                 "---------------------------------\n"
+                 " You are loading the dataset that is used in the EEG 2025 Competition:\n"
+                 "IMPORTANT: The data accessed via `EEGDashDataset` is NOT identical to what you get from `EEGChallengeDataset` object directly.\n"
+                 "and it is not what you will use for the competition. Downsampling and filtering were applied to the data"
+                 "to allow more people to participate.\n"
+                 "\n"
+                 "If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data.\n"
+                 "\n",
+                 UserWarning,
+                 module="eegdash",
+             )
          _owns_client = False
          if self.eeg_dash is None and records is None:
              self.eeg_dash = EEGDash()
              _owns_client = True

-         # Separate query kwargs from other kwargs passed to the BaseDataset constructor
-         query_kwargs = {
-             k: v for k, v in kwargs.items() if k in EEGDash._ALLOWED_QUERY_FIELDS
-         }
-         base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in query_kwargs}
-
-         # If user provided a dataset name via the dedicated parameter (and we're not
-         # loading from a local directory), treat it as a query filter. Accept str or list.
-         if data_dir is None and dataset is not None:
-             # Allow callers to pass a single dataset id (str) or a list of them.
-             # If list is provided, let _build_query_from_kwargs turn it into $in later.
-             query_kwargs.setdefault("dataset", dataset)
-
-         # Allow mixing raw DB query with additional keyword filters. Both will be
-         # merged by EEGDash.find() (logical AND), so we do not raise here.
-
          try:
              if records is not None:
                  self.records = records
@@ -795,42 +817,26 @@ class EEGDashDataset(BaseConcatDataset):
                      )
                      for record in self.records
                  ]
-             elif data_dir:
-                 # This path loads from a local directory and is not affected by DB query logic
-                 if isinstance(data_dir, (str, Path)):
-                     datasets = self.load_bids_dataset(
-                         dataset=dataset
-                         if isinstance(dataset, str)
-                         else (dataset[0] if dataset else None),
-                         data_dir=data_dir,
+             elif offline_mode:  # only assume local data is complete if in offline mode
+                 if self.data_dir.exists():
+                     # This path loads from a local directory and is not affected by DB query logic
+                     datasets = self.load_bids_dataset(
+                         dataset=self.query["dataset"],
+                         data_dir=self.data_dir,
                          description_fields=description_fields,
                          s3_bucket=s3_bucket,
+                         n_jobs=n_jobs,
                          **base_dataset_kwargs,
                      )
                  else:
-                     assert dataset is not None, (
-                         "dataset must be provided when passing multiple data_dir"
-                     )
-                     assert len(data_dir) == len(dataset), (
-                         "Number of datasets and directories must match"
+                     raise ValueError(
+                         f"Offline mode is enabled, but local data_dir {self.data_dir} does not exist."
                      )
-                     datasets = []
-                     for i, _ in enumerate(data_dir):
-                         datasets.extend(
-                             self.load_bids_dataset(
-                                 dataset=dataset[i],
-                                 data_dir=data_dir[i],
-                                 description_fields=description_fields,
-                                 s3_bucket=s3_bucket,
-                                 **base_dataset_kwargs,
-                             )
-                         )
-             elif query is not None or query_kwargs:
+             elif self.query:
                  # This is the DB query path that we are improving
-                 datasets = self.find_datasets(
-                     query=query,
+                 datasets = self._find_datasets(
+                     query=self.eeg_dash._build_query_from_kwargs(**self.query),
                      description_fields=description_fields,
-                     query_kwargs=query_kwargs,
                      base_dataset_kwargs=base_dataset_kwargs,
                  )
          # We only need filesystem if we need to access S3
@@ -860,11 +866,10 @@ class EEGDashDataset(BaseConcatDataset):
                  return result
          return None

-     def find_datasets(
+     def _find_datasets(
          self,
          query: dict[str, Any] | None,
          description_fields: list[str],
-         query_kwargs: dict,
          base_dataset_kwargs: dict,
      ) -> list[EEGDashBaseDataset]:
          """Helper method to find datasets in the MongoDB collection that satisfy the
@@ -888,11 +893,7 @@ class EEGDashDataset(BaseConcatDataset):
          """
          datasets: list[EEGDashBaseDataset] = []

-         # Build records using either a raw query OR keyword filters, but not both.
-         # Note: callers may accidentally pass an empty dict for `query` along with
-         # kwargs. In that case, treat it as if no query was provided and rely on kwargs.
-         # Always delegate merging of raw query + kwargs to EEGDash.find
-         self.records = self.eeg_dash.find(query, **query_kwargs)
+         self.records = self.eeg_dash.find(query)

          for record in self.records:
              description = {}
@@ -903,8 +904,8 @@ class EEGDashDataset(BaseConcatDataset):
              datasets.append(
                  EEGDashBaseDataset(
                      record,
-                     self.cache_dir,
-                     self.s3_bucket,
+                     cache_dir=self.cache_dir,
+                     s3_bucket=self.s3_bucket,
                      description=description,
                      **base_dataset_kwargs,
                  )
@@ -917,6 +918,7 @@ class EEGDashDataset(BaseConcatDataset):
          data_dir: str | Path,
          description_fields: list[str],
          s3_bucket: str | None = None,
+         n_jobs: int = -1,
          **kwargs,
      ):
          """Helper method to load a single local BIDS dataset and return it as a list of
@@ -931,13 +933,17 @@ class EEGDashDataset(BaseConcatDataset):
          description_fields : list[str]
              A list of fields to be extracted from the dataset records
              and included in the returned dataset description(s).
+         s3_bucket : str | None
+             The S3 bucket to upload the dataset files to (if any).
+         n_jobs : int
+             The number of jobs to run in parallel (default is -1, meaning using all processors).

          """
          bids_dataset = EEGBIDSDataset(
              data_dir=data_dir,
              dataset=dataset,
          )
-         datasets = Parallel(n_jobs=-1, prefer="threads", verbose=1)(
+         datasets = Parallel(n_jobs=n_jobs, prefer="threads", verbose=1)(
              delayed(self.get_base_dataset_from_bids_file)(
                  bids_dataset=bids_dataset,
                  bids_file=bids_file,
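Taken together, the `api.py` changes make `cache_dir` a required leading argument, fold the old `dataset=` parameter into the query (a missing `'dataset'` key now raises `ValueError`), replace `data_dir` loading with `offline_mode`, and expose `n_jobs` for parallel local loading. A hedged before/after sketch; the dataset id and cache path are illustrative, and passing `dataset=` as a keyword assumes it is among `EEGDash._ALLOWED_QUERY_FIELDS`:

    from eegdash import EEGDashDataset

    # 0.3.6 call shape (dataset was a dedicated parameter, cache_dir had a default):
    # ds = EEGDashDataset(dataset="ds005505", cache_dir="~/eegdash_cache")

    # 0.3.7 call shape:
    ds = EEGDashDataset(
        cache_dir="eegdash_cache",  # required; created (with a warning) if missing
        dataset="ds005505",         # merged into self.query; omitting it raises ValueError
        offline_mode=False,         # True skips MongoDB and requires a complete local copy
        n_jobs=-1,                  # workers for the local BIDS loading path
    )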
eegdash/const.py ADDED
@@ -0,0 +1,258 @@
+ RELEASE_TO_OPENNEURO_DATASET_MAP = {
+     "R11": "ds005516",
+     "R10": "ds005515",
+     "R9": "ds005514",
+     "R8": "ds005512",
+     "R7": "ds005511",
+     "R6": "ds005510",
+     "R4": "ds005508",
+     "R5": "ds005509",
+     "R3": "ds005507",
+     "R2": "ds005506",
+     "R1": "ds005505",
+ }
+
+ SUBJECT_MINI_RELEASE_MAP = {
+     "R11": [
+         "NDARAB678VYW",
+         "NDARAG788YV9",
+         "NDARAM946HJE",
+         "NDARAY977BZT",
+         "NDARAZ532KK0",
+         "NDARCE912ZXW",
+         "NDARCM214WFE",
+         "NDARDL033XRG",
+         "NDARDT889RT9",
+         "NDARDZ794ZVP",
+         "NDAREV869CPW",
+         "NDARFN221WW5",
+         "NDARFV289RKB",
+         "NDARFY623ZTE",
+         "NDARGA890MKA",
+         "NDARHN206XY3",
+         "NDARHP518FUR",
+         "NDARJL292RYV",
+         "NDARKM199DXW",
+         "NDARKW236TN7",
+     ],
+     "R10": [
+         "NDARAR935TGZ",
+         "NDARAV474ADJ",
+         "NDARCB869VM8",
+         "NDARCJ667UPL",
+         "NDARCM677TC1",
+         "NDARET671FTC",
+         "NDARKM061NHZ",
+         "NDARLD501HDK",
+         "NDARLL176DJR",
+         "NDARMT791WDH",
+         "NDARMW299ZAB",
+         "NDARNC405WJA",
+         "NDARNP962TJK",
+         "NDARPB967KU7",
+         "NDARRU560AGK",
+         "NDARTB173LY2",
+         "NDARUW377KAE",
+         "NDARVH565FX9",
+         "NDARVP799KGY",
+         "NDARVY962GB5",
+     ],
+     "R9": [
+         "NDARAC589YMB",
+         "NDARAC853CR6",
+         "NDARAH239PGG",
+         "NDARAL897CYV",
+         "NDARAN160GUF",
+         "NDARAP049KXJ",
+         "NDARAP457WB5",
+         "NDARAW216PM7",
+         "NDARBA004KBT",
+         "NDARBD328NUQ",
+         "NDARBF042LDM",
+         "NDARBH019KPD",
+         "NDARBH728DFK",
+         "NDARBM370JCB",
+         "NDARBU183TDJ",
+         "NDARBW971DCW",
+         "NDARBZ444ZHK",
+         "NDARCC620ZFT",
+         "NDARCD182XT1",
+         "NDARCK113CJM",
+     ],
+     "R8": [
+         "NDARAB514MAJ",
+         "NDARAD571FLB",
+         "NDARAF003VCL",
+         "NDARAG191AE8",
+         "NDARAJ977PRJ",
+         "NDARAP912JK3",
+         "NDARAV454VF0",
+         "NDARAY298THW",
+         "NDARBJ375VP4",
+         "NDARBT436PMT",
+         "NDARBV630BK6",
+         "NDARCB627KDN",
+         "NDARCC059WTH",
+         "NDARCM953HKD",
+         "NDARCN681CXW",
+         "NDARCT889DMB",
+         "NDARDJ204EPU",
+         "NDARDJ544BU5",
+         "NDARDP292DVC",
+         "NDARDW178AC6",
+     ],
+     "R7": [
+         "NDARAY475AKD",
+         "NDARBW026UGE",
+         "NDARCK162REX",
+         "NDARCK481KRH",
+         "NDARCV378MMX",
+         "NDARCX462NVA",
+         "NDARDJ970ELG",
+         "NDARDU617ZW1",
+         "NDAREM609ZXW",
+         "NDAREW074ZM2",
+         "NDARFE555KXB",
+         "NDARFT176NJP",
+         "NDARGK442YHH",
+         "NDARGM439FZD",
+         "NDARGT634DUJ",
+         "NDARHE283KZN",
+         "NDARHG260BM9",
+         "NDARHL684WYU",
+         "NDARHN224TPA",
+         "NDARHP841RMR",
+     ],
+     "R6": [
+         "NDARAD224CRB",
+         "NDARAE301XTM",
+         "NDARAT680GJA",
+         "NDARCA578CEB",
+         "NDARDZ147ETZ",
+         "NDARFL793LDE",
+         "NDARFX710UZA",
+         "NDARGE994BMX",
+         "NDARGP191YHN",
+         "NDARGV436PFT",
+         "NDARHF545HFW",
+         "NDARHP039DBU",
+         "NDARHT774ZK1",
+         "NDARJA830BYV",
+         "NDARKB614KGY",
+         "NDARKM250ET5",
+         "NDARKZ085UKQ",
+         "NDARLB581AXF",
+         "NDARNJ899HW7",
+         "NDARRZ606EDP",
+     ],
+     "R4": [
+         "NDARAC350BZ0",
+         "NDARAD615WLJ",
+         "NDARAG584XLU",
+         "NDARAH503YG1",
+         "NDARAX272ZJL",
+         "NDARAY461TZZ",
+         "NDARBC734UVY",
+         "NDARBL444FBA",
+         "NDARBT640EBN",
+         "NDARBU098PJT",
+         "NDARBU928LV0",
+         "NDARBV059CGE",
+         "NDARCG037CX4",
+         "NDARCG947ZC0",
+         "NDARCH001CN2",
+         "NDARCU001ZN7",
+         "NDARCW497XW2",
+         "NDARCX053GU5",
+         "NDARDF568GL5",
+         "NDARDJ092YKH",
+     ],
+     "R5": [
+         "NDARAH793FBF",
+         "NDARAJ689BVN",
+         "NDARAP785CTE",
+         "NDARAU708TL8",
+         "NDARBE091BGD",
+         "NDARBE103DHM",
+         "NDARBF851NH6",
+         "NDARBH228RDW",
+         "NDARBJ674TVU",
+         "NDARBM433VER",
+         "NDARCA740UC8",
+         "NDARCU633GCZ",
+         "NDARCU736GZ1",
+         "NDARCU744XWL",
+         "NDARDC843HHM",
+         "NDARDH086ZKK",
+         "NDARDL305BT8",
+         "NDARDU853XZ6",
+         "NDARDV245WJG",
+         "NDAREC480KFA",
+     ],
+     "R3": [
+         "NDARAA948VFH",
+         "NDARAD774HAZ",
+         "NDARAE828CML",
+         "NDARAG340ERT",
+         "NDARBA839HLG",
+         "NDARBE641DGZ",
+         "NDARBG574KF4",
+         "NDARBM642JFT",
+         "NDARCL016NHB",
+         "NDARCV944JA6",
+         "NDARCY178KJP",
+         "NDARDY150ZP9",
+         "NDAREC542MH3",
+         "NDAREK549XUQ",
+         "NDAREM887YY8",
+         "NDARFA815FXE",
+         "NDARFF644ZGD",
+         "NDARFV557XAA",
+         "NDARFV780ABD",
+         "NDARGB102NWJ",
+     ],
+     "R2": [
+         "NDARAB793GL3",
+         "NDARAM675UR8",
+         "NDARBM839WR5",
+         "NDARBU730PN8",
+         "NDARCT974NAJ",
+         "NDARCW933FD5",
+         "NDARCZ770BRG",
+         "NDARDW741HCF",
+         "NDARDZ058NZN",
+         "NDAREC377AU2",
+         "NDAREM500WWH",
+         "NDAREV527ZRF",
+         "NDAREV601CE7",
+         "NDARFF070XHV",
+         "NDARFR108JNB",
+         "NDARFT305CG1",
+         "NDARGA056TMW",
+         "NDARGH775KF5",
+         "NDARGJ878ZP4",
+         "NDARHA387FPM",
+     ],
+     "R1": [
+         "NDARAC904DMU",
+         "NDARAM704GKZ",
+         "NDARAP359UM6",
+         "NDARBD879MBX",
+         "NDARBH024NH2",
+         "NDARBK082PDD",
+         "NDARCA153NKE",
+         "NDARCE721YB5",
+         "NDARCJ594BWQ",
+         "NDARCN669XPR",
+         "NDARCW094JCG",
+         "NDARCZ947WU5",
+         "NDARDH670PXH",
+         "NDARDL511UND",
+         "NDARDU986RBM",
+         "NDAREM731BYM",
+         "NDAREN519BLJ",
+         "NDARFK610GY5",
+         "NDARFT581ZW5",
+         "NDARFW972KFQ",
+     ],
+ }
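The new `const` module centralizes the release tables that `dataset.py` previously defined inline (see the deletions below). A short sketch of how the two maps line up:

    from eegdash.const import (
        RELEASE_TO_OPENNEURO_DATASET_MAP,
        SUBJECT_MINI_RELEASE_MAP,
    )

    release = "R5"
    print(RELEASE_TO_OPENNEURO_DATASET_MAP[release])  # ds005509
    print(len(SUBJECT_MINI_RELEASE_MAP[release]))     # 20 subjects per mini release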
eegdash/dataset.py CHANGED
@@ -1,266 +1,13 @@
+ import logging
  from pathlib import Path

+ from mne.utils import warn
+
  from .api import EEGDashDataset
+ from .const import RELEASE_TO_OPENNEURO_DATASET_MAP, SUBJECT_MINI_RELEASE_MAP
  from .registry import register_openneuro_datasets

- RELEASE_TO_OPENNEURO_DATASET_MAP = {
-     "R11": "ds005516",
-     "R10": "ds005515",
-     "R9": "ds005514",
-     "R8": "ds005512",
-     "R7": "ds005511",
-     "R6": "ds005510",
-     "R4": "ds005508",
-     "R5": "ds005509",
-     "R3": "ds005507",
-     "R2": "ds005506",
-     "R1": "ds005505",
- }
-
- SUBJECT_MINI_RELEASE_MAP = {
-     "R11": [
-         "NDARAB678VYW",
-         "NDARAG788YV9",
-         "NDARAM946HJE",
-         "NDARAY977BZT",
-         "NDARAZ532KK0",
-         "NDARCE912ZXW",
-         "NDARCM214WFE",
-         "NDARDL033XRG",
-         "NDARDT889RT9",
-         "NDARDZ794ZVP",
-         "NDAREV869CPW",
-         "NDARFN221WW5",
-         "NDARFV289RKB",
-         "NDARFY623ZTE",
-         "NDARGA890MKA",
-         "NDARHN206XY3",
-         "NDARHP518FUR",
-         "NDARJL292RYV",
-         "NDARKM199DXW",
-         "NDARKW236TN7",
-     ],
-     "R10": [
-         "NDARAR935TGZ",
-         "NDARAV474ADJ",
-         "NDARCB869VM8",
-         "NDARCJ667UPL",
-         "NDARCM677TC1",
-         "NDARET671FTC",
-         "NDARKM061NHZ",
-         "NDARLD501HDK",
-         "NDARLL176DJR",
-         "NDARMT791WDH",
-         "NDARMW299ZAB",
-         "NDARNC405WJA",
-         "NDARNP962TJK",
-         "NDARPB967KU7",
-         "NDARRU560AGK",
-         "NDARTB173LY2",
-         "NDARUW377KAE",
-         "NDARVH565FX9",
-         "NDARVP799KGY",
-         "NDARVY962GB5",
-     ],
-     "R9": [
-         "NDARAC589YMB",
-         "NDARAC853CR6",
-         "NDARAH239PGG",
-         "NDARAL897CYV",
-         "NDARAN160GUF",
-         "NDARAP049KXJ",
-         "NDARAP457WB5",
-         "NDARAW216PM7",
-         "NDARBA004KBT",
-         "NDARBD328NUQ",
-         "NDARBF042LDM",
-         "NDARBH019KPD",
-         "NDARBH728DFK",
-         "NDARBM370JCB",
-         "NDARBU183TDJ",
-         "NDARBW971DCW",
-         "NDARBZ444ZHK",
-         "NDARCC620ZFT",
-         "NDARCD182XT1",
-         "NDARCK113CJM",
-     ],
-     "R8": [
-         "NDARAB514MAJ",
-         "NDARAD571FLB",
-         "NDARAF003VCL",
-         "NDARAG191AE8",
-         "NDARAJ977PRJ",
-         "NDARAP912JK3",
-         "NDARAV454VF0",
-         "NDARAY298THW",
-         "NDARBJ375VP4",
-         "NDARBT436PMT",
-         "NDARBV630BK6",
-         "NDARCB627KDN",
-         "NDARCC059WTH",
-         "NDARCM953HKD",
-         "NDARCN681CXW",
-         "NDARCT889DMB",
-         "NDARDJ204EPU",
-         "NDARDJ544BU5",
-         "NDARDP292DVC",
-         "NDARDW178AC6",
-     ],
-     "R7": [
-         "NDARAY475AKD",
-         "NDARBW026UGE",
-         "NDARCK162REX",
-         "NDARCK481KRH",
-         "NDARCV378MMX",
-         "NDARCX462NVA",
-         "NDARDJ970ELG",
-         "NDARDU617ZW1",
-         "NDAREM609ZXW",
-         "NDAREW074ZM2",
-         "NDARFE555KXB",
-         "NDARFT176NJP",
-         "NDARGK442YHH",
-         "NDARGM439FZD",
-         "NDARGT634DUJ",
-         "NDARHE283KZN",
-         "NDARHG260BM9",
-         "NDARHL684WYU",
-         "NDARHN224TPA",
-         "NDARHP841RMR",
-     ],
-     "R6": [
-         "NDARAD224CRB",
-         "NDARAE301XTM",
-         "NDARAT680GJA",
-         "NDARCA578CEB",
-         "NDARDZ147ETZ",
-         "NDARFL793LDE",
-         "NDARFX710UZA",
-         "NDARGE994BMX",
-         "NDARGP191YHN",
-         "NDARGV436PFT",
-         "NDARHF545HFW",
-         "NDARHP039DBU",
-         "NDARHT774ZK1",
-         "NDARJA830BYV",
-         "NDARKB614KGY",
-         "NDARKM250ET5",
-         "NDARKZ085UKQ",
-         "NDARLB581AXF",
-         "NDARNJ899HW7",
-         "NDARRZ606EDP",
-     ],
-     "R4": [
-         "NDARAC350BZ0",
-         "NDARAD615WLJ",
-         "NDARAG584XLU",
-         "NDARAH503YG1",
-         "NDARAX272ZJL",
-         "NDARAY461TZZ",
-         "NDARBC734UVY",
-         "NDARBL444FBA",
-         "NDARBT640EBN",
-         "NDARBU098PJT",
-         "NDARBU928LV0",
-         "NDARBV059CGE",
-         "NDARCG037CX4",
-         "NDARCG947ZC0",
-         "NDARCH001CN2",
-         "NDARCU001ZN7",
-         "NDARCW497XW2",
-         "NDARCX053GU5",
-         "NDARDF568GL5",
-         "NDARDJ092YKH",
-     ],
-     "R5": [
-         "NDARAH793FBF",
-         "NDARAJ689BVN",
-         "NDARAP785CTE",
-         "NDARAU708TL8",
-         "NDARBE091BGD",
-         "NDARBE103DHM",
-         "NDARBF851NH6",
-         "NDARBH228RDW",
-         "NDARBJ674TVU",
-         "NDARBM433VER",
-         "NDARCA740UC8",
-         "NDARCU633GCZ",
-         "NDARCU736GZ1",
-         "NDARCU744XWL",
-         "NDARDC843HHM",
-         "NDARDH086ZKK",
-         "NDARDL305BT8",
-         "NDARDU853XZ6",
-         "NDARDV245WJG",
-         "NDAREC480KFA",
-     ],
-     "R3": [
-         "NDARAA948VFH",
-         "NDARAD774HAZ",
-         "NDARAE828CML",
-         "NDARAG340ERT",
-         "NDARBA839HLG",
-         "NDARBE641DGZ",
-         "NDARBG574KF4",
-         "NDARBM642JFT",
-         "NDARCL016NHB",
-         "NDARCV944JA6",
-         "NDARCY178KJP",
-         "NDARDY150ZP9",
-         "NDAREC542MH3",
-         "NDAREK549XUQ",
-         "NDAREM887YY8",
-         "NDARFA815FXE",
-         "NDARFF644ZGD",
-         "NDARFV557XAA",
-         "NDARFV780ABD",
-         "NDARGB102NWJ",
-     ],
-     "R2": [
-         "NDARAB793GL3",
-         "NDARAM675UR8",
-         "NDARBM839WR5",
-         "NDARBU730PN8",
-         "NDARCT974NAJ",
-         "NDARCW933FD5",
-         "NDARCZ770BRG",
-         "NDARDW741HCF",
-         "NDARDZ058NZN",
-         "NDAREC377AU2",
-         "NDAREM500WWH",
-         "NDAREV527ZRF",
-         "NDAREV601CE7",
-         "NDARFF070XHV",
-         "NDARFR108JNB",
-         "NDARFT305CG1",
-         "NDARGA056TMW",
-         "NDARGH775KF5",
-         "NDARGJ878ZP4",
-         "NDARHA387FPM",
-     ],
-     "R1": [
-         "NDARAC904DMU",
-         "NDARAM704GKZ",
-         "NDARAP359UM6",
-         "NDARBD879MBX",
-         "NDARBH024NH2",
-         "NDARBK082PDD",
-         "NDARCA153NKE",
-         "NDARCE721YB5",
-         "NDARCJ594BWQ",
-         "NDARCN669XPR",
-         "NDARCW094JCG",
-         "NDARCZ947WU5",
-         "NDARDH670PXH",
-         "NDARDL511UND",
-         "NDARDU986RBM",
-         "NDAREM731BYM",
-         "NDAREN519BLJ",
-         "NDARFK610GY5",
-         "NDARFT581ZW5",
-         "NDARFW972KFQ",
-     ],
- }
+ logger = logging.getLogger("eegdash")


  class EEGChallengeDataset(EEGDashDataset):
@@ -334,8 +81,26 @@ class EEGChallengeDataset(EEGDashDataset):
          else:
              s3_bucket = f"{s3_bucket}/{release}_L100_bdf"

+         warn(
+             "\n\n"
+             "[EEGChallengeDataset] EEG 2025 Competition Data Notice:\n"
+             "-------------------------------------------------------\n"
+             "This object loads the HBN dataset that has been preprocessed for the EEG Challenge:\n"
+             "  - Downsampled from 500Hz to 100Hz\n"
+             "  - Bandpass filtered (0.5–50 Hz)\n"
+             "\n"
+             "For full preprocessing details, see:\n"
+             "  https://github.com/eeg2025/downsample-datasets\n"
+             "\n"
+             "IMPORTANT: The data accessed via `EEGChallengeDataset` is NOT identical to what you get from `EEGDashDataset` directly.\n"
+             "If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data.\n"
+             "\n",
+             UserWarning,
+             module="eegdash",
+         )
+
          super().__init__(
-             dataset=dataset_parameters,
+             dataset=RELEASE_TO_OPENNEURO_DATASET_MAP[release],
              query=query,
              cache_dir=cache_dir,
              s3_bucket=s3_bucket,
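With the mapping moved to `const.py`, the challenge class now resolves the OpenNeuro id itself and emits the notice above on construction. A minimal usage sketch, assuming `release` and `cache_dir` remain the main constructor parameters (only the `super().__init__` call is visible in this hunk):

    from eegdash.dataset import EEGChallengeDataset

    # Triggers the competition data notice, then loads ds005509 ("R5")
    # from the challenge S3 bucket (downsampled, bandpass-filtered copies).
    ds = EEGChallengeDataset(release="R5", cache_dir="eegdash_cache")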
eegdash/registry.py CHANGED
@@ -57,7 +57,7 @@ def register_openneuro_datasets(

          init = make_init(dataset_id)

-         doc = f"""Create an instance for OpenNeuro dataset ``{dataset_id}``.
+         doc = f"""OpenNeuro dataset ``{dataset_id}``.

          {markdown_table(row_series)}

@@ -69,11 +69,15 @@ def register_openneuro_datasets(
              Extra Mongo query merged with ``{{'dataset': '{dataset_id}'}}``.
          s3_bucket : str | None
              Optional S3 bucket name.
+         subject : str | None
+             Optional subject identifier.
+         task : str | None
+             Optional task identifier.
          **kwargs
              Passed through to {base_class.__name__}.
          """

-         init.__doc__ = doc
+         # init.__doc__ = doc

          cls = type(
              class_name,
@@ -101,6 +105,7 @@ def markdown_table(row_series: pd.Series) -> str:
      """Create a reStructuredText grid table from a pandas Series."""
      if row_series.empty:
          return ""
+     dataset_id = row_series["dataset"]

      # Prepare the dataframe with user's suggested logic
      df = (
@@ -112,6 +117,7 @@ def markdown_table(row_series: pd.Series) -> str:
                  "n_tasks": "#Classes",
                  "sampling_freqs": "Freq(Hz)",
                  "duration_hours_total": "Duration(H)",
+                 "size": "Size",
              }
          )
          .reindex(
@@ -122,6 +128,7 @@ def markdown_table(row_series: pd.Series) -> str:
                  "#Classes",
                  "Freq(Hz)",
                  "Duration(H)",
+                 "Size",
              ]
          )
          .infer_objects(copy=False)
@@ -131,6 +138,9 @@ def markdown_table(row_series: pd.Series) -> str:
      # Use tabulate for the final rst formatting
      table = tabulate(df, headers="keys", tablefmt="rst", showindex=False)

+     # Add a caption for the table
+     caption = f"Short overview of dataset {dataset_id} more details in the `Nemar documentation <https://nemar.org/dataexplorer/detail?dataset_id={dataset_id}>`_."
+     # adding caption below the table
      # Indent the table to fit within the admonition block
      indented_table = "\n".join(" " + line for line in table.split("\n"))
-     return f"\n\n{indented_table}"
+     return f"\n\n{indented_table}\n\n{caption}"
{eegdash-0.3.6.dev182011805.dist-info → eegdash-0.3.7.dev104.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: eegdash
- Version: 0.3.6.dev182011805
+ Version: 0.3.7.dev104
  Summary: EEG data for machine learning
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
  License-Expression: GPL-3.0-only
@@ -48,6 +48,7 @@ Requires-Dist: pytest_cases; extra == "tests"
  Requires-Dist: pytest-benchmark; extra == "tests"
  Provides-Extra: dev
  Requires-Dist: pre-commit; extra == "dev"
+ Requires-Dist: ipykernel; extra == "dev"
  Provides-Extra: docs
  Requires-Dist: sphinx; extra == "docs"
  Requires-Dist: sphinx_design; extra == "docs"
@@ -55,10 +56,12 @@ Requires-Dist: sphinx_gallery; extra == "docs"
  Requires-Dist: sphinx_rtd_theme; extra == "docs"
  Requires-Dist: pydata-sphinx-theme; extra == "docs"
  Requires-Dist: sphinx-autobuild; extra == "docs"
+ Requires-Dist: sphinx-sitemap; extra == "docs"
  Requires-Dist: numpydoc; extra == "docs"
  Requires-Dist: memory_profiler; extra == "docs"
  Requires-Dist: ipython; extra == "docs"
  Requires-Dist: lightgbm; extra == "docs"
+ Requires-Dist: plotly; extra == "docs"
  Provides-Extra: all
  Requires-Dist: eegdash[docs]; extra == "all"
  Requires-Dist: eegdash[dev]; extra == "all"
{eegdash-0.3.6.dev182011805.dist-info → eegdash-0.3.7.dev104.dist-info}/RECORD RENAMED
@@ -1,12 +1,12 @@
- eegdash/__init__.py,sha256=NLvINqQgb9EuPvIl_VMrEupLgfbk5827M_Wgbl_pX2g,247
- eegdash/api.py,sha256=yotN4vqurZAxVA4q_DK4z0mhh9P8sbpxKzvyxuRSkcQ,38538
+ eegdash/__init__.py,sha256=A70xhDRmnPwJULFWRxt9Nx-AbZRTh13WJoBDyzviKHQ,303
+ eegdash/api.py,sha256=7QTComMkbOdHumlzdOrNV2kqHy9R9HG2Gefo_eLBy-U,38948
+ eegdash/const.py,sha256=syrXxcqFyl4dxAetOuhPyCYZ2xgilsLunJRVzx9TCeA,5806
  eegdash/data_config.py,sha256=OS6ERO-jHrnEOfMJUehY7ieABdsRw_qWzOKJ4pzSfqw,1323
  eegdash/data_utils.py,sha256=mi9pscui-BPpRH9ovRtGWiSwHG5QN6K_IvJdYaING2I,27679
- eegdash/dataset.py,sha256=iGi7m2FNhLgJxxwSsB9JIy01p4tmdlJIPzdL5CDAJU4,9446
+ eegdash/dataset.py,sha256=6Tgj_1j4DNoaPoMnhtancDtPG6bxODnbPlXkDzGjtrQ,4716
  eegdash/dataset_summary.csv,sha256=9Rw9PawiQ9a_OBRJYKarrzb4UFSGpkGULhYB0MYUieE,14740
  eegdash/mongodb.py,sha256=GD3WgA253oFgpzOHrYaj4P1mRjNtDMT5Oj4kVvHswjI,2006
- eegdash/preprocessing.py,sha256=7S_TTRKPKEk47tTnh2D6WExBt4cctAMxUxGDjJqq5lU,2221
- eegdash/registry.py,sha256=cxqX53GYyDvg5DkiqJkvjqHDPI72JTPlI4qVh2sILu8,3873
+ eegdash/registry.py,sha256=jBR2tGE4YJL4yhbZcn2CN4jaC-ttyVN0wmsCR1uWzoU,4329
  eegdash/utils.py,sha256=wU9CBQZLW_LIQIBwhgQm5bU4X-rSsVNPdeF2iE4QGJ4,410
  eegdash/features/__init__.py,sha256=BXNhjvL4_SSFAY1lcP9nyGpkbJNtoOMH4AHlF6OyABo,4078
  eegdash/features/datasets.py,sha256=kU1DO70ArSIy-LF1hHD2NN4iT-kJrI0mVpSkyV_OSeI,18301
@@ -23,8 +23,8 @@ eegdash/features/feature_bank/dimensionality.py,sha256=j_Ds71Y1AbV2uLFQj8EuXQ4kz
  eegdash/features/feature_bank/signal.py,sha256=3Tb8z9gX7iZipxQJ9DSyy30JfdmW58kgvimSyZX74p8,3404
  eegdash/features/feature_bank/spectral.py,sha256=bNB7skusePs1gX7NOU6yRlw_Gr4UOCkO_ylkCgybzug,3319
  eegdash/features/feature_bank/utils.py,sha256=DGh-Q7-XFIittP7iBBxvsJaZrlVvuY5mw-G7q6C-PCI,1237
- eegdash-0.3.6.dev182011805.dist-info/licenses/LICENSE,sha256=asisR-xupy_NrQBFXnx6yqXeZcYWLvbAaiETl25iXT0,931
- eegdash-0.3.6.dev182011805.dist-info/METADATA,sha256=jBWyMgXrjtrbTGvALa7gKU0TpsUcVQoByszJ8xS-TdQ,9932
- eegdash-0.3.6.dev182011805.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- eegdash-0.3.6.dev182011805.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
- eegdash-0.3.6.dev182011805.dist-info/RECORD,,
+ eegdash-0.3.7.dev104.dist-info/licenses/LICENSE,sha256=asisR-xupy_NrQBFXnx6yqXeZcYWLvbAaiETl25iXT0,931
+ eegdash-0.3.7.dev104.dist-info/METADATA,sha256=vFsNOyHy0Bq1XBvV_aDoNLSdtuK4H4hzpvU67tfnRJE,10053
+ eegdash-0.3.7.dev104.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ eegdash-0.3.7.dev104.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
+ eegdash-0.3.7.dev104.dist-info/RECORD,,
eegdash/preprocessing.py DELETED
@@ -1,63 +0,0 @@
- import logging
-
- import mne
- import numpy as np
-
- from braindecode.preprocessing import Preprocessor
-
- logger = logging.getLogger("eegdash")
-
-
- class hbn_ec_ec_reannotation(Preprocessor):
-     """Preprocessor to reannotate the raw data for eyes open and eyes closed events.
-
-     This processor is designed for HBN datasets.
-
-     """
-
-     def __init__(self):
-         super().__init__(fn=self.transform, apply_on_array=False)
-
-     def transform(self, raw):
-         """Reannotate the raw data to create new events for eyes open and eyes closed
-
-         This function modifies the raw MNE object by creating new events based on
-         the existing annotations for "instructed_toCloseEyes" and "instructed_toOpenEyes".
-         It generates new events every 2 seconds within specified time ranges after
-         the original events, and replaces the existing annotations with these new events.
-
-         Parameters
-         ----------
-         raw : mne.io.Raw
-             The raw MNE object containing EEG data and annotations.
-
-         """
-         events, event_id = mne.events_from_annotations(raw)
-
-         logger.info("Original events found with ids: %s", event_id)
-
-         # Create new events array for 2-second segments
-         new_events = []
-         sfreq = raw.info["sfreq"]
-         for event in events[events[:, 2] == event_id["instructed_toCloseEyes"]]:
-             # For each original event, create events every 2 seconds from 15s to 29s after
-             start_times = event[0] + np.arange(15, 29, 2) * sfreq
-             new_events.extend([[int(t), 0, 1] for t in start_times])
-
-         for event in events[events[:, 2] == event_id["instructed_toOpenEyes"]]:
-             # For each original event, create events every 2 seconds from 5s to 19s after
-             start_times = event[0] + np.arange(5, 19, 2) * sfreq
-             new_events.extend([[int(t), 0, 2] for t in start_times])
-
-         # replace events in raw
-         new_events = np.array(new_events)
-
-         annot_from_events = mne.annotations_from_events(
-             events=new_events,
-             event_desc={1: "eyes_closed", 2: "eyes_open"},
-             sfreq=raw.info["sfreq"],
-         )
-
-         raw.set_annotations(annot_from_events)
-
-         return raw
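Given the `from .hbn import preprocessing` re-export added in `__init__.py`, this module appears to have moved into the `hbn` subpackage rather than being removed outright. A hedged migration sketch; the new path is inferred from that import and is not shown directly in this diff:

    # Up to 0.3.6:
    # from eegdash.preprocessing import hbn_ec_ec_reannotation

    # From 0.3.7, assuming the class moved with its module into eegdash.hbn:
    from eegdash.hbn.preprocessing import hbn_ec_ec_reannotation

    reannotator = hbn_ec_ec_reannotation()  # a braindecode Preprocessor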