eegdash 0.3.5.dev87__py3-none-any.whl → 0.3.5.dev92__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

eegdash/__init__.py CHANGED
@@ -7,4 +7,4 @@ __init__mongo_client()
7
7
 
8
8
  __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset"]
9
9
 
10
- __version__ = "0.3.5.dev87"
10
+ __version__ = "0.3.5.dev92"
eegdash/api.py CHANGED
@@ -90,12 +90,16 @@ class EEGDash:
90
90
  ) -> list[Mapping[str, Any]]:
91
91
  """Find records in the MongoDB collection.
92
92
 
93
- This method can be called in two ways:
93
+ This method supports four usage patterns:
94
94
  1. With a pre-built MongoDB query dictionary (positional argument):
95
95
  >>> eegdash.find({"dataset": "ds002718", "subject": {"$in": ["012", "013"]}})
96
96
  2. With user-friendly keyword arguments for simple and multi-value queries:
97
97
  >>> eegdash.find(dataset="ds002718", subject="012")
98
98
  >>> eegdash.find(dataset="ds002718", subject=["012", "013"])
99
+ 3. With an explicit empty query to return all documents:
100
+ >>> eegdash.find({}) # fetches all records (use with care)
101
+ 4. By combining a raw query with kwargs (merged via logical AND):
102
+ >>> eegdash.find({"dataset": "ds002718"}, subject=["012", "013"]) # yields {"$and":[{"dataset":"ds002718"}, {"subject":{"$in":["012","013"]}}]}
99
103
 
100
104
  Parameters
101
105
  ----------
@@ -110,26 +114,34 @@ class EEGDash:
110
114
  list:
111
115
  A list of DB records (string-keyed dictionaries) that match the query.
112
116
 
113
- Raises
114
- ------
115
- ValueError
116
- If both a `query` dictionary and keyword arguments are provided.
117
-
118
117
  """
119
- if query is not None and kwargs:
120
- raise ValueError(
121
- "Provide either a positional 'query' dictionary or keyword arguments, not both."
122
- )
123
-
124
- final_query = {}
125
- if query is not None:
126
- final_query = query
127
- elif kwargs:
128
- final_query = self._build_query_from_kwargs(**kwargs)
118
+ final_query: dict[str, Any] | None = None
119
+
120
+ # Accept explicit empty dict {} to mean "match all"
121
+ raw_query = query if isinstance(query, dict) else None
122
+ kwargs_query = self._build_query_from_kwargs(**kwargs) if kwargs else None
123
+
124
+ # Determine presence, treating {} as a valid raw query
125
+ has_raw = isinstance(raw_query, dict)
126
+ has_kwargs = kwargs_query is not None
127
+
128
+ if has_raw and has_kwargs:
129
+ # Detect conflicting constraints on the same field (e.g., task specified
130
+ # differently in both places) and raise a clear error instead of silently
131
+ # producing an empty result.
132
+ self._raise_if_conflicting_constraints(raw_query, kwargs_query)
133
+ # Merge with logical AND so both constraints apply
134
+ if raw_query: # non-empty dict adds constraints
135
+ final_query = {"$and": [raw_query, kwargs_query]}
136
+ else: # {} adds nothing; use kwargs_query only
137
+ final_query = kwargs_query
138
+ elif has_raw:
139
+ # May be {} meaning match-all, or a non-empty dict
140
+ final_query = raw_query
141
+ elif has_kwargs:
142
+ final_query = kwargs_query
129
143
  else:
130
- # By default, an empty query {} returns all documents.
131
- # This can be dangerous, so we can either allow it or raise an error.
132
- # Let's require an explicit query for safety.
144
+ # Avoid accidental full scans
133
145
  raise ValueError(
134
146
  "find() requires a query dictionary or at least one keyword argument. "
135
147
  "To find all documents, use find({})."
@@ -224,9 +236,12 @@ class EEGDash:
224
236
  return record
225
237
 
226
238
  def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
227
- """Builds and validates a MongoDB query from user-friendly keyword arguments.
239
+ """Build and validate a MongoDB query from user-friendly keyword arguments.
228
240
 
229
- Translates list values into MongoDB's `$in` operator.
241
+ Improvements:
242
+ - Reject None values and empty/whitespace-only strings
243
+ - For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
244
+ - Preserve scalars as exact matches
230
245
  """
231
246
  # 1. Validate that all provided keys are allowed for querying
232
247
  unknown_fields = set(kwargs.keys()) - self._ALLOWED_QUERY_FIELDS
@@ -239,19 +254,108 @@ class EEGDash:
239
254
  # 2. Construct the query dictionary
240
255
  query = {}
241
256
  for key, value in kwargs.items():
242
- if isinstance(value, (list, tuple)):
243
- if not value:
257
+ # None is not a valid constraint
258
+ if value is None:
259
+ raise ValueError(
260
+ f"Received None for query parameter '{key}'. Provide a concrete value."
261
+ )
262
+
263
+ # Handle list-like values as multi-constraints
264
+ if isinstance(value, (list, tuple, set)):
265
+ cleaned: list[Any] = []
266
+ for item in value:
267
+ if item is None:
268
+ continue
269
+ if isinstance(item, str):
270
+ item = item.strip()
271
+ if not item:
272
+ continue
273
+ cleaned.append(item)
274
+ # Deduplicate while preserving order
275
+ cleaned = list(dict.fromkeys(cleaned))
276
+ if not cleaned:
244
277
  raise ValueError(
245
278
  f"Received an empty list for query parameter '{key}'. This is not supported."
246
279
  )
247
- # If the value is a list, use the `$in` operator for multi-search
248
- query[key] = {"$in": value}
280
+ query[key] = {"$in": cleaned}
249
281
  else:
250
- # Otherwise, it's a direct match
282
+ # Scalars: trim strings and validate
283
+ if isinstance(value, str):
284
+ value = value.strip()
285
+ if not value:
286
+ raise ValueError(
287
+ f"Received an empty string for query parameter '{key}'."
288
+ )
251
289
  query[key] = value
252
290
 
253
291
  return query
254
292
 
293
+ # --- Query merging and conflict detection helpers ---
294
+ def _extract_simple_constraint(self, query: dict[str, Any], key: str):
295
+ """Extract a simple constraint for a given key from a query dict.
296
+
297
+ Supports only top-level equality (key: value) and $in (key: {"$in": [...]})
298
+ constraints. Returns a tuple (kind, value) where kind is "eq" or "in". If the
299
+ key is not present or uses other operators, returns None.
300
+ """
301
+ if not isinstance(query, dict) or key not in query:
302
+ return None
303
+ val = query[key]
304
+ if isinstance(val, dict):
305
+ if "$in" in val and isinstance(val["$in"], (list, tuple)):
306
+ return ("in", list(val["$in"]))
307
+ return None # unsupported operator shape for conflict checking
308
+ else:
309
+ return ("eq", val)
310
+
311
+ def _raise_if_conflicting_constraints(
312
+ self, raw_query: dict[str, Any], kwargs_query: dict[str, Any]
313
+ ) -> None:
314
+ """Raise ValueError if both query sources define incompatible constraints.
315
+
316
+ We conservatively check only top-level fields with simple equality or $in
317
+ constraints. If a field appears in both queries and constraints are mutually
318
+ exclusive, raise an explicit error to avoid silent empty result sets.
319
+ """
320
+ if not raw_query or not kwargs_query:
321
+ return
322
+
323
+ # Only consider fields we generally allow; skip meta operators like $and
324
+ raw_keys = set(raw_query.keys()) & self._ALLOWED_QUERY_FIELDS
325
+ kw_keys = set(kwargs_query.keys()) & self._ALLOWED_QUERY_FIELDS
326
+ dup_keys = raw_keys & kw_keys
327
+ for key in dup_keys:
328
+ rc = self._extract_simple_constraint(raw_query, key)
329
+ kc = self._extract_simple_constraint(kwargs_query, key)
330
+ if rc is None or kc is None:
331
+ # If either side is non-simple, skip conflict detection for this key
332
+ continue
333
+
334
+ r_kind, r_val = rc
335
+ k_kind, k_val = kc
336
+
337
+ # Normalize to sets when appropriate for simpler checks
338
+ if r_kind == "eq" and k_kind == "eq":
339
+ if r_val != k_val:
340
+ raise ValueError(
341
+ f"Conflicting constraints for '{key}': query={r_val!r} vs kwargs={k_val!r}"
342
+ )
343
+ elif r_kind == "in" and k_kind == "eq":
344
+ if k_val not in r_val:
345
+ raise ValueError(
346
+ f"Conflicting constraints for '{key}': query in {r_val!r} vs kwargs={k_val!r}"
347
+ )
348
+ elif r_kind == "eq" and k_kind == "in":
349
+ if r_val not in k_val:
350
+ raise ValueError(
351
+ f"Conflicting constraints for '{key}': query={r_val!r} vs kwargs in {k_val!r}"
352
+ )
353
+ elif r_kind == "in" and k_kind == "in":
354
+ if len(set(r_val).intersection(k_val)) == 0:
355
+ raise ValueError(
356
+ f"Conflicting constraints for '{key}': disjoint sets {r_val!r} and {k_val!r}"
357
+ )
358
+
255
359
  def load_eeg_data_from_s3(self, s3path: str) -> xr.DataArray:
256
360
  """Load an EEGLAB .set file from an AWS S3 URI and return it as an xarray DataArray.
257
361
 
@@ -591,7 +695,7 @@ class EEGDashDataset(BaseConcatDataset):
591
695
  self,
592
696
  query: dict | None = None,
593
697
  cache_dir: str = "~/eegdash_cache",
594
- dataset: str | None = None,
698
+ dataset: str | list[str] | None = None,
595
699
  description_fields: list[str] = [
596
700
  "subject",
597
701
  "session",
@@ -669,10 +773,15 @@ class EEGDashDataset(BaseConcatDataset):
669
773
  }
670
774
  base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in query_kwargs}
671
775
 
672
- if query and query_kwargs:
673
- raise ValueError(
674
- "Provide either a 'query' dictionary or keyword arguments for filtering, not both."
675
- )
776
+ # If user provided a dataset name via the dedicated parameter (and we're not
777
+ # loading from a local directory), treat it as a query filter. Accept str or list.
778
+ if data_dir is None and dataset is not None:
779
+ # Allow callers to pass a single dataset id (str) or a list of them.
780
+ # If list is provided, let _build_query_from_kwargs turn it into $in later.
781
+ query_kwargs.setdefault("dataset", dataset)
782
+
783
+ # Allow mixing raw DB query with additional keyword filters. Both will be
784
+ # merged by EEGDash.find() (logical AND), so we do not raise here.
676
785
 
677
786
  try:
678
787
  if records is not None:
@@ -688,15 +797,20 @@ class EEGDashDataset(BaseConcatDataset):
688
797
  ]
689
798
  elif data_dir:
690
799
  # This path loads from a local directory and is not affected by DB query logic
691
- if isinstance(data_dir, str) or isinstance(data_dir, Path):
800
+ if isinstance(data_dir, (str, Path)):
692
801
  datasets = self.load_bids_dataset(
693
- dataset=dataset,
802
+ dataset=dataset
803
+ if isinstance(dataset, str)
804
+ else (dataset[0] if dataset else None),
694
805
  data_dir=data_dir,
695
806
  description_fields=description_fields,
696
807
  s3_bucket=s3_bucket,
697
808
  **base_dataset_kwargs,
698
809
  )
699
810
  else:
811
+ assert dataset is not None, (
812
+ "dataset must be provided when passing multiple data_dir"
813
+ )
700
814
  assert len(data_dir) == len(dataset), (
701
815
  "Number of datasets and directories must match"
702
816
  )
@@ -711,7 +825,7 @@ class EEGDashDataset(BaseConcatDataset):
711
825
  **base_dataset_kwargs,
712
826
  )
713
827
  )
714
- elif query or query_kwargs:
828
+ elif query is not None or query_kwargs:
715
829
  # This is the DB query path that we are improving
716
830
  datasets = self.find_datasets(
717
831
  query=query,
@@ -748,7 +862,7 @@ class EEGDashDataset(BaseConcatDataset):
748
862
 
749
863
  def find_datasets(
750
864
  self,
751
- query: dict[str, Any],
865
+ query: dict[str, Any] | None,
752
866
  description_fields: list[str],
753
867
  query_kwargs: dict,
754
868
  base_dataset_kwargs: dict,
@@ -774,6 +888,10 @@ class EEGDashDataset(BaseConcatDataset):
774
888
  """
775
889
  datasets: list[EEGDashBaseDataset] = []
776
890
 
891
+ # Build records from a raw query and/or keyword filters.
892
+ # Note: callers may accidentally pass an empty dict for `query` along with
893
+ # kwargs. In that case, treat it as if no query was provided and rely on kwargs.
894
+ # Always delegate merging of raw query + kwargs to EEGDash.find
777
895
  self.records = self.eeg_dash.find(query, **query_kwargs)
778
896
 
779
897
  for record in self.records:
eegdash/data_utils.py CHANGED
@@ -2,6 +2,7 @@ import json
2
2
  import logging
3
3
  import os
4
4
  import re
5
+ import traceback
5
6
  import warnings
6
7
  from pathlib import Path
7
8
  from typing import Any
@@ -66,9 +67,7 @@ class EEGDashBaseDataset(BaseDataset):
66
67
  self.s3_open_neuro = True
67
68
 
68
69
  self.filecache = self.cache_dir / record["bidspath"]
69
-
70
70
  self.bids_root = self.cache_dir / record["dataset"]
71
-
72
71
  self.bidspath = BIDSPath(
73
72
  root=self.bids_root,
74
73
  datatype="eeg",
@@ -99,6 +98,9 @@ class EEGDashBaseDataset(BaseDataset):
99
98
  )
100
99
  if not self.s3_open_neuro:
101
100
  self.s3file = re.sub(r"(^|/)ds\d{6}/", r"\1", self.s3file, count=1)
101
+ if self.s3file.endswith(".set"):
102
+ self.s3file = self.s3file[:-4] + ".bdf"
103
+ self.filecache = self.filecache.with_suffix(".bdf")
102
104
 
103
105
  self.filecache.parent.mkdir(parents=True, exist_ok=True)
104
106
  info = filesystem.info(self.s3file)
@@ -132,11 +134,21 @@ class EEGDashBaseDataset(BaseDataset):
132
134
  anon=True, client_kwargs={"region_name": "us-east-2"}
133
135
  )
134
136
  for i, dep in enumerate(self.bids_dependencies):
137
+ if not self.s3_open_neuro:
138
+ # fix this when our bucket is integrated into the
139
+ # mongodb
140
+ # if the file has a ".set" extension, replace it with ".bdf"
141
+ if dep.endswith(".set"):
142
+ dep = dep[:-4] + ".bdf"
143
+
135
144
  s3path = self.get_s3path(dep)
136
145
  if not self.s3_open_neuro:
137
146
  dep = self.bids_dependencies_original[i]
138
147
 
139
148
  filepath = self.cache_dir / dep
149
+ if not self.s3_open_neuro:
150
+ if self.filecache.suffix == ".set":
151
+ self.filecache = self.filecache.with_suffix(".bdf")
140
152
  # here, we download the dependency and it is fine
141
153
  # in the case of the competition.
142
154
  if not filepath.exists():
@@ -179,9 +191,23 @@ class EEGDashBaseDataset(BaseDataset):
179
191
  # capturing any warnings
180
192
  # to-do: remove this once this is fixed on the mne-bids side.
181
193
  with warnings.catch_warnings(record=True) as w:
182
- self._raw = mne_bids.read_raw_bids(
183
- bids_path=self.bidspath, verbose="ERROR"
184
- )
194
+ try:
195
+ # TO-DO: remove this once this is fixed on our side
196
+ if not self.s3_open_neuro:
197
+ self.bidspath = self.bidspath.update(extension=".bdf")
198
+
199
+ self._raw = mne_bids.read_raw_bids(
200
+ bids_path=self.bidspath, verbose="ERROR"
201
+ )
202
+ except Exception as e:
203
+ logger.error(
204
+ f"Error while reading BIDS file: {self.bidspath}\n"
205
+ "This may be due to a missing or corrupted file.\n"
206
+ "Please check the file and try again."
207
+ )
208
+ logger.error(f"Exception: {e}")
209
+ logger.error(traceback.format_exc())
210
+ raise e
185
211
  for warning in w:
186
212
  logger.warning(
187
213
  f"Warning while reading BIDS file: {warning.message}"
@@ -292,7 +318,6 @@ class EEGDashBaseRaw(BaseRaw):
292
318
  )
293
319
 
294
320
  def get_s3path(self, filepath):
295
- print(f"Getting S3 path for {filepath}")
296
321
  return f"{self._AWS_BUCKET}/{filepath}"
297
322
 
298
323
  def _download_s3(self) -> None:
@@ -513,7 +538,6 @@ class EEGBIDSDataset:
513
538
  with os.scandir(directory) as entries:
514
539
  for entry in entries:
515
540
  if entry.is_file() and entry.name.endswith(extension):
516
- print("Adding ", entry.path)
517
541
  result_files.append(entry.path)
518
542
  elif entry.is_dir():
519
543
  # check that entry path doesn't contain any name in ignore list
eegdash/dataset.py CHANGED
@@ -17,12 +17,258 @@ RELEASE_TO_OPENNEURO_DATASET_MAP = {
17
17
  "R1": "ds005505",
18
18
  }
19
19
 
20
+ SUBJECT_MINI_RELEASE_MAP = {
21
+ "R11": [
22
+ "NDARAB678VYW",
23
+ "NDARAG788YV9",
24
+ "NDARAM946HJE",
25
+ "NDARAY977BZT",
26
+ "NDARAZ532KK0",
27
+ "NDARCE912ZXW",
28
+ "NDARCM214WFE",
29
+ "NDARDL033XRG",
30
+ "NDARDT889RT9",
31
+ "NDARDZ794ZVP",
32
+ "NDAREV869CPW",
33
+ "NDARFN221WW5",
34
+ "NDARFV289RKB",
35
+ "NDARFY623ZTE",
36
+ "NDARGA890MKA",
37
+ "NDARHN206XY3",
38
+ "NDARHP518FUR",
39
+ "NDARJL292RYV",
40
+ "NDARKM199DXW",
41
+ "NDARKW236TN7",
42
+ ],
43
+ "R10": [
44
+ "NDARAR935TGZ",
45
+ "NDARAV474ADJ",
46
+ "NDARCB869VM8",
47
+ "NDARCJ667UPL",
48
+ "NDARCM677TC1",
49
+ "NDARET671FTC",
50
+ "NDARKM061NHZ",
51
+ "NDARLD501HDK",
52
+ "NDARLL176DJR",
53
+ "NDARMT791WDH",
54
+ "NDARMW299ZAB",
55
+ "NDARNC405WJA",
56
+ "NDARNP962TJK",
57
+ "NDARPB967KU7",
58
+ "NDARRU560AGK",
59
+ "NDARTB173LY2",
60
+ "NDARUW377KAE",
61
+ "NDARVH565FX9",
62
+ "NDARVP799KGY",
63
+ "NDARVY962GB5",
64
+ ],
65
+ "R9": [
66
+ "NDARAC589YMB",
67
+ "NDARAC853CR6",
68
+ "NDARAH239PGG",
69
+ "NDARAL897CYV",
70
+ "NDARAN160GUF",
71
+ "NDARAP049KXJ",
72
+ "NDARAP457WB5",
73
+ "NDARAW216PM7",
74
+ "NDARBA004KBT",
75
+ "NDARBD328NUQ",
76
+ "NDARBF042LDM",
77
+ "NDARBH019KPD",
78
+ "NDARBH728DFK",
79
+ "NDARBM370JCB",
80
+ "NDARBU183TDJ",
81
+ "NDARBW971DCW",
82
+ "NDARBZ444ZHK",
83
+ "NDARCC620ZFT",
84
+ "NDARCD182XT1",
85
+ "NDARCK113CJM",
86
+ ],
87
+ "R8": [
88
+ "NDARAB514MAJ",
89
+ "NDARAD571FLB",
90
+ "NDARAF003VCL",
91
+ "NDARAG191AE8",
92
+ "NDARAJ977PRJ",
93
+ "NDARAP912JK3",
94
+ "NDARAV454VF0",
95
+ "NDARAY298THW",
96
+ "NDARBJ375VP4",
97
+ "NDARBT436PMT",
98
+ "NDARBV630BK6",
99
+ "NDARCB627KDN",
100
+ "NDARCC059WTH",
101
+ "NDARCM953HKD",
102
+ "NDARCN681CXW",
103
+ "NDARCT889DMB",
104
+ "NDARDJ204EPU",
105
+ "NDARDJ544BU5",
106
+ "NDARDP292DVC",
107
+ "NDARDW178AC6",
108
+ ],
109
+ "R7": [
110
+ "NDARAY475AKD",
111
+ "NDARBW026UGE",
112
+ "NDARCK162REX",
113
+ "NDARCK481KRH",
114
+ "NDARCV378MMX",
115
+ "NDARCX462NVA",
116
+ "NDARDJ970ELG",
117
+ "NDARDU617ZW1",
118
+ "NDAREM609ZXW",
119
+ "NDAREW074ZM2",
120
+ "NDARFE555KXB",
121
+ "NDARFT176NJP",
122
+ "NDARGK442YHH",
123
+ "NDARGM439FZD",
124
+ "NDARGT634DUJ",
125
+ "NDARHE283KZN",
126
+ "NDARHG260BM9",
127
+ "NDARHL684WYU",
128
+ "NDARHN224TPA",
129
+ "NDARHP841RMR",
130
+ ],
131
+ "R6": [
132
+ "NDARAD224CRB",
133
+ "NDARAE301XTM",
134
+ "NDARAT680GJA",
135
+ "NDARCA578CEB",
136
+ "NDARDZ147ETZ",
137
+ "NDARFL793LDE",
138
+ "NDARFX710UZA",
139
+ "NDARGE994BMX",
140
+ "NDARGP191YHN",
141
+ "NDARGV436PFT",
142
+ "NDARHF545HFW",
143
+ "NDARHP039DBU",
144
+ "NDARHT774ZK1",
145
+ "NDARJA830BYV",
146
+ "NDARKB614KGY",
147
+ "NDARKM250ET5",
148
+ "NDARKZ085UKQ",
149
+ "NDARLB581AXF",
150
+ "NDARNJ899HW7",
151
+ "NDARRZ606EDP",
152
+ ],
153
+ "R4": [
154
+ "NDARAC350BZ0",
155
+ "NDARAD615WLJ",
156
+ "NDARAG584XLU",
157
+ "NDARAH503YG1",
158
+ "NDARAX272ZJL",
159
+ "NDARAY461TZZ",
160
+ "NDARBC734UVY",
161
+ "NDARBL444FBA",
162
+ "NDARBT640EBN",
163
+ "NDARBU098PJT",
164
+ "NDARBU928LV0",
165
+ "NDARBV059CGE",
166
+ "NDARCG037CX4",
167
+ "NDARCG947ZC0",
168
+ "NDARCH001CN2",
169
+ "NDARCU001ZN7",
170
+ "NDARCW497XW2",
171
+ "NDARCX053GU5",
172
+ "NDARDF568GL5",
173
+ "NDARDJ092YKH",
174
+ ],
175
+ "R5": [
176
+ "NDARAH793FBF",
177
+ "NDARAJ689BVN",
178
+ "NDARAP785CTE",
179
+ "NDARAU708TL8",
180
+ "NDARBE091BGD",
181
+ "NDARBE103DHM",
182
+ "NDARBF851NH6",
183
+ "NDARBH228RDW",
184
+ "NDARBJ674TVU",
185
+ "NDARBM433VER",
186
+ "NDARCA740UC8",
187
+ "NDARCU633GCZ",
188
+ "NDARCU736GZ1",
189
+ "NDARCU744XWL",
190
+ "NDARDC843HHM",
191
+ "NDARDH086ZKK",
192
+ "NDARDL305BT8",
193
+ "NDARDU853XZ6",
194
+ "NDARDV245WJG",
195
+ "NDAREC480KFA",
196
+ ],
197
+ "R3": [
198
+ "NDARAA948VFH",
199
+ "NDARAD774HAZ",
200
+ "NDARAE828CML",
201
+ "NDARAG340ERT",
202
+ "NDARBA839HLG",
203
+ "NDARBE641DGZ",
204
+ "NDARBG574KF4",
205
+ "NDARBM642JFT",
206
+ "NDARCL016NHB",
207
+ "NDARCV944JA6",
208
+ "NDARCY178KJP",
209
+ "NDARDY150ZP9",
210
+ "NDAREC542MH3",
211
+ "NDAREK549XUQ",
212
+ "NDAREM887YY8",
213
+ "NDARFA815FXE",
214
+ "NDARFF644ZGD",
215
+ "NDARFV557XAA",
216
+ "NDARFV780ABD",
217
+ "NDARGB102NWJ",
218
+ ],
219
+ "R2": [
220
+ "NDARAB793GL3",
221
+ "NDARAM675UR8",
222
+ "NDARBM839WR5",
223
+ "NDARBU730PN8",
224
+ "NDARCT974NAJ",
225
+ "NDARCW933FD5",
226
+ "NDARCZ770BRG",
227
+ "NDARDW741HCF",
228
+ "NDARDZ058NZN",
229
+ "NDAREC377AU2",
230
+ "NDAREM500WWH",
231
+ "NDAREV527ZRF",
232
+ "NDAREV601CE7",
233
+ "NDARFF070XHV",
234
+ "NDARFR108JNB",
235
+ "NDARFT305CG1",
236
+ "NDARGA056TMW",
237
+ "NDARGH775KF5",
238
+ "NDARGJ878ZP4",
239
+ "NDARHA387FPM",
240
+ ],
241
+ "R1": [
242
+ "NDARAC904DMU",
243
+ "NDARAM704GKZ",
244
+ "NDARAP359UM6",
245
+ "NDARBD879MBX",
246
+ "NDARBH024NH2",
247
+ "NDARBK082PDD",
248
+ "NDARCA153NKE",
249
+ "NDARCE721YB5",
250
+ "NDARCJ594BWQ",
251
+ "NDARCN669XPR",
252
+ "NDARCW094JCG",
253
+ "NDARCZ947WU5",
254
+ "NDARDH670PXH",
255
+ "NDARDL511UND",
256
+ "NDARDU986RBM",
257
+ "NDAREM731BYM",
258
+ "NDAREN519BLJ",
259
+ "NDARFK610GY5",
260
+ "NDARFT581ZW5",
261
+ "NDARFW972KFQ",
262
+ ],
263
+ }
264
+
20
265
 
21
266
  class EEGChallengeDataset(EEGDashDataset):
22
267
  def __init__(
23
268
  self,
24
269
  release: str,
25
270
  cache_dir: str,
271
+ mini: bool = True,
26
272
  query: dict | None = None,
27
273
  s3_bucket: str | None = "s3://nmdatasets/NeurIPS25",
28
274
  **kwargs,
@@ -35,6 +281,9 @@ class EEGChallengeDataset(EEGDashDataset):
35
281
  ----------
36
282
  release: str
37
283
  Release name. Can be one of ["R1", ..., "R11"]
284
+ mini: bool, default True
285
+ Whether to use the mini-release version of the dataset. It is recommended
286
+ to use the mini version for faster training and evaluation.
38
287
  query : dict | None
39
288
  Optionally a dictionary that specifies a query to be executed,
40
289
  in addition to the dataset (automatically inferred from the release argument).
@@ -50,24 +299,46 @@ class EEGChallengeDataset(EEGDashDataset):
50
299
 
51
300
  """
52
301
  self.release = release
302
+ self.mini = mini
303
+
53
304
  if release not in RELEASE_TO_OPENNEURO_DATASET_MAP:
54
- raise ValueError(f"Unknown release: {release}")
55
-
56
- dataset = RELEASE_TO_OPENNEURO_DATASET_MAP[release]
57
- if query is None:
58
- query = {"dataset": dataset}
59
- elif "dataset" not in query:
60
- query["dataset"] = dataset
61
- elif query["dataset"] != dataset:
62
305
  raise ValueError(
63
- f"Query dataset {query['dataset']} does not match the release {release} "
64
- f"which corresponds to dataset {dataset}."
306
+ f"Unknown release: {release}, expected one of {list(RELEASE_TO_OPENNEURO_DATASET_MAP.keys())}"
307
+ )
308
+
309
+ dataset_parameters = []
310
+ if isinstance(release, str):
311
+ dataset_parameters.append(RELEASE_TO_OPENNEURO_DATASET_MAP[release])
312
+ else:
313
+ raise ValueError(
314
+ f"Unknown release type: {type(release)}, the expected type is str."
65
315
  )
66
316
 
317
+ if query and "dataset" in query:
318
+ raise ValueError(
319
+ "Query using the parameters `dataset` with the class EEGChallengeDataset is not possible."
320
+ "Please use the release argument instead, or the object EEGDashDataset instead."
321
+ )
322
+
323
+ if self.mini:
324
+ # Disallow mixing subject selection with mini=True since mini already
325
+ # applies a predefined subject subset.
326
+ if (query and "subject" in query) or ("subject" in kwargs):
327
+ raise ValueError(
328
+ "Query using the parameters `subject` with the class EEGChallengeDataset and `mini==True` is not possible."
329
+ "Please don't use the `subject` selection twice."
330
+ "Set `mini=False` to use the `subject` selection."
331
+ )
332
+ kwargs["subject"] = SUBJECT_MINI_RELEASE_MAP[release]
333
+ s3_bucket = f"{s3_bucket}/{release}_mini_L100_bdf"
334
+ else:
335
+ s3_bucket = f"{s3_bucket}/{release}_L100_bdf"
336
+
67
337
  super().__init__(
338
+ dataset=dataset_parameters,
68
339
  query=query,
69
340
  cache_dir=cache_dir,
70
- s3_bucket=f"{s3_bucket}/{release}_L100",
341
+ s3_bucket=s3_bucket,
71
342
  **kwargs,
72
343
  )
73
344
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.3.5.dev87
3
+ Version: 0.3.5.dev92
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
6
6
  License-Expression: GPL-3.0-only
@@ -60,22 +60,9 @@ Requires-Dist: memory_profiler; extra == "docs"
60
60
  Requires-Dist: ipython; extra == "docs"
61
61
  Requires-Dist: lightgbm; extra == "docs"
62
62
  Provides-Extra: all
63
- Requires-Dist: pre-commit; extra == "all"
64
- Requires-Dist: pytest; extra == "all"
65
- Requires-Dist: pytest-cov; extra == "all"
66
- Requires-Dist: codecov; extra == "all"
67
- Requires-Dist: pytest_cases; extra == "all"
68
- Requires-Dist: pytest-benchmark; extra == "all"
69
- Requires-Dist: sphinx; extra == "all"
70
- Requires-Dist: sphinx_design; extra == "all"
71
- Requires-Dist: sphinx_gallery; extra == "all"
72
- Requires-Dist: sphinx_rtd_theme; extra == "all"
73
- Requires-Dist: pydata-sphinx-theme; extra == "all"
74
- Requires-Dist: sphinx-autobuild; extra == "all"
75
- Requires-Dist: numpydoc; extra == "all"
76
- Requires-Dist: memory_profiler; extra == "all"
77
- Requires-Dist: ipython; extra == "all"
78
- Requires-Dist: lightgbm; extra == "all"
63
+ Requires-Dist: eegdash[docs]; extra == "all"
64
+ Requires-Dist: eegdash[dev]; extra == "all"
65
+ Requires-Dist: eegdash[tests]; extra == "all"
79
66
  Dynamic: license-file
80
67
 
81
68
  # EEG-Dash
@@ -1,8 +1,8 @@
1
- eegdash/__init__.py,sha256=VuKtyUq59YapSDPTr_g71JTGfHqjbtJUilFQ-hrOTzc,240
2
- eegdash/api.py,sha256=KjmEVkfltLR5EwRnmnPp5rEDS5Oa6_dnprif9EVpeQs,32351
1
+ eegdash/__init__.py,sha256=ao48gkXYHPAJnu73RWcBgDLvEE3uNWOScbioW4nbEn4,240
2
+ eegdash/api.py,sha256=yotN4vqurZAxVA4q_DK4z0mhh9P8sbpxKzvyxuRSkcQ,38538
3
3
  eegdash/data_config.py,sha256=OS6ERO-jHrnEOfMJUehY7ieABdsRw_qWzOKJ4pzSfqw,1323
4
- eegdash/data_utils.py,sha256=_dycnPmGfTbYs7bc6edHxUn_m01dLYtp92_k44ffEoY,26475
5
- eegdash/dataset.py,sha256=ooLoxMFy2I8BY9gJl6ncTp_Gz-Rq0Z-o4NJyyomxLcU,2670
4
+ eegdash/data_utils.py,sha256=mi9pscui-BPpRH9ovRtGWiSwHG5QN6K_IvJdYaING2I,27679
5
+ eegdash/dataset.py,sha256=iGi7m2FNhLgJxxwSsB9JIy01p4tmdlJIPzdL5CDAJU4,9446
6
6
  eegdash/dataset_summary.csv,sha256=EfnPciglkf4Vgc8dDq_1x7Woeeze1II8vOhx60g4yhc,8670
7
7
  eegdash/mongodb.py,sha256=GD3WgA253oFgpzOHrYaj4P1mRjNtDMT5Oj4kVvHswjI,2006
8
8
  eegdash/preprocessing.py,sha256=7S_TTRKPKEk47tTnh2D6WExBt4cctAMxUxGDjJqq5lU,2221
@@ -23,8 +23,8 @@ eegdash/features/feature_bank/dimensionality.py,sha256=j_Ds71Y1AbV2uLFQj8EuXQ4kz
23
23
  eegdash/features/feature_bank/signal.py,sha256=3Tb8z9gX7iZipxQJ9DSyy30JfdmW58kgvimSyZX74p8,3404
24
24
  eegdash/features/feature_bank/spectral.py,sha256=bNB7skusePs1gX7NOU6yRlw_Gr4UOCkO_ylkCgybzug,3319
25
25
  eegdash/features/feature_bank/utils.py,sha256=DGh-Q7-XFIittP7iBBxvsJaZrlVvuY5mw-G7q6C-PCI,1237
26
- eegdash-0.3.5.dev87.dist-info/licenses/LICENSE,sha256=asisR-xupy_NrQBFXnx6yqXeZcYWLvbAaiETl25iXT0,931
27
- eegdash-0.3.5.dev87.dist-info/METADATA,sha256=pyOun6m8eRMa6tsrwGoP4yPTgvDrQuKYGJOZmFd2azs,10485
28
- eegdash-0.3.5.dev87.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
- eegdash-0.3.5.dev87.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
30
- eegdash-0.3.5.dev87.dist-info/RECORD,,
26
+ eegdash-0.3.5.dev92.dist-info/licenses/LICENSE,sha256=asisR-xupy_NrQBFXnx6yqXeZcYWLvbAaiETl25iXT0,931
27
+ eegdash-0.3.5.dev92.dist-info/METADATA,sha256=ThukZMsprgai1PZFYVbURPOmzV3xTMmCPJi-j_sQZps,9925
28
+ eegdash-0.3.5.dev92.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ eegdash-0.3.5.dev92.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
30
+ eegdash-0.3.5.dev92.dist-info/RECORD,,