eegdash 0.3.9.dev182388821__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic.

eegdash/api.py CHANGED
@@ -1,23 +1,31 @@
-import logging
+# Authors: The EEGDash contributors.
+# License: GNU General Public License
+# Copyright the EEGDash contributors.
+
+"""High-level interface to the EEGDash metadata database.
+
+This module provides the main EEGDash class which serves as the primary entry point for
+interacting with the EEGDash ecosystem. It offers methods to query, insert, and update
+metadata records stored in the EEGDash MongoDB database, and includes utilities to load
+EEG data from S3 for matched records.
+"""
+
 import os
-import tempfile
 from pathlib import Path
 from typing import Any, Mapping
-from urllib.parse import urlsplit

 import mne
-import numpy as np
-import xarray as xr
 from docstring_inheritance import NumpyDocstringInheritanceInitMeta
 from dotenv import load_dotenv
-from joblib import Parallel, delayed
-from mne.utils import warn
-from mne_bids import find_matching_paths, get_bids_path_from_fname, read_raw_bids
+from mne_bids import find_matching_paths
 from pymongo import InsertOne, UpdateOne
-from s3fs import S3FileSystem
+from rich.console import Console
+from rich.panel import Panel
+from rich.text import Text

 from braindecode.datasets import BaseConcatDataset

+from . import downloader
 from .bids_eeg_metadata import (
     build_query_from_kwargs,
     load_eeg_attrs_from_bids_file,
@@ -33,10 +41,10 @@ from .data_utils import (
     EEGBIDSDataset,
     EEGDashBaseDataset,
 )
+from .logging import logger
 from .mongodb import MongoConnectionManager
 from .paths import get_default_cache_dir
-
-logger = logging.getLogger("eegdash")
+from .utils import _init_mongo_client


 class EEGDash:
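
The ad-hoc module-level logger is replaced here by the package-wide logger from `eegdash.logging`. Downstream code that tunes verbosity through the standard logging API should keep working, assuming the logger name remains "eegdash":

import logging

# Assumption: the shared logger is still registered under the package name.
logging.getLogger("eegdash").setLevel(logging.DEBUG)
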
@@ -74,19 +82,26 @@ class EEGDash:

         if self.is_public:
             DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
+            if not DB_CONNECTION_STRING:
+                try:
+                    _init_mongo_client()
+                    DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
+                except Exception:
+                    DB_CONNECTION_STRING = None
         else:
             load_dotenv()
             DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")

         # Use singleton to get MongoDB client, database, and collection
+        if not DB_CONNECTION_STRING:
+            raise RuntimeError(
+                "No MongoDB connection string configured. Set MNE config 'EEGDASH_DB_URI' "
+                "or environment variable 'DB_CONNECTION_STRING'."
+            )
         self.__client, self.__db, self.__collection = MongoConnectionManager.get_client(
             DB_CONNECTION_STRING, is_staging
         )

-        self.filesystem = S3FileSystem(
-            anon=True, client_kwargs={"region_name": "us-east-2"}
-        )
-
     def find(
         self, query: dict[str, Any] = None, /, **kwargs
     ) -> list[Mapping[str, Any]]:
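
With this hunk the constructor fails fast instead of deferring connection errors: if neither the MNE config key nor the environment variable yields a URI (after one attempt at `_init_mongo_client()`), a RuntimeError is raised. A minimal sketch of how a caller can satisfy the new guard; the URI below is a placeholder, not a real endpoint:

import mne

# Public mode looks up the MNE config first; persist a URI there once.
mne.set_config("EEGDASH_DB_URI", "mongodb://localhost:27017/eegdash")  # placeholder

# Private mode (is_public=False) instead reads DB_CONNECTION_STRING from the
# environment (a .env file works too, via load_dotenv()):
#   export DB_CONNECTION_STRING="mongodb://..."

from eegdash import EEGDash

client = EEGDash()  # raises RuntimeError if no connection string was resolved
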
@@ -197,17 +212,22 @@ class EEGDash:
         return doc is not None

     def _validate_input(self, record: dict[str, Any]) -> dict[str, Any]:
-        """Internal method to validate the input record against the expected schema.
+        """Validate the input record against the expected schema.

         Parameters
         ----------
-        record: dict
+        record : dict
             A dictionary representing the EEG data record to be validated.

         Returns
         -------
-        dict:
-            Returns the record itself on success, or raises a ValueError if the record is invalid.
+        dict
+            The record itself on success.
+
+        Raises
+        ------
+        ValueError
+            If the record is missing required keys or has values of the wrong type.

         """
         input_types = {
@@ -237,20 +257,44 @@ class EEGDash:
         return record

     def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
-        """Internal helper to build a validated MongoDB query from keyword args.
+        """Build a validated MongoDB query from keyword arguments.
+
+        This delegates to the module-level builder used across the package.
+
+        Parameters
+        ----------
+        **kwargs
+            Keyword arguments to convert into a MongoDB query.
+
+        Returns
+        -------
+        dict
+            A MongoDB query dictionary.

-        This delegates to the module-level builder used across the package and
-        is exposed here for testing and convenience.
         """
         return build_query_from_kwargs(**kwargs)

-    # --- Query merging and conflict detection helpers ---
-    def _extract_simple_constraint(self, query: dict[str, Any], key: str):
+    def _extract_simple_constraint(
+        self, query: dict[str, Any], key: str
+    ) -> tuple[str, Any] | None:
         """Extract a simple constraint for a given key from a query dict.

-        Supports only top-level equality (key: value) and $in (key: {"$in": [...]})
-        constraints. Returns a tuple (kind, value) where kind is "eq" or "in". If the
-        key is not present or uses other operators, returns None.
+        Supports top-level equality (e.g., ``{'subject': '01'}``) and ``$in``
+        (e.g., ``{'subject': {'$in': ['01', '02']}}``) constraints.
+
+        Parameters
+        ----------
+        query : dict
+            The MongoDB query dictionary.
+        key : str
+            The key for which to extract the constraint.
+
+        Returns
+        -------
+        tuple or None
+            A tuple of (kind, value) where kind is "eq" or "in", or None if the
+            constraint is not present or unsupported.
+
         """
         if not isinstance(query, dict) or key not in query:
             return None
@@ -260,16 +304,28 @@ class EEGDash:
                 return ("in", list(val["$in"]))
             return None  # unsupported operator shape for conflict checking
         else:
-            return ("eq", val)
+            return "eq", val

     def _raise_if_conflicting_constraints(
         self, raw_query: dict[str, Any], kwargs_query: dict[str, Any]
     ) -> None:
-        """Raise ValueError if both query sources define incompatible constraints.
+        """Raise ValueError if query sources have incompatible constraints.
+
+        Checks for mutually exclusive constraints on the same field to avoid
+        silent empty results.
+
+        Parameters
+        ----------
+        raw_query : dict
+            The raw MongoDB query dictionary.
+        kwargs_query : dict
+            The query dictionary built from keyword arguments.
+
+        Raises
+        ------
+        ValueError
+            If conflicting constraints are found.

-        We conservatively check only top-level fields with simple equality or $in
-        constraints. If a field appears in both queries and constraints are mutually
-        exclusive, raise an explicit error to avoid silent empty result sets.
         """
         if not raw_query or not kwargs_query:
             return
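
These two helpers only reason about top-level equality and ``$in`` constraints; any other operator shape is treated as opaque and skipped. A short sketch of the documented behavior (these are private methods, so the calls below are illustrative and subject to change):

eegdash = EEGDash()

eegdash._extract_simple_constraint({"subject": "01"}, "subject")
# -> ("eq", "01")
eegdash._extract_simple_constraint({"subject": {"$in": ["01", "02"]}}, "subject")
# -> ("in", ["01", "02"])
eegdash._extract_simple_constraint({"subject": {"$regex": "^0"}}, "subject")
# -> None (unsupported operator shape)

# Disjoint constraints on the same field raise instead of silently
# returning an empty result set:
eegdash._raise_if_conflicting_constraints(
    {"subject": "01"},
    {"subject": {"$in": ["02", "03"]}},
)  # ValueError: Conflicting constraints for 'subject': ...
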
@@ -310,115 +366,6 @@ class EEGDash:
                     f"Conflicting constraints for '{key}': disjoint sets {r_val!r} and {k_val!r}"
                 )

-    def load_eeg_data_from_s3(self, s3path: str) -> xr.DataArray:
-        """Load EEG data from an S3 URI into an ``xarray.DataArray``.
-
-        Preserves the original filename, downloads sidecar files when applicable
-        (e.g., ``.fdt`` for EEGLAB, ``.vmrk``/``.eeg`` for BrainVision), and uses
-        MNE's direct readers.
-
-        Parameters
-        ----------
-        s3path : str
-            An S3 URI (should start with "s3://").
-
-        Returns
-        -------
-        xr.DataArray
-            EEG data with dimensions ``("channel", "time")``.
-
-        Raises
-        ------
-        ValueError
-            If the file extension is unsupported.
-
-        """
-        # choose a temp dir so sidecars can be colocated
-        with tempfile.TemporaryDirectory() as tmpdir:
-            # Derive local filenames from the S3 key to keep base name consistent
-            s3_key = urlsplit(s3path).path  # e.g., "/dsXXXX/sub-.../..._eeg.set"
-            basename = Path(s3_key).name
-            ext = Path(basename).suffix.lower()
-            local_main = Path(tmpdir) / basename
-
-            # Download main file
-            with (
-                self.filesystem.open(s3path, mode="rb") as fsrc,
-                open(local_main, "wb") as fdst,
-            ):
-                fdst.write(fsrc.read())
-
-            # Determine and fetch any required sidecars
-            sidecars: list[str] = []
-            if ext == ".set":  # EEGLAB
-                sidecars = [".fdt"]
-            elif ext == ".vhdr":  # BrainVision
-                sidecars = [".vmrk", ".eeg", ".dat", ".raw"]
-
-            for sc_ext in sidecars:
-                sc_key = s3_key[: -len(ext)] + sc_ext
-                sc_uri = f"s3://{urlsplit(s3path).netloc}{sc_key}"
-                try:
-                    # If sidecar exists, download next to the main file
-                    info = self.filesystem.info(sc_uri)
-                    if info:
-                        sc_local = Path(tmpdir) / Path(sc_key).name
-                        with (
-                            self.filesystem.open(sc_uri, mode="rb") as fsrc,
-                            open(sc_local, "wb") as fdst,
-                        ):
-                            fdst.write(fsrc.read())
-                except Exception:
-                    # Sidecar not present; skip silently
-                    pass
-
-            # Read using appropriate MNE reader
-            raw = mne.io.read_raw(str(local_main), preload=True, verbose=False)
-
-            data = raw.get_data()
-            fs = raw.info["sfreq"]
-            max_time = data.shape[1] / fs
-            time_steps = np.linspace(0, max_time, data.shape[1]).squeeze()
-            channel_names = raw.ch_names
-
-            return xr.DataArray(
-                data=data,
-                dims=["channel", "time"],
-                coords={"time": time_steps, "channel": channel_names},
-            )
-
-    def load_eeg_data_from_bids_file(self, bids_file: str) -> xr.DataArray:
-        """Load EEG data from a local BIDS-formatted file.
-
-        Parameters
-        ----------
-        bids_file : str
-            Path to a BIDS-compliant EEG file (e.g., ``*_eeg.edf``, ``*_eeg.bdf``,
-            ``*_eeg.vhdr``, ``*_eeg.set``).
-
-        Returns
-        -------
-        xr.DataArray
-            EEG data with dimensions ``("channel", "time")``.
-
-        """
-        bids_path = get_bids_path_from_fname(bids_file, verbose=False)
-        raw_object = read_raw_bids(bids_path=bids_path, verbose=False)
-        eeg_data = raw_object.get_data()
-
-        fs = raw_object.info["sfreq"]
-        max_time = eeg_data.shape[1] / fs
-        time_steps = np.linspace(0, max_time, eeg_data.shape[1]).squeeze()  # in seconds
-
-        channel_names = raw_object.ch_names
-
-        eeg_xarray = xr.DataArray(
-            data=eeg_data,
-            dims=["channel", "time"],
-            coords={"time": time_steps, "channel": channel_names},
-        )
-        return eeg_xarray
-
     def add_bids_dataset(
         self, dataset: str, data_dir: str, overwrite: bool = True
     ) -> None:
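
With this hunk the xarray-returning loaders are gone: `load_eeg_data_from_s3()` and `load_eeg_data_from_bids_file()` were removed along with the `numpy`/`xarray`/`s3fs` imports, and downloading now lives in the `downloader` module. Code that depended on the removed helpers can reproduce their behavior with plain MNE and xarray; a sketch mirroring the deleted implementation (not part of the eegdash 0.4.0 API):

import mne
import numpy as np
import xarray as xr


def raw_to_dataarray(path: str) -> xr.DataArray:
    # Read a local EEG file with MNE, as the removed helpers did.
    raw = mne.io.read_raw(path, preload=True, verbose=False)
    data = raw.get_data()  # shape: (n_channels, n_times)
    times = np.linspace(0, data.shape[1] / raw.info["sfreq"], data.shape[1])
    return xr.DataArray(
        data,
        dims=["channel", "time"],
        coords={"channel": raw.ch_names, "time": times},
    )
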
@@ -482,84 +429,59 @@ class EEGDash:
         logger.info("Upserted: %s", result.upserted_count)
         logger.info("Errors: %s ", result.bulk_api_result.get("writeErrors", []))

-    def get(self, query: dict[str, Any]) -> list[xr.DataArray]:
-        """Download and return EEG data arrays for records matching a query.
+    def _add_request(self, record: dict) -> InsertOne:
+        """Create a MongoDB insertion request for a record.

         Parameters
         ----------
-        query : dict
-            MongoDB query used to select records.
+        record : dict
+            The record to insert.

         Returns
         -------
-        list of xr.DataArray
-            EEG data for each matching record, with dimensions ``("channel", "time")``.
-
-        Notes
-        -----
-        Retrieval runs in parallel. Downloaded files are read and discarded
-        (no on-disk caching here).
+        InsertOne
+            A PyMongo ``InsertOne`` object.

         """
-        sessions = self.find(query)
-        results = []
-        if sessions:
-            logger.info("Found %s records", len(sessions))
-            results = Parallel(
-                n_jobs=-1 if len(sessions) > 1 else 1, prefer="threads", verbose=1
-            )(
-                delayed(self.load_eeg_data_from_s3)(self._get_s3path(session))
-                for session in sessions
-            )
-        return results
+        return InsertOne(record)

-    def _get_s3path(self, record: Mapping[str, Any] | str) -> str:
-        """Build an S3 URI from a DB record or a relative path.
+    def add(self, record: dict) -> None:
+        """Add a single record to the MongoDB collection.

         Parameters
         ----------
-        record : dict or str
-            Either a DB record containing a ``'bidspath'`` key, or a relative
-            path string under the OpenNeuro bucket.
-
-        Returns
-        -------
-        str
-            Fully qualified S3 URI.
-
-        Raises
-        ------
-        ValueError
-            If a mapping is provided but ``'bidspath'`` is missing.
+        record : dict
+            The record to add.

         """
-        if isinstance(record, str):
-            rel = record
-        else:
-            rel = record.get("bidspath")
-            if not rel:
-                raise ValueError("Record missing 'bidspath' for S3 path resolution")
-        return f"s3://openneuro.org/{rel}"
-
-    def _add_request(self, record: dict):
-        """Internal helper method to create a MongoDB insertion request for a record."""
-        return InsertOne(record)
-
-    def add(self, record: dict):
-        """Add a single record to the MongoDB collection."""
         try:
             self.__collection.insert_one(record)
         except ValueError as e:
             logger.error("Validation error for record: %s ", record["data_name"])
             logger.error(e)
-        except:
-            logger.error("Error adding record: %s ", record["data_name"])
+        except Exception as exc:
+            logger.error(
+                "Error adding record: %s ", record.get("data_name", "<unknown>")
+            )
+            logger.debug("Add operation failed", exc_info=exc)
+
+    def _update_request(self, record: dict) -> UpdateOne:
+        """Create a MongoDB update request for a record.

-    def _update_request(self, record: dict):
-        """Internal helper method to create a MongoDB update request for a record."""
+        Parameters
+        ----------
+        record : dict
+            The record to update.
+
+        Returns
+        -------
+        UpdateOne
+            A PyMongo ``UpdateOne`` object.
+
+        """
         return UpdateOne({"data_name": record["data_name"]}, {"$set": record})

-    def update(self, record: dict):
+    def update(self, record: dict) -> None:
         """Update a single record in the MongoDB collection.

         Parameters
@@ -572,62 +494,88 @@ class EEGDash:
             self.__collection.update_one(
                 {"data_name": record["data_name"]}, {"$set": record}
             )
-        except:  # silent failure
-            logger.error("Error updating record: %s", record["data_name"])
+        except Exception as exc:  # log and continue
+            logger.error(
+                "Error updating record: %s", record.get("data_name", "<unknown>")
+            )
+            logger.debug("Update operation failed", exc_info=exc)

     def exists(self, query: dict[str, Any]) -> bool:
-        """Alias for :meth:`exist` provided for API clarity."""
+        """Check if at least one record matches the query.
+
+        This is an alias for :meth:`exist`.
+
+        Parameters
+        ----------
+        query : dict
+            MongoDB query to check for existence.
+
+        Returns
+        -------
+        bool
+            True if a matching record exists, False otherwise.
+
+        """
         return self.exist(query)

-    def remove_field(self, record, field):
-        """Remove a specific field from a record in the MongoDB collection.
+    def remove_field(self, record: dict, field: str) -> None:
+        """Remove a field from a specific record in the MongoDB collection.

         Parameters
         ----------
         record : dict
-            Record identifying object with ``data_name``.
+            Record-identifying object with a ``data_name`` key.
         field : str
-            Field name to remove.
+            The name of the field to remove.

         """
         self.__collection.update_one(
             {"data_name": record["data_name"]}, {"$unset": {field: 1}}
         )

-    def remove_field_from_db(self, field):
-        """Remove a field from all records (destructive).
+    def remove_field_from_db(self, field: str) -> None:
+        """Remove a field from all records in the database.
+
+        .. warning::
+            This is a destructive operation and cannot be undone.

         Parameters
         ----------
         field : str
-            Field name to remove from every document.
+            The name of the field to remove from all documents.

         """
         self.__collection.update_many({}, {"$unset": {field: 1}})

     @property
     def collection(self):
-        """Return the MongoDB collection object."""
-        return self.__collection
+        """The underlying PyMongo ``Collection`` object.

-    def close(self):
-        """Backward-compatibility no-op; connections are managed globally.
+        Returns
+        -------
+        pymongo.collection.Collection
+            The collection object used for database interactions.

-        Notes
-        -----
-        Connections are managed by :class:`MongoConnectionManager`. Use
-        :meth:`close_all_connections` to explicitly close all clients.
+        """
+        return self.__collection
+
+    def close(self) -> None:
+        """Close the MongoDB connection.

+        .. deprecated:: 0.1
+            Connections are now managed globally by :class:`MongoConnectionManager`.
+            This method is a no-op and will be removed in a future version.
+            Use :meth:`EEGDash.close_all_connections` to close all clients.
         """
         # Individual instances no longer close the shared client
         pass

     @classmethod
-    def close_all_connections(cls):
-        """Close all MongoDB client connections managed by the singleton."""
+    def close_all_connections(cls) -> None:
+        """Close all MongoDB client connections managed by the singleton manager."""
         MongoConnectionManager.close_all()

-    def __del__(self):
+    def __del__(self) -> None:
         """Destructor; no explicit action needed due to global connection manager."""
         # No longer needed since we're using singleton pattern
         pass
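
Callers that previously paired each instance with `close()` can simply drop the call; connection teardown is now a single class-level operation. A sketch of the surviving pattern:

from eegdash import EEGDash

# ... use one or more EEGDash instances ...
EEGDash.close_all_connections()  # closes every client pooled by the manager
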
@@ -640,22 +588,59 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM

     Examples
     --------
-    # Find by single subject
-    >>> ds = EEGDashDataset(dataset="ds005505", subject="NDARCA153NKE")
-
-    # Find by a list of subjects and a specific task
-    >>> subjects = ["NDARCA153NKE", "NDARXT792GY8"]
-    >>> ds = EEGDashDataset(dataset="ds005505", subject=subjects, task="RestingState")
-
-    # Use a raw MongoDB query for advanced filtering
-    >>> raw_query = {"dataset": "ds005505", "subject": {"$in": subjects}}
-    >>> ds = EEGDashDataset(query=raw_query)
+    Basic usage with dataset and subject filtering:
+
+    >>> from eegdash import EEGDashDataset
+    >>> dataset = EEGDashDataset(
+    ...     cache_dir="./data",
+    ...     dataset="ds002718",
+    ...     subject="012"
+    ... )
+    >>> print(f"Number of recordings: {len(dataset)}")
+
+    Filter by multiple subjects and specific task:
+
+    >>> subjects = ["012", "013", "014"]
+    >>> dataset = EEGDashDataset(
+    ...     cache_dir="./data",
+    ...     dataset="ds002718",
+    ...     subject=subjects,
+    ...     task="RestingState"
+    ... )
+
+    Load and inspect EEG data from recordings:
+
+    >>> if len(dataset) > 0:
+    ...     recording = dataset[0]
+    ...     raw = recording.load()
+    ...     print(f"Sampling rate: {raw.info['sfreq']} Hz")
+    ...     print(f"Number of channels: {len(raw.ch_names)}")
+    ...     print(f"Duration: {raw.times[-1]:.1f} seconds")
+
+    Advanced filtering with raw MongoDB queries:
+
+    >>> from eegdash import EEGDashDataset
+    >>> query = {
+    ...     "dataset": "ds002718",
+    ...     "subject": {"$in": ["012", "013"]},
+    ...     "task": "RestingState"
+    ... }
+    >>> dataset = EEGDashDataset(cache_dir="./data", query=query)
+
+    Working with dataset collections and braindecode integration:
+
+    >>> # EEGDashDataset is a braindecode BaseConcatDataset
+    >>> for i, recording in enumerate(dataset):
+    ...     if i >= 2:  # limit output
+    ...         break
+    ...     print(f"Recording {i}: {recording.description}")
+    ...     raw = recording.load()
+    ...     print(f"    Channels: {len(raw.ch_names)}, Duration: {raw.times[-1]:.1f}s")

     Parameters
     ----------
     cache_dir : str | Path
-        Directory where data are cached locally. If not specified, a default
-        cache directory under the user cache is used.
+        Directory where data are cached locally.
     query : dict | None
         Raw MongoDB query to filter records. If provided, it is merged with
         keyword filtering arguments (see ``**kwargs``) using logical AND.
@@ -726,13 +711,21 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
         self.records = records
         self.download = download
         self.n_jobs = n_jobs
-        self.eeg_dash_instance = eeg_dash_instance or EEGDash()
+        self.eeg_dash_instance = eeg_dash_instance

-        # Resolve a unified cache directory across code/tests/CI
-        self.cache_dir = Path(cache_dir or get_default_cache_dir())
+        self.cache_dir = cache_dir
+        if self.cache_dir == "" or self.cache_dir is None:
+            self.cache_dir = get_default_cache_dir()
+            logger.warning(
+                f"Cache directory is empty, using the eegdash default path: {self.cache_dir}"
+            )
+
+        self.cache_dir = Path(self.cache_dir)

         if not self.cache_dir.exists():
-            warn(f"Cache directory does not exist, creating it: {self.cache_dir}")
+            logger.warning(
+                f"Cache directory does not exist, creating it: {self.cache_dir}"
+            )
             self.cache_dir.mkdir(exist_ok=True, parents=True)

         # Separate query kwargs from other kwargs passed to the BaseDataset constructor
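
The cache-directory handling also changed: an empty or missing ``cache_dir`` now falls back to ``get_default_cache_dir()`` with an explicit warning, rather than being resolved silently. Illustrative calls (paths and entities are examples only):

from eegdash import EEGDashDataset

# Explicit cache location:
ds = EEGDashDataset(cache_dir="./data", dataset="ds002718", subject="012")

# Empty string or None: falls back to the package default and logs a warning.
ds = EEGDashDataset(cache_dir="", dataset="ds002718", subject="012")
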
@@ -772,21 +765,29 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
             not _suppress_comp_warning
             and self.query["dataset"] in RELEASE_TO_OPENNEURO_DATASET_MAP.values()
         ):
-            warn(
-                "If you are not participating in the competition, you can ignore this warning!"
-                "\n\n"
-                "EEG 2025 Competition Data Notice:\n"
-                "---------------------------------\n"
-                " You are loading the dataset that is used in the EEG 2025 Competition:\n"
-                "IMPORTANT: The data accessed via `EEGDashDataset` is NOT identical to what you get from `EEGChallengeDataset` object directly.\n"
-                "and it is not what you will use for the competition. Downsampling and filtering were applied to the data"
-                "to allow more people to participate.\n"
-                "\n"
-                "If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data.\n"
-                "\n",
-                UserWarning,
-                module="eegdash",
+            message_text = Text.from_markup(
+                "[italic]This notice is only for users who are participating in the [link=https://eeg2025.github.io/]EEG 2025 Competition[/link].[/italic]\n\n"
+                "[bold]EEG 2025 Competition Data Notice![/bold]\n"
+                "You are loading one of the datasets that is used in competition, but via `EEGDashDataset`.\n\n"
+                "[bold red]IMPORTANT[/bold red]: \n"
+                "If you download data from `EEGDashDataset`, it is [u]NOT[/u] identical to the official \n"
+                "competition data, which is accessed via `EEGChallengeDataset`. "
+                "The competition data has been downsampled and filtered.\n\n"
+                "[bold]If you are participating in the competition, \nyou must use the `EEGChallengeDataset` object to ensure consistency.[/bold] \n\n"
+                "If you are not participating in the competition, you can ignore this message."
             )
+            warning_panel = Panel(
+                message_text,
+                title="[yellow]EEG 2025 Competition Data Notice[/yellow]",
+                subtitle="[cyan]Source: EEGDashDataset[/cyan]",
+                border_style="yellow",
+            )
+
+            try:
+                Console().print(warning_panel)
+            except Exception:
+                logger.warning(str(message_text))
+
         if records is not None:
             self.records = records
             datasets = [
@@ -848,16 +849,15 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
                     )
                 )
         elif self.query:
-            # This is the DB query path that we are improving
+            if self.eeg_dash_instance is None:
+                self.eeg_dash_instance = EEGDash()
             datasets = self._find_datasets(
                 query=build_query_from_kwargs(**self.query),
                 description_fields=description_fields,
                 base_dataset_kwargs=base_dataset_kwargs,
             )
             # We only need filesystem if we need to access S3
-            self.filesystem = S3FileSystem(
-                anon=True, client_kwargs={"region_name": "us-east-2"}
-            )
+            self.filesystem = downloader.get_s3_filesystem()
         else:
             raise ValueError(
                 "You must provide either 'records', a 'data_dir', or a query/keyword arguments for filtering."
@@ -870,45 +870,30 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
     ) -> list[dict]:
         """Discover local BIDS EEG files and build minimal records.

-        This helper enumerates EEG recordings under ``dataset_root`` via
-        ``mne_bids.find_matching_paths`` and applies entity filters to produce a
-        list of records suitable for ``EEGDashBaseDataset``. No network access
-        is performed and files are not read.
+        Enumerates EEG recordings under ``dataset_root`` using
+        ``mne_bids.find_matching_paths`` and applies entity filters to produce
+        records suitable for :class:`EEGDashBaseDataset`. No network access is
+        performed, and files are not read.

         Parameters
         ----------
         dataset_root : Path
-            Local dataset directory. May be the plain dataset folder (e.g.,
-            ``ds005509``) or a suffixed cache variant (e.g.,
-            ``ds005509-bdf-mini``).
-        filters : dict of {str, Any}
-            Query filters. Must include ``'dataset'`` with the dataset id (without
-            local suffixes). May include BIDS entities ``'subject'``,
-            ``'session'``, ``'task'``, and ``'run'``. Each value can be a scalar
-            or a sequence of scalars.
+            Local dataset directory (e.g., ``/path/to/cache/ds005509``).
+        filters : dict
+            Query filters. Must include ``'dataset'`` and may include BIDS
+            entities like ``'subject'``, ``'session'``, etc.

         Returns
         -------
-        records : list of dict
-            One record per matched EEG file with at least:
-
-            - ``'data_name'``
-            - ``'dataset'`` (dataset id, without suffixes)
-            - ``'bidspath'`` (normalized to start with the dataset id)
-            - ``'subject'``, ``'session'``, ``'task'``, ``'run'`` (may be None)
-            - ``'bidsdependencies'`` (empty list)
-            - ``'modality'`` (``"eeg"``)
-            - ``'sampling_frequency'``, ``'nchans'``, ``'ntimes'`` (minimal
-              defaults for offline usage)
+        list of dict
+            A list of records, one for each matched EEG file. Each record
+            contains BIDS entities, paths, and minimal metadata for offline use.

         Notes
         -----
-        - Matching uses ``datatypes=['eeg']`` and ``suffixes=['eeg']``.
-        - ``bidspath`` is constructed as
-          ``<dataset_id> / <relative_path_from_dataset_root>`` to ensure the
-          first path component is the dataset id (without local cache suffixes).
-        - Minimal defaults are set for ``sampling_frequency``, ``nchans``, and
-          ``ntimes`` to satisfy dataset length requirements offline.
+        Matching is performed for ``datatypes=['eeg']`` and ``suffixes=['eeg']``.
+        The ``bidspath`` is normalized to ensure it starts with the dataset ID,
+        even for suffixed cache directories.

         """
         dataset_id = filters["dataset"]
@@ -970,10 +955,22 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
         return records_out

     def _find_key_in_nested_dict(self, data: Any, target_key: str) -> Any:
-        """Recursively search for target_key in nested dicts/lists with normalized matching.
+        """Recursively search for a key in nested dicts/lists.
+
+        Performs a case-insensitive and underscore/hyphen-agnostic search.
+
+        Parameters
+        ----------
+        data : Any
+            The nested data structure (dicts, lists) to search.
+        target_key : str
+            The key to search for.
+
+        Returns
+        -------
+        Any
+            The value of the first matching key, or None if not found.

-        This makes lookups tolerant to naming differences like "p-factor" vs "p_factor".
-        Returns the first match or None.
         """
         norm_target = normalize_key(target_key)
         if isinstance(data, dict):
@@ -996,23 +993,25 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
         description_fields: list[str],
         base_dataset_kwargs: dict,
     ) -> list[EEGDashBaseDataset]:
-        """Helper method to find datasets in the MongoDB collection that satisfy the
-        given query and return them as a list of EEGDashBaseDataset objects.
+        """Find and construct datasets from a MongoDB query.
+
+        Queries the database, then creates a list of
+        :class:`EEGDashBaseDataset` objects from the results.

         Parameters
         ----------
-        query : dict
-            The query object, as in EEGDash.find().
-        description_fields : list[str]
-            A list of fields to be extracted from the dataset records and included in
-            the returned dataset description(s).
-        kwargs: additional keyword arguments to be passed to the EEGDashBaseDataset
-            constructor.
+        query : dict, optional
+            The MongoDB query to execute.
+        description_fields : list of str
+            Fields to extract from each record for the dataset description.
+        base_dataset_kwargs : dict
+            Additional keyword arguments to pass to the
+            :class:`EEGDashBaseDataset` constructor.

         Returns
         -------
-        list :
-            A list of EEGDashBaseDataset objects that match the query.
+        list of EEGDashBaseDataset
+            A list of dataset objects matching the query.

         """
         datasets: list[EEGDashBaseDataset] = []
@@ -1043,3 +1042,6 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
                 )
             )
         return datasets
+
+
+__all__ = ["EEGDash", "EEGDashDataset"]