eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1.dev185__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

eegdash/api.py CHANGED
@@ -15,35 +15,20 @@ from pathlib import Path
 from typing import Any, Mapping

 import mne
-from docstring_inheritance import NumpyDocstringInheritanceInitMeta
-from dotenv import load_dotenv
-from mne_bids import find_matching_paths
+from mne.utils import _soft_import
 from pymongo import InsertOne, UpdateOne
-from rich.console import Console
-from rich.panel import Panel
-from rich.text import Text

-from braindecode.datasets import BaseConcatDataset
-
-from . import downloader
 from .bids_eeg_metadata import (
     build_query_from_kwargs,
     load_eeg_attrs_from_bids_file,
-    merge_participants_fields,
-    normalize_key,
 )
 from .const import (
     ALLOWED_QUERY_FIELDS,
-    RELEASE_TO_OPENNEURO_DATASET_MAP,
 )
 from .const import config as data_config
-from .data_utils import (
-    EEGBIDSDataset,
-    EEGDashBaseDataset,
-)
+from .dataset.bids_dataset import EEGBIDSDataset
 from .logging import logger
 from .mongodb import MongoConnectionManager
-from .paths import get_default_cache_dir
 from .utils import _init_mongo_client


@@ -89,7 +74,8 @@ class EEGDash:
         except Exception:
             DB_CONNECTION_STRING = None
         else:
-            load_dotenv()
+            dotenv = _soft_import("dotenv", "eegdash[full] is necessary.")
+            dotenv.load_dotenv()
             DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")

         # Use singleton to get MongoDB client, database, and collection
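The `_soft_import` change defers loading python-dotenv until a `.env` file actually needs to be read, so the base install no longer hard-requires it. A minimal sketch of the idea, using importlib instead of MNE's private helper (whose exact signature is not shown in this diff):

```python
# Illustrative only, not eegdash code: a "soft" import fails with a helpful
# hint, and only at the moment the optional dependency is actually needed.
import importlib


def soft_import(name: str, purpose: str):
    try:
        return importlib.import_module(name)
    except ImportError as err:
        raise ImportError(f"The '{name}' package is required: {purpose}") from err


# dotenv is resolved only when environment-based configuration is requested
dotenv = soft_import("dotenv", "eegdash[full] is necessary.")
dotenv.load_dotenv()
```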
@@ -212,17 +198,22 @@ class EEGDash:
         return doc is not None

     def _validate_input(self, record: dict[str, Any]) -> dict[str, Any]:
-        """Internal method to validate the input record against the expected schema.
+        """Validate the input record against the expected schema.

         Parameters
         ----------
-        record: dict
+        record : dict
             A dictionary representing the EEG data record to be validated.

         Returns
         -------
-        dict:
-            Returns the record itself on success, or raises a ValueError if the record is invalid.
+        dict
+            The record itself on success.
+
+        Raises
+        ------
+        ValueError
+            If the record is missing required keys or has values of the wrong type.

         """
         input_types = {
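For orientation, a hypothetical record built from field names that appear elsewhere in this diff (`data_name`, `dataset`, `bidspath`, `subject`, `task`, `modality`); the exact schema enforced by the `input_types` mapping is not part of this hunk, so treat this purely as an illustration:

```python
# Hypothetical record; field names mirror those used in records elsewhere in
# this diff, but the enforced schema lives in the (unshown) input_types dict.
record = {
    "data_name": "ds002718_sub-012_task-RestingState_eeg.set",
    "dataset": "ds002718",
    "bidspath": "ds002718/sub-012/eeg/sub-012_task-RestingState_eeg.set",
    "subject": "012",
    "task": "RestingState",
    "modality": "eeg",
}
eegdash = EEGDash()
validated = eegdash._validate_input(record)  # raises ValueError if malformed
```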
@@ -252,20 +243,44 @@ class EEGDash:
         return record

     def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
-        """Internal helper to build a validated MongoDB query from keyword args.
+        """Build a validated MongoDB query from keyword arguments.
+
+        This delegates to the module-level builder used across the package.
+
+        Parameters
+        ----------
+        **kwargs
+            Keyword arguments to convert into a MongoDB query.
+
+        Returns
+        -------
+        dict
+            A MongoDB query dictionary.

-        This delegates to the module-level builder used across the package and
-        is exposed here for testing and convenience.
         """
         return build_query_from_kwargs(**kwargs)

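As an illustration of what the keyword form stands in for, the target query shape below is taken from the raw-query example further down in this diff; the precise normalization rules live in `build_query_from_kwargs` and are not shown here:

```python
# Keyword filters and the equivalent raw MongoDB query they are meant to
# produce (list values become $in constraints).
kwargs = {"dataset": "ds002718", "subject": ["012", "013"], "task": "RestingState"}
query = {
    "dataset": "ds002718",
    "subject": {"$in": ["012", "013"]},
    "task": "RestingState",
}
```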
-    # --- Query merging and conflict detection helpers ---
-    def _extract_simple_constraint(self, query: dict[str, Any], key: str):
+    def _extract_simple_constraint(
+        self, query: dict[str, Any], key: str
+    ) -> tuple[str, Any] | None:
         """Extract a simple constraint for a given key from a query dict.

-        Supports only top-level equality (key: value) and $in (key: {"$in": [...]})
-        constraints. Returns a tuple (kind, value) where kind is "eq" or "in". If the
-        key is not present or uses other operators, returns None.
+        Supports top-level equality (e.g., ``{'subject': '01'}``) and ``$in``
+        (e.g., ``{'subject': {'$in': ['01', '02']}}``) constraints.
+
+        Parameters
+        ----------
+        query : dict
+            The MongoDB query dictionary.
+        key : str
+            The key for which to extract the constraint.
+
+        Returns
+        -------
+        tuple or None
+            A tuple of (kind, value) where kind is "eq" or "in", or None if the
+            constraint is not present or unsupported.
+
         """
         if not isinstance(query, dict) or key not in query:
             return None
@@ -275,16 +290,28 @@ class EEGDash:
                 return ("in", list(val["$in"]))
             return None  # unsupported operator shape for conflict checking
         else:
-            return ("eq", val)
+            return "eq", val

     def _raise_if_conflicting_constraints(
         self, raw_query: dict[str, Any], kwargs_query: dict[str, Any]
     ) -> None:
-        """Raise ValueError if both query sources define incompatible constraints.
+        """Raise ValueError if query sources have incompatible constraints.
+
+        Checks for mutually exclusive constraints on the same field to avoid
+        silent empty results.
+
+        Parameters
+        ----------
+        raw_query : dict
+            The raw MongoDB query dictionary.
+        kwargs_query : dict
+            The query dictionary built from keyword arguments.
+
+        Raises
+        ------
+        ValueError
+            If conflicting constraints are found.

-        We conservatively check only top-level fields with simple equality or $in
-        constraints. If a field appears in both queries and constraints are mutually
-        exclusive, raise an explicit error to avoid silent empty result sets.
         """
         if not raw_query or not kwargs_query:
             return
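A small illustration (not the eegdash implementation) of the kind of eq/`$in` conflict this guard is meant to surface instead of returning an empty result set:

```python
# Two sources constrain the same field in mutually exclusive ways; failing
# loudly is preferable to silently matching nothing.
raw_query = {"subject": {"$in": ["012", "013"]}}   # -> ("in", ["012", "013"])
kwargs_query = {"subject": "014"}                   # -> ("eq", "014")

allowed = set(raw_query["subject"]["$in"])
if kwargs_query["subject"] not in allowed:
    raise ValueError("Conflicting constraints for 'subject': '014' not in ['012', '013']")
```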
@@ -388,12 +415,31 @@ class EEGDash:
         logger.info("Upserted: %s", result.upserted_count)
         logger.info("Errors: %s ", result.bulk_api_result.get("writeErrors", []))

-    def _add_request(self, record: dict):
-        """Internal helper method to create a MongoDB insertion request for a record."""
+    def _add_request(self, record: dict) -> InsertOne:
+        """Create a MongoDB insertion request for a record.
+
+        Parameters
+        ----------
+        record : dict
+            The record to insert.
+
+        Returns
+        -------
+        InsertOne
+            A PyMongo ``InsertOne`` object.
+
+        """
         return InsertOne(record)

-    def add(self, record: dict):
-        """Add a single record to the MongoDB collection."""
+    def add(self, record: dict) -> None:
+        """Add a single record to the MongoDB collection.
+
+        Parameters
+        ----------
+        record : dict
+            The record to add.
+
+        """
         try:
             self.__collection.insert_one(record)
         except ValueError as e:
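For context on the `InsertOne`/`UpdateOne` helpers: they are the building blocks of a PyMongo bulk write, which matches the `upserted_count`/`writeErrors` logging in this hunk. A sketch with placeholder names (`collection` and the record contents are not from eegdash):

```python
# Placeholder sketch of the bulk-write pattern these request helpers feed.
from pymongo import InsertOne, UpdateOne

requests = [
    InsertOne({"data_name": "new_record"}),
    UpdateOne({"data_name": "existing_record"}, {"$set": {"modality": "eeg"}}),
]
result = collection.bulk_write(requests, ordered=False)  # `collection` is a placeholder
print(result.upserted_count, result.bulk_api_result.get("writeErrors", []))
```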
@@ -405,11 +451,23 @@ class EEGDash:
             )
             logger.debug("Add operation failed", exc_info=exc)

-    def _update_request(self, record: dict):
-        """Internal helper method to create a MongoDB update request for a record."""
+    def _update_request(self, record: dict) -> UpdateOne:
+        """Create a MongoDB update request for a record.
+
+        Parameters
+        ----------
+        record : dict
+            The record to update.
+
+        Returns
+        -------
+        UpdateOne
+            A PyMongo ``UpdateOne`` object.
+
+        """
         return UpdateOne({"data_name": record["data_name"]}, {"$set": record})

-    def update(self, record: dict):
+    def update(self, record: dict) -> None:
         """Update a single record in the MongoDB collection.

         Parameters
@@ -429,525 +487,84 @@ class EEGDash:
             logger.debug("Update operation failed", exc_info=exc)

     def exists(self, query: dict[str, Any]) -> bool:
-        """Alias for :meth:`exist` provided for API clarity."""
+        """Check if at least one record matches the query.
+
+        This is an alias for :meth:`exist`.
+
+        Parameters
+        ----------
+        query : dict
+            MongoDB query to check for existence.
+
+        Returns
+        -------
+        bool
+            True if a matching record exists, False otherwise.
+
+        """
         return self.exist(query)
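Typical use of the alias, assuming a reachable metadata database and an `EEGDash()` instance as in the earlier examples:

```python
eegdash = EEGDash()
if eegdash.exists({"dataset": "ds002718", "subject": "012"}):
    print("At least one matching record is indexed.")
```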
 
-    def remove_field(self, record, field):
-        """Remove a specific field from a record in the MongoDB collection.
+    def remove_field(self, record: dict, field: str) -> None:
+        """Remove a field from a specific record in the MongoDB collection.

         Parameters
         ----------
         record : dict
-            Record identifying object with ``data_name``.
+            Record-identifying object with a ``data_name`` key.
         field : str
-            Field name to remove.
+            The name of the field to remove.

         """
         self.__collection.update_one(
             {"data_name": record["data_name"]}, {"$unset": {field: 1}}
         )

-    def remove_field_from_db(self, field):
-        """Remove a field from all records (destructive).
+    def remove_field_from_db(self, field: str) -> None:
+        """Remove a field from all records in the database.
+
+        .. warning::
+            This is a destructive operation and cannot be undone.

         Parameters
         ----------
         field : str
-            Field name to remove from every document.
+            The name of the field to remove from all documents.

         """
         self.__collection.update_many({}, {"$unset": {field: 1}})
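Usage sketch for the two removal helpers; the record key and field name below are placeholders, and the second call affects every document in the collection:

```python
# Remove a stray metadata column; 'data_name' is the unique key used above.
eegdash.remove_field({"data_name": "ds002718_sub-012_eeg.set"}, "age")
eegdash.remove_field_from_db("age")  # destructive: drops 'age' everywhere
```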
 
     @property
     def collection(self):
-        """Return the MongoDB collection object."""
-        return self.__collection
+        """The underlying PyMongo ``Collection`` object.
+
+        Returns
+        -------
+        pymongo.collection.Collection
+            The collection object used for database interactions.

-    def close(self):
-        """Backward-compatibility no-op; connections are managed globally.
+        """
+        return self.__collection

-        Notes
-        -----
-        Connections are managed by :class:`MongoConnectionManager`. Use
-        :meth:`close_all_connections` to explicitly close all clients.
+    def close(self) -> None:
+        """Close the MongoDB connection.

+        .. deprecated:: 0.1
+            Connections are now managed globally by :class:`MongoConnectionManager`.
+            This method is a no-op and will be removed in a future version.
+            Use :meth:`EEGDash.close_all_connections` to close all clients.
         """
         # Individual instances no longer close the shared client
         pass

     @classmethod
-    def close_all_connections(cls):
-        """Close all MongoDB client connections managed by the singleton."""
+    def close_all_connections(cls) -> None:
+        """Close all MongoDB client connections managed by the singleton manager."""
         MongoConnectionManager.close_all()

-    def __del__(self):
+    def __del__(self) -> None:
         """Destructor; no explicit action needed due to global connection manager."""
         # No longer needed since we're using singleton pattern
         pass

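Usage sketch for the property and the class-level teardown, reusing the `eegdash` instance from the examples above; per the docstrings in this hunk, instance-level `close()` is a no-op:

```python
# Direct collection access for ad-hoc queries, then explicit teardown of all
# pooled clients managed by the singleton connection manager.
n_docs = eegdash.collection.count_documents({"dataset": "ds002718"})
EEGDash.close_all_connections()
```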
 
-class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitMeta):
-    """Create a new EEGDashDataset from a given query or local BIDS dataset directory
-    and dataset name. An EEGDashDataset is pooled collection of EEGDashBaseDataset
-    instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.
-
-    Examples
-    --------
-    Basic usage with dataset and subject filtering:
-
-    >>> from eegdash import EEGDashDataset
-    >>> dataset = EEGDashDataset(
-    ...     cache_dir="./data",
-    ...     dataset="ds002718",
-    ...     subject="012"
-    ... )
-    >>> print(f"Number of recordings: {len(dataset)}")
-
-    Filter by multiple subjects and specific task:
-
-    >>> subjects = ["012", "013", "014"]
-    >>> dataset = EEGDashDataset(
-    ...     cache_dir="./data",
-    ...     dataset="ds002718",
-    ...     subject=subjects,
-    ...     task="RestingState"
-    ... )
-
-    Load and inspect EEG data from recordings:
-
-    >>> if len(dataset) > 0:
-    ...     recording = dataset[0]
-    ...     raw = recording.load()
-    ...     print(f"Sampling rate: {raw.info['sfreq']} Hz")
-    ...     print(f"Number of channels: {len(raw.ch_names)}")
-    ...     print(f"Duration: {raw.times[-1]:.1f} seconds")
-
-    Advanced filtering with raw MongoDB queries:
-
-    >>> from eegdash import EEGDashDataset
-    >>> query = {
-    ...     "dataset": "ds002718",
-    ...     "subject": {"$in": ["012", "013"]},
-    ...     "task": "RestingState"
-    ... }
-    >>> dataset = EEGDashDataset(cache_dir="./data", query=query)
-
-    Working with dataset collections and braindecode integration:
-
-    >>> # EEGDashDataset is a braindecode BaseConcatDataset
-    >>> for i, recording in enumerate(dataset):
-    ...     if i >= 2:  # limit output
-    ...         break
-    ...     print(f"Recording {i}: {recording.description}")
-    ...     raw = recording.load()
-    ...     print(f" Channels: {len(raw.ch_names)}, Duration: {raw.times[-1]:.1f}s")
-
-    Parameters
-    ----------
-    cache_dir : str | Path
-        Directory where data are cached locally.
-    query : dict | None
-        Raw MongoDB query to filter records. If provided, it is merged with
-        keyword filtering arguments (see ``**kwargs``) using logical AND.
-        You must provide at least a ``dataset`` (either in ``query`` or
-        as a keyword argument). Only fields in ``ALLOWED_QUERY_FIELDS`` are
-        considered for filtering.
-    dataset : str
-        Dataset identifier (e.g., ``"ds002718"``). Required if ``query`` does
-        not already specify a dataset.
-    task : str | list[str]
-        Task name(s) to filter by (e.g., ``"RestingState"``).
-    subject : str | list[str]
-        Subject identifier(s) to filter by (e.g., ``"NDARCA153NKE"``).
-    session : str | list[str]
-        Session identifier(s) to filter by (e.g., ``"1"``).
-    run : str | list[str]
-        Run identifier(s) to filter by (e.g., ``"1"``).
-    description_fields : list[str]
-        Fields to extract from each record and include in dataset descriptions
-        (e.g., "subject", "session", "run", "task").
-    s3_bucket : str | None
-        Optional S3 bucket URI (e.g., "s3://mybucket") to use instead of the
-        default OpenNeuro bucket when downloading data files.
-    records : list[dict] | None
-        Pre-fetched metadata records. If provided, the dataset is constructed
-        directly from these records and no MongoDB query is performed.
-    download : bool, default True
-        If False, load from local BIDS files only. Local data are expected
-        under ``cache_dir / dataset``; no DB or S3 access is attempted.
-    n_jobs : int
-        Number of parallel jobs to use where applicable (-1 uses all cores).
-    eeg_dash_instance : EEGDash | None
-        Optional existing EEGDash client to reuse for DB queries. If None,
-        a new client is created on demand, not used in the case of no download.
-    **kwargs : dict
-        Additional keyword arguments serving two purposes:
-
-        - Filtering: any keys present in ``ALLOWED_QUERY_FIELDS`` are treated as
-          query filters (e.g., ``dataset``, ``subject``, ``task``, ...).
-        - Dataset options: remaining keys are forwarded to
-          ``EEGDashBaseDataset``.
-
-    """
-
-    def __init__(
-        self,
-        cache_dir: str | Path,
-        query: dict[str, Any] = None,
-        description_fields: list[str] = [
-            "subject",
-            "session",
-            "run",
-            "task",
-            "age",
-            "gender",
-            "sex",
-        ],
-        s3_bucket: str | None = None,
-        records: list[dict] | None = None,
-        download: bool = True,
-        n_jobs: int = -1,
-        eeg_dash_instance: EEGDash | None = None,
-        **kwargs,
-    ):
-        # Parameters that don't need validation
-        _suppress_comp_warning: bool = kwargs.pop("_suppress_comp_warning", False)
-        self.s3_bucket = s3_bucket
-        self.records = records
-        self.download = download
-        self.n_jobs = n_jobs
-        self.eeg_dash_instance = eeg_dash_instance
-
-        self.cache_dir = cache_dir
-        if self.cache_dir == "" or self.cache_dir is None:
-            self.cache_dir = get_default_cache_dir()
-            logger.warning(
-                f"Cache directory is empty, using the eegdash default path: {self.cache_dir}"
-            )
-
-        self.cache_dir = Path(self.cache_dir)
-
-        if not self.cache_dir.exists():
-            logger.warning(
-                f"Cache directory does not exist, creating it: {self.cache_dir}"
-            )
-            self.cache_dir.mkdir(exist_ok=True, parents=True)
-
-        # Separate query kwargs from other kwargs passed to the BaseDataset constructor
-        self.query = query or {}
-        self.query.update(
-            {k: v for k, v in kwargs.items() if k in ALLOWED_QUERY_FIELDS}
-        )
-        base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in self.query}
-        if "dataset" not in self.query:
-            # If explicit records are provided, infer dataset from records
-            if isinstance(records, list) and records and isinstance(records[0], dict):
-                inferred = records[0].get("dataset")
-                if inferred:
-                    self.query["dataset"] = inferred
-                else:
-                    raise ValueError("You must provide a 'dataset' argument")
-            else:
-                raise ValueError("You must provide a 'dataset' argument")
-
-        # Decide on a dataset subfolder name for cache isolation. If using
-        # challenge/preprocessed buckets (e.g., BDF, mini subsets), append
-        # informative suffixes to avoid overlapping with the original dataset.
-        dataset_folder = self.query["dataset"]
-        if self.s3_bucket:
-            suffixes: list[str] = []
-            bucket_lower = str(self.s3_bucket).lower()
-            if "bdf" in bucket_lower:
-                suffixes.append("bdf")
-            if "mini" in bucket_lower:
-                suffixes.append("mini")
-            if suffixes:
-                dataset_folder = f"{dataset_folder}-{'-'.join(suffixes)}"
-
-        self.data_dir = self.cache_dir / dataset_folder
-
-        if (
-            not _suppress_comp_warning
-            and self.query["dataset"] in RELEASE_TO_OPENNEURO_DATASET_MAP.values()
-        ):
-            message_text = Text.from_markup(
-                "[italic]This notice is only for users who are participating in the [link=https://eeg2025.github.io/]EEG 2025 Competition[/link].[/italic]\n\n"
-                "[bold]EEG 2025 Competition Data Notice![/bold]\n"
-                "You are loading one of the datasets that is used in competition, but via `EEGDashDataset`.\n\n"
-                "[bold red]IMPORTANT[/bold red]: \n"
-                "If you download data from `EEGDashDataset`, it is [u]NOT[/u] identical to the official \n"
-                "competition data, which is accessed via `EEGChallengeDataset`. "
-                "The competition data has been downsampled and filtered.\n\n"
-                "[bold]If you are participating in the competition, \nyou must use the `EEGChallengeDataset` object to ensure consistency.[/bold] \n\n"
-                "If you are not participating in the competition, you can ignore this message."
-            )
-            warning_panel = Panel(
-                message_text,
-                title="[yellow]EEG 2025 Competition Data Notice[/yellow]",
-                subtitle="[cyan]Source: EEGDashDataset[/cyan]",
-                border_style="yellow",
-            )
-
-            try:
-                Console().print(warning_panel)
-            except Exception:
-                logger.warning(str(message_text))
-
-        if records is not None:
-            self.records = records
-            datasets = [
-                EEGDashBaseDataset(
-                    record,
-                    self.cache_dir,
-                    self.s3_bucket,
-                    **base_dataset_kwargs,
-                )
-                for record in self.records
-            ]
-        elif not download:  # only assume local data is complete if not downloading
-            if not self.data_dir.exists():
-                raise ValueError(
-                    f"Offline mode is enabled, but local data_dir {self.data_dir} does not exist."
-                )
-            records = self._find_local_bids_records(self.data_dir, self.query)
-            # Try to enrich from local participants.tsv to restore requested fields
-            try:
-                bids_ds = EEGBIDSDataset(
-                    data_dir=str(self.data_dir), dataset=self.query["dataset"]
-                )  # type: ignore[index]
-            except Exception:
-                bids_ds = None
-
-            datasets = []
-            for record in records:
-                # Start with entity values from filename
-                desc: dict[str, Any] = {
-                    k: record.get(k)
-                    for k in ("subject", "session", "run", "task")
-                    if record.get(k) is not None
-                }
-
-                if bids_ds is not None:
-                    try:
-                        rel_from_dataset = Path(record["bidspath"]).relative_to(
-                            record["dataset"]
-                        )  # type: ignore[index]
-                        local_file = (self.data_dir / rel_from_dataset).as_posix()
-                        part_row = bids_ds.subject_participant_tsv(local_file)
-                        desc = merge_participants_fields(
-                            description=desc,
-                            participants_row=part_row
-                            if isinstance(part_row, dict)
-                            else None,
-                            description_fields=description_fields,
-                        )
-                    except Exception:
-                        pass
-
-                datasets.append(
-                    EEGDashBaseDataset(
-                        record=record,
-                        cache_dir=self.cache_dir,
-                        s3_bucket=self.s3_bucket,
-                        description=desc,
-                        **base_dataset_kwargs,
-                    )
-                )
-        elif self.query:
-            if self.eeg_dash_instance is None:
-                self.eeg_dash_instance = EEGDash()
-            datasets = self._find_datasets(
-                query=build_query_from_kwargs(**self.query),
-                description_fields=description_fields,
-                base_dataset_kwargs=base_dataset_kwargs,
-            )
-            # We only need filesystem if we need to access S3
-            self.filesystem = downloader.get_s3_filesystem()
-        else:
-            raise ValueError(
-                "You must provide either 'records', a 'data_dir', or a query/keyword arguments for filtering."
-            )
-
-        super().__init__(datasets)
-
-    def _find_local_bids_records(
-        self, dataset_root: Path, filters: dict[str, Any]
-    ) -> list[dict]:
-        """Discover local BIDS EEG files and build minimal records.
-
-        This helper enumerates EEG recordings under ``dataset_root`` via
-        ``mne_bids.find_matching_paths`` and applies entity filters to produce a
-        list of records suitable for ``EEGDashBaseDataset``. No network access
-        is performed and files are not read.
-
-        Parameters
-        ----------
-        dataset_root : Path
-            Local dataset directory. May be the plain dataset folder (e.g.,
-            ``ds005509``) or a suffixed cache variant (e.g.,
-            ``ds005509-bdf-mini``).
-        filters : dict of {str, Any}
-            Query filters. Must include ``'dataset'`` with the dataset id (without
-            local suffixes). May include BIDS entities ``'subject'``,
-            ``'session'``, ``'task'``, and ``'run'``. Each value can be a scalar
-            or a sequence of scalars.
-
-        Returns
-        -------
-        records : list of dict
-            One record per matched EEG file with at least:
-
-            - ``'data_name'``
-            - ``'dataset'`` (dataset id, without suffixes)
-            - ``'bidspath'`` (normalized to start with the dataset id)
-            - ``'subject'``, ``'session'``, ``'task'``, ``'run'`` (may be None)
-            - ``'bidsdependencies'`` (empty list)
-            - ``'modality'`` (``"eeg"``)
-            - ``'sampling_frequency'``, ``'nchans'``, ``'ntimes'`` (minimal
-              defaults for offline usage)
-
-        Notes
-        -----
-        - Matching uses ``datatypes=['eeg']`` and ``suffixes=['eeg']``.
-        - ``bidspath`` is constructed as
-          ``<dataset_id> / <relative_path_from_dataset_root>`` to ensure the
-          first path component is the dataset id (without local cache suffixes).
-        - Minimal defaults are set for ``sampling_frequency``, ``nchans``, and
-          ``ntimes`` to satisfy dataset length requirements offline.
-
-        """
-        dataset_id = filters["dataset"]
-        arg_map = {
-            "subjects": "subject",
-            "sessions": "session",
-            "tasks": "task",
-            "runs": "run",
-        }
-        matching_args: dict[str, list[str]] = {}
-        for finder_key, entity_key in arg_map.items():
-            entity_val = filters.get(entity_key)
-            if entity_val is None:
-                continue
-            if isinstance(entity_val, (list, tuple, set)):
-                entity_vals = list(entity_val)
-                if not entity_vals:
-                    continue
-                matching_args[finder_key] = entity_vals
-            else:
-                matching_args[finder_key] = [entity_val]
-
-        matched_paths = find_matching_paths(
-            root=str(dataset_root),
-            datatypes=["eeg"],
-            suffixes=["eeg"],
-            ignore_json=True,
-            **matching_args,
-        )
-        records_out: list[dict] = []
-
-        for bids_path in matched_paths:
-            # Build bidspath as dataset_id / relative_path_from_dataset_root (POSIX)
-            rel_from_root = (
-                Path(bids_path.fpath)
-                .resolve()
-                .relative_to(Path(bids_path.root).resolve())
-            )
-            bidspath = f"{dataset_id}/{rel_from_root.as_posix()}"
-
-            rec = {
-                "data_name": f"{dataset_id}_{Path(bids_path.fpath).name}",
-                "dataset": dataset_id,
-                "bidspath": bidspath,
-                "subject": (bids_path.subject or None),
-                "session": (bids_path.session or None),
-                "task": (bids_path.task or None),
-                "run": (bids_path.run or None),
-                # minimal fields to satisfy BaseDataset from eegdash
-                "bidsdependencies": [],  # not needed to just run.
-                "modality": "eeg",
-                # minimal numeric defaults for offline length calculation
-                "sampling_frequency": None,
-                "nchans": None,
-                "ntimes": None,
-            }
-            records_out.append(rec)
-
-        return records_out
-
-    def _find_key_in_nested_dict(self, data: Any, target_key: str) -> Any:
-        """Recursively search for target_key in nested dicts/lists with normalized matching.
-
-        This makes lookups tolerant to naming differences like "p-factor" vs "p_factor".
-        Returns the first match or None.
-        """
-        norm_target = normalize_key(target_key)
-        if isinstance(data, dict):
-            for k, v in data.items():
-                if normalize_key(k) == norm_target:
-                    return v
-                res = self._find_key_in_nested_dict(v, target_key)
-                if res is not None:
-                    return res
-        elif isinstance(data, list):
-            for item in data:
-                res = self._find_key_in_nested_dict(item, target_key)
-                if res is not None:
-                    return res
-        return None
-
-    def _find_datasets(
-        self,
-        query: dict[str, Any] | None,
-        description_fields: list[str],
-        base_dataset_kwargs: dict,
-    ) -> list[EEGDashBaseDataset]:
-        """Helper method to find datasets in the MongoDB collection that satisfy the
-        given query and return them as a list of EEGDashBaseDataset objects.
-
-        Parameters
-        ----------
-        query : dict
-            The query object, as in EEGDash.find().
-        description_fields : list[str]
-            A list of fields to be extracted from the dataset records and included in
-            the returned dataset description(s).
-        kwargs: additional keyword arguments to be passed to the EEGDashBaseDataset
-            constructor.
-
-        Returns
-        -------
-        list :
-            A list of EEGDashBaseDataset objects that match the query.
-
-        """
-        datasets: list[EEGDashBaseDataset] = []
-        self.records = self.eeg_dash_instance.find(query)
-
-        for record in self.records:
-            description: dict[str, Any] = {}
-            # Requested fields first (normalized matching)
-            for field in description_fields:
-                value = self._find_key_in_nested_dict(record, field)
-                if value is not None:
-                    description[field] = value
-            # Merge all participants.tsv columns generically
-            part = self._find_key_in_nested_dict(record, "participant_tsv")
-            if isinstance(part, dict):
-                description = merge_participants_fields(
-                    description=description,
-                    participants_row=part,
-                    description_fields=description_fields,
-                )
-            datasets.append(
-                EEGDashBaseDataset(
-                    record,
-                    cache_dir=self.cache_dir,
-                    s3_bucket=self.s3_bucket,
-                    description=description,
-                    **base_dataset_kwargs,
-                )
-            )
-        return datasets
-
-
-__all__ = ["EEGDash", "EEGDashDataset"]
+__all__ = ["EEGDash"]
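With `EEGDashDataset` removed here, `eegdash.api` now exports only `EEGDash`; where the dataset class lives after this change is not shown in this diff. Metadata can still be queried directly, reusing the query shape from the removed docstring example above (a sketch; `find`'s full signature is not part of this hunk):

```python
from eegdash.api import EEGDash

eegdash = EEGDash()
for record in eegdash.find({"dataset": "ds002718", "subject": {"$in": ["012", "013"]}}):
    print(record["data_name"])
```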