eegdash 0.3.9.dev182388821__py3-none-any.whl → 0.4.0.dev144__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

eegdash/__init__.py CHANGED
@@ -1,3 +1,14 @@
+# Authors: The EEGDash contributors.
+# License: GNU General Public License
+# Copyright the EEGDash contributors.
+
+"""EEGDash: A comprehensive platform for EEG data management and analysis.
+
+EEGDash provides a unified interface for accessing, querying, and analyzing large-scale
+EEG datasets. It integrates with cloud storage, MongoDB databases, and machine learning
+frameworks to streamline EEG research workflows.
+"""
+
 from .api import EEGDash, EEGDashDataset
 from .dataset import EEGChallengeDataset
 from .hbn import preprocessing
@@ -7,4 +18,4 @@ _init_mongo_client()
 
 __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset", "preprocessing"]
 
-__version__ = "0.3.9.dev182388821"
+__version__ = "0.4.0.dev144"
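The new module docstring and metadata headers frame the public API re-exported here. A minimal usage sketch of these entry points, assuming a reachable metadata database (the dataset ID and cache path are illustrative):

    # Illustrative values only; "ds002718" and "./data" are placeholders.
    from eegdash import EEGDash, EEGDashDataset

    client = EEGDash()  # query interface to the metadata database
    records = client.find(dataset="ds002718")
    ds = EEGDashDataset(cache_dir="./data", dataset="ds002718")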
eegdash/api.py CHANGED
@@ -1,9 +1,18 @@
-import logging
+# Authors: The EEGDash contributors.
+# License: GNU General Public License
+# Copyright the EEGDash contributors.
+
+"""High-level interface to the EEGDash metadata database.
+
+This module provides the main EEGDash class which serves as the primary entry point for
+interacting with the EEGDash ecosystem. It offers methods to query, insert, and update
+metadata records stored in the EEGDash MongoDB database, and includes utilities to load
+EEG data from S3 for matched records.
+"""
+
 import os
-import tempfile
 from pathlib import Path
 from typing import Any, Mapping
-from urllib.parse import urlsplit
 
 import mne
 import numpy as np
@@ -11,13 +20,15 @@ import xarray as xr
 from docstring_inheritance import NumpyDocstringInheritanceInitMeta
 from dotenv import load_dotenv
 from joblib import Parallel, delayed
-from mne.utils import warn
 from mne_bids import find_matching_paths, get_bids_path_from_fname, read_raw_bids
 from pymongo import InsertOne, UpdateOne
-from s3fs import S3FileSystem
+from rich.console import Console
+from rich.panel import Panel
+from rich.text import Text
 
 from braindecode.datasets import BaseConcatDataset
 
+from . import downloader
 from .bids_eeg_metadata import (
     build_query_from_kwargs,
     load_eeg_attrs_from_bids_file,
@@ -33,10 +44,10 @@ from .data_utils import (
     EEGBIDSDataset,
     EEGDashBaseDataset,
 )
+from .logging import logger
 from .mongodb import MongoConnectionManager
 from .paths import get_default_cache_dir
-
-logger = logging.getLogger("eegdash")
+from .utils import _init_mongo_client
 
 
 class EEGDash:
@@ -74,19 +85,26 @@ class EEGDash:
 
         if self.is_public:
             DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
+            if not DB_CONNECTION_STRING:
+                try:
+                    _init_mongo_client()
+                    DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
+                except Exception:
+                    DB_CONNECTION_STRING = None
         else:
             load_dotenv()
             DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")
 
         # Use singleton to get MongoDB client, database, and collection
+        if not DB_CONNECTION_STRING:
+            raise RuntimeError(
+                "No MongoDB connection string configured. Set MNE config 'EEGDASH_DB_URI' "
+                "or environment variable 'DB_CONNECTION_STRING'."
+            )
         self.__client, self.__db, self.__collection = MongoConnectionManager.get_client(
            DB_CONNECTION_STRING, is_staging
        )
 
-        self.filesystem = S3FileSystem(
-            anon=True, client_kwargs={"region_name": "us-east-2"}
-        )
-
     def find(
         self, query: dict[str, Any] = None, /, **kwargs
     ) -> list[Mapping[str, Any]]:
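The constructor now fails fast with a RuntimeError when no connection string is found. A hedged sketch of the two configuration paths named in that error message (the URI is a placeholder, not a real endpoint):

    # Option 1: persist the URI in the MNE config, read via mne.utils.get_config.
    import mne
    mne.utils.set_config("EEGDASH_DB_URI", "mongodb://example.invalid/eegdash")

    # Option 2: for non-public instances, set the variable picked up by load_dotenv().
    import os
    os.environ["DB_CONNECTION_STRING"] = "mongodb://example.invalid/eegdash"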
@@ -310,83 +328,6 @@
                f"Conflicting constraints for '{key}': disjoint sets {r_val!r} and {k_val!r}"
            )
 
-    def load_eeg_data_from_s3(self, s3path: str) -> xr.DataArray:
-        """Load EEG data from an S3 URI into an ``xarray.DataArray``.
-
-        Preserves the original filename, downloads sidecar files when applicable
-        (e.g., ``.fdt`` for EEGLAB, ``.vmrk``/``.eeg`` for BrainVision), and uses
-        MNE's direct readers.
-
-        Parameters
-        ----------
-        s3path : str
-            An S3 URI (should start with "s3://").
-
-        Returns
-        -------
-        xr.DataArray
-            EEG data with dimensions ``("channel", "time")``.
-
-        Raises
-        ------
-        ValueError
-            If the file extension is unsupported.
-
-        """
-        # choose a temp dir so sidecars can be colocated
-        with tempfile.TemporaryDirectory() as tmpdir:
-            # Derive local filenames from the S3 key to keep base name consistent
-            s3_key = urlsplit(s3path).path  # e.g., "/dsXXXX/sub-.../..._eeg.set"
-            basename = Path(s3_key).name
-            ext = Path(basename).suffix.lower()
-            local_main = Path(tmpdir) / basename
-
-            # Download main file
-            with (
-                self.filesystem.open(s3path, mode="rb") as fsrc,
-                open(local_main, "wb") as fdst,
-            ):
-                fdst.write(fsrc.read())
-
-            # Determine and fetch any required sidecars
-            sidecars: list[str] = []
-            if ext == ".set":  # EEGLAB
-                sidecars = [".fdt"]
-            elif ext == ".vhdr":  # BrainVision
-                sidecars = [".vmrk", ".eeg", ".dat", ".raw"]
-
-            for sc_ext in sidecars:
-                sc_key = s3_key[: -len(ext)] + sc_ext
-                sc_uri = f"s3://{urlsplit(s3path).netloc}{sc_key}"
-                try:
-                    # If sidecar exists, download next to the main file
-                    info = self.filesystem.info(sc_uri)
-                    if info:
-                        sc_local = Path(tmpdir) / Path(sc_key).name
-                        with (
-                            self.filesystem.open(sc_uri, mode="rb") as fsrc,
-                            open(sc_local, "wb") as fdst,
-                        ):
-                            fdst.write(fsrc.read())
-                except Exception:
-                    # Sidecar not present; skip silently
-                    pass
-
-            # Read using appropriate MNE reader
-            raw = mne.io.read_raw(str(local_main), preload=True, verbose=False)
-
-            data = raw.get_data()
-            fs = raw.info["sfreq"]
-            max_time = data.shape[1] / fs
-            time_steps = np.linspace(0, max_time, data.shape[1]).squeeze()
-            channel_names = raw.ch_names
-
-            return xr.DataArray(
-                data=data,
-                dims=["channel", "time"],
-                coords={"time": time_steps, "channel": channel_names},
-            )
-
     def load_eeg_data_from_bids_file(self, bids_file: str) -> xr.DataArray:
         """Load EEG data from a local BIDS-formatted file.
 
@@ -508,39 +449,13 @@
         results = Parallel(
             n_jobs=-1 if len(sessions) > 1 else 1, prefer="threads", verbose=1
         )(
-            delayed(self.load_eeg_data_from_s3)(self._get_s3path(session))
+            delayed(downloader.load_eeg_from_s3)(
+                downloader.get_s3path("s3://openneuro.org", session["bidspath"])
+            )
             for session in sessions
         )
         return results
 
-    def _get_s3path(self, record: Mapping[str, Any] | str) -> str:
-        """Build an S3 URI from a DB record or a relative path.
-
-        Parameters
-        ----------
-        record : dict or str
-            Either a DB record containing a ``'bidspath'`` key, or a relative
-            path string under the OpenNeuro bucket.
-
-        Returns
-        -------
-        str
-            Fully qualified S3 URI.
-
-        Raises
-        ------
-        ValueError
-            If a mapping is provided but ``'bidspath'`` is missing.
-
-        """
-        if isinstance(record, str):
-            rel = record
-        else:
-            rel = record.get("bidspath")
-        if not rel:
-            raise ValueError("Record missing 'bidspath' for S3 path resolution")
-        return f"s3://openneuro.org/{rel}"
-
     def _add_request(self, record: dict):
         """Internal helper method to create a MongoDB insertion request for a record."""
         return InsertOne(record)
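S3 access now goes through the new downloader module instead of a per-instance S3FileSystem handle. A hedged sketch of the call pattern used in the hunk above, for a single record (the bidspath value is illustrative):

    # Assumes downloader.get_s3path joins a bucket URI with a relative BIDS path,
    # as in the Parallel call above; the record below is a made-up example.
    from eegdash import downloader

    record = {"bidspath": "ds002718/sub-012/eeg/sub-012_task-RestingState_eeg.set"}
    uri = downloader.get_s3path("s3://openneuro.org", record["bidspath"])
    data = downloader.load_eeg_from_s3(uri)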
@@ -552,8 +467,11 @@
         except ValueError as e:
             logger.error("Validation error for record: %s ", record["data_name"])
             logger.error(e)
-        except:
-            logger.error("Error adding record: %s ", record["data_name"])
+        except Exception as exc:
+            logger.error(
+                "Error adding record: %s ", record.get("data_name", "<unknown>")
+            )
+            logger.debug("Add operation failed", exc_info=exc)
 
     def _update_request(self, record: dict):
         """Internal helper method to create a MongoDB update request for a record."""
@@ -572,8 +490,11 @@
             self.__collection.update_one(
                 {"data_name": record["data_name"]}, {"$set": record}
             )
-        except:  # silent failure
-            logger.error("Error updating record: %s", record["data_name"])
+        except Exception as exc:  # log and continue
+            logger.error(
+                "Error updating record: %s", record.get("data_name", "<unknown>")
+            )
+            logger.debug("Update operation failed", exc_info=exc)
 
     def exists(self, query: dict[str, Any]) -> bool:
         """Alias for :meth:`exist` provided for API clarity."""
@@ -640,22 +561,59 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
 
     Examples
     --------
-    # Find by single subject
-    >>> ds = EEGDashDataset(dataset="ds005505", subject="NDARCA153NKE")
-
-    # Find by a list of subjects and a specific task
-    >>> subjects = ["NDARCA153NKE", "NDARXT792GY8"]
-    >>> ds = EEGDashDataset(dataset="ds005505", subject=subjects, task="RestingState")
-
-    # Use a raw MongoDB query for advanced filtering
-    >>> raw_query = {"dataset": "ds005505", "subject": {"$in": subjects}}
-    >>> ds = EEGDashDataset(query=raw_query)
+    Basic usage with dataset and subject filtering:
+
+    >>> from eegdash import EEGDashDataset
+    >>> dataset = EEGDashDataset(
+    ...     cache_dir="./data",
+    ...     dataset="ds002718",
+    ...     subject="012"
+    ... )
+    >>> print(f"Number of recordings: {len(dataset)}")
+
+    Filter by multiple subjects and specific task:
+
+    >>> subjects = ["012", "013", "014"]
+    >>> dataset = EEGDashDataset(
+    ...     cache_dir="./data",
+    ...     dataset="ds002718",
+    ...     subject=subjects,
+    ...     task="RestingState"
+    ... )
+
+    Load and inspect EEG data from recordings:
+
+    >>> if len(dataset) > 0:
+    ...     recording = dataset[0]
+    ...     raw = recording.load()
+    ...     print(f"Sampling rate: {raw.info['sfreq']} Hz")
+    ...     print(f"Number of channels: {len(raw.ch_names)}")
+    ...     print(f"Duration: {raw.times[-1]:.1f} seconds")
+
+    Advanced filtering with raw MongoDB queries:
+
+    >>> from eegdash import EEGDashDataset
+    >>> query = {
+    ...     "dataset": "ds002718",
+    ...     "subject": {"$in": ["012", "013"]},
+    ...     "task": "RestingState"
+    ... }
+    >>> dataset = EEGDashDataset(cache_dir="./data", query=query)
+
+    Working with dataset collections and braindecode integration:
+
+    >>> # EEGDashDataset is a braindecode BaseConcatDataset
+    >>> for i, recording in enumerate(dataset):
+    ...     if i >= 2:  # limit output
+    ...         break
+    ...     print(f"Recording {i}: {recording.description}")
+    ...     raw = recording.load()
+    ...     print(f"  Channels: {len(raw.ch_names)}, Duration: {raw.times[-1]:.1f}s")
 
     Parameters
     ----------
     cache_dir : str | Path
-        Directory where data are cached locally. If not specified, a default
-        cache directory under the user cache is used.
+        Directory where data are cached locally.
     query : dict | None
         Raw MongoDB query to filter records. If provided, it is merged with
         keyword filtering arguments (see ``**kwargs``) using logical AND.
@@ -726,13 +684,21 @@
         self.records = records
         self.download = download
         self.n_jobs = n_jobs
-        self.eeg_dash_instance = eeg_dash_instance or EEGDash()
+        self.eeg_dash_instance = eeg_dash_instance
+
+        self.cache_dir = cache_dir
+        if self.cache_dir == "" or self.cache_dir is None:
+            self.cache_dir = get_default_cache_dir()
+            logger.warning(
+                f"Cache directory is empty, using the eegdash default path: {self.cache_dir}"
+            )
 
-        # Resolve a unified cache directory across code/tests/CI
-        self.cache_dir = Path(cache_dir or get_default_cache_dir())
+        self.cache_dir = Path(self.cache_dir)
 
         if not self.cache_dir.exists():
-            warn(f"Cache directory does not exist, creating it: {self.cache_dir}")
+            logger.warning(
+                f"Cache directory does not exist, creating it: {self.cache_dir}"
+            )
             self.cache_dir.mkdir(exist_ok=True, parents=True)
 
         # Separate query kwargs from other kwargs passed to the BaseDataset constructor
@@ -772,21 +738,29 @@
                 not _suppress_comp_warning
                 and self.query["dataset"] in RELEASE_TO_OPENNEURO_DATASET_MAP.values()
             ):
-                warn(
-                    "If you are not participating in the competition, you can ignore this warning!"
-                    "\n\n"
-                    "EEG 2025 Competition Data Notice:\n"
-                    "---------------------------------\n"
-                    " You are loading the dataset that is used in the EEG 2025 Competition:\n"
-                    "IMPORTANT: The data accessed via `EEGDashDataset` is NOT identical to what you get from `EEGChallengeDataset` object directly.\n"
-                    "and it is not what you will use for the competition. Downsampling and filtering were applied to the data"
-                    "to allow more people to participate.\n"
-                    "\n"
-                    "If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data.\n"
-                    "\n",
-                    UserWarning,
-                    module="eegdash",
+                message_text = Text.from_markup(
+                    "[italic]This notice is only for users who are participating in the [link=https://eeg2025.github.io/]EEG 2025 Competition[/link].[/italic]\n\n"
+                    "[bold]EEG 2025 Competition Data Notice![/bold]\n"
+                    "You are loading one of the datasets that is used in competition, but via `EEGDashDataset`.\n\n"
+                    "[bold red]IMPORTANT[/bold red]: \n"
+                    "If you download data from `EEGDashDataset`, it is [u]NOT[/u] identical to the official \n"
+                    "competition data, which is accessed via `EEGChallengeDataset`. "
+                    "The competition data has been downsampled and filtered.\n\n"
+                    "[bold]If you are participating in the competition, \nyou must use the `EEGChallengeDataset` object to ensure consistency.[/bold] \n\n"
+                    "If you are not participating in the competition, you can ignore this message."
+                )
+                warning_panel = Panel(
+                    message_text,
+                    title="[yellow]EEG 2025 Competition Data Notice[/yellow]",
+                    subtitle="[cyan]Source: EEGDashDataset[/cyan]",
+                    border_style="yellow",
                 )
+
+                try:
+                    Console().print(warning_panel)
+                except Exception:
+                    logger.warning(str(message_text))
+
         if records is not None:
             self.records = records
             datasets = [
@@ -848,16 +822,15 @@
                 )
             )
         elif self.query:
-            # This is the DB query path that we are improving
+            if self.eeg_dash_instance is None:
+                self.eeg_dash_instance = EEGDash()
             datasets = self._find_datasets(
                 query=build_query_from_kwargs(**self.query),
                 description_fields=description_fields,
                 base_dataset_kwargs=base_dataset_kwargs,
            )
             # We only need filesystem if we need to access S3
-            self.filesystem = S3FileSystem(
-                anon=True, client_kwargs={"region_name": "us-east-2"}
-            )
+            self.filesystem = downloader.get_s3_filesystem()
         else:
             raise ValueError(
                 "You must provide either 'records', a 'data_dir', or a query/keyword arguments for filtering."
eegdash/bids_eeg_metadata.py CHANGED
@@ -1,18 +1,34 @@
-import logging
+# Authors: The EEGDash contributors.
+# License: GNU General Public License
+# Copyright the EEGDash contributors.
+
+"""BIDS metadata processing and query building utilities.
+
+This module provides functions for processing BIDS-formatted EEG metadata, building database
+queries from user parameters, and enriching metadata records with participant information.
+It handles the translation between user-friendly query parameters and MongoDB query syntax.
+"""
+
 import re
 from pathlib import Path
 from typing import Any
 
+import pandas as pd
+from mne_bids import BIDSPath
+
 from .const import ALLOWED_QUERY_FIELDS
 from .const import config as data_config
-
-logger = logging.getLogger("eegdash")
+from .logging import logger
 
 __all__ = [
     "build_query_from_kwargs",
     "load_eeg_attrs_from_bids_file",
     "merge_participants_fields",
     "normalize_key",
+    "participants_row_for_subject",
+    "participants_extras_from_tsv",
+    "attach_participants_extras",
+    "enrich_from_participants",
 ]
 
 
@@ -72,28 +88,6 @@ def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
     return query
 
 
-def _get_raw_extensions(bids_file: str, bids_dataset) -> list[str]:
-    """Helper to find paths to additional "sidecar" files that may be associated
-    with a given main data file in a BIDS dataset; paths are returned as relative to
-    the parent dataset path.
-
-    For example, if the input file is a .set file, this will return the relative path
-    to a corresponding .fdt file (if any).
-    """
-    bids_file = Path(bids_file)
-    extensions = {
-        ".set": [".set", ".fdt"],  # eeglab
-        ".edf": [".edf"],  # european
-        ".vhdr": [".eeg", ".vhdr", ".vmrk", ".dat", ".raw"],  # brainvision
-        ".bdf": [".bdf"],  # biosemi
-    }
-    return [
-        str(bids_dataset._get_relative_bidspath(bids_file.with_suffix(suffix)))
-        for suffix in extensions[bids_file.suffix]
-        if bids_file.with_suffix(suffix).exists()
-    ]
-
-
 def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any]:
     """Build the metadata record for a given BIDS file (single recording) in a BIDS dataset.
 
@@ -140,7 +134,7 @@ def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any
         eeg_json = None
 
     bids_dependencies_files = data_config["bids_dependencies_files"]
-    bidsdependencies = []
+    bidsdependencies: list[str] = []
     for extension in bids_dependencies_files:
         try:
             dep_path = bids_dataset.get_bids_metadata_files(bids_file, extension)
@@ -151,7 +145,26 @@
         except Exception:
             pass
 
-    bidsdependencies.extend(_get_raw_extensions(bids_file, bids_dataset))
+    bids_path = BIDSPath(
+        subject=bids_dataset.get_bids_file_attribute("subject", bids_file),
+        session=bids_dataset.get_bids_file_attribute("session", bids_file),
+        task=bids_dataset.get_bids_file_attribute("task", bids_file),
+        run=bids_dataset.get_bids_file_attribute("run", bids_file),
+        root=bids_dataset.bidsdir,
+        datatype=bids_dataset.get_bids_file_attribute("modality", bids_file),
+        suffix="eeg",
+        extension=Path(bids_file).suffix,
+        check=False,
+    )
+
+    sidecars_map = {
+        ".set": [".fdt"],
+        ".vhdr": [".eeg", ".vmrk", ".dat", ".raw"],
+    }
+    for ext in sidecars_map.get(bids_path.extension, []):
+        sidecar = bids_path.find_matching_sidecar(extension=ext, on_error="ignore")
+        if sidecar is not None:
+            bidsdependencies.append(str(bids_dataset._get_relative_bidspath(sidecar)))
 
     # Define field extraction functions with error handling
     field_extractors = {
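Sidecar discovery now uses BIDSPath instead of the removed _get_raw_extensions helper. A hedged sketch of the same lookup for an EEGLAB recording, mirroring the find_matching_sidecar call in the hunk above (all entities and paths are illustrative):

    from mne_bids import BIDSPath

    bids_path = BIDSPath(
        subject="012", task="RestingState", root="./data/ds002718",
        datatype="eeg", suffix="eeg", extension=".set", check=False,
    )
    # Resolve the .fdt companion file, ignoring the case where none exists.
    fdt = bids_path.find_matching_sidecar(extension=".fdt", on_error="ignore")
    if fdt is not None:
        print(f"EEGLAB payload: {fdt}")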
@@ -252,3 +265,123 @@ def merge_participants_fields(
         if norm_key not in description:
             description[norm_key] = part_value
     return description
+
+
+def participants_row_for_subject(
+    bids_root: str | Path,
+    subject: str,
+    id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
+) -> pd.Series | None:
+    """Load participants.tsv and return the row for a subject.
+
+    - Accepts either "01" or "sub-01" as the subject identifier.
+    - Returns a pandas Series for the first matching row, or None if not found.
+    """
+    try:
+        participants_tsv = Path(bids_root) / "participants.tsv"
+        if not participants_tsv.exists():
+            return None
+
+        df = pd.read_csv(
+            participants_tsv, sep="\t", dtype="string", keep_default_na=False
+        )
+        if df.empty:
+            return None
+
+        candidates = {str(subject), f"sub-{subject}"}
+        present_cols = [c for c in id_columns if c in df.columns]
+        if not present_cols:
+            return None
+
+        mask = pd.Series(False, index=df.index)
+        for col in present_cols:
+            mask |= df[col].isin(candidates)
+        match = df.loc[mask]
+        if match.empty:
+            return None
+        return match.iloc[0]
+    except Exception:
+        return None
+
+
+def participants_extras_from_tsv(
+    bids_root: str | Path,
+    subject: str,
+    *,
+    id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
+    na_like: tuple[str, ...] = ("", "n/a", "na", "nan", "unknown", "none"),
+) -> dict[str, Any]:
+    """Return non-identifier, non-empty participants.tsv fields for a subject.
+
+    Uses vectorized pandas operations to drop id columns and NA-like values.
+    """
+    row = participants_row_for_subject(bids_root, subject, id_columns=id_columns)
+    if row is None:
+        return {}
+
+    # Drop identifier columns and clean values
+    extras = row.drop(labels=[c for c in id_columns if c in row.index], errors="ignore")
+    s = extras.astype("string").str.strip()
+    valid = ~s.isna() & ~s.str.lower().isin(na_like)
+    return s[valid].to_dict()
+
+
+def attach_participants_extras(
+    raw: Any,
+    description: Any,
+    extras: dict[str, Any],
+) -> None:
+    """Attach extras to Raw.info and dataset description without overwriting.
+
+    - Adds to ``raw.info['subject_info']['participants_extras']``.
+    - Adds to ``description`` if dict or pandas Series (only missing keys).
+    """
+    if not extras:
+        return
+
+    # Raw.info enrichment
+    try:
+        subject_info = raw.info.get("subject_info") or {}
+        if not isinstance(subject_info, dict):
+            subject_info = {}
+        pe = subject_info.get("participants_extras") or {}
+        if not isinstance(pe, dict):
+            pe = {}
+        for k, v in extras.items():
+            pe.setdefault(k, v)
+        subject_info["participants_extras"] = pe
+        raw.info["subject_info"] = subject_info
+    except Exception:
+        pass
+
+    # Description enrichment
+    try:
+        import pandas as _pd  # local import to avoid hard dependency at import time
+
+        if isinstance(description, dict):
+            for k, v in extras.items():
+                description.setdefault(k, v)
+        elif isinstance(description, _pd.Series):
+            missing = [k for k in extras.keys() if k not in description.index]
+            if missing:
+                description.loc[missing] = [extras[m] for m in missing]
+    except Exception:
+        pass
+
+
+def enrich_from_participants(
+    bids_root: str | Path,
+    bidspath: BIDSPath,
+    raw: Any,
+    description: Any,
+) -> dict[str, Any]:
+    """Convenience wrapper: read participants.tsv and attach extras for this subject.
+
+    Returns the extras dictionary for further use if needed.
+    """
+    subject = getattr(bidspath, "subject", None)
+    if not subject:
+        return {}
+    extras = participants_extras_from_tsv(bids_root, subject)
+    attach_participants_extras(raw, description, extras)
+    return extras
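The new helpers enrich records from participants.tsv without overwriting existing keys. A hedged usage sketch (the BIDS root, subject, and printed columns are illustrative):

    from eegdash.bids_eeg_metadata import participants_extras_from_tsv

    # Returns {} if participants.tsv is missing or the subject is not listed.
    extras = participants_extras_from_tsv("./data/ds002718", "012")
    print(extras)  # e.g. {"age": "25", "sex": "F"} if such columns exist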
eegdash/const.py CHANGED
@@ -1,3 +1,21 @@
+# Authors: The EEGDash contributors.
+# License: GNU General Public License
+# Copyright the EEGDash contributors.
+
+"""Configuration constants and mappings for EEGDash.
+
+This module contains global configuration settings, allowed query fields, and mapping
+constants used throughout the EEGDash package. It defines the interface between EEGDash
+releases and OpenNeuro dataset identifiers, as well as validation rules for database queries.
+"""
+
+__all__ = [
+    "config",
+    "ALLOWED_QUERY_FIELDS",
+    "RELEASE_TO_OPENNEURO_DATASET_MAP",
+    "SUBJECT_MINI_RELEASE_MAP",
+]
+
 ALLOWED_QUERY_FIELDS = {
     "data_name",
     "dataset",