eegdash 0.3.9.dev182388821__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic.

eegdash/api.py CHANGED
@@ -1,23 +1,31 @@
-import logging
+# Authors: The EEGDash contributors.
+# License: GNU General Public License
+# Copyright the EEGDash contributors.
+
+"""High-level interface to the EEGDash metadata database.
+
+This module provides the main EEGDash class which serves as the primary entry point for
+interacting with the EEGDash ecosystem. It offers methods to query, insert, and update
+metadata records stored in the EEGDash MongoDB database, and includes utilities to load
+EEG data from S3 for matched records.
+"""
+
 import os
-import tempfile
 from pathlib import Path
 from typing import Any, Mapping
-from urllib.parse import urlsplit

 import mne
-import numpy as np
-import xarray as xr
 from docstring_inheritance import NumpyDocstringInheritanceInitMeta
 from dotenv import load_dotenv
-from joblib import Parallel, delayed
-from mne.utils import warn
-from mne_bids import find_matching_paths, get_bids_path_from_fname, read_raw_bids
+from mne_bids import find_matching_paths
 from pymongo import InsertOne, UpdateOne
-from s3fs import S3FileSystem
+from rich.console import Console
+from rich.panel import Panel
+from rich.text import Text

 from braindecode.datasets import BaseConcatDataset

+from . import downloader
 from .bids_eeg_metadata import (
     build_query_from_kwargs,
     load_eeg_attrs_from_bids_file,
@@ -33,10 +41,10 @@ from .data_utils import (
     EEGBIDSDataset,
     EEGDashBaseDataset,
 )
+from .logging import logger
 from .mongodb import MongoConnectionManager
 from .paths import get_default_cache_dir
-
-logger = logging.getLogger("eegdash")
+from .utils import _init_mongo_client


 class EEGDash:
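
The ad-hoc module-level logger is replaced here by the package-wide logger from `eegdash.logging`. Downstream code that tunes verbosity through the standard logging API should keep working, assuming the logger name remains "eegdash":

import logging

# Assumption: the shared logger is still registered under the package name.
logging.getLogger("eegdash").setLevel(logging.DEBUG)
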
@@ -74,19 +82,26 @@ class EEGDash:

         if self.is_public:
             DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
+            if not DB_CONNECTION_STRING:
+                try:
+                    _init_mongo_client()
+                    DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
+                except Exception:
+                    DB_CONNECTION_STRING = None
         else:
             load_dotenv()
             DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")

         # Use singleton to get MongoDB client, database, and collection
+        if not DB_CONNECTION_STRING:
+            raise RuntimeError(
+                "No MongoDB connection string configured. Set MNE config 'EEGDASH_DB_URI' "
+                "or environment variable 'DB_CONNECTION_STRING'."
+            )
         self.__client, self.__db, self.__collection = MongoConnectionManager.get_client(
             DB_CONNECTION_STRING, is_staging
         )

-        self.filesystem = S3FileSystem(
-            anon=True, client_kwargs={"region_name": "us-east-2"}
-        )
-
     def find(
         self, query: dict[str, Any] = None, /, **kwargs
     ) -> list[Mapping[str, Any]]:
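
With this hunk the constructor fails fast instead of deferring connection errors: if neither the MNE config key nor the environment variable yields a URI (after one attempt at `_init_mongo_client()`), a RuntimeError is raised. A minimal sketch of how a caller can satisfy the new guard; the URI below is a placeholder, not a real endpoint:

import mne

# Public mode looks up the MNE config first; persist a URI there once.
mne.set_config("EEGDASH_DB_URI", "mongodb://localhost:27017/eegdash")  # placeholder

# Private mode (is_public=False) instead reads DB_CONNECTION_STRING from the
# environment (a .env file works too, via load_dotenv()):
#   export DB_CONNECTION_STRING="mongodb://..."

from eegdash import EEGDash

client = EEGDash()  # raises RuntimeError if no connection string was resolved
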
@@ -197,17 +212,22 @@ class EEGDash:
         return doc is not None

     def _validate_input(self, record: dict[str, Any]) -> dict[str, Any]:
-        """Internal method to validate the input record against the expected schema.
+        """Validate the input record against the expected schema.

         Parameters
         ----------
-        record: dict
+        record : dict
             A dictionary representing the EEG data record to be validated.

         Returns
         -------
-        dict:
-            Returns the record itself on success, or raises a ValueError if the record is invalid.
+        dict
+            The record itself on success.
+
+        Raises
+        ------
+        ValueError
+            If the record is missing required keys or has values of the wrong type.

         """
         input_types = {
@@ -237,20 +257,44 @@ class EEGDash:
         return record

     def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
-        """Internal helper to build a validated MongoDB query from keyword args.
+        """Build a validated MongoDB query from keyword arguments.
+
+        This delegates to the module-level builder used across the package.
+
+        Parameters
+        ----------
+        **kwargs
+            Keyword arguments to convert into a MongoDB query.
+
+        Returns
+        -------
+        dict
+            A MongoDB query dictionary.

-        This delegates to the module-level builder used across the package and
-        is exposed here for testing and convenience.
         """
         return build_query_from_kwargs(**kwargs)

-    # --- Query merging and conflict detection helpers ---
-    def _extract_simple_constraint(self, query: dict[str, Any], key: str):
+    def _extract_simple_constraint(
+        self, query: dict[str, Any], key: str
+    ) -> tuple[str, Any] | None:
         """Extract a simple constraint for a given key from a query dict.

-        Supports only top-level equality (key: value) and $in (key: {"$in": [...]})
-        constraints. Returns a tuple (kind, value) where kind is "eq" or "in". If the
-        key is not present or uses other operators, returns None.
+        Supports top-level equality (e.g., ``{'subject': '01'}``) and ``$in``
+        (e.g., ``{'subject': {'$in': ['01', '02']}}``) constraints.
+
+        Parameters
+        ----------
+        query : dict
+            The MongoDB query dictionary.
+        key : str
+            The key for which to extract the constraint.
+
+        Returns
+        -------
+        tuple or None
+            A tuple of (kind, value) where kind is "eq" or "in", or None if the
+            constraint is not present or unsupported.
+
         """
         if not isinstance(query, dict) or key not in query:
             return None
@@ -260,16 +304,28 @@ class EEGDash:
                 return ("in", list(val["$in"]))
             return None  # unsupported operator shape for conflict checking
         else:
-            return ("eq", val)
+            return "eq", val

     def _raise_if_conflicting_constraints(
         self, raw_query: dict[str, Any], kwargs_query: dict[str, Any]
     ) -> None:
-        """Raise ValueError if both query sources define incompatible constraints.
+        """Raise ValueError if query sources have incompatible constraints.
+
+        Checks for mutually exclusive constraints on the same field to avoid
+        silent empty results.
+
+        Parameters
+        ----------
+        raw_query : dict
+            The raw MongoDB query dictionary.
+        kwargs_query : dict
+            The query dictionary built from keyword arguments.
+
+        Raises
+        ------
+        ValueError
+            If conflicting constraints are found.

-        We conservatively check only top-level fields with simple equality or $in
-        constraints. If a field appears in both queries and constraints are mutually
-        exclusive, raise an explicit error to avoid silent empty result sets.
         """
         if not raw_query or not kwargs_query:
             return
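
These two helpers only reason about top-level equality and ``$in`` constraints; any other operator shape is treated as opaque and skipped. A short sketch of the documented behavior (these are private methods, so the calls below are illustrative and subject to change):

eegdash = EEGDash()

eegdash._extract_simple_constraint({"subject": "01"}, "subject")
# -> ("eq", "01")
eegdash._extract_simple_constraint({"subject": {"$in": ["01", "02"]}}, "subject")
# -> ("in", ["01", "02"])
eegdash._extract_simple_constraint({"subject": {"$regex": "^0"}}, "subject")
# -> None (unsupported operator shape)

# Disjoint constraints on the same field raise instead of silently
# returning an empty result set:
eegdash._raise_if_conflicting_constraints(
    {"subject": "01"},
    {"subject": {"$in": ["02", "03"]}},
)  # ValueError: Conflicting constraints for 'subject': ...
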
@@ -310,115 +366,6 @@ class EEGDash:
                     f"Conflicting constraints for '{key}': disjoint sets {r_val!r} and {k_val!r}"
                 )

-    def load_eeg_data_from_s3(self, s3path: str) -> xr.DataArray:
-        """Load EEG data from an S3 URI into an ``xarray.DataArray``.
-
-        Preserves the original filename, downloads sidecar files when applicable
-        (e.g., ``.fdt`` for EEGLAB, ``.vmrk``/``.eeg`` for BrainVision), and uses
-        MNE's direct readers.
-
-        Parameters
-        ----------
-        s3path : str
-            An S3 URI (should start with "s3://").
-
-        Returns
-        -------
-        xr.DataArray
-            EEG data with dimensions ``("channel", "time")``.
-
-        Raises
-        ------
-        ValueError
-            If the file extension is unsupported.
-
-        """
-        # choose a temp dir so sidecars can be colocated
-        with tempfile.TemporaryDirectory() as tmpdir:
-            # Derive local filenames from the S3 key to keep base name consistent
-            s3_key = urlsplit(s3path).path  # e.g., "/dsXXXX/sub-.../..._eeg.set"
-            basename = Path(s3_key).name
-            ext = Path(basename).suffix.lower()
-            local_main = Path(tmpdir) / basename
-
-            # Download main file
-            with (
-                self.filesystem.open(s3path, mode="rb") as fsrc,
-                open(local_main, "wb") as fdst,
-            ):
-                fdst.write(fsrc.read())
-
-            # Determine and fetch any required sidecars
-            sidecars: list[str] = []
-            if ext == ".set":  # EEGLAB
-                sidecars = [".fdt"]
-            elif ext == ".vhdr":  # BrainVision
-                sidecars = [".vmrk", ".eeg", ".dat", ".raw"]
-
-            for sc_ext in sidecars:
-                sc_key = s3_key[: -len(ext)] + sc_ext
-                sc_uri = f"s3://{urlsplit(s3path).netloc}{sc_key}"
-                try:
-                    # If sidecar exists, download next to the main file
-                    info = self.filesystem.info(sc_uri)
-                    if info:
-                        sc_local = Path(tmpdir) / Path(sc_key).name
-                        with (
-                            self.filesystem.open(sc_uri, mode="rb") as fsrc,
-                            open(sc_local, "wb") as fdst,
-                        ):
-                            fdst.write(fsrc.read())
-                except Exception:
-                    # Sidecar not present; skip silently
-                    pass
-
-            # Read using appropriate MNE reader
-            raw = mne.io.read_raw(str(local_main), preload=True, verbose=False)
-
-            data = raw.get_data()
-            fs = raw.info["sfreq"]
-            max_time = data.shape[1] / fs
-            time_steps = np.linspace(0, max_time, data.shape[1]).squeeze()
-            channel_names = raw.ch_names
-
-            return xr.DataArray(
-                data=data,
-                dims=["channel", "time"],
-                coords={"time": time_steps, "channel": channel_names},
-            )
-
-    def load_eeg_data_from_bids_file(self, bids_file: str) -> xr.DataArray:
-        """Load EEG data from a local BIDS-formatted file.
-
-        Parameters
-        ----------
-        bids_file : str
-            Path to a BIDS-compliant EEG file (e.g., ``*_eeg.edf``, ``*_eeg.bdf``,
-            ``*_eeg.vhdr``, ``*_eeg.set``).
-
-        Returns
-        -------
-        xr.DataArray
-            EEG data with dimensions ``("channel", "time")``.
-
-        """
-        bids_path = get_bids_path_from_fname(bids_file, verbose=False)
-        raw_object = read_raw_bids(bids_path=bids_path, verbose=False)
-        eeg_data = raw_object.get_data()
-
-        fs = raw_object.info["sfreq"]
-        max_time = eeg_data.shape[1] / fs
-        time_steps = np.linspace(0, max_time, eeg_data.shape[1]).squeeze()  # in seconds
-
-        channel_names = raw_object.ch_names
-
-        eeg_xarray = xr.DataArray(
-            data=eeg_data,
-            dims=["channel", "time"],
-            coords={"time": time_steps, "channel": channel_names},
-        )
-        return eeg_xarray
-
     def add_bids_dataset(
         self, dataset: str, data_dir: str, overwrite: bool = True
     ) -> None:
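
With this hunk the xarray-returning loaders are gone: `load_eeg_data_from_s3()` and `load_eeg_data_from_bids_file()` were removed along with the `numpy`/`xarray`/`s3fs` imports, and downloading now lives in the `downloader` module. Code that depended on the removed helpers can reproduce their behavior with plain MNE and xarray; a sketch mirroring the deleted implementation (not part of the eegdash 0.4.0 API):

import mne
import numpy as np
import xarray as xr


def raw_to_dataarray(path: str) -> xr.DataArray:
    # Read a local EEG file with MNE, as the removed helpers did.
    raw = mne.io.read_raw(path, preload=True, verbose=False)
    data = raw.get_data()  # shape: (n_channels, n_times)
    times = np.linspace(0, data.shape[1] / raw.info["sfreq"], data.shape[1])
    return xr.DataArray(
        data,
        dims=["channel", "time"],
        coords={"channel": raw.ch_names, "time": times},
    )
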
@@ -482,84 +429,59 @@ class EEGDash:
         logger.info("Upserted: %s", result.upserted_count)
         logger.info("Errors: %s ", result.bulk_api_result.get("writeErrors", []))

-    def get(self, query: dict[str, Any]) -> list[xr.DataArray]:
-        """Download and return EEG data arrays for records matching a query.
+    def _add_request(self, record: dict) -> InsertOne:
+        """Create a MongoDB insertion request for a record.

         Parameters
         ----------
-        query : dict
-            MongoDB query used to select records.
+        record : dict
+            The record to insert.

         Returns
         -------
-        list of xr.DataArray
-            EEG data for each matching record, with dimensions ``("channel", "time")``.
-
-        Notes
-        -----
-        Retrieval runs in parallel. Downloaded files are read and discarded
-        (no on-disk caching here).
+        InsertOne
+            A PyMongo ``InsertOne`` object.

         """
-        sessions = self.find(query)
-        results = []
-        if sessions:
-            logger.info("Found %s records", len(sessions))
-            results = Parallel(
-                n_jobs=-1 if len(sessions) > 1 else 1, prefer="threads", verbose=1
-            )(
-                delayed(self.load_eeg_data_from_s3)(self._get_s3path(session))
-                for session in sessions
-            )
-        return results
+        return InsertOne(record)

-    def _get_s3path(self, record: Mapping[str, Any] | str) -> str:
-        """Build an S3 URI from a DB record or a relative path.
+    def add(self, record: dict) -> None:
+        """Add a single record to the MongoDB collection.

         Parameters
         ----------
-        record : dict or str
-            Either a DB record containing a ``'bidspath'`` key, or a relative
-            path string under the OpenNeuro bucket.
-
-        Returns
-        -------
-        str
-            Fully qualified S3 URI.
-
-        Raises
-        ------
-        ValueError
-            If a mapping is provided but ``'bidspath'`` is missing.
+        record : dict
+            The record to add.

         """
-        if isinstance(record, str):
-            rel = record
-        else:
-            rel = record.get("bidspath")
-            if not rel:
-                raise ValueError("Record missing 'bidspath' for S3 path resolution")
-        return f"s3://openneuro.org/{rel}"
-
-    def _add_request(self, record: dict):
-        """Internal helper method to create a MongoDB insertion request for a record."""
-        return InsertOne(record)
-
-    def add(self, record: dict):
-        """Add a single record to the MongoDB collection."""
         try:
             self.__collection.insert_one(record)
         except ValueError as e:
             logger.error("Validation error for record: %s ", record["data_name"])
             logger.error(e)
-        except:
-            logger.error("Error adding record: %s ", record["data_name"])
+        except Exception as exc:
+            logger.error(
+                "Error adding record: %s ", record.get("data_name", "<unknown>")
+            )
+            logger.debug("Add operation failed", exc_info=exc)
+
+    def _update_request(self, record: dict) -> UpdateOne:
+        """Create a MongoDB update request for a record.

-    def _update_request(self, record: dict):
-        """Internal helper method to create a MongoDB update request for a record."""
+        Parameters
+        ----------
+        record : dict
+            The record to update.
+
+        Returns
+        -------
+        UpdateOne
+            A PyMongo ``UpdateOne`` object.
+
+        """
         return UpdateOne({"data_name": record["data_name"]}, {"$set": record})

-    def update(self, record: dict):
+    def update(self, record: dict) -> None:
         """Update a single record in the MongoDB collection.

         Parameters
@@ -572,62 +494,88 @@ class EEGDash:
             self.__collection.update_one(
                 {"data_name": record["data_name"]}, {"$set": record}
             )
-        except:  # silent failure
-            logger.error("Error updating record: %s", record["data_name"])
+        except Exception as exc:  # log and continue
+            logger.error(
+                "Error updating record: %s", record.get("data_name", "<unknown>")
+            )
+            logger.debug("Update operation failed", exc_info=exc)

     def exists(self, query: dict[str, Any]) -> bool:
-        """Alias for :meth:`exist` provided for API clarity."""
+        """Check if at least one record matches the query.
+
+        This is an alias for :meth:`exist`.
+
+        Parameters
+        ----------
+        query : dict
+            MongoDB query to check for existence.
+
+        Returns
+        -------
+        bool
+            True if a matching record exists, False otherwise.
+
+        """
         return self.exist(query)

-    def remove_field(self, record, field):
-        """Remove a specific field from a record in the MongoDB collection.
+    def remove_field(self, record: dict, field: str) -> None:
+        """Remove a field from a specific record in the MongoDB collection.

         Parameters
         ----------
         record : dict
-            Record identifying object with ``data_name``.
+            Record-identifying object with a ``data_name`` key.
         field : str
-            Field name to remove.
+            The name of the field to remove.

         """
         self.__collection.update_one(
             {"data_name": record["data_name"]}, {"$unset": {field: 1}}
         )

-    def remove_field_from_db(self, field):
-        """Remove a field from all records (destructive).
+    def remove_field_from_db(self, field: str) -> None:
+        """Remove a field from all records in the database.
+
+        .. warning::
+            This is a destructive operation and cannot be undone.

         Parameters
         ----------
         field : str
-            Field name to remove from every document.
+            The name of the field to remove from all documents.

         """
         self.__collection.update_many({}, {"$unset": {field: 1}})

     @property
     def collection(self):
-        """Return the MongoDB collection object."""
-        return self.__collection
+        """The underlying PyMongo ``Collection`` object.

-    def close(self):
-        """Backward-compatibility no-op; connections are managed globally.
+        Returns
+        -------
+        pymongo.collection.Collection
+            The collection object used for database interactions.

-        Notes
-        -----
-        Connections are managed by :class:`MongoConnectionManager`. Use
-        :meth:`close_all_connections` to explicitly close all clients.
+        """
+        return self.__collection
+
+    def close(self) -> None:
+        """Close the MongoDB connection.

+        .. deprecated:: 0.1
+            Connections are now managed globally by :class:`MongoConnectionManager`.
+            This method is a no-op and will be removed in a future version.
+            Use :meth:`EEGDash.close_all_connections` to close all clients.
         """
         # Individual instances no longer close the shared client
         pass

     @classmethod
-    def close_all_connections(cls):
-        """Close all MongoDB client connections managed by the singleton."""
+    def close_all_connections(cls) -> None:
+        """Close all MongoDB client connections managed by the singleton manager."""
         MongoConnectionManager.close_all()

-    def __del__(self):
+    def __del__(self) -> None:
         """Destructor; no explicit action needed due to global connection manager."""
         # No longer needed since we're using singleton pattern
         pass
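
Callers that previously paired each instance with `close()` can simply drop the call; connection teardown is now a single class-level operation. A sketch of the surviving pattern:

from eegdash import EEGDash

# ... use one or more EEGDash instances ...
EEGDash.close_all_connections()  # closes every client pooled by the manager
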
@@ -640,22 +588,59 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM

     Examples
     --------
-    # Find by single subject
-    >>> ds = EEGDashDataset(dataset="ds005505", subject="NDARCA153NKE")
-
-    # Find by a list of subjects and a specific task
-    >>> subjects = ["NDARCA153NKE", "NDARXT792GY8"]
-    >>> ds = EEGDashDataset(dataset="ds005505", subject=subjects, task="RestingState")
-
-    # Use a raw MongoDB query for advanced filtering
-    >>> raw_query = {"dataset": "ds005505", "subject": {"$in": subjects}}
-    >>> ds = EEGDashDataset(query=raw_query)
+    Basic usage with dataset and subject filtering:
+
+    >>> from eegdash import EEGDashDataset
+    >>> dataset = EEGDashDataset(
+    ...     cache_dir="./data",
+    ...     dataset="ds002718",
+    ...     subject="012"
+    ... )
+    >>> print(f"Number of recordings: {len(dataset)}")
+
+    Filter by multiple subjects and specific task:
+
+    >>> subjects = ["012", "013", "014"]
+    >>> dataset = EEGDashDataset(
+    ...     cache_dir="./data",
+    ...     dataset="ds002718",
+    ...     subject=subjects,
+    ...     task="RestingState"
+    ... )
+
+    Load and inspect EEG data from recordings:
+
+    >>> if len(dataset) > 0:
+    ...     recording = dataset[0]
+    ...     raw = recording.load()
+    ...     print(f"Sampling rate: {raw.info['sfreq']} Hz")
+    ...     print(f"Number of channels: {len(raw.ch_names)}")
+    ...     print(f"Duration: {raw.times[-1]:.1f} seconds")
+
+    Advanced filtering with raw MongoDB queries:
+
+    >>> from eegdash import EEGDashDataset
+    >>> query = {
+    ...     "dataset": "ds002718",
+    ...     "subject": {"$in": ["012", "013"]},
+    ...     "task": "RestingState"
+    ... }
+    >>> dataset = EEGDashDataset(cache_dir="./data", query=query)
+
+    Working with dataset collections and braindecode integration:
+
+    >>> # EEGDashDataset is a braindecode BaseConcatDataset
+    >>> for i, recording in enumerate(dataset):
+    ...     if i >= 2:  # limit output
+    ...         break
+    ...     print(f"Recording {i}: {recording.description}")
+    ...     raw = recording.load()
+    ...     print(f"    Channels: {len(raw.ch_names)}, Duration: {raw.times[-1]:.1f}s")

     Parameters
     ----------
     cache_dir : str | Path
-        Directory where data are cached locally. If not specified, a default
-        cache directory under the user cache is used.
+        Directory where data are cached locally.
     query : dict | None
         Raw MongoDB query to filter records. If provided, it is merged with
         keyword filtering arguments (see ``**kwargs``) using logical AND.
@@ -726,13 +711,21 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
         self.records = records
         self.download = download
         self.n_jobs = n_jobs
-        self.eeg_dash_instance = eeg_dash_instance or EEGDash()
+        self.eeg_dash_instance = eeg_dash_instance

-        # Resolve a unified cache directory across code/tests/CI
-        self.cache_dir = Path(cache_dir or get_default_cache_dir())
+        self.cache_dir = cache_dir
+        if self.cache_dir == "" or self.cache_dir is None:
+            self.cache_dir = get_default_cache_dir()
+            logger.warning(
+                f"Cache directory is empty, using the eegdash default path: {self.cache_dir}"
+            )
+
+        self.cache_dir = Path(self.cache_dir)

         if not self.cache_dir.exists():
-            warn(f"Cache directory does not exist, creating it: {self.cache_dir}")
+            logger.warning(
+                f"Cache directory does not exist, creating it: {self.cache_dir}"
+            )
             self.cache_dir.mkdir(exist_ok=True, parents=True)

         # Separate query kwargs from other kwargs passed to the BaseDataset constructor
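
The cache-directory handling also changed: an empty or missing ``cache_dir`` now falls back to ``get_default_cache_dir()`` with an explicit warning, rather than being resolved silently. Illustrative calls (paths and entities are examples only):

from eegdash import EEGDashDataset

# Explicit cache location:
ds = EEGDashDataset(cache_dir="./data", dataset="ds002718", subject="012")

# Empty string or None: falls back to the package default and logs a warning.
ds = EEGDashDataset(cache_dir="", dataset="ds002718", subject="012")
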
@@ -772,21 +765,29 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
             not _suppress_comp_warning
             and self.query["dataset"] in RELEASE_TO_OPENNEURO_DATASET_MAP.values()
         ):
-            warn(
-                "If you are not participating in the competition, you can ignore this warning!"
-                "\n\n"
-                "EEG 2025 Competition Data Notice:\n"
-                "---------------------------------\n"
-                " You are loading the dataset that is used in the EEG 2025 Competition:\n"
-                "IMPORTANT: The data accessed via `EEGDashDataset` is NOT identical to what you get from `EEGChallengeDataset` object directly.\n"
-                "and it is not what you will use for the competition. Downsampling and filtering were applied to the data"
-                "to allow more people to participate.\n"
-                "\n"
-                "If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data.\n"
-                "\n",
-                UserWarning,
-                module="eegdash",
+            message_text = Text.from_markup(
+                "[italic]This notice is only for users who are participating in the [link=https://eeg2025.github.io/]EEG 2025 Competition[/link].[/italic]\n\n"
+                "[bold]EEG 2025 Competition Data Notice![/bold]\n"
+                "You are loading one of the datasets that is used in competition, but via `EEGDashDataset`.\n\n"
+                "[bold red]IMPORTANT[/bold red]: \n"
+                "If you download data from `EEGDashDataset`, it is [u]NOT[/u] identical to the official \n"
+                "competition data, which is accessed via `EEGChallengeDataset`. "
+                "The competition data has been downsampled and filtered.\n\n"
+                "[bold]If you are participating in the competition, \nyou must use the `EEGChallengeDataset` object to ensure consistency.[/bold] \n\n"
+                "If you are not participating in the competition, you can ignore this message."
             )
+            warning_panel = Panel(
+                message_text,
+                title="[yellow]EEG 2025 Competition Data Notice[/yellow]",
+                subtitle="[cyan]Source: EEGDashDataset[/cyan]",
+                border_style="yellow",
+            )
+
+            try:
+                Console().print(warning_panel)
+            except Exception:
+                logger.warning(str(message_text))
+
         if records is not None:
             self.records = records
             datasets = [
@@ -848,16 +849,15 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
                     )
                 )
         elif self.query:
-            # This is the DB query path that we are improving
+            if self.eeg_dash_instance is None:
+                self.eeg_dash_instance = EEGDash()
             datasets = self._find_datasets(
                 query=build_query_from_kwargs(**self.query),
                 description_fields=description_fields,
                 base_dataset_kwargs=base_dataset_kwargs,
             )
             # We only need filesystem if we need to access S3
-            self.filesystem = S3FileSystem(
-                anon=True, client_kwargs={"region_name": "us-east-2"}
-            )
+            self.filesystem = downloader.get_s3_filesystem()
         else:
             raise ValueError(
                 "You must provide either 'records', a 'data_dir', or a query/keyword arguments for filtering."
@@ -870,45 +870,30 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
     ) -> list[dict]:
         """Discover local BIDS EEG files and build minimal records.

-        This helper enumerates EEG recordings under ``dataset_root`` via
-        ``mne_bids.find_matching_paths`` and applies entity filters to produce a
-        list of records suitable for ``EEGDashBaseDataset``. No network access
-        is performed and files are not read.
+        Enumerates EEG recordings under ``dataset_root`` using
+        ``mne_bids.find_matching_paths`` and applies entity filters to produce
+        records suitable for :class:`EEGDashBaseDataset`. No network access is
+        performed, and files are not read.

         Parameters
         ----------
         dataset_root : Path
-            Local dataset directory. May be the plain dataset folder (e.g.,
-            ``ds005509``) or a suffixed cache variant (e.g.,
-            ``ds005509-bdf-mini``).
-        filters : dict of {str, Any}
-            Query filters. Must include ``'dataset'`` with the dataset id (without
-            local suffixes). May include BIDS entities ``'subject'``,
-            ``'session'``, ``'task'``, and ``'run'``. Each value can be a scalar
-            or a sequence of scalars.
+            Local dataset directory (e.g., ``/path/to/cache/ds005509``).
+        filters : dict
+            Query filters. Must include ``'dataset'`` and may include BIDS
+            entities like ``'subject'``, ``'session'``, etc.

         Returns
         -------
-        records : list of dict
-            One record per matched EEG file with at least:
-
-            - ``'data_name'``
-            - ``'dataset'`` (dataset id, without suffixes)
-            - ``'bidspath'`` (normalized to start with the dataset id)
-            - ``'subject'``, ``'session'``, ``'task'``, ``'run'`` (may be None)
-            - ``'bidsdependencies'`` (empty list)
-            - ``'modality'`` (``"eeg"``)
-            - ``'sampling_frequency'``, ``'nchans'``, ``'ntimes'`` (minimal
-              defaults for offline usage)
+        list of dict
+            A list of records, one for each matched EEG file. Each record
+            contains BIDS entities, paths, and minimal metadata for offline use.

         Notes
         -----
-        - Matching uses ``datatypes=['eeg']`` and ``suffixes=['eeg']``.
-        - ``bidspath`` is constructed as
-          ``<dataset_id> / <relative_path_from_dataset_root>`` to ensure the
-          first path component is the dataset id (without local cache suffixes).
-        - Minimal defaults are set for ``sampling_frequency``, ``nchans``, and
-          ``ntimes`` to satisfy dataset length requirements offline.
+        Matching is performed for ``datatypes=['eeg']`` and ``suffixes=['eeg']``.
+        The ``bidspath`` is normalized to ensure it starts with the dataset ID,
+        even for suffixed cache directories.

         """
         dataset_id = filters["dataset"]
@@ -970,10 +955,22 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
         return records_out

     def _find_key_in_nested_dict(self, data: Any, target_key: str) -> Any:
-        """Recursively search for target_key in nested dicts/lists with normalized matching.
+        """Recursively search for a key in nested dicts/lists.
+
+        Performs a case-insensitive and underscore/hyphen-agnostic search.
+
+        Parameters
+        ----------
+        data : Any
+            The nested data structure (dicts, lists) to search.
+        target_key : str
+            The key to search for.
+
+        Returns
+        -------
+        Any
+            The value of the first matching key, or None if not found.

-        This makes lookups tolerant to naming differences like "p-factor" vs "p_factor".
-        Returns the first match or None.
         """
         norm_target = normalize_key(target_key)
         if isinstance(data, dict):
@@ -996,23 +993,25 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
         description_fields: list[str],
         base_dataset_kwargs: dict,
     ) -> list[EEGDashBaseDataset]:
-        """Helper method to find datasets in the MongoDB collection that satisfy the
-        given query and return them as a list of EEGDashBaseDataset objects.
+        """Find and construct datasets from a MongoDB query.
+
+        Queries the database, then creates a list of
+        :class:`EEGDashBaseDataset` objects from the results.

         Parameters
         ----------
-        query : dict
-            The query object, as in EEGDash.find().
-        description_fields : list[str]
-            A list of fields to be extracted from the dataset records and included in
-            the returned dataset description(s).
-        kwargs: additional keyword arguments to be passed to the EEGDashBaseDataset
-            constructor.
+        query : dict, optional
+            The MongoDB query to execute.
+        description_fields : list of str
+            Fields to extract from each record for the dataset description.
+        base_dataset_kwargs : dict
+            Additional keyword arguments to pass to the
+            :class:`EEGDashBaseDataset` constructor.

         Returns
         -------
-        list :
-            A list of EEGDashBaseDataset objects that match the query.
+        list of EEGDashBaseDataset
+            A list of dataset objects matching the query.

         """
         datasets: list[EEGDashBaseDataset] = []
@@ -1043,3 +1042,6 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
                 )
             )
         return datasets
+
+
+__all__ = ["EEGDash", "EEGDashDataset"]