eegdash 0.3.7.dev104__py3-none-any.whl → 0.3.7.dev105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic.
- eegdash/__init__.py +4 -4
- eegdash/api.py +429 -422
- eegdash/bids_eeg_metadata.py +184 -0
- eegdash/const.py +48 -0
- eegdash/data_utils.py +68 -28
- eegdash/dataset/__init__.py +4 -0
- eegdash/{dataset.py → dataset/dataset.py} +53 -10
- eegdash/{registry.py → dataset/registry.py} +3 -3
- eegdash/utils.py +1 -1
- {eegdash-0.3.7.dev104.dist-info → eegdash-0.3.7.dev105.dist-info}/METADATA +1 -1
- {eegdash-0.3.7.dev104.dist-info → eegdash-0.3.7.dev105.dist-info}/RECORD +14 -14
- eegdash/data_config.py +0 -34
- eegdash/dataset_summary.csv +0 -256
- {eegdash-0.3.7.dev104.dist-info → eegdash-0.3.7.dev105.dist-info}/WHEEL +0 -0
- {eegdash-0.3.7.dev104.dist-info → eegdash-0.3.7.dev105.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.3.7.dev104.dist-info → eegdash-0.3.7.dev105.dist-info}/top_level.txt +0 -0
eegdash/bids_eeg_metadata.py ADDED
@@ -0,0 +1,184 @@
+import logging
+from pathlib import Path
+from typing import Any
+
+from .const import ALLOWED_QUERY_FIELDS
+from .const import config as data_config
+from .data_utils import EEGBIDSDataset
+
+logger = logging.getLogger("eegdash")
+
+__all__ = [
+    "build_query_from_kwargs",
+    "load_eeg_attrs_from_bids_file",
+]
+
+
+def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
+    """Build and validate a MongoDB query from user-friendly keyword arguments.
+
+    Improvements:
+    - Reject None values and empty/whitespace-only strings
+    - For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
+    - Preserve scalars as exact matches
+    """
+    # 1. Validate that all provided keys are allowed for querying
+    unknown_fields = set(kwargs.keys()) - ALLOWED_QUERY_FIELDS
+    if unknown_fields:
+        raise ValueError(
+            f"Unsupported query field(s): {', '.join(sorted(unknown_fields))}. "
+            f"Allowed fields are: {', '.join(sorted(ALLOWED_QUERY_FIELDS))}"
+        )
+
+    # 2. Construct the query dictionary
+    query = {}
+    for key, value in kwargs.items():
+        # None is not a valid constraint
+        if value is None:
+            raise ValueError(
+                f"Received None for query parameter '{key}'. Provide a concrete value."
+            )
+
+        # Handle list-like values as multi-constraints
+        if isinstance(value, (list, tuple, set)):
+            cleaned: list[Any] = []
+            for item in value:
+                if item is None:
+                    continue
+                if isinstance(item, str):
+                    item = item.strip()
+                    if not item:
+                        continue
+                cleaned.append(item)
+            # Deduplicate while preserving order
+            cleaned = list(dict.fromkeys(cleaned))
+            if not cleaned:
+                raise ValueError(
+                    f"Received an empty list for query parameter '{key}'. This is not supported."
+                )
+            query[key] = {"$in": cleaned}
+        else:
+            # Scalars: trim strings and validate
+            if isinstance(value, str):
+                value = value.strip()
+                if not value:
+                    raise ValueError(
+                        f"Received an empty string for query parameter '{key}'."
+                    )
+            query[key] = value
+
+    return query
+
+
+def _get_raw_extensions(bids_file: str, bids_dataset: EEGBIDSDataset) -> list[str]:
+    """Helper to find paths to additional "sidecar" files that may be associated
+    with a given main data file in a BIDS dataset; paths are returned as relative to
+    the parent dataset path.
+
+    For example, if the input file is a .set file, this will return the relative path
+    to a corresponding .fdt file (if any).
+    """
+    bids_file = Path(bids_file)
+    extensions = {
+        ".set": [".set", ".fdt"],  # eeglab
+        ".edf": [".edf"],  # european
+        ".vhdr": [".eeg", ".vhdr", ".vmrk", ".dat", ".raw"],  # brainvision
+        ".bdf": [".bdf"],  # biosemi
+    }
+    return [
+        str(bids_dataset._get_relative_bidspath(bids_file.with_suffix(suffix)))
+        for suffix in extensions[bids_file.suffix]
+        if bids_file.with_suffix(suffix).exists()
+    ]
+
+
+def load_eeg_attrs_from_bids_file(
+    bids_dataset: EEGBIDSDataset, bids_file: str
+) -> dict[str, Any]:
+    """Build the metadata record for a given BIDS file (single recording) in a BIDS dataset.
+
+    Attributes are at least the ones defined in data_config attributes (set to None if missing),
+    but are typically a superset, and include, among others, the paths to relevant
+    meta-data files needed to load and interpret the file in question.
+
+    Parameters
+    ----------
+    bids_dataset : EEGBIDSDataset
+        The BIDS dataset object containing the file.
+    bids_file : str
+        The path to the BIDS file within the dataset.
+
+    Returns
+    -------
+    dict:
+        A dictionary representing the metadata record for the given file. This is the
+        same format as the records stored in the database.
+
+    """
+    if bids_file not in bids_dataset.files:
+        raise ValueError(f"{bids_file} not in {bids_dataset.dataset}")
+
+    # Initialize attrs with None values for all expected fields
+    attrs = {field: None for field in data_config["attributes"].keys()}
+
+    file = Path(bids_file).name
+    dsnumber = bids_dataset.dataset
+    # extract openneuro path by finding the first occurrence of the dataset name in the filename and remove the path before that
+    openneuro_path = dsnumber + bids_file.split(dsnumber)[1]
+
+    # Update with actual values where available
+    try:
+        participants_tsv = bids_dataset.subject_participant_tsv(bids_file)
+    except Exception as e:
+        logger.error("Error getting participants_tsv: %s", str(e))
+        participants_tsv = None
+
+    try:
+        eeg_json = bids_dataset.eeg_json(bids_file)
+    except Exception as e:
+        logger.error("Error getting eeg_json: %s", str(e))
+        eeg_json = None
+
+    bids_dependencies_files = data_config["bids_dependencies_files"]
+    bidsdependencies = []
+    for extension in bids_dependencies_files:
+        try:
+            dep_path = bids_dataset.get_bids_metadata_files(bids_file, extension)
+            dep_path = [
+                str(bids_dataset.get_relative_bidspath(dep)) for dep in dep_path
+            ]
+            bidsdependencies.extend(dep_path)
+        except Exception:
+            pass
+
+    bidsdependencies.extend(_get_raw_extensions(bids_file, bids_dataset))
+
+    # Define field extraction functions with error handling
+    field_extractors = {
+        "data_name": lambda: f"{bids_dataset.dataset}_{file}",
+        "dataset": lambda: bids_dataset.dataset,
+        "bidspath": lambda: openneuro_path,
+        "subject": lambda: bids_dataset.get_bids_file_attribute("subject", bids_file),
+        "task": lambda: bids_dataset.get_bids_file_attribute("task", bids_file),
+        "session": lambda: bids_dataset.get_bids_file_attribute("session", bids_file),
+        "run": lambda: bids_dataset.get_bids_file_attribute("run", bids_file),
+        "modality": lambda: bids_dataset.get_bids_file_attribute("modality", bids_file),
+        "sampling_frequency": lambda: bids_dataset.get_bids_file_attribute(
+            "sfreq", bids_file
+        ),
+        "nchans": lambda: bids_dataset.get_bids_file_attribute("nchans", bids_file),
+        "ntimes": lambda: bids_dataset.get_bids_file_attribute("ntimes", bids_file),
+        "participant_tsv": lambda: participants_tsv,
+        "eeg_json": lambda: eeg_json,
+        "bidsdependencies": lambda: bidsdependencies,
+    }
+
+    # Dynamically populate attrs with error handling
+    for field, extractor in field_extractors.items():
+        try:
+            attrs[field] = extractor()
+        except Exception as e:
+            logger.error("Error extracting %s : %s", field, str(e))
+            attrs[field] = None
+
+    return attrs
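The new query builder normalizes inputs before anything reaches the database. A minimal sketch of the resulting behavior (the field values below are hypothetical; the import path comes from the file list above):

```python
from eegdash.bids_eeg_metadata import build_query_from_kwargs

# Scalars stay exact matches; list-likes are stripped, deduplicated,
# and turned into a Mongo-style `$in` clause (None/empties dropped).
q = build_query_from_kwargs(dataset="ds005516", subject=["sub-01", " sub-01 ", None])
assert q == {"dataset": "ds005516", "subject": {"$in": ["sub-01"]}}

# Unknown fields fail fast with the list of allowed fields.
try:
    build_query_from_kwargs(flavor="salty")
except ValueError as e:
    print(e)  # Unsupported query field(s): flavor. Allowed fields are: ...
```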
eegdash/const.py CHANGED
@@ -1,3 +1,16 @@
+ALLOWED_QUERY_FIELDS = {
+    "data_name",
+    "dataset",
+    "subject",
+    "task",
+    "session",
+    "run",
+    "modality",
+    "sampling_frequency",
+    "nchans",
+    "ntimes",
+}
+
 RELEASE_TO_OPENNEURO_DATASET_MAP = {
     "R11": "ds005516",
     "R10": "ds005515",
@@ -256,3 +269,38 @@ SUBJECT_MINI_RELEASE_MAP = {
         "NDARFW972KFQ",
     ],
 }
+
+config = {
+    "required_fields": ["data_name"],
+    # Default set of user-facing primary record attributes expected in the database. Records
+    # where any of these are missing will be loaded with the respective attribute set to None.
+    # Additional fields may be returned if they are present in the database, notably bidsdependencies.
+    "attributes": {
+        "data_name": "str",
+        "dataset": "str",
+        "bidspath": "str",
+        "subject": "str",
+        "task": "str",
+        "session": "str",
+        "run": "str",
+        "sampling_frequency": "float",
+        "modality": "str",
+        "nchans": "int",
+        "ntimes": "int",  # note: this is really the number of seconds in the data, rounded down
+    },
+    # queryable descriptive fields for a given recording
+    "description_fields": ["subject", "session", "run", "task", "age", "gender", "sex"],
+    # list of filenames that may be present in the BIDS dataset directory that are used
+    # to load and interpret a given BIDS recording.
+    "bids_dependencies_files": [
+        "dataset_description.json",
+        "participants.tsv",
+        "events.tsv",
+        "events.json",
+        "eeg.json",
+        "electrodes.tsv",
+        "channels.tsv",
+        "coordsystem.json",
+    ],
+    "accepted_query_fields": ["data_name", "dataset"],
+}
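The relationship between the new `ALLOWED_QUERY_FIELDS` set and the `config["attributes"]` mapping can be checked directly; a quick sanity check (only the import path is taken from the diff):

```python
from eegdash.const import ALLOWED_QUERY_FIELDS, config

# Every queryable field is also a typed record attribute...
assert ALLOWED_QUERY_FIELDS <= set(config["attributes"])
# ...and the only attribute that is not queryable is the storage path.
assert set(config["attributes"]) - ALLOWED_QUERY_FIELDS == {"bidspath"}
```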
eegdash/data_utils.py CHANGED
@@ -57,7 +57,7 @@ class EEGDashBaseDataset(BaseDataset):
         super().__init__(None, **kwargs)
         self.record = record
         self.cache_dir = Path(cache_dir)
-        self.bids_kwargs = self.
+        self.bids_kwargs = self._get_raw_bids_args()
 
         if s3_bucket:
             self.s3_bucket = s3_bucket
@@ -66,16 +66,46 @@ class EEGDashBaseDataset(BaseDataset):
             self.s3_bucket = self._AWS_BUCKET
             self.s3_open_neuro = True
 
-
-
+        # Compute a dataset folder name under cache_dir that encodes preprocessing
+        # (e.g., bdf, mini) to avoid overlapping with the original dataset cache.
+        self.dataset_folder = record.get("dataset", "")
+        if s3_bucket:
+            suffixes: list[str] = []
+            bucket_lower = str(s3_bucket).lower()
+            if "bdf" in bucket_lower:
+                suffixes.append("bdf")
+            if "mini" in bucket_lower:
+                suffixes.append("mini")
+            if suffixes:
+                self.dataset_folder = f"{self.dataset_folder}-{'-'.join(suffixes)}"
+
+        # Place files under the dataset-specific folder (with suffix if any)
+        rel = Path(record["bidspath"])  # usually starts with dataset id
+        if rel.parts and rel.parts[0] == record.get("dataset"):
+            rel = Path(self.dataset_folder, *rel.parts[1:])
+        else:
+            rel = Path(self.dataset_folder) / rel
+        self.filecache = self.cache_dir / rel
+        self.bids_root = self.cache_dir / self.dataset_folder
         self.bidspath = BIDSPath(
             root=self.bids_root,
             datatype="eeg",
             suffix="eeg",
+            # extension='.bdf',
             **self.bids_kwargs,
         )
+        # TO-DO: remove this once find a better solution using mne-bids or update competition dataset
+        try:
+            _ = str(self.bidspath)
+        except RuntimeError:
+            try:
+                self.bidspath = self.bidspath.update(extension=".bdf")
+                self.filecache = self.filecache.with_suffix(".bdf")
+            except Exception as e:
+                logger.error(f"Error while updating BIDS path: {e}")
+                raise e
 
-        self.s3file = self.
+        self.s3file = self._get_s3path(record["bidspath"])
         self.bids_dependencies = record["bidsdependencies"]
         # Temporary fix for BIDS dependencies path
         # just to release to the competition
@@ -87,7 +117,7 @@ class EEGDashBaseDataset(BaseDataset):
 
         self._raw = None
 
-    def
+    def _get_s3path(self, filepath: str) -> str:
         """Helper to form an AWS S3 URI for the given relative filepath."""
         return f"{self.s3_bucket}/{filepath}"
 
@@ -141,11 +171,16 @@ class EEGDashBaseDataset(BaseDataset):
             if dep.endswith(".set"):
                 dep = dep[:-4] + ".bdf"
 
-            s3path = self.
+            s3path = self._get_s3path(dep)
             if not self.s3_open_neuro:
                 dep = self.bids_dependencies_original[i]
 
-
+            dep_path = Path(dep)
+            if dep_path.parts and dep_path.parts[0] == self.record.get("dataset"):
+                dep_local = Path(self.dataset_folder, *dep_path.parts[1:])
+            else:
+                dep_local = Path(self.dataset_folder) / dep_path
+            filepath = self.cache_dir / dep_local
             if not self.s3_open_neuro:
                 if self.filecache.suffix == ".set":
                     self.filecache = self.filecache.with_suffix(".bdf")
@@ -174,14 +209,14 @@ class EEGDashBaseDataset(BaseDataset):
             )
             filesystem.get(s3path, filepath, callback=callback)
 
-    def
+    def _get_raw_bids_args(self) -> dict[str, Any]:
         """Helper to restrict the metadata record to the fields needed to locate a BIDS
         recording.
         """
         desired_fields = ["subject", "session", "task", "run"]
         return {k: self.record[k] for k in desired_fields if self.record[k]}
 
-    def
+    def _ensure_raw(self) -> None:
         """Download the S3 file and BIDS dependencies if not already cached."""
         if not os.path.exists(self.filecache):  # not preload
             if self.bids_dependencies:
@@ -195,7 +230,6 @@ class EEGDashBaseDataset(BaseDataset):
             # TO-DO: remove this once is fixed on the our side
             if not self.s3_open_neuro:
                 self.bidspath = self.bidspath.update(extension=".bdf")
-
             self._raw = mne_bids.read_raw_bids(
                 bids_path=self.bidspath, verbose="ERROR"
             )
@@ -242,7 +276,7 @@ class EEGDashBaseDataset(BaseDataset):
         retrieval if not yet done so.
         """
         if self._raw is None:
-            self.
+            self._ensure_raw()
         return self._raw
 
     @raw.setter
@@ -300,7 +334,7 @@ class EEGDashBaseRaw(BaseRaw):
                 chtype = "eog"
             ch_types.append(chtype)
         info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
-        self.s3file = self.
+        self.s3file = self._get_s3path(input_fname)
         self.cache_dir = Path(cache_dir)
         self.filecache = self.cache_dir / input_fname
         self.bids_dependencies = bids_dependencies
@@ -317,7 +351,7 @@ class EEGDashBaseRaw(BaseRaw):
             verbose=verbose,
         )
 
-    def
+    def _get_s3path(self, filepath):
         return f"{self._AWS_BUCKET}/{filepath}"
 
     def _download_s3(self) -> None:
@@ -333,7 +367,7 @@ class EEGDashBaseRaw(BaseRaw):
             anon=True, client_kwargs={"region_name": "us-east-2"}
         )
         for dep in self.bids_dependencies:
-            s3path = self.
+            s3path = self._get_s3path(dep)
             filepath = self.cache_dir / dep
             if not filepath.exists():
                 filepath.parent.mkdir(parents=True, exist_ok=True)
@@ -394,11 +428,17 @@ class EEGBIDSDataset:
             raise ValueError("data_dir must be specified and must exist")
         self.bidsdir = Path(data_dir)
         self.dataset = dataset
-
+        # Accept exact dataset folder or a variant with informative suffixes
+        # (e.g., dsXXXXX-bdf, dsXXXXX-bdf-mini) to avoid collisions.
+        dir_name = self.bidsdir.name
+        if not (dir_name == self.dataset or dir_name.startswith(self.dataset + "-")):
+            raise AssertionError(
+                f"BIDS directory '{dir_name}' does not correspond to dataset '{self.dataset}'"
+            )
         self.layout = BIDSLayout(data_dir)
 
         # get all recording files in the bids directory
-        self.files = self.
+        self.files = self._get_recordings(self.layout)
         assert len(self.files) > 0, ValueError(
             "Unable to construct EEG dataset. No EEG recordings found."
         )
@@ -408,7 +448,7 @@ class EEGBIDSDataset:
         """Check if the dataset is EEG."""
         return self.get_bids_file_attribute("modality", self.files[0]).lower() == "eeg"
 
-    def
+    def _get_recordings(self, layout: BIDSLayout) -> list[str]:
         """Get a list of all EEG recording files in the BIDS layout."""
         files = []
         for ext, exts in self.RAW_EXTENSIONS.items():
@@ -417,12 +457,12 @@ class EEGBIDSDataset:
                 break
         return files
 
-    def
+    def _get_relative_bidspath(self, filename: str) -> str:
         """Make the given file path relative to the BIDS directory."""
         bids_parent_dir = self.bidsdir.parent.absolute()
         return str(Path(filename).relative_to(bids_parent_dir))
 
-    def
+    def _get_property_from_filename(self, property: str, filename: str) -> str:
         """Parse a property out of a BIDS-compliant filename. Returns an empty string
         if not found.
         """
@@ -434,7 +474,7 @@ class EEGBIDSDataset:
         lookup = re.search(rf"{property}-(.*?)[_\/]", filename)
         return lookup.group(1) if lookup else ""
 
-    def
+    def _merge_json_inheritance(self, json_files: list[str | Path]) -> dict:
         """Internal helper to merge list of json files found by get_bids_file_inheritance,
         expecting the order (from left to right) is from lowest
         level to highest level, and return a merged dictionary
@@ -445,7 +485,7 @@ class EEGBIDSDataset:
             json_dict.update(json.load(open(f)))  # FIXME: should close file
         return json_dict
 
-    def
+    def _get_bids_file_inheritance(
         self, path: str | Path, basename: str, extension: str
     ) -> list[Path]:
         """Get all file paths that apply to the basename file in the specified directory
@@ -492,7 +532,7 @@ class EEGBIDSDataset:
         else:
             # call get_bids_file_inheritance recursively with parent directory
             bids_files.extend(
-                self.
+                self._get_bids_file_inheritance(path.parent, basename, extension)
             )
         return bids_files
 
@@ -523,12 +563,12 @@ class EEGBIDSDataset:
         path, filename = os.path.split(filepath)
         basename = filename[: filename.rfind("_")]
         # metadata files
-        meta_files = self.
+        meta_files = self._get_bids_file_inheritance(
             path, basename, metadata_file_extension
         )
         return meta_files
 
-    def
+    def _scan_directory(self, directory: str, extension: str) -> list[Path]:
         """Return a list of file paths that end with the given extension in the specified
         directory. Ignores certain special directories like .git, .datalad, derivatives,
         and code.
@@ -545,7 +585,7 @@ class EEGBIDSDataset:
                 result_files.append(entry.path)  # Add directory to scan later
         return result_files
 
-    def
+    def _get_files_with_extension_parallel(
         self, directory: str, extension: str = ".set", max_workers: int = -1
     ) -> list[Path]:
         """Efficiently scan a directory and its subdirectories for files that end with
@@ -577,7 +617,7 @@ class EEGBIDSDataset:
         )
         # Run the scan_directory function in parallel across directories
         results = Parallel(n_jobs=max_workers, prefer="threads", verbose=1)(
-            delayed(self.
+            delayed(self._scan_directory)(d, extension) for d in dirs_to_scan
         )
 
         # Reset the directories to scan and process the results
@@ -682,7 +722,7 @@ class EEGBIDSDataset:
     def num_times(self, data_filepath: str) -> int:
         """Get the approximate number of time points in the EEG recording based on the BIDS metadata."""
         eeg_jsons = self.get_bids_metadata_files(data_filepath, "eeg.json")
-        eeg_json_dict = self.
+        eeg_json_dict = self._merge_json_inheritance(eeg_jsons)
         return int(
             eeg_json_dict["SamplingFrequency"] * eeg_json_dict["RecordingDuration"]
         )
@@ -705,7 +745,7 @@ class EEGBIDSDataset:
     def eeg_json(self, data_filepath: str) -> dict[str, Any]:
         """Get BIDS eeg.json metadata for the given data file path."""
         eeg_jsons = self.get_bids_metadata_files(data_filepath, "eeg.json")
-        eeg_json_dict = self.
+        eeg_json_dict = self._merge_json_inheritance(eeg_jsons)
         return eeg_json_dict
 
     def channel_tsv(self, data_filepath: str) -> dict[str, Any]:
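The new folder-suffix logic decides where a recording lands in the local cache. A standalone sketch of the rule as added above (the helper name, dataset id, and bucket string here are hypothetical; the logic mirrors the diff):

```python
from pathlib import Path

def cache_location(cache_dir: str, dataset: str, bidspath: str, s3_bucket: str | None) -> Path:
    """Mirror of the added cache-layout rule: suffix the dataset folder with
    'bdf'/'mini' when the bucket name contains them, then re-root the BIDS path."""
    folder = dataset
    suffixes = [s for s in ("bdf", "mini") if s3_bucket and s in s3_bucket.lower()]
    if suffixes:
        folder = f"{folder}-{'-'.join(suffixes)}"
    rel = Path(bidspath)
    # Drop the leading dataset id if present, so the path is re-rooted cleanly.
    parts = rel.parts[1:] if rel.parts and rel.parts[0] == dataset else rel.parts
    return Path(cache_dir, folder, *parts)

# e.g. a mini+bdf competition bucket (hypothetical values):
# cache_location("cache", "ds005505", "ds005505/sub-01/eeg/sub-01_eeg.set",
#                "s3://example-bucket/R5_mini_L100_bdf")
# -> cache/ds005505-bdf-mini/sub-01/eeg/sub-01_eeg.set
```

This keeps preprocessed competition copies from colliding with a previously cached copy of the original OpenNeuro dataset.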
eegdash/dataset.py → eegdash/dataset/dataset.py RENAMED
@@ -3,8 +3,9 @@ from pathlib import Path
 
 from mne.utils import warn
 
-from
-from
+from ..api import EEGDashDataset
+from ..bids_eeg_metadata import build_query_from_kwargs
+from ..const import RELEASE_TO_OPENNEURO_DATASET_MAP, SUBJECT_MINI_RELEASE_MAP
 from .registry import register_openneuro_datasets
 
 logger = logging.getLogger("eegdash")
@@ -68,15 +69,56 @@ class EEGChallengeDataset(EEGDashDataset):
         )
 
         if self.mini:
-            #
-            #
-
-
-
-
-
+            # When using the mini release, restrict subjects to the predefined subset.
+            # If the user specifies subject(s), ensure they all belong to the mini subset;
+            # otherwise, default to the full mini subject list for this release.
+
+            allowed_subjects = set(SUBJECT_MINI_RELEASE_MAP[release])
+
+            # Normalize potential 'subjects' -> 'subject' for convenience
+            if "subjects" in kwargs and "subject" not in kwargs:
+                kwargs["subject"] = kwargs.pop("subjects")
+
+            # Collect user-requested subjects from kwargs/query. We canonicalize
+            # kwargs via build_query_from_kwargs to leverage existing validation,
+            # and support Mongo-style {"$in": [...]} shapes from a raw query.
+            requested_subjects: list[str] = []
+
+            # From kwargs
+            if "subject" in kwargs and kwargs["subject"] is not None:
+                # Use the shared query builder to normalize scalars/lists
+                built = build_query_from_kwargs(subject=kwargs["subject"])
+                s_val = built.get("subject")
+                if isinstance(s_val, dict) and "$in" in s_val:
+                    requested_subjects.extend(list(s_val["$in"]))
+                elif s_val is not None:
+                    requested_subjects.append(s_val)  # type: ignore[arg-type]
+
+            # From query (top-level only)
+            if query and isinstance(query, dict) and "subject" in query:
+                qval = query["subject"]
+                if isinstance(qval, dict) and "$in" in qval:
+                    requested_subjects.extend(list(qval["$in"]))
+                elif isinstance(qval, (list, tuple, set)):
+                    requested_subjects.extend(list(qval))
+                elif qval is not None:
+                    requested_subjects.append(qval)
+
+            # Validate if any subjects were explicitly requested
+            if requested_subjects:
+                invalid = sorted(
+                    {s for s in requested_subjects if s not in allowed_subjects}
                 )
-
+                if invalid:
+                    raise ValueError(
+                        "Some requested subject(s) are not part of the mini release for "
+                        f"{release}: {invalid}. Allowed subjects: {sorted(allowed_subjects)}"
+                    )
+                # Do not override user selection; keep their (validated) subjects as-is.
+            else:
+                # No subject specified by the user: default to the full mini subset
+                kwargs["subject"] = sorted(allowed_subjects)
 
             s3_bucket = f"{s3_bucket}/{release}_mini_L100_bdf"
         else:
             s3_bucket = f"{s3_bucket}/{release}_L100_bdf"
@@ -104,6 +146,7 @@ class EEGChallengeDataset(EEGDashDataset):
             query=query,
             cache_dir=cache_dir,
             s3_bucket=s3_bucket,
+            _suppress_comp_warning=True,
             **kwargs,
         )
 
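Taken together, the guard means a mini-release dataset now validates subjects up front. A hedged sketch of how this looks from user code (the constructor arguments shown are inferred from this hunk — release, mini, cache_dir, and a subject kwarg — and the import path assumes the new eegdash.dataset package re-exports the class; neither is confirmed by the diff):

```python
from eegdash.dataset import EEGChallengeDataset

# No subject given: the class fills kwargs["subject"] with the full mini
# subset for this release, i.e. sorted(SUBJECT_MINI_RELEASE_MAP["R11"]).
ds = EEGChallengeDataset(release="R11", mini=True, cache_dir="data")

# A subject outside the mini subset now raises ValueError up front
# instead of silently matching no recordings.
EEGChallengeDataset(release="R11", mini=True, cache_dir="data", subject="sub-xyz")
```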
eegdash/registry.py → eegdash/dataset/registry.py RENAMED
@@ -16,7 +16,7 @@ def register_openneuro_datasets(
 ) -> Dict[str, type]:
     """Dynamically create dataset classes from a summary file."""
     if base_class is None:
-        from
+        from ..api import EEGDashDataset as base_class  # lazy import
 
     summary_path = Path(summary_file)
     namespace = namespace if namespace is not None else globals()
@@ -59,7 +59,7 @@ def register_openneuro_datasets(
 
     doc = f"""OpenNeuro dataset ``{dataset_id}``.
 
-    {
+    {_markdown_table(row_series)}
 
     Parameters
     ----------
@@ -101,7 +101,7 @@ def register_openneuro_datasets(
     return registered
 
 
-def
+def _markdown_table(row_series: pd.Series) -> str:
     """Create a reStructuredText grid table from a pandas Series."""
     if row_series.empty:
        return ""
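With the lazy import of EEGDashDataset and the per-dataset doc table from `_markdown_table`, a call looks roughly like this (a sketch: the summary-file path is hypothetical, and only the `summary_file` and `namespace` parameters are visible in the hunks):

```python
from eegdash.dataset.registry import register_openneuro_datasets

# Creates one EEGDashDataset subclass per row of the summary file and
# returns a mapping of class name -> class; each generated docstring embeds
# the reStructuredText table produced by _markdown_table(row_series).
registered = register_openneuro_datasets(
    summary_file="dataset_summary.csv",  # hypothetical path
    namespace=globals(),
)
```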
eegdash/utils.py CHANGED
{eegdash-0.3.7.dev104.dist-info → eegdash-0.3.7.dev105.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: eegdash
-Version: 0.3.7.dev104
+Version: 0.3.7.dev105
 Summary: EEG data for machine learning
 Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
 License-Expression: GPL-3.0-only