PyPI - eegdash - Versions diffs - 0.3.4.dev70__py3-none-any.whl → 0.3.5.dev80__py3-none-any.whl - Mend

eegdash 0.3.4.dev70__py3-none-any.whl → 0.3.5.dev80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of eegdash might be problematic. Click here for more details.

Files changed (7) hide show

eegdash/__init__.py CHANGED Viewed

@@ -7,4 +7,4 @@ __init__mongo_client()
 __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset"]
-__version__ = "0.3.4.dev70"
+__version__ = "0.3.5.dev80"

eegdash/api.py CHANGED Viewed

@@ -9,6 +9,7 @@ import numpy as np
 import xarray as xr
 from dotenv import load_dotenv
 from joblib import Parallel, delayed
+from mne_bids import get_bids_path_from_fname, read_raw_bids
 from pymongo import InsertOne, UpdateOne
 from s3fs import S3FileSystem
@@ -34,6 +35,19 @@ class EEGDash:
     """
+    _ALLOWED_QUERY_FIELDS = {
+        "data_name",
+        "dataset",
+        "subject",
+        "task",
+        "session",
+        "run",
+        "modality",
+        "sampling_frequency",
+        "nchans",
+        "ntimes",
+    }
     def __init__(self, *, is_public: bool = True, is_staging: bool = False) -> None:
         """Create new instance of the EEGDash Database client.
@@ -71,34 +85,59 @@ class EEGDash:
             anon=True, client_kwargs={"region_name": "us-east-2"}
         )
-    def find(self, query: dict[str, Any], *args, **kwargs) -> list[Mapping[str, Any]]:
-        """Find records in the MongoDB collection that satisfy the given query.
+    def find(
+        self, query: dict[str, Any] = None, /, **kwargs
+    ) -> list[Mapping[str, Any]]:
+        """Find records in the MongoDB collection.
+        This method can be called in two ways:
+        1. With a pre-built MongoDB query dictionary (positional argument):
+           >>> eegdash.find({"dataset": "ds002718", "subject": {"$in": ["012", "013"]}})
+        2. With user-friendly keyword arguments for simple and multi-value queries:
+           >>> eegdash.find(dataset="ds002718", subject="012")
+           >>> eegdash.find(dataset="ds002718", subject=["012", "013"])
         Parameters
         ----------
-        query: dict
-            A dictionary that specifies the query to be executed; this is a reference
-            document that is used to match records in the MongoDB collection.
-        args:
-            Additional positional arguments for the MongoDB find() method; see
-            https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find
-        kwargs:
-            Additional keyword arguments for the MongoDB find() method.
+        query: dict, optional
+            A complete MongoDB query dictionary. This is a positional-only argument.
+        **kwargs:
+            Keyword arguments representing field-value pairs for the query.
+            Values can be single items (str, int) or lists of items for multi-search.
         Returns
         -------
         list:
             A list of DB records (string-keyed dictionaries) that match the query.
-        Example
-        -------
-        >>> eegdash = EEGDash()
-        >>> eegdash.find({"dataset": "ds002718", "subject": "012"})
+        Raises
+        ------
+        ValueError
+            If both a `query` dictionary and keyword arguments are provided.
         """
-        results = self.__collection.find(query, *args, **kwargs)
+        if query is not None and kwargs:
+            raise ValueError(
+                "Provide either a positional 'query' dictionary or keyword arguments, not both."
+            )
-        return [result for result in results]
+        final_query = {}
+        if query is not None:
+            final_query = query
+        elif kwargs:
+            final_query = self._build_query_from_kwargs(**kwargs)
+        else:
+            # By default, an empty query {} returns all documents.
+            # This can be dangerous, so we can either allow it or raise an error.
+            # Let's require an explicit query for safety.
+            raise ValueError(
+                "find() requires a query dictionary or at least one keyword argument. "
+                "To find all documents, use find({})."
+            )
+        results = self.__collection.find(final_query)
+        return list(results)
     def exist(self, query: dict[str, Any]) -> bool:
         """Return True if at least one record matches the query, else False.
@@ -184,6 +223,35 @@ class EEGDash:
         return record
+    def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
+        """Builds and validates a MongoDB query from user-friendly keyword arguments.
+        Translates list values into MongoDB's `$in` operator.
+        """
+        # 1. Validate that all provided keys are allowed for querying
+        unknown_fields = set(kwargs.keys()) - self._ALLOWED_QUERY_FIELDS
+        if unknown_fields:
+            raise ValueError(
+                f"Unsupported query field(s): {', '.join(sorted(unknown_fields))}. "
+                f"Allowed fields are: {', '.join(sorted(self._ALLOWED_QUERY_FIELDS))}"
+            )
+        # 2. Construct the query dictionary
+        query = {}
+        for key, value in kwargs.items():
+            if isinstance(value, (list, tuple)):
+                if not value:
+                    raise ValueError(
+                        f"Received an empty list for query parameter '{key}'. This is not supported."
+                    )
+                # If the value is a list, use the `$in` operator for multi-search
+                query[key] = {"$in": value}
+            else:
+                # Otherwise, it's a direct match
+                query[key] = value
+        return query
     def load_eeg_data_from_s3(self, s3path: str) -> xr.DataArray:
         """Load an EEGLAB .set file from an AWS S3 URI and return it as an xarray DataArray.
@@ -218,14 +286,15 @@ class EEGDash:
         Parameters
         ----------
         bids_file : str
-            Path to the file on the local filesystem.
+            Path to the BIDS-compliant file on the local filesystem.
         Notes
         -----
         Currently, only non-epoched .set files are supported.
         """
-        raw_object = mne.io.read_raw(bids_file)
+        bids_path = get_bids_path_from_fname(bids_file, verbose=False)
+        raw_object = read_raw_bids(bids_path=bids_path, verbose=False)
         eeg_data = raw_object.get_data()
         fs = raw_object.info["sfreq"]
@@ -521,8 +590,8 @@ class EEGDashDataset(BaseConcatDataset):
     def __init__(
         self,
         query: dict | None = None,
-        data_dir: str | list | None = None,
-        dataset: str | list | None = None,
+        cache_dir: str = "~/eegdash_cache",
+        dataset: str | None = None,
         description_fields: list[str] = [
             "subject",
             "session",
@@ -532,36 +601,55 @@ class EEGDashDataset(BaseConcatDataset):
             "gender",
             "sex",
         ],
-        cache_dir: str = "~/eegdash_cache",
         s3_bucket: str | None = None,
+        data_dir: str | None = None,
         eeg_dash_instance=None,
+        records: list[dict] | None = None,
         **kwargs,
     ):
         """Create a new EEGDashDataset from a given query or local BIDS dataset directory
         and dataset name. An EEGDashDataset is pooled collection of EEGDashBaseDataset
         instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.
+        Querying Examples:
+        ------------------
+        # Find by single subject
+        >>> ds = EEGDashDataset(dataset="ds005505", subject="NDARCA153NKE")
+        # Find by a list of subjects and a specific task
+        >>> subjects = ["NDARCA153NKE", "NDARXT792GY8"]
+        >>> ds = EEGDashDataset(dataset="ds005505", subject=subjects, task="RestingState")
+        # Use a raw MongoDB query for advanced filtering
+        >>> raw_query = {"dataset": "ds005505", "subject": {"$in": subjects}}
+        >>> ds = EEGDashDataset(query=raw_query)
         Parameters
         ----------
         query : dict | None
-            Optionally a dictionary that specifies the query to be executed; see
-            EEGDash.find() for details on the query format.
-        data_dir : str | list[str] | None
-            Optionally a string or a list of strings specifying one or more local
-            BIDS dataset directories from which to load the EEG data files. Exactly one
+            A raw MongoDB query dictionary. If provided, keyword arguments for filtering are ignored.
+        **kwargs : dict
+            Keyword arguments for filtering (e.g., `subject="X"`, `task=["T1", "T2"]`) and/or
+            arguments to be passed to the EEGDashBaseDataset constructor (e.g., `subject=...`).
+        cache_dir : str
+            A directory where the dataset will be cached locally.
+        data_dir : str | None
+            Optionally a string specifying a local BIDS dataset directory from which to load the EEG data files. Exactly one
             of query or data_dir must be provided.
-        dataset : str | list[str] | None
-            If data_dir is given, a name or list of names for for the dataset(s) to be loaded.
+        dataset : str | None
+            If data_dir is given, a name for the dataset to be loaded.
         description_fields : list[str]
             A list of fields to be extracted from the dataset records
             and included in the returned data description(s). Examples are typical
             subject metadata fields such as "subject", "session", "run", "task", etc.;
             see also data_config.description_fields for the default set of fields.
-        cache_dir : str
-            A directory where the dataset will be cached locally.
         s3_bucket : str | None
             An optional S3 bucket URI (e.g., "s3://mybucket") to use instead of the
             default OpenNeuro bucket for loading data files
+        records : list[dict] | None
+            Optional list of pre-fetched metadata records. If provided, the dataset is
+            constructed directly from these records without querying MongoDB.
         kwargs : dict
             Additional keyword arguments to be passed to the EEGDashBaseDataset
             constructor.
@@ -569,50 +657,79 @@ class EEGDashDataset(BaseConcatDataset):
         """
         self.cache_dir = cache_dir
         self.s3_bucket = s3_bucket
-        self.eeg_dash = eeg_dash_instance or EEGDash()
-        _owns_client = eeg_dash_instance is None
+        self.eeg_dash = eeg_dash_instance
+        _owns_client = False
+        if self.eeg_dash is None and records is None:
+            self.eeg_dash = EEGDash()
+            _owns_client = True
+        # Separate query kwargs from other kwargs passed to the BaseDataset constructor
+        query_kwargs = {
+            k: v for k, v in kwargs.items() if k in EEGDash._ALLOWED_QUERY_FIELDS
+        }
+        base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in query_kwargs}
+        if query and query_kwargs:
+            raise ValueError(
+                "Provide either a 'query' dictionary or keyword arguments for filtering, not both."
+            )
         try:
-            if query:
-                datasets = self.find_datasets(query, description_fields, **kwargs)
+            if records is not None:
+                self.records = records
+                datasets = [
+                    EEGDashBaseDataset(
+                        record,
+                        self.cache_dir,
+                        self.s3_bucket,
+                        **base_dataset_kwargs,
+                    )
+                    for record in self.records
+                ]
             elif data_dir:
-                if isinstance(data_dir, str):
+                # This path loads from a local directory and is not affected by DB query logic
+                if isinstance(data_dir, str) or isinstance(data_dir, Path):
                     datasets = self.load_bids_dataset(
-                        dataset, data_dir, description_fields, s3_bucket, **kwargs
+                        dataset=dataset,
+                        data_dir=data_dir,
+                        description_fields=description_fields,
+                        s3_bucket=s3_bucket,
+                        **base_dataset_kwargs,
                     )
                 else:
                     assert len(data_dir) == len(dataset), (
-                        "Number of datasets and their directories must match"
+                        "Number of datasets and directories must match"
                     )
                     datasets = []
                     for i, _ in enumerate(data_dir):
                         datasets.extend(
                             self.load_bids_dataset(
-                                dataset[i],
-                                data_dir[i],
-                                description_fields,
-                                s3_bucket,
-                                **kwargs,
+                                dataset=dataset[i],
+                                data_dir=data_dir[i],
+                                description_fields=description_fields,
+                                s3_bucket=s3_bucket,
+                                **base_dataset_kwargs,
                             )
                         )
+            elif query or query_kwargs:
+                # This is the DB query path that we are improving
+                datasets = self.find_datasets(
+                    query=query,
+                    description_fields=description_fields,
+                    query_kwargs=query_kwargs,
+                    base_dataset_kwargs=base_dataset_kwargs,
+                )
+                # We only need filesystem if we need to access S3
+                self.filesystem = S3FileSystem(
+                    anon=True, client_kwargs={"region_name": "us-east-2"}
+                )
             else:
                 raise ValueError(
-                    "Exactly one of 'query' or 'data_dir' must be provided."
+                    "You must provide either 'records', a 'data_dir', or a query/keyword arguments for filtering."
                 )
         finally:
-            # If we created the client, close it now that construction is done.
-            if _owns_client:
-                try:
-                    self.eeg_dash.close()
-                except Exception:
-                    # Don't let close errors break construction
-                    pass
-        self.filesystem = S3FileSystem(
-            anon=True, client_kwargs={"region_name": "us-east-2"}
-        )
-        self.eeg_dash.close()
+            if _owns_client and self.eeg_dash is not None:
+                self.eeg_dash.close()
         super().__init__(datasets)
@@ -630,7 +747,11 @@ class EEGDashDataset(BaseConcatDataset):
         return None
     def find_datasets(
-        self, query: dict[str, Any], description_fields: list[str], **kwargs
+        self,
+        query: dict[str, Any],
+        description_fields: list[str],
+        query_kwargs: dict,
+        base_dataset_kwargs: dict,
     ) -> list[EEGDashBaseDataset]:
         """Helper method to find datasets in the MongoDB collection that satisfy the
         given query and return them as a list of EEGDashBaseDataset objects.
@@ -652,7 +773,10 @@ class EEGDashDataset(BaseConcatDataset):
         """
         datasets: list[EEGDashBaseDataset] = []
-        for record in self.eeg_dash.find(query):
+        self.records = self.eeg_dash.find(query, **query_kwargs)
+        for record in self.records:
             description = {}
             for field in description_fields:
                 value = self.find_key_in_nested_dict(record, field)
@@ -664,15 +788,15 @@ class EEGDashDataset(BaseConcatDataset):
                     self.cache_dir,
                     self.s3_bucket,
                     description=description,
-                    **kwargs,
+                    **base_dataset_kwargs,
                 )
             )
         return datasets
     def load_bids_dataset(
         self,
-        dataset,
-        data_dir,
+        dataset: str,
+        data_dir: str | Path,
         description_fields: list[str],
         s3_bucket: str | None = None,
         **kwargs,

{eegdash-0.3.4.dev70.dist-info → eegdash-0.3.5.dev80.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: eegdash
-Version: 0.3.4.dev70
+Version: 0.3.5.dev80
 Summary: EEG data for machine learning
 Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
 License-Expression: GPL-3.0-only
@@ -38,6 +38,7 @@ Requires-Dist: tqdm
 Requires-Dist: xarray
 Requires-Dist: h5io>=0.2.4
 Requires-Dist: pymatreader
+Requires-Dist: eeglabio
 Requires-Dist: tabulate
 Provides-Extra: tests
 Requires-Dist: pytest; extra == "tests"

{eegdash-0.3.4.dev70.dist-info → eegdash-0.3.5.dev80.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-eegdash/__init__.py,sha256=z1uESq6VO66_4UpTGGFDW06PF_7WagRrULPFrTXrsYI,240
-eegdash/api.py,sha256=OqOZ27GYURSAZwTQHSs0QcW_6Mq1i_5XHP6KMcihb8A,27295
+eegdash/__init__.py,sha256=K-EaG_ZHr-O4aH8SHFg7PP_rbyqlvoa3JcBdlGsXlTU,240
+eegdash/api.py,sha256=KjmEVkfltLR5EwRnmnPp5rEDS5Oa6_dnprif9EVpeQs,32351
 eegdash/data_config.py,sha256=OS6ERO-jHrnEOfMJUehY7ieABdsRw_qWzOKJ4pzSfqw,1323
 eegdash/data_utils.py,sha256=_dycnPmGfTbYs7bc6edHxUn_m01dLYtp92_k44ffEoY,26475
 eegdash/dataset.py,sha256=ooLoxMFy2I8BY9gJl6ncTp_Gz-Rq0Z-o4NJyyomxLcU,2670
@@ -23,8 +23,8 @@ eegdash/features/feature_bank/dimensionality.py,sha256=j_Ds71Y1AbV2uLFQj8EuXQ4kz
 eegdash/features/feature_bank/signal.py,sha256=3Tb8z9gX7iZipxQJ9DSyy30JfdmW58kgvimSyZX74p8,3404
 eegdash/features/feature_bank/spectral.py,sha256=bNB7skusePs1gX7NOU6yRlw_Gr4UOCkO_ylkCgybzug,3319
 eegdash/features/feature_bank/utils.py,sha256=DGh-Q7-XFIittP7iBBxvsJaZrlVvuY5mw-G7q6C-PCI,1237
-eegdash-0.3.4.dev70.dist-info/licenses/LICENSE,sha256=asisR-xupy_NrQBFXnx6yqXeZcYWLvbAaiETl25iXT0,931
-eegdash-0.3.4.dev70.dist-info/METADATA,sha256=5jX-LB-ep0hcsCio2zFUKO3201B_0sa5gTbeha0I24k,10364
-eegdash-0.3.4.dev70.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-eegdash-0.3.4.dev70.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
-eegdash-0.3.4.dev70.dist-info/RECORD,,
+eegdash-0.3.5.dev80.dist-info/licenses/LICENSE,sha256=asisR-xupy_NrQBFXnx6yqXeZcYWLvbAaiETl25iXT0,931
+eegdash-0.3.5.dev80.dist-info/METADATA,sha256=R0-JDW1_w2p1JJjffDbuYSlHJKGv0g7nGmyl3_AtJfY,10388
+eegdash-0.3.5.dev80.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+eegdash-0.3.5.dev80.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
+eegdash-0.3.5.dev80.dist-info/RECORD,,

{eegdash-0.3.4.dev70.dist-info → eegdash-0.3.5.dev80.dist-info}/WHEEL RENAMED Viewed

File without changes

{eegdash-0.3.4.dev70.dist-info → eegdash-0.3.5.dev80.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{eegdash-0.3.4.dev70.dist-info → eegdash-0.3.5.dev80.dist-info}/top_level.txt RENAMED Viewed

File without changes

eegdash 0.3.4.dev70__py3-none-any.whl → 0.3.5.dev80__py3-none-any.whl

Potentially problematic release.

eegdash 0.3.4.dev70__py3-none-any.whl → 0.3.5.dev80__py3-none-any.whl