eegdash 0.2.0__tar.gz → 0.2.1.dev178237806__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

Files changed (70) hide show
  1. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/PKG-INFO +3 -6
  2. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/README.md +0 -4
  3. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/__init__.py +1 -1
  4. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/api.py +86 -59
  5. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/dataset.py +26 -17
  6. eegdash-0.2.1.dev178237806/eegdash/mongodb.py +66 -0
  7. eegdash-0.2.1.dev178237806/eegdash/utils.py +11 -0
  8. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash.egg-info/PKG-INFO +3 -6
  9. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash.egg-info/SOURCES.txt +4 -31
  10. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash.egg-info/requires.txt +1 -0
  11. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/pyproject.toml +3 -2
  12. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/tests/test_correctness.py +0 -1
  13. eegdash-0.2.1.dev178237806/tests/test_dataset.py +82 -0
  14. eegdash-0.2.1.dev178237806/tests/test_eegdash.py +83 -0
  15. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/tests/test_init.py +0 -3
  16. eegdash-0.2.1.dev178237806/tests/test_mongo_connection.py +115 -0
  17. eegdash-0.2.0/.github/workflows/pre-commit.yaml +0 -14
  18. eegdash-0.2.0/.github/workflows/tests.yml +0 -49
  19. eegdash-0.2.0/.gitignore +0 -17
  20. eegdash-0.2.0/.pre-commit-config.yaml +0 -48
  21. eegdash-0.2.0/.readthedocs.yaml +0 -24
  22. eegdash-0.2.0/DevNotes.md +0 -29
  23. eegdash-0.2.0/datasets.md +0 -254
  24. eegdash-0.2.0/docs/Makefile +0 -20
  25. eegdash-0.2.0/docs/architecture2.pptx +0 -0
  26. eegdash-0.2.0/docs/conf.py +0 -31
  27. eegdash-0.2.0/docs/convert_xls_2_martkdown.py +0 -36
  28. eegdash-0.2.0/docs/datasets.xlsx +0 -0
  29. eegdash-0.2.0/docs/index.rst +0 -17
  30. eegdash-0.2.0/docs/make.bat +0 -35
  31. eegdash-0.2.0/eegdash/utils.py +0 -11
  32. eegdash-0.2.0/notebooks/scratch.ipynb +0 -1097
  33. eegdash-0.2.0/notebooks/scratch_features.ipynb +0 -462465
  34. eegdash-0.2.0/notebooks/scratch_features2.ipynb +0 -55014
  35. eegdash-0.2.0/notebooks/test_pybids_braindecode_BIDSDataset.ipynb +0 -646
  36. eegdash-0.2.0/notebooks/tutorial_audi_oddball.ipynb +0 -441
  37. eegdash-0.2.0/notebooks/tutorial_eoec.ipynb +0 -515
  38. eegdash-0.2.0/notebooks/tutorial_features_eoec.ipynb +0 -58788
  39. eegdash-0.2.0/notebooks/tutorial_p3_oddball.ipynb +0 -511
  40. eegdash-0.2.0/notebooks/tutorial_pfactor_classification.ipynb +0 -4786
  41. eegdash-0.2.0/notebooks/tutorial_pfactor_features.ipynb +0 -38380
  42. eegdash-0.2.0/notebooks/tutorial_sex_classification.ipynb +0 -549
  43. eegdash-0.2.0/scripts/data_ingest.py +0 -404
  44. eegdash-0.2.0/scripts/datasets.json +0 -1
  45. eegdash-0.2.0/scripts/scan_openneuro.py +0 -67
  46. eegdash-0.2.0/tests/__init__.py +0 -5
  47. eegdash-0.2.0/tests/test_database.py +0 -21
  48. eegdash-0.2.0/tests/test_dataset.py +0 -14
  49. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/LICENSE +0 -0
  50. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/data_config.py +0 -0
  51. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/data_utils.py +0 -0
  52. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/__init__.py +0 -0
  53. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/datasets.py +0 -0
  54. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/decorators.py +0 -0
  55. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/extractors.py +0 -0
  56. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/__init__.py +0 -0
  57. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/complexity.py +0 -0
  58. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/connectivity.py +0 -0
  59. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/csp.py +0 -0
  60. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/dimensionality.py +0 -0
  61. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/signal.py +0 -0
  62. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/spectral.py +0 -0
  63. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/utils.py +0 -0
  64. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/inspect.py +0 -0
  65. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/serialization.py +0 -0
  66. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/utils.py +0 -0
  67. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/preprocessing.py +0 -0
  68. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash.egg-info/dependency_links.txt +0 -0
  69. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash.egg-info/top_level.txt +0 -0
  70. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.2.0
3
+ Version: 0.2.1.dev178237806
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
6
6
  License: GNU General Public License
@@ -43,7 +43,7 @@ Classifier: Programming Language :: Python :: 3
43
43
  Classifier: Programming Language :: Python :: 3.10
44
44
  Classifier: Programming Language :: Python :: 3.11
45
45
  Classifier: Programming Language :: Python :: 3.12
46
- Requires-Python: >3.10
46
+ Requires-Python: >=3.10
47
47
  Description-Content-Type: text/markdown
48
48
  License-File: LICENSE
49
49
  Requires-Dist: braindecode>=1.0
@@ -63,6 +63,7 @@ Requires-Dist: pytest; extra == "tests"
63
63
  Requires-Dist: pytest-cov; extra == "tests"
64
64
  Requires-Dist: codecov; extra == "tests"
65
65
  Requires-Dist: pytest_cases; extra == "tests"
66
+ Requires-Dist: pytest-benchmark; extra == "tests"
66
67
  Provides-Extra: dev
67
68
  Requires-Dist: pre-commit; extra == "dev"
68
69
  Provides-Extra: docs
@@ -164,7 +165,3 @@ EEG-DaSh is a collaborative initiative between the United States and Israel, sup
164
165
 
165
166
 
166
167
 
167
- python3 -m pip install --upgrade build
168
- python3 -m build
169
- python3 -m pip install --upgrade twine
170
- python3 -m twine upload --repository eegdash dist/*
@@ -80,7 +80,3 @@ EEG-DaSh is a collaborative initiative between the United States and Israel, sup
80
80
 
81
81
 
82
82
 
83
- python3 -m pip install --upgrade build
84
- python3 -m build
85
- python3 -m pip install --upgrade twine
86
- python3 -m twine upload --repository eegdash dist/*
@@ -5,4 +5,4 @@ from .utils import __init__mongo_client
5
5
  __init__mongo_client()
6
6
 
7
7
  __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset"]
8
- __version__ = "0.2.0"
8
+ __version__ = "0.2.1.dev178237806"
@@ -9,13 +9,14 @@ import numpy as np
9
9
  import xarray as xr
10
10
  from dotenv import load_dotenv
11
11
  from joblib import Parallel, delayed
12
- from pymongo import InsertOne, MongoClient, UpdateOne
12
+ from pymongo import InsertOne, UpdateOne
13
13
  from s3fs import S3FileSystem
14
14
 
15
15
  from braindecode.datasets import BaseConcatDataset
16
16
 
17
17
  from .data_config import config as data_config
18
18
  from .data_utils import EEGBIDSDataset, EEGDashBaseDataset
19
+ from .mongodb import MongoConnectionManager
19
20
 
20
21
  logger = logging.getLogger("eegdash")
21
22
 
@@ -55,6 +56,7 @@ class EEGDash:
55
56
  """
56
57
  self.config = data_config
57
58
  self.is_public = is_public
59
+ self.is_staging = is_staging
58
60
 
59
61
  if self.is_public:
60
62
  DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
@@ -62,31 +64,15 @@ class EEGDash:
62
64
  load_dotenv()
63
65
  DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")
64
66
 
65
- self.__client = MongoClient(DB_CONNECTION_STRING)
66
- self.__db = (
67
- self.__client["eegdash"]
68
- if not is_staging
69
- else self.__client["eegdashstaging"]
67
+ # Use singleton to get MongoDB client, database, and collection
68
+ self.__client, self.__db, self.__collection = MongoConnectionManager.get_client(
69
+ DB_CONNECTION_STRING, is_staging
70
70
  )
71
- self.__collection = self.__db["records"]
72
71
 
73
72
  self.filesystem = S3FileSystem(
74
73
  anon=True, client_kwargs={"region_name": "us-east-2"}
75
74
  )
76
75
 
77
- # MongoDB Operations
78
- # These methods provide a high-level interface to interact with the MongoDB
79
- # collection, allowing users to find, add, and update EEG data records.
80
- # - find:
81
- # - exist:
82
- # - add_request:
83
- # - add:
84
- # - update_request:
85
- # - remove_field:
86
- # - remove_field_from_db:
87
- # - close: Close the MongoDB connection.
88
- # - __del__: Destructor to close the MongoDB connection.
89
-
90
76
  def find(self, query: dict[str, Any], *args, **kwargs) -> list[Mapping[str, Any]]:
91
77
  """Find records in the MongoDB collection that satisfy the given query.
92
78
 
@@ -117,26 +103,48 @@ class EEGDash:
117
103
  return [result for result in results]
118
104
 
119
105
  def exist(self, query: dict[str, Any]) -> bool:
120
- """Check if the given query matches any records in the MongoDB collection.
106
+ """Return True if at least one record matches the query, else False.
121
107
 
122
- Note that currently only a limited set of query fields is allowed here.
108
+ This is a lightweight existence check that uses MongoDB's ``find_one``
109
+ instead of fetching all matching documents (which would be wasteful in
110
+ both time and memory for broad queries). Only a restricted set of
111
+ fields is accepted to avoid accidental full scans caused by malformed
112
+ or unsupported keys.
123
113
 
124
114
  Parameters
125
115
  ----------
126
- query: dict
127
- A dictionary that specifies the query to be executed; this is a reference
128
- document that is used to match records in the MongoDB collection.
116
+ query : dict
117
+ Mapping of allowed field(s) to value(s). Allowed keys: ``data_name``
118
+ and ``dataset``. The query must not be empty.
129
119
 
130
120
  Returns
131
121
  -------
132
- bool:
133
- True if at least one record matches the query, False otherwise.
122
+ bool
123
+ True if at least one matching record exists; False otherwise.
124
+
125
+ Raises
126
+ ------
127
+ TypeError
128
+ If ``query`` is not a dict.
129
+ ValueError
130
+ If ``query`` is empty or contains unsupported field names.
134
131
 
135
132
  """
136
- accepted_query_fields = ["data_name", "dataset"]
137
- assert all(field in accepted_query_fields for field in query.keys())
138
- sessions = self.find(query)
139
- return len(sessions) > 0
133
+ if not isinstance(query, dict):
134
+ raise TypeError("query must be a dict")
135
+ if not query:
136
+ raise ValueError("query cannot be empty")
137
+
138
+ accepted_query_fields = {"data_name", "dataset"}
139
+ unknown = set(query.keys()) - accepted_query_fields
140
+ if unknown:
141
+ raise ValueError(
142
+ f"Unsupported query field(s): {', '.join(sorted(unknown))}. "
143
+ f"Allowed: {sorted(accepted_query_fields)}"
144
+ )
145
+
146
+ doc = self.__collection.find_one(query, projection={"_id": 1})
147
+ return doc is not None
140
148
 
141
149
  def _validate_input(self, record: dict[str, Any]) -> dict[str, Any]:
142
150
  """Internal method to validate the input record against the expected schema.
@@ -491,13 +499,24 @@ class EEGDash:
491
499
  return self.__collection
492
500
 
493
501
  def close(self):
494
- """Close the MongoDB client connection."""
495
- if hasattr(self, "_EEGDash__client"):
496
- self.__client.close()
502
+ """Close the MongoDB client connection.
503
+
504
+ Note: Since MongoDB clients are now managed by a singleton,
505
+ this method no longer closes connections. Use close_all_connections()
506
+ class method to close all connections if needed.
507
+ """
508
+ # Individual instances no longer close the shared client
509
+ pass
510
+
511
+ @classmethod
512
+ def close_all_connections(cls):
513
+ """Close all MongoDB client connections managed by the singleton."""
514
+ MongoConnectionManager.close_all()
497
515
 
498
516
  def __del__(self):
499
517
  """Ensure connection is closed when object is deleted."""
500
- self.close()
518
+ # No longer needed since we're using singleton pattern
519
+ pass
501
520
 
502
521
 
503
522
  class EEGDashDataset(BaseConcatDataset):
@@ -651,28 +670,6 @@ class EEGDashDataset(BaseConcatDataset):
651
670
  and included in the returned dataset description(s).
652
671
 
653
672
  """
654
-
655
- def get_base_dataset_from_bids_file(
656
- bids_dataset: EEGBIDSDataset,
657
- bids_file: str,
658
- eeg_dash_instance: EEGDash,
659
- s3_bucket: str | None,
660
- ) -> EEGDashBaseDataset:
661
- """Instantiate a single EEGDashBaseDataset given a local BIDS file. Note
662
- this does not actually load the data from disk, but will access the metadata.
663
- """
664
- record = eeg_dash_instance.load_eeg_attrs_from_bids_file(
665
- bids_dataset, bids_file
666
- )
667
- description = {}
668
- for field in description_fields:
669
- value = self.find_key_in_nested_dict(record, field)
670
- if value is not None:
671
- description[field] = value
672
- return EEGDashBaseDataset(
673
- record, self.cache_dir, s3_bucket, description=description, **kwargs
674
- )
675
-
676
673
  bids_dataset = EEGBIDSDataset(
677
674
  data_dir=data_dir,
678
675
  dataset=dataset,
@@ -680,11 +677,41 @@ class EEGDashDataset(BaseConcatDataset):
680
677
  eeg_dash_instance = EEGDash()
681
678
  try:
682
679
  datasets = Parallel(n_jobs=-1, prefer="threads", verbose=1)(
683
- delayed(get_base_dataset_from_bids_file)(
684
- bids_dataset, bids_file, eeg_dash_instance, s3_bucket
680
+ delayed(self.get_base_dataset_from_bids_file)(
681
+ bids_dataset=bids_dataset,
682
+ bids_file=bids_file,
683
+ eeg_dash_instance=eeg_dash_instance,
684
+ s3_bucket=s3_bucket,
685
+ description_fields=description_fields,
685
686
  )
686
687
  for bids_file in bids_dataset.get_files()
687
688
  )
688
689
  return datasets
689
690
  finally:
690
691
  eeg_dash_instance.close()
692
+
693
+ def get_base_dataset_from_bids_file(
694
+ self,
695
+ bids_dataset: EEGBIDSDataset,
696
+ bids_file: str,
697
+ eeg_dash_instance: EEGDash,
698
+ s3_bucket: str | None,
699
+ description_fields: list[str],
700
+ ) -> EEGDashBaseDataset:
701
+ """Instantiate a single EEGDashBaseDataset given a local BIDS file. Note
702
+ this does not actually load the data from disk, but will access the metadata.
703
+ """
704
+ record = eeg_dash_instance.load_eeg_attrs_from_bids_file(
705
+ bids_dataset, bids_file
706
+ )
707
+ description = {}
708
+ for field in description_fields:
709
+ value = self.find_key_in_nested_dict(record, field)
710
+ if value is not None:
711
+ description[field] = value
712
+ return EEGDashBaseDataset(
713
+ record,
714
+ self.cache_dir,
715
+ s3_bucket,
716
+ description=description,
717
+ )
@@ -5,8 +5,9 @@ class EEGChallengeDataset(EEGDashDataset):
5
5
  def __init__(
6
6
  self,
7
7
  release: str = "R5",
8
+ query: dict | None = None,
8
9
  cache_dir: str = ".eegdash_cache",
9
- s3_bucket: str | None = "s3://nmdatasets/NeurIPS25/R5_L100",
10
+ s3_bucket: str | None = "s3://nmdatasets/NeurIPS25/",
10
11
  **kwargs,
11
12
  ):
12
13
  """Create a new EEGDashDataset from a given query or local BIDS dataset directory
@@ -15,27 +16,19 @@ class EEGChallengeDataset(EEGDashDataset):
15
16
 
16
17
  Parameters
17
18
  ----------
19
+ release: str
20
+ Release name. Can be one of ["R1", ..., "R11"]
18
21
  query : dict | None
19
- Optionally a dictionary that specifies the query to be executed; see
20
- EEGDash.find() for details on the query format.
21
- data_dir : str | list[str] | None
22
- Optionally a string or a list of strings specifying one or more local
23
- BIDS dataset directories from which to load the EEG data files. Exactly one
24
- of query or data_dir must be provided.
25
- dataset : str | list[str] | None
26
- If data_dir is given, a name or list of names for the dataset(s) to be loaded.
27
- description_fields : list[str]
28
- A list of fields to be extracted from the dataset records
29
- and included in the returned data description(s). Examples are typical
30
- subject metadata fields such as "subject", "session", "run", "task", etc.;
31
- see also data_config.description_fields for the default set of fields.
22
+ Optionally a dictionary that specifies a query to be executed,
23
+ in addition to the dataset (automatically inferred from the release argument).
24
+ See EEGDash.find() for details on the query format.
32
25
  cache_dir : str
33
26
  A directory where the dataset will be cached locally.
34
27
  s3_bucket : str | None
35
28
  An optional S3 bucket URI to use instead of the
36
29
  default OpenNeuro bucket for loading data files.
37
30
  kwargs : dict
38
- Additional keyword arguments to be passed to the EEGDashBaseDataset
31
+ Additional keyword arguments to be passed to the EEGDashDataset
39
32
  constructor.
40
33
 
41
34
  """
@@ -52,9 +45,25 @@ class EEGChallengeDataset(EEGDashDataset):
52
45
  "R2": "ds005506",
53
46
  "R1": "ds005505",
54
47
  }
48
+
49
+ self.release = release
50
+ if release not in dsnumber_release_map:
51
+ raise ValueError(f"Unknown release: {release}")
52
+
53
+ dataset = dsnumber_release_map[release]
54
+ if query is None:
55
+ query = {"dataset": dataset}
56
+ elif "dataset" not in query:
57
+ query["dataset"] = dataset
58
+ elif query["dataset"] != dataset:
59
+ raise ValueError(
60
+ f"Query dataset {query['dataset']} does not match the release {release} "
61
+ f"which corresponds to dataset {dataset}."
62
+ )
63
+
55
64
  super().__init__(
56
- query={"dataset": dsnumber_release_map[release]},
65
+ query=query,
57
66
  cache_dir=cache_dir,
58
- s3_bucket=s3_bucket,
67
+ s3_bucket=f"{s3_bucket}/{release}_L100",
59
68
  **kwargs,
60
69
  )
@@ -0,0 +1,66 @@
1
+ import threading
2
+
3
+ from pymongo import MongoClient
4
+
5
+ # MongoDB Operations
6
+ # These methods provide a high-level interface to interact with the MongoDB
7
+ # collection, allowing users to find, add, and update EEG data records.
8
+ # - find:
9
+ # - exist:
10
+ # - add_request:
11
+ # - add:
12
+ # - update_request:
13
+ # - remove_field:
14
+ # - remove_field_from_db:
15
+ # - close: Close the MongoDB connection.
16
+ # - __del__: Destructor to close the MongoDB connection.
17
+
18
+
19
+ class MongoConnectionManager:
20
+ """Singleton class to manage MongoDB client connections."""
21
+
22
+ _instances = {}
23
+ _lock = threading.Lock()
24
+
25
+ @classmethod
26
+ def get_client(cls, connection_string: str, is_staging: bool = False):
27
+ """Get or create a MongoDB client for the given connection string and staging flag.
28
+
29
+ Parameters
30
+ ----------
31
+ connection_string : str
32
+ The MongoDB connection string
33
+ is_staging : bool
34
+ Whether to use staging database
35
+
36
+ Returns
37
+ -------
38
+ tuple
39
+ A tuple of (client, database, collection)
40
+
41
+ """
42
+ # Create a unique key based on connection string and staging flag
43
+ key = (connection_string, is_staging)
44
+
45
+ if key not in cls._instances:
46
+ with cls._lock:
47
+ # Double-check pattern to avoid race conditions
48
+ if key not in cls._instances:
49
+ client = MongoClient(connection_string)
50
+ db_name = "eegdashstaging" if is_staging else "eegdash"
51
+ db = client[db_name]
52
+ collection = db["records"]
53
+ cls._instances[key] = (client, db, collection)
54
+
55
+ return cls._instances[key]
56
+
57
+ @classmethod
58
+ def close_all(cls):
59
+ """Close all MongoDB client connections."""
60
+ with cls._lock:
61
+ for client, _, _ in cls._instances.values():
62
+ try:
63
+ client.close()
64
+ except Exception:
65
+ pass
66
+ cls._instances.clear()
@@ -0,0 +1,11 @@
1
+ from mne.utils import get_config, set_config, use_log_level
2
+
3
+
4
+ def __init__mongo_client():
5
+ with use_log_level("ERROR"):
6
+ if get_config("EEGDASH_DB_URI") is None:
7
+ set_config(
8
+ "EEGDASH_DB_URI",
9
+ "mongodb+srv://eegdash-user:mdzoMjQcHWTVnKDq@cluster0.vz35p.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0",
10
+ set_env=True,
11
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.2.0
3
+ Version: 0.2.1.dev178237806
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
6
6
  License: GNU General Public License
@@ -43,7 +43,7 @@ Classifier: Programming Language :: Python :: 3
43
43
  Classifier: Programming Language :: Python :: 3.10
44
44
  Classifier: Programming Language :: Python :: 3.11
45
45
  Classifier: Programming Language :: Python :: 3.12
46
- Requires-Python: >3.10
46
+ Requires-Python: >=3.10
47
47
  Description-Content-Type: text/markdown
48
48
  License-File: LICENSE
49
49
  Requires-Dist: braindecode>=1.0
@@ -63,6 +63,7 @@ Requires-Dist: pytest; extra == "tests"
63
63
  Requires-Dist: pytest-cov; extra == "tests"
64
64
  Requires-Dist: codecov; extra == "tests"
65
65
  Requires-Dist: pytest_cases; extra == "tests"
66
+ Requires-Dist: pytest-benchmark; extra == "tests"
66
67
  Provides-Extra: dev
67
68
  Requires-Dist: pre-commit; extra == "dev"
68
69
  Provides-Extra: docs
@@ -164,7 +165,3 @@ EEG-DaSh is a collaborative initiative between the United States and Israel, sup
164
165
 
165
166
 
166
167
 
167
- python3 -m pip install --upgrade build
168
- python3 -m build
169
- python3 -m pip install --upgrade twine
170
- python3 -m twine upload --repository eegdash dist/*
@@ -1,25 +1,12 @@
1
- .gitignore
2
- .pre-commit-config.yaml
3
- .readthedocs.yaml
4
- DevNotes.md
5
1
  LICENSE
6
2
  README.md
7
- datasets.md
8
3
  pyproject.toml
9
- .github/workflows/pre-commit.yaml
10
- .github/workflows/tests.yml
11
- docs/Makefile
12
- docs/architecture2.pptx
13
- docs/conf.py
14
- docs/convert_xls_2_martkdown.py
15
- docs/datasets.xlsx
16
- docs/index.rst
17
- docs/make.bat
18
4
  eegdash/__init__.py
19
5
  eegdash/api.py
20
6
  eegdash/data_config.py
21
7
  eegdash/data_utils.py
22
8
  eegdash/dataset.py
9
+ eegdash/mongodb.py
23
10
  eegdash/preprocessing.py
24
11
  eegdash/utils.py
25
12
  eegdash.egg-info/PKG-INFO
@@ -42,22 +29,8 @@ eegdash/features/feature_bank/dimensionality.py
42
29
  eegdash/features/feature_bank/signal.py
43
30
  eegdash/features/feature_bank/spectral.py
44
31
  eegdash/features/feature_bank/utils.py
45
- notebooks/scratch.ipynb
46
- notebooks/scratch_features.ipynb
47
- notebooks/scratch_features2.ipynb
48
- notebooks/test_pybids_braindecode_BIDSDataset.ipynb
49
- notebooks/tutorial_audi_oddball.ipynb
50
- notebooks/tutorial_eoec.ipynb
51
- notebooks/tutorial_features_eoec.ipynb
52
- notebooks/tutorial_p3_oddball.ipynb
53
- notebooks/tutorial_pfactor_classification.ipynb
54
- notebooks/tutorial_pfactor_features.ipynb
55
- notebooks/tutorial_sex_classification.ipynb
56
- scripts/data_ingest.py
57
- scripts/datasets.json
58
- scripts/scan_openneuro.py
59
- tests/__init__.py
60
32
  tests/test_correctness.py
61
- tests/test_database.py
62
33
  tests/test_dataset.py
63
- tests/test_init.py
34
+ tests/test_eegdash.py
35
+ tests/test_init.py
36
+ tests/test_mongo_connection.py
@@ -36,3 +36,4 @@ pytest
36
36
  pytest-cov
37
37
  codecov
38
38
  pytest_cases
39
+ pytest-benchmark
@@ -1,5 +1,5 @@
1
1
  [build-system]
2
- requires = ["setuptools"]
2
+ requires = ["setuptools>=64", "wheel"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
@@ -13,7 +13,7 @@ authors = [
13
13
  description = "EEG data for machine learning"
14
14
  readme = "README.md"
15
15
  license = { file = "LICENSE" }
16
- requires-python = ">3.10"
16
+ requires-python = ">=3.10"
17
17
 
18
18
  classifiers = [
19
19
  "License :: OSI Approved :: MIT License",
@@ -59,6 +59,7 @@ tests = [
59
59
  'pytest-cov',
60
60
  'codecov',
61
61
  'pytest_cases',
62
+ 'pytest-benchmark',
62
63
  ]
63
64
  dev = [
64
65
  "pre-commit"
@@ -82,7 +82,6 @@ def preprocess_instance(eeg_dash_dataset):
82
82
  ]
83
83
  pre_processed_dir = cache_folder / "preprocessed"
84
84
  pre_processed_dir.mkdir(parents=True, exist_ok=True)
85
-
86
85
  try:
87
86
  eeg_dash_dataset = load_concat_dataset(
88
87
  pre_processed_dir,
@@ -0,0 +1,82 @@
1
+ import time
2
+
3
+ import pytest
4
+
5
+ from eegdash.api import EEGDash
6
+ from eegdash.dataset import EEGChallengeDataset
7
+
8
+ RELEASES = ["R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9", "R10", "R11"]
9
+ FILES_PER_RELEASE = [1342, 1405, 1812, 3342, 3326, 1227, 3100, 2320, 2885, 2516, 3397]
10
+
11
+ RELEASE_FILES = list(zip(RELEASES, FILES_PER_RELEASE))
12
+
13
+
14
+ def _load_release(release):
15
+ ds = EEGChallengeDataset(release=release)
16
+ getattr(ds, "description", None)
17
+ return ds
18
+
19
+
20
+ @pytest.fixture(scope="session")
21
+ def warmed_mongo():
22
+ try:
23
+ EEGDash()
24
+ except Exception:
25
+ pytest.skip("Mongo not reachable")
26
+
27
+
28
+ def test_eeg_challenge_dataset_initialization():
29
+ """Test the initialization of EEGChallengeDataset."""
30
+ dataset = EEGChallengeDataset(release="R5")
31
+
32
+ release = "R5"
33
+ expected_bucket_prefix = f"s3://nmdatasets/NeurIPS25//{release}_L100"
34
+ assert dataset.s3_bucket == expected_bucket_prefix, (
35
+ f"Unexpected s3_bucket: {dataset.s3_bucket} (expected {expected_bucket_prefix})"
36
+ )
37
+
38
+ # Expected components (kept explicit for readability & easier future edits)
39
+ expected_dataset = "ds005509"
40
+ expected_subject = "sub-NDARAC350XUM"
41
+ expected_task = "DespicableMe"
42
+ expected_suffix = (
43
+ f"{expected_dataset}/{expected_subject}/eeg/"
44
+ f"{expected_subject}_task-{expected_task}_eeg.set"
45
+ )
46
+
47
+ expected_full_path = f"{dataset.s3_bucket}/{expected_suffix}"
48
+ first_file = dataset.datasets[0].s3file
49
+
50
+ assert first_file == expected_full_path, (
51
+ "Mismatch in first dataset s3 file path.\n"
52
+ f"Got : {first_file}\n"
53
+ f"Expected: {expected_full_path}"
54
+ )
55
+
56
+
57
+ @pytest.mark.parametrize("release, number_files", RELEASE_FILES)
58
+ def test_eeg_challenge_dataset_amount_files(release, number_files):
59
+ dataset = EEGChallengeDataset(release=release)
60
+ assert len(dataset.datasets) == number_files
61
+
62
+
63
+ @pytest.mark.parametrize("release", RELEASES)
64
+ def test_mongodb_load_benchmark(benchmark, warmed_mongo, release):
65
+ # Group makes the report nicer when comparing releases
66
+ benchmark.group = "EEGChallengeDataset.load"
67
+ result = benchmark.pedantic(
68
+ _load_release,
69
+ args=(release,),
70
+ iterations=1, # I/O-bound → 1 iteration per round
71
+ rounds=5, # take min/median across several cold-ish runs
72
+ warmup_rounds=1, # do one warmup round
73
+ )
74
+ assert result is not None
75
+
76
+
77
+ @pytest.mark.parametrize("release", RELEASES)
78
+ def test_mongodb_load_under_slo(release):
79
+ start_time = time.perf_counter()
80
+ _ = EEGChallengeDataset(release=release)
81
+ duration = time.perf_counter() - start_time
82
+ assert duration < 10, f"{release} took {duration:.2f}s"