eegdash 0.2.0__tar.gz → 0.2.1.dev178237806__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

Files changed (70) hide show
  1. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/PKG-INFO +3 -6
  2. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/README.md +0 -4
  3. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/__init__.py +1 -1
  4. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/api.py +86 -59
  5. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/dataset.py +26 -17
  6. eegdash-0.2.1.dev178237806/eegdash/mongodb.py +66 -0
  7. eegdash-0.2.1.dev178237806/eegdash/utils.py +11 -0
  8. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash.egg-info/PKG-INFO +3 -6
  9. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash.egg-info/SOURCES.txt +4 -31
  10. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash.egg-info/requires.txt +1 -0
  11. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/pyproject.toml +3 -2
  12. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/tests/test_correctness.py +0 -1
  13. eegdash-0.2.1.dev178237806/tests/test_dataset.py +82 -0
  14. eegdash-0.2.1.dev178237806/tests/test_eegdash.py +83 -0
  15. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/tests/test_init.py +0 -3
  16. eegdash-0.2.1.dev178237806/tests/test_mongo_connection.py +115 -0
  17. eegdash-0.2.0/.github/workflows/pre-commit.yaml +0 -14
  18. eegdash-0.2.0/.github/workflows/tests.yml +0 -49
  19. eegdash-0.2.0/.gitignore +0 -17
  20. eegdash-0.2.0/.pre-commit-config.yaml +0 -48
  21. eegdash-0.2.0/.readthedocs.yaml +0 -24
  22. eegdash-0.2.0/DevNotes.md +0 -29
  23. eegdash-0.2.0/datasets.md +0 -254
  24. eegdash-0.2.0/docs/Makefile +0 -20
  25. eegdash-0.2.0/docs/architecture2.pptx +0 -0
  26. eegdash-0.2.0/docs/conf.py +0 -31
  27. eegdash-0.2.0/docs/convert_xls_2_martkdown.py +0 -36
  28. eegdash-0.2.0/docs/datasets.xlsx +0 -0
  29. eegdash-0.2.0/docs/index.rst +0 -17
  30. eegdash-0.2.0/docs/make.bat +0 -35
  31. eegdash-0.2.0/eegdash/utils.py +0 -11
  32. eegdash-0.2.0/notebooks/scratch.ipynb +0 -1097
  33. eegdash-0.2.0/notebooks/scratch_features.ipynb +0 -462465
  34. eegdash-0.2.0/notebooks/scratch_features2.ipynb +0 -55014
  35. eegdash-0.2.0/notebooks/test_pybids_braindecode_BIDSDataset.ipynb +0 -646
  36. eegdash-0.2.0/notebooks/tutorial_audi_oddball.ipynb +0 -441
  37. eegdash-0.2.0/notebooks/tutorial_eoec.ipynb +0 -515
  38. eegdash-0.2.0/notebooks/tutorial_features_eoec.ipynb +0 -58788
  39. eegdash-0.2.0/notebooks/tutorial_p3_oddball.ipynb +0 -511
  40. eegdash-0.2.0/notebooks/tutorial_pfactor_classification.ipynb +0 -4786
  41. eegdash-0.2.0/notebooks/tutorial_pfactor_features.ipynb +0 -38380
  42. eegdash-0.2.0/notebooks/tutorial_sex_classification.ipynb +0 -549
  43. eegdash-0.2.0/scripts/data_ingest.py +0 -404
  44. eegdash-0.2.0/scripts/datasets.json +0 -1
  45. eegdash-0.2.0/scripts/scan_openneuro.py +0 -67
  46. eegdash-0.2.0/tests/__init__.py +0 -5
  47. eegdash-0.2.0/tests/test_database.py +0 -21
  48. eegdash-0.2.0/tests/test_dataset.py +0 -14
  49. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/LICENSE +0 -0
  50. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/data_config.py +0 -0
  51. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/data_utils.py +0 -0
  52. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/__init__.py +0 -0
  53. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/datasets.py +0 -0
  54. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/decorators.py +0 -0
  55. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/extractors.py +0 -0
  56. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/__init__.py +0 -0
  57. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/complexity.py +0 -0
  58. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/connectivity.py +0 -0
  59. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/csp.py +0 -0
  60. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/dimensionality.py +0 -0
  61. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/signal.py +0 -0
  62. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/spectral.py +0 -0
  63. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/feature_bank/utils.py +0 -0
  64. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/inspect.py +0 -0
  65. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/serialization.py +0 -0
  66. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/features/utils.py +0 -0
  67. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash/preprocessing.py +0 -0
  68. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash.egg-info/dependency_links.txt +0 -0
  69. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/eegdash.egg-info/top_level.txt +0 -0
  70. {eegdash-0.2.0 → eegdash-0.2.1.dev178237806}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.2.0
3
+ Version: 0.2.1.dev178237806
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
6
6
  License: GNU General Public License
@@ -43,7 +43,7 @@ Classifier: Programming Language :: Python :: 3
43
43
  Classifier: Programming Language :: Python :: 3.10
44
44
  Classifier: Programming Language :: Python :: 3.11
45
45
  Classifier: Programming Language :: Python :: 3.12
46
- Requires-Python: >3.10
46
+ Requires-Python: >=3.10
47
47
  Description-Content-Type: text/markdown
48
48
  License-File: LICENSE
49
49
  Requires-Dist: braindecode>=1.0
@@ -63,6 +63,7 @@ Requires-Dist: pytest; extra == "tests"
63
63
  Requires-Dist: pytest-cov; extra == "tests"
64
64
  Requires-Dist: codecov; extra == "tests"
65
65
  Requires-Dist: pytest_cases; extra == "tests"
66
+ Requires-Dist: pytest-benchmark; extra == "tests"
66
67
  Provides-Extra: dev
67
68
  Requires-Dist: pre-commit; extra == "dev"
68
69
  Provides-Extra: docs
@@ -164,7 +165,3 @@ EEG-DaSh is a collaborative initiative between the United States and Israel, sup
164
165
 
165
166
 
166
167
 
167
- python3 -m pip install --upgrade build
168
- python3 -m build
169
- python3 -m pip install --upgrade twine
170
- python3 -m twine upload --repository eegdash dist/*
@@ -80,7 +80,3 @@ EEG-DaSh is a collaborative initiative between the United States and Israel, sup
80
80
 
81
81
 
82
82
 
83
- python3 -m pip install --upgrade build
84
- python3 -m build
85
- python3 -m pip install --upgrade twine
86
- python3 -m twine upload --repository eegdash dist/*
@@ -5,4 +5,4 @@ from .utils import __init__mongo_client
5
5
  __init__mongo_client()
6
6
 
7
7
  __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset"]
8
- __version__ = "0.2.0"
8
+ __version__ = "0.2.1.dev178237806"
@@ -9,13 +9,14 @@ import numpy as np
9
9
  import xarray as xr
10
10
  from dotenv import load_dotenv
11
11
  from joblib import Parallel, delayed
12
- from pymongo import InsertOne, MongoClient, UpdateOne
12
+ from pymongo import InsertOne, UpdateOne
13
13
  from s3fs import S3FileSystem
14
14
 
15
15
  from braindecode.datasets import BaseConcatDataset
16
16
 
17
17
  from .data_config import config as data_config
18
18
  from .data_utils import EEGBIDSDataset, EEGDashBaseDataset
19
+ from .mongodb import MongoConnectionManager
19
20
 
20
21
  logger = logging.getLogger("eegdash")
21
22
 
@@ -55,6 +56,7 @@ class EEGDash:
55
56
  """
56
57
  self.config = data_config
57
58
  self.is_public = is_public
59
+ self.is_staging = is_staging
58
60
 
59
61
  if self.is_public:
60
62
  DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
@@ -62,31 +64,15 @@ class EEGDash:
62
64
  load_dotenv()
63
65
  DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")
64
66
 
65
- self.__client = MongoClient(DB_CONNECTION_STRING)
66
- self.__db = (
67
- self.__client["eegdash"]
68
- if not is_staging
69
- else self.__client["eegdashstaging"]
67
+ # Use singleton to get MongoDB client, database, and collection
68
+ self.__client, self.__db, self.__collection = MongoConnectionManager.get_client(
69
+ DB_CONNECTION_STRING, is_staging
70
70
  )
71
- self.__collection = self.__db["records"]
72
71
 
73
72
  self.filesystem = S3FileSystem(
74
73
  anon=True, client_kwargs={"region_name": "us-east-2"}
75
74
  )
76
75
 
77
- # MongoDB Operations
78
- # These methods provide a high-level interface to interact with the MongoDB
79
- # collection, allowing users to find, add, and update EEG data records.
80
- # - find:
81
- # - exist:
82
- # - add_request:
83
- # - add:
84
- # - update_request:
85
- # - remove_field:
86
- # - remove_field_from_db:
87
- # - close: Close the MongoDB connection.
88
- # - __del__: Destructor to close the MongoDB connection.
89
-
90
76
  def find(self, query: dict[str, Any], *args, **kwargs) -> list[Mapping[str, Any]]:
91
77
  """Find records in the MongoDB collection that satisfy the given query.
92
78
 
@@ -117,26 +103,48 @@ class EEGDash:
117
103
  return [result for result in results]
118
104
 
119
105
  def exist(self, query: dict[str, Any]) -> bool:
120
- """Check if the given query matches any records in the MongoDB collection.
106
+ """Return True if at least one record matches the query, else False.
121
107
 
122
- Note that currently only a limited set of query fields is allowed here.
108
+ This is a lightweight existence check that uses MongoDB's ``find_one``
109
+ instead of fetching all matching documents (which would be wasteful in
110
+ both time and memory for broad queries). Only a restricted set of
111
+ fields is accepted to avoid accidental full scans caused by malformed
112
+ or unsupported keys.
123
113
 
124
114
  Parameters
125
115
  ----------
126
- query: dict
127
- A dictionary that specifies the query to be executed; this is a reference
128
- document that is used to match records in the MongoDB collection.
116
+ query : dict
117
+ Mapping of allowed field(s) to value(s). Allowed keys: ``data_name``
118
+ and ``dataset``. The query must not be empty.
129
119
 
130
120
  Returns
131
121
  -------
132
- bool:
133
- True if at least one record matches the query, False otherwise.
122
+ bool
123
+ True if at least one matching record exists; False otherwise.
124
+
125
+ Raises
126
+ ------
127
+ TypeError
128
+ If ``query`` is not a dict.
129
+ ValueError
130
+ If ``query`` is empty or contains unsupported field names.
134
131
 
135
132
  """
136
- accepted_query_fields = ["data_name", "dataset"]
137
- assert all(field in accepted_query_fields for field in query.keys())
138
- sessions = self.find(query)
139
- return len(sessions) > 0
133
+ if not isinstance(query, dict):
134
+ raise TypeError("query must be a dict")
135
+ if not query:
136
+ raise ValueError("query cannot be empty")
137
+
138
+ accepted_query_fields = {"data_name", "dataset"}
139
+ unknown = set(query.keys()) - accepted_query_fields
140
+ if unknown:
141
+ raise ValueError(
142
+ f"Unsupported query field(s): {', '.join(sorted(unknown))}. "
143
+ f"Allowed: {sorted(accepted_query_fields)}"
144
+ )
145
+
146
+ doc = self.__collection.find_one(query, projection={"_id": 1})
147
+ return doc is not None
140
148
 
141
149
  def _validate_input(self, record: dict[str, Any]) -> dict[str, Any]:
142
150
  """Internal method to validate the input record against the expected schema.
@@ -491,13 +499,24 @@ class EEGDash:
491
499
  return self.__collection
492
500
 
493
501
  def close(self):
494
- """Close the MongoDB client connection."""
495
- if hasattr(self, "_EEGDash__client"):
496
- self.__client.close()
502
+ """Close the MongoDB client connection.
503
+
504
+ Note: Since MongoDB clients are now managed by a singleton,
505
+ this method no longer closes connections. Use close_all_connections()
506
+ class method to close all connections if needed.
507
+ """
508
+ # Individual instances no longer close the shared client
509
+ pass
510
+
511
+ @classmethod
512
+ def close_all_connections(cls):
513
+ """Close all MongoDB client connections managed by the singleton."""
514
+ MongoConnectionManager.close_all()
497
515
 
498
516
  def __del__(self):
499
517
  """Ensure connection is closed when object is deleted."""
500
- self.close()
518
+ # No longer needed since we're using singleton pattern
519
+ pass
501
520
 
502
521
 
503
522
  class EEGDashDataset(BaseConcatDataset):
@@ -651,28 +670,6 @@ class EEGDashDataset(BaseConcatDataset):
651
670
  and included in the returned dataset description(s).
652
671
 
653
672
  """
654
-
655
- def get_base_dataset_from_bids_file(
656
- bids_dataset: EEGBIDSDataset,
657
- bids_file: str,
658
- eeg_dash_instance: EEGDash,
659
- s3_bucket: str | None,
660
- ) -> EEGDashBaseDataset:
661
- """Instantiate a single EEGDashBaseDataset given a local BIDS file. Note
662
- this does not actually load the data from disk, but will access the metadata.
663
- """
664
- record = eeg_dash_instance.load_eeg_attrs_from_bids_file(
665
- bids_dataset, bids_file
666
- )
667
- description = {}
668
- for field in description_fields:
669
- value = self.find_key_in_nested_dict(record, field)
670
- if value is not None:
671
- description[field] = value
672
- return EEGDashBaseDataset(
673
- record, self.cache_dir, s3_bucket, description=description, **kwargs
674
- )
675
-
676
673
  bids_dataset = EEGBIDSDataset(
677
674
  data_dir=data_dir,
678
675
  dataset=dataset,
@@ -680,11 +677,41 @@ class EEGDashDataset(BaseConcatDataset):
680
677
  eeg_dash_instance = EEGDash()
681
678
  try:
682
679
  datasets = Parallel(n_jobs=-1, prefer="threads", verbose=1)(
683
- delayed(get_base_dataset_from_bids_file)(
684
- bids_dataset, bids_file, eeg_dash_instance, s3_bucket
680
+ delayed(self.get_base_dataset_from_bids_file)(
681
+ bids_dataset=bids_dataset,
682
+ bids_file=bids_file,
683
+ eeg_dash_instance=eeg_dash_instance,
684
+ s3_bucket=s3_bucket,
685
+ description_fields=description_fields,
685
686
  )
686
687
  for bids_file in bids_dataset.get_files()
687
688
  )
688
689
  return datasets
689
690
  finally:
690
691
  eeg_dash_instance.close()
692
+
693
+ def get_base_dataset_from_bids_file(
694
+ self,
695
+ bids_dataset: EEGBIDSDataset,
696
+ bids_file: str,
697
+ eeg_dash_instance: EEGDash,
698
+ s3_bucket: str | None,
699
+ description_fields: list[str],
700
+ ) -> EEGDashBaseDataset:
701
+ """Instantiate a single EEGDashBaseDataset given a local BIDS file. Note
702
+ this does not actually load the data from disk, but will access the metadata.
703
+ """
704
+ record = eeg_dash_instance.load_eeg_attrs_from_bids_file(
705
+ bids_dataset, bids_file
706
+ )
707
+ description = {}
708
+ for field in description_fields:
709
+ value = self.find_key_in_nested_dict(record, field)
710
+ if value is not None:
711
+ description[field] = value
712
+ return EEGDashBaseDataset(
713
+ record,
714
+ self.cache_dir,
715
+ s3_bucket,
716
+ description=description,
717
+ )
@@ -5,8 +5,9 @@ class EEGChallengeDataset(EEGDashDataset):
5
5
  def __init__(
6
6
  self,
7
7
  release: str = "R5",
8
+ query: dict | None = None,
8
9
  cache_dir: str = ".eegdash_cache",
9
- s3_bucket: str | None = "s3://nmdatasets/NeurIPS25/R5_L100",
10
+ s3_bucket: str | None = "s3://nmdatasets/NeurIPS25/",
10
11
  **kwargs,
11
12
  ):
12
13
  """Create a new EEGDashDataset from a given query or local BIDS dataset directory
@@ -15,27 +16,19 @@ class EEGChallengeDataset(EEGDashDataset):
15
16
 
16
17
  Parameters
17
18
  ----------
19
+ release: str
20
+ Release name. Can be one of ["R1", ..., "R11"]
18
21
  query : dict | None
19
- Optionally a dictionary that specifies the query to be executed; see
20
- EEGDash.find() for details on the query format.
21
- data_dir : str | list[str] | None
22
- Optionally a string or a list of strings specifying one or more local
23
- BIDS dataset directories from which to load the EEG data files. Exactly one
24
- of query or data_dir must be provided.
25
- dataset : str | list[str] | None
26
- If data_dir is given, a name or list of names for the dataset(s) to be loaded.
27
- description_fields : list[str]
28
- A list of fields to be extracted from the dataset records
29
- and included in the returned data description(s). Examples are typical
30
- subject metadata fields such as "subject", "session", "run", "task", etc.;
31
- see also data_config.description_fields for the default set of fields.
22
+ Optionally a dictionary that specifies a query to be executed,
23
+ in addition to the dataset (automatically inferred from the release argument).
24
+ See EEGDash.find() for details on the query format.
32
25
  cache_dir : str
33
26
  A directory where the dataset will be cached locally.
34
27
  s3_bucket : str | None
35
28
  An optional S3 bucket URI to use instead of the
36
29
  default OpenNeuro bucket for loading data files.
37
30
  kwargs : dict
38
- Additional keyword arguments to be passed to the EEGDashBaseDataset
31
+ Additional keyword arguments to be passed to the EEGDashDataset
39
32
  constructor.
40
33
 
41
34
  """
@@ -52,9 +45,25 @@ class EEGChallengeDataset(EEGDashDataset):
52
45
  "R2": "ds005506",
53
46
  "R1": "ds005505",
54
47
  }
48
+
49
+ self.release = release
50
+ if release not in dsnumber_release_map:
51
+ raise ValueError(f"Unknown release: {release}")
52
+
53
+ dataset = dsnumber_release_map[release]
54
+ if query is None:
55
+ query = {"dataset": dataset}
56
+ elif "dataset" not in query:
57
+ query["dataset"] = dataset
58
+ elif query["dataset"] != dataset:
59
+ raise ValueError(
60
+ f"Query dataset {query['dataset']} does not match the release {release} "
61
+ f"which corresponds to dataset {dataset}."
62
+ )
63
+
55
64
  super().__init__(
56
- query={"dataset": dsnumber_release_map[release]},
65
+ query=query,
57
66
  cache_dir=cache_dir,
58
- s3_bucket=s3_bucket,
67
+ s3_bucket=f"{s3_bucket}/{release}_L100",
59
68
  **kwargs,
60
69
  )
@@ -0,0 +1,66 @@
1
+ import threading
2
+
3
+ from pymongo import MongoClient
4
+
5
+ # MongoDB Operations
6
+ # These methods provide a high-level interface to interact with the MongoDB
7
+ # collection, allowing users to find, add, and update EEG data records.
8
+ # - find:
9
+ # - exist:
10
+ # - add_request:
11
+ # - add:
12
+ # - update_request:
13
+ # - remove_field:
14
+ # - remove_field_from_db:
15
+ # - close: Close the MongoDB connection.
16
+ # - __del__: Destructor to close the MongoDB connection.
17
+
18
+
19
+ class MongoConnectionManager:
20
+ """Singleton class to manage MongoDB client connections."""
21
+
22
+ _instances = {}
23
+ _lock = threading.Lock()
24
+
25
+ @classmethod
26
+ def get_client(cls, connection_string: str, is_staging: bool = False):
27
+ """Get or create a MongoDB client for the given connection string and staging flag.
28
+
29
+ Parameters
30
+ ----------
31
+ connection_string : str
32
+ The MongoDB connection string
33
+ is_staging : bool
34
+ Whether to use staging database
35
+
36
+ Returns
37
+ -------
38
+ tuple
39
+ A tuple of (client, database, collection)
40
+
41
+ """
42
+ # Create a unique key based on connection string and staging flag
43
+ key = (connection_string, is_staging)
44
+
45
+ if key not in cls._instances:
46
+ with cls._lock:
47
+ # Double-check pattern to avoid race conditions
48
+ if key not in cls._instances:
49
+ client = MongoClient(connection_string)
50
+ db_name = "eegdashstaging" if is_staging else "eegdash"
51
+ db = client[db_name]
52
+ collection = db["records"]
53
+ cls._instances[key] = (client, db, collection)
54
+
55
+ return cls._instances[key]
56
+
57
+ @classmethod
58
+ def close_all(cls):
59
+ """Close all MongoDB client connections."""
60
+ with cls._lock:
61
+ for client, _, _ in cls._instances.values():
62
+ try:
63
+ client.close()
64
+ except Exception:
65
+ pass
66
+ cls._instances.clear()
@@ -0,0 +1,11 @@
1
+ from mne.utils import get_config, set_config, use_log_level
2
+
3
+
4
+ def __init__mongo_client():
5
+ with use_log_level("ERROR"):
6
+ if get_config("EEGDASH_DB_URI") is None:
7
+ set_config(
8
+ "EEGDASH_DB_URI",
9
+ "mongodb+srv://eegdash-user:mdzoMjQcHWTVnKDq@cluster0.vz35p.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0",
10
+ set_env=True,
11
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.2.0
3
+ Version: 0.2.1.dev178237806
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
6
6
  License: GNU General Public License
@@ -43,7 +43,7 @@ Classifier: Programming Language :: Python :: 3
43
43
  Classifier: Programming Language :: Python :: 3.10
44
44
  Classifier: Programming Language :: Python :: 3.11
45
45
  Classifier: Programming Language :: Python :: 3.12
46
- Requires-Python: >3.10
46
+ Requires-Python: >=3.10
47
47
  Description-Content-Type: text/markdown
48
48
  License-File: LICENSE
49
49
  Requires-Dist: braindecode>=1.0
@@ -63,6 +63,7 @@ Requires-Dist: pytest; extra == "tests"
63
63
  Requires-Dist: pytest-cov; extra == "tests"
64
64
  Requires-Dist: codecov; extra == "tests"
65
65
  Requires-Dist: pytest_cases; extra == "tests"
66
+ Requires-Dist: pytest-benchmark; extra == "tests"
66
67
  Provides-Extra: dev
67
68
  Requires-Dist: pre-commit; extra == "dev"
68
69
  Provides-Extra: docs
@@ -164,7 +165,3 @@ EEG-DaSh is a collaborative initiative between the United States and Israel, sup
164
165
 
165
166
 
166
167
 
167
- python3 -m pip install --upgrade build
168
- python3 -m build
169
- python3 -m pip install --upgrade twine
170
- python3 -m twine upload --repository eegdash dist/*
@@ -1,25 +1,12 @@
1
- .gitignore
2
- .pre-commit-config.yaml
3
- .readthedocs.yaml
4
- DevNotes.md
5
1
  LICENSE
6
2
  README.md
7
- datasets.md
8
3
  pyproject.toml
9
- .github/workflows/pre-commit.yaml
10
- .github/workflows/tests.yml
11
- docs/Makefile
12
- docs/architecture2.pptx
13
- docs/conf.py
14
- docs/convert_xls_2_martkdown.py
15
- docs/datasets.xlsx
16
- docs/index.rst
17
- docs/make.bat
18
4
  eegdash/__init__.py
19
5
  eegdash/api.py
20
6
  eegdash/data_config.py
21
7
  eegdash/data_utils.py
22
8
  eegdash/dataset.py
9
+ eegdash/mongodb.py
23
10
  eegdash/preprocessing.py
24
11
  eegdash/utils.py
25
12
  eegdash.egg-info/PKG-INFO
@@ -42,22 +29,8 @@ eegdash/features/feature_bank/dimensionality.py
42
29
  eegdash/features/feature_bank/signal.py
43
30
  eegdash/features/feature_bank/spectral.py
44
31
  eegdash/features/feature_bank/utils.py
45
- notebooks/scratch.ipynb
46
- notebooks/scratch_features.ipynb
47
- notebooks/scratch_features2.ipynb
48
- notebooks/test_pybids_braindecode_BIDSDataset.ipynb
49
- notebooks/tutorial_audi_oddball.ipynb
50
- notebooks/tutorial_eoec.ipynb
51
- notebooks/tutorial_features_eoec.ipynb
52
- notebooks/tutorial_p3_oddball.ipynb
53
- notebooks/tutorial_pfactor_classification.ipynb
54
- notebooks/tutorial_pfactor_features.ipynb
55
- notebooks/tutorial_sex_classification.ipynb
56
- scripts/data_ingest.py
57
- scripts/datasets.json
58
- scripts/scan_openneuro.py
59
- tests/__init__.py
60
32
  tests/test_correctness.py
61
- tests/test_database.py
62
33
  tests/test_dataset.py
63
- tests/test_init.py
34
+ tests/test_eegdash.py
35
+ tests/test_init.py
36
+ tests/test_mongo_connection.py
@@ -36,3 +36,4 @@ pytest
36
36
  pytest-cov
37
37
  codecov
38
38
  pytest_cases
39
+ pytest-benchmark
@@ -1,5 +1,5 @@
1
1
  [build-system]
2
- requires = ["setuptools"]
2
+ requires = ["setuptools>=64", "wheel"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
@@ -13,7 +13,7 @@ authors = [
13
13
  description = "EEG data for machine learning"
14
14
  readme = "README.md"
15
15
  license = { file = "LICENSE" }
16
- requires-python = ">3.10"
16
+ requires-python = ">=3.10"
17
17
 
18
18
  classifiers = [
19
19
  "License :: OSI Approved :: MIT License",
@@ -59,6 +59,7 @@ tests = [
59
59
  'pytest-cov',
60
60
  'codecov',
61
61
  'pytest_cases',
62
+ 'pytest-benchmark',
62
63
  ]
63
64
  dev = [
64
65
  "pre-commit"
@@ -82,7 +82,6 @@ def preprocess_instance(eeg_dash_dataset):
82
82
  ]
83
83
  pre_processed_dir = cache_folder / "preprocessed"
84
84
  pre_processed_dir.mkdir(parents=True, exist_ok=True)
85
-
86
85
  try:
87
86
  eeg_dash_dataset = load_concat_dataset(
88
87
  pre_processed_dir,
@@ -0,0 +1,82 @@
1
+ import time
2
+
3
+ import pytest
4
+
5
+ from eegdash.api import EEGDash
6
+ from eegdash.dataset import EEGChallengeDataset
7
+
8
+ RELEASES = ["R1", "R2", "R3", "R4", "R5", "R6", "R7", "R8", "R9", "R10", "R11"]
9
+ FILES_PER_RELEASE = [1342, 1405, 1812, 3342, 3326, 1227, 3100, 2320, 2885, 2516, 3397]
10
+
11
+ RELEASE_FILES = list(zip(RELEASES, FILES_PER_RELEASE))
12
+
13
+
14
+ def _load_release(release):
15
+ ds = EEGChallengeDataset(release=release)
16
+ getattr(ds, "description", None)
17
+ return ds
18
+
19
+
20
+ @pytest.fixture(scope="session")
21
+ def warmed_mongo():
22
+ try:
23
+ EEGDash()
24
+ except Exception:
25
+ pytest.skip("Mongo not reachable")
26
+
27
+
28
+ def test_eeg_challenge_dataset_initialization():
29
+ """Test the initialization of EEGChallengeDataset."""
30
+ dataset = EEGChallengeDataset(release="R5")
31
+
32
+ release = "R5"
33
+ expected_bucket_prefix = f"s3://nmdatasets/NeurIPS25//{release}_L100"
34
+ assert dataset.s3_bucket == expected_bucket_prefix, (
35
+ f"Unexpected s3_bucket: {dataset.s3_bucket} (expected {expected_bucket_prefix})"
36
+ )
37
+
38
+ # Expected components (kept explicit for readability & easier future edits)
39
+ expected_dataset = "ds005509"
40
+ expected_subject = "sub-NDARAC350XUM"
41
+ expected_task = "DespicableMe"
42
+ expected_suffix = (
43
+ f"{expected_dataset}/{expected_subject}/eeg/"
44
+ f"{expected_subject}_task-{expected_task}_eeg.set"
45
+ )
46
+
47
+ expected_full_path = f"{dataset.s3_bucket}/{expected_suffix}"
48
+ first_file = dataset.datasets[0].s3file
49
+
50
+ assert first_file == expected_full_path, (
51
+ "Mismatch in first dataset s3 file path.\n"
52
+ f"Got : {first_file}\n"
53
+ f"Expected: {expected_full_path}"
54
+ )
55
+
56
+
57
+ @pytest.mark.parametrize("release, number_files", RELEASE_FILES)
58
+ def test_eeg_challenge_dataset_amount_files(release, number_files):
59
+ dataset = EEGChallengeDataset(release=release)
60
+ assert len(dataset.datasets) == number_files
61
+
62
+
63
+ @pytest.mark.parametrize("release", RELEASES)
64
+ def test_mongodb_load_benchmark(benchmark, warmed_mongo, release):
65
+ # Group makes the report nicer when comparing releases
66
+ benchmark.group = "EEGChallengeDataset.load"
67
+ result = benchmark.pedantic(
68
+ _load_release,
69
+ args=(release,),
70
+ iterations=1, # I/O-bound → 1 iteration per round
71
+ rounds=5, # take min/median across several cold-ish runs
72
+ warmup_rounds=1, # do one warmup round
73
+ )
74
+ assert result is not None
75
+
76
+
77
+ @pytest.mark.parametrize("release", RELEASES)
78
+ def test_mongodb_load_under_slo(release):
79
+ start_time = time.perf_counter()
80
+ _ = EEGChallengeDataset(release=release)
81
+ duration = time.perf_counter() - start_time
82
+ assert duration < 10, f"{release} took {duration:.2f}s"