eegdash 0.3.9.dev170082126__tar.gz → 0.4.0.dev132__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

Files changed (70) hide show
  1. {eegdash-0.3.9.dev170082126/eegdash.egg-info → eegdash-0.4.0.dev132}/PKG-INFO +5 -56
  2. eegdash-0.4.0.dev132/README.md +45 -0
  3. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/docs/Makefile +3 -1
  4. eegdash-0.4.0.dev132/docs/source/api.rst +39 -0
  5. eegdash-0.4.0.dev132/docs/source/api_core.rst +18 -0
  6. eegdash-0.4.0.dev132/docs/source/api_features.rst +16 -0
  7. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/docs/source/index.rst +1 -1
  8. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/__init__.py +1 -1
  9. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/api.py +68 -145
  10. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/bids_eeg_metadata.py +149 -27
  11. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/data_utils.py +63 -254
  12. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/dataset/dataset.py +27 -21
  13. eegdash-0.4.0.dev132/eegdash/downloader.py +176 -0
  14. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/datasets.py +4 -3
  15. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/hbn/preprocessing.py +1 -3
  16. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/hbn/windows.py +0 -2
  17. eegdash-0.4.0.dev132/eegdash/logging.py +23 -0
  18. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132/eegdash.egg-info}/PKG-INFO +5 -56
  19. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash.egg-info/SOURCES.txt +7 -1
  20. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash.egg-info/requires.txt +2 -1
  21. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/pyproject.toml +2 -1
  22. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_api.py +28 -1
  23. eegdash-0.4.0.dev132/tests/test_bids_dependencies.py +67 -0
  24. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_cache_folder_suffix.py +10 -10
  25. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_challenge_kwargs.py +12 -10
  26. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_correctness.py +6 -18
  27. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_dataset.py +16 -59
  28. eegdash-0.4.0.dev132/tests/test_downloader.py +132 -0
  29. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_init.py +2 -5
  30. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_minirelease.py +11 -27
  31. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_offline.py +4 -13
  32. eegdash-0.3.9.dev170082126/README.md +0 -97
  33. eegdash-0.3.9.dev170082126/tests/test_functional.py +0 -28
  34. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/LICENSE +0 -0
  35. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/MANIFEST.in +0 -0
  36. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/docs/source/conf.py +0 -0
  37. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/docs/source/dataset_summary.rst +0 -0
  38. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/docs/source/install/install.rst +0 -0
  39. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/docs/source/install/install_pip.rst +0 -0
  40. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/docs/source/install/install_source.rst +0 -0
  41. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/docs/source/overview.rst +0 -0
  42. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/const.py +0 -0
  43. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/dataset/__init__.py +0 -0
  44. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/dataset/dataset_summary.csv +0 -0
  45. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/dataset/registry.py +0 -0
  46. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/__init__.py +0 -0
  47. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/decorators.py +0 -0
  48. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/extractors.py +0 -0
  49. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/feature_bank/__init__.py +0 -0
  50. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/feature_bank/complexity.py +0 -0
  51. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/feature_bank/connectivity.py +0 -0
  52. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/feature_bank/csp.py +0 -0
  53. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/feature_bank/dimensionality.py +0 -0
  54. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/feature_bank/signal.py +0 -0
  55. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/feature_bank/spectral.py +0 -0
  56. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/feature_bank/utils.py +0 -0
  57. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/inspect.py +0 -0
  58. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/serialization.py +0 -0
  59. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/features/utils.py +0 -0
  60. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/hbn/__init__.py +0 -0
  61. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/mongodb.py +0 -0
  62. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/paths.py +0 -0
  63. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash/utils.py +0 -0
  64. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash.egg-info/dependency_links.txt +0 -0
  65. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/eegdash.egg-info/top_level.txt +0 -0
  66. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/setup.cfg +0 -0
  67. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_dataset_registration.py +0 -0
  68. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_eegdash.py +0 -0
  69. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_mongo_connection.py +0 -0
  70. {eegdash-0.3.9.dev170082126 → eegdash-0.4.0.dev132}/tests/test_query.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.3.9.dev170082126
3
+ Version: 0.4.0.dev132
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
6
6
  License-Expression: GPL-3.0-only
@@ -25,7 +25,7 @@ Requires-Python: >=3.10
25
25
  Description-Content-Type: text/markdown
26
26
  License-File: LICENSE
27
27
  Requires-Dist: braindecode>=1.0
28
- Requires-Dist: mne_bids>=0.16.0
28
+ Requires-Dist: mne_bids>=0.17.0
29
29
  Requires-Dist: numba
30
30
  Requires-Dist: numpy
31
31
  Requires-Dist: pandas
@@ -41,6 +41,7 @@ Requires-Dist: pymatreader
41
41
  Requires-Dist: eeglabio
42
42
  Requires-Dist: tabulate
43
43
  Requires-Dist: docstring_inheritance
44
+ Requires-Dist: rich
44
45
  Provides-Extra: tests
45
46
  Requires-Dist: pytest; extra == "tests"
46
47
  Requires-Dist: pytest-cov; extra == "tests"
@@ -86,22 +87,6 @@ To leverage recent and ongoing advancements in large-scale computational methods
86
87
 
87
88
  The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
88
89
 
89
- ## Featured data
90
-
91
- The following HBN datasets are currently featured on EEGDash. Documentation about these datasets is available [here](https://neuromechanist.github.io/data/hbn/).
92
-
93
- | DatasetID | Participants | Files | Sessions | Population | Channels | Is 10-20? | Modality | Size |
94
- |---|---|---|---|---|---|---|---|---|
95
- | [ds005505](https://nemar.org/dataexplorer/detail?dataset_id=ds005505) | 136 | 5393 | 1 | Healthy | 129 | other | Visual | 103 GB |
96
- | [ds005506](https://nemar.org/dataexplorer/detail?dataset_id=ds005506) | 150 | 5645 | 1 | Healthy | 129 | other | Visual | 112 GB |
97
- | [ds005507](https://nemar.org/dataexplorer/detail?dataset_id=ds005507) | 184 | 7273 | 1 | Healthy | 129 | other | Visual | 140 GB |
98
- | [ds005508](https://nemar.org/dataexplorer/detail?dataset_id=ds005508) | 324 | 13393 | 1 | Healthy | 129 | other | Visual | 230 GB |
99
- | [ds005510](https://nemar.org/dataexplorer/detail?dataset_id=ds005510) | 135 | 4933 | 1 | Healthy | 129 | other | Visual | 91 GB |
100
- | [ds005512](https://nemar.org/dataexplorer/detail?dataset_id=ds005512) | 257 | 9305 | 1 | Healthy | 129 | other | Visual | 157 GB |
101
- | [ds005514](https://nemar.org/dataexplorer/detail?dataset_id=ds005514) | 295 | 11565 | 1 | Healthy | 129 | other | Visual | 185 GB |
102
-
103
- A total of [246 other datasets](datasets.md) are also available through EEGDash.
104
-
105
90
  ## Data format
106
91
 
107
92
  EEGDash queries return a **PyTorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are the best format for EEGDash queries because they provide an efficient, scalable, and flexible structure for ML and DL applications. They allow seamless integration with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.
@@ -113,47 +98,11 @@ EEGDash datasets are processed using the popular [braindecode](https://braindeco
113
98
  ## EEG-Dash usage
114
99
 
115
100
  ### Install
116
- Use your preferred Python environment manager with Python > 3.9 to install the package.
101
+ Use your preferred Python environment manager with Python >= 3.10 to install the package.
117
102
  * To install the eegdash package, use the following command: `pip install eegdash`
118
103
  * To verify the installation, start a Python session and type: `from eegdash import EEGDash`
119
104
 
120
- ### Data access
121
-
122
- To use the data from a single subject, enter:
123
-
124
- ```python
125
- from eegdash import EEGDashDataset
126
-
127
- ds_NDARDB033FW5 = EEGDashDataset(
128
- {"dataset": "ds005514", "task":
129
- "RestingState", "subject": "NDARDB033FW5"},
130
- cache_dir="."
131
- )
132
- ```
133
-
134
- This will search and download the metadata for the task **RestingState** for subject **NDARDB033FW5** in BIDS dataset **ds005514**. The actual data will not be downloaded at this stage. Following standard practice, data is only downloaded once it is processed. The **ds_NDARDB033FW5** object is a fully functional braindecode dataset, which is itself a PyTorch dataset. This [tutorial](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_eoec.ipynb) shows how to preprocess the EEG data, extracting portions of the data containing eyes-open and eyes-closed segments, then perform eyes-open vs. eyes-closed classification using a (shallow) deep-learning model.
135
-
136
- To use the data from multiple subjects, enter:
137
-
138
- ```python
139
- from eegdash import EEGDashDataset
140
-
141
- ds_ds005505rest = EEGDashDataset(
142
- {"dataset": "ds005505", "task": "RestingState"}, target_name="sex", cache_dir=".
143
- )
144
- ```
145
-
146
- This will search and download the metadata for the task 'RestingState' for all subjects in BIDS dataset 'ds005505' (a total of 136). As above, the actual data will not be downloaded at this stage so this command is quick to execute. Also, the target class for each subject is assigned using the target_name parameter. This means that this object is ready to be directly fed to a deep learning model, although the [tutorial script](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_sex_classification.ipynb) performs minimal processing on it, prior to training a deep-learning model. Because 14 gigabytes of data are downloaded, this tutorial takes about 10 minutes to execute.
147
-
148
- ### Automatic caching
149
-
150
- By default, EEGDash caches downloaded data under a single, consistent folder:
151
-
152
- - If ``EEGDASH_CACHE_DIR`` is set in your environment, that path is used.
153
- - Else, if MNE’s ``MNE_DATA`` config is set, that path is used to align with other EEG tooling.
154
- - Otherwise, ``.eegdash_cache`` in the current working directory is used.
155
-
156
- This means that if you run the tutorial [scripts](https://github.com/sccn/EEGDash/tree/develop/notebooks), the data will only be downloaded the first time the script is executed and reused thereafter.
105
+ Please check our tutorial webpages to explore what you can do with [eegdash](https://eegdash.org/)!
157
106
 
158
107
  ## Education -- Coming soon...
159
108
 
@@ -0,0 +1,45 @@
1
+ # EEG-Dash
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/eegdash)](https://pypi.org/project/eegdash/)
4
+ [![Docs](https://img.shields.io/badge/docs-stable-brightgreen.svg)](https://sccn.github.io/eegdash)
5
+
6
+ [![License: GPL-3.0-only](https://img.shields.io/badge/License-GPL--3.0--only-blue.svg)](LICENSE)
7
+ [![Python versions](https://img.shields.io/pypi/pyversions/eegdash.svg)](https://pypi.org/project/eegdash/)
8
+ [![Downloads](https://pepy.tech/badge/eegdash)](https://pepy.tech/project/eegdash)
9
+ <!-- [![Coverage](https://img.shields.io/codecov/c/github/sccn/eegdash)](https://codecov.io/gh/sccn/eegdash) -->
10
+
11
+ To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
12
+
13
+ ## Data source
14
+
15
+ The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
16
+
17
+ ## Data format
18
+
19
+ EEGDash queries return a **PyTorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are the best format for EEGDash queries because they provide an efficient, scalable, and flexible structure for ML and DL applications. They allow seamless integration with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.
20
+
21
+ ## Data preprocessing
22
+
23
+ EEGDash datasets are processed using the popular [braindecode](https://braindecode.org/stable/index.html) library. In fact, EEGDash datasets are braindecode datasets, which are themselves PyTorch datasets. This means that any preprocessing possible on braindecode datasets is also possible on EEGDash datasets. Refer to [braindecode](https://braindecode.org/stable/index.html) tutorials for guidance on preprocessing EEG data.
24
+
25
+ ## EEG-Dash usage
26
+
27
+ ### Install
28
+ Use your preferred Python environment manager with Python >= 3.10 to install the package.
29
+ * To install the eegdash package, use the following command: `pip install eegdash`
30
+ * To verify the installation, start a Python session and type: `from eegdash import EEGDash`
31
+
32
+ Please check our tutorial webpages to explore what you can do with [eegdash](https://eegdash.org/)!
33
+
34
+ ## Education -- Coming soon...
35
+
36
+ We organize workshops and educational events to foster cross-cultural education and student training, offering both online and in-person opportunities in collaboration with US and Israeli partners. Events for 2025 will be announced via the EEGLABNEWS mailing list. Be sure to [subscribe](https://sccn.ucsd.edu/mailman/listinfo/eeglabnews).
37
+
38
+ ## About EEG-DaSh
39
+
40
+ EEG-DaSh is a collaborative initiative between the United States and Israel, supported by the National Science Foundation (NSF). The partnership brings together experts from the Swartz Center for Computational Neuroscience (SCCN) at the University of California San Diego (UCSD) and Ben-Gurion University (BGU) in Israel.
41
+
42
+ ![Screenshot 2024-10-03 at 09 14 06](https://github.com/user-attachments/assets/327639d3-c3b4-46b1-9335-37803209b0d3)
43
+
44
+
45
+
@@ -11,7 +11,9 @@ help:
11
11
 
12
12
  .PHONY: apidoc
13
13
  apidoc:
14
- @python -m sphinx.ext.apidoc -o "$(APIDIR)" "../$(PKG)" -f -e -M
14
+ # Generate API docs using the top-level package so modules are importable
15
+ # as eegdash.* instead of an unimportable bare 'dataset.*'
16
+ @python -m sphinx.ext.apidoc -o "$(APIDIR)/dataset" "../$(PKG)" -f -e -M
15
17
 
16
18
  # Standard build runs examples
17
19
  html: apidoc
@@ -0,0 +1,39 @@
1
+ .. _api:
2
+
3
+ API Reference
4
+ =============
5
+
6
+ .. grid:: 2
7
+ :gutter: 2
8
+
9
+ .. grid-item-card:: Core
10
+ :class-header: sd-bg-primary sd-text-white sd-font-weight-bold
11
+ :link: api_core
12
+ :link-type: doc
13
+
14
+ Core utilities for EEGDash.
15
+
16
+ .. grid-item-card:: Features
17
+ :class-header: sd-bg-primary sd-text-white sd-font-weight-bold
18
+ :link: api_features
19
+ :link-type: doc
20
+
21
+ Feature extraction and processing routines.
22
+
23
+ Datasets
24
+ --------
25
+
26
+ .. toctree::
27
+ :maxdepth: 2
28
+
29
+ api/dataset/modules
30
+
31
+ .. note::
32
+ The dataset API above is generated automatically during the build.
33
+
34
+ .. toctree::
35
+ :hidden:
36
+
37
+ api_core
38
+ api_features
39
+
@@ -0,0 +1,18 @@
1
+ Core API
2
+ ========
3
+
4
+ .. currentmodule:: eegdash
5
+
6
+ .. autosummary::
7
+ :toctree: generated/api-core
8
+ :recursive:
9
+
10
+ api
11
+ bids_eeg_metadata
12
+ const
13
+ data_utils
14
+ hbn
15
+ mongodb
16
+ paths
17
+ utils
18
+
@@ -0,0 +1,16 @@
1
+ Feature API
2
+ ===========
3
+
4
+ .. autosummary::
5
+ :toctree: generated/api-features
6
+ :recursive:
7
+
8
+ eegdash.features
9
+ eegdash.features.datasets
10
+ eegdash.features.decorators
11
+ eegdash.features.extractors
12
+ eegdash.features.inspect
13
+ eegdash.features.serialization
14
+ eegdash.features.utils
15
+ eegdash.features.feature_bank
16
+
@@ -55,6 +55,6 @@ The archive is currently still in :bdg-danger:`beta testing` mode, so be kind.
55
55
 
56
56
  Overview <overview>
57
57
  Install <install/install>
58
- API <api/modules.rst>
58
+ API <api>
59
59
  Dataset Summary <dataset_summary>
60
60
  Examples <generated/auto_examples/index>
@@ -7,4 +7,4 @@ _init_mongo_client()
7
7
 
8
8
  __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset", "preprocessing"]
9
9
 
10
- __version__ = "0.3.9.dev170082126"
10
+ __version__ = "0.4.0.dev132"
@@ -1,9 +1,6 @@
1
- import logging
2
1
  import os
3
- import tempfile
4
2
  from pathlib import Path
5
3
  from typing import Any, Mapping
6
- from urllib.parse import urlsplit
7
4
 
8
5
  import mne
9
6
  import numpy as np
@@ -11,13 +8,15 @@ import xarray as xr
11
8
  from docstring_inheritance import NumpyDocstringInheritanceInitMeta
12
9
  from dotenv import load_dotenv
13
10
  from joblib import Parallel, delayed
14
- from mne.utils import warn
15
11
  from mne_bids import find_matching_paths, get_bids_path_from_fname, read_raw_bids
16
12
  from pymongo import InsertOne, UpdateOne
17
- from s3fs import S3FileSystem
13
+ from rich.console import Console
14
+ from rich.panel import Panel
15
+ from rich.text import Text
18
16
 
19
17
  from braindecode.datasets import BaseConcatDataset
20
18
 
19
+ from . import downloader
21
20
  from .bids_eeg_metadata import (
22
21
  build_query_from_kwargs,
23
22
  load_eeg_attrs_from_bids_file,
@@ -33,10 +32,10 @@ from .data_utils import (
33
32
  EEGBIDSDataset,
34
33
  EEGDashBaseDataset,
35
34
  )
35
+ from .logging import logger
36
36
  from .mongodb import MongoConnectionManager
37
37
  from .paths import get_default_cache_dir
38
-
39
- logger = logging.getLogger("eegdash")
38
+ from .utils import _init_mongo_client
40
39
 
41
40
 
42
41
  class EEGDash:
@@ -74,19 +73,26 @@ class EEGDash:
74
73
 
75
74
  if self.is_public:
76
75
  DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
76
+ if not DB_CONNECTION_STRING:
77
+ try:
78
+ _init_mongo_client()
79
+ DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
80
+ except Exception:
81
+ DB_CONNECTION_STRING = None
77
82
  else:
78
83
  load_dotenv()
79
84
  DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")
80
85
 
81
86
  # Use singleton to get MongoDB client, database, and collection
87
+ if not DB_CONNECTION_STRING:
88
+ raise RuntimeError(
89
+ "No MongoDB connection string configured. Set MNE config 'EEGDASH_DB_URI' "
90
+ "or environment variable 'DB_CONNECTION_STRING'."
91
+ )
82
92
  self.__client, self.__db, self.__collection = MongoConnectionManager.get_client(
83
93
  DB_CONNECTION_STRING, is_staging
84
94
  )
85
95
 
86
- self.filesystem = S3FileSystem(
87
- anon=True, client_kwargs={"region_name": "us-east-2"}
88
- )
89
-
90
96
  def find(
91
97
  self, query: dict[str, Any] = None, /, **kwargs
92
98
  ) -> list[Mapping[str, Any]]:
@@ -310,83 +316,6 @@ class EEGDash:
310
316
  f"Conflicting constraints for '{key}': disjoint sets {r_val!r} and {k_val!r}"
311
317
  )
312
318
 
313
- def load_eeg_data_from_s3(self, s3path: str) -> xr.DataArray:
314
- """Load EEG data from an S3 URI into an ``xarray.DataArray``.
315
-
316
- Preserves the original filename, downloads sidecar files when applicable
317
- (e.g., ``.fdt`` for EEGLAB, ``.vmrk``/``.eeg`` for BrainVision), and uses
318
- MNE's direct readers.
319
-
320
- Parameters
321
- ----------
322
- s3path : str
323
- An S3 URI (should start with "s3://").
324
-
325
- Returns
326
- -------
327
- xr.DataArray
328
- EEG data with dimensions ``("channel", "time")``.
329
-
330
- Raises
331
- ------
332
- ValueError
333
- If the file extension is unsupported.
334
-
335
- """
336
- # choose a temp dir so sidecars can be colocated
337
- with tempfile.TemporaryDirectory() as tmpdir:
338
- # Derive local filenames from the S3 key to keep base name consistent
339
- s3_key = urlsplit(s3path).path # e.g., "/dsXXXX/sub-.../..._eeg.set"
340
- basename = Path(s3_key).name
341
- ext = Path(basename).suffix.lower()
342
- local_main = Path(tmpdir) / basename
343
-
344
- # Download main file
345
- with (
346
- self.filesystem.open(s3path, mode="rb") as fsrc,
347
- open(local_main, "wb") as fdst,
348
- ):
349
- fdst.write(fsrc.read())
350
-
351
- # Determine and fetch any required sidecars
352
- sidecars: list[str] = []
353
- if ext == ".set": # EEGLAB
354
- sidecars = [".fdt"]
355
- elif ext == ".vhdr": # BrainVision
356
- sidecars = [".vmrk", ".eeg", ".dat", ".raw"]
357
-
358
- for sc_ext in sidecars:
359
- sc_key = s3_key[: -len(ext)] + sc_ext
360
- sc_uri = f"s3://{urlsplit(s3path).netloc}{sc_key}"
361
- try:
362
- # If sidecar exists, download next to the main file
363
- info = self.filesystem.info(sc_uri)
364
- if info:
365
- sc_local = Path(tmpdir) / Path(sc_key).name
366
- with (
367
- self.filesystem.open(sc_uri, mode="rb") as fsrc,
368
- open(sc_local, "wb") as fdst,
369
- ):
370
- fdst.write(fsrc.read())
371
- except Exception:
372
- # Sidecar not present; skip silently
373
- pass
374
-
375
- # Read using appropriate MNE reader
376
- raw = mne.io.read_raw(str(local_main), preload=True, verbose=False)
377
-
378
- data = raw.get_data()
379
- fs = raw.info["sfreq"]
380
- max_time = data.shape[1] / fs
381
- time_steps = np.linspace(0, max_time, data.shape[1]).squeeze()
382
- channel_names = raw.ch_names
383
-
384
- return xr.DataArray(
385
- data=data,
386
- dims=["channel", "time"],
387
- coords={"time": time_steps, "channel": channel_names},
388
- )
389
-
390
319
  def load_eeg_data_from_bids_file(self, bids_file: str) -> xr.DataArray:
391
320
  """Load EEG data from a local BIDS-formatted file.
392
321
 
@@ -508,39 +437,13 @@ class EEGDash:
508
437
  results = Parallel(
509
438
  n_jobs=-1 if len(sessions) > 1 else 1, prefer="threads", verbose=1
510
439
  )(
511
- delayed(self.load_eeg_data_from_s3)(self._get_s3path(session))
440
+ delayed(downloader.load_eeg_from_s3)(
441
+ downloader.get_s3path("s3://openneuro.org", session["bidspath"])
442
+ )
512
443
  for session in sessions
513
444
  )
514
445
  return results
515
446
 
516
- def _get_s3path(self, record: Mapping[str, Any] | str) -> str:
517
- """Build an S3 URI from a DB record or a relative path.
518
-
519
- Parameters
520
- ----------
521
- record : dict or str
522
- Either a DB record containing a ``'bidspath'`` key, or a relative
523
- path string under the OpenNeuro bucket.
524
-
525
- Returns
526
- -------
527
- str
528
- Fully qualified S3 URI.
529
-
530
- Raises
531
- ------
532
- ValueError
533
- If a mapping is provided but ``'bidspath'`` is missing.
534
-
535
- """
536
- if isinstance(record, str):
537
- rel = record
538
- else:
539
- rel = record.get("bidspath")
540
- if not rel:
541
- raise ValueError("Record missing 'bidspath' for S3 path resolution")
542
- return f"s3://openneuro.org/{rel}"
543
-
544
447
  def _add_request(self, record: dict):
545
448
  """Internal helper method to create a MongoDB insertion request for a record."""
546
449
  return InsertOne(record)
@@ -552,8 +455,11 @@ class EEGDash:
552
455
  except ValueError as e:
553
456
  logger.error("Validation error for record: %s ", record["data_name"])
554
457
  logger.error(e)
555
- except:
556
- logger.error("Error adding record: %s ", record["data_name"])
458
+ except Exception as exc:
459
+ logger.error(
460
+ "Error adding record: %s ", record.get("data_name", "<unknown>")
461
+ )
462
+ logger.debug("Add operation failed", exc_info=exc)
557
463
 
558
464
  def _update_request(self, record: dict):
559
465
  """Internal helper method to create a MongoDB update request for a record."""
@@ -572,8 +478,11 @@ class EEGDash:
572
478
  self.__collection.update_one(
573
479
  {"data_name": record["data_name"]}, {"$set": record}
574
480
  )
575
- except: # silent failure
576
- logger.error("Error updating record: %s", record["data_name"])
481
+ except Exception as exc: # log and continue
482
+ logger.error(
483
+ "Error updating record: %s", record.get("data_name", "<unknown>")
484
+ )
485
+ logger.debug("Update operation failed", exc_info=exc)
577
486
 
578
487
  def exists(self, query: dict[str, Any]) -> bool:
579
488
  """Alias for :meth:`exist` provided for API clarity."""
@@ -654,8 +563,7 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
654
563
  Parameters
655
564
  ----------
656
565
  cache_dir : str | Path
657
- Directory where data are cached locally. If not specified, a default
658
- cache directory under the user cache is used.
566
+ Directory where data are cached locally.
659
567
  query : dict | None
660
568
  Raw MongoDB query to filter records. If provided, it is merged with
661
569
  keyword filtering arguments (see ``**kwargs``) using logical AND.
@@ -726,13 +634,21 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
726
634
  self.records = records
727
635
  self.download = download
728
636
  self.n_jobs = n_jobs
729
- self.eeg_dash_instance = eeg_dash_instance or EEGDash()
637
+ self.eeg_dash_instance = eeg_dash_instance
730
638
 
731
- # Resolve a unified cache directory across code/tests/CI
732
- self.cache_dir = Path(cache_dir or get_default_cache_dir())
639
+ self.cache_dir = cache_dir
640
+ if self.cache_dir == "" or self.cache_dir is None:
641
+ self.cache_dir = get_default_cache_dir()
642
+ logger.warning(
643
+ f"Cache directory is empty, using the eegdash default path: {self.cache_dir}"
644
+ )
645
+
646
+ self.cache_dir = Path(self.cache_dir)
733
647
 
734
648
  if not self.cache_dir.exists():
735
- warn(f"Cache directory does not exist, creating it: {self.cache_dir}")
649
+ logger.warning(
650
+ f"Cache directory does not exist, creating it: {self.cache_dir}"
651
+ )
736
652
  self.cache_dir.mkdir(exist_ok=True, parents=True)
737
653
 
738
654
  # Separate query kwargs from other kwargs passed to the BaseDataset constructor
@@ -772,21 +688,29 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
772
688
  not _suppress_comp_warning
773
689
  and self.query["dataset"] in RELEASE_TO_OPENNEURO_DATASET_MAP.values()
774
690
  ):
775
- warn(
776
- "If you are not participating in the competition, you can ignore this warning!"
777
- "\n\n"
778
- "EEG 2025 Competition Data Notice:\n"
779
- "---------------------------------\n"
780
- " You are loading the dataset that is used in the EEG 2025 Competition:\n"
781
- "IMPORTANT: The data accessed via `EEGDashDataset` is NOT identical to what you get from `EEGChallengeDataset` object directly.\n"
782
- "and it is not what you will use for the competition. Downsampling and filtering were applied to the data"
783
- "to allow more people to participate.\n"
784
- "\n"
785
- "If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data.\n"
786
- "\n",
787
- UserWarning,
788
- module="eegdash",
691
+ message_text = Text.from_markup(
692
+ "[italic]This notice is only for users who are participating in the [link=https://eeg2025.github.io/]EEG 2025 Competition[/link].[/italic]\n\n"
693
+ "[bold]EEG 2025 Competition Data Notice![/bold]\n"
694
+ "You are loading one of the datasets that is used in competition, but via `EEGDashDataset`.\n\n"
695
+ "[bold red]IMPORTANT[/bold red]: \n"
696
+ "If you download data from `EEGDashDataset`, it is [u]NOT[/u] identical to the official \n"
697
+ "competition data, which is accessed via `EEGChallengeDataset`. "
698
+ "The competition data has been downsampled and filtered.\n\n"
699
+ "[bold]If you are participating in the competition, \nyou must use the `EEGChallengeDataset` object to ensure consistency.[/bold] \n\n"
700
+ "If you are not participating in the competition, you can ignore this message."
789
701
  )
702
+ warning_panel = Panel(
703
+ message_text,
704
+ title="[yellow]EEG 2025 Competition Data Notice[/yellow]",
705
+ subtitle="[cyan]Source: EEGDashDataset[/cyan]",
706
+ border_style="yellow",
707
+ )
708
+
709
+ try:
710
+ Console().print(warning_panel)
711
+ except Exception:
712
+ logger.warning(str(message_text))
713
+
790
714
  if records is not None:
791
715
  self.records = records
792
716
  datasets = [
@@ -848,16 +772,15 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
848
772
  )
849
773
  )
850
774
  elif self.query:
851
- # This is the DB query path that we are improving
775
+ if self.eeg_dash_instance is None:
776
+ self.eeg_dash_instance = EEGDash()
852
777
  datasets = self._find_datasets(
853
778
  query=build_query_from_kwargs(**self.query),
854
779
  description_fields=description_fields,
855
780
  base_dataset_kwargs=base_dataset_kwargs,
856
781
  )
857
782
  # We only need filesystem if we need to access S3
858
- self.filesystem = S3FileSystem(
859
- anon=True, client_kwargs={"region_name": "us-east-2"}
860
- )
783
+ self.filesystem = downloader.get_s3_filesystem()
861
784
  else:
862
785
  raise ValueError(
863
786
  "You must provide either 'records', a 'data_dir', or a query/keyword arguments for filtering."