eegdash 0.0.8__tar.gz → 0.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

Files changed (28) hide show
  1. {eegdash-0.0.8/src/eegdash.egg-info → eegdash-0.0.9}/PKG-INFO +13 -47
  2. {eegdash-0.0.8 → eegdash-0.0.9}/README.md +5 -44
  3. {eegdash-0.0.8 → eegdash-0.0.9}/pyproject.toml +8 -3
  4. eegdash-0.0.9/src/eegdash/data_config.py +28 -0
  5. {eegdash-0.0.8 → eegdash-0.0.9}/src/eegdash/data_utils.py +55 -56
  6. eegdash-0.0.9/src/eegdash/features/__init__.py +25 -0
  7. eegdash-0.0.9/src/eegdash/features/datasets.py +453 -0
  8. eegdash-0.0.9/src/eegdash/features/decorators.py +43 -0
  9. eegdash-0.0.9/src/eegdash/features/extractors.py +209 -0
  10. eegdash-0.0.9/src/eegdash/features/feature_bank/__init__.py +6 -0
  11. eegdash-0.0.9/src/eegdash/features/feature_bank/complexity.py +97 -0
  12. eegdash-0.0.9/src/eegdash/features/feature_bank/connectivity.py +99 -0
  13. eegdash-0.0.9/src/eegdash/features/feature_bank/csp.py +102 -0
  14. eegdash-0.0.9/src/eegdash/features/feature_bank/dimensionality.py +108 -0
  15. eegdash-0.0.9/src/eegdash/features/feature_bank/signal.py +103 -0
  16. eegdash-0.0.9/src/eegdash/features/feature_bank/spectral.py +134 -0
  17. eegdash-0.0.9/src/eegdash/features/serialization.py +87 -0
  18. eegdash-0.0.9/src/eegdash/features/utils.py +114 -0
  19. {eegdash-0.0.8 → eegdash-0.0.9}/src/eegdash/main.py +98 -50
  20. {eegdash-0.0.8 → eegdash-0.0.9/src/eegdash.egg-info}/PKG-INFO +13 -47
  21. eegdash-0.0.9/src/eegdash.egg-info/SOURCES.txt +25 -0
  22. {eegdash-0.0.8 → eegdash-0.0.9}/src/eegdash.egg-info/requires.txt +5 -0
  23. eegdash-0.0.8/src/eegdash.egg-info/SOURCES.txt +0 -11
  24. {eegdash-0.0.8 → eegdash-0.0.9}/LICENSE +0 -0
  25. {eegdash-0.0.8 → eegdash-0.0.9}/setup.cfg +0 -0
  26. {eegdash-0.0.8 → eegdash-0.0.9}/src/eegdash/__init__.py +0 -0
  27. {eegdash-0.0.8 → eegdash-0.0.9}/src/eegdash.egg-info/dependency_links.txt +0 -0
  28. {eegdash-0.0.8 → eegdash-0.0.9}/src/eegdash.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
6
6
  License: GNU General Public License
@@ -24,8 +24,8 @@ License: GNU General Public License
24
24
  along with this program; if not, write to the Free Software
25
25
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26
26
 
27
- Project-URL: Homepage, https://github.com/sccn/EEG-Dash-Data
28
- Project-URL: Issues, https://github.com/sccn/EEG-Dash-Data/issues
27
+ Project-URL: Homepage, https://eegdash.org
28
+ Project-URL: Issues, https://github.com/sccn/EEGDash/issues
29
29
  Classifier: Programming Language :: Python :: 3
30
30
  Classifier: License :: OSI Approved :: MIT License
31
31
  Classifier: Operating System :: OS Independent
@@ -42,6 +42,11 @@ Requires-Dist: pymongo
42
42
  Requires-Dist: joblib
43
43
  Requires-Dist: braindecode
44
44
  Requires-Dist: mne-bids
45
+ Requires-Dist: pybids
46
+ Requires-Dist: pymatreader
47
+ Requires-Dist: pyarrow
48
+ Requires-Dist: tqdm
49
+ Requires-Dist: numba
45
50
  Dynamic: license-file
46
51
 
47
52
  # EEG-Dash
@@ -50,60 +55,21 @@ To leverage recent and ongoing advancements in large-scale computational methods
50
55
  ## Data source
51
56
  The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
52
57
 
53
- ## Available data
58
+ ## Featured data
54
59
 
55
- The following datasets are currently available on EEGDash.
60
+ The following HBN datasets are currently featured on EEGDash. Documentation about these datasets is available [here](https://neuromechanist.github.io/data/hbn/).
56
61
 
57
62
  | DatasetID | Participants | Files | Sessions | Population | Channels | Is 10-20? | Modality | Size |
58
63
  |---|---|---|---|---|---|---|---|---|
59
- | [ds002181](https://nemar.org/dataexplorer/detail?dataset_id=ds002181) | 20 | 949 | 1 | Healthy | 63 | 10-20 | Visual | 0.163 GB |
60
- | [ds002578](https://nemar.org/dataexplorer/detail?dataset_id=ds002578) | 2 | 22 | 1 | Healthy | 256 | 10-20 | Visual | 0.001 TB |
61
- | [ds002680](https://nemar.org/dataexplorer/detail?dataset_id=ds002680) | 14 | 4977 | 2 | Healthy | 0 | 10-20 | Visual | 0.01 TB |
62
- | [ds002691](https://nemar.org/dataexplorer/detail?dataset_id=ds002691) | 20 | 146 | 1 | Healthy | 32 | other | Visual | 0.001 TB |
63
- | [ds002718](https://nemar.org/dataexplorer/detail?dataset_id=ds002718) | 18 | 582 | 1 | Healthy | 70 | other | Visual | 0.005 TB |
64
- | [ds003061](https://nemar.org/dataexplorer/detail?dataset_id=ds003061) | 13 | 282 | 1 | Not specified | 64 | 10-20 | Auditory | 0.002 TB |
65
- | [ds003690](https://nemar.org/dataexplorer/detail?dataset_id=ds003690) | 75 | 2630 | 1 | Healthy | 61 | 10-20 | Auditory | 0.023 TB |
66
- | [ds003805](https://nemar.org/dataexplorer/detail?dataset_id=ds003805) | 1 | 10 | 1 | Healthy | 19 | 10-20 | Multisensory | 0 TB |
67
- | [ds003838](https://nemar.org/dataexplorer/detail?dataset_id=ds003838) | 65 | 947 | 1 | Healthy | 63 | 10-20 | Auditory | 100.2 GB |
68
- | [ds004010](https://nemar.org/dataexplorer/detail?dataset_id=ds004010) | 24 | 102 | 1 | Healthy | 64 | other | Multisensory | 0.025 TB |
69
- | [ds004040](https://nemar.org/dataexplorer/detail?dataset_id=ds004040) | 13 | 160 | 2 | Healthy | 64 | 10-20 | Auditory | 0.012 TB |
70
- | [ds004350](https://nemar.org/dataexplorer/detail?dataset_id=ds004350) | 24 | 960 | 2 | Healthy | 64 | other | Visual | 0.023 TB |
71
- | [ds004362](https://nemar.org/dataexplorer/detail?dataset_id=ds004362) | 109 | 9162 | 1 | Healthy | 64 | 10-20 | Visual | 0.008 TB |
72
- | [ds004504](https://nemar.org/dataexplorer/detail?dataset_id=ds004504) | 88 | 269 | 1 | Dementia | 19 | 10-20 | Resting State | 2.6 GB |
73
- | [ds004554](https://nemar.org/dataexplorer/detail?dataset_id=ds004554) | 16 | 101 | 1 | Healthy | 99 | 10-20 | Visual | 0.009 TB |
74
- | [ds004635](https://nemar.org/dataexplorer/detail?dataset_id=ds004635) | 48 | 292 | 1 | Healthy | 129 | other | Multisensory | 26.1 GB |
75
- | [ds004657](https://nemar.org/dataexplorer/detail?dataset_id=ds004657) | 24 | 838 | 6 | Not specified | 64 | 10-20 | Motor | 43.1 GB |
76
- | [ds004660](https://nemar.org/dataexplorer/detail?dataset_id=ds004660) | 21 | 299 | 1 | Healthy | 32 | 10-20 | Multisensory | 7.2 GB |
77
- | [ds004661](https://nemar.org/dataexplorer/detail?dataset_id=ds004661) | 17 | 90 | 1 | Not specified | 64 | 10-20 | Multisensory | 1.4 GB |
78
- | [ds004745](https://nemar.org/dataexplorer/detail?dataset_id=ds004745) | 52 | 762 | 1 | Healthy | 64 | ? | Auditory | 0 TB |
79
- | [ds004785](https://nemar.org/dataexplorer/detail?dataset_id=ds004785) | 17 | 74 | 1 | Healthy | 32 | ? | Motor | 0 TB |
80
- | [ds004841](https://nemar.org/dataexplorer/detail?dataset_id=ds004841) | 20 | 1034 | 2 | Not specified | 64 | 10-20 | Multisensory | 7.3 GB |
81
- | [ds004842](https://nemar.org/dataexplorer/detail?dataset_id=ds004842) | 14 | 719 | 2 | Not specified | 64 | ? | Multisensory | 5.2 GB |
82
- | [ds004843](https://nemar.org/dataexplorer/detail?dataset_id=ds004843) | 14 | 649 | 1 | Not specified | 64 | ? | Visual | 7.7 GB |
83
- | [ds004844](https://nemar.org/dataexplorer/detail?dataset_id=ds004844) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 22.3 GB |
84
- | [ds004849](https://nemar.org/dataexplorer/detail?dataset_id=ds004849) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
85
- | [ds004850](https://nemar.org/dataexplorer/detail?dataset_id=ds004850) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
86
- | [ds004851](https://nemar.org/dataexplorer/detail?dataset_id=ds004851) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
87
- | [ds004852](https://nemar.org/dataexplorer/detail?dataset_id=ds004852) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
88
- | [ds004853](https://nemar.org/dataexplorer/detail?dataset_id=ds004853) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
89
- | [ds004854](https://nemar.org/dataexplorer/detail?dataset_id=ds004854) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
90
- | [ds004855](https://nemar.org/dataexplorer/detail?dataset_id=ds004855) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
91
- | [ds005034](https://nemar.org/dataexplorer/detail?dataset_id=ds005034) | 25 | 406 | 2 | Healthy | 129 | ? | Visual | 61.4 GB |
92
- | [ds005079](https://nemar.org/dataexplorer/detail?dataset_id=ds005079) | 1 | 210 | 12 | Healthy | 64 | ? | Multisensory | 1.7 GB |
93
- | [ds005342](https://nemar.org/dataexplorer/detail?dataset_id=ds005342) | 32 | 134 | 1 | Healthy | 17 | ? | Visual | 2 GB |
94
- | [ds005410](https://nemar.org/dataexplorer/detail?dataset_id=ds005410) | 81 | 492 | 1 | Healthy | 63 | ? | ? | 19.8 GB |
95
64
  | [ds005505](https://nemar.org/dataexplorer/detail?dataset_id=ds005505) | 136 | 5393 | 1 | Healthy | 129 | other | Visual | 103 GB |
96
65
  | [ds005506](https://nemar.org/dataexplorer/detail?dataset_id=ds005506) | 150 | 5645 | 1 | Healthy | 129 | other | Visual | 112 GB |
97
66
  | [ds005507](https://nemar.org/dataexplorer/detail?dataset_id=ds005507) | 184 | 7273 | 1 | Healthy | 129 | other | Visual | 140 GB |
98
67
  | [ds005508](https://nemar.org/dataexplorer/detail?dataset_id=ds005508) | 324 | 13393 | 1 | Healthy | 129 | other | Visual | 230 GB |
99
- | [ds005509](https://nemar.org/dataexplorer/detail?dataset_id=ds005509) | 330 | 19980 | 1 | Healthy | 129 | other | Visual | 224 GB |
100
68
  | [ds005510](https://nemar.org/dataexplorer/detail?dataset_id=ds005510) | 135 | 4933 | 1 | Healthy | 129 | other | Visual | 91 GB |
101
- | [ds005511](https://nemar.org/dataexplorer/detail?dataset_id=ds005511) | 381 | 18604 | 1 | Healthy | 129 | other | Visual | 245 GB |
102
69
  | [ds005512](https://nemar.org/dataexplorer/detail?dataset_id=ds005512) | 257 | 9305 | 1 | Healthy | 129 | other | Visual | 157 GB |
103
70
  | [ds005514](https://nemar.org/dataexplorer/detail?dataset_id=ds005514) | 295 | 11565 | 1 | Healthy | 129 | other | Visual | 185 GB |
104
- | [ds005672](https://nemar.org/dataexplorer/detail?dataset_id=ds005672) | 3 | 18 | 1 | Healthy | 64 | 10-20 | Visual | 4.2 GB |
105
- | [ds005697](https://nemar.org/dataexplorer/detail?dataset_id=ds005697) | 52 | 210 | 1 | Healthy | 64 | 10-20 | Visual | 67 GB |
106
- | [ds005787](https://nemar.org/dataexplorer/detail?dataset_id=ds005787) | 30 | ? | 4 | Healthy | 64 | 10-20 | Visual | 185 GB |
71
+
72
+ A total of [246 other datasets](datasets.md) are also available through EEGDash.
107
73
 
108
74
  ## Data format
109
75
  EEGDash queries return a **Pytorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are the best format for EEGDash queries because they provide an efficient, scalable, and flexible structure for machine learning (ML) and deep learning (DL) applications. They allow seamless integration with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.
@@ -115,7 +81,7 @@ EEGDash datasets are processed using the popular [BrainDecode](https://braindeco
115
81
 
116
82
  ### Install
117
83
  Use your preferred Python environment manager with Python > 3.9 to install the package.
118
- * To install the eegdash package, use the following temporary command (a direct pip install eegdash option will be available soon): `pip install -i https://test.pypi.org/simple/eegdash`
84
+ * To install the eegdash package, use the following command: `pip install eegdash`
119
85
  * To verify the installation, start a Python session and type: `from eegdash import EEGDash`
120
86
 
121
87
  ### Data access
@@ -4,60 +4,21 @@ To leverage recent and ongoing advancements in large-scale computational methods
4
4
  ## Data source
5
5
  The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
6
6
 
7
- ## Available data
7
+ ## Featured data
8
8
 
9
- The following datasets are currently available on EEGDash.
9
+ The following HBN datasets are currently featured on EEGDash. Documentation about these datasets is available [here](https://neuromechanist.github.io/data/hbn/).
10
10
 
11
11
  | DatasetID | Participants | Files | Sessions | Population | Channels | Is 10-20? | Modality | Size |
12
12
  |---|---|---|---|---|---|---|---|---|
13
- | [ds002181](https://nemar.org/dataexplorer/detail?dataset_id=ds002181) | 20 | 949 | 1 | Healthy | 63 | 10-20 | Visual | 0.163 GB |
14
- | [ds002578](https://nemar.org/dataexplorer/detail?dataset_id=ds002578) | 2 | 22 | 1 | Healthy | 256 | 10-20 | Visual | 0.001 TB |
15
- | [ds002680](https://nemar.org/dataexplorer/detail?dataset_id=ds002680) | 14 | 4977 | 2 | Healthy | 0 | 10-20 | Visual | 0.01 TB |
16
- | [ds002691](https://nemar.org/dataexplorer/detail?dataset_id=ds002691) | 20 | 146 | 1 | Healthy | 32 | other | Visual | 0.001 TB |
17
- | [ds002718](https://nemar.org/dataexplorer/detail?dataset_id=ds002718) | 18 | 582 | 1 | Healthy | 70 | other | Visual | 0.005 TB |
18
- | [ds003061](https://nemar.org/dataexplorer/detail?dataset_id=ds003061) | 13 | 282 | 1 | Not specified | 64 | 10-20 | Auditory | 0.002 TB |
19
- | [ds003690](https://nemar.org/dataexplorer/detail?dataset_id=ds003690) | 75 | 2630 | 1 | Healthy | 61 | 10-20 | Auditory | 0.023 TB |
20
- | [ds003805](https://nemar.org/dataexplorer/detail?dataset_id=ds003805) | 1 | 10 | 1 | Healthy | 19 | 10-20 | Multisensory | 0 TB |
21
- | [ds003838](https://nemar.org/dataexplorer/detail?dataset_id=ds003838) | 65 | 947 | 1 | Healthy | 63 | 10-20 | Auditory | 100.2 GB |
22
- | [ds004010](https://nemar.org/dataexplorer/detail?dataset_id=ds004010) | 24 | 102 | 1 | Healthy | 64 | other | Multisensory | 0.025 TB |
23
- | [ds004040](https://nemar.org/dataexplorer/detail?dataset_id=ds004040) | 13 | 160 | 2 | Healthy | 64 | 10-20 | Auditory | 0.012 TB |
24
- | [ds004350](https://nemar.org/dataexplorer/detail?dataset_id=ds004350) | 24 | 960 | 2 | Healthy | 64 | other | Visual | 0.023 TB |
25
- | [ds004362](https://nemar.org/dataexplorer/detail?dataset_id=ds004362) | 109 | 9162 | 1 | Healthy | 64 | 10-20 | Visual | 0.008 TB |
26
- | [ds004504](https://nemar.org/dataexplorer/detail?dataset_id=ds004504) | 88 | 269 | 1 | Dementia | 19 | 10-20 | Resting State | 2.6 GB |
27
- | [ds004554](https://nemar.org/dataexplorer/detail?dataset_id=ds004554) | 16 | 101 | 1 | Healthy | 99 | 10-20 | Visual | 0.009 TB |
28
- | [ds004635](https://nemar.org/dataexplorer/detail?dataset_id=ds004635) | 48 | 292 | 1 | Healthy | 129 | other | Multisensory | 26.1 GB |
29
- | [ds004657](https://nemar.org/dataexplorer/detail?dataset_id=ds004657) | 24 | 838 | 6 | Not specified | 64 | 10-20 | Motor | 43.1 GB |
30
- | [ds004660](https://nemar.org/dataexplorer/detail?dataset_id=ds004660) | 21 | 299 | 1 | Healthy | 32 | 10-20 | Multisensory | 7.2 GB |
31
- | [ds004661](https://nemar.org/dataexplorer/detail?dataset_id=ds004661) | 17 | 90 | 1 | Not specified | 64 | 10-20 | Multisensory | 1.4 GB |
32
- | [ds004745](https://nemar.org/dataexplorer/detail?dataset_id=ds004745) | 52 | 762 | 1 | Healthy | 64 | ? | Auditory | 0 TB |
33
- | [ds004785](https://nemar.org/dataexplorer/detail?dataset_id=ds004785) | 17 | 74 | 1 | Healthy | 32 | ? | Motor | 0 TB |
34
- | [ds004841](https://nemar.org/dataexplorer/detail?dataset_id=ds004841) | 20 | 1034 | 2 | Not specified | 64 | 10-20 | Multisensory | 7.3 GB |
35
- | [ds004842](https://nemar.org/dataexplorer/detail?dataset_id=ds004842) | 14 | 719 | 2 | Not specified | 64 | ? | Multisensory | 5.2 GB |
36
- | [ds004843](https://nemar.org/dataexplorer/detail?dataset_id=ds004843) | 14 | 649 | 1 | Not specified | 64 | ? | Visual | 7.7 GB |
37
- | [ds004844](https://nemar.org/dataexplorer/detail?dataset_id=ds004844) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 22.3 GB |
38
- | [ds004849](https://nemar.org/dataexplorer/detail?dataset_id=ds004849) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
39
- | [ds004850](https://nemar.org/dataexplorer/detail?dataset_id=ds004850) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
40
- | [ds004851](https://nemar.org/dataexplorer/detail?dataset_id=ds004851) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
41
- | [ds004852](https://nemar.org/dataexplorer/detail?dataset_id=ds004852) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
42
- | [ds004853](https://nemar.org/dataexplorer/detail?dataset_id=ds004853) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
43
- | [ds004854](https://nemar.org/dataexplorer/detail?dataset_id=ds004854) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
44
- | [ds004855](https://nemar.org/dataexplorer/detail?dataset_id=ds004855) | 17 | 481 | 4 | Not specified | 64 | ? | Multisensory | 0.077 GB |
45
- | [ds005034](https://nemar.org/dataexplorer/detail?dataset_id=ds005034) | 25 | 406 | 2 | Healthy | 129 | ? | Visual | 61.4 GB |
46
- | [ds005079](https://nemar.org/dataexplorer/detail?dataset_id=ds005079) | 1 | 210 | 12 | Healthy | 64 | ? | Multisensory | 1.7 GB |
47
- | [ds005342](https://nemar.org/dataexplorer/detail?dataset_id=ds005342) | 32 | 134 | 1 | Healthy | 17 | ? | Visual | 2 GB |
48
- | [ds005410](https://nemar.org/dataexplorer/detail?dataset_id=ds005410) | 81 | 492 | 1 | Healthy | 63 | ? | ? | 19.8 GB |
49
13
  | [ds005505](https://nemar.org/dataexplorer/detail?dataset_id=ds005505) | 136 | 5393 | 1 | Healthy | 129 | other | Visual | 103 GB |
50
14
  | [ds005506](https://nemar.org/dataexplorer/detail?dataset_id=ds005506) | 150 | 5645 | 1 | Healthy | 129 | other | Visual | 112 GB |
51
15
  | [ds005507](https://nemar.org/dataexplorer/detail?dataset_id=ds005507) | 184 | 7273 | 1 | Healthy | 129 | other | Visual | 140 GB |
52
16
  | [ds005508](https://nemar.org/dataexplorer/detail?dataset_id=ds005508) | 324 | 13393 | 1 | Healthy | 129 | other | Visual | 230 GB |
53
- | [ds005509](https://nemar.org/dataexplorer/detail?dataset_id=ds005509) | 330 | 19980 | 1 | Healthy | 129 | other | Visual | 224 GB |
54
17
  | [ds005510](https://nemar.org/dataexplorer/detail?dataset_id=ds005510) | 135 | 4933 | 1 | Healthy | 129 | other | Visual | 91 GB |
55
- | [ds005511](https://nemar.org/dataexplorer/detail?dataset_id=ds005511) | 381 | 18604 | 1 | Healthy | 129 | other | Visual | 245 GB |
56
18
  | [ds005512](https://nemar.org/dataexplorer/detail?dataset_id=ds005512) | 257 | 9305 | 1 | Healthy | 129 | other | Visual | 157 GB |
57
19
  | [ds005514](https://nemar.org/dataexplorer/detail?dataset_id=ds005514) | 295 | 11565 | 1 | Healthy | 129 | other | Visual | 185 GB |
58
- | [ds005672](https://nemar.org/dataexplorer/detail?dataset_id=ds005672) | 3 | 18 | 1 | Healthy | 64 | 10-20 | Visual | 4.2 GB |
59
- | [ds005697](https://nemar.org/dataexplorer/detail?dataset_id=ds005697) | 52 | 210 | 1 | Healthy | 64 | 10-20 | Visual | 67 GB |
60
- | [ds005787](https://nemar.org/dataexplorer/detail?dataset_id=ds005787) | 30 | ? | 4 | Healthy | 64 | 10-20 | Visual | 185 GB |
20
+
21
+ A total of [246 other datasets](datasets.md) are also available through EEGDash.
61
22
 
62
23
  ## Data format
63
24
  EEGDash queries return a **Pytorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are the best format for EEGDash queries because they provide an efficient, scalable, and flexible structure for machine learning (ML) and deep learning (DL) applications. They allow seamless integration with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.
@@ -69,7 +30,7 @@ EEGDash datasets are processed using the popular [BrainDecode](https://braindeco
69
30
 
70
31
  ### Install
71
32
  Use your preferred Python environment manager with Python > 3.9 to install the package.
72
- * To install the eegdash package, use the following temporary command (a direct pip install eegdash option will be available soon): `pip install -i https://test.pypi.org/simple/eegdash`
33
+ * To install the eegdash package, use the following command: `pip install eegdash`
73
34
  * To verify the installation, start a Python session and type: `from eegdash import EEGDash`
74
35
 
75
36
  ### Data access
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "eegdash"
7
- version = "0.0.8"
7
+ version = "0.0.9"
8
8
  authors = [
9
9
  { name="Young Truong", email="dt.young112@gmail.com" },
10
10
  { name="Arnaud Delorme", email="adelorme@gmail.com" },
@@ -29,8 +29,13 @@ dependencies = [
29
29
  "joblib",
30
30
  "braindecode",
31
31
  "mne-bids",
32
+ "pybids",
33
+ "pymatreader",
34
+ "pyarrow",
35
+ "tqdm",
36
+ "numba",
32
37
  ]
33
38
  [project.urls]
34
- Homepage = "https://github.com/sccn/EEG-Dash-Data"
35
- Issues = "https://github.com/sccn/EEG-Dash-Data/issues"
39
+ Homepage = "https://eegdash.org"
40
+ Issues = "https://github.com/sccn/EEGDash/issues"
36
41
 
@@ -0,0 +1,28 @@
1
+ config = {
2
+ "required_fields": ["data_name"],
3
+ "attributes": {
4
+ "data_name": "str",
5
+ "dataset": "str",
6
+ "bidspath": "str",
7
+ "subject": "str",
8
+ "task": "str",
9
+ "session": "str",
10
+ "run": "str",
11
+ "sampling_frequency": "float",
12
+ "modality": "str",
13
+ "nchans": "int",
14
+ "ntimes": "int"
15
+ },
16
+ "description_fields": ["subject", "session", "run", "task", "age", "gender", "sex"],
17
+ "bids_dependencies_files": [
18
+ "dataset_description.json",
19
+ "participants.tsv",
20
+ "events.tsv",
21
+ "events.json",
22
+ "eeg.json",
23
+ "electrodes.tsv",
24
+ "channels.tsv",
25
+ "coordsystem.json"
26
+ ],
27
+ "accepted_query_fields": ["data_name", "dataset"]
28
+ }
@@ -17,6 +17,7 @@ import mne_bids
17
17
  from mne_bids import (
18
18
  BIDSPath,
19
19
  )
20
+ from bids import BIDSLayout
20
21
 
21
22
  class EEGDashBaseDataset(BaseDataset):
22
23
  """Returns samples from an mne.io.Raw object along with a target.
@@ -96,7 +97,7 @@ class EEGDashBaseDataset(BaseDataset):
96
97
 
97
98
  def __len__(self):
98
99
  if self._raw is None:
99
- return self.record['rawdatainfo']['ntimes']
100
+ return int(self.record['ntimes'] * self.record['sampling_frequency'])
100
101
  else:
101
102
  return len(self._raw)
102
103
 
@@ -216,39 +217,49 @@ class EEGDashBaseRaw(BaseRaw):
216
217
  _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
217
218
 
218
219
 
219
- class BIDSDataset():
220
+ class EEGBIDSDataset():
220
221
  ALLOWED_FILE_FORMAT = ['eeglab', 'brainvision', 'biosemi', 'european']
221
- RAW_EXTENSION = {
222
- 'eeglab': '.set',
223
- 'brainvision': '.vhdr',
224
- 'biosemi': '.bdf',
225
- 'european': '.edf'
226
- }
222
+ RAW_EXTENSIONS = {
223
+ '.set': ['.set', '.fdt'], # eeglab
224
+ '.edf': ['.edf'], # european
225
+ '.vhdr': ['.eeg', '.vhdr', '.vmrk', '.dat', '.raw'], # brainvision
226
+ '.bdf': ['.bdf'], # biosemi
227
+ }
227
228
  METADATA_FILE_EXTENSIONS = ['eeg.json', 'channels.tsv', 'electrodes.tsv', 'events.tsv', 'events.json']
228
229
  def __init__(self,
229
230
  data_dir=None, # location of bids dataset
230
231
  dataset='', # dataset name
231
- raw_format='eeglab', # format of raw data
232
232
  ):
233
233
  if data_dir is None or not os.path.exists(data_dir):
234
234
  raise ValueError('data_dir must be specified and must exist')
235
235
  self.bidsdir = Path(data_dir)
236
236
  self.dataset = dataset
237
237
  assert str(self.bidsdir).endswith(self.dataset)
238
-
239
- if raw_format.lower() not in self.ALLOWED_FILE_FORMAT:
240
- raise ValueError('raw_format must be one of {}'.format(self.ALLOWED_FILE_FORMAT))
241
- self.raw_format = raw_format.lower()
242
-
243
- # get all .set files in the bids directory
244
- temp_dir = (Path().resolve() / 'data')
245
- if not os.path.exists(temp_dir):
246
- os.mkdir(temp_dir)
247
- if not os.path.exists(temp_dir / f'{dataset}_files.npy'):
248
- self.files = self.get_files_with_extension_parallel(self.bidsdir, extension=self.RAW_EXTENSION[self.raw_format])
249
- np.save(temp_dir / f'{dataset}_files.npy', self.files)
250
- else:
251
- self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
238
+ self.layout = BIDSLayout(data_dir)
239
+
240
+ # get all recording files in the bids directory
241
+ self.files = self.get_recordings(self.layout)
242
+ assert len(self.files) > 0, ValueError('Unable to construct EEG dataset. No EEG recordings found.')
243
+ assert self.check_eeg_dataset(), ValueError('Dataset is not an EEG dataset.')
244
+ # temp_dir = (Path().resolve() / 'data')
245
+ # if not os.path.exists(temp_dir):
246
+ # os.mkdir(temp_dir)
247
+ # if not os.path.exists(temp_dir / f'{dataset}_files.npy'):
248
+ # self.files = self.get_files_with_extension_parallel(self.bidsdir, extension=self.RAW_EXTENSION[self.raw_format])
249
+ # np.save(temp_dir / f'{dataset}_files.npy', self.files)
250
+ # else:
251
+ # self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
252
+
253
+ def check_eeg_dataset(self):
254
+ return self.get_bids_file_attribute('modality', self.files[0]).lower() == 'eeg'
255
+
256
+ def get_recordings(self, layout:BIDSLayout):
257
+ files = []
258
+ for ext, exts in self.RAW_EXTENSIONS.items():
259
+ files = layout.get(extension=ext, return_type='filename')
260
+ if files:
261
+ break
262
+ return files
252
263
 
253
264
  def get_relative_bidspath(self, filename):
254
265
  bids_parent_dir = self.bidsdir.parent
@@ -301,11 +312,6 @@ class BIDSDataset():
301
312
  filepath = path / file
302
313
  bids_files.append(filepath)
303
314
 
304
- # cur_file_basename = file[:file.rfind('_')] # TODO: change to just search for any file with extension
305
- # if file.endswith(extension) and cur_file_basename in basename:
306
- # filepath = path / file
307
- # bids_files.append(filepath)
308
-
309
315
  # check if file is in top level directory
310
316
  if any(file in os.listdir(path) for file in top_level_files):
311
317
  return bids_files
@@ -338,7 +344,7 @@ class BIDSDataset():
338
344
 
339
345
  def scan_directory(self, directory, extension):
340
346
  result_files = []
341
- directory_to_ignore = ['.git']
347
+ directory_to_ignore = ['.git', '.datalad', 'derivatives', 'code']
342
348
  with os.scandir(directory) as entries:
343
349
  for entry in entries:
344
350
  if entry.is_file() and entry.name.endswith(extension):
@@ -419,32 +425,22 @@ class BIDSDataset():
419
425
  json_dict.update(json.load(f))
420
426
  return json_dict
421
427
 
422
- def sfreq(self, data_filepath):
423
- json_files = self.get_bids_metadata_files(data_filepath, 'eeg.json')
424
- if len(json_files) == 0:
425
- raise ValueError('No eeg.json found')
426
-
427
- metadata = self.resolve_bids_json(json_files)
428
- if 'SamplingFrequency' not in metadata:
429
- raise ValueError('SamplingFrequency not found in metadata')
430
- else:
431
- return metadata['SamplingFrequency']
432
-
433
- def task(self, data_filepath):
434
- return self.get_property_from_filename('task', data_filepath)
435
-
436
- def session(self, data_filepath):
437
- return self.get_property_from_filename('session', data_filepath)
438
-
439
- def run(self, data_filepath):
440
- return self.get_property_from_filename('run', data_filepath)
441
-
442
- def subject(self, data_filepath):
443
- return self.get_property_from_filename('sub', data_filepath)
444
-
445
- def num_channels(self, data_filepath):
446
- channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
447
- return len(channels_tsv)
428
+ def get_bids_file_attribute(self, attribute, data_filepath):
429
+ entities = self.layout.parse_file_entities(data_filepath)
430
+ bidsfile = self.layout.get(**entities)[0]
431
+ attributes = bidsfile.get_entities(metadata='all')
432
+ attribute_mapping = {
433
+ 'sfreq': 'SamplingFrequency',
434
+ 'modality': 'datatype',
435
+ 'task': 'task',
436
+ 'session': 'session',
437
+ 'run': 'run',
438
+ 'subject': 'subject',
439
+ 'ntimes': 'RecordingDuration',
440
+ 'nchans': 'EEGChannelCount'
441
+ }
442
+ attribute_value = attributes.get(attribute_mapping.get(attribute), None)
443
+ return attribute_value
448
444
 
449
445
  def channel_labels(self, data_filepath):
450
446
  channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
@@ -462,9 +458,12 @@ class BIDSDataset():
462
458
  def subject_participant_tsv(self, data_filepath):
463
459
  '''Get participants_tsv info of a subject based on filepath'''
464
460
  participants_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'participants.tsv')[0], sep='\t')
461
+ # if participants_tsv is not empty
462
+ if participants_tsv.empty:
463
+ return {}
465
464
  # set 'participant_id' as index
466
465
  participants_tsv.set_index('participant_id', inplace=True)
467
- subject = f'sub-{self.subject(data_filepath)}'
466
+ subject = f"sub-{self.get_bids_file_attribute('subject', data_filepath)}"
468
467
  return participants_tsv.loc[subject].to_dict()
469
468
 
470
469
  def eeg_json(self, data_filepath):
@@ -0,0 +1,25 @@
1
+ # Features datasets
2
+ from .datasets import FeaturesDataset, FeaturesConcatDataset
3
+ from .serialization import load_features_concat_dataset
4
+
5
+ # Feature extraction
6
+ from .extractors import (
7
+ FeatureExtractor,
8
+ FitableFeature,
9
+ UnivariateFeature,
10
+ BivariateFeature,
11
+ DirectedBivariateFeature,
12
+ MultivariateFeature,
13
+ )
14
+ from .decorators import (
15
+ FeaturePredecessor,
16
+ FeatureKind,
17
+ univariate_feature,
18
+ bivariate_feature,
19
+ directed_bivariate_feature,
20
+ multivariate_feature,
21
+ )
22
+ from .utils import extract_features, fit_feature_extractors
23
+
24
+ # Features:
25
+ from .feature_bank import *