eegdash 0.0.9__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

Files changed (75) hide show
  1. eegdash-0.2.0/.github/workflows/pre-commit.yaml +14 -0
  2. eegdash-0.2.0/.github/workflows/tests.yml +49 -0
  3. eegdash-0.2.0/.gitignore +17 -0
  4. eegdash-0.2.0/.pre-commit-config.yaml +48 -0
  5. eegdash-0.2.0/.readthedocs.yaml +24 -0
  6. eegdash-0.2.0/DevNotes.md +29 -0
  7. {eegdash-0.0.9 → eegdash-0.2.0}/LICENSE +1 -0
  8. {eegdash-0.0.9/src/eegdash.egg-info → eegdash-0.2.0}/PKG-INFO +67 -20
  9. {eegdash-0.0.9 → eegdash-0.2.0}/README.md +16 -2
  10. eegdash-0.2.0/datasets.md +254 -0
  11. eegdash-0.2.0/docs/Makefile +20 -0
  12. eegdash-0.2.0/docs/architecture2.pptx +0 -0
  13. eegdash-0.2.0/docs/conf.py +31 -0
  14. eegdash-0.2.0/docs/convert_xls_2_martkdown.py +36 -0
  15. eegdash-0.2.0/docs/datasets.xlsx +0 -0
  16. eegdash-0.2.0/docs/index.rst +17 -0
  17. eegdash-0.2.0/docs/make.bat +35 -0
  18. eegdash-0.2.0/eegdash/__init__.py +8 -0
  19. eegdash-0.2.0/eegdash/api.py +690 -0
  20. eegdash-0.2.0/eegdash/data_config.py +34 -0
  21. eegdash-0.2.0/eegdash/data_utils.py +623 -0
  22. eegdash-0.2.0/eegdash/dataset.py +60 -0
  23. eegdash-0.2.0/eegdash/features/__init__.py +53 -0
  24. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/datasets.py +62 -23
  25. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/decorators.py +14 -6
  26. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/extractors.py +22 -22
  27. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/feature_bank/complexity.py +6 -3
  28. eegdash-0.2.0/eegdash/features/feature_bank/connectivity.py +59 -0
  29. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/feature_bank/csp.py +3 -4
  30. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/feature_bank/dimensionality.py +8 -5
  31. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/feature_bank/signal.py +30 -4
  32. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/feature_bank/spectral.py +10 -28
  33. eegdash-0.2.0/eegdash/features/feature_bank/utils.py +48 -0
  34. eegdash-0.2.0/eegdash/features/inspect.py +48 -0
  35. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/serialization.py +4 -5
  36. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/utils.py +9 -7
  37. eegdash-0.2.0/eegdash/preprocessing.py +65 -0
  38. eegdash-0.2.0/eegdash/utils.py +11 -0
  39. {eegdash-0.0.9 → eegdash-0.2.0/eegdash.egg-info}/PKG-INFO +67 -20
  40. eegdash-0.2.0/eegdash.egg-info/SOURCES.txt +63 -0
  41. eegdash-0.2.0/eegdash.egg-info/requires.txt +38 -0
  42. eegdash-0.2.0/eegdash.egg-info/top_level.txt +1 -0
  43. eegdash-0.2.0/notebooks/scratch.ipynb +1097 -0
  44. eegdash-0.2.0/notebooks/scratch_features.ipynb +462465 -0
  45. eegdash-0.2.0/notebooks/scratch_features2.ipynb +55014 -0
  46. eegdash-0.2.0/notebooks/test_pybids_braindecode_BIDSDataset.ipynb +646 -0
  47. eegdash-0.2.0/notebooks/tutorial_audi_oddball.ipynb +441 -0
  48. eegdash-0.2.0/notebooks/tutorial_eoec.ipynb +515 -0
  49. eegdash-0.2.0/notebooks/tutorial_features_eoec.ipynb +58788 -0
  50. eegdash-0.2.0/notebooks/tutorial_p3_oddball.ipynb +511 -0
  51. eegdash-0.2.0/notebooks/tutorial_pfactor_classification.ipynb +4786 -0
  52. eegdash-0.2.0/notebooks/tutorial_pfactor_features.ipynb +38380 -0
  53. eegdash-0.2.0/notebooks/tutorial_sex_classification.ipynb +549 -0
  54. eegdash-0.2.0/pyproject.toml +114 -0
  55. eegdash-0.2.0/scripts/data_ingest.py +404 -0
  56. eegdash-0.2.0/scripts/datasets.json +1 -0
  57. eegdash-0.2.0/scripts/scan_openneuro.py +67 -0
  58. eegdash-0.2.0/tests/__init__.py +5 -0
  59. eegdash-0.2.0/tests/test_correctness.py +210 -0
  60. eegdash-0.2.0/tests/test_database.py +21 -0
  61. eegdash-0.2.0/tests/test_dataset.py +14 -0
  62. eegdash-0.2.0/tests/test_init.py +43 -0
  63. eegdash-0.0.9/pyproject.toml +0 -41
  64. eegdash-0.0.9/src/eegdash/__init__.py +0 -1
  65. eegdash-0.0.9/src/eegdash/data_config.py +0 -28
  66. eegdash-0.0.9/src/eegdash/data_utils.py +0 -480
  67. eegdash-0.0.9/src/eegdash/features/__init__.py +0 -25
  68. eegdash-0.0.9/src/eegdash/features/feature_bank/connectivity.py +0 -99
  69. eegdash-0.0.9/src/eegdash/main.py +0 -359
  70. eegdash-0.0.9/src/eegdash.egg-info/SOURCES.txt +0 -25
  71. eegdash-0.0.9/src/eegdash.egg-info/requires.txt +0 -15
  72. eegdash-0.0.9/src/eegdash.egg-info/top_level.txt +0 -2
  73. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash/features/feature_bank/__init__.py +3 -3
  74. {eegdash-0.0.9/src → eegdash-0.2.0}/eegdash.egg-info/dependency_links.txt +0 -0
  75. {eegdash-0.0.9 → eegdash-0.2.0}/setup.cfg +0 -0
@@ -0,0 +1,14 @@
1
+ name: pre-commit-codestyle
2
+ concurrency:
3
+ group: ${{ github.workflow }}-${{ github.event.number }}-${{ github.event.ref }}
4
+ cancel-in-progress: true
5
+
6
+ on: [push]
7
+
8
+ jobs:
9
+ pre-commit:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - uses: actions/setup-python@v5
14
+ - uses: pre-commit/action@v3.0.1
@@ -0,0 +1,49 @@
1
+ name: tests
2
+ concurrency:
3
+ group: ${{ github.workflow }}-${{ github.event.number }}-${{ github.event.ref }}
4
+ cancel-in-progress: true
5
+ on:
6
+ push:
7
+ branches:
8
+ - "*"
9
+ pull_request:
10
+ branches:
11
+ - '*' # all branches, including forks
12
+
13
+ jobs:
14
+ test:
15
+ runs-on: ${{ matrix.os }}
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ os: [ "ubuntu-latest", "macos-latest", "windows-latest" ]
20
+ python-version: ["3.10", "3.12"] # first and last supported Python version
21
+ steps:
22
+ ## Check out the repository
23
+ - name: Checking Out Repository
24
+ uses: actions/checkout@v4
25
+ # Cache MNE Data
26
+ # The cache key here is fixed except for os
27
+ # so if you download a new mne dataset in the code, best to manually increment the key below
28
+ - name: Create/Restore MNE Data Cache
29
+ id: cache-mne_data
30
+ uses: actions/cache@v3
31
+ with:
32
+ path: ~/mne_data
33
+ key: ${{ runner.os }}-v3
34
+
35
+ - name: Install uv and set the python version
36
+ uses: astral-sh/setup-uv@v5
37
+ with:
38
+ enable-cache: true
39
+ python-version: ${{ matrix.python-version }}
40
+ - name: Show Python Version
41
+ run: python --version
42
+ - name: Install EEGDash from Current Checkout
43
+ run: uv pip install -e .[tests]
44
+ # Show EEGDash Version
45
+ - name: Show EEGDash Version
46
+ run: python -c "import eegdash; print(eegdash.__version__)"
47
+ - name: Run Tests
48
+ run: pytest -vvv -s --tb=long --durations=0 --log-cli-level=INFO --cov=eegdash --cov-report=xml tests --verbose
49
+
@@ -0,0 +1,17 @@
1
+ *.egg-info
2
+ *.npy
3
+ */*.npy
4
+ */*/*.npy
5
+ eegdash/data/
6
+ **/__pycache__/
7
+ .venv
8
+ .secret
9
+ note
10
+ tests/.testeegdash/
11
+ dist/
12
+ **/.eegdash_cache/
13
+ **/*/eegdash.egg-info/
14
+ .env
15
+ tests/data/
16
+ .ruff_cache/
17
+ .vscode/
@@ -0,0 +1,48 @@
1
+ default_language_version:
2
+ python: python3
3
+ ci:
4
+ autofix_commit_msg: '[pre-commit.ci] auto fixes from pre-commit.com hooks'
5
+ autofix_prs: true
6
+ autoupdate_branch: master
7
+ autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
8
+ autoupdate_schedule: weekly
9
+ skip: []
10
+ submodules: false
11
+ repos:
12
+ - repo: https://github.com/astral-sh/ruff-pre-commit
13
+ rev: v0.12.7
14
+ hooks:
15
+ - id: ruff
16
+ name: ruff lint & format
17
+ args:
18
+ - --fix
19
+ - --show-fixes
20
+ # Selects the pycodestyle (E,W), Pyflakes (F), pydocstyle (D) and NPY201 rules; import sorting is
21
+ # handled by the separate isort hook below. Unused import removal (F401 is part of 'F') is not ignored.
22
+ - --select=E,W,F,D,NPY201
23
+ - --ignore=E402,E501,D100,D101,D102,D103,D104,D105,D107,D205,D400,D415,D417,F403,D401,E741,E722,D419
24
+ files: ^(eegdash|docs|examples|notebooks|scripts|tests)/
25
+ exclude: \.ipynb$
26
+ - id: ruff-format
27
+ name: ruff format code
28
+ - repo: https://github.com/codespell-project/codespell
29
+ rev: v2.4.1
30
+ hooks:
31
+ - id: codespell
32
+ args:
33
+ - --ignore-words-list=carin,splitted,meaned,wil,whats,additionals,alle,alot,bund,currenty,datas,farenheit,falsy,fo,haa,hass,iif,incomfort,ines,ist,nam,nd,pres,pullrequests,resset,rime,ser,serie,te,technik,ue,unsecure,withing,zar,mane,THIRDPARTY
34
+ - --skip="./.*,*.csv,*.json,*.ambr,*.toml"
35
+ - --quiet-level=2
36
+ exclude_types:
37
+ - csv
38
+ - json
39
+ - repo: https://github.com/asottile/blacken-docs
40
+ rev: 1.19.1
41
+ hooks:
42
+ - id: blacken-docs
43
+ exclude: ^.github|CONTRIBUTING.md
44
+ - repo: https://github.com/PyCQA/isort
45
+ rev: 6.0.1
46
+ hooks:
47
+ - id: isort
48
+ exclude: ^\.gitignore
@@ -0,0 +1,24 @@
1
+
2
+ # Read the Docs configuration file
3
+ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4
+
5
+ # Required
6
+ version: 2
7
+
8
+ # Set the OS, Python version, and other tools you might need
9
+ build:
10
+ os: ubuntu-24.04
11
+ tools:
12
+ python: "3.13"
13
+
14
+ # Build documentation in the "docs/" directory with Sphinx
15
+ sphinx:
16
+ configuration: docs/conf.py
17
+
18
+ # Optionally, but recommended,
19
+ # declare the Python requirements required to build your documentation
20
+ # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
21
+ # python:
22
+ # install:
23
+ # - requirements: docs/requirements.txt
24
+
@@ -0,0 +1,29 @@
1
+ ## Install locally
2
+ pip install -r requirements.txt
3
+
4
+ pip uninstall eegdash -y
5
+ python -m pip install --editable /Users/arno/Python/EEG-Dash-Data
6
+ # Warning: use the exact command above; "pip install" by itself might not work
7
+
8
+ ### check if working from different folders
9
+ python -c "from eegdash import EEGDashDataset; print(EEGDashDataset)"
10
+
11
+ ## Run hooks
12
+ pip install pre-commit
13
+ pre-commit install
14
+ pre-commit run --all-files
15
+
16
+ ## Create package and release on Pypi
17
+ Documentation is at https://packaging.python.org/en/latest/tutorials/packaging-projects/
18
+ - Update version in pyproject.toml
19
+ - Run "python -m build"
20
+ - "python -m twine upload --repository testpypi dist/*" OR "python -m twine upload dist/*"
21
+ Look for API token in email (different for test and regular)
22
+
23
+ ## Populate database
24
+ - Log on mongodb.com with sccn user sccn3709@gmail.com (see email for pass)
25
+ - Change eegdash or eegdashstaging in main.py
26
+ - Run scripts/data_ingest.py
27
+
28
+ # Remount
29
+ sudo sshfs -o allow_other,IdentityFile=/home/dung/.ssh/id_rsa arno@login.expanse.sdsc.edu:/expanse/projects/nemar /mnt/nemar/
@@ -4,6 +4,7 @@ Copyright (C) 2024-2025
4
4
 
5
5
  Young Truong, UCSD, dt.young112@gmail.com
6
6
  Arnaud Delorme, UCSD, adelorme@ucsd.edu
7
+ Bruno Aristimunha, b.aristimunha@gmail.com
7
8
 
8
9
  This program is free software; you can redistribute it and/or modify
9
10
  it under the terms of the GNU General Public License as published by
@@ -1,14 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.0.9
3
+ Version: 0.2.0
4
4
  Summary: EEG data for machine learning
5
- Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
5
+ Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
6
6
  License: GNU General Public License
7
7
 
8
8
  Copyright (C) 2024-2025
9
9
 
10
10
  Young Truong, UCSD, dt.young112@gmail.com
11
11
  Arnaud Delorme, UCSD, adelorme@ucsd.edu
12
+ Bruno Aristimunha, b.aristimunha@gmail.com
12
13
 
13
14
  This program is free software; you can redistribute it and/or modify
14
15
  it under the terms of the GNU General Public License as published by
@@ -24,35 +25,69 @@ License: GNU General Public License
24
25
  along with this program; if not, write to the Free Software
25
26
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26
27
 
27
- Project-URL: Homepage, https://eegdash.org
28
- Project-URL: Issues, https://github.com/sccn/EEGDash/issues
29
- Classifier: Programming Language :: Python :: 3
28
+ Project-URL: Homepage, https://github.com/sccn/EEG-Dash-Data
29
+ Project-URL: Issues, https://github.com/sccn/EEG-Dash-Data/issues
30
30
  Classifier: License :: OSI Approved :: MIT License
31
31
  Classifier: Operating System :: OS Independent
32
- Requires-Python: >=3.8
32
+ Classifier: Intended Audience :: Science/Research
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: Programming Language :: Python
35
+ Classifier: Topic :: Software Development
36
+ Classifier: Topic :: Scientific/Engineering
37
+ Classifier: Development Status :: 3 - Alpha
38
+ Classifier: Operating System :: Microsoft :: Windows
39
+ Classifier: Operating System :: POSIX
40
+ Classifier: Operating System :: Unix
41
+ Classifier: Operating System :: MacOS
42
+ Classifier: Programming Language :: Python :: 3
43
+ Classifier: Programming Language :: Python :: 3.10
44
+ Classifier: Programming Language :: Python :: 3.11
45
+ Classifier: Programming Language :: Python :: 3.12
46
+ Requires-Python: >=3.10
33
47
  Description-Content-Type: text/markdown
34
48
  License-File: LICENSE
35
- Requires-Dist: xarray
49
+ Requires-Dist: braindecode>=1.0
50
+ Requires-Dist: mne_bids>=0.16.0
51
+ Requires-Dist: numba
52
+ Requires-Dist: numpy
53
+ Requires-Dist: pandas
54
+ Requires-Dist: pybids
55
+ Requires-Dist: pymongo
36
56
  Requires-Dist: python-dotenv
37
57
  Requires-Dist: s3fs
38
- Requires-Dist: mne
39
- Requires-Dist: pynwb
40
- Requires-Dist: h5py
41
- Requires-Dist: pymongo
42
- Requires-Dist: joblib
43
- Requires-Dist: braindecode
44
- Requires-Dist: mne-bids
45
- Requires-Dist: pybids
46
- Requires-Dist: pymatreader
47
- Requires-Dist: pyarrow
58
+ Requires-Dist: scipy
48
59
  Requires-Dist: tqdm
49
- Requires-Dist: numba
60
+ Requires-Dist: xarray
61
+ Provides-Extra: tests
62
+ Requires-Dist: pytest; extra == "tests"
63
+ Requires-Dist: pytest-cov; extra == "tests"
64
+ Requires-Dist: codecov; extra == "tests"
65
+ Requires-Dist: pytest_cases; extra == "tests"
66
+ Provides-Extra: dev
67
+ Requires-Dist: pre-commit; extra == "dev"
68
+ Provides-Extra: docs
69
+ Requires-Dist: sphinx; extra == "docs"
70
+ Requires-Dist: sphinx_gallery; extra == "docs"
71
+ Requires-Dist: sphinx_rtd_theme; extra == "docs"
72
+ Requires-Dist: numpydoc; extra == "docs"
73
+ Provides-Extra: all
74
+ Requires-Dist: pytest; extra == "all"
75
+ Requires-Dist: pytest-cov; extra == "all"
76
+ Requires-Dist: codecov; extra == "all"
77
+ Requires-Dist: pytest_cases; extra == "all"
78
+ Requires-Dist: pre-commit; extra == "all"
79
+ Requires-Dist: sphinx; extra == "all"
80
+ Requires-Dist: sphinx_gallery; extra == "all"
81
+ Requires-Dist: sphinx_rtd_theme; extra == "all"
82
+ Requires-Dist: numpydoc; extra == "all"
50
83
  Dynamic: license-file
51
84
 
52
85
  # EEG-Dash
86
+
53
87
  To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
54
88
 
55
89
  ## Data source
90
+
56
91
  The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
57
92
 
58
93
  ## Featured data
@@ -72,9 +107,11 @@ The following HBN datasets are currently featured on EEGDash. Documentation abou
72
107
  A total of [246 other datasets](datasets.md) are also available through EEGDash.
73
108
 
74
109
  ## Data format
110
+
75
111
  EEGDash queries return a **PyTorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are the best format for EEGDash queries because they provide an efficient, scalable, and flexible structure for machine learning (ML) and deep learning (DL) applications. They allow seamless integration with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.
76
112
 
77
113
  ## Data preprocessing
114
+
78
115
  EEGDash datasets are processed using the popular [BrainDecode](https://braindecode.org/stable/index.html) library. In fact, EEGDash datasets are BrainDecode datasets, which are themselves PyTorch datasets. This means that any preprocessing possible on BrainDecode datasets is also possible on EEGDash datasets. Refer to [BrainDecode](https://braindecode.org/stable/index.html) tutorials for guidance on preprocessing EEG data.
79
116
 
80
117
  ## EEG-Dash usage
@@ -90,7 +127,10 @@ To use the data from a single subject, enter:
90
127
 
91
128
  ```python
92
129
  from eegdash import EEGDashDataset
93
- ds_NDARDB033FW5 = EEGDashDataset({'dataset': 'ds005514', 'task': 'RestingState', 'subject': 'NDARDB033FW5'})
130
+
131
+ ds_NDARDB033FW5 = EEGDashDataset(
132
+ {"dataset": "ds005514", "task": "RestingState", "subject": "NDARDB033FW5"}
133
+ )
94
134
  ```
95
135
 
96
136
  This will search and download the metadata for the task **RestingState** for subject **NDARDB033FW5** in BIDS dataset **ds005514**. The actual data will not be downloaded at this stage. Following standard practice, data is only downloaded once it is processed. The **ds_NDARDB033FW5** object is a fully functional BrainDecode dataset, which is itself a PyTorch dataset. This [tutorial](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_eoec.ipynb) shows how to preprocess the EEG data, extracting portions of the data containing eyes-open and eyes-closed segments, then perform eyes-open vs. eyes-closed classification using a (shallow) deep-learning model.
@@ -99,7 +139,10 @@ To use the data from multiple subjects, enter:
99
139
 
100
140
  ```python
101
141
  from eegdash import EEGDashDataset
102
- ds_ds005505rest = EEGDashDataset({'dataset': 'ds005505', 'task': 'RestingState'}, target_name='sex')
142
+
143
+ ds_ds005505rest = EEGDashDataset(
144
+ {"dataset": "ds005505", "task": "RestingState"}, target_name="sex"
145
+ )
103
146
  ```
104
147
 
105
148
  This will search and download the metadata for the task 'RestingState' for all subjects in BIDS dataset 'ds005505' (a total of 136). As above, the actual data will not be downloaded at this stage so this command is quick to execute. Also, the target class for each subject is assigned using the target_name parameter. This means that this object is ready to be directly fed to a deep learning model, although the [tutorial script](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_sex_classification.ipynb) performs minimal processing on it, prior to training a deep-learning model. Because 14 gigabytes of data are downloaded, this tutorial takes about 10 minutes to execute.
@@ -121,3 +164,7 @@ EEG-DaSh is a collaborative initiative between the United States and Israel, sup
121
164
 
122
165
 
123
166
 
167
+ python3 -m pip install --upgrade build
168
+ python3 -m build
169
+ python3 -m pip install --upgrade twine
170
+ python3 -m twine upload --repository eegdash dist/*
@@ -1,7 +1,9 @@
1
1
  # EEG-Dash
2
+
2
3
  To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
3
4
 
4
5
  ## Data source
6
+
5
7
  The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
6
8
 
7
9
  ## Featured data
@@ -21,9 +23,11 @@ The following HBN datasets are currently featured on EEGDash. Documentation abou
21
23
  A total of [246 other datasets](datasets.md) are also available through EEGDash.
22
24
 
23
25
  ## Data format
26
+
24
27
  EEGDash queries return a **PyTorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are the best format for EEGDash queries because they provide an efficient, scalable, and flexible structure for machine learning (ML) and deep learning (DL) applications. They allow seamless integration with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.
25
28
 
26
29
  ## Data preprocessing
30
+
27
31
  EEGDash datasets are processed using the popular [BrainDecode](https://braindecode.org/stable/index.html) library. In fact, EEGDash datasets are BrainDecode datasets, which are themselves PyTorch datasets. This means that any preprocessing possible on BrainDecode datasets is also possible on EEGDash datasets. Refer to [BrainDecode](https://braindecode.org/stable/index.html) tutorials for guidance on preprocessing EEG data.
28
32
 
29
33
  ## EEG-Dash usage
@@ -39,7 +43,10 @@ To use the data from a single subject, enter:
39
43
 
40
44
  ```python
41
45
  from eegdash import EEGDashDataset
42
- ds_NDARDB033FW5 = EEGDashDataset({'dataset': 'ds005514', 'task': 'RestingState', 'subject': 'NDARDB033FW5'})
46
+
47
+ ds_NDARDB033FW5 = EEGDashDataset(
48
+ {"dataset": "ds005514", "task": "RestingState", "subject": "NDARDB033FW5"}
49
+ )
43
50
  ```
44
51
 
45
52
  This will search and download the metadata for the task **RestingState** for subject **NDARDB033FW5** in BIDS dataset **ds005514**. The actual data will not be downloaded at this stage. Following standard practice, data is only downloaded once it is processed. The **ds_NDARDB033FW5** object is a fully functional BrainDecode dataset, which is itself a PyTorch dataset. This [tutorial](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_eoec.ipynb) shows how to preprocess the EEG data, extracting portions of the data containing eyes-open and eyes-closed segments, then perform eyes-open vs. eyes-closed classification using a (shallow) deep-learning model.
@@ -48,7 +55,10 @@ To use the data from multiple subjects, enter:
48
55
 
49
56
  ```python
50
57
  from eegdash import EEGDashDataset
51
- ds_ds005505rest = EEGDashDataset({'dataset': 'ds005505', 'task': 'RestingState'}, target_name='sex')
58
+
59
+ ds_ds005505rest = EEGDashDataset(
60
+ {"dataset": "ds005505", "task": "RestingState"}, target_name="sex"
61
+ )
52
62
  ```
53
63
 
54
64
  This will search and download the metadata for the task 'RestingState' for all subjects in BIDS dataset 'ds005505' (a total of 136). As above, the actual data will not be downloaded at this stage so this command is quick to execute. Also, the target class for each subject is assigned using the target_name parameter. This means that this object is ready to be directly fed to a deep learning model, although the [tutorial script](https://github.com/sccn/EEGDash/blob/develop/notebooks/tutorial_sex_classification.ipynb) performs minimal processing on it, prior to training a deep-learning model. Because 14 gigabytes of data are downloaded, this tutorial takes about 10 minutes to execute.
@@ -70,3 +80,7 @@ EEG-DaSh is a collaborative initiative between the United States and Israel, sup
70
80
 
71
81
 
72
82
 
83
+ python3 -m pip install --upgrade build
84
+ python3 -m build
85
+ python3 -m pip install --upgrade twine
86
+ python3 -m twine upload --repository eegdash dist/*