eegdash 0.0.1__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. Click here for more details.
- {eegdash-0.0.1/EEGDash.egg-info → eegdash-0.0.3}/PKG-INFO +75 -8
- {eegdash-0.0.1 → eegdash-0.0.3}/README.md +72 -6
- {eegdash-0.0.1 → eegdash-0.0.3}/pyproject.toml +6 -3
- {eegdash-0.0.1/eegdash → eegdash-0.0.3/src}/data_utils.py +131 -5
- {eegdash-0.0.1 → eegdash-0.0.3/src/eegdash.egg-info}/PKG-INFO +75 -8
- eegdash-0.0.3/src/eegdash.egg-info/SOURCES.txt +11 -0
- eegdash-0.0.3/src/eegdash.egg-info/requires.txt +1 -0
- eegdash-0.0.3/src/eegdash.egg-info/top_level.txt +4 -0
- eegdash-0.0.3/src/main.py +199 -0
- eegdash-0.0.1/EEGDash.egg-info/SOURCES.txt +0 -79
- eegdash-0.0.1/EEGDash.egg-info/top_level.txt +0 -1
- eegdash-0.0.1/eegdash/SignalStore/__init__.py +0 -0
- eegdash-0.0.1/eegdash/SignalStore/signalstore/__init__.py +0 -3
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/abstract_read_adapter.py +0 -13
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/schema_read_adapter.py +0 -16
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/vocabulary_read_adapter.py +0 -19
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/handmade_records/excel_study_organizer_read_adapter.py +0 -114
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/axona/axona_read_adapter.py +0 -912
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/ReadIntanSpikeFile.py +0 -140
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/intan_read_adapter.py +0 -29
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/__init__.py +0 -0
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/data_to_result.py +0 -62
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/get_bytes_per_data_block.py +0 -36
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/notch_filter.py +0 -50
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/qstring.py +0 -41
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_header.py +0 -135
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_one_data_block.py +0 -45
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/load_intan_rhd_format.py +0 -204
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/__init__.py +0 -0
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/data_to_result.py +0 -60
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/get_bytes_per_data_block.py +0 -37
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/notch_filter.py +0 -50
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/qstring.py +0 -41
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_header.py +0 -153
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_one_data_block.py +0 -47
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/load_intan_rhs_format.py +0 -213
- eegdash-0.0.1/eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/neurodata_without_borders/neurodata_without_borders_read_adapter.py +0 -14
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/__init__.py +0 -4
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/handler_executor.py +0 -22
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/handler_factory.py +0 -41
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/handlers/base_handler.py +0 -44
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/handlers/domain/property_model_handlers.py +0 -79
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/handlers/domain/schema_handlers.py +0 -3
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/helpers/abstract_helper.py +0 -17
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_extractor.py +0 -33
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_rawio.py +0 -165
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/helpers/spikeinterface_helper.py +0 -100
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/helpers/wrappers/neo_wrappers.py +0 -21
- eegdash-0.0.1/eegdash/SignalStore/signalstore/operations/helpers/wrappers/nwb_wrappers.py +0 -27
- eegdash-0.0.1/eegdash/SignalStore/signalstore/store/__init__.py +0 -8
- eegdash-0.0.1/eegdash/SignalStore/signalstore/store/data_access_objects.py +0 -1181
- eegdash-0.0.1/eegdash/SignalStore/signalstore/store/datafile_adapters.py +0 -131
- eegdash-0.0.1/eegdash/SignalStore/signalstore/store/repositories.py +0 -928
- eegdash-0.0.1/eegdash/SignalStore/signalstore/store/store_errors.py +0 -68
- eegdash-0.0.1/eegdash/SignalStore/signalstore/store/unit_of_work.py +0 -97
- eegdash-0.0.1/eegdash/SignalStore/signalstore/store/unit_of_work_provider.py +0 -67
- eegdash-0.0.1/eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_recording.py +0 -1
- eegdash-0.0.1/eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_sorter.py +0 -1
- eegdash-0.0.1/eegdash/SignalStore/signalstore/utilities/testing/data_mocks.py +0 -513
- eegdash-0.0.1/eegdash/SignalStore/signalstore/utilities/tools/dataarrays.py +0 -49
- eegdash-0.0.1/eegdash/SignalStore/signalstore/utilities/tools/mongo_records.py +0 -25
- eegdash-0.0.1/eegdash/SignalStore/signalstore/utilities/tools/operation_response.py +0 -78
- eegdash-0.0.1/eegdash/SignalStore/signalstore/utilities/tools/purge_orchestration_response.py +0 -21
- eegdash-0.0.1/eegdash/SignalStore/signalstore/utilities/tools/quantities.py +0 -15
- eegdash-0.0.1/eegdash/SignalStore/signalstore/utilities/tools/strings.py +0 -38
- eegdash-0.0.1/eegdash/SignalStore/signalstore/utilities/tools/time.py +0 -17
- eegdash-0.0.1/eegdash/SignalStore/tests/conftest.py +0 -799
- eegdash-0.0.1/eegdash/SignalStore/tests/data/valid_data/data_arrays/make_fake_data.py +0 -59
- eegdash-0.0.1/eegdash/SignalStore/tests/unit/store/conftest.py +0 -0
- eegdash-0.0.1/eegdash/SignalStore/tests/unit/store/test_data_access_objects.py +0 -1235
- eegdash-0.0.1/eegdash/SignalStore/tests/unit/store/test_repositories.py +0 -1309
- eegdash-0.0.1/eegdash/SignalStore/tests/unit/store/test_unit_of_work.py +0 -7
- eegdash-0.0.1/eegdash/SignalStore/tests/unit/test_ci_cd.py +0 -8
- eegdash-0.0.1/eegdash/aws_ingest.py +0 -29
- eegdash-0.0.1/eegdash/main.py +0 -17
- eegdash-0.0.1/eegdash/signalstore_data_utils.py +0 -280
- eegdash-0.0.1/tests/__init__.py +0 -3
- {eegdash-0.0.1 → eegdash-0.0.3}/LICENSE +0 -0
- {eegdash-0.0.1 → eegdash-0.0.3}/setup.cfg +0 -0
- {eegdash-0.0.1/eegdash → eegdash-0.0.3/src}/__init__.py +0 -0
- {eegdash-0.0.1/EEGDash.egg-info → eegdash-0.0.3/src/eegdash.egg-info}/dependency_links.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 0.0.3
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
|
|
6
6
|
License: GNU General Public License
|
|
@@ -32,12 +32,24 @@ Classifier: Operating System :: OS Independent
|
|
|
32
32
|
Requires-Python: >=3.8
|
|
33
33
|
Description-Content-Type: text/markdown
|
|
34
34
|
License-File: LICENSE
|
|
35
|
+
Requires-Dist: signalstore
|
|
35
36
|
|
|
36
37
|
# EEG-Dash
|
|
37
38
|
To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
|
|
38
39
|
|
|
39
40
|
## Data source
|
|
40
|
-
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate data converted from NEMAR, which includes
|
|
41
|
+
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
|
|
42
|
+
|
|
43
|
+
## Datasets available
|
|
44
|
+
|
|
45
|
+
There are currently only two datasets made available for testing purposes.
|
|
46
|
+
|
|
47
|
+
| Dataset ID | Description | Participants | Channels | Task | NEMAR Link |
|
|
48
|
+
|------------|---------------------------------------------------------------------------------------------|--------------|-----------------|-----------------|------------------------------------------------------------------------------------------------|
|
|
49
|
+
| ds002718 | EEG dataset focused on face processing with MRI for source localization | 18 | 70 EEG, 2 EOG | FaceRecognition | [NEMAR ds002718](https://nemar.org/dataexplorer/detail?dataset_id=ds002718) |
|
|
50
|
+
| ds004745 | 8-Channel SSVEP EEG dataset with trials including voluntary movements to introduce artifacts | 6 | 8 EEG | SSVEP tasks | [NEMAR ds004745](https://nemar.org/dataexplorer/detail?dataset_id=ds004745) |
|
|
51
|
+
|
|
52
|
+
|
|
41
53
|
|
|
42
54
|
## Data formatting
|
|
43
55
|
The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep learning (DL) applications by using a simplified structure commonly adopted by these communities. This will involve converting raw MEEG data into a matrix format, where samples (e.g., individual EEG or MEG recordings) are represented by rows, and values (such as time or channel data) are represented by columns. The data is also divided into training and testing sets, with 80% of the data allocated for training and 20% for testing, ensuring a balanced representation of relevant labels across sets. Hierarchical Event Descriptor (HED) tags will be used to annotate labels, which will be stored in a text table, and detailed metadata, including dataset origins and methods. This formatting process will ensure that data is ready for ML/DL models, allowing for efficient training and testing of algorithms while preserving data integrity and reusability.
|
|
@@ -49,15 +61,70 @@ The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep l
|
|
|
49
61
|
|
|
50
62
|
The data in EEG-DaSh is accessed through Python and MATLAB libraries specifically designed for this platform. These libraries will use objects compatible with deep learning data storage formats in each language, such as <i>Torchvision.dataset</i> in Python and <i>DataStore</i> in MATLAB. Users can dynamically fetch data from the EEG-DaSh server which is then cached locally.
|
|
51
63
|
|
|
52
|
-
###
|
|
64
|
+
### Install
|
|
65
|
+
Use your preferred Python environment manager with Python > 3.9 to install the package. Here we show an example using a Conda environment with Python 3.11.5:
|
|
66
|
+
* Create a new environment with Python 3.11.5 -> `conda create --name eegdash python=3.11.5`
|
|
67
|
+
* Switch to the right environment -> `conda activate eegdash`
|
|
68
|
+
* Install dependencies (this is a temporary link that will be updated soon) -> `pip install -r https://raw.githubusercontent.com/sccn/EEG-Dash-Data/refs/heads/develop/requirements.txt`
|
|
69
|
+
* Install _eegdash_ package (this is a temporary link that will be updated soon) -> `pip install -i https://test.pypi.org/simple/ eegdash`
|
|
70
|
+
* Check installation. Start a Python session and type `from eegdash import EEGDash`
|
|
71
|
+
|
|
72
|
+
### Python data access
|
|
73
|
+
|
|
74
|
+
To create a local object for accessing the database, use the following code:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from eegdash import EEGDash
|
|
78
|
+
EEGDashInstance = EEGDash()
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Once the object is instantiated, it can be utilized to search datasets. Providing an empty parameter will search the entire database and return all available datasets.
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
EEGDashInstance.find({})
|
|
85
|
+
```
|
|
86
|
+
A list of datasets is returned.
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
[{'schema_ref': 'eeg_signal',
|
|
90
|
+
'data_name': 'ds004745_sub-001_task-unnamed_eeg.set',
|
|
91
|
+
'dataset': 'ds004745',
|
|
92
|
+
'subject': '001',
|
|
93
|
+
'task': 'unnamed',
|
|
94
|
+
'session': '',
|
|
95
|
+
'run': '',
|
|
96
|
+
'modality': 'EEG',
|
|
97
|
+
'sampling_frequency': 1000,
|
|
98
|
+
'version_timestamp': 0,
|
|
99
|
+
'has_file': True,
|
|
100
|
+
'time_of_save': datetime.datetime(2024, 10, 25, 14, 11, 48, 843593, tzinfo=datetime.timezone.utc),
|
|
101
|
+
'time_of_removal': None}, ...
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Additionally, users can search for a specific dataset by specifying criteria.
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
EEGDashInstance.find({'task': 'FaceRecognition'})
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
After locating the desired dataset or data record, users can download it locally by executing the following command. This will return an xArray Python object.
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
XArrayData = EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Optionally, this is how you may access the raw data for the first record. This will return a NumPy array.
|
|
53
118
|
|
|
54
|
-
|
|
119
|
+
```python
|
|
120
|
+
npData = EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})[0].values
|
|
121
|
+
```
|
|
55
122
|
|
|
56
|
-
|
|
123
|
+
## Example use
|
|
57
124
|
|
|
58
|
-
|
|
125
|
+
This [example](tests/eegdash.ipynb) demonstrates the full workflow from data retrieval with `EEGDash` to model definition, data handling, and training in PyTorch.
|
|
59
126
|
|
|
60
|
-
## Education
|
|
127
|
+
## Education - Coming soon...
|
|
61
128
|
|
|
62
129
|
We organize workshops and educational events to foster cross-cultural education and student training, offering both online and in-person opportunities in collaboration with US and Israeli partners. There is no event planned for 2024. Events for 2025 will be advertised on the EEGLABNEWS mailing list so make sure to [subscribe](https://sccn.ucsd.edu/mailman/listinfo/eeglabnews).
|
|
63
130
|
|
|
@@ -2,7 +2,18 @@
|
|
|
2
2
|
To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
|
|
3
3
|
|
|
4
4
|
## Data source
|
|
5
|
-
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate data converted from NEMAR, which includes
|
|
5
|
+
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
|
|
6
|
+
|
|
7
|
+
## Datasets available
|
|
8
|
+
|
|
9
|
+
There are currently only two datasets made available for testing purposes.
|
|
10
|
+
|
|
11
|
+
| Dataset ID | Description | Participants | Channels | Task | NEMAR Link |
|
|
12
|
+
|------------|---------------------------------------------------------------------------------------------|--------------|-----------------|-----------------|------------------------------------------------------------------------------------------------|
|
|
13
|
+
| ds002718 | EEG dataset focused on face processing with MRI for source localization | 18 | 70 EEG, 2 EOG | FaceRecognition | [NEMAR ds002718](https://nemar.org/dataexplorer/detail?dataset_id=ds002718) |
|
|
14
|
+
| ds004745 | 8-Channel SSVEP EEG dataset with trials including voluntary movements to introduce artifacts | 6 | 8 EEG | SSVEP tasks | [NEMAR ds004745](https://nemar.org/dataexplorer/detail?dataset_id=ds004745) |
|
|
15
|
+
|
|
16
|
+
|
|
6
17
|
|
|
7
18
|
## Data formatting
|
|
8
19
|
The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep learning (DL) applications by using a simplified structure commonly adopted by these communities. This will involve converting raw MEEG data into a matrix format, where samples (e.g., individual EEG or MEG recordings) are represented by rows, and values (such as time or channel data) are represented by columns. The data is also divided into training and testing sets, with 80% of the data allocated for training and 20% for testing, ensuring a balanced representation of relevant labels across sets. Hierarchical Event Descriptor (HED) tags will be used to annotate labels, which will be stored in a text table, and detailed metadata, including dataset origins and methods. This formatting process will ensure that data is ready for ML/DL models, allowing for efficient training and testing of algorithms while preserving data integrity and reusability.
|
|
@@ -14,15 +25,70 @@ The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep l
|
|
|
14
25
|
|
|
15
26
|
The data in EEG-DaSh is accessed through Python and MATLAB libraries specifically designed for this platform. These libraries will use objects compatible with deep learning data storage formats in each language, such as <i>Torchvision.dataset</i> in Python and <i>DataStore</i> in MATLAB. Users can dynamically fetch data from the EEG-DaSh server which is then cached locally.
|
|
16
27
|
|
|
17
|
-
###
|
|
28
|
+
### Install
|
|
29
|
+
Use your preferred Python environment manager with Python > 3.9 to install the package. Here we show an example using a Conda environment with Python 3.11.5:
|
|
30
|
+
* Create a new environment with Python 3.11.5 -> `conda create --name eegdash python=3.11.5`
|
|
31
|
+
* Switch to the right environment -> `conda activate eegdash`
|
|
32
|
+
* Install dependencies (this is a temporary link that will be updated soon) -> `pip install -r https://raw.githubusercontent.com/sccn/EEG-Dash-Data/refs/heads/develop/requirements.txt`
|
|
33
|
+
* Install _eegdash_ package (this is a temporary link that will be updated soon) -> `pip install -i https://test.pypi.org/simple/ eegdash`
|
|
34
|
+
* Check installation. Start a Python session and type `from eegdash import EEGDash`
|
|
35
|
+
|
|
36
|
+
### Python data access
|
|
37
|
+
|
|
38
|
+
To create a local object for accessing the database, use the following code:
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from eegdash import EEGDash
|
|
42
|
+
EEGDashInstance = EEGDash()
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Once the object is instantiated, it can be utilized to search datasets. Providing an empty parameter will search the entire database and return all available datasets.
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
EEGDashInstance.find({})
|
|
49
|
+
```
|
|
50
|
+
A list of datasets is returned.
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
[{'schema_ref': 'eeg_signal',
|
|
54
|
+
'data_name': 'ds004745_sub-001_task-unnamed_eeg.set',
|
|
55
|
+
'dataset': 'ds004745',
|
|
56
|
+
'subject': '001',
|
|
57
|
+
'task': 'unnamed',
|
|
58
|
+
'session': '',
|
|
59
|
+
'run': '',
|
|
60
|
+
'modality': 'EEG',
|
|
61
|
+
'sampling_frequency': 1000,
|
|
62
|
+
'version_timestamp': 0,
|
|
63
|
+
'has_file': True,
|
|
64
|
+
'time_of_save': datetime.datetime(2024, 10, 25, 14, 11, 48, 843593, tzinfo=datetime.timezone.utc),
|
|
65
|
+
'time_of_removal': None}, ...
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Additionally, users can search for a specific dataset by specifying criteria.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
EEGDashInstance.find({'task': 'FaceRecognition'})
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
After locating the desired dataset or data record, users can download it locally by executing the following command. This will return an xArray Python object.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
XArrayData = EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Optionally, this is how you may access the raw data for the first record. This will return a NumPy array.
|
|
18
82
|
|
|
19
|
-
|
|
83
|
+
```python
|
|
84
|
+
npData = EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})[0].values
|
|
85
|
+
```
|
|
20
86
|
|
|
21
|
-
|
|
87
|
+
## Example use
|
|
22
88
|
|
|
23
|
-
|
|
89
|
+
This [example](tests/eegdash.ipynb) demonstrates the full workflow from data retrieval with `EEGDash` to model definition, data handling, and training in PyTorch.
|
|
24
90
|
|
|
25
|
-
## Education
|
|
91
|
+
## Education - Coming soon...
|
|
26
92
|
|
|
27
93
|
We organize workshops and educational events to foster cross-cultural education and student training, offering both online and in-person opportunities in collaboration with US and Israeli partners. There is no event planned for 2024. Events for 2025 will be advertised on the EEGLABNEWS mailing list so make sure to [subscribe](https://sccn.ucsd.edu/mailman/listinfo/eeglabnews).
|
|
28
94
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "eegdash"
|
|
7
|
-
version = "0.0.1"
|
|
7
|
+
version = "0.0.3"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name="Young Truong", email="dt.young112@gmail.com" },
|
|
10
10
|
{ name="Arnaud Delorme", email="adelorme@gmail.com" },
|
|
@@ -18,7 +18,10 @@ classifiers = [
|
|
|
18
18
|
"License :: OSI Approved :: MIT License",
|
|
19
19
|
"Operating System :: OS Independent",
|
|
20
20
|
]
|
|
21
|
-
|
|
21
|
+
dependencies = [
|
|
22
|
+
"signalstore"
|
|
23
|
+
]
|
|
22
24
|
[project.urls]
|
|
23
25
|
Homepage = "https://github.com/sccn/EEG-Dash-Data"
|
|
24
|
-
Issues = "https://github.com/sccn/EEG-Dash-Data/issues"
|
|
26
|
+
Issues = "https://github.com/sccn/EEG-Dash-Data/issues"
|
|
27
|
+
|
|
@@ -3,11 +3,104 @@ import sys
|
|
|
3
3
|
from joblib import Parallel, delayed
|
|
4
4
|
import mne
|
|
5
5
|
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
6
7
|
from pathlib import Path
|
|
7
8
|
import re
|
|
8
9
|
import json
|
|
10
|
+
from mne.io import BaseRaw
|
|
11
|
+
from mne._fiff.utils import _find_channels, _read_segments_file
|
|
12
|
+
import s3fs
|
|
13
|
+
import tempfile
|
|
14
|
+
from mne._fiff.utils import _read_segments_file
|
|
9
15
|
|
|
10
|
-
|
|
16
|
+
class RawEEGDash(BaseRaw):
|
|
17
|
+
r"""Raw object from EEG-Dash connection with Openneuro S3 file.
|
|
18
|
+
|
|
19
|
+
Parameters
|
|
20
|
+
----------
|
|
21
|
+
input_fname : path-like
|
|
22
|
+
Path to the S3 file
|
|
23
|
+
eog : list | tuple | 'auto'
|
|
24
|
+
Names or indices of channels that should be designated EOG channels.
|
|
25
|
+
If 'auto', the channel names containing ``EOG`` or ``EYE`` are used.
|
|
26
|
+
Defaults to empty tuple.
|
|
27
|
+
%(preload)s
|
|
28
|
+
Note that preload=False will be effective only if the data is stored
|
|
29
|
+
in a separate binary file.
|
|
30
|
+
%(uint16_codec)s
|
|
31
|
+
%(montage_units)s
|
|
32
|
+
%(verbose)s
|
|
33
|
+
|
|
34
|
+
See Also
|
|
35
|
+
--------
|
|
36
|
+
mne.io.Raw : Documentation of attributes and methods.
|
|
37
|
+
|
|
38
|
+
Notes
|
|
39
|
+
-----
|
|
40
|
+
.. versionadded:: 0.11.0
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
def __init__(
|
|
44
|
+
self,
|
|
45
|
+
input_fname,
|
|
46
|
+
metadata,
|
|
47
|
+
eog=(),
|
|
48
|
+
preload=False,
|
|
49
|
+
*,
|
|
50
|
+
cache_dir='./.eegdash_cache',
|
|
51
|
+
uint16_codec=None,
|
|
52
|
+
montage_units="auto",
|
|
53
|
+
verbose=None,
|
|
54
|
+
):
|
|
55
|
+
'''
|
|
56
|
+
Get to work with S3 endpoint first, no caching
|
|
57
|
+
'''
|
|
58
|
+
# Create a simple RawArray
|
|
59
|
+
sfreq = metadata['sfreq'] # Sampling frequency
|
|
60
|
+
n_times = metadata['n_times']
|
|
61
|
+
ch_names = metadata['ch_names']
|
|
62
|
+
ch_types = []
|
|
63
|
+
for ch in metadata['ch_types']:
|
|
64
|
+
chtype = ch.lower()
|
|
65
|
+
if chtype == 'heog' or chtype == 'veog':
|
|
66
|
+
chtype = 'eog'
|
|
67
|
+
ch_types.append(chtype)
|
|
68
|
+
info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
|
|
69
|
+
self.s3file = input_fname
|
|
70
|
+
os.makedirs(cache_dir, exist_ok=True)
|
|
71
|
+
self.filecache = os.path.join(cache_dir, os.path.basename(self.s3file))
|
|
72
|
+
|
|
73
|
+
if preload and not os.path.exists(self.filecache):
|
|
74
|
+
self._download_s3()
|
|
75
|
+
preload = self.filecache
|
|
76
|
+
|
|
77
|
+
super().__init__(
|
|
78
|
+
info,
|
|
79
|
+
preload,
|
|
80
|
+
last_samps=[n_times-1],
|
|
81
|
+
orig_format="single",
|
|
82
|
+
verbose=verbose,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
def _download_s3(self):
|
|
86
|
+
filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
|
|
87
|
+
print('s3file', self.s3file)
|
|
88
|
+
print('filecache', self.filecache)
|
|
89
|
+
filesystem.download(self.s3file, self.filecache)
|
|
90
|
+
self.filenames = [self.filecache]
|
|
91
|
+
|
|
92
|
+
def _read_segment(
|
|
93
|
+
self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
|
|
94
|
+
):
|
|
95
|
+
if not os.path.exists(self.filecache): # not preload
|
|
96
|
+
self._download_s3()
|
|
97
|
+
else: # not preload and file is not cached
|
|
98
|
+
self.filenames = [self.filecache]
|
|
99
|
+
return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)
|
|
100
|
+
|
|
101
|
+
def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
|
|
102
|
+
"""Read a chunk of data from the file."""
|
|
103
|
+
_read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
|
|
11
104
|
|
|
12
105
|
|
|
13
106
|
class BIDSDataset():
|
|
@@ -20,7 +113,7 @@ class BIDSDataset():
|
|
|
20
113
|
}
|
|
21
114
|
METADATA_FILE_EXTENSIONS = ['eeg.json', 'channels.tsv', 'electrodes.tsv', 'events.tsv', 'events.json']
|
|
22
115
|
def __init__(self,
|
|
23
|
-
data_dir=None, # location of
|
|
116
|
+
data_dir=None, # location of bids dataset
|
|
24
117
|
dataset='', # dataset name
|
|
25
118
|
raw_format='eeglab', # format of raw data
|
|
26
119
|
):
|
|
@@ -44,9 +137,25 @@ class BIDSDataset():
|
|
|
44
137
|
self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
|
|
45
138
|
|
|
46
139
|
def get_property_from_filename(self, property, filename):
|
|
47
|
-
|
|
140
|
+
import platform
|
|
141
|
+
if platform.system() == "Windows":
|
|
142
|
+
lookup = re.search(rf'{property}-(.*?)[_\\]', filename)
|
|
143
|
+
else:
|
|
144
|
+
lookup = re.search(rf'{property}-(.*?)[_\/]', filename)
|
|
48
145
|
return lookup.group(1) if lookup else ''
|
|
49
146
|
|
|
147
|
+
def merge_json_inheritance(self, json_files):
|
|
148
|
+
'''
|
|
149
|
+
Merge list of json files found by get_bids_file_inheritance,
|
|
150
|
+
expecting the order (from left to right) is from lowest level to highest level,
|
|
151
|
+
and return a merged dictionary
|
|
152
|
+
'''
|
|
153
|
+
json_files.reverse()
|
|
154
|
+
json_dict = {}
|
|
155
|
+
for f in json_files:
|
|
156
|
+
json_dict.update(json.load(open(f)))
|
|
157
|
+
return json_dict
|
|
158
|
+
|
|
50
159
|
def get_bids_file_inheritance(self, path, basename, extension):
|
|
51
160
|
'''
|
|
52
161
|
Get all files with given extension that applies to the basename file
|
|
@@ -68,7 +177,7 @@ class BIDSDataset():
|
|
|
68
177
|
for file in os.listdir(path):
|
|
69
178
|
# target_file = path / f"{cur_file_basename}_{extension}"
|
|
70
179
|
if os.path.isfile(path/file):
|
|
71
|
-
cur_file_basename = file[:file.rfind('_')]
|
|
180
|
+
cur_file_basename = file[:file.rfind('_')] # TODO: change to just search for any file with extension
|
|
72
181
|
if file.endswith(extension) and cur_file_basename in basename:
|
|
73
182
|
filepath = path / file
|
|
74
183
|
bids_files.append(filepath)
|
|
@@ -210,4 +319,21 @@ class BIDSDataset():
|
|
|
210
319
|
return self.get_property_from_filename('run', data_filepath)
|
|
211
320
|
|
|
212
321
|
def subject(self, data_filepath):
|
|
213
|
-
return self.get_property_from_filename('sub', data_filepath)
|
|
322
|
+
return self.get_property_from_filename('sub', data_filepath)
|
|
323
|
+
|
|
324
|
+
def num_channels(self, data_filepath):
|
|
325
|
+
channels_tsv = pd.read_csv(self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0], sep='\t')
|
|
326
|
+
return len(channels_tsv)
|
|
327
|
+
|
|
328
|
+
def channel_labels(self, data_filepath):
    '''
    Return the 'name' column of the channels.tsv file that applies to
    data_filepath, as a list.

    Only the first file returned by get_bids_metadata_files is read.
    '''
    channels_path = self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0]
    channel_table = pd.read_csv(channels_path, sep='\t')
    name_column = channel_table['name']
    return name_column.tolist()
|
|
331
|
+
|
|
332
|
+
def channel_types(self, data_filepath):
    '''
    Return the 'type' column of the channels.tsv file that applies to
    data_filepath, as a list.

    Only the first file returned by get_bids_metadata_files is read.
    '''
    channels_path = self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0]
    channel_table = pd.read_csv(channels_path, sep='\t')
    type_column = channel_table['type']
    return type_column.tolist()
|
|
335
|
+
|
|
336
|
+
def num_times(self, data_filepath):
    '''
    Return int(SamplingFrequency * RecordingDuration) computed from the merged
    eeg.json files that apply to data_filepath.

    The eeg.json files are collected with get_bids_metadata_files and combined
    with merge_json_inheritance. A KeyError is raised if either key is missing
    from the merged dictionary.
    '''
    sidecar_paths = self.get_bids_metadata_files(data_filepath, 'eeg.json')
    merged_metadata = self.merge_json_inheritance(sidecar_paths)
    sampling_frequency = merged_metadata['SamplingFrequency']
    recording_duration = merged_metadata['RecordingDuration']
    # int() truncates toward zero, so a fractional sample count is dropped
    return int(sampling_frequency * recording_duration)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.3
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
|
|
6
6
|
License: GNU General Public License
|
|
@@ -32,12 +32,24 @@ Classifier: Operating System :: OS Independent
|
|
|
32
32
|
Requires-Python: >=3.8
|
|
33
33
|
Description-Content-Type: text/markdown
|
|
34
34
|
License-File: LICENSE
|
|
35
|
+
Requires-Dist: signalstore
|
|
35
36
|
|
|
36
37
|
# EEG-Dash
|
|
37
38
|
To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
|
|
38
39
|
|
|
39
40
|
## Data source
|
|
40
|
-
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate data converted from NEMAR, which includes
|
|
41
|
+
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
|
|
42
|
+
|
|
43
|
+
## Datasets available
|
|
44
|
+
|
|
45
|
+
There are currently only two datasets made available for testing purposes.
|
|
46
|
+
|
|
47
|
+
| Dataset ID | Description | Participants | Channels | Task | NEMAR Link |
|
|
48
|
+
|------------|---------------------------------------------------------------------------------------------|--------------|-----------------|-----------------|------------------------------------------------------------------------------------------------|
|
|
49
|
+
| ds002718 | EEG dataset focused on face processing with MRI for source localization | 18 | 70 EEG, 2 EOG | FaceRecognition | [NEMAR ds002718](https://nemar.org/dataexplorer/detail?dataset_id=ds002718) |
|
|
50
|
+
| ds004745 | 8-Channel SSVEP EEG dataset with trials including voluntary movements to introduce artifacts | 6 | 8 EEG | SSVEP tasks | [NEMAR ds004745](https://nemar.org/dataexplorer/detail?dataset_id=ds004745) |
|
|
51
|
+
|
|
52
|
+
|
|
41
53
|
|
|
42
54
|
## Data formatting
|
|
43
55
|
The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep learning (DL) applications by using a simplified structure commonly adopted by these communities. This will involve converting raw MEEG data into a matrix format, where samples (e.g., individual EEG or MEG recordings) are represented by rows, and values (such as time or channel data) are represented by columns. The data is also divided into training and testing sets, with 80% of the data allocated for training and 20% for testing, ensuring a balanced representation of relevant labels across sets. Hierarchical Event Descriptor (HED) tags will be used to annotate labels, which will be stored in a text table, and detailed metadata, including dataset origins and methods. This formatting process will ensure that data is ready for ML/DL models, allowing for efficient training and testing of algorithms while preserving data integrity and reusability.
|
|
@@ -49,15 +61,70 @@ The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep l
|
|
|
49
61
|
|
|
50
62
|
The data in EEG-DaSh is accessed through Python and MATLAB libraries specifically designed for this platform. These libraries will use objects compatible with deep learning data storage formats in each language, such as <i>Torchvision.dataset</i> in Python and <i>DataStore</i> in MATLAB. Users can dynamically fetch data from the EEG-DaSh server which is then cached locally.
|
|
51
63
|
|
|
52
|
-
###
|
|
64
|
+
### Install
|
|
65
|
+
Use your preferred Python environment manager with Python > 3.9 to install the package. Here we show an example using a Conda environment with Python 3.11.5:
|
|
66
|
+
* Create a new environment with Python 3.11.5 -> `conda create --name eegdash python=3.11.5`
|
|
67
|
+
* Switch to the right environment -> `conda activate eegdash`
|
|
68
|
+
* Install dependencies (this is a temporary link that will be updated soon) -> `pip install -r https://raw.githubusercontent.com/sccn/EEG-Dash-Data/refs/heads/develop/requirements.txt`
|
|
69
|
+
* Install _eegdash_ package (this is a temporary link that will be updated soon) -> `pip install -i https://test.pypi.org/simple/ eegdash`
|
|
70
|
+
* Check installation. Start a Python session and type `from eegdash import EEGDash`
|
|
71
|
+
|
|
72
|
+
### Python data access
|
|
73
|
+
|
|
74
|
+
To create a local object for accessing the database, use the following code:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from eegdash import EEGDash
|
|
78
|
+
EEGDashInstance = EEGDash()
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Once the object is instantiated, it can be utilized to search datasets. Providing an empty parameter will search the entire database and return all available datasets.
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
EEGDashInstance.find({})
|
|
85
|
+
```
|
|
86
|
+
A list of datasets is returned.
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
[{'schema_ref': 'eeg_signal',
|
|
90
|
+
'data_name': 'ds004745_sub-001_task-unnamed_eeg.set',
|
|
91
|
+
'dataset': 'ds004745',
|
|
92
|
+
'subject': '001',
|
|
93
|
+
'task': 'unnamed',
|
|
94
|
+
'session': '',
|
|
95
|
+
'run': '',
|
|
96
|
+
'modality': 'EEG',
|
|
97
|
+
'sampling_frequency': 1000,
|
|
98
|
+
'version_timestamp': 0,
|
|
99
|
+
'has_file': True,
|
|
100
|
+
'time_of_save': datetime.datetime(2024, 10, 25, 14, 11, 48, 843593, tzinfo=datetime.timezone.utc),
|
|
101
|
+
'time_of_removal': None}, ...
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Additionally, users can search for a specific dataset by specifying criteria.
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
EEGDashInstance.find({'task': 'FaceRecognition'})
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
After locating the desired dataset or data record, users can download it locally by executing the following command. This will return an xArray Python object.
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
XArrayData = EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Optionally, this is how you may access the raw data for the first record. This will return a NumPy array.
|
|
53
118
|
|
|
54
|
-
|
|
119
|
+
```python
|
|
120
|
+
npData = EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})[0].values
|
|
121
|
+
```
|
|
55
122
|
|
|
56
|
-
|
|
123
|
+
## Example use
|
|
57
124
|
|
|
58
|
-
|
|
125
|
+
This [example](tests/eegdash.ipynb) demonstrates the full workflow from data retrieval with `EEGDash` to model definition, data handling, and training in PyTorch.
|
|
59
126
|
|
|
60
|
-
## Education
|
|
127
|
+
## Education - Coming soon...
|
|
61
128
|
|
|
62
129
|
We organize workshops and educational events to foster cross-cultural education and student training, offering both online and in-person opportunities in collaboration with US and Israeli partners. There is no event planned for 2024. Events for 2025 will be advertised on the EEGLABNEWS mailing list so make sure to [subscribe](https://sccn.ucsd.edu/mailman/listinfo/eeglabnews).
|
|
63
130
|
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/__init__.py
|
|
5
|
+
src/data_utils.py
|
|
6
|
+
src/main.py
|
|
7
|
+
src/eegdash.egg-info/PKG-INFO
|
|
8
|
+
src/eegdash.egg-info/SOURCES.txt
|
|
9
|
+
src/eegdash.egg-info/dependency_links.txt
|
|
10
|
+
src/eegdash.egg-info/requires.txt
|
|
11
|
+
src/eegdash.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
signalstore
|