eegdash 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eegdash/data_utils.py +5 -1
- eegdash/{aws_ingest.py → script.py} +8 -12
- eegdash/signalstore_data_utils.py +352 -2
- {eegdash-0.0.1.dist-info → eegdash-0.0.2.dist-info}/METADATA +75 -8
- eegdash-0.0.2.dist-info/RECORD +10 -0
- {eegdash-0.0.1.dist-info → eegdash-0.0.2.dist-info}/WHEEL +1 -1
- eegdash/SignalStore/__init__.py +0 -0
- eegdash/SignalStore/signalstore/__init__.py +0 -3
- eegdash/SignalStore/signalstore/adapters/read_adapters/abstract_read_adapter.py +0 -13
- eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/schema_read_adapter.py +0 -16
- eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/vocabulary_read_adapter.py +0 -19
- eegdash/SignalStore/signalstore/adapters/read_adapters/handmade_records/excel_study_organizer_read_adapter.py +0 -114
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/axona/axona_read_adapter.py +0 -912
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/ReadIntanSpikeFile.py +0 -140
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/intan_read_adapter.py +0 -29
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/__init__.py +0 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/data_to_result.py +0 -62
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/get_bytes_per_data_block.py +0 -36
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/notch_filter.py +0 -50
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/qstring.py +0 -41
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_header.py +0 -135
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_one_data_block.py +0 -45
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/load_intan_rhd_format.py +0 -204
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/__init__.py +0 -0
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/data_to_result.py +0 -60
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/get_bytes_per_data_block.py +0 -37
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/notch_filter.py +0 -50
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/qstring.py +0 -41
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_header.py +0 -153
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_one_data_block.py +0 -47
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/load_intan_rhs_format.py +0 -213
- eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/neurodata_without_borders/neurodata_without_borders_read_adapter.py +0 -14
- eegdash/SignalStore/signalstore/operations/__init__.py +0 -4
- eegdash/SignalStore/signalstore/operations/handler_executor.py +0 -22
- eegdash/SignalStore/signalstore/operations/handler_factory.py +0 -41
- eegdash/SignalStore/signalstore/operations/handlers/base_handler.py +0 -44
- eegdash/SignalStore/signalstore/operations/handlers/domain/property_model_handlers.py +0 -79
- eegdash/SignalStore/signalstore/operations/handlers/domain/schema_handlers.py +0 -3
- eegdash/SignalStore/signalstore/operations/helpers/abstract_helper.py +0 -17
- eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_extractor.py +0 -33
- eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_rawio.py +0 -165
- eegdash/SignalStore/signalstore/operations/helpers/spikeinterface_helper.py +0 -100
- eegdash/SignalStore/signalstore/operations/helpers/wrappers/neo_wrappers.py +0 -21
- eegdash/SignalStore/signalstore/operations/helpers/wrappers/nwb_wrappers.py +0 -27
- eegdash/SignalStore/signalstore/store/__init__.py +0 -8
- eegdash/SignalStore/signalstore/store/data_access_objects.py +0 -1181
- eegdash/SignalStore/signalstore/store/datafile_adapters.py +0 -131
- eegdash/SignalStore/signalstore/store/repositories.py +0 -928
- eegdash/SignalStore/signalstore/store/store_errors.py +0 -68
- eegdash/SignalStore/signalstore/store/unit_of_work.py +0 -97
- eegdash/SignalStore/signalstore/store/unit_of_work_provider.py +0 -67
- eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_recording.py +0 -1
- eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_sorter.py +0 -1
- eegdash/SignalStore/signalstore/utilities/testing/data_mocks.py +0 -513
- eegdash/SignalStore/signalstore/utilities/tools/dataarrays.py +0 -49
- eegdash/SignalStore/signalstore/utilities/tools/mongo_records.py +0 -25
- eegdash/SignalStore/signalstore/utilities/tools/operation_response.py +0 -78
- eegdash/SignalStore/signalstore/utilities/tools/purge_orchestration_response.py +0 -21
- eegdash/SignalStore/signalstore/utilities/tools/quantities.py +0 -15
- eegdash/SignalStore/signalstore/utilities/tools/strings.py +0 -38
- eegdash/SignalStore/signalstore/utilities/tools/time.py +0 -17
- eegdash/SignalStore/tests/conftest.py +0 -799
- eegdash/SignalStore/tests/data/valid_data/data_arrays/make_fake_data.py +0 -59
- eegdash/SignalStore/tests/unit/store/conftest.py +0 -0
- eegdash/SignalStore/tests/unit/store/test_data_access_objects.py +0 -1235
- eegdash/SignalStore/tests/unit/store/test_repositories.py +0 -1309
- eegdash/SignalStore/tests/unit/store/test_unit_of_work.py +0 -7
- eegdash/SignalStore/tests/unit/test_ci_cd.py +0 -8
- eegdash-0.0.1.dist-info/RECORD +0 -72
- {eegdash-0.0.1.dist-info → eegdash-0.0.2.dist-info}/LICENSE +0 -0
- {eegdash-0.0.1.dist-info → eegdash-0.0.2.dist-info}/top_level.txt +0 -0
eegdash/data_utils.py
CHANGED

@@ -44,7 +44,11 @@ class BIDSDataset():
         self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
 
     def get_property_from_filename(self, property, filename):
-        lookup = re.search(rf'{property}-(.*?)[_\/]', filename)
+        import platform
+        if platform.system() == "Windows":
+            lookup = re.search(rf'{property}-(.*?)[_\\]', filename)
+        else:
+            lookup = re.search(rf'{property}-(.*?)[_\/]', filename)
         return lookup.group(1) if lookup else ''
 
     def get_bids_file_inheritance(self, path, basename, extension):
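The new conditional switches the entity separator in the regular expression based on the host OS, because Windows paths use backslashes rather than forward slashes. A self-contained sketch of the same lookup, using a hypothetical BIDS-style path for illustration:

```python
import platform
import re

def get_property_from_filename(property, filename):
    # A BIDS entity value ends at an underscore or at the path separator,
    # which is '\' on Windows and '/' elsewhere.
    if platform.system() == "Windows":
        lookup = re.search(rf'{property}-(.*?)[_\\]', filename)
    else:
        lookup = re.search(rf'{property}-(.*?)[_\/]', filename)
    return lookup.group(1) if lookup else ''

# Hypothetical POSIX-style path, extracting the subject entity:
print(get_property_from_filename('sub', 'ds002718/sub-012/eeg/sub-012_task-FaceRecognition_eeg.set'))  # '012'
```

A single character class covering both separators (`[_\\/]`) could avoid the branch entirely.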
eegdash/{aws_ingest.py → script.py}
RENAMED

@@ -1,15 +1,5 @@
-import sys
-sys.path.append('..')
 import argparse
-from
-
-def add_bids_dataset(args):
-    signalstore_aws = SignalstoreBIDS(
-        dbconnectionstring='mongodb://23.21.113.214:27017/?directConnection=true&serverSelectionTimeoutMS=2000&appName=mongosh+2.2.1',
-        local_filesystem=False,
-        project_name='eegdash',
-    )
-    signalstore_aws.add_bids_dataset(dataset=args.dataset, data_dir=args.data, raw_format='eeglab')
+from signalstore_data_utils import SignalstoreOpenneuro
 
 def main():
     # Create the parser
@@ -23,7 +13,13 @@ def main():
     args = parser.parse_args()
     print('Arguments:', args)
 
-
+    signalstore = SignalstoreOpenneuro(
+        is_public=False,
+        local_filesystem=False,
+    )
+    hbn_datasets = ['ds005505', 'ds005510', 'ds005514', 'ds005512', 'ds005511', 'ds005509', 'ds005508', 'ds005507', 'ds005506']
+    for ds in hbn_datasets:
+        signalstore.add_bids_dataset(dataset=ds, data_dir=f'/mnt/nemar/openneuro/{ds}', raw_format='eeglab')
 
 if __name__ == "__main__":
     main()
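The rewritten script hard-codes the nine HBN dataset IDs and the `/mnt/nemar/openneuro` mount point. A hypothetical variant that takes the same inputs from the command line instead (the flag names are illustrative and not part of the release):

```python
import argparse
from signalstore_data_utils import SignalstoreOpenneuro

def main():
    parser = argparse.ArgumentParser(description='Ingest OpenNeuro BIDS datasets')
    # Hypothetical flags; the released script hard-codes the HBN dataset list instead.
    parser.add_argument('--datasets', nargs='+', default=['ds005505'])
    parser.add_argument('--data-root', default='/mnt/nemar/openneuro')
    args = parser.parse_args()

    # Same ingestion loop as the release, parameterized by the arguments above.
    signalstore = SignalstoreOpenneuro(is_public=False, local_filesystem=False)
    for ds in args.datasets:
        signalstore.add_bids_dataset(dataset=ds, data_dir=f'{args.data_root}/{ds}', raw_format='eeglab')

if __name__ == "__main__":
    main()
```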
eegdash/signalstore_data_utils.py
CHANGED

@@ -4,7 +4,7 @@ import re
 import numpy as np
 import xarray as xr
 import os
-from
+from signalstore.store import UnitOfWorkProvider
 # from mongomock import MongoClient
 from pymongo.mongo_client import MongoClient
 from pymongo.server_api import ServerApi
@@ -13,8 +13,323 @@ from fsspec.implementations.dirfs import DirFileSystem
 import pandas as pd
 import json
 import s3fs
-from
+from signalstore.store.data_access_objects import FileSystemDAO
+from .data_utils import BIDSDataset
+import tempfile
+import mne
+from joblib import Parallel, delayed
+
+class SignalstoreOpenneuro():
+    AWS_BUCKET = 'openneuro.org'
+    PROJECT_NAME = 'eegdash'
+    def __init__(self,
+                 dbconnectionstring="mongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000&appName=mongosh+2.3.1",
+                 is_public=False,
+                 local_filesystem=True,
+                 ):
+        self.is_public = is_public
+        self.project_name = self.PROJECT_NAME
+        if is_public:
+            dbconnectionstring='mongodb+srv://eegdash-user:mdzoMjQcHWTVnKDq@cluster0.vz35p.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0',
+        else:
+            load_dotenv()
+            dbconnectionstring = os.getenv('DB_CONNECTION_STRING')
+
+        # Create a new client and connect to the server
+        client = MongoClient(dbconnectionstring, server_api=ServerApi('1'))
+        # Send a ping to confirm a successful connection
+        try:
+            client.admin.command('ping')
+            print("Pinged your deployment. You successfully connected to MongoDB!")
+        except Exception as e:
+            print(e)
+
+        memory_store = {}
+        self.filesystem = self.set_up_filesystem(is_local=local_filesystem)
+        self.uow_provider = UnitOfWorkProvider(
+            mongo_client=client,
+            filesystem=self.filesystem,
+            memory_store=memory_store,
+            default_filetype='zarr'
+        )
+
+        self.uow = self.uow_provider(self.PROJECT_NAME)
+        self.load_domain_models()
+
+    def set_up_filesystem(self, is_local=True):
+        if is_local:
+            cache_path='/mnt/nemar/dtyoung/eeg-dash-data' # path where signalstore netCDF files are stored
+            # Create a directory for the dataset
+            store_path = Path(cache_path)
+            if not os.path.exists(store_path):
+                os.makedirs(store_path)
+
+            filesystem = LocalFileSystem()
+            tmp_dir_fs = DirFileSystem(
+                store_path,
+                filesystem=filesystem
+            )
+            return tmp_dir_fs
+        else:
+            s3 = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
+            return s3
+
+    def load_domain_models(self):
+        dir_path = os.path.dirname(os.path.realpath(__file__))
+        cwd = Path(dir_path)
+        domain_models_path = cwd / f"DomainModels/{self.project_name}/data_models.json"
+        metamodel_path = cwd / f"DomainModels/{self.project_name}/metamodels.json"
+        property_path = cwd / f"DomainModels/{self.project_name}/property_models.json"
+        with open(metamodel_path) as f:
+            metamodels = json.load(f)
+
+        with open(property_path) as f:
+            property_models = json.load(f)
+
+        # load domain models json file
+        with open(domain_models_path) as f:
+            domain_models = json.load(f)
+
+        with self.uow as uow:
+            for property_model in property_models:
+                if not uow.domain_models.exists(property_model['schema_name']):
+                    uow.domain_models.add(property_model)
+                    model = uow.domain_models.get(property_model['schema_name'])
+                    print('property model: ', model['schema_name'])
+            for metamodel in metamodels:
+                if not uow.domain_models.exists(metamodel['schema_name']):
+                    uow.domain_models.add(metamodel)
+                    model = uow.domain_models.get(metamodel['schema_name'])
+                    print('meta model: ', model['schema_name'])
+            for domain_model in domain_models:
+                if not uow.domain_models.exists(domain_model['schema_name']):
+                    uow.domain_models.add(domain_model)
+                    model = uow.domain_models.get(domain_model['schema_name'])
+                    print('domain model: ', model['schema_name'])
+            uow.commit()
+
+    def extract_attribute(self, pattern, filename):
+        match = re.search(pattern, filename)
+        return match.group(1) if match else None
+
+    def load_eeg_attrs_from_bids_file(self, bids_dataset: BIDSDataset, bids_file):
+        '''
+        bids_file must be a file of the bids_dataset
+        '''
+        if bids_file not in bids_dataset.files:
+            raise ValueError(f'{bids_file} not in {bids_dataset.dataset}')
+        f = os.path.basename(bids_file)
+        dsnumber = bids_dataset.dataset
+        # extract openneuro path by finding the first occurrence of the dataset name in the filename and remove the path before that
+        openneuro_path = dsnumber + bids_file.split(dsnumber)[1]
+
+        attrs = {
+            'schema_ref': 'eeg_signal',
+            'data_name': f'{bids_dataset.dataset}_{f}',
+            'dataset': bids_dataset.dataset,
+            'bidspath': openneuro_path,
+            'subject': bids_dataset.subject(bids_file),
+            'task': bids_dataset.task(bids_file),
+            'session': bids_dataset.session(bids_file),
+            'run': bids_dataset.run(bids_file),
+            'sampling_frequency': bids_dataset.sfreq(bids_file),
+            'modality': 'EEG',
+        }
+
+        return attrs
+
+    def load_eeg_data_from_s3(self, s3path):
+        # import boto3
+        # import scipy.io
+        # import io
+
+        # # Initialize the S3 client
+        # s3 = boto3.client('s3')
+
+        # # S3 bucket and object key
+        # bucket_name = 'your-bucket-name'
+        # object_key = 'path/to/your/file.mat'
+
+        # # Get the object from S3 and stream it into memory
+        # response = s3.get_object(Bucket=bucket_name, Key=object_key)
+
+        # # Read the content into a BytesIO buffer
+        # mat_file_stream = io.BytesIO(response['Body'].read())
+
+        # # Load the MAT file using scipy.io.loadmat
+        # data = scipy.io.loadmat(mat_file_stream)
+
+        # # Work with the data
+        # print(data)
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.set') as tmp:
+            with self.filesystem.open(s3path) as s3_file:
+                tmp.write(s3_file.read())
+            tmp_path = tmp.name
+        eeg_data = self.load_eeg_data_from_bids_file(tmp_path)
+        os.unlink(tmp_path)
+        return eeg_data
+
+    def load_eeg_data_from_bids_file(self, bids_file, eeg_attrs=None):
+        '''
+        bids_file must be a file of the bids_dataset
+        '''
+        EEG = mne.io.read_raw_eeglab(bids_file)
+        eeg_data = EEG.get_data()
+
+        fs = EEG.info['sfreq']
+        max_time = eeg_data.shape[1] / fs
+        time_steps = np.linspace(0, max_time, eeg_data.shape[1]).squeeze() # in seconds
+
+        channel_names = EEG.ch_names
+
+        eeg_xarray = xr.DataArray(
+            data=eeg_data,
+            dims=['channel','time'],
+            coords={
+                'time': time_steps,
+                'channel': channel_names
+            },
+            # attrs=attrs
+        )
+        return eeg_xarray
+
+    def exist(self, schema_ref='eeg_signal', data_name=''):
+        with self.uow as uow:
+            query = {
+                "schema_ref": schema_ref,
+                "data_name": data_name
+            }
+            sessions = uow.data.find(query)
+            if len(sessions) > 0:
+                return True
+            else:
+                return False
+
+    def add_bids_dataset(self, dataset, data_dir, raw_format='eeglab', overwrite=False):
+        '''
+        Create new records for the dataset in the MongoDB database if not found
+        '''
+        if self.is_public:
+            raise ValueError('This operation is not allowed for public users')
+
+        bids_dataset = BIDSDataset(
+            data_dir=data_dir,
+            dataset=dataset,
+            raw_format=raw_format,
+        )
+        for bids_file in bids_dataset.get_files():
+            print('bids raw file', bids_file)
+
+            signalstore_data_id = f"{dataset}_{os.path.basename(bids_file)}"
+            if overwrite:
+                self.remove(signalstore_data_id)
+
+            if self.exist(data_name=signalstore_data_id):
+                print('data already exist. skipped')
+                continue
+            else:
+                eeg_attrs = self.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
+                with self.uow as uow:
+                    # Assume raw data already exists on Openneuro, recreating record only
+                    eeg_attrs['has_file'] = True
+                    print('adding record', eeg_attrs['data_name'])
+                    uow.data.add(eeg_attrs)
+                    uow.commit()
+
+    def update_bids_dataset(self, dataset, data_dir, raw_format='eeglab'):
+        '''
+        Create new records for the dataset in the MongoDB database if not found
+        '''
+        if self.is_public:
+            raise ValueError('This operation is not allowed for public users')
+
+        bids_dataset = BIDSDataset(
+            data_dir=data_dir,
+            dataset=dataset,
+            raw_format=raw_format,
+        )
+        for bids_file in bids_dataset.get_files():
+            print('bids raw file', bids_file)
+
+            signalstore_data_id = f"{dataset}_{os.path.basename(bids_file)}"
+
+            if not self.exist(data_name=signalstore_data_id):
+                raise ValueError('data not found')
+            else:
+                self.remove(data_name=signalstore_data_id)
+
+            eeg_attrs = self.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
+            with self.uow as uow:
+                # Assume raw data already exists on Openneuro, recreating record only
+                eeg_attrs['has_file'] = True
+                print('adding record', eeg_attrs['data_name'])
+                uow.data.add(eeg_attrs)
+                uow.commit()
+
+    def remove(self, schema_ref='eeg_signal', data_name=''):
+        if self.is_public:
+            raise ValueError('This operation is not allowed for public users')
+
+        print('Removing record', data_name)
+        with self.uow as uow:
+            sessions = uow.data.find({'schema_ref': schema_ref, 'data_name': data_name})
+            if len(sessions) > 0:
+                for session in sessions:
+                    uow.data.remove(session['schema_ref'], session['data_name'])
+                    uow.commit()
+                uow.purge()
+                assert len(uow.data.find({'schema_ref': schema_ref, 'data_name': data_name})) == 0, 'Data still exists'
 
+    def remove_all(self):
+        if self.is_public:
+            raise ValueError('This operation is not allowed for public users')
+
+        with self.uow as uow:
+            sessions = uow.data.find({})
+            print(len(sessions))
+            for session in range(len(sessions)):
+                uow.data.remove(session['schema_ref'], session['data_name'])
+                uow.commit()
+
+            uow.purge()
+
+            print('Verifying deletion job. Dataset length: ', len(uow.data.find({})))
+
+    def find(self, query:dict, validate=False):
+        '''
+        query: {
+            'dataset': 'dsxxxx',
+
+        }'''
+        with self.uow as uow:
+            sessions = uow.data.find(query, validate=validate)
+            if sessions:
+                print(f'Found {len(sessions)} records')
+                return sessions
+            else:
+                return []
+
+    def get(self, query:dict, validate=False):
+        '''
+        query: {
+            'dataset': 'dsxxxx',
+
+        }'''
+        with self.uow as uow:
+            sessions = uow.data.find(query, validate=validate)
+            results = []
+            if sessions:
+                print(f'Found {len(sessions)} records')
+                results = Parallel(n_jobs=-1, prefer="threads", verbose=1)(
+                    delayed(self.load_eeg_data_from_s3)(Path(self.AWS_BUCKET) / session['bidspath']) for session in sessions
+                )
+            return results
 
 class SignalstoreBIDS():
     AWS_BUCKET = 'eegdash'
@@ -266,6 +581,41 @@ class SignalstoreBIDS():
         else:
             return []
 
+class OpenneuroFileSystemDAO(FileSystemDAO):
+    def __init__(self):
+        filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
+        super().__init__(filesystem, project_dir='openneuro.org')
+
+    def get(self, schema_ref, data_name, version_timestamp=0, nth_most_recent=1, data_adapter=None):
+        """Gets an object from the Openneuro S3 bucket.
+        Arguments:
+            schema_ref {str} -- The type of object to get.
+            data_name {str} -- The name of the object to get.
+            version_timestamp {str} -- The version_timestamp of the object to get.
+        Raises:
+            FileSystemDAOFileNotFoundError -- If the object is not found.
+        Returns:
+            dict -- The object.
+        """
+        self._check_args(
+            schema_ref=schema_ref,
+            data_name=data_name,
+            nth_most_recent=nth_most_recent,
+            version_timestamp=version_timestamp,
+            data_adapter=data_adapter
+        )
+        if data_adapter is None:
+            data_adapter = self._default_data_adapter
+        else:
+            data_adapter.set_filesystem(self._fs)
+        path = self._get_file_path(schema_ref, data_name, version_timestamp, nth_most_recent, data_adapter)
+        if path is None:
+            return None
+        data_object = data_adapter.read_file(path)
+        data_object = self._deserialize(data_object)
+        return data_object
+
+
 if __name__ == "__main__":
     # sstore_hbn = SignalstoreHBN()
     # sstore_hbn.add_data()
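The new `SignalstoreOpenneuro` class exposes a small query API over the MongoDB metadata records: `find()` returns matching records, and `get()` additionally streams each matching `.set` file from the public OpenNeuro S3 bucket, parallelized with joblib threads. Note that `remove_all()` iterates `for session in range(len(sessions))` and then indexes `session['schema_ref']`, which would raise a `TypeError` on an integer. A usage sketch, assuming a reachable MongoDB whose URI is supplied via the `DB_CONNECTION_STRING` environment variable (the URI and query values below are placeholders):

```python
import os
from eegdash.signalstore_data_utils import SignalstoreOpenneuro

# Placeholder URI; the non-public path reads DB_CONNECTION_STRING via python-dotenv.
os.environ.setdefault('DB_CONNECTION_STRING', 'mongodb://127.0.0.1:27017')

# Anonymous S3 access to openneuro.org is used when local_filesystem=False.
store = SignalstoreOpenneuro(is_public=False, local_filesystem=False)

# find() returns metadata records; get() additionally downloads each matching
# .set file from S3 and returns xarray.DataArrays (channel x time).
records = store.find({'dataset': 'ds005505'})  # example query
data = store.get({'data_name': records[0]['data_name']}) if records else []
```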
{eegdash-0.0.1.dist-info → eegdash-0.0.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.2
 Name: eegdash
-Version: 0.0.1
+Version: 0.0.2
 Summary: EEG data for machine learning
 Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
 License: GNU General Public License
@@ -32,12 +32,24 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: signalstore
 
 # EEG-Dash
 To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
 
 ## Data source
-The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate data converted from NEMAR, which includes
+The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
+
+## Datasets available
+
+There are currently only two datasets made available for testing purposes.
+
+| Dataset ID | Description | Participants | Channels | Task | NEMAR Link |
+|------------|-------------|--------------|----------|------|------------|
+| ds002718 | EEG dataset focused on face processing with MRI for source localization | 18 | 70 EEG, 2 EOG | FaceRecognition | [NEMAR ds002718](https://nemar.org/dataexplorer/detail?dataset_id=ds002718) |
+| ds004745 | 8-Channel SSVEP EEG dataset with trials including voluntary movements to introduce artifacts | 6 | 8 EEG | SSVEP tasks | [NEMAR ds004745](https://nemar.org/dataexplorer/detail?dataset_id=ds004745) |
+
+
 
 ## Data formatting
 The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep learning (DL) applications by using a simplified structure commonly adopted by these communities. This will involve converting raw MEEG data into a matrix format, where samples (e.g., individual EEG or MEG recordings) are represented by rows, and values (such as time or channel data) are represented by columns. The data is also divided into training and testing sets, with 80% of the data allocated for training and 20% for testing, ensuring a balanced representation of relevant labels across sets. Hierarchical Event Descriptor (HED) tags will be used to annotate labels, which will be stored in a text table, and detailed metadata, including dataset origins and methods. This formatting process will ensure that data is ready for ML/DL models, allowing for efficient training and testing of algorithms while preserving data integrity and reusability.
@@ -49,15 +61,70 @@ The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep l
 
 The data in EEG-DaSh is accessed through Python and MATLAB libraries specifically designed for this platform. These libraries will use objects compatible with deep learning data storage formats in each language, such as <i>Torchvision.dataset</i> in Python and <i>DataStore</i> in MATLAB. Users can dynamically fetch data from the EEG-DaSh server which is then cached locally.
 
-###
+### Install
+Use your preferred Python environment manager with Python > 3.9 to install the package. Here we show an example using a Conda environment with Python 3.11.5:
+* Create a new environment with Python 3.11.5 -> `conda create --name eegdash python=3.11.5`
+* Switch to the new environment -> `conda activate eegdash`
+* Install dependencies (this is a temporary link that will be updated soon) -> `pip install -r https://raw.githubusercontent.com/sccn/EEG-Dash-Data/refs/heads/develop/requirements.txt`
+* Install the _eegdash_ package (this is a temporary link that will be updated soon) -> `pip install -i https://test.pypi.org/simple/ eegdash`
+* Check the installation. Start a Python session and type `from eegdash import EEGDash`
+
+### Python data access
+
+To create a local object for accessing the database, use the following code:
+
+```python
+from eegdash import EEGDash
+EEGDashInstance = EEGDash()
+```
+
+Once the object is instantiated, it can be used to search the database. Providing an empty query will search the entire database and return all available records.
+
+```python
+EEGDashInstance.find({})
+```
+A list of records is returned.
+
+```python
+[{'schema_ref': 'eeg_signal',
+  'data_name': 'ds004745_sub-001_task-unnamed_eeg.set',
+  'dataset': 'ds004745',
+  'subject': '001',
+  'task': 'unnamed',
+  'session': '',
+  'run': '',
+  'modality': 'EEG',
+  'sampling_frequency': 1000,
+  'version_timestamp': 0,
+  'has_file': True,
+  'time_of_save': datetime.datetime(2024, 10, 25, 14, 11, 48, 843593, tzinfo=datetime.timezone.utc),
+  'time_of_removal': None}, ...]
+```
+
+Additionally, users can search for a specific dataset by specifying criteria.
+
+```python
+EEGDashInstance.find({'task': 'FaceRecognition'})
+```
+
+After locating the desired dataset or data record, users can download it locally by executing the following command:
+
+```python
+EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})
+```
+
+Optionally, this is how you may access the raw data for the first record.
 
-
+```python
+EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})[0].values
+```
 
-
+## Example use
 
-
+This [example](tests/eegdash.ipynb) demonstrates the full workflow from data retrieval with `EEGDash` to model definition, data handling, and training in PyTorch.
 
-## Education
+## Education - Coming soon...
 
 We organize workshops and educational events to foster cross-cultural education and student training, offering both online and in-person opportunities in collaboration with US and Israeli partners. There is no event planned for 2024. Events for 2025 will be advertised on the EEGLABNEWS mailing list so make sure to [subscribe](https://sccn.ucsd.edu/mailman/listinfo/eeglabnews).
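The notebook referenced under "Example use" is not shipped in the wheel. As a rough orientation only: records returned by `get()` are `xarray.DataArray` objects (channel × time), so bridging to PyTorch amounts to slicing windows and converting to tensors. A hypothetical sketch (the window length and the absence of labels are placeholder choices, not part of the release):

```python
import torch
from torch.utils.data import Dataset

class EEGWindowDataset(Dataset):
    """Wraps (channel x time) arrays from EEGDashInstance.get() as fixed-size windows."""
    def __init__(self, arrays, win=256):
        # arrays: list of xarray.DataArray; .values yields a NumPy (channels, samples) matrix
        self.windows = [
            torch.tensor(a.values[:, i:i + win], dtype=torch.float32)
            for a in arrays
            for i in range(0, a.values.shape[1] - win + 1, win)
        ]

    def __len__(self):
        return len(self.windows)

    def __getitem__(self, idx):
        return self.windows[idx]

# Usage sketch: arrays = EEGDashInstance.get({'task': 'FaceRecognition'})
# loader = torch.utils.data.DataLoader(EEGWindowDataset(arrays), batch_size=32)
```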
eegdash-0.0.2.dist-info/RECORD
ADDED

@@ -0,0 +1,10 @@
+eegdash/__init__.py,sha256=hgxE8COvPu3EV2Tq3GqtMk68fsd7bYvOs_0GO6rrzfk,32
+eegdash/data_utils.py,sha256=CA4lC5MKSoxCp0uJWy_n2okGtTCof2svDzSGxHZcIo0,9080
+eegdash/main.py,sha256=ANyrsVCvDiKNiQAmlQt9FcyOeCoD4Oe6Gq25LM2o38o,675
+eegdash/script.py,sha256=IbxGybE9Bpx0fS9QEw2YMYkakARYsEFelH-xfzlPQxU,974
+eegdash/signalstore_data_utils.py,sha256=g4nSYBIR5obhlKCC1erH4C_KrmuaGVu_JJpcC59yRMY,24198
+eegdash-0.0.2.dist-info/LICENSE,sha256=Xafu48R-h_kyaNj2tuhfgdEv9_ovciktjUEgRRwMZ6w,812
+eegdash-0.0.2.dist-info/METADATA,sha256=rreskaKqIRA0bBmHJOpwQpFLxTX9oX_YCZpk5yd1wBs,9335
+eegdash-0.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+eegdash-0.0.2.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
+eegdash-0.0.2.dist-info/RECORD,,
eegdash/SignalStore/__init__.py
DELETED
File without changes

eegdash/SignalStore/signalstore/adapters/read_adapters/abstract_read_adapter.py
DELETED

@@ -1,13 +0,0 @@
-from abc import ABC, abstractmethod
-
-class AbstractReadAdapter(ABC):
-
-    def __iter__(self):
-        return self.read().__iter__()
-
-    def __next__(self):
-        return self.read().__next__()
-
-    @abstractmethod
-    def read(self):
-        raise NotImplementedError('AbstractReadAdapter.read() not implemented.')
eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/schema_read_adapter.py
DELETED

@@ -1,16 +0,0 @@
-from signalstore.adapters.read_adapters.abstract_read_adapter import AbstractReadAdapter
-import json
-from upath import UPath
-
-class SchemaReadAdapter(AbstractReadAdapter):
-    def __init__(self, directory):
-        self.dir = UPath(directory)
-
-    def read(self):
-        """Reads JSON files that conform to the Neuroscikit data model schemata.
-        """
-        for json_filepath in self.dir.glob('*.json'):
-            with open(json_filepath) as f:
-                yield dict(json.load(f))
-
-
eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/vocabulary_read_adapter.py
DELETED

@@ -1,19 +0,0 @@
-from signalstore.adapters.read_adapters.abstract_read_adapter import AbstractReadAdapter
-
-import yaml
-
-class VocabularyReadAdapter(AbstractReadAdapter):
-    def __init__(self, filepath):
-        self.filepath = filepath
-
-    def read(self):
-        """Reads a YAML file and converts each data object into an xarray.DataArray with
-        the appropriate dimensions, coordinates and metadata attributes for the
-        Neuroscikit data model.
-        """
-        with open(self.filepath) as f:
-            yaml_dict = yaml.load(f, Loader=yaml.FullLoader)
-            for key, value in yaml_dict.items():
-                record = {"name": key}
-                record.update(value)
-                yield record