eegdash 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


eegdash/preprocessing.py ADDED
@@ -0,0 +1,65 @@
+ import logging
+
+ import mne
+ import numpy as np
+
+ from braindecode.preprocessing import (
+     Preprocessor,
+ )
+
+ logger = logging.getLogger("eegdash")
+
+
+ class hbn_ec_ec_reannotation(Preprocessor):
+     """Preprocessor to reannotate the raw data for eyes open and eyes closed events.
+
+     This processor is designed for HBN datasets.
+
+     """
+
+     def __init__(self):
+         super().__init__(fn=self.transform, apply_on_array=False)
+
+     def transform(self, raw):
+         """Reannotate the raw data to create new events for eyes open and eyes closed.
+
+         This function modifies the raw MNE object by creating new events based on
+         the existing annotations for "instructed_toCloseEyes" and "instructed_toOpenEyes".
+         It generates new events every 2 seconds within specified time ranges after
+         the original events, and replaces the existing annotations with these new events.
+
+         Parameters
+         ----------
+         raw : mne.io.Raw
+             The raw MNE object containing EEG data and annotations.
+
+         """
+         events, event_id = mne.events_from_annotations(raw)
+
+         logger.info("Original events found with ids: %s", event_id)
+
+         # Create new events array for 2-second segments
+         new_events = []
+         sfreq = raw.info["sfreq"]
+         for event in events[events[:, 2] == event_id["instructed_toCloseEyes"]]:
+             # For each original event, create events every 2 seconds from 15s to 29s after
+             start_times = event[0] + np.arange(15, 29, 2) * sfreq
+             new_events.extend([[int(t), 0, 1] for t in start_times])
+
+         for event in events[events[:, 2] == event_id["instructed_toOpenEyes"]]:
+             # For each original event, create events every 2 seconds from 5s to 19s after
+             start_times = event[0] + np.arange(5, 19, 2) * sfreq
+             new_events.extend([[int(t), 0, 2] for t in start_times])
+
+         # replace events in raw
+         new_events = np.array(new_events)
+
+         annot_from_events = mne.annotations_from_events(
+             events=new_events,
+             event_desc={1: "eyes_closed", 2: "eyes_open"},
+             sfreq=raw.info["sfreq"],
+         )
+
+         raw.set_annotations(annot_from_events)
+
+         return raw
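
A minimal usage sketch of this new preprocessor (the `eegdash.preprocessing` module path is inferred from the RECORD below, and `dataset` is assumed to be a braindecode `BaseConcatDataset` of HBN resting-state recordings):

```python
from braindecode.preprocessing import preprocess

from eegdash.preprocessing import hbn_ec_ec_reannotation  # path inferred, not confirmed

# Rewrites each recording's annotations into 2-second
# "eyes_closed" / "eyes_open" events, in place.
preprocess(dataset, [hbn_ec_ec_reannotation()])
```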
eegdash/utils.py ADDED
@@ -0,0 +1,11 @@
+ def __init__mongo_client():
+     from mne.utils import get_config, set_config
+
+     if get_config("EEGDASH_DB_URI") is None:
+         # Set the default MongoDB URI for EEGDash
+         # This is a placeholder and should be replaced with your actual MongoDB URI
+
+         set_config(
+             "EEGDASH_DB_URI",
+             "mongodb+srv://eegdash-user:mdzoMjQcHWTVnKDq@cluster0.vz35p.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0",
+         )
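
Because the default is only applied when the key is unset, a user can point EEGDash at their own database before the client initializes; a minimal sketch using the same MNE config helpers the module itself calls (the local URI is illustrative):

```python
from mne.utils import get_config, set_config

# Persist a custom URI; the bundled default above is used only
# when "EEGDASH_DB_URI" is missing from the MNE config store.
set_config("EEGDASH_DB_URI", "mongodb://localhost:27017")
print(get_config("EEGDASH_DB_URI"))
```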
eegdash-0.1.0.dist-info/METADATA → eegdash-0.2.0.dist-info/METADATA
@@ -1,14 +1,15 @@
  Metadata-Version: 2.4
  Name: eegdash
- Version: 0.1.0
+ Version: 0.2.0
  Summary: EEG data for machine learning
- Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
+ Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
  License: GNU General Public License

  Copyright (C) 2024-2025

  Young Truong, UCSD, dt.young112@gmail.com
  Arnaud Delorme, UCSD, adelorme@ucsd.edu
+ Bruno Aristimunha, b.aristimunha@gmail.com

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -26,14 +27,27 @@ License: GNU General Public License

  Project-URL: Homepage, https://github.com/sccn/EEG-Dash-Data
  Project-URL: Issues, https://github.com/sccn/EEG-Dash-Data/issues
- Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Operating System :: OS Independent
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Intended Audience :: Developers
+ Classifier: Programming Language :: Python
+ Classifier: Topic :: Software Development
+ Classifier: Topic :: Scientific/Engineering
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Operating System :: Microsoft :: Windows
+ Classifier: Operating System :: POSIX
+ Classifier: Operating System :: Unix
+ Classifier: Operating System :: MacOS
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
  Requires-Python: >3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: braindecode
- Requires-Dist: mne_bids
+ Requires-Dist: braindecode>=1.0
+ Requires-Dist: mne_bids>=0.16.0
  Requires-Dist: numba
  Requires-Dist: numpy
  Requires-Dist: pandas
@@ -44,13 +58,36 @@ Requires-Dist: s3fs
  Requires-Dist: scipy
  Requires-Dist: tqdm
  Requires-Dist: xarray
- Requires-Dist: pre-commit
+ Provides-Extra: tests
+ Requires-Dist: pytest; extra == "tests"
+ Requires-Dist: pytest-cov; extra == "tests"
+ Requires-Dist: codecov; extra == "tests"
+ Requires-Dist: pytest_cases; extra == "tests"
+ Provides-Extra: dev
+ Requires-Dist: pre-commit; extra == "dev"
+ Provides-Extra: docs
+ Requires-Dist: sphinx; extra == "docs"
+ Requires-Dist: sphinx_gallery; extra == "docs"
+ Requires-Dist: sphinx_rtd_theme; extra == "docs"
+ Requires-Dist: numpydoc; extra == "docs"
+ Provides-Extra: all
+ Requires-Dist: pytest; extra == "all"
+ Requires-Dist: pytest-cov; extra == "all"
+ Requires-Dist: codecov; extra == "all"
+ Requires-Dist: pytest_cases; extra == "all"
+ Requires-Dist: pre-commit; extra == "all"
+ Requires-Dist: sphinx; extra == "all"
+ Requires-Dist: sphinx_gallery; extra == "all"
+ Requires-Dist: sphinx_rtd_theme; extra == "all"
+ Requires-Dist: numpydoc; extra == "all"
  Dynamic: license-file

  # EEG-Dash
+
  To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.

  ## Data source
+
  The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.

  ## Featured data
@@ -70,9 +107,11 @@ The following HBN datasets are currently featured on EEGDash. Documentation abou
  A total of [246 other datasets](datasets.md) are also available through EEGDash.

  ## Data format
+
  EEGDash queries return a **Pytorch Dataset** formatted to facilitate machine learning (ML) and deep learning (DL) applications. PyTorch Datasets are the best format for EEGDash queries because they provide an efficient, scalable, and flexible structure for machine learning (ML) and deep learning (DL) applications. They allow seamless integration with PyTorch’s DataLoader, enabling efficient batching, shuffling, and parallel data loading, which is essential for training deep learning models on large EEG datasets.

  ## Data preprocessing
+
  EEGDash datasets are processed using the popular [BrainDecode](https://braindecode.org/stable/index.html) library. In fact, EEGDash datasets are BrainDecode datasets, which are themselves PyTorch datasets. This means that any preprocessing possible on BrainDecode datasets is also possible on EEGDash datasets. Refer to [BrainDecode](https://braindecode.org/stable/index.html) tutorials for guidance on preprocessing EEG data.

  ## EEG-Dash usage
@@ -125,3 +164,7 @@ EEG-DaSh is a collaborative initiative between the United States and Israel, sup



+ python3 -m pip install --upgrade build
+ python3 -m build
+ python3 -m pip install --upgrade twine
+ python3 -m twine upload --repository eegdash dist/*
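
The "Data format" section above promises a PyTorch Dataset from every query; a minimal sketch of what that enables (`windows_ds` is assumed to be a windowed EEGDash/braindecode dataset, whose items are `(X, y, window_inds)` triples):

```python
from torch.utils.data import DataLoader

# Batching, shuffling, and parallel loading come for free
# once a query result is a PyTorch Dataset.
loader = DataLoader(windows_ds, batch_size=64, shuffle=True, num_workers=2)
for X, y, window_inds in loader:
    pass  # feed X and y into a training step
```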
eegdash-0.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,27 @@
+ eegdash/__init__.py,sha256=nMBZrB4bJs79rl9TZ_x-IVPNTHYJS6V55Pus9fErB5E,232
+ eegdash/api.py,sha256=1mbufz6qgujrY_9V2GWUP4Eqel_q4ns9XVrmbGcSJG0,25776
+ eegdash/data_config.py,sha256=OS6ERO-jHrnEOfMJUehY7ieABdsRw_qWzOKJ4pzSfqw,1323
+ eegdash/data_utils.py,sha256=mR0TtERYIefakGQ98jwAeeRVKSNDU9eBlUoH1AY9tnc,23663
+ eegdash/dataset.py,sha256=qXcE4JxxYj89VQ84sKmq7kGcunZqt1pp5wz7a62j_OQ,2460
+ eegdash/preprocessing.py,sha256=wvqAO8UgDoQQz7xjVykrl4V8AawS4tpKR4Vrr_9BovY,2230
+ eegdash/utils.py,sha256=ZxVW4ll5MaSZ_ht1L5p7YJxOtYi3b0547oa5W_jbH4A,450
+ eegdash/features/__init__.py,sha256=484CLxpPifc8ZQfeM8jWZLvtVKljCxn3qqlUCaq-Yxk,1284
+ eegdash/features/datasets.py,sha256=kU1DO70ArSIy-LF1hHD2NN4iT-kJrI0mVpSkyV_OSeI,18301
+ eegdash/features/decorators.py,sha256=v0qaJz_dcX703p1fvFYbAIXmwK3d8naYGlq7fRVKn_w,1313
+ eegdash/features/extractors.py,sha256=H7h6tP3dKoRcjDJpWWAo0ppmokCq5QlhqMcehYwYV9s,6845
+ eegdash/features/inspect.py,sha256=PmbWhx5H_WqpnorUpWONUSkUtaIHkZblRa_Xyk7Szyc,1569
+ eegdash/features/serialization.py,sha256=pNsTz0EeRPPYE-A61XK7UoMShI9YBEHQqC5STbzUU6A,2861
+ eegdash/features/utils.py,sha256=eM6DdyOpdVfNh7dSPykJ0WaTDtaGvkCQWAmW0G8v60Y,3784
+ eegdash/features/feature_bank/__init__.py,sha256=BKrM3aaggXrfey1yEjEBYaxOV5e3UK-o8oGeB30epOg,149
+ eegdash/features/feature_bank/complexity.py,sha256=Ds1GAXZ0LGM32xB4EZC2jbMljUBv0yicf2SkuyLvN5I,3183
+ eegdash/features/feature_bank/connectivity.py,sha256=bQ6KlxWm5GNpCS9ypLqBUr2L171Yq7wpBQT2tRQKTZ4,2159
+ eegdash/features/feature_bank/csp.py,sha256=YOzieLnOcqjvfrcjvg8R3S4SWuC1BqK5J5WXVNCCTc0,3304
+ eegdash/features/feature_bank/dimensionality.py,sha256=j_Ds71Y1AbV2uLFQj8EuXQ4kzofLBlQtPV5snMkF7i4,3965
+ eegdash/features/feature_bank/signal.py,sha256=3Tb8z9gX7iZipxQJ9DSyy30JfdmW58kgvimSyZX74p8,3404
+ eegdash/features/feature_bank/spectral.py,sha256=bNB7skusePs1gX7NOU6yRlw_Gr4UOCkO_ylkCgybzug,3319
+ eegdash/features/feature_bank/utils.py,sha256=DGh-Q7-XFIittP7iBBxvsJaZrlVvuY5mw-G7q6C-PCI,1237
+ eegdash-0.2.0.dist-info/licenses/LICENSE,sha256=KykUD4H3kw3HLz5bZ0kxMWwZotnk8rhkfCCerGyX2sk,855
+ eegdash-0.2.0.dist-info/METADATA,sha256=GhxMc7p2HvTZo9lZFjBX1tJ70VeMlMnaYBhvpqw0iG8,10220
+ eegdash-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ eegdash-0.2.0.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
+ eegdash-0.2.0.dist-info/RECORD,,
eegdash-0.1.0.dist-info/WHEEL → eegdash-0.2.0.dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.8.0)
+ Generator: setuptools (80.9.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

eegdash-0.1.0.dist-info/licenses/LICENSE → eegdash-0.2.0.dist-info/licenses/LICENSE
@@ -4,6 +4,7 @@ Copyright (C) 2024-2025

  Young Truong, UCSD, dt.young112@gmail.com
  Arnaud Delorme, UCSD, adelorme@ucsd.edu
+ Bruno Aristimunha, b.aristimunha@gmail.com

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
eegdash/main.py DELETED
@@ -1,416 +0,0 @@
- import json
- import os
- import tempfile
- from collections import defaultdict
- from pathlib import Path
-
- import mne
- import numpy as np
- import pymongo
- import s3fs
- import xarray as xr
- from dotenv import load_dotenv
- from joblib import Parallel, delayed
- from pymongo import DeleteOne, InsertOne, MongoClient, UpdateOne
-
- from braindecode.datasets import BaseConcatDataset, BaseDataset
-
- from .data_config import config as data_config
- from .data_utils import EEGBIDSDataset, EEGDashBaseDataset, EEGDashBaseRaw
-
-
- class EEGDash:
-     AWS_BUCKET = "s3://openneuro.org"
-
-     def __init__(self, is_public=True):
-         # Load config file
-         # config_path = Path(__file__).parent / 'config.json'
-         # with open(config_path, 'r') as f:
-         #     self.config = json.load(f)
-
-         self.config = data_config
-         if is_public:
-             DB_CONNECTION_STRING = "mongodb+srv://eegdash-user:mdzoMjQcHWTVnKDq@cluster0.vz35p.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
-         else:
-             load_dotenv()
-             DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")
-
-         self.__client = pymongo.MongoClient(DB_CONNECTION_STRING)
-         self.__db = self.__client["eegdash"]
-         self.__collection = self.__db["records"]
-
-         self.is_public = is_public
-         self.filesystem = s3fs.S3FileSystem(
-             anon=True, client_kwargs={"region_name": "us-east-2"}
-         )
-
-     def find(self, *args):
-         results = self.__collection.find(*args)
-
-         # convert to list using get_item on each element
-         return [result for result in results]
-
-     def exist(self, query: dict):
-         accepted_query_fields = ["data_name", "dataset"]
-         assert all(field in accepted_query_fields for field in query.keys())
-         sessions = self.find(query)
-         return len(sessions) > 0
-
-     def _validate_input(self, record: dict):
-         input_types = {
-             "data_name": str,
-             "dataset": str,
-             "bidspath": str,
-             "subject": str,
-             "task": str,
-             "session": str,
-             "run": str,
-             "sampling_frequency": float,
-             "modality": str,
-             "nchans": int,
-             "ntimes": int,
-             "channel_types": list,
-             "channel_names": list,
-         }
-         if "data_name" not in record:
-             raise ValueError("Missing key: data_name")
-         # check if args are in the keys and has correct type
-         for key, value in record.items():
-             if key not in input_types:
-                 raise ValueError(f"Invalid input: {key}")
-             if not isinstance(value, input_types[key]):
-                 raise ValueError(f"Invalid input: {key}")
-
-         return record
-
-     def load_eeg_data_from_s3(self, s3path):
-         with tempfile.NamedTemporaryFile(delete=False, suffix=".set") as tmp:
-             with self.filesystem.open(s3path) as s3_file:
-                 tmp.write(s3_file.read())
-             tmp_path = tmp.name
-         eeg_data = self.load_eeg_data_from_bids_file(tmp_path)
-         os.unlink(tmp_path)
-         return eeg_data
-
-     def load_eeg_data_from_bids_file(self, bids_file, eeg_attrs=None):
-         """
-         bids_file must be a file of the bids_dataset
-         """
-         EEG = mne.io.read_raw_eeglab(bids_file)
-         eeg_data = EEG.get_data()
-
-         fs = EEG.info["sfreq"]
-         max_time = eeg_data.shape[1] / fs
-         time_steps = np.linspace(0, max_time, eeg_data.shape[1]).squeeze()  # in seconds
-
-         channel_names = EEG.ch_names
-
-         eeg_xarray = xr.DataArray(
-             data=eeg_data,
-             dims=["channel", "time"],
-             coords={"time": time_steps, "channel": channel_names},
-             # attrs=attrs
-         )
-         return eeg_xarray
-
-     def get_raw_extensions(self, bids_file, bids_dataset: EEGBIDSDataset):
-         bids_file = Path(bids_file)
-         extensions = {
-             ".set": [".set", ".fdt"],  # eeglab
-             ".edf": [".edf"],  # european
-             ".vhdr": [".eeg", ".vhdr", ".vmrk", ".dat", ".raw"],  # brainvision
-             ".bdf": [".bdf"],  # biosemi
-         }
-         return [
-             str(bids_dataset.get_relative_bidspath(bids_file.with_suffix(suffix)))
-             for suffix in extensions[bids_file.suffix]
-             if bids_file.with_suffix(suffix).exists()
-         ]
-
-     def load_eeg_attrs_from_bids_file(self, bids_dataset: EEGBIDSDataset, bids_file):
-         """
-         bids_file must be a file of the bids_dataset
-         """
-         if bids_file not in bids_dataset.files:
-             raise ValueError(f"{bids_file} not in {bids_dataset.dataset}")
-
-         # Initialize attrs with None values for all expected fields
-         attrs = {field: None for field in self.config["attributes"].keys()}
-
-         f = os.path.basename(bids_file)
-         dsnumber = bids_dataset.dataset
-         # extract openneuro path by finding the first occurrence of the dataset name in the filename and remove the path before that
-         openneuro_path = dsnumber + bids_file.split(dsnumber)[1]
-
-         # Update with actual values where available
-         try:
-             participants_tsv = bids_dataset.subject_participant_tsv(bids_file)
-         except Exception as e:
-             print(f"Error getting participants_tsv: {str(e)}")
-             participants_tsv = None
-
-         try:
-             eeg_json = bids_dataset.eeg_json(bids_file)
-         except Exception as e:
-             print(f"Error getting eeg_json: {str(e)}")
-             eeg_json = None
-
-         bids_dependencies_files = self.config["bids_dependencies_files"]
-         bidsdependencies = []
-         for extension in bids_dependencies_files:
-             try:
-                 dep_path = bids_dataset.get_bids_metadata_files(bids_file, extension)
-                 dep_path = [
-                     str(bids_dataset.get_relative_bidspath(dep)) for dep in dep_path
-                 ]
-                 bidsdependencies.extend(dep_path)
-             except Exception as e:
-                 pass
-
-         bidsdependencies.extend(self.get_raw_extensions(bids_file, bids_dataset))
-
-         # Define field extraction functions with error handling
-         field_extractors = {
-             "data_name": lambda: f"{bids_dataset.dataset}_{f}",
-             "dataset": lambda: bids_dataset.dataset,
-             "bidspath": lambda: openneuro_path,
-             "subject": lambda: bids_dataset.get_bids_file_attribute(
-                 "subject", bids_file
-             ),
-             "task": lambda: bids_dataset.get_bids_file_attribute("task", bids_file),
-             "session": lambda: bids_dataset.get_bids_file_attribute(
-                 "session", bids_file
-             ),
-             "run": lambda: bids_dataset.get_bids_file_attribute("run", bids_file),
-             "modality": lambda: bids_dataset.get_bids_file_attribute(
-                 "modality", bids_file
-             ),
-             "sampling_frequency": lambda: bids_dataset.get_bids_file_attribute(
-                 "sfreq", bids_file
-             ),
-             "nchans": lambda: bids_dataset.get_bids_file_attribute("nchans", bids_file),
-             "ntimes": lambda: bids_dataset.get_bids_file_attribute("ntimes", bids_file),
-             "participant_tsv": lambda: participants_tsv,
-             "eeg_json": lambda: eeg_json,
-             "bidsdependencies": lambda: bidsdependencies,
-         }
-
-         # Dynamically populate attrs with error handling
-         for field, extractor in field_extractors.items():
-             try:
-                 attrs[field] = extractor()
-             except Exception as e:
-                 print(f"Error extracting {field}: {str(e)}")
-                 attrs[field] = None
-
-         return attrs
-
-     def add_bids_dataset(self, dataset, data_dir, overwrite=True):
-         """
-         Create new records for the dataset in the MongoDB database if not found
-         """
-         if self.is_public:
-             raise ValueError("This operation is not allowed for public users")
-
-         if not overwrite and self.exist({"dataset": dataset}):
-             print(f"Dataset {dataset} already exists in the database")
-             return
-         try:
-             bids_dataset = EEGBIDSDataset(
-                 data_dir=data_dir,
-                 dataset=dataset,
-             )
-         except Exception as e:
-             print(f"Error creating bids dataset {dataset}: {str(e)}")
-             raise e
-         requests = []
-         for bids_file in bids_dataset.get_files():
-             try:
-                 data_id = f"{dataset}_{os.path.basename(bids_file)}"
-
-                 if self.exist({"data_name": data_id}):
-                     if overwrite:
-                         eeg_attrs = self.load_eeg_attrs_from_bids_file(
-                             bids_dataset, bids_file
-                         )
-                         requests.append(self.update_request(eeg_attrs))
-                 else:
-                     eeg_attrs = self.load_eeg_attrs_from_bids_file(
-                         bids_dataset, bids_file
-                     )
-                     requests.append(self.add_request(eeg_attrs))
-             except:
-                 print("error adding record", bids_file)
-
-         print("Number of database requests", len(requests))
-
-         if requests:
-             result = self.__collection.bulk_write(requests, ordered=False)
-             print(f"Inserted: {result.inserted_count}")
-             print(f"Modified: {result.modified_count}")
-             print(f"Deleted: {result.deleted_count}")
-             print(f"Upserted: {result.upserted_count}")
-             print(f"Errors: {result.bulk_api_result.get('writeErrors', [])}")
-
-     def get(self, query: dict):
-         """
-         query: {
-             'dataset': 'dsxxxx',
-
-         }"""
-         sessions = self.find(query)
-         results = []
-         if sessions:
-             print(f"Found {len(sessions)} records")
-             results = Parallel(
-                 n_jobs=-1 if len(sessions) > 1 else 1, prefer="threads", verbose=1
-             )(
-                 delayed(self.load_eeg_data_from_s3)(self.get_s3path(session))
-                 for session in sessions
-             )
-         return results
-
-     def add_request(self, record: dict):
-         return InsertOne(record)
-
-     def add(self, record: dict):
-         try:
-             # input_record = self._validate_input(record)
-             self.__collection.insert_one(record)
-         # silent failing
-         except ValueError as e:
-             print(f"Failed to validate record: {record['data_name']}")
-             print(e)
-         except:
-             print(f"Error adding record: {record['data_name']}")
-
-     def update_request(self, record: dict):
-         return UpdateOne({"data_name": record["data_name"]}, {"$set": record})
-
-     def update(self, record: dict):
-         try:
-             self.__collection.update_one(
-                 {"data_name": record["data_name"]}, {"$set": record}
-             )
-         except:  # silent failure
-             print(f"Error updating record {record['data_name']}")
-
-     def remove_field(self, record, field):
-         self.__collection.update_one(
-             {"data_name": record["data_name"]}, {"$unset": {field: 1}}
-         )
-
-     def remove_field_from_db(self, field):
-         self.__collection.update_many({}, {"$unset": {field: 1}})
-
-     @property
-     def collection(self):
-         return self.__collection
-
-
- class EEGDashDataset(BaseConcatDataset):
-     # CACHE_DIR = '.eegdash_cache'
-     def __init__(
-         self,
-         query: dict = None,
-         data_dir: str | list = None,
-         dataset: str | list = None,
-         description_fields: list[str] = [
-             "subject",
-             "session",
-             "run",
-             "task",
-             "age",
-             "gender",
-             "sex",
-         ],
-         cache_dir: str = ".eegdash_cache",
-         **kwargs,
-     ):
-         self.cache_dir = cache_dir
-         if query:
-             datasets = self.find_datasets(query, description_fields, **kwargs)
-         elif data_dir:
-             if type(data_dir) == str:
-                 datasets = self.load_bids_dataset(dataset, data_dir, description_fields)
-             else:
-                 assert len(data_dir) == len(dataset), (
-                     "Number of datasets and their directories must match"
-                 )
-                 datasets = []
-                 for i in range(len(data_dir)):
-                     datasets.extend(
-                         self.load_bids_dataset(
-                             dataset[i], data_dir[i], description_fields
-                         )
-                     )
-         # convert to list using get_item on each element
-         super().__init__(datasets)
-
-     def find_key_in_nested_dict(self, data, target_key):
-         if isinstance(data, dict):
-             if target_key in data:
-                 return data[target_key]
-             for value in data.values():
-                 result = self.find_key_in_nested_dict(value, target_key)
-                 if result is not None:
-                     return result
-         return None
-
-     def find_datasets(self, query: dict, description_fields: list[str], **kwargs):
-         eegdashObj = EEGDash()
-         datasets = []
-         for record in eegdashObj.find(query):
-             description = {}
-             for field in description_fields:
-                 value = self.find_key_in_nested_dict(record, field)
-                 if value is not None:
-                     description[field] = value
-             datasets.append(
-                 EEGDashBaseDataset(
-                     record, self.cache_dir, description=description, **kwargs
-                 )
-             )
-         return datasets
-
-     def load_bids_dataset(
-         self,
-         dataset,
-         data_dir,
-         description_fields: list[str],
-         raw_format="eeglab",
-         **kwargs,
-     ):
-         """ """
-
-         def get_base_dataset_from_bids_file(bids_dataset, bids_file):
-             record = eegdashObj.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
-             description = {}
-             for field in description_fields:
-                 value = self.find_key_in_nested_dict(record, field)
-                 if value is not None:
-                     description[field] = value
-             return EEGDashBaseDataset(
-                 record, self.cache_dir, description=description, **kwargs
-             )
-
-         bids_dataset = EEGBIDSDataset(
-             data_dir=data_dir,
-             dataset=dataset,
-         )
-         eegdashObj = EEGDash()
-         datasets = Parallel(n_jobs=-1, prefer="threads", verbose=1)(
-             delayed(get_base_dataset_from_bids_file)(bids_dataset, bids_file)
-             for bids_file in bids_dataset.get_files()
-         )
-         return datasets
-
-
- def main():
-     eegdash = EEGDash()
-     record = eegdash.find({"dataset": "ds005511", "subject": "NDARUF236HM7"})
-     print(record)
-
-
- if __name__ == "__main__":
-     main()
1
- eegdash/__init__.py,sha256=dyNvSv7ORVDYDz0P-XBNj_SApMlOqwt8LHQqfeuPKCg,105
2
- eegdash/data_config.py,sha256=sIwj7lnZ1hCjeFs-0CXeHn93btm9fX7mwgVTZVeVh-w,763
3
- eegdash/data_utils.py,sha256=LqAJygSpPpYEIerAnWHuHP0OMjd7jQtzXIodbvb0568,19436
4
- eegdash/main.py,sha256=CFI-Bro_oru5iRJdNQZ8IqeRPhrZKXj8wKoMdcrhFt8,14865
5
- eegdash/features/__init__.py,sha256=Ijhc-bLwysyF_HTmdJwbYoTHbxj2wxArs1xSUzhm7Hc,604
6
- eegdash/features/datasets.py,sha256=JB-VTfXTwfbxpgF9wq34gKK69YNCZPQwsnaKEXQisWk,17180
7
- eegdash/features/decorators.py,sha256=iVsbdQXGoLi-V6M9BgP6P8i_UzUtIAWQlf8Qq_LdRqY,1247
8
- eegdash/features/extractors.py,sha256=bITM4DXbW1Dq8Nm8hS3OrSGfRFV6-IwzkTzjiy_yg9k,6816
9
- eegdash/features/serialization.py,sha256=ceGcEvKCg4OsWyLpdAyJsvU1-6UXcvVx2q6nq58vt8Y,2873
10
- eegdash/features/utils.py,sha256=jjVNVLFSXFj3j7NWgEbUlt5faTrWKLLQY9ZYy0xLp_M,3782
11
- eegdash/features/feature_bank/__init__.py,sha256=BKrM3aaggXrfey1yEjEBYaxOV5e3UK-o8oGeB30epOg,149
12
- eegdash/features/feature_bank/complexity.py,sha256=WkLin-f1WTPUtcpkLDObY8nQYRsvpa08Xy9ly1k0hik,3017
13
- eegdash/features/feature_bank/connectivity.py,sha256=bQ6KlxWm5GNpCS9ypLqBUr2L171Yq7wpBQT2tRQKTZ4,2159
14
- eegdash/features/feature_bank/csp.py,sha256=O-kUijM47cOH7yfe7sYL9wT41w1dGaq6sOieh-h82pw,3300
15
- eegdash/features/feature_bank/dimensionality.py,sha256=e8rKpAT_xtZRsBDuVbznFx_daWdQj89Z3Zkt61Hs5qk,3734
16
- eegdash/features/feature_bank/signal.py,sha256=4jgIXRVS274puKfOnDNnqLoBP_yXRyP38iMnXRvobYo,2437
17
- eegdash/features/feature_bank/spectral.py,sha256=bNB7skusePs1gX7NOU6yRlw_Gr4UOCkO_ylkCgybzug,3319
18
- eegdash/features/feature_bank/utils.py,sha256=DGh-Q7-XFIittP7iBBxvsJaZrlVvuY5mw-G7q6C-PCI,1237
19
- eegdash-0.1.0.dist-info/licenses/LICENSE,sha256=Xafu48R-h_kyaNj2tuhfgdEv9_ovciktjUEgRRwMZ6w,812
20
- eegdash-0.1.0.dist-info/METADATA,sha256=RixWQ9dqP1IQzz_HCAZL2Sp-at190rx4ocpvy2DVaio,8551
21
- eegdash-0.1.0.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
22
- eegdash-0.1.0.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
23
- eegdash-0.1.0.dist-info/RECORD,,