eegdash 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

Files changed (75) hide show
  1. eegdash/data_utils.py → data_utils.py +131 -5
  2. {eegdash-0.0.1.dist-info → eegdash-0.0.3.dist-info}/METADATA +75 -8
  3. eegdash-0.0.3.dist-info/RECORD +8 -0
  4. {eegdash-0.0.1.dist-info → eegdash-0.0.3.dist-info}/WHEEL +1 -1
  5. eegdash-0.0.3.dist-info/top_level.txt +3 -0
  6. main.py +199 -0
  7. eegdash/SignalStore/__init__.py +0 -0
  8. eegdash/SignalStore/signalstore/__init__.py +0 -3
  9. eegdash/SignalStore/signalstore/adapters/read_adapters/abstract_read_adapter.py +0 -13
  10. eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/schema_read_adapter.py +0 -16
  11. eegdash/SignalStore/signalstore/adapters/read_adapters/domain_modeling/vocabulary_read_adapter.py +0 -19
  12. eegdash/SignalStore/signalstore/adapters/read_adapters/handmade_records/excel_study_organizer_read_adapter.py +0 -114
  13. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/axona/axona_read_adapter.py +0 -912
  14. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/ReadIntanSpikeFile.py +0 -140
  15. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/intan_read_adapter.py +0 -29
  16. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/__init__.py +0 -0
  17. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/data_to_result.py +0 -62
  18. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/get_bytes_per_data_block.py +0 -36
  19. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/notch_filter.py +0 -50
  20. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/qstring.py +0 -41
  21. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_header.py +0 -135
  22. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/intanutil/read_one_data_block.py +0 -45
  23. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhd_format/load_intan_rhd_format.py +0 -204
  24. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/__init__.py +0 -0
  25. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/data_to_result.py +0 -60
  26. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/get_bytes_per_data_block.py +0 -37
  27. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/notch_filter.py +0 -50
  28. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/qstring.py +0 -41
  29. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_header.py +0 -153
  30. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/intanutil/read_one_data_block.py +0 -47
  31. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/intan/load_intan_rhs_format/load_intan_rhs_format.py +0 -213
  32. eegdash/SignalStore/signalstore/adapters/read_adapters/recording_acquisitions/neurodata_without_borders/neurodata_without_borders_read_adapter.py +0 -14
  33. eegdash/SignalStore/signalstore/operations/__init__.py +0 -4
  34. eegdash/SignalStore/signalstore/operations/handler_executor.py +0 -22
  35. eegdash/SignalStore/signalstore/operations/handler_factory.py +0 -41
  36. eegdash/SignalStore/signalstore/operations/handlers/base_handler.py +0 -44
  37. eegdash/SignalStore/signalstore/operations/handlers/domain/property_model_handlers.py +0 -79
  38. eegdash/SignalStore/signalstore/operations/handlers/domain/schema_handlers.py +0 -3
  39. eegdash/SignalStore/signalstore/operations/helpers/abstract_helper.py +0 -17
  40. eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_extractor.py +0 -33
  41. eegdash/SignalStore/signalstore/operations/helpers/neuroscikit_rawio.py +0 -165
  42. eegdash/SignalStore/signalstore/operations/helpers/spikeinterface_helper.py +0 -100
  43. eegdash/SignalStore/signalstore/operations/helpers/wrappers/neo_wrappers.py +0 -21
  44. eegdash/SignalStore/signalstore/operations/helpers/wrappers/nwb_wrappers.py +0 -27
  45. eegdash/SignalStore/signalstore/store/__init__.py +0 -8
  46. eegdash/SignalStore/signalstore/store/data_access_objects.py +0 -1181
  47. eegdash/SignalStore/signalstore/store/datafile_adapters.py +0 -131
  48. eegdash/SignalStore/signalstore/store/repositories.py +0 -928
  49. eegdash/SignalStore/signalstore/store/store_errors.py +0 -68
  50. eegdash/SignalStore/signalstore/store/unit_of_work.py +0 -97
  51. eegdash/SignalStore/signalstore/store/unit_of_work_provider.py +0 -67
  52. eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_recording.py +0 -1
  53. eegdash/SignalStore/signalstore/utilities/data_adapters/spike_interface_adapters/si_sorter.py +0 -1
  54. eegdash/SignalStore/signalstore/utilities/testing/data_mocks.py +0 -513
  55. eegdash/SignalStore/signalstore/utilities/tools/dataarrays.py +0 -49
  56. eegdash/SignalStore/signalstore/utilities/tools/mongo_records.py +0 -25
  57. eegdash/SignalStore/signalstore/utilities/tools/operation_response.py +0 -78
  58. eegdash/SignalStore/signalstore/utilities/tools/purge_orchestration_response.py +0 -21
  59. eegdash/SignalStore/signalstore/utilities/tools/quantities.py +0 -15
  60. eegdash/SignalStore/signalstore/utilities/tools/strings.py +0 -38
  61. eegdash/SignalStore/signalstore/utilities/tools/time.py +0 -17
  62. eegdash/SignalStore/tests/conftest.py +0 -799
  63. eegdash/SignalStore/tests/data/valid_data/data_arrays/make_fake_data.py +0 -59
  64. eegdash/SignalStore/tests/unit/store/conftest.py +0 -0
  65. eegdash/SignalStore/tests/unit/store/test_data_access_objects.py +0 -1235
  66. eegdash/SignalStore/tests/unit/store/test_repositories.py +0 -1309
  67. eegdash/SignalStore/tests/unit/store/test_unit_of_work.py +0 -7
  68. eegdash/SignalStore/tests/unit/test_ci_cd.py +0 -8
  69. eegdash/aws_ingest.py +0 -29
  70. eegdash/main.py +0 -17
  71. eegdash/signalstore_data_utils.py +0 -280
  72. eegdash-0.0.1.dist-info/RECORD +0 -72
  73. eegdash-0.0.1.dist-info/top_level.txt +0 -1
  74. /eegdash/__init__.py → /__init__.py +0 -0
  75. {eegdash-0.0.1.dist-info → eegdash-0.0.3.dist-info}/LICENSE +0 -0
@@ -3,11 +3,104 @@ import sys
3
3
  from joblib import Parallel, delayed
4
4
  import mne
5
5
  import numpy as np
6
+ import pandas as pd
6
7
  from pathlib import Path
7
8
  import re
8
9
  import json
10
+ from mne.io import BaseRaw
11
+ from mne._fiff.utils import _find_channels, _read_segments_file
12
+ import s3fs
13
+ import tempfile
14
+ from mne._fiff.utils import _read_segments_file
9
15
 
10
- verbose = False
16
class RawEEGDash(BaseRaw):
    r"""Raw object from EEG-Dash connection with Openneuro S3 file.

    The recording geometry (channel names/types, sampling rate, length) comes
    from ``metadata``; the samples themselves stay on S3 and are downloaded
    into ``cache_dir`` either at construction (when ``preload`` is truthy) or
    lazily on the first data access.

    Parameters
    ----------
    input_fname : path-like
        Path to the S3 file (e.g. ``s3://openneuro.org/...``).
    metadata : dict
        Must provide ``'sfreq'``, ``'n_times'``, ``'ch_names'`` and
        ``'ch_types'`` describing the remote recording.
    eog : list | tuple | 'auto'
        Names or indices of channels that should be designated EOG channels.
        If 'auto', the channel names containing ``EOG`` or ``EYE`` are used.
        Defaults to empty tuple.
        NOTE(review): accepted but not currently applied when the info is
        built below — confirm intended.
    %(preload)s
        Note that preload=False will be effective only if the data is stored
        in a separate binary file.
    %(uint16_codec)s
    %(montage_units)s
    %(verbose)s

    See Also
    --------
    mne.io.Raw : Documentation of attributes and methods.

    Notes
    -----
    The ``%(...)s`` placeholders follow MNE's ``fill_doc`` convention but no
    ``@fill_doc`` decorator is applied here, so they remain unexpanded —
    NOTE(review): confirm intended.

    .. versionadded:: 0.11.0
    """

    def __init__(
        self,
        input_fname,
        metadata,
        eog=(),
        preload=False,
        *,
        cache_dir='./.eegdash_cache',
        uint16_codec=None,
        montage_units="auto",
        verbose=None,
    ):
        '''
        Get to work with S3 endpoint first, no caching
        '''
        # Build an mne.Info from the metadata record only — no data is read
        # at this point.
        sfreq = metadata['sfreq']  # Sampling frequency
        n_times = metadata['n_times']
        ch_names = metadata['ch_names']
        ch_types = []
        for ch in metadata['ch_types']:
            chtype = ch.lower()
            if chtype == 'heog' or chtype == 'veog':
                # Collapse horizontal/vertical EOG to MNE's generic 'eog'.
                chtype = 'eog'
            ch_types.append(chtype)
        info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
        self.s3file = input_fname
        # NOTE(review): relies on a module-level `import os` that is not
        # visible in this hunk — confirm it is present at file top.
        os.makedirs(cache_dir, exist_ok=True)
        # Local path the S3 object is cached at (same basename as the key).
        self.filecache = os.path.join(cache_dir, os.path.basename(self.s3file))

        if preload and not os.path.exists(self.filecache):
            self._download_s3()
            # Hand BaseRaw the local path so it preloads from the cache.
            # NOTE(review): when the file is already cached, `preload` stays
            # True and BaseRaw preloads through _read_segment() instead —
            # confirm both paths behave identically.
            preload = self.filecache

        super().__init__(
            info,
            preload,
            last_samps=[n_times-1],
            orig_format="single",
            verbose=verbose,
        )

    def _download_s3(self):
        # Fetch the S3 object into the local cache and point MNE at it.
        # Anonymous access; the OpenNeuro bucket lives in us-east-2.
        filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})
        print('s3file', self.s3file)
        print('filecache', self.filecache)
        filesystem.download(self.s3file, self.filecache)
        self.filenames = [self.filecache]

    def _read_segment(
        self, start=0, stop=None, sel=None, data_buffer=None, *, verbose=None
    ):
        # Lazily materialize the cache on the first actual data access.
        if not os.path.exists(self.filecache):  # not cached yet: download now
            self._download_s3()
        else:  # already cached: just point MNE at the local copy
            self.filenames = [self.filecache]
        return super()._read_segment(start, stop, sel, data_buffer, verbose=verbose)

    def _read_segment_file(self, data, idx, fi, start, stop, cals, mult):
        """Read a chunk of data from the file."""
        # Delegate to MNE's helper; '<f4' = little-endian float32 samples.
        _read_segments_file(self, data, idx, fi, start, stop, cals, mult, dtype="<f4")
11
104
 
12
105
 
13
106
  class BIDSDataset():
@@ -20,7 +113,7 @@ class BIDSDataset():
20
113
  }
21
114
  METADATA_FILE_EXTENSIONS = ['eeg.json', 'channels.tsv', 'electrodes.tsv', 'events.tsv', 'events.json']
22
115
  def __init__(self,
23
- data_dir=None, # location of asr cleaned data
116
+ data_dir=None, # location of bids dataset
24
117
  dataset='', # dataset name
25
118
  raw_format='eeglab', # format of raw data
26
119
  ):
@@ -44,9 +137,25 @@ class BIDSDataset():
44
137
  self.files = np.load(temp_dir / f'{dataset}_files.npy', allow_pickle=True)
45
138
 
46
139
def get_property_from_filename(self, property, filename):
    """Extract a BIDS entity value (e.g. ``sub``, ``task``) from a file path.

    Matches ``<property>-<value>`` terminated by ``_`` or a path separator.
    The character class covers both ``/`` and ``\\``, so the same pattern
    works on POSIX and Windows paths without the previous per-call
    ``import platform`` branch.

    Parameters
    ----------
    property : str
        BIDS entity name to look up (``'sub'``, ``'task'``, ``'run'``, ...).
    filename : str
        File path or name to search.

    Returns
    -------
    str
        The entity value, or ``''`` when the entity is absent.
    """
    lookup = re.search(rf'{property}-(.*?)[_\\/]', filename)
    return lookup.group(1) if lookup else ''
146
 
147
def merge_json_inheritance(self, json_files):
    '''
    Merge list of json files found by get_bids_file_inheritance,
    expecting the order (from left to right) is from lowest level to highest level,
    and return a merged dictionary.

    Files are applied from highest level to lowest, so keys from the most
    specific (lowest-level) file win — the BIDS inheritance principle.

    Fixes over the previous version: iterates ``reversed(json_files)``
    instead of ``list.reverse()`` (which mutated the caller's list in
    place), and closes each file via a context manager instead of leaking
    the handle from ``json.load(open(f))``.
    '''
    json_dict = {}
    for json_file in reversed(json_files):
        with open(json_file) as fp:
            json_dict.update(json.load(fp))
    return json_dict
158
+
50
159
  def get_bids_file_inheritance(self, path, basename, extension):
51
160
  '''
52
161
  Get all files with given extension that applies to the basename file
@@ -68,7 +177,7 @@ class BIDSDataset():
68
177
  for file in os.listdir(path):
69
178
  # target_file = path / f"{cur_file_basename}_{extension}"
70
179
  if os.path.isfile(path/file):
71
- cur_file_basename = file[:file.rfind('_')]
180
+ cur_file_basename = file[:file.rfind('_')] # TODO: change to just search for any file with extension
72
181
  if file.endswith(extension) and cur_file_basename in basename:
73
182
  filepath = path / file
74
183
  bids_files.append(filepath)
@@ -210,4 +319,21 @@ class BIDSDataset():
210
319
  return self.get_property_from_filename('run', data_filepath)
211
320
 
212
321
def subject(self, data_filepath):
    """Return the BIDS subject label (``sub-<label>``) parsed from the path."""
    return self.get_property_from_filename('sub', data_filepath)
323
+
324
def num_channels(self, data_filepath):
    """Return the channel count, i.e. the row count of the recording's
    ``channels.tsv`` sidecar (first file in the inheritance chain)."""
    tsv_file = self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0]
    channels = pd.read_csv(tsv_file, sep='\t')
    return len(channels)
327
+
328
def channel_labels(self, data_filepath):
    """Return channel names from the ``name`` column of ``channels.tsv``."""
    tsv_file = self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0]
    channels = pd.read_csv(tsv_file, sep='\t')
    return channels['name'].tolist()
331
+
332
def channel_types(self, data_filepath):
    """Return channel types from the ``type`` column of ``channels.tsv``."""
    tsv_file = self.get_bids_metadata_files(data_filepath, 'channels.tsv')[0]
    channels = pd.read_csv(tsv_file, sep='\t')
    return channels['type'].tolist()
335
+
336
def num_times(self, data_filepath):
    """Return the number of samples, derived from the merged ``eeg.json``
    sidecar chain as ``SamplingFrequency * RecordingDuration`` truncated to
    int.

    NOTE(review): assumes both keys are present in the merged sidecar —
    confirm against the datasets being indexed.
    """
    sidecars = self.get_bids_metadata_files(data_filepath, 'eeg.json')
    merged = self.merge_json_inheritance(sidecars)
    return int(merged['SamplingFrequency'] * merged['RecordingDuration'])
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: eegdash
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>
6
6
  License: GNU General Public License
@@ -32,12 +32,24 @@ Classifier: Operating System :: OS Independent
32
32
  Requires-Python: >=3.8
33
33
  Description-Content-Type: text/markdown
34
34
  License-File: LICENSE
35
+ Requires-Dist: signalstore
35
36
 
36
37
  # EEG-Dash
37
38
  To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
38
39
 
39
40
  ## Data source
40
- The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate data converted from NEMAR, which includes a subset of the 330 MEEG BIDS-formatted datasets available on OpenNeuro, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
41
+ The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will also incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
42
+
43
+ ## Datasets available
44
+
45
+ There are currently only two datasets made available for testing purposes.
46
+
47
+ | Dataset ID | Description | Participants | Channels | Task | NEMAR Link |
48
+ |------------|---------------------------------------------------------------------------------------------|--------------|-----------------|-----------------|------------------------------------------------------------------------------------------------|
49
+ | ds002718 | EEG dataset focused on face processing with MRI for source localization | 18 | 70 EEG, 2 EOG | FaceRecognition | [NEMAR ds002718](https://nemar.org/dataexplorer/detail?dataset_id=ds002718) |
50
+ | ds004745 | 8-Channel SSVEP EEG dataset with trials including voluntary movements to introduce artifacts | 6 | 8 EEG | SSVEP tasks | [NEMAR ds004745](https://nemar.org/dataexplorer/detail?dataset_id=ds004745) |
51
+
52
+
41
53
 
42
54
  ## Data formatting
43
55
  The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep learning (DL) applications by using a simplified structure commonly adopted by these communities. This will involve converting raw MEEG data into a matrix format, where samples (e.g., individual EEG or MEG recordings) are represented by rows, and values (such as time or channel data) are represented by columns. The data is also divided into training and testing sets, with 80% of the data allocated for training and 20% for testing, ensuring a balanced representation of relevant labels across sets. Hierarchical Event Descriptor (HED) tags will be used to annotate labels, which will be stored in a text table, and detailed metadata, including dataset origins and methods. This formatting process will ensure that data is ready for ML/DL models, allowing for efficient training and testing of algorithms while preserving data integrity and reusability.
@@ -49,15 +61,70 @@ The data in EEG-DaSh is formatted to facilitate machine learning (ML) and deep l
49
61
 
50
62
  The data in EEG-DaSh is accessed through Python and MATLAB libraries specifically designed for this platform. These libraries will use objects compatible with deep learning data storage formats in each language, such as <i>Torchvision.dataset</i> in Python and <i>DataStore</i> in MATLAB. Users can dynamically fetch data from the EEG-DaSh server which is then cached locally.
51
63
 
52
- ### AWS S3
64
+ ### Install
65
+ Use your preferred Python environment manager with Python > 3.9 to install the package. Here we show an example using a Conda environment with Python 3.11.5:
66
+ * Create a new environment with Python 3.11.5 -> `conda create --name eegdash python=3.11.5`
67
+ * Switch to the right environment -> `conda activate eegdash`
68
+ * Install dependencies (this is a temporary link that will be updated soon) -> `pip install -r https://raw.githubusercontent.com/sccn/EEG-Dash-Data/refs/heads/develop/requirements.txt`
69
+ * Install _eegdash_ package (this is a temporary link that will be updated soon) -> `pip install -i https://test.pypi.org/simple/ eegdash`
70
+ * Check installation. Start a Python session and type `from eegdash import EEGDash`
71
+
72
+ ### Python data access
73
+
74
+ To create a local object for accessing the database, use the following code:
75
+
76
+ ```python
77
+ from eegdash import EEGDash
78
+ EEGDashInstance = EEGDash()
79
+ ```
80
+
81
+ Once the object is instantiated, it can be utilized to search datasets. Providing an empty parameter will search the entire database and return all available datasets.
82
+
83
+ ```python
84
+ EEGDashInstance.find({})
85
+ ```
86
+ A list of datasets is returned.
87
+
88
+ ```python
89
+ [{'schema_ref': 'eeg_signal',
90
+ 'data_name': 'ds004745_sub-001_task-unnamed_eeg.set',
91
+ 'dataset': 'ds004745',
92
+ 'subject': '001',
93
+ 'task': 'unnamed',
94
+ 'session': '',
95
+ 'run': '',
96
+ 'modality': 'EEG',
97
+ 'sampling_frequency': 1000,
98
+ 'version_timestamp': 0,
99
+ 'has_file': True,
100
+ 'time_of_save': datetime.datetime(2024, 10, 25, 14, 11, 48, 843593, tzinfo=datetime.timezone.utc),
101
+ 'time_of_removal': None}, ...
102
+
103
+ ```
104
+
105
+ Additionally, users can search for a specific dataset by specifying criteria.
106
+
107
+ ```python
108
+ EEGDashInstance.find({'task': 'FaceRecognition'})
109
+ ```
110
+
111
+ After locating the desired dataset or data record, users can download it locally by executing the following command. This will return an xarray Python object.
112
+
113
+ ```python
114
+ XArrayData = EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})
115
+ ```
116
+
117
+ Optionally, this is how you may access the raw data for the first record. This will return a NumPy array.
53
118
 
54
- Coming soon...
119
+ ```python
120
+ npData = EEGDashInstance.get({'task': 'FaceRecognition', 'subject': '019'})[0].values
121
+ ```
55
122
 
56
- ### EEG-Dash API
123
+ ## Example use
57
124
 
58
- Coming soon...
125
+ This [example](tests/eegdash.ipynb) demonstrates the full workflow from data retrieval with `EEGDash` to model definition, data handling, and training in PyTorch.
59
126
 
60
- ## Education
127
+ ## Education - Coming soon...
61
128
 
62
129
  We organize workshops and educational events to foster cross-cultural education and student training, offering both online and in-person opportunities in collaboration with US and Israeli partners. There is no event planned for 2024. Events for 2025 will be advertised on the EEGLABNEWS mailing list so make sure to [subscribe](https://sccn.ucsd.edu/mailman/listinfo/eeglabnews).
63
130
 
@@ -0,0 +1,8 @@
1
+ __init__.py,sha256=hgxE8COvPu3EV2Tq3GqtMk68fsd7bYvOs_0GO6rrzfk,32
2
+ data_utils.py,sha256=vzMGVp4PBWyRF8tbYNqkJs0QnUd5CzvmJUkpPfxdJh8,13491
3
+ main.py,sha256=fFZHHdVYNLqKr2X_NDB0XXla7A2QlHexgI9AD79_niY,7217
4
+ eegdash-0.0.3.dist-info/LICENSE,sha256=Xafu48R-h_kyaNj2tuhfgdEv9_ovciktjUEgRRwMZ6w,812
5
+ eegdash-0.0.3.dist-info/METADATA,sha256=k0Lvxj1hHHQOZtLt1id4qnp2LpzlN_AZ4Ed3hSoXZG8,9432
6
+ eegdash-0.0.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
7
+ eegdash-0.0.3.dist-info/top_level.txt,sha256=MyqA0HvmlirifVrDoM8jHKwKDiA_-XrVNsV6tFfhpAU,25
8
+ eegdash-0.0.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.2.0)
2
+ Generator: setuptools (75.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,3 @@
1
+ __init__
2
+ data_utils
3
+ main
main.py ADDED
@@ -0,0 +1,199 @@
1
+ import pymongo
2
+ from dotenv import load_dotenv
3
+ import os
4
+ import s3fs
5
+ from joblib import Parallel, delayed
6
+ import tempfile
7
+ import mne
8
+ import numpy as np
9
+ import xarray as xr
10
+ from .data_utils import BIDSDataset
11
class EEGDash:
    """Client for the EEG-Dash metadata database and OpenNeuro S3 storage.

    Metadata records live in the MongoDB collection ``eegdash.records``; the
    EEG recordings themselves are fetched anonymously from the public
    OpenNeuro S3 bucket and parsed with MNE.
    """

    AWS_BUCKET = 's3://openneuro.org'

    def __init__(self,
                 is_public=True):
        """Open the database connection.

        Parameters
        ----------
        is_public : bool
            When True, use the built-in public connection string. When
            False, read ``DB_CONNECTION_STRING`` from the environment (a
            ``.env`` file is honoured via ``load_dotenv``); write
            operations require this mode.
        """
        if is_public:
            # SECURITY(review): credentials are hard-coded in source.
            # Presumably this is a read-scoped public user, but the string
            # should live in configuration, not code — confirm and rotate.
            DB_CONNECTION_STRING="mongodb+srv://eegdash-user:mdzoMjQcHWTVnKDq@cluster0.vz35p.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"
        else:
            load_dotenv()
            DB_CONNECTION_STRING = os.getenv('DB_CONNECTION_STRING')

        self.__client = pymongo.MongoClient(DB_CONNECTION_STRING)
        self.__db = self.__client['eegdash']
        self.__collection = self.__db['records']

        self.is_public = is_public
        # The OpenNeuro bucket is public, so anonymous access suffices.
        self.filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'region_name': 'us-east-2'})

    def find(self, *args):
        """Run a MongoDB ``find`` with the given arguments and return the
        matching records as a list."""
        results = self.__collection.find(*args)
        return list(results)  # materialize the cursor

    def exist(self, schema_ref='eeg_signal', data_name=''):
        """Return True if a record with this (schema_ref, data_name) exists."""
        query = {
            "schema_ref": schema_ref,
            "data_name": data_name
        }
        sessions = self.find(query)
        return len(sessions) > 0

    def add(self, record:dict):
        """Validate *record* and insert it into the collection."""
        input_record = self._validate_input(record)
        print(input_record)
        self.__collection.insert_one(input_record)

    def _validate_input(self, record:dict):
        """Check *record* keys and value types.

        Forces ``schema_ref`` to ``'eeg_signal'`` (mutating *record* in
        place), requires ``data_name``, and raises ``ValueError`` on any
        unknown key or wrongly-typed value.

        Returns
        -------
        dict
            The (mutated) record, ready for insertion.
        """
        input_types = {
            'schema_ref': str,
            'data_name': str,
            'dataset': str,
            'bidspath': str,
            'subject': str,
            'task': str,
            'session': str,
            'run': str,
            'sampling_frequency': float,
            'modality': str,
            'nchans': int,
            'ntimes': int,
            'channel_types': list,
            'channel_names': list,
        }
        record['schema_ref'] = 'eeg_signal'
        if 'data_name' not in record:
            raise ValueError("Missing key: data_name")
        # Reject unknown keys and wrongly-typed values.
        for key, value in record.items():
            if key not in input_types:
                raise ValueError(f"Invalid input: {key}")
            if not isinstance(value, input_types[key]):
                raise ValueError(f"Invalid input: {key}")

        return record

    def load_eeg_data_from_s3(self, s3path):
        """Download an EEGLAB ``.set`` file from S3 and load it.

        Returns the ``xarray.DataArray`` produced by
        :meth:`load_eeg_data_from_bids_file`.
        """
        with tempfile.NamedTemporaryFile(delete=False, suffix='.set') as tmp:
            with self.filesystem.open(s3path) as s3_file:
                tmp.write(s3_file.read())
            tmp_path = tmp.name
        try:
            return self.load_eeg_data_from_bids_file(tmp_path)
        finally:
            # Always remove the temp copy, even when parsing fails (the
            # previous version leaked the file on error).
            os.unlink(tmp_path)

    def load_eeg_data_from_bids_file(self, bids_file, eeg_attrs=None):
        '''Load one raw EEGLAB file as an xarray.DataArray (channel x time).

        bids_file must be a file of the bids_dataset. The ``time``
        coordinate is in seconds; ``eeg_attrs`` is accepted for API
        compatibility but currently unused.
        '''
        EEG = mne.io.read_raw_eeglab(bids_file)
        eeg_data = EEG.get_data()

        fs = EEG.info['sfreq']
        max_time = eeg_data.shape[1] / fs
        time_steps = np.linspace(0, max_time, eeg_data.shape[1]).squeeze()  # in seconds

        channel_names = EEG.ch_names

        eeg_xarray = xr.DataArray(
            data=eeg_data,
            dims=['channel','time'],
            coords={
                'time': time_steps,
                'channel': channel_names
            },
            # attrs=attrs
        )
        return eeg_xarray

    def load_eeg_attrs_from_bids_file(self, bids_dataset: 'BIDSDataset', bids_file):
        '''Build the metadata record for one file of *bids_dataset*.

        bids_file must be a file of the bids_dataset. The annotation is a
        string so the class can be imported without BIDSDataset in scope.
        '''
        if bids_file not in bids_dataset.files:
            raise ValueError(f'{bids_file} not in {bids_dataset.dataset}')
        f = os.path.basename(bids_file)
        dsnumber = bids_dataset.dataset
        # extract openneuro path by finding the first occurrence of the
        # dataset name in the filename and remove the path before that
        openneuro_path = dsnumber + bids_file.split(dsnumber)[1]

        attrs = {
            'schema_ref': 'eeg_signal',
            'data_name': f'{bids_dataset.dataset}_{f}',
            'dataset': bids_dataset.dataset,
            'bidspath': openneuro_path,
            'subject': bids_dataset.subject(bids_file),
            'nchans': bids_dataset.num_channels(bids_file),
            'ntimes': bids_dataset.num_times(bids_file),
            'channel_types': bids_dataset.channel_types(bids_file),
            'channel_names': bids_dataset.channel_labels(bids_file),
            'task': bids_dataset.task(bids_file),
            'session': bids_dataset.session(bids_file),
            'run': bids_dataset.run(bids_file),
            'sampling_frequency': bids_dataset.sfreq(bids_file),
            'modality': 'EEG',
        }

        return attrs

    def add_bids_dataset(self, dataset, data_dir, raw_format='eeglab', overwrite=True):
        '''
        Create new records for the dataset in the MongoDB database if not
        found; existing records are updated when *overwrite* is True.
        Requires a private connection (is_public=False).
        '''
        if self.is_public:
            raise ValueError('This operation is not allowed for public users')

        bids_dataset = BIDSDataset(
            data_dir=data_dir,
            dataset=dataset,
            raw_format=raw_format,
        )
        for bids_file in bids_dataset.get_files():
            print('bids raw file', bids_file)

            signalstore_data_id = f"{dataset}_{os.path.basename(bids_file)}"

            if self.exist(data_name=signalstore_data_id):
                if overwrite:
                    eeg_attrs = self.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
                    print('updating record', eeg_attrs['data_name'])
                    self.update(eeg_attrs)
                else:
                    print('data already exist and not overwriting. skipped')
                    continue
            else:
                eeg_attrs = self.load_eeg_attrs_from_bids_file(bids_dataset, bids_file)
                # Assume raw data already exists on Openneuro, recreating record only
                print('adding record', eeg_attrs['data_name'])
                self.add(eeg_attrs)

    def get_s3path(self, record):
        """Return the full S3 URI for a record's ``bidspath``."""
        return f"{self.AWS_BUCKET}/{record['bidspath']}"

    def get(self, query:dict):
        '''Fetch and load every recording matching *query*.

        query: {
            'dataset': 'dsxxxx',
            ...any other record fields...
        }

        Returns a list of xarray.DataArray (empty when nothing matches).
        Downloads run in a thread pool since the work is I/O-bound.
        '''
        sessions = self.find(query)
        results = []
        if sessions:
            print(f'Found {len(sessions)} records')
            results = Parallel(n_jobs=-1 if len(sessions) > 1 else 1, prefer="threads", verbose=1)(
                delayed(self.load_eeg_data_from_s3)(self.get_s3path(session)) for session in sessions
            )
        return results

    def update(self, record:dict):
        """Update the record matching (schema_ref, data_name) with *record*'s
        fields; ``schema_ref`` is forced to 'eeg_signal' first."""
        record['schema_ref'] = 'eeg_signal'
        self.__collection.update_one({'schema_ref': record['schema_ref'], 'data_name': record['data_name']},
                                     {'$set': record}
                                     )
def main():
    """Smoke test: fetch one known record from the database and print it."""
    client = EEGDash()
    matches = client.find({'dataset': 'ds005511', 'subject': 'NDARUF236HM7'})
    print(matches)


if __name__ == '__main__':
    main()
File without changes
@@ -1,3 +0,0 @@
1
- from eegdash.SignalStore.signalstore.store.unit_of_work_provider import UnitOfWorkProvider
2
-
3
- __all__ = ['UnitOfWorkProvider']
@@ -1,13 +0,0 @@
1
- from abc import ABC, abstractmethod
2
-
3
class AbstractReadAdapter(ABC):
    """Base class for read adapters: iterating the adapter iterates read().

    Subclasses implement read(), typically as a generator yielding records.
    """

    def __iter__(self):
        # Each for-loop over the adapter starts a fresh read() pass.
        return self.read().__iter__()

    def __next__(self):
        # NOTE(review): this calls read() anew on every __next__, so each
        # call creates a fresh iterator and would yield the first element
        # repeatedly; iteration should go through __iter__ — confirm intent.
        return self.read().__next__()

    @abstractmethod
    def read(self):
        # Abstract; the raise is a safety net if a subclass calls super().
        raise NotImplementedError('AbstractReadAdapter.read() not implemented.')
@@ -1,16 +0,0 @@
1
- from signalstore.adapters.read_adapters.abstract_read_adapter import AbstractReadAdapter
2
- import json
3
- from upath import UPath
4
-
5
- class SchemaReadAdapter(AbstractReadAdapter):
6
- def __init__(self, directory):
7
- self.dir = UPath(directory)
8
-
9
- def read(self):
10
- """Reads JSON files that conform to the Neuroscikit data model schemata.
11
- """
12
- for json_filepath in self.dir.glob('*.json'):
13
- with open(json_filepath) as f:
14
- yield dict(json.load(f))
15
-
16
-
@@ -1,19 +0,0 @@
1
- from signalstore.adapters.read_adapters.abstract_read_adapter import AbstractReadAdapter
2
-
3
- import yaml
4
-
5
class VocabularyReadAdapter(AbstractReadAdapter):
    # Yields one record per top-level key of a YAML vocabulary file.

    def __init__(self, filepath):
        self.filepath = filepath

    def read(self):
        """Read the YAML vocabulary file and yield one record per entry.

        Each top-level mapping key becomes ``record['name']`` and the key's
        value (a mapping) is merged into the record.

        NOTE(review): the original docstring claimed this produced
        xarray.DataArray objects; the code yields plain dicts — confirm
        which contract downstream consumers expect.
        """
        with open(self.filepath) as f:
            # FullLoader: constructs standard YAML only, no arbitrary objects.
            yaml_dict = yaml.load(f, Loader=yaml.FullLoader)
            for key, value in yaml_dict.items():
                record = {"name": key}
                record.update(value)
                yield record