eegdash 0.3.4.dev70__tar.gz → 0.3.5.dev80__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. Click here for more details.
- {eegdash-0.3.4.dev70/eegdash.egg-info → eegdash-0.3.5.dev80}/PKG-INFO +2 -1
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/docs/source/conf.py +1 -1
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/docs/source/index.rst +1 -0
- eegdash-0.3.5.dev80/docs/source/overview.rst +37 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/__init__.py +1 -1
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/api.py +185 -61
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80/eegdash.egg-info}/PKG-INFO +2 -1
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash.egg-info/SOURCES.txt +5 -1
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash.egg-info/requires.txt +1 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/pyproject.toml +1 -0
- eegdash-0.3.5.dev80/tests/test_api.py +63 -0
- eegdash-0.3.5.dev80/tests/test_offline.py +49 -0
- eegdash-0.3.5.dev80/tests/test_query.py +85 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/LICENSE +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/MANIFEST.in +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/README.md +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/docs/Makefile +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/docs/source/dataset_summary.rst +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/docs/source/install/install.rst +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/docs/source/install/install_pip.rst +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/docs/source/install/install_source.rst +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/data_config.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/data_utils.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/dataset.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/dataset_summary.csv +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/__init__.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/datasets.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/decorators.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/extractors.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/feature_bank/__init__.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/feature_bank/complexity.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/feature_bank/connectivity.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/feature_bank/csp.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/feature_bank/dimensionality.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/feature_bank/signal.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/feature_bank/spectral.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/feature_bank/utils.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/inspect.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/serialization.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/features/utils.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/mongodb.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/preprocessing.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/registry.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash/utils.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash.egg-info/dependency_links.txt +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/eegdash.egg-info/top_level.txt +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/setup.cfg +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/tests/test_correctness.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/tests/test_dataset.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/tests/test_dataset_registration.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/tests/test_eegdash.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/tests/test_functional.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/tests/test_init.py +0 -0
- {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev80}/tests/test_mongo_connection.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.5.dev80
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
6
6
|
License-Expression: GPL-3.0-only
|
|
@@ -38,6 +38,7 @@ Requires-Dist: tqdm
|
|
|
38
38
|
Requires-Dist: xarray
|
|
39
39
|
Requires-Dist: h5io>=0.2.4
|
|
40
40
|
Requires-Dist: pymatreader
|
|
41
|
+
Requires-Dist: eeglabio
|
|
41
42
|
Requires-Dist: tabulate
|
|
42
43
|
Provides-Extra: tests
|
|
43
44
|
Requires-Dist: pytest; extra == "tests"
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
.. _overview:
|
|
2
|
+
|
|
3
|
+
========
|
|
4
|
+
Overview
|
|
5
|
+
========
|
|
6
|
+
|
|
7
|
+
eegdash is an interface designed to streamline access to, and use of, EEG data for machine learning applications. It is composed of three main components that work together to provide a seamless experience for researchers and developers.
|
|
8
|
+
|
|
9
|
+
The architecture of eegdash can be visualized as follows:
|
|
10
|
+
|
|
11
|
+
.. code-block:: text
|
|
12
|
+
|
|
13
|
+
+-----------------+
|
|
14
|
+
| MongoDB |
|
|
15
|
+
| (Metadata) |
|
|
16
|
+
+-----------------+
|
|
17
|
+
|
|
|
18
|
+
|
|
|
19
|
+
+-----------v-----------+ +-----------------+
|
|
20
|
+
| eegdash |<---->| S3 Filesystem |
|
|
21
|
+
| Interface | | (Raw Data) |
|
|
22
|
+
+-----------------------+ +-----------------+
|
|
23
|
+
|
|
|
24
|
+
|
|
|
25
|
+
+-----------v-----------+
|
|
26
|
+
| BIDS Parser |
|
|
27
|
+
+-----------------------+
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
The components are:
|
|
32
|
+
|
|
33
|
+
* **MongoDB**: This is a NoSQL database that centralizes all the metadata related to the EEG datasets. It stores information about subjects, sessions, tasks, and other relevant details, allowing for fast and efficient querying.
|
|
34
|
+
|
|
35
|
+
* **S3 Filesystem**: The raw EEG data is stored in S3-compatible object storage. This allows for scalable and reliable storage of large datasets. eegdash interacts with the S3 filesystem to download the data when it is needed.
|
|
36
|
+
|
|
37
|
+
* **BIDS Parser**: The BIDS (Brain Imaging Data Structure) parser is responsible for interpreting the structure of the datasets. It ensures that the data is organized in a standardized way, making it easier to work with and understand.
|
|
@@ -9,6 +9,7 @@ import numpy as np
|
|
|
9
9
|
import xarray as xr
|
|
10
10
|
from dotenv import load_dotenv
|
|
11
11
|
from joblib import Parallel, delayed
|
|
12
|
+
from mne_bids import get_bids_path_from_fname, read_raw_bids
|
|
12
13
|
from pymongo import InsertOne, UpdateOne
|
|
13
14
|
from s3fs import S3FileSystem
|
|
14
15
|
|
|
@@ -34,6 +35,19 @@ class EEGDash:
|
|
|
34
35
|
|
|
35
36
|
"""
|
|
36
37
|
|
|
38
|
+
_ALLOWED_QUERY_FIELDS = {
|
|
39
|
+
"data_name",
|
|
40
|
+
"dataset",
|
|
41
|
+
"subject",
|
|
42
|
+
"task",
|
|
43
|
+
"session",
|
|
44
|
+
"run",
|
|
45
|
+
"modality",
|
|
46
|
+
"sampling_frequency",
|
|
47
|
+
"nchans",
|
|
48
|
+
"ntimes",
|
|
49
|
+
}
|
|
50
|
+
|
|
37
51
|
def __init__(self, *, is_public: bool = True, is_staging: bool = False) -> None:
|
|
38
52
|
"""Create new instance of the EEGDash Database client.
|
|
39
53
|
|
|
@@ -71,34 +85,59 @@ class EEGDash:
|
|
|
71
85
|
anon=True, client_kwargs={"region_name": "us-east-2"}
|
|
72
86
|
)
|
|
73
87
|
|
|
74
|
-
def find(
|
|
75
|
-
|
|
88
|
+
def find(
|
|
89
|
+
self, query: dict[str, Any] = None, /, **kwargs
|
|
90
|
+
) -> list[Mapping[str, Any]]:
|
|
91
|
+
"""Find records in the MongoDB collection.
|
|
92
|
+
|
|
93
|
+
This method can be called in two ways:
|
|
94
|
+
1. With a pre-built MongoDB query dictionary (positional argument):
|
|
95
|
+
>>> eegdash.find({"dataset": "ds002718", "subject": {"$in": ["012", "013"]}})
|
|
96
|
+
2. With user-friendly keyword arguments for simple and multi-value queries:
|
|
97
|
+
>>> eegdash.find(dataset="ds002718", subject="012")
|
|
98
|
+
>>> eegdash.find(dataset="ds002718", subject=["012", "013"])
|
|
76
99
|
|
|
77
100
|
Parameters
|
|
78
101
|
----------
|
|
79
|
-
query: dict
|
|
80
|
-
A
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find
|
|
85
|
-
kwargs:
|
|
86
|
-
Additional keyword arguments for the MongoDB find() method.
|
|
102
|
+
query: dict, optional
|
|
103
|
+
A complete MongoDB query dictionary. This is a positional-only argument.
|
|
104
|
+
**kwargs:
|
|
105
|
+
Keyword arguments representing field-value pairs for the query.
|
|
106
|
+
Values can be single items (str, int) or lists of items for multi-search.
|
|
87
107
|
|
|
88
108
|
Returns
|
|
89
109
|
-------
|
|
90
110
|
list:
|
|
91
111
|
A list of DB records (string-keyed dictionaries) that match the query.
|
|
92
112
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
113
|
+
Raises
|
|
114
|
+
------
|
|
115
|
+
ValueError
|
|
116
|
+
If both a `query` dictionary and keyword arguments are provided.
|
|
97
117
|
|
|
98
118
|
"""
|
|
99
|
-
|
|
119
|
+
if query is not None and kwargs:
|
|
120
|
+
raise ValueError(
|
|
121
|
+
"Provide either a positional 'query' dictionary or keyword arguments, not both."
|
|
122
|
+
)
|
|
100
123
|
|
|
101
|
-
|
|
124
|
+
final_query = {}
|
|
125
|
+
if query is not None:
|
|
126
|
+
final_query = query
|
|
127
|
+
elif kwargs:
|
|
128
|
+
final_query = self._build_query_from_kwargs(**kwargs)
|
|
129
|
+
else:
|
|
130
|
+
# By default, an empty query {} returns all documents.
|
|
131
|
+
# This can be dangerous, so we can either allow it or raise an error.
|
|
132
|
+
# Let's require an explicit query for safety.
|
|
133
|
+
raise ValueError(
|
|
134
|
+
"find() requires a query dictionary or at least one keyword argument. "
|
|
135
|
+
"To find all documents, use find({})."
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
results = self.__collection.find(final_query)
|
|
139
|
+
|
|
140
|
+
return list(results)
|
|
102
141
|
|
|
103
142
|
def exist(self, query: dict[str, Any]) -> bool:
|
|
104
143
|
"""Return True if at least one record matches the query, else False.
|
|
@@ -184,6 +223,35 @@ class EEGDash:
|
|
|
184
223
|
|
|
185
224
|
return record
|
|
186
225
|
|
|
226
|
+
def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
|
|
227
|
+
"""Builds and validates a MongoDB query from user-friendly keyword arguments.
|
|
228
|
+
|
|
229
|
+
Translates list values into MongoDB's `$in` operator.
|
|
230
|
+
"""
|
|
231
|
+
# 1. Validate that all provided keys are allowed for querying
|
|
232
|
+
unknown_fields = set(kwargs.keys()) - self._ALLOWED_QUERY_FIELDS
|
|
233
|
+
if unknown_fields:
|
|
234
|
+
raise ValueError(
|
|
235
|
+
f"Unsupported query field(s): {', '.join(sorted(unknown_fields))}. "
|
|
236
|
+
f"Allowed fields are: {', '.join(sorted(self._ALLOWED_QUERY_FIELDS))}"
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
# 2. Construct the query dictionary
|
|
240
|
+
query = {}
|
|
241
|
+
for key, value in kwargs.items():
|
|
242
|
+
if isinstance(value, (list, tuple)):
|
|
243
|
+
if not value:
|
|
244
|
+
raise ValueError(
|
|
245
|
+
f"Received an empty list for query parameter '{key}'. This is not supported."
|
|
246
|
+
)
|
|
247
|
+
# If the value is a list, use the `$in` operator for multi-search
|
|
248
|
+
query[key] = {"$in": value}
|
|
249
|
+
else:
|
|
250
|
+
# Otherwise, it's a direct match
|
|
251
|
+
query[key] = value
|
|
252
|
+
|
|
253
|
+
return query
|
|
254
|
+
|
|
187
255
|
def load_eeg_data_from_s3(self, s3path: str) -> xr.DataArray:
|
|
188
256
|
"""Load an EEGLAB .set file from an AWS S3 URI and return it as an xarray DataArray.
|
|
189
257
|
|
|
@@ -218,14 +286,15 @@ class EEGDash:
|
|
|
218
286
|
Parameters
|
|
219
287
|
----------
|
|
220
288
|
bids_file : str
|
|
221
|
-
Path to the file on the local filesystem.
|
|
289
|
+
Path to the BIDS-compliant file on the local filesystem.
|
|
222
290
|
|
|
223
291
|
Notes
|
|
224
292
|
-----
|
|
225
293
|
Currently, only non-epoched .set files are supported.
|
|
226
294
|
|
|
227
295
|
"""
|
|
228
|
-
|
|
296
|
+
bids_path = get_bids_path_from_fname(bids_file, verbose=False)
|
|
297
|
+
raw_object = read_raw_bids(bids_path=bids_path, verbose=False)
|
|
229
298
|
eeg_data = raw_object.get_data()
|
|
230
299
|
|
|
231
300
|
fs = raw_object.info["sfreq"]
|
|
@@ -521,8 +590,8 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
521
590
|
def __init__(
|
|
522
591
|
self,
|
|
523
592
|
query: dict | None = None,
|
|
524
|
-
|
|
525
|
-
dataset: str |
|
|
593
|
+
cache_dir: str = "~/eegdash_cache",
|
|
594
|
+
dataset: str | None = None,
|
|
526
595
|
description_fields: list[str] = [
|
|
527
596
|
"subject",
|
|
528
597
|
"session",
|
|
@@ -532,36 +601,55 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
532
601
|
"gender",
|
|
533
602
|
"sex",
|
|
534
603
|
],
|
|
535
|
-
cache_dir: str = "~/eegdash_cache",
|
|
536
604
|
s3_bucket: str | None = None,
|
|
605
|
+
data_dir: str | None = None,
|
|
537
606
|
eeg_dash_instance=None,
|
|
607
|
+
records: list[dict] | None = None,
|
|
538
608
|
**kwargs,
|
|
539
609
|
):
|
|
540
610
|
"""Create a new EEGDashDataset from a given query or local BIDS dataset directory
|
|
541
611
|
and dataset name. An EEGDashDataset is a pooled collection of EEGDashBaseDataset
|
|
542
612
|
instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.
|
|
543
613
|
|
|
614
|
+
|
|
615
|
+
Querying Examples:
|
|
616
|
+
------------------
|
|
617
|
+
# Find by single subject
|
|
618
|
+
>>> ds = EEGDashDataset(dataset="ds005505", subject="NDARCA153NKE")
|
|
619
|
+
|
|
620
|
+
# Find by a list of subjects and a specific task
|
|
621
|
+
>>> subjects = ["NDARCA153NKE", "NDARXT792GY8"]
|
|
622
|
+
>>> ds = EEGDashDataset(dataset="ds005505", subject=subjects, task="RestingState")
|
|
623
|
+
|
|
624
|
+
# Use a raw MongoDB query for advanced filtering
|
|
625
|
+
>>> raw_query = {"dataset": "ds005505", "subject": {"$in": subjects}}
|
|
626
|
+
>>> ds = EEGDashDataset(query=raw_query)
|
|
627
|
+
|
|
544
628
|
Parameters
|
|
545
629
|
----------
|
|
546
630
|
query : dict | None
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
631
|
+
A raw MongoDB query dictionary. It cannot be combined with keyword arguments for filtering; providing both raises a ValueError.
|
|
632
|
+
**kwargs : dict
|
|
633
|
+
Keyword arguments for filtering (e.g., `subject="X"`, `task=["T1", "T2"]`) and/or
|
|
634
|
+
arguments to be passed to the EEGDashBaseDataset constructor (e.g., `subject=...`).
|
|
635
|
+
cache_dir : str
|
|
636
|
+
A directory where the dataset will be cached locally.
|
|
637
|
+
data_dir : str | None
|
|
638
|
+
Optionally a string specifying a local BIDS dataset directory from which to load the EEG data files. Exactly one
|
|
552
639
|
of query or data_dir must be provided.
|
|
553
|
-
dataset : str |
|
|
554
|
-
If data_dir is given, a name
|
|
640
|
+
dataset : str | None
|
|
641
|
+
If data_dir is given, a name for the dataset to be loaded.
|
|
555
642
|
description_fields : list[str]
|
|
556
643
|
A list of fields to be extracted from the dataset records
|
|
557
644
|
and included in the returned data description(s). Examples are typical
|
|
558
645
|
subject metadata fields such as "subject", "session", "run", "task", etc.;
|
|
559
646
|
see also data_config.description_fields for the default set of fields.
|
|
560
|
-
cache_dir : str
|
|
561
|
-
A directory where the dataset will be cached locally.
|
|
562
647
|
s3_bucket : str | None
|
|
563
648
|
An optional S3 bucket URI (e.g., "s3://mybucket") to use instead of the
|
|
564
649
|
default OpenNeuro bucket for loading data files
|
|
650
|
+
records : list[dict] | None
|
|
651
|
+
Optional list of pre-fetched metadata records. If provided, the dataset is
|
|
652
|
+
constructed directly from these records without querying MongoDB.
|
|
565
653
|
kwargs : dict
|
|
566
654
|
Additional keyword arguments to be passed to the EEGDashBaseDataset
|
|
567
655
|
constructor.
|
|
@@ -569,50 +657,79 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
569
657
|
"""
|
|
570
658
|
self.cache_dir = cache_dir
|
|
571
659
|
self.s3_bucket = s3_bucket
|
|
572
|
-
self.eeg_dash = eeg_dash_instance
|
|
573
|
-
_owns_client =
|
|
660
|
+
self.eeg_dash = eeg_dash_instance
|
|
661
|
+
_owns_client = False
|
|
662
|
+
if self.eeg_dash is None and records is None:
|
|
663
|
+
self.eeg_dash = EEGDash()
|
|
664
|
+
_owns_client = True
|
|
665
|
+
|
|
666
|
+
# Separate query kwargs from other kwargs passed to the BaseDataset constructor
|
|
667
|
+
query_kwargs = {
|
|
668
|
+
k: v for k, v in kwargs.items() if k in EEGDash._ALLOWED_QUERY_FIELDS
|
|
669
|
+
}
|
|
670
|
+
base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in query_kwargs}
|
|
671
|
+
|
|
672
|
+
if query and query_kwargs:
|
|
673
|
+
raise ValueError(
|
|
674
|
+
"Provide either a 'query' dictionary or keyword arguments for filtering, not both."
|
|
675
|
+
)
|
|
574
676
|
|
|
575
677
|
try:
|
|
576
|
-
if
|
|
577
|
-
|
|
678
|
+
if records is not None:
|
|
679
|
+
self.records = records
|
|
680
|
+
datasets = [
|
|
681
|
+
EEGDashBaseDataset(
|
|
682
|
+
record,
|
|
683
|
+
self.cache_dir,
|
|
684
|
+
self.s3_bucket,
|
|
685
|
+
**base_dataset_kwargs,
|
|
686
|
+
)
|
|
687
|
+
for record in self.records
|
|
688
|
+
]
|
|
578
689
|
elif data_dir:
|
|
579
|
-
|
|
690
|
+
# This path loads from a local directory and is not affected by DB query logic
|
|
691
|
+
if isinstance(data_dir, str) or isinstance(data_dir, Path):
|
|
580
692
|
datasets = self.load_bids_dataset(
|
|
581
|
-
dataset,
|
|
693
|
+
dataset=dataset,
|
|
694
|
+
data_dir=data_dir,
|
|
695
|
+
description_fields=description_fields,
|
|
696
|
+
s3_bucket=s3_bucket,
|
|
697
|
+
**base_dataset_kwargs,
|
|
582
698
|
)
|
|
583
699
|
else:
|
|
584
700
|
assert len(data_dir) == len(dataset), (
|
|
585
|
-
"Number of datasets and
|
|
701
|
+
"Number of datasets and directories must match"
|
|
586
702
|
)
|
|
587
703
|
datasets = []
|
|
588
704
|
for i, _ in enumerate(data_dir):
|
|
589
705
|
datasets.extend(
|
|
590
706
|
self.load_bids_dataset(
|
|
591
|
-
dataset[i],
|
|
592
|
-
data_dir[i],
|
|
593
|
-
description_fields,
|
|
594
|
-
s3_bucket,
|
|
595
|
-
**
|
|
707
|
+
dataset=dataset[i],
|
|
708
|
+
data_dir=data_dir[i],
|
|
709
|
+
description_fields=description_fields,
|
|
710
|
+
s3_bucket=s3_bucket,
|
|
711
|
+
**base_dataset_kwargs,
|
|
596
712
|
)
|
|
597
713
|
)
|
|
714
|
+
elif query or query_kwargs:
|
|
715
|
+
# This is the DB query path that we are improving
|
|
716
|
+
datasets = self.find_datasets(
|
|
717
|
+
query=query,
|
|
718
|
+
description_fields=description_fields,
|
|
719
|
+
query_kwargs=query_kwargs,
|
|
720
|
+
base_dataset_kwargs=base_dataset_kwargs,
|
|
721
|
+
)
|
|
722
|
+
# We only need filesystem if we need to access S3
|
|
723
|
+
self.filesystem = S3FileSystem(
|
|
724
|
+
anon=True, client_kwargs={"region_name": "us-east-2"}
|
|
725
|
+
)
|
|
598
726
|
else:
|
|
599
727
|
raise ValueError(
|
|
600
|
-
"
|
|
728
|
+
"You must provide either 'records', a 'data_dir', or a query/keyword arguments for filtering."
|
|
601
729
|
)
|
|
602
730
|
finally:
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
try:
|
|
606
|
-
self.eeg_dash.close()
|
|
607
|
-
except Exception:
|
|
608
|
-
# Don't let close errors break construction
|
|
609
|
-
pass
|
|
610
|
-
|
|
611
|
-
self.filesystem = S3FileSystem(
|
|
612
|
-
anon=True, client_kwargs={"region_name": "us-east-2"}
|
|
613
|
-
)
|
|
614
|
-
|
|
615
|
-
self.eeg_dash.close()
|
|
731
|
+
if _owns_client and self.eeg_dash is not None:
|
|
732
|
+
self.eeg_dash.close()
|
|
616
733
|
|
|
617
734
|
super().__init__(datasets)
|
|
618
735
|
|
|
@@ -630,7 +747,11 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
630
747
|
return None
|
|
631
748
|
|
|
632
749
|
def find_datasets(
|
|
633
|
-
self,
|
|
750
|
+
self,
|
|
751
|
+
query: dict[str, Any],
|
|
752
|
+
description_fields: list[str],
|
|
753
|
+
query_kwargs: dict,
|
|
754
|
+
base_dataset_kwargs: dict,
|
|
634
755
|
) -> list[EEGDashBaseDataset]:
|
|
635
756
|
"""Helper method to find datasets in the MongoDB collection that satisfy the
|
|
636
757
|
given query and return them as a list of EEGDashBaseDataset objects.
|
|
@@ -652,7 +773,10 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
652
773
|
|
|
653
774
|
"""
|
|
654
775
|
datasets: list[EEGDashBaseDataset] = []
|
|
655
|
-
|
|
776
|
+
|
|
777
|
+
self.records = self.eeg_dash.find(query, **query_kwargs)
|
|
778
|
+
|
|
779
|
+
for record in self.records:
|
|
656
780
|
description = {}
|
|
657
781
|
for field in description_fields:
|
|
658
782
|
value = self.find_key_in_nested_dict(record, field)
|
|
@@ -664,15 +788,15 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
664
788
|
self.cache_dir,
|
|
665
789
|
self.s3_bucket,
|
|
666
790
|
description=description,
|
|
667
|
-
**
|
|
791
|
+
**base_dataset_kwargs,
|
|
668
792
|
)
|
|
669
793
|
)
|
|
670
794
|
return datasets
|
|
671
795
|
|
|
672
796
|
def load_bids_dataset(
|
|
673
797
|
self,
|
|
674
|
-
dataset,
|
|
675
|
-
data_dir,
|
|
798
|
+
dataset: str,
|
|
799
|
+
data_dir: str | Path,
|
|
676
800
|
description_fields: list[str],
|
|
677
801
|
s3_bucket: str | None = None,
|
|
678
802
|
**kwargs,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.5.dev80
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
6
6
|
License-Expression: GPL-3.0-only
|
|
@@ -38,6 +38,7 @@ Requires-Dist: tqdm
|
|
|
38
38
|
Requires-Dist: xarray
|
|
39
39
|
Requires-Dist: h5io>=0.2.4
|
|
40
40
|
Requires-Dist: pymatreader
|
|
41
|
+
Requires-Dist: eeglabio
|
|
41
42
|
Requires-Dist: tabulate
|
|
42
43
|
Provides-Extra: tests
|
|
43
44
|
Requires-Dist: pytest; extra == "tests"
|
|
@@ -6,6 +6,7 @@ docs/Makefile
|
|
|
6
6
|
docs/source/conf.py
|
|
7
7
|
docs/source/dataset_summary.rst
|
|
8
8
|
docs/source/index.rst
|
|
9
|
+
docs/source/overview.rst
|
|
9
10
|
docs/source/install/install.rst
|
|
10
11
|
docs/source/install/install_pip.rst
|
|
11
12
|
docs/source/install/install_source.rst
|
|
@@ -39,10 +40,13 @@ eegdash/features/feature_bank/dimensionality.py
|
|
|
39
40
|
eegdash/features/feature_bank/signal.py
|
|
40
41
|
eegdash/features/feature_bank/spectral.py
|
|
41
42
|
eegdash/features/feature_bank/utils.py
|
|
43
|
+
tests/test_api.py
|
|
42
44
|
tests/test_correctness.py
|
|
43
45
|
tests/test_dataset.py
|
|
44
46
|
tests/test_dataset_registration.py
|
|
45
47
|
tests/test_eegdash.py
|
|
46
48
|
tests/test_functional.py
|
|
47
49
|
tests/test_init.py
|
|
48
|
-
tests/test_mongo_connection.py
|
|
50
|
+
tests/test_mongo_connection.py
|
|
51
|
+
tests/test_offline.py
|
|
52
|
+
tests/test_query.py
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import mne
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pytest
|
|
4
|
+
import xarray as xr
|
|
5
|
+
from mne_bids import BIDSPath, write_raw_bids
|
|
6
|
+
|
|
7
|
+
from eegdash.api import EEGDash
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Fixture to create a dummy BIDS dataset for testing
|
|
11
|
+
@pytest.fixture(scope="module")
|
|
12
|
+
def dummy_bids_dataset(tmpdir_factory):
|
|
13
|
+
bids_root = tmpdir_factory.mktemp("bids")
|
|
14
|
+
# Create a simple MNE Raw object
|
|
15
|
+
ch_names = ["EEG 001", "EEG 002", "EEG 003"]
|
|
16
|
+
ch_types = ["eeg"] * 3
|
|
17
|
+
sfreq = 100
|
|
18
|
+
n_times = 100
|
|
19
|
+
data = np.random.randn(len(ch_names), n_times)
|
|
20
|
+
info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
|
|
21
|
+
raw = mne.io.RawArray(data, info)
|
|
22
|
+
|
|
23
|
+
# Define BIDS path
|
|
24
|
+
subject_id = "01"
|
|
25
|
+
session_id = "01"
|
|
26
|
+
task_name = "test"
|
|
27
|
+
run_id = "01"
|
|
28
|
+
bids_path = BIDSPath(
|
|
29
|
+
subject=subject_id,
|
|
30
|
+
session=session_id,
|
|
31
|
+
task=task_name,
|
|
32
|
+
run=run_id,
|
|
33
|
+
root=bids_root,
|
|
34
|
+
datatype="eeg",
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Write BIDS data
|
|
38
|
+
write_raw_bids(raw, bids_path, overwrite=True, format="EEGLAB", allow_preload=True)
|
|
39
|
+
|
|
40
|
+
return str(bids_path.fpath)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_load_eeg_data_from_bids_file(dummy_bids_dataset):
|
|
44
|
+
eegdash = EEGDash()
|
|
45
|
+
data = eegdash.load_eeg_data_from_bids_file(dummy_bids_dataset)
|
|
46
|
+
assert isinstance(data, xr.DataArray)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_load_eeg_data_from_bids_file_content(dummy_bids_dataset):
|
|
50
|
+
eegdash = EEGDash()
|
|
51
|
+
data = eegdash.load_eeg_data_from_bids_file(dummy_bids_dataset)
|
|
52
|
+
|
|
53
|
+
# Check dimensions
|
|
54
|
+
assert data.dims == ("channel", "time")
|
|
55
|
+
|
|
56
|
+
# Check shape
|
|
57
|
+
assert data.shape == (3, 100)
|
|
58
|
+
|
|
59
|
+
# Check channel names
|
|
60
|
+
assert list(data.channel.values) == ["EEG 001", "EEG 002", "EEG 003"]
|
|
61
|
+
|
|
62
|
+
# Check time values
|
|
63
|
+
assert len(data.time.values) == 100
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from eegdash import EEGDash, EEGDashDataset
|
|
6
|
+
|
|
7
|
+
CACHE_DIR = (Path.home() / "mne_data" / "eeg_challenge_cache").resolve()
|
|
8
|
+
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_dataset_loads_without_eegdash(monkeypatch):
|
|
12
|
+
"""Dataset should load from records without contacting network resources."""
|
|
13
|
+
eeg_dash = EEGDash()
|
|
14
|
+
|
|
15
|
+
records = eeg_dash.find(subject="NDARAC350XUM", task="RestingState")
|
|
16
|
+
|
|
17
|
+
# test with internet
|
|
18
|
+
dataset_internet = EEGDashDataset(
|
|
19
|
+
query=dict(task="RestingState", subject="NDARAC350XUM", dataset="ds005509"),
|
|
20
|
+
cache_dir=CACHE_DIR,
|
|
21
|
+
eeg_dash_instance=eeg_dash,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Monkeypatch any network calls inside EEGDashDataset to raise if called
|
|
25
|
+
monkeypatch.setattr(
|
|
26
|
+
EEGDashDataset,
|
|
27
|
+
"find_datasets",
|
|
28
|
+
lambda *args, **kwargs: pytest.skip(
|
|
29
|
+
"Skipping network download in offline test"
|
|
30
|
+
),
|
|
31
|
+
)
|
|
32
|
+
monkeypatch.setattr(
|
|
33
|
+
EEGDashDataset,
|
|
34
|
+
"find_datasets",
|
|
35
|
+
lambda *args, **kwargs: pytest.skip(
|
|
36
|
+
"Skipping network download in offline test"
|
|
37
|
+
),
|
|
38
|
+
)
|
|
39
|
+
# TODO: find a way to do this with pytest
|
|
40
|
+
|
|
41
|
+
dataset_without_internet = EEGDashDataset(
|
|
42
|
+
records=records, cache_dir=CACHE_DIR, eeg_dash_instance=None
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
assert dataset_internet.datasets[0].raw == dataset_without_internet.datasets[0].raw
|
|
46
|
+
assert (
|
|
47
|
+
dataset_internet.datasets[0].record
|
|
48
|
+
== dataset_without_internet.datasets[0].record
|
|
49
|
+
)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from unittest.mock import MagicMock, patch
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from eegdash import EEGDash
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Mock the MongoConnectionManager to prevent actual DB connections during tests
|
|
9
|
+
@pytest.fixture(autouse=True)
|
|
10
|
+
def mock_mongo_connection():
|
|
11
|
+
"""Automatically mocks the MongoDB connection for all tests."""
|
|
12
|
+
with patch("eegdash.mongodb.MongoConnectionManager.get_client") as mock_get_client:
|
|
13
|
+
mock_collection = MagicMock()
|
|
14
|
+
mock_db = MagicMock()
|
|
15
|
+
mock_client = MagicMock()
|
|
16
|
+
mock_get_client.return_value = (mock_client, mock_db, mock_collection)
|
|
17
|
+
yield mock_collection
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.fixture
|
|
21
|
+
def eegdash_instance(mock_mongo_connection):
|
|
22
|
+
"""Provides a clean instance of EEGDash for each test."""
|
|
23
|
+
return EEGDash(is_public=True)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_build_query_with_single_values(eegdash_instance):
|
|
27
|
+
"""Test 1: Validates that the query builder correctly handles simple
|
|
28
|
+
key-value pairs.
|
|
29
|
+
"""
|
|
30
|
+
kwargs = {"dataset": "ds001", "subject": "sub-01"}
|
|
31
|
+
expected_query = {"dataset": "ds001", "subject": "sub-01"}
|
|
32
|
+
|
|
33
|
+
# _build_query_from_kwargs is a protected method, but we test it
|
|
34
|
+
# to ensure the core logic is sound.
|
|
35
|
+
query = eegdash_instance._build_query_from_kwargs(**kwargs)
|
|
36
|
+
|
|
37
|
+
assert query == expected_query
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_build_query_with_list_value(eegdash_instance):
|
|
41
|
+
"""Test 2: Validates that the query builder correctly translates a list
|
|
42
|
+
of values into a MongoDB `$in` operator.
|
|
43
|
+
"""
|
|
44
|
+
kwargs = {"dataset": "ds002", "subject": ["sub-01", "sub-02", "sub-03"]}
|
|
45
|
+
expected_query = {
|
|
46
|
+
"dataset": "ds002",
|
|
47
|
+
"subject": {"$in": ["sub-01", "sub-02", "sub-03"]},
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
query = eegdash_instance._build_query_from_kwargs(**kwargs)
|
|
51
|
+
|
|
52
|
+
assert query == expected_query
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_build_query_with_invalid_field(eegdash_instance):
|
|
56
|
+
"""Test 3: Ensures the query builder raises a ValueError when an unsupported
|
|
57
|
+
query field is provided.
|
|
58
|
+
"""
|
|
59
|
+
kwargs = {"dataset": "ds003", "invalid_field": "some_value"}
|
|
60
|
+
|
|
61
|
+
with pytest.raises(
|
|
62
|
+
ValueError, match="Unsupported query field\\(s\\): invalid_field"
|
|
63
|
+
):
|
|
64
|
+
eegdash_instance._build_query_from_kwargs(**kwargs)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_find_method_with_kwargs(eegdash_instance, mock_mongo_connection):
|
|
68
|
+
"""Test 4: Verifies that the `find` method correctly uses the query builder
|
|
69
|
+
and calls the underlying database collection with the constructed query.
|
|
70
|
+
"""
|
|
71
|
+
# Mock the return value of the collection's find method
|
|
72
|
+
mock_mongo_connection.find.return_value = [{"_id": "123", "dataset": "ds004"}]
|
|
73
|
+
|
|
74
|
+
# Call the method with user-friendly kwargs
|
|
75
|
+
results = eegdash_instance.find(dataset="ds004", subject=["sub-05", "sub-06"])
|
|
76
|
+
|
|
77
|
+
# Define the query we expect to be built and passed to the DB
|
|
78
|
+
expected_db_query = {"dataset": "ds004", "subject": {"$in": ["sub-05", "sub-06"]}}
|
|
79
|
+
|
|
80
|
+
# Assert that the collection's find method was called once with the correct query
|
|
81
|
+
mock_mongo_connection.find.assert_called_once_with(expected_db_query)
|
|
82
|
+
|
|
83
|
+
# Assert that the method returned the mocked data
|
|
84
|
+
assert len(results) == 1
|
|
85
|
+
assert results[0]["dataset"] == "ds004"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|