eegdash 0.3.9.dev170082126__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eegdash/__init__.py +12 -1
- eegdash/api.py +297 -295
- eegdash/bids_eeg_metadata.py +297 -56
- eegdash/const.py +43 -0
- eegdash/data_utils.py +327 -430
- eegdash/dataset/__init__.py +19 -1
- eegdash/dataset/dataset.py +61 -33
- eegdash/dataset/dataset_summary.csv +255 -256
- eegdash/dataset/registry.py +163 -11
- eegdash/downloader.py +197 -0
- eegdash/features/datasets.py +323 -138
- eegdash/features/decorators.py +88 -3
- eegdash/features/extractors.py +203 -55
- eegdash/features/feature_bank/complexity.py +7 -3
- eegdash/features/feature_bank/dimensionality.py +1 -1
- eegdash/features/inspect.py +80 -5
- eegdash/features/serialization.py +49 -17
- eegdash/features/utils.py +75 -8
- eegdash/hbn/__init__.py +11 -0
- eegdash/hbn/preprocessing.py +61 -19
- eegdash/hbn/windows.py +157 -34
- eegdash/logging.py +54 -0
- eegdash/mongodb.py +55 -24
- eegdash/paths.py +28 -5
- eegdash/utils.py +29 -1
- {eegdash-0.3.9.dev170082126.dist-info → eegdash-0.4.0.dist-info}/METADATA +11 -59
- eegdash-0.4.0.dist-info/RECORD +37 -0
- eegdash-0.3.9.dev170082126.dist-info/RECORD +0 -35
- {eegdash-0.3.9.dev170082126.dist-info → eegdash-0.4.0.dist-info}/WHEEL +0 -0
- {eegdash-0.3.9.dev170082126.dist-info → eegdash-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.3.9.dev170082126.dist-info → eegdash-0.4.0.dist-info}/top_level.txt +0 -0
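The most consequential API change in this release is in eegdash/api.py (shown below): the xarray-based S3 loaders (`load_eeg_data_from_s3`, `load_eeg_data_from_bids_file`) are removed in favor of the new eegdash/downloader.py module, and `EEGDash.__init__` now raises a `RuntimeError` when no MongoDB connection string can be resolved, instead of constructing a client with a missing URI. A minimal sketch of how a caller can satisfy the new requirement before instantiating `EEGDash`; the URI values below are placeholders, not real endpoints:

    import os
    import mne
    from eegdash import EEGDash

    # Public instances read the MNE config key first (placeholder URI shown);
    # the new code also tries _init_mongo_client() as a fallback.
    mne.set_config("EEGDASH_DB_URI", "mongodb+srv://user:pass@host.example/eegdash")

    # Non-public instances fall back to the environment, loaded via python-dotenv.
    os.environ["DB_CONNECTION_STRING"] = "mongodb://localhost:27017"

    dash = EEGDash()  # in 0.4.0 this raises RuntimeError if neither source yields a URI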
eegdash/api.py
CHANGED
@@ -1,23 +1,31 @@
-
+# Authors: The EEGDash contributors.
+# License: GNU General Public License
+# Copyright the EEGDash contributors.
+
+"""High-level interface to the EEGDash metadata database.
+
+This module provides the main EEGDash class which serves as the primary entry point for
+interacting with the EEGDash ecosystem. It offers methods to query, insert, and update
+metadata records stored in the EEGDash MongoDB database, and includes utilities to load
+EEG data from S3 for matched records.
+"""
+
 import os
-import tempfile
 from pathlib import Path
 from typing import Any, Mapping
-from urllib.parse import urlsplit
 
 import mne
-import numpy as np
-import xarray as xr
 from docstring_inheritance import NumpyDocstringInheritanceInitMeta
 from dotenv import load_dotenv
-from joblib import Parallel, delayed
-from mne.utils import warn
-from mne_bids import find_matching_paths, get_bids_path_from_fname, read_raw_bids
+from mne_bids import find_matching_paths
 from pymongo import InsertOne, UpdateOne
-from s3fs import S3FileSystem
+from rich.console import Console
+from rich.panel import Panel
+from rich.text import Text
 
 from braindecode.datasets import BaseConcatDataset
 
+from . import downloader
 from .bids_eeg_metadata import (
     build_query_from_kwargs,
     load_eeg_attrs_from_bids_file,
@@ -33,10 +41,10 @@ from .data_utils import (
     EEGBIDSDataset,
     EEGDashBaseDataset,
 )
+from .logging import logger
 from .mongodb import MongoConnectionManager
 from .paths import get_default_cache_dir
-
-logger = logging.getLogger("eegdash")
+from .utils import _init_mongo_client
 
 
 class EEGDash:
@@ -74,19 +82,26 @@ class EEGDash:
 
         if self.is_public:
             DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
+            if not DB_CONNECTION_STRING:
+                try:
+                    _init_mongo_client()
+                    DB_CONNECTION_STRING = mne.utils.get_config("EEGDASH_DB_URI")
+                except Exception:
+                    DB_CONNECTION_STRING = None
         else:
             load_dotenv()
             DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")
 
         # Use singleton to get MongoDB client, database, and collection
+        if not DB_CONNECTION_STRING:
+            raise RuntimeError(
+                "No MongoDB connection string configured. Set MNE config 'EEGDASH_DB_URI' "
+                "or environment variable 'DB_CONNECTION_STRING'."
+            )
         self.__client, self.__db, self.__collection = MongoConnectionManager.get_client(
             DB_CONNECTION_STRING, is_staging
         )
 
-        self.filesystem = S3FileSystem(
-            anon=True, client_kwargs={"region_name": "us-east-2"}
-        )
-
     def find(
         self, query: dict[str, Any] = None, /, **kwargs
     ) -> list[Mapping[str, Any]]:
@@ -197,17 +212,22 @@ class EEGDash:
         return doc is not None
 
     def _validate_input(self, record: dict[str, Any]) -> dict[str, Any]:
-        """
+        """Validate the input record against the expected schema.
 
         Parameters
         ----------
-        record: dict
+        record : dict
            A dictionary representing the EEG data record to be validated.
 
         Returns
         -------
-        dict
-
+        dict
+            The record itself on success.
+
+        Raises
+        ------
+        ValueError
+            If the record is missing required keys or has values of the wrong type.
 
         """
         input_types = {
@@ -237,20 +257,44 @@ class EEGDash:
         return record
 
     def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
-        """
+        """Build a validated MongoDB query from keyword arguments.
+
+        This delegates to the module-level builder used across the package.
+
+        Parameters
+        ----------
+        **kwargs
+            Keyword arguments to convert into a MongoDB query.
+
+        Returns
+        -------
+        dict
+            A MongoDB query dictionary.
 
-        This delegates to the module-level builder used across the package and
-        is exposed here for testing and convenience.
         """
         return build_query_from_kwargs(**kwargs)
 
-
-
+    def _extract_simple_constraint(
+        self, query: dict[str, Any], key: str
+    ) -> tuple[str, Any] | None:
         """Extract a simple constraint for a given key from a query dict.
 
-        Supports
-
-
+        Supports top-level equality (e.g., ``{'subject': '01'}``) and ``$in``
+        (e.g., ``{'subject': {'$in': ['01', '02']}}``) constraints.
+
+        Parameters
+        ----------
+        query : dict
+            The MongoDB query dictionary.
+        key : str
+            The key for which to extract the constraint.
+
+        Returns
+        -------
+        tuple or None
+            A tuple of (kind, value) where kind is "eq" or "in", or None if the
+            constraint is not present or unsupported.
+
         """
         if not isinstance(query, dict) or key not in query:
             return None
@@ -260,16 +304,28 @@ class EEGDash:
                 return ("in", list(val["$in"]))
             return None  # unsupported operator shape for conflict checking
         else:
-            return
+            return "eq", val
 
     def _raise_if_conflicting_constraints(
         self, raw_query: dict[str, Any], kwargs_query: dict[str, Any]
     ) -> None:
-        """Raise ValueError if
+        """Raise ValueError if query sources have incompatible constraints.
+
+        Checks for mutually exclusive constraints on the same field to avoid
+        silent empty results.
+
+        Parameters
+        ----------
+        raw_query : dict
+            The raw MongoDB query dictionary.
+        kwargs_query : dict
+            The query dictionary built from keyword arguments.
+
+        Raises
+        ------
+        ValueError
+            If conflicting constraints are found.
 
-        We conservatively check only top-level fields with simple equality or $in
-        constraints. If a field appears in both queries and constraints are mutually
-        exclusive, raise an explicit error to avoid silent empty result sets.
         """
         if not raw_query or not kwargs_query:
             return
@@ -310,115 +366,6 @@ class EEGDash:
                 f"Conflicting constraints for '{key}': disjoint sets {r_val!r} and {k_val!r}"
             )
 
-    def load_eeg_data_from_s3(self, s3path: str) -> xr.DataArray:
-        """Load EEG data from an S3 URI into an ``xarray.DataArray``.
-
-        Preserves the original filename, downloads sidecar files when applicable
-        (e.g., ``.fdt`` for EEGLAB, ``.vmrk``/``.eeg`` for BrainVision), and uses
-        MNE's direct readers.
-
-        Parameters
-        ----------
-        s3path : str
-            An S3 URI (should start with "s3://").
-
-        Returns
-        -------
-        xr.DataArray
-            EEG data with dimensions ``("channel", "time")``.
-
-        Raises
-        ------
-        ValueError
-            If the file extension is unsupported.
-
-        """
-        # choose a temp dir so sidecars can be colocated
-        with tempfile.TemporaryDirectory() as tmpdir:
-            # Derive local filenames from the S3 key to keep base name consistent
-            s3_key = urlsplit(s3path).path  # e.g., "/dsXXXX/sub-.../..._eeg.set"
-            basename = Path(s3_key).name
-            ext = Path(basename).suffix.lower()
-            local_main = Path(tmpdir) / basename
-
-            # Download main file
-            with (
-                self.filesystem.open(s3path, mode="rb") as fsrc,
-                open(local_main, "wb") as fdst,
-            ):
-                fdst.write(fsrc.read())
-
-            # Determine and fetch any required sidecars
-            sidecars: list[str] = []
-            if ext == ".set":  # EEGLAB
-                sidecars = [".fdt"]
-            elif ext == ".vhdr":  # BrainVision
-                sidecars = [".vmrk", ".eeg", ".dat", ".raw"]
-
-            for sc_ext in sidecars:
-                sc_key = s3_key[: -len(ext)] + sc_ext
-                sc_uri = f"s3://{urlsplit(s3path).netloc}{sc_key}"
-                try:
-                    # If sidecar exists, download next to the main file
-                    info = self.filesystem.info(sc_uri)
-                    if info:
-                        sc_local = Path(tmpdir) / Path(sc_key).name
-                        with (
-                            self.filesystem.open(sc_uri, mode="rb") as fsrc,
-                            open(sc_local, "wb") as fdst,
-                        ):
-                            fdst.write(fsrc.read())
-                except Exception:
-                    # Sidecar not present; skip silently
-                    pass
-
-            # Read using appropriate MNE reader
-            raw = mne.io.read_raw(str(local_main), preload=True, verbose=False)
-
-            data = raw.get_data()
-            fs = raw.info["sfreq"]
-            max_time = data.shape[1] / fs
-            time_steps = np.linspace(0, max_time, data.shape[1]).squeeze()
-            channel_names = raw.ch_names
-
-            return xr.DataArray(
-                data=data,
-                dims=["channel", "time"],
-                coords={"time": time_steps, "channel": channel_names},
-            )
-
-    def load_eeg_data_from_bids_file(self, bids_file: str) -> xr.DataArray:
-        """Load EEG data from a local BIDS-formatted file.
-
-        Parameters
-        ----------
-        bids_file : str
-            Path to a BIDS-compliant EEG file (e.g., ``*_eeg.edf``, ``*_eeg.bdf``,
-            ``*_eeg.vhdr``, ``*_eeg.set``).
-
-        Returns
-        -------
-        xr.DataArray
-            EEG data with dimensions ``("channel", "time")``.
-
-        """
-        bids_path = get_bids_path_from_fname(bids_file, verbose=False)
-        raw_object = read_raw_bids(bids_path=bids_path, verbose=False)
-        eeg_data = raw_object.get_data()
-
-        fs = raw_object.info["sfreq"]
-        max_time = eeg_data.shape[1] / fs
-        time_steps = np.linspace(0, max_time, eeg_data.shape[1]).squeeze()  # in seconds
-
-        channel_names = raw_object.ch_names
-
-        eeg_xarray = xr.DataArray(
-            data=eeg_data,
-            dims=["channel", "time"],
-            coords={"time": time_steps, "channel": channel_names},
-        )
-        return eeg_xarray
-
     def add_bids_dataset(
         self, dataset: str, data_dir: str, overwrite: bool = True
     ) -> None:
@@ -482,84 +429,59 @@ class EEGDash:
         logger.info("Upserted: %s", result.upserted_count)
         logger.info("Errors: %s ", result.bulk_api_result.get("writeErrors", []))
 
-    def
-        """
+    def _add_request(self, record: dict) -> InsertOne:
+        """Create a MongoDB insertion request for a record.
 
         Parameters
         ----------
-
-
+        record : dict
+            The record to insert.
 
         Returns
         -------
-
-
-
-        Notes
-        -----
-        Retrieval runs in parallel. Downloaded files are read and discarded
-        (no on-disk caching here).
+        InsertOne
+            A PyMongo ``InsertOne`` object.
 
         """
-
-        results = []
-        if sessions:
-            logger.info("Found %s records", len(sessions))
-            results = Parallel(
-                n_jobs=-1 if len(sessions) > 1 else 1, prefer="threads", verbose=1
-            )(
-                delayed(self.load_eeg_data_from_s3)(self._get_s3path(session))
-                for session in sessions
-            )
-        return results
+        return InsertOne(record)
 
-    def _get_s3path(self, record):
-        """
+    def add(self, record: dict) -> None:
+        """Add a single record to the MongoDB collection.
 
         Parameters
         ----------
-        record : dict
-
-            path string under the OpenNeuro bucket.
-
-        Returns
-        -------
-        str
-            Fully qualified S3 URI.
-
-        Raises
-        ------
-        ValueError
-            If a mapping is provided but ``'bidspath'`` is missing.
+        record : dict
+            The record to add.
 
         """
-        if isinstance(record, str):
-            rel = record
-        else:
-            rel = record.get("bidspath")
-            if not rel:
-                raise ValueError("Record missing 'bidspath' for S3 path resolution")
-        return f"s3://openneuro.org/{rel}"
-
-    def _add_request(self, record: dict):
-        """Internal helper method to create a MongoDB insertion request for a record."""
-        return InsertOne(record)
-
-    def add(self, record: dict):
-        """Add a single record to the MongoDB collection."""
         try:
             self.__collection.insert_one(record)
         except ValueError as e:
             logger.error("Validation error for record: %s ", record["data_name"])
             logger.error(e)
-        except:
-            logger.error(
+        except Exception as exc:
+            logger.error(
+                "Error adding record: %s ", record.get("data_name", "<unknown>")
+            )
+            logger.debug("Add operation failed", exc_info=exc)
+
+    def _update_request(self, record: dict) -> UpdateOne:
+        """Create a MongoDB update request for a record.
 
-
-
+        Parameters
+        ----------
+        record : dict
+            The record to update.
+
+        Returns
+        -------
+        UpdateOne
+            A PyMongo ``UpdateOne`` object.
+
+        """
         return UpdateOne({"data_name": record["data_name"]}, {"$set": record})
 
-    def update(self, record: dict):
+    def update(self, record: dict) -> None:
         """Update a single record in the MongoDB collection.
 
         Parameters
@@ -572,62 +494,88 @@ class EEGDash:
             self.__collection.update_one(
                 {"data_name": record["data_name"]}, {"$set": record}
             )
-        except: #
-            logger.error(
+        except Exception as exc:  # log and continue
+            logger.error(
+                "Error updating record: %s", record.get("data_name", "<unknown>")
+            )
+            logger.debug("Update operation failed", exc_info=exc)
 
     def exists(self, query: dict[str, Any]) -> bool:
-        """
+        """Check if at least one record matches the query.
+
+        This is an alias for :meth:`exist`.
+
+        Parameters
+        ----------
+        query : dict
+            MongoDB query to check for existence.
+
+        Returns
+        -------
+        bool
+            True if a matching record exists, False otherwise.
+
+        """
         return self.exist(query)
 
-    def remove_field(self, record, field):
-        """Remove a
+    def remove_field(self, record: dict, field: str) -> None:
+        """Remove a field from a specific record in the MongoDB collection.
 
         Parameters
         ----------
         record : dict
-            Record
+            Record-identifying object with a ``data_name`` key.
         field : str
-
+            The name of the field to remove.
 
         """
         self.__collection.update_one(
             {"data_name": record["data_name"]}, {"$unset": {field: 1}}
        )
 
-    def remove_field_from_db(self, field):
-        """Remove a field from all records
+    def remove_field_from_db(self, field: str) -> None:
+        """Remove a field from all records in the database.
+
+        .. warning::
+            This is a destructive operation and cannot be undone.
 
         Parameters
         ----------
         field : str
-
+            The name of the field to remove from all documents.
 
         """
         self.__collection.update_many({}, {"$unset": {field: 1}})
 
     @property
     def collection(self):
-        """
-        return self.__collection
+        """The underlying PyMongo ``Collection`` object.
 
-
-
+        Returns
+        -------
+        pymongo.collection.Collection
+            The collection object used for database interactions.
 
-
-
-
-
+        """
+        return self.__collection
+
+    def close(self) -> None:
+        """Close the MongoDB connection.
 
+        .. deprecated:: 0.1
+            Connections are now managed globally by :class:`MongoConnectionManager`.
+            This method is a no-op and will be removed in a future version.
+            Use :meth:`EEGDash.close_all_connections` to close all clients.
         """
         # Individual instances no longer close the shared client
        pass
 
     @classmethod
-    def close_all_connections(cls):
-        """Close all MongoDB client connections managed by the singleton."""
+    def close_all_connections(cls) -> None:
+        """Close all MongoDB client connections managed by the singleton manager."""
        MongoConnectionManager.close_all()
 
-    def __del__(self):
+    def __del__(self) -> None:
        """Destructor; no explicit action needed due to global connection manager."""
        # No longer needed since we're using singleton pattern
        pass
@@ -640,22 +588,59 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
 
     Examples
     --------
-
-
-
-
-
-
-
-
-    >>>
-
+    Basic usage with dataset and subject filtering:
+
+    >>> from eegdash import EEGDashDataset
+    >>> dataset = EEGDashDataset(
+    ...     cache_dir="./data",
+    ...     dataset="ds002718",
+    ...     subject="012"
+    ... )
+    >>> print(f"Number of recordings: {len(dataset)}")
+
+    Filter by multiple subjects and specific task:
+
+    >>> subjects = ["012", "013", "014"]
+    >>> dataset = EEGDashDataset(
+    ...     cache_dir="./data",
+    ...     dataset="ds002718",
+    ...     subject=subjects,
+    ...     task="RestingState"
+    ... )
+
+    Load and inspect EEG data from recordings:
+
+    >>> if len(dataset) > 0:
+    ...     recording = dataset[0]
+    ...     raw = recording.load()
+    ...     print(f"Sampling rate: {raw.info['sfreq']} Hz")
+    ...     print(f"Number of channels: {len(raw.ch_names)}")
+    ...     print(f"Duration: {raw.times[-1]:.1f} seconds")
+
+    Advanced filtering with raw MongoDB queries:
+
+    >>> from eegdash import EEGDashDataset
+    >>> query = {
+    ...     "dataset": "ds002718",
+    ...     "subject": {"$in": ["012", "013"]},
+    ...     "task": "RestingState"
+    ... }
+    >>> dataset = EEGDashDataset(cache_dir="./data", query=query)
+
+    Working with dataset collections and braindecode integration:
+
+    >>> # EEGDashDataset is a braindecode BaseConcatDataset
+    >>> for i, recording in enumerate(dataset):
+    ...     if i >= 2:  # limit output
+    ...         break
+    ...     print(f"Recording {i}: {recording.description}")
+    ...     raw = recording.load()
+    ...     print(f"  Channels: {len(raw.ch_names)}, Duration: {raw.times[-1]:.1f}s")
 
     Parameters
     ----------
     cache_dir : str | Path
-        Directory where data are cached locally.
-        cache directory under the user cache is used.
+        Directory where data are cached locally.
     query : dict | None
         Raw MongoDB query to filter records. If provided, it is merged with
         keyword filtering arguments (see ``**kwargs``) using logical AND.
@@ -726,13 +711,21 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
         self.records = records
         self.download = download
         self.n_jobs = n_jobs
-        self.eeg_dash_instance = eeg_dash_instance
+        self.eeg_dash_instance = eeg_dash_instance
 
-
-        self.cache_dir
+        self.cache_dir = cache_dir
+        if self.cache_dir == "" or self.cache_dir is None:
+            self.cache_dir = get_default_cache_dir()
+            logger.warning(
+                f"Cache directory is empty, using the eegdash default path: {self.cache_dir}"
+            )
+
+        self.cache_dir = Path(self.cache_dir)
 
         if not self.cache_dir.exists():
-
+            logger.warning(
+                f"Cache directory does not exist, creating it: {self.cache_dir}"
+            )
             self.cache_dir.mkdir(exist_ok=True, parents=True)
 
         # Separate query kwargs from other kwargs passed to the BaseDataset constructor
@@ -772,21 +765,29 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
             not _suppress_comp_warning
             and self.query["dataset"] in RELEASE_TO_OPENNEURO_DATASET_MAP.values()
         ):
-
-                "
-                "\n
-                "
-                "
-                "
-                "
-                "
-                "
-                "
-                "If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data.\n"
-                "\n",
-                UserWarning,
-                module="eegdash",
+            message_text = Text.from_markup(
+                "[italic]This notice is only for users who are participating in the [link=https://eeg2025.github.io/]EEG 2025 Competition[/link].[/italic]\n\n"
+                "[bold]EEG 2025 Competition Data Notice![/bold]\n"
+                "You are loading one of the datasets that is used in competition, but via `EEGDashDataset`.\n\n"
+                "[bold red]IMPORTANT[/bold red]: \n"
+                "If you download data from `EEGDashDataset`, it is [u]NOT[/u] identical to the official \n"
+                "competition data, which is accessed via `EEGChallengeDataset`. "
+                "The competition data has been downsampled and filtered.\n\n"
+                "[bold]If you are participating in the competition, \nyou must use the `EEGChallengeDataset` object to ensure consistency.[/bold] \n\n"
+                "If you are not participating in the competition, you can ignore this message."
            )
+            warning_panel = Panel(
+                message_text,
+                title="[yellow]EEG 2025 Competition Data Notice[/yellow]",
+                subtitle="[cyan]Source: EEGDashDataset[/cyan]",
+                border_style="yellow",
+            )
+
+            try:
+                Console().print(warning_panel)
+            except Exception:
+                logger.warning(str(message_text))
+
         if records is not None:
             self.records = records
             datasets = [
@@ -848,16 +849,15 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
                 )
             )
         elif self.query:
-
+            if self.eeg_dash_instance is None:
+                self.eeg_dash_instance = EEGDash()
             datasets = self._find_datasets(
                 query=build_query_from_kwargs(**self.query),
                 description_fields=description_fields,
                 base_dataset_kwargs=base_dataset_kwargs,
            )
             # We only need filesystem if we need to access S3
-            self.filesystem = S3FileSystem(
-                anon=True, client_kwargs={"region_name": "us-east-2"}
-            )
+            self.filesystem = downloader.get_s3_filesystem()
         else:
             raise ValueError(
                 "You must provide either 'records', a 'data_dir', or a query/keyword arguments for filtering."
@@ -870,45 +870,30 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
    ) -> list[dict]:
        """Discover local BIDS EEG files and build minimal records.
 
-
-        ``mne_bids.find_matching_paths`` and applies entity filters to produce
-
-
+        Enumerates EEG recordings under ``dataset_root`` using
+        ``mne_bids.find_matching_paths`` and applies entity filters to produce
+        records suitable for :class:`EEGDashBaseDataset`. No network access is
+        performed, and files are not read.
 
         Parameters
         ----------
         dataset_root : Path
-            Local dataset directory
-
-            ``
-
-            Query filters. Must include ``'dataset'`` with the dataset id (without
-            local suffixes). May include BIDS entities ``'subject'``,
-            ``'session'``, ``'task'``, and ``'run'``. Each value can be a scalar
-            or a sequence of scalars.
+            Local dataset directory (e.g., ``/path/to/cache/ds005509``).
+        filters : dict
+            Query filters. Must include ``'dataset'`` and may include BIDS
+            entities like ``'subject'``, ``'session'``, etc.
 
         Returns
         -------
-
-
-
-        - ``'data_name'``
-        - ``'dataset'`` (dataset id, without suffixes)
-        - ``'bidspath'`` (normalized to start with the dataset id)
-        - ``'subject'``, ``'session'``, ``'task'``, ``'run'`` (may be None)
-        - ``'bidsdependencies'`` (empty list)
-        - ``'modality'`` (``"eeg"``)
-        - ``'sampling_frequency'``, ``'nchans'``, ``'ntimes'`` (minimal
-          defaults for offline usage)
+        list of dict
+            A list of records, one for each matched EEG file. Each record
+            contains BIDS entities, paths, and minimal metadata for offline use.
 
         Notes
        -----
-
-
-
-          first path component is the dataset id (without local cache suffixes).
-        - Minimal defaults are set for ``sampling_frequency``, ``nchans``, and
-          ``ntimes`` to satisfy dataset length requirements offline.
+        Matching is performed for ``datatypes=['eeg']`` and ``suffixes=['eeg']``.
+        The ``bidspath`` is normalized to ensure it starts with the dataset ID,
+        even for suffixed cache directories.
 
        """
        dataset_id = filters["dataset"]
@@ -970,10 +955,22 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
        return records_out
 
    def _find_key_in_nested_dict(self, data: Any, target_key: str) -> Any:
-        """Recursively search for
+        """Recursively search for a key in nested dicts/lists.
+
+        Performs a case-insensitive and underscore/hyphen-agnostic search.
+
+        Parameters
+        ----------
+        data : Any
+            The nested data structure (dicts, lists) to search.
+        target_key : str
+            The key to search for.
+
+        Returns
+        -------
+        Any
+            The value of the first matching key, or None if not found.
 
-        This makes lookups tolerant to naming differences like "p-factor" vs "p_factor".
-        Returns the first match or None.
        """
        norm_target = normalize_key(target_key)
        if isinstance(data, dict):
@@ -996,23 +993,25 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
        description_fields: list[str],
        base_dataset_kwargs: dict,
    ) -> list[EEGDashBaseDataset]:
-        """
-
+        """Find and construct datasets from a MongoDB query.
+
+        Queries the database, then creates a list of
+        :class:`EEGDashBaseDataset` objects from the results.
 
        Parameters
        ----------
-        query : dict
-            The query
-        description_fields : list
-
-
-
-            constructor.
+        query : dict, optional
+            The MongoDB query to execute.
+        description_fields : list of str
+            Fields to extract from each record for the dataset description.
+        base_dataset_kwargs : dict
+            Additional keyword arguments to pass to the
+            :class:`EEGDashBaseDataset` constructor.
 
        Returns
        -------
-        list
-            A list of
+        list of EEGDashBaseDataset
+            A list of dataset objects matching the query.
 
        """
        datasets: list[EEGDashBaseDataset] = []
@@ -1043,3 +1042,6 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
            )
        )
        return datasets
+
+
+__all__ = ["EEGDash", "EEGDashDataset"]
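With the direct S3 loaders gone from EEGDash, data access in 0.4.0 flows through EEGDashDataset and the per-recording load() method, as documented in the Examples section added to the class docstring above. A condensed version of those examples, reusing the same dataset and subject values:

    from eegdash import EEGDashDataset

    # Query metadata and materialize a braindecode BaseConcatDataset.
    dataset = EEGDashDataset(
        cache_dir="./data",
        dataset="ds002718",
        subject="012",
    )
    print(f"Number of recordings: {len(dataset)}")

    if len(dataset) > 0:
        recording = dataset[0]
        raw = recording.load()  # per the docstring, an MNE Raw object
        print(f"Sampling rate: {raw.info['sfreq']} Hz")
        print(f"Number of channels: {len(raw.ch_names)}")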