eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. More details were linked from the original release page.
- eegdash/__init__.py +1 -1
- eegdash/api.py +183 -88
- eegdash/bids_eeg_metadata.py +139 -39
- eegdash/const.py +25 -0
- eegdash/data_utils.py +333 -276
- eegdash/dataset/dataset.py +35 -13
- eegdash/dataset/dataset_summary.csv +255 -255
- eegdash/dataset/registry.py +69 -4
- eegdash/downloader.py +95 -9
- eegdash/features/datasets.py +325 -136
- eegdash/features/decorators.py +96 -3
- eegdash/features/extractors.py +212 -55
- eegdash/features/feature_bank/complexity.py +7 -3
- eegdash/features/feature_bank/dimensionality.py +1 -1
- eegdash/features/feature_bank/signal.py +11 -10
- eegdash/features/feature_bank/utils.py +8 -0
- eegdash/features/inspect.py +88 -5
- eegdash/features/serialization.py +51 -19
- eegdash/features/utils.py +80 -8
- eegdash/hbn/preprocessing.py +50 -17
- eegdash/hbn/windows.py +145 -32
- eegdash/logging.py +19 -0
- eegdash/mongodb.py +44 -27
- eegdash/paths.py +14 -5
- eegdash/utils.py +16 -1
- {eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dist-info}/METADATA +6 -8
- eegdash-0.4.1.dist-info/RECORD +37 -0
- eegdash-0.4.0.dev173498563.dist-info/RECORD +0 -37
- {eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dist-info}/WHEEL +0 -0
- {eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.4.0.dev173498563.dist-info → eegdash-0.4.1.dist-info}/top_level.txt +0 -0
eegdash/bids_eeg_metadata.py
CHANGED
|
@@ -33,12 +33,30 @@ __all__ = [
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
|
|
36
|
-
"""Build and validate a MongoDB query from
|
|
36
|
+
"""Build and validate a MongoDB query from keyword arguments.
|
|
37
|
+
|
|
38
|
+
This function converts user-friendly keyword arguments into a valid
|
|
39
|
+
MongoDB query dictionary. It handles scalar values as exact matches and
|
|
40
|
+
list-like values as ``$in`` queries. It also performs validation to
|
|
41
|
+
reject unsupported fields and empty values.
|
|
42
|
+
|
|
43
|
+
Parameters
|
|
44
|
+
----------
|
|
45
|
+
**kwargs
|
|
46
|
+
Keyword arguments representing query filters. Allowed keys are defined
|
|
47
|
+
in ``eegdash.const.ALLOWED_QUERY_FIELDS``.
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
dict
|
|
52
|
+
A MongoDB query dictionary.
|
|
53
|
+
|
|
54
|
+
Raises
|
|
55
|
+
------
|
|
56
|
+
ValueError
|
|
57
|
+
If an unsupported query field is provided, or if a value is None or
|
|
58
|
+
an empty string/list.
|
|
37
59
|
|
|
38
|
-
Improvements:
|
|
39
|
-
- Reject None values and empty/whitespace-only strings
|
|
40
|
-
- For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
|
|
41
|
-
- Preserve scalars as exact matches
|
|
42
60
|
"""
|
|
43
61
|
# 1. Validate that all provided keys are allowed for querying
|
|
44
62
|
unknown_fields = set(kwargs.keys()) - ALLOWED_QUERY_FIELDS
|
|
@@ -89,24 +107,29 @@ def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
|
|
|
89
107
|
|
|
90
108
|
|
|
91
109
|
def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any]:
|
|
92
|
-
"""Build
|
|
110
|
+
"""Build a metadata record for a BIDS file.
|
|
93
111
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
112
|
+
Extracts metadata attributes from a single BIDS EEG file within a given
|
|
113
|
+
BIDS dataset. The extracted attributes include BIDS entities, file paths,
|
|
114
|
+
and technical metadata required for database indexing.
|
|
97
115
|
|
|
98
116
|
Parameters
|
|
99
117
|
----------
|
|
100
118
|
bids_dataset : EEGBIDSDataset
|
|
101
119
|
The BIDS dataset object containing the file.
|
|
102
120
|
bids_file : str
|
|
103
|
-
The path to the BIDS file
|
|
121
|
+
The path to the BIDS file to process.
|
|
104
122
|
|
|
105
123
|
Returns
|
|
106
124
|
-------
|
|
107
|
-
dict
|
|
108
|
-
A dictionary
|
|
109
|
-
|
|
125
|
+
dict
|
|
126
|
+
A dictionary of metadata attributes for the file, suitable for
|
|
127
|
+
insertion into the database.
|
|
128
|
+
|
|
129
|
+
Raises
|
|
130
|
+
------
|
|
131
|
+
ValueError
|
|
132
|
+
If ``bids_file`` is not found in the ``bids_dataset``.
|
|
110
133
|
|
|
111
134
|
"""
|
|
112
135
|
if bids_file not in bids_dataset.files:
|
|
@@ -198,11 +221,23 @@ def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any
|
|
|
198
221
|
|
|
199
222
|
|
|
200
223
|
def normalize_key(key: str) -> str:
|
|
201
|
-
"""Normalize a
|
|
224
|
+
"""Normalize a string key for robust matching.
|
|
225
|
+
|
|
226
|
+
Converts the key to lowercase, replaces non-alphanumeric characters with
|
|
227
|
+
underscores, and removes leading/trailing underscores. This allows for
|
|
228
|
+
tolerant matching of keys that may have different capitalization or
|
|
229
|
+
separators (e.g., "p-factor" becomes "p_factor").
|
|
230
|
+
|
|
231
|
+
Parameters
|
|
232
|
+
----------
|
|
233
|
+
key : str
|
|
234
|
+
The key to normalize.
|
|
235
|
+
|
|
236
|
+
Returns
|
|
237
|
+
-------
|
|
238
|
+
str
|
|
239
|
+
The normalized key.
|
|
202
240
|
|
|
203
|
-
Lowercase and replace non-alphanumeric characters with underscores, then strip
|
|
204
|
-
leading/trailing underscores. This allows tolerant matching such as
|
|
205
|
-
"p-factor" ≈ "p_factor" ≈ "P Factor".
|
|
206
241
|
"""
|
|
207
242
|
return re.sub(r"[^a-z0-9]+", "_", str(key).lower()).strip("_")
|
|
208
243
|
|
|
@@ -212,27 +247,27 @@ def merge_participants_fields(
|
|
|
212
247
|
participants_row: dict[str, Any] | None,
|
|
213
248
|
description_fields: list[str] | None = None,
|
|
214
249
|
) -> dict[str, Any]:
|
|
215
|
-
"""Merge participants.tsv
|
|
250
|
+
"""Merge fields from a participants.tsv row into a description dict.
|
|
216
251
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
unless a matching requested field already captured them.
|
|
252
|
+
Enriches a description dictionary with data from a subject's row in
|
|
253
|
+
``participants.tsv``. It avoids overwriting existing keys in the
|
|
254
|
+
description.
|
|
221
255
|
|
|
222
256
|
Parameters
|
|
223
257
|
----------
|
|
224
258
|
description : dict
|
|
225
|
-
|
|
226
|
-
participants_row : dict
|
|
227
|
-
A
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
259
|
+
The description dictionary to enrich.
|
|
260
|
+
participants_row : dict or None
|
|
261
|
+
A dictionary representing a row from ``participants.tsv``. If None,
|
|
262
|
+
the original description is returned unchanged.
|
|
263
|
+
description_fields : list of str, optional
|
|
264
|
+
A list of specific fields to include in the description. Matching is
|
|
265
|
+
done using normalized keys.
|
|
231
266
|
|
|
232
267
|
Returns
|
|
233
268
|
-------
|
|
234
269
|
dict
|
|
235
|
-
The enriched description
|
|
270
|
+
The enriched description dictionary.
|
|
236
271
|
|
|
237
272
|
"""
|
|
238
273
|
if not isinstance(description, dict) or not isinstance(participants_row, dict):
|
|
@@ -272,10 +307,26 @@ def participants_row_for_subject(
|
|
|
272
307
|
subject: str,
|
|
273
308
|
id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
|
|
274
309
|
) -> pd.Series | None:
|
|
275
|
-
"""Load participants.tsv and return the row for a subject.
|
|
310
|
+
"""Load participants.tsv and return the row for a specific subject.
|
|
311
|
+
|
|
312
|
+
Searches for a subject's data in the ``participants.tsv`` file within a
|
|
313
|
+
BIDS dataset. It can identify the subject with or without the "sub-"
|
|
314
|
+
prefix.
|
|
315
|
+
|
|
316
|
+
Parameters
|
|
317
|
+
----------
|
|
318
|
+
bids_root : str or Path
|
|
319
|
+
The root directory of the BIDS dataset.
|
|
320
|
+
subject : str
|
|
321
|
+
The subject identifier (e.g., "01" or "sub-01").
|
|
322
|
+
id_columns : tuple of str, default ("participant_id", "participant", "subject")
|
|
323
|
+
A tuple of column names to search for the subject identifier.
|
|
324
|
+
|
|
325
|
+
Returns
|
|
326
|
+
-------
|
|
327
|
+
pandas.Series or None
|
|
328
|
+
A pandas Series containing the subject's data if found, otherwise None.
|
|
276
329
|
|
|
277
|
-
- Accepts either "01" or "sub-01" as the subject identifier.
|
|
278
|
-
- Returns a pandas Series for the first matching row, or None if not found.
|
|
279
330
|
"""
|
|
280
331
|
try:
|
|
281
332
|
participants_tsv = Path(bids_root) / "participants.tsv"
|
|
@@ -311,9 +362,28 @@ def participants_extras_from_tsv(
|
|
|
311
362
|
id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
|
|
312
363
|
na_like: tuple[str, ...] = ("", "n/a", "na", "nan", "unknown", "none"),
|
|
313
364
|
) -> dict[str, Any]:
|
|
314
|
-
"""
|
|
365
|
+
"""Extract additional participant information from participants.tsv.
|
|
366
|
+
|
|
367
|
+
Retrieves all non-identifier and non-empty fields for a subject from
|
|
368
|
+
the ``participants.tsv`` file.
|
|
369
|
+
|
|
370
|
+
Parameters
|
|
371
|
+
----------
|
|
372
|
+
bids_root : str or Path
|
|
373
|
+
The root directory of the BIDS dataset.
|
|
374
|
+
subject : str
|
|
375
|
+
The subject identifier.
|
|
376
|
+
id_columns : tuple of str, default ("participant_id", "participant", "subject")
|
|
377
|
+
Column names to be treated as identifiers and excluded from the
|
|
378
|
+
output.
|
|
379
|
+
na_like : tuple of str, default ("", "n/a", "na", "nan", "unknown", "none")
|
|
380
|
+
Values to be considered as "Not Available" and excluded.
|
|
381
|
+
|
|
382
|
+
Returns
|
|
383
|
+
-------
|
|
384
|
+
dict
|
|
385
|
+
A dictionary of extra participant information.
|
|
315
386
|
|
|
316
|
-
Uses vectorized pandas operations to drop id columns and NA-like values.
|
|
317
387
|
"""
|
|
318
388
|
row = participants_row_for_subject(bids_root, subject, id_columns=id_columns)
|
|
319
389
|
if row is None:
|
|
@@ -331,10 +401,21 @@ def attach_participants_extras(
|
|
|
331
401
|
description: Any,
|
|
332
402
|
extras: dict[str, Any],
|
|
333
403
|
) -> None:
|
|
334
|
-
"""Attach
|
|
404
|
+
"""Attach extra participant data to a raw object and its description.
|
|
405
|
+
|
|
406
|
+
Updates the ``raw.info['subject_info']`` and the description object
|
|
407
|
+
(dict or pandas Series) with extra data from ``participants.tsv``.
|
|
408
|
+
It does not overwrite existing keys.
|
|
409
|
+
|
|
410
|
+
Parameters
|
|
411
|
+
----------
|
|
412
|
+
raw : mne.io.Raw
|
|
413
|
+
The MNE Raw object to be updated.
|
|
414
|
+
description : dict or pandas.Series
|
|
415
|
+
The description object to be updated.
|
|
416
|
+
extras : dict
|
|
417
|
+
A dictionary of extra participant information to attach.
|
|
335
418
|
|
|
336
|
-
- Adds to ``raw.info['subject_info']['participants_extras']``.
|
|
337
|
-
- Adds to ``description`` if dict or pandas Series (only missing keys).
|
|
338
419
|
"""
|
|
339
420
|
if not extras:
|
|
340
421
|
return
|
|
@@ -375,9 +456,28 @@ def enrich_from_participants(
|
|
|
375
456
|
raw: Any,
|
|
376
457
|
description: Any,
|
|
377
458
|
) -> dict[str, Any]:
|
|
378
|
-
"""
|
|
459
|
+
"""Read participants.tsv and attach extra info for the subject.
|
|
460
|
+
|
|
461
|
+
This is a convenience function that finds the subject from the
|
|
462
|
+
``bidspath``, retrieves extra information from ``participants.tsv``,
|
|
463
|
+
and attaches it to the raw object and its description.
|
|
464
|
+
|
|
465
|
+
Parameters
|
|
466
|
+
----------
|
|
467
|
+
bids_root : str or Path
|
|
468
|
+
The root directory of the BIDS dataset.
|
|
469
|
+
bidspath : mne_bids.BIDSPath
|
|
470
|
+
The BIDSPath object for the current data file.
|
|
471
|
+
raw : mne.io.Raw
|
|
472
|
+
The MNE Raw object to be updated.
|
|
473
|
+
description : dict or pandas.Series
|
|
474
|
+
The description object to be updated.
|
|
475
|
+
|
|
476
|
+
Returns
|
|
477
|
+
-------
|
|
478
|
+
dict
|
|
479
|
+
The dictionary of extras that were attached.
|
|
379
480
|
|
|
380
|
-
Returns the extras dictionary for further use if needed.
|
|
381
481
|
"""
|
|
382
482
|
subject = getattr(bidspath, "subject", None)
|
|
383
483
|
if not subject:
|
eegdash/const.py
CHANGED
|
@@ -28,6 +28,8 @@ ALLOWED_QUERY_FIELDS = {
|
|
|
28
28
|
"nchans",
|
|
29
29
|
"ntimes",
|
|
30
30
|
}
|
|
31
|
+
"""set: A set of field names that are permitted in database queries constructed
|
|
32
|
+
via :func:`~eegdash.api.EEGDash.find` with keyword arguments."""
|
|
31
33
|
|
|
32
34
|
RELEASE_TO_OPENNEURO_DATASET_MAP = {
|
|
33
35
|
"R11": "ds005516",
|
|
@@ -42,6 +44,8 @@ RELEASE_TO_OPENNEURO_DATASET_MAP = {
|
|
|
42
44
|
"R2": "ds005506",
|
|
43
45
|
"R1": "ds005505",
|
|
44
46
|
}
|
|
47
|
+
"""dict: A mapping from Healthy Brain Network (HBN) release identifiers (e.g., "R11")
|
|
48
|
+
to their corresponding OpenNeuro dataset identifiers (e.g., "ds005516")."""
|
|
45
49
|
|
|
46
50
|
SUBJECT_MINI_RELEASE_MAP = {
|
|
47
51
|
"R11": [
|
|
@@ -287,6 +291,9 @@ SUBJECT_MINI_RELEASE_MAP = {
|
|
|
287
291
|
"NDARFW972KFQ",
|
|
288
292
|
],
|
|
289
293
|
}
|
|
294
|
+
"""dict: A mapping from HBN release identifiers to a list of subject IDs.
|
|
295
|
+
This is used to select a small, representative subset of subjects for creating
|
|
296
|
+
"mini" datasets for testing and demonstration purposes."""
|
|
290
297
|
|
|
291
298
|
config = {
|
|
292
299
|
"required_fields": ["data_name"],
|
|
@@ -322,3 +329,21 @@ config = {
|
|
|
322
329
|
],
|
|
323
330
|
"accepted_query_fields": ["data_name", "dataset"],
|
|
324
331
|
}
|
|
332
|
+
"""dict: A global configuration dictionary for the EEGDash package.
|
|
333
|
+
|
|
334
|
+
Keys
|
|
335
|
+
----
|
|
336
|
+
required_fields : list
|
|
337
|
+
Fields that must be present in every database record.
|
|
338
|
+
attributes : dict
|
|
339
|
+
A schema defining the expected primary attributes and their types for a
|
|
340
|
+
database record.
|
|
341
|
+
description_fields : list
|
|
342
|
+
A list of fields considered to be descriptive metadata for a recording,
|
|
343
|
+
which can be used for filtering and display.
|
|
344
|
+
bids_dependencies_files : list
|
|
345
|
+
A list of BIDS metadata filenames that are relevant for interpreting an
|
|
346
|
+
EEG recording.
|
|
347
|
+
accepted_query_fields : list
|
|
348
|
+
Fields that are accepted for lightweight existence checks in the database.
|
|
349
|
+
"""
|