eegdash 0.4.0.dev153__py3-none-any.whl → 0.4.0.dev171__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. Click here for more details.
- eegdash/__init__.py +1 -1
- eegdash/api.py +180 -86
- eegdash/bids_eeg_metadata.py +139 -39
- eegdash/const.py +25 -0
- eegdash/data_utils.py +239 -173
- eegdash/dataset/dataset.py +35 -13
- eegdash/dataset/dataset_summary.csv +255 -255
- eegdash/dataset/registry.py +69 -4
- eegdash/downloader.py +95 -9
- eegdash/features/datasets.py +320 -136
- eegdash/features/decorators.py +88 -3
- eegdash/features/extractors.py +203 -55
- eegdash/features/feature_bank/complexity.py +7 -3
- eegdash/features/feature_bank/dimensionality.py +1 -1
- eegdash/features/inspect.py +80 -5
- eegdash/features/serialization.py +47 -19
- eegdash/features/utils.py +75 -8
- eegdash/hbn/preprocessing.py +50 -17
- eegdash/hbn/windows.py +145 -32
- eegdash/logging.py +19 -0
- eegdash/mongodb.py +44 -27
- eegdash/paths.py +14 -5
- eegdash/utils.py +16 -1
- {eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev171.dist-info}/METADATA +1 -1
- eegdash-0.4.0.dev171.dist-info/RECORD +37 -0
- eegdash-0.4.0.dev153.dist-info/RECORD +0 -37
- {eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev171.dist-info}/WHEEL +0 -0
- {eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev171.dist-info}/licenses/LICENSE +0 -0
- {eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev171.dist-info}/top_level.txt +0 -0
eegdash/__init__.py
CHANGED
eegdash/api.py
CHANGED
|
@@ -212,17 +212,22 @@ class EEGDash:
|
|
|
212
212
|
return doc is not None
|
|
213
213
|
|
|
214
214
|
def _validate_input(self, record: dict[str, Any]) -> dict[str, Any]:
|
|
215
|
-
"""
|
|
215
|
+
"""Validate the input record against the expected schema.
|
|
216
216
|
|
|
217
217
|
Parameters
|
|
218
218
|
----------
|
|
219
|
-
record: dict
|
|
219
|
+
record : dict
|
|
220
220
|
A dictionary representing the EEG data record to be validated.
|
|
221
221
|
|
|
222
222
|
Returns
|
|
223
223
|
-------
|
|
224
|
-
dict
|
|
225
|
-
|
|
224
|
+
dict
|
|
225
|
+
The record itself on success.
|
|
226
|
+
|
|
227
|
+
Raises
|
|
228
|
+
------
|
|
229
|
+
ValueError
|
|
230
|
+
If the record is missing required keys or has values of the wrong type.
|
|
226
231
|
|
|
227
232
|
"""
|
|
228
233
|
input_types = {
|
|
@@ -252,20 +257,44 @@ class EEGDash:
|
|
|
252
257
|
return record
|
|
253
258
|
|
|
254
259
|
def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
|
|
255
|
-
"""
|
|
260
|
+
"""Build a validated MongoDB query from keyword arguments.
|
|
261
|
+
|
|
262
|
+
This delegates to the module-level builder used across the package.
|
|
263
|
+
|
|
264
|
+
Parameters
|
|
265
|
+
----------
|
|
266
|
+
**kwargs
|
|
267
|
+
Keyword arguments to convert into a MongoDB query.
|
|
268
|
+
|
|
269
|
+
Returns
|
|
270
|
+
-------
|
|
271
|
+
dict
|
|
272
|
+
A MongoDB query dictionary.
|
|
256
273
|
|
|
257
|
-
This delegates to the module-level builder used across the package and
|
|
258
|
-
is exposed here for testing and convenience.
|
|
259
274
|
"""
|
|
260
275
|
return build_query_from_kwargs(**kwargs)
|
|
261
276
|
|
|
262
|
-
|
|
263
|
-
|
|
277
|
+
def _extract_simple_constraint(
|
|
278
|
+
self, query: dict[str, Any], key: str
|
|
279
|
+
) -> tuple[str, Any] | None:
|
|
264
280
|
"""Extract a simple constraint for a given key from a query dict.
|
|
265
281
|
|
|
266
|
-
Supports
|
|
267
|
-
|
|
268
|
-
|
|
282
|
+
Supports top-level equality (e.g., ``{'subject': '01'}``) and ``$in``
|
|
283
|
+
(e.g., ``{'subject': {'$in': ['01', '02']}}``) constraints.
|
|
284
|
+
|
|
285
|
+
Parameters
|
|
286
|
+
----------
|
|
287
|
+
query : dict
|
|
288
|
+
The MongoDB query dictionary.
|
|
289
|
+
key : str
|
|
290
|
+
The key for which to extract the constraint.
|
|
291
|
+
|
|
292
|
+
Returns
|
|
293
|
+
-------
|
|
294
|
+
tuple or None
|
|
295
|
+
A tuple of (kind, value) where kind is "eq" or "in", or None if the
|
|
296
|
+
constraint is not present or unsupported.
|
|
297
|
+
|
|
269
298
|
"""
|
|
270
299
|
if not isinstance(query, dict) or key not in query:
|
|
271
300
|
return None
|
|
@@ -275,16 +304,28 @@ class EEGDash:
|
|
|
275
304
|
return ("in", list(val["$in"]))
|
|
276
305
|
return None # unsupported operator shape for conflict checking
|
|
277
306
|
else:
|
|
278
|
-
return
|
|
307
|
+
return "eq", val
|
|
279
308
|
|
|
280
309
|
def _raise_if_conflicting_constraints(
|
|
281
310
|
self, raw_query: dict[str, Any], kwargs_query: dict[str, Any]
|
|
282
311
|
) -> None:
|
|
283
|
-
"""Raise ValueError if
|
|
312
|
+
"""Raise ValueError if query sources have incompatible constraints.
|
|
313
|
+
|
|
314
|
+
Checks for mutually exclusive constraints on the same field to avoid
|
|
315
|
+
silent empty results.
|
|
316
|
+
|
|
317
|
+
Parameters
|
|
318
|
+
----------
|
|
319
|
+
raw_query : dict
|
|
320
|
+
The raw MongoDB query dictionary.
|
|
321
|
+
kwargs_query : dict
|
|
322
|
+
The query dictionary built from keyword arguments.
|
|
323
|
+
|
|
324
|
+
Raises
|
|
325
|
+
------
|
|
326
|
+
ValueError
|
|
327
|
+
If conflicting constraints are found.
|
|
284
328
|
|
|
285
|
-
We conservatively check only top-level fields with simple equality or $in
|
|
286
|
-
constraints. If a field appears in both queries and constraints are mutually
|
|
287
|
-
exclusive, raise an explicit error to avoid silent empty result sets.
|
|
288
329
|
"""
|
|
289
330
|
if not raw_query or not kwargs_query:
|
|
290
331
|
return
|
|
@@ -388,12 +429,31 @@ class EEGDash:
|
|
|
388
429
|
logger.info("Upserted: %s", result.upserted_count)
|
|
389
430
|
logger.info("Errors: %s ", result.bulk_api_result.get("writeErrors", []))
|
|
390
431
|
|
|
391
|
-
def _add_request(self, record: dict):
|
|
392
|
-
"""
|
|
432
|
+
def _add_request(self, record: dict) -> InsertOne:
|
|
433
|
+
"""Create a MongoDB insertion request for a record.
|
|
434
|
+
|
|
435
|
+
Parameters
|
|
436
|
+
----------
|
|
437
|
+
record : dict
|
|
438
|
+
The record to insert.
|
|
439
|
+
|
|
440
|
+
Returns
|
|
441
|
+
-------
|
|
442
|
+
InsertOne
|
|
443
|
+
A PyMongo ``InsertOne`` object.
|
|
444
|
+
|
|
445
|
+
"""
|
|
393
446
|
return InsertOne(record)
|
|
394
447
|
|
|
395
|
-
def add(self, record: dict):
|
|
396
|
-
"""Add a single record to the MongoDB collection.
|
|
448
|
+
def add(self, record: dict) -> None:
|
|
449
|
+
"""Add a single record to the MongoDB collection.
|
|
450
|
+
|
|
451
|
+
Parameters
|
|
452
|
+
----------
|
|
453
|
+
record : dict
|
|
454
|
+
The record to add.
|
|
455
|
+
|
|
456
|
+
"""
|
|
397
457
|
try:
|
|
398
458
|
self.__collection.insert_one(record)
|
|
399
459
|
except ValueError as e:
|
|
@@ -405,11 +465,23 @@ class EEGDash:
|
|
|
405
465
|
)
|
|
406
466
|
logger.debug("Add operation failed", exc_info=exc)
|
|
407
467
|
|
|
408
|
-
def _update_request(self, record: dict):
|
|
409
|
-
"""
|
|
468
|
+
def _update_request(self, record: dict) -> UpdateOne:
|
|
469
|
+
"""Create a MongoDB update request for a record.
|
|
470
|
+
|
|
471
|
+
Parameters
|
|
472
|
+
----------
|
|
473
|
+
record : dict
|
|
474
|
+
The record to update.
|
|
475
|
+
|
|
476
|
+
Returns
|
|
477
|
+
-------
|
|
478
|
+
UpdateOne
|
|
479
|
+
A PyMongo ``UpdateOne`` object.
|
|
480
|
+
|
|
481
|
+
"""
|
|
410
482
|
return UpdateOne({"data_name": record["data_name"]}, {"$set": record})
|
|
411
483
|
|
|
412
|
-
def update(self, record: dict):
|
|
484
|
+
def update(self, record: dict) -> None:
|
|
413
485
|
"""Update a single record in the MongoDB collection.
|
|
414
486
|
|
|
415
487
|
Parameters
|
|
@@ -429,58 +501,81 @@ class EEGDash:
|
|
|
429
501
|
logger.debug("Update operation failed", exc_info=exc)
|
|
430
502
|
|
|
431
503
|
def exists(self, query: dict[str, Any]) -> bool:
|
|
432
|
-
"""
|
|
504
|
+
"""Check if at least one record matches the query.
|
|
505
|
+
|
|
506
|
+
This is an alias for :meth:`exist`.
|
|
507
|
+
|
|
508
|
+
Parameters
|
|
509
|
+
----------
|
|
510
|
+
query : dict
|
|
511
|
+
MongoDB query to check for existence.
|
|
512
|
+
|
|
513
|
+
Returns
|
|
514
|
+
-------
|
|
515
|
+
bool
|
|
516
|
+
True if a matching record exists, False otherwise.
|
|
517
|
+
|
|
518
|
+
"""
|
|
433
519
|
return self.exist(query)
|
|
434
520
|
|
|
435
|
-
def remove_field(self, record, field):
|
|
436
|
-
"""Remove a
|
|
521
|
+
def remove_field(self, record: dict, field: str) -> None:
|
|
522
|
+
"""Remove a field from a specific record in the MongoDB collection.
|
|
437
523
|
|
|
438
524
|
Parameters
|
|
439
525
|
----------
|
|
440
526
|
record : dict
|
|
441
|
-
Record
|
|
527
|
+
Record-identifying object with a ``data_name`` key.
|
|
442
528
|
field : str
|
|
443
|
-
|
|
529
|
+
The name of the field to remove.
|
|
444
530
|
|
|
445
531
|
"""
|
|
446
532
|
self.__collection.update_one(
|
|
447
533
|
{"data_name": record["data_name"]}, {"$unset": {field: 1}}
|
|
448
534
|
)
|
|
449
535
|
|
|
450
|
-
def remove_field_from_db(self, field):
|
|
451
|
-
"""Remove a field from all records
|
|
536
|
+
def remove_field_from_db(self, field: str) -> None:
|
|
537
|
+
"""Remove a field from all records in the database.
|
|
538
|
+
|
|
539
|
+
.. warning::
|
|
540
|
+
This is a destructive operation and cannot be undone.
|
|
452
541
|
|
|
453
542
|
Parameters
|
|
454
543
|
----------
|
|
455
544
|
field : str
|
|
456
|
-
|
|
545
|
+
The name of the field to remove from all documents.
|
|
457
546
|
|
|
458
547
|
"""
|
|
459
548
|
self.__collection.update_many({}, {"$unset": {field: 1}})
|
|
460
549
|
|
|
461
550
|
@property
|
|
462
551
|
def collection(self):
|
|
463
|
-
"""
|
|
464
|
-
return self.__collection
|
|
552
|
+
"""The underlying PyMongo ``Collection`` object.
|
|
465
553
|
|
|
466
|
-
|
|
467
|
-
|
|
554
|
+
Returns
|
|
555
|
+
-------
|
|
556
|
+
pymongo.collection.Collection
|
|
557
|
+
The collection object used for database interactions.
|
|
468
558
|
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
Connections are managed by :class:`MongoConnectionManager`. Use
|
|
472
|
-
:meth:`close_all_connections` to explicitly close all clients.
|
|
559
|
+
"""
|
|
560
|
+
return self.__collection
|
|
473
561
|
|
|
562
|
+
def close(self) -> None:
|
|
563
|
+
"""Close the MongoDB connection.
|
|
564
|
+
|
|
565
|
+
.. deprecated:: 0.1
|
|
566
|
+
Connections are now managed globally by :class:`MongoConnectionManager`.
|
|
567
|
+
This method is a no-op and will be removed in a future version.
|
|
568
|
+
Use :meth:`EEGDash.close_all_connections` to close all clients.
|
|
474
569
|
"""
|
|
475
570
|
# Individual instances no longer close the shared client
|
|
476
571
|
pass
|
|
477
572
|
|
|
478
573
|
@classmethod
|
|
479
|
-
def close_all_connections(cls):
|
|
480
|
-
"""Close all MongoDB client connections managed by the singleton."""
|
|
574
|
+
def close_all_connections(cls) -> None:
|
|
575
|
+
"""Close all MongoDB client connections managed by the singleton manager."""
|
|
481
576
|
MongoConnectionManager.close_all()
|
|
482
577
|
|
|
483
|
-
def __del__(self):
|
|
578
|
+
def __del__(self) -> None:
|
|
484
579
|
"""Destructor; no explicit action needed due to global connection manager."""
|
|
485
580
|
# No longer needed since we're using singleton pattern
|
|
486
581
|
pass
|
|
@@ -775,45 +870,30 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
|
|
|
775
870
|
) -> list[dict]:
|
|
776
871
|
"""Discover local BIDS EEG files and build minimal records.
|
|
777
872
|
|
|
778
|
-
|
|
779
|
-
``mne_bids.find_matching_paths`` and applies entity filters to produce
|
|
780
|
-
|
|
781
|
-
|
|
873
|
+
Enumerates EEG recordings under ``dataset_root`` using
|
|
874
|
+
``mne_bids.find_matching_paths`` and applies entity filters to produce
|
|
875
|
+
records suitable for :class:`EEGDashBaseDataset`. No network access is
|
|
876
|
+
performed, and files are not read.
|
|
782
877
|
|
|
783
878
|
Parameters
|
|
784
879
|
----------
|
|
785
880
|
dataset_root : Path
|
|
786
|
-
Local dataset directory
|
|
787
|
-
|
|
788
|
-
``
|
|
789
|
-
|
|
790
|
-
Query filters. Must include ``'dataset'`` with the dataset id (without
|
|
791
|
-
local suffixes). May include BIDS entities ``'subject'``,
|
|
792
|
-
``'session'``, ``'task'``, and ``'run'``. Each value can be a scalar
|
|
793
|
-
or a sequence of scalars.
|
|
881
|
+
Local dataset directory (e.g., ``/path/to/cache/ds005509``).
|
|
882
|
+
filters : dict
|
|
883
|
+
Query filters. Must include ``'dataset'`` and may include BIDS
|
|
884
|
+
entities like ``'subject'``, ``'session'``, etc.
|
|
794
885
|
|
|
795
886
|
Returns
|
|
796
887
|
-------
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
- ``'data_name'``
|
|
801
|
-
- ``'dataset'`` (dataset id, without suffixes)
|
|
802
|
-
- ``'bidspath'`` (normalized to start with the dataset id)
|
|
803
|
-
- ``'subject'``, ``'session'``, ``'task'``, ``'run'`` (may be None)
|
|
804
|
-
- ``'bidsdependencies'`` (empty list)
|
|
805
|
-
- ``'modality'`` (``"eeg"``)
|
|
806
|
-
- ``'sampling_frequency'``, ``'nchans'``, ``'ntimes'`` (minimal
|
|
807
|
-
defaults for offline usage)
|
|
888
|
+
list of dict
|
|
889
|
+
A list of records, one for each matched EEG file. Each record
|
|
890
|
+
contains BIDS entities, paths, and minimal metadata for offline use.
|
|
808
891
|
|
|
809
892
|
Notes
|
|
810
893
|
-----
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
first path component is the dataset id (without local cache suffixes).
|
|
815
|
-
- Minimal defaults are set for ``sampling_frequency``, ``nchans``, and
|
|
816
|
-
``ntimes`` to satisfy dataset length requirements offline.
|
|
894
|
+
Matching is performed for ``datatypes=['eeg']`` and ``suffixes=['eeg']``.
|
|
895
|
+
The ``bidspath`` is normalized to ensure it starts with the dataset ID,
|
|
896
|
+
even for suffixed cache directories.
|
|
817
897
|
|
|
818
898
|
"""
|
|
819
899
|
dataset_id = filters["dataset"]
|
|
@@ -875,10 +955,22 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
|
|
|
875
955
|
return records_out
|
|
876
956
|
|
|
877
957
|
def _find_key_in_nested_dict(self, data: Any, target_key: str) -> Any:
|
|
878
|
-
"""Recursively search for
|
|
958
|
+
"""Recursively search for a key in nested dicts/lists.
|
|
959
|
+
|
|
960
|
+
Performs a case-insensitive and underscore/hyphen-agnostic search.
|
|
961
|
+
|
|
962
|
+
Parameters
|
|
963
|
+
----------
|
|
964
|
+
data : Any
|
|
965
|
+
The nested data structure (dicts, lists) to search.
|
|
966
|
+
target_key : str
|
|
967
|
+
The key to search for.
|
|
968
|
+
|
|
969
|
+
Returns
|
|
970
|
+
-------
|
|
971
|
+
Any
|
|
972
|
+
The value of the first matching key, or None if not found.
|
|
879
973
|
|
|
880
|
-
This makes lookups tolerant to naming differences like "p-factor" vs "p_factor".
|
|
881
|
-
Returns the first match or None.
|
|
882
974
|
"""
|
|
883
975
|
norm_target = normalize_key(target_key)
|
|
884
976
|
if isinstance(data, dict):
|
|
@@ -901,23 +993,25 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
|
|
|
901
993
|
description_fields: list[str],
|
|
902
994
|
base_dataset_kwargs: dict,
|
|
903
995
|
) -> list[EEGDashBaseDataset]:
|
|
904
|
-
"""
|
|
905
|
-
|
|
996
|
+
"""Find and construct datasets from a MongoDB query.
|
|
997
|
+
|
|
998
|
+
Queries the database, then creates a list of
|
|
999
|
+
:class:`EEGDashBaseDataset` objects from the results.
|
|
906
1000
|
|
|
907
1001
|
Parameters
|
|
908
1002
|
----------
|
|
909
|
-
query : dict
|
|
910
|
-
The query
|
|
911
|
-
description_fields : list
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
constructor.
|
|
1003
|
+
query : dict, optional
|
|
1004
|
+
The MongoDB query to execute.
|
|
1005
|
+
description_fields : list of str
|
|
1006
|
+
Fields to extract from each record for the dataset description.
|
|
1007
|
+
base_dataset_kwargs : dict
|
|
1008
|
+
Additional keyword arguments to pass to the
|
|
1009
|
+
:class:`EEGDashBaseDataset` constructor.
|
|
916
1010
|
|
|
917
1011
|
Returns
|
|
918
1012
|
-------
|
|
919
|
-
list
|
|
920
|
-
A list of
|
|
1013
|
+
list of EEGDashBaseDataset
|
|
1014
|
+
A list of dataset objects matching the query.
|
|
921
1015
|
|
|
922
1016
|
"""
|
|
923
1017
|
datasets: list[EEGDashBaseDataset] = []
|
eegdash/bids_eeg_metadata.py
CHANGED
|
@@ -33,12 +33,30 @@ __all__ = [
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
|
|
36
|
-
"""Build and validate a MongoDB query from
|
|
36
|
+
"""Build and validate a MongoDB query from keyword arguments.
|
|
37
|
+
|
|
38
|
+
This function converts user-friendly keyword arguments into a valid
|
|
39
|
+
MongoDB query dictionary. It handles scalar values as exact matches and
|
|
40
|
+
list-like values as ``$in`` queries. It also performs validation to
|
|
41
|
+
reject unsupported fields and empty values.
|
|
42
|
+
|
|
43
|
+
Parameters
|
|
44
|
+
----------
|
|
45
|
+
**kwargs
|
|
46
|
+
Keyword arguments representing query filters. Allowed keys are defined
|
|
47
|
+
in ``eegdash.const.ALLOWED_QUERY_FIELDS``.
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
dict
|
|
52
|
+
A MongoDB query dictionary.
|
|
53
|
+
|
|
54
|
+
Raises
|
|
55
|
+
------
|
|
56
|
+
ValueError
|
|
57
|
+
If an unsupported query field is provided, or if a value is None or
|
|
58
|
+
an empty string/list.
|
|
37
59
|
|
|
38
|
-
Improvements:
|
|
39
|
-
- Reject None values and empty/whitespace-only strings
|
|
40
|
-
- For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
|
|
41
|
-
- Preserve scalars as exact matches
|
|
42
60
|
"""
|
|
43
61
|
# 1. Validate that all provided keys are allowed for querying
|
|
44
62
|
unknown_fields = set(kwargs.keys()) - ALLOWED_QUERY_FIELDS
|
|
@@ -89,24 +107,29 @@ def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
|
|
|
89
107
|
|
|
90
108
|
|
|
91
109
|
def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any]:
|
|
92
|
-
"""Build
|
|
110
|
+
"""Build a metadata record for a BIDS file.
|
|
93
111
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
112
|
+
Extracts metadata attributes from a single BIDS EEG file within a given
|
|
113
|
+
BIDS dataset. The extracted attributes include BIDS entities, file paths,
|
|
114
|
+
and technical metadata required for database indexing.
|
|
97
115
|
|
|
98
116
|
Parameters
|
|
99
117
|
----------
|
|
100
118
|
bids_dataset : EEGBIDSDataset
|
|
101
119
|
The BIDS dataset object containing the file.
|
|
102
120
|
bids_file : str
|
|
103
|
-
The path to the BIDS file
|
|
121
|
+
The path to the BIDS file to process.
|
|
104
122
|
|
|
105
123
|
Returns
|
|
106
124
|
-------
|
|
107
|
-
dict
|
|
108
|
-
A dictionary
|
|
109
|
-
|
|
125
|
+
dict
|
|
126
|
+
A dictionary of metadata attributes for the file, suitable for
|
|
127
|
+
insertion into the database.
|
|
128
|
+
|
|
129
|
+
Raises
|
|
130
|
+
------
|
|
131
|
+
ValueError
|
|
132
|
+
If ``bids_file`` is not found in the ``bids_dataset``.
|
|
110
133
|
|
|
111
134
|
"""
|
|
112
135
|
if bids_file not in bids_dataset.files:
|
|
@@ -198,11 +221,23 @@ def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any
|
|
|
198
221
|
|
|
199
222
|
|
|
200
223
|
def normalize_key(key: str) -> str:
|
|
201
|
-
"""Normalize a
|
|
224
|
+
"""Normalize a string key for robust matching.
|
|
225
|
+
|
|
226
|
+
Converts the key to lowercase, replaces non-alphanumeric characters with
|
|
227
|
+
underscores, and removes leading/trailing underscores. This allows for
|
|
228
|
+
tolerant matching of keys that may have different capitalization or
|
|
229
|
+
separators (e.g., "p-factor" becomes "p_factor").
|
|
230
|
+
|
|
231
|
+
Parameters
|
|
232
|
+
----------
|
|
233
|
+
key : str
|
|
234
|
+
The key to normalize.
|
|
235
|
+
|
|
236
|
+
Returns
|
|
237
|
+
-------
|
|
238
|
+
str
|
|
239
|
+
The normalized key.
|
|
202
240
|
|
|
203
|
-
Lowercase and replace non-alphanumeric characters with underscores, then strip
|
|
204
|
-
leading/trailing underscores. This allows tolerant matching such as
|
|
205
|
-
"p-factor" ≈ "p_factor" ≈ "P Factor".
|
|
206
241
|
"""
|
|
207
242
|
return re.sub(r"[^a-z0-9]+", "_", str(key).lower()).strip("_")
|
|
208
243
|
|
|
@@ -212,27 +247,27 @@ def merge_participants_fields(
|
|
|
212
247
|
participants_row: dict[str, Any] | None,
|
|
213
248
|
description_fields: list[str] | None = None,
|
|
214
249
|
) -> dict[str, Any]:
|
|
215
|
-
"""Merge participants.tsv
|
|
250
|
+
"""Merge fields from a participants.tsv row into a description dict.
|
|
216
251
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
unless a matching requested field already captured them.
|
|
252
|
+
Enriches a description dictionary with data from a subject's row in
|
|
253
|
+
``participants.tsv``. It avoids overwriting existing keys in the
|
|
254
|
+
description.
|
|
221
255
|
|
|
222
256
|
Parameters
|
|
223
257
|
----------
|
|
224
258
|
description : dict
|
|
225
|
-
|
|
226
|
-
participants_row : dict
|
|
227
|
-
A
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
259
|
+
The description dictionary to enrich.
|
|
260
|
+
participants_row : dict or None
|
|
261
|
+
A dictionary representing a row from ``participants.tsv``. If None,
|
|
262
|
+
the original description is returned unchanged.
|
|
263
|
+
description_fields : list of str, optional
|
|
264
|
+
A list of specific fields to include in the description. Matching is
|
|
265
|
+
done using normalized keys.
|
|
231
266
|
|
|
232
267
|
Returns
|
|
233
268
|
-------
|
|
234
269
|
dict
|
|
235
|
-
The enriched description
|
|
270
|
+
The enriched description dictionary.
|
|
236
271
|
|
|
237
272
|
"""
|
|
238
273
|
if not isinstance(description, dict) or not isinstance(participants_row, dict):
|
|
@@ -272,10 +307,26 @@ def participants_row_for_subject(
|
|
|
272
307
|
subject: str,
|
|
273
308
|
id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
|
|
274
309
|
) -> pd.Series | None:
|
|
275
|
-
"""Load participants.tsv and return the row for a subject.
|
|
310
|
+
"""Load participants.tsv and return the row for a specific subject.
|
|
311
|
+
|
|
312
|
+
Searches for a subject's data in the ``participants.tsv`` file within a
|
|
313
|
+
BIDS dataset. It can identify the subject with or without the "sub-"
|
|
314
|
+
prefix.
|
|
315
|
+
|
|
316
|
+
Parameters
|
|
317
|
+
----------
|
|
318
|
+
bids_root : str or Path
|
|
319
|
+
The root directory of the BIDS dataset.
|
|
320
|
+
subject : str
|
|
321
|
+
The subject identifier (e.g., "01" or "sub-01").
|
|
322
|
+
id_columns : tuple of str, default ("participant_id", "participant", "subject")
|
|
323
|
+
A tuple of column names to search for the subject identifier.
|
|
324
|
+
|
|
325
|
+
Returns
|
|
326
|
+
-------
|
|
327
|
+
pandas.Series or None
|
|
328
|
+
A pandas Series containing the subject's data if found, otherwise None.
|
|
276
329
|
|
|
277
|
-
- Accepts either "01" or "sub-01" as the subject identifier.
|
|
278
|
-
- Returns a pandas Series for the first matching row, or None if not found.
|
|
279
330
|
"""
|
|
280
331
|
try:
|
|
281
332
|
participants_tsv = Path(bids_root) / "participants.tsv"
|
|
@@ -311,9 +362,28 @@ def participants_extras_from_tsv(
|
|
|
311
362
|
id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
|
|
312
363
|
na_like: tuple[str, ...] = ("", "n/a", "na", "nan", "unknown", "none"),
|
|
313
364
|
) -> dict[str, Any]:
|
|
314
|
-
"""
|
|
365
|
+
"""Extract additional participant information from participants.tsv.
|
|
366
|
+
|
|
367
|
+
Retrieves all non-identifier and non-empty fields for a subject from
|
|
368
|
+
the ``participants.tsv`` file.
|
|
369
|
+
|
|
370
|
+
Parameters
|
|
371
|
+
----------
|
|
372
|
+
bids_root : str or Path
|
|
373
|
+
The root directory of the BIDS dataset.
|
|
374
|
+
subject : str
|
|
375
|
+
The subject identifier.
|
|
376
|
+
id_columns : tuple of str, default ("participant_id", "participant", "subject")
|
|
377
|
+
Column names to be treated as identifiers and excluded from the
|
|
378
|
+
output.
|
|
379
|
+
na_like : tuple of str, default ("", "n/a", "na", "nan", "unknown", "none")
|
|
380
|
+
Values to be considered as "Not Available" and excluded.
|
|
381
|
+
|
|
382
|
+
Returns
|
|
383
|
+
-------
|
|
384
|
+
dict
|
|
385
|
+
A dictionary of extra participant information.
|
|
315
386
|
|
|
316
|
-
Uses vectorized pandas operations to drop id columns and NA-like values.
|
|
317
387
|
"""
|
|
318
388
|
row = participants_row_for_subject(bids_root, subject, id_columns=id_columns)
|
|
319
389
|
if row is None:
|
|
@@ -331,10 +401,21 @@ def attach_participants_extras(
|
|
|
331
401
|
description: Any,
|
|
332
402
|
extras: dict[str, Any],
|
|
333
403
|
) -> None:
|
|
334
|
-
"""Attach
|
|
404
|
+
"""Attach extra participant data to a raw object and its description.
|
|
405
|
+
|
|
406
|
+
Updates the ``raw.info['subject_info']`` and the description object
|
|
407
|
+
(dict or pandas Series) with extra data from ``participants.tsv``.
|
|
408
|
+
It does not overwrite existing keys.
|
|
409
|
+
|
|
410
|
+
Parameters
|
|
411
|
+
----------
|
|
412
|
+
raw : mne.io.Raw
|
|
413
|
+
The MNE Raw object to be updated.
|
|
414
|
+
description : dict or pandas.Series
|
|
415
|
+
The description object to be updated.
|
|
416
|
+
extras : dict
|
|
417
|
+
A dictionary of extra participant information to attach.
|
|
335
418
|
|
|
336
|
-
- Adds to ``raw.info['subject_info']['participants_extras']``.
|
|
337
|
-
- Adds to ``description`` if dict or pandas Series (only missing keys).
|
|
338
419
|
"""
|
|
339
420
|
if not extras:
|
|
340
421
|
return
|
|
@@ -375,9 +456,28 @@ def enrich_from_participants(
|
|
|
375
456
|
raw: Any,
|
|
376
457
|
description: Any,
|
|
377
458
|
) -> dict[str, Any]:
|
|
378
|
-
"""
|
|
459
|
+
"""Read participants.tsv and attach extra info for the subject.
|
|
460
|
+
|
|
461
|
+
This is a convenience function that finds the subject from the
|
|
462
|
+
``bidspath``, retrieves extra information from ``participants.tsv``,
|
|
463
|
+
and attaches it to the raw object and its description.
|
|
464
|
+
|
|
465
|
+
Parameters
|
|
466
|
+
----------
|
|
467
|
+
bids_root : str or Path
|
|
468
|
+
The root directory of the BIDS dataset.
|
|
469
|
+
bidspath : mne_bids.BIDSPath
|
|
470
|
+
The BIDSPath object for the current data file.
|
|
471
|
+
raw : mne.io.Raw
|
|
472
|
+
The MNE Raw object to be updated.
|
|
473
|
+
description : dict or pandas.Series
|
|
474
|
+
The description object to be updated.
|
|
475
|
+
|
|
476
|
+
Returns
|
|
477
|
+
-------
|
|
478
|
+
dict
|
|
479
|
+
The dictionary of extras that were attached.
|
|
379
480
|
|
|
380
|
-
Returns the extras dictionary for further use if needed.
|
|
381
481
|
"""
|
|
382
482
|
subject = getattr(bidspath, "subject", None)
|
|
383
483
|
if not subject:
|