eegdash 0.3.3.dev61__py3-none-any.whl → 0.5.0.dev180784713__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. eegdash/__init__.py +19 -6
  2. eegdash/api.py +336 -539
  3. eegdash/bids_eeg_metadata.py +495 -0
  4. eegdash/const.py +349 -0
  5. eegdash/dataset/__init__.py +28 -0
  6. eegdash/dataset/base.py +311 -0
  7. eegdash/dataset/bids_dataset.py +641 -0
  8. eegdash/dataset/dataset.py +692 -0
  9. eegdash/dataset/dataset_summary.csv +255 -0
  10. eegdash/dataset/registry.py +287 -0
  11. eegdash/downloader.py +197 -0
  12. eegdash/features/__init__.py +15 -13
  13. eegdash/features/datasets.py +329 -138
  14. eegdash/features/decorators.py +105 -13
  15. eegdash/features/extractors.py +233 -63
  16. eegdash/features/feature_bank/__init__.py +12 -12
  17. eegdash/features/feature_bank/complexity.py +22 -20
  18. eegdash/features/feature_bank/connectivity.py +27 -28
  19. eegdash/features/feature_bank/csp.py +3 -1
  20. eegdash/features/feature_bank/dimensionality.py +6 -6
  21. eegdash/features/feature_bank/signal.py +29 -30
  22. eegdash/features/feature_bank/spectral.py +40 -44
  23. eegdash/features/feature_bank/utils.py +8 -0
  24. eegdash/features/inspect.py +126 -15
  25. eegdash/features/serialization.py +58 -17
  26. eegdash/features/utils.py +90 -16
  27. eegdash/hbn/__init__.py +28 -0
  28. eegdash/hbn/preprocessing.py +105 -0
  29. eegdash/hbn/windows.py +428 -0
  30. eegdash/logging.py +54 -0
  31. eegdash/mongodb.py +55 -24
  32. eegdash/paths.py +52 -0
  33. eegdash/utils.py +29 -1
  34. eegdash-0.5.0.dev180784713.dist-info/METADATA +121 -0
  35. eegdash-0.5.0.dev180784713.dist-info/RECORD +38 -0
  36. eegdash-0.5.0.dev180784713.dist-info/licenses/LICENSE +29 -0
  37. eegdash/data_config.py +0 -34
  38. eegdash/data_utils.py +0 -687
  39. eegdash/dataset.py +0 -69
  40. eegdash/preprocessing.py +0 -63
  41. eegdash-0.3.3.dev61.dist-info/METADATA +0 -192
  42. eegdash-0.3.3.dev61.dist-info/RECORD +0 -28
  43. eegdash-0.3.3.dev61.dist-info/licenses/LICENSE +0 -23
  44. {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/WHEEL +0 -0
  45. {eegdash-0.3.3.dev61.dist-info → eegdash-0.5.0.dev180784713.dist-info}/top_level.txt +0 -0
eegdash/bids_eeg_metadata.py
@@ -0,0 +1,495 @@
+ # Authors: The EEGDash contributors.
+ # License: BSD-3-Clause
+ # Copyright the EEGDash contributors.
+
+ """BIDS metadata processing and query building utilities.
+
+ This module provides functions for processing BIDS-formatted EEG metadata,
+ building database queries from user parameters, and enriching metadata
+ records with participant information. It handles the translation between
+ user-friendly query parameters and MongoDB query syntax.
+ """
+
+ import re
+ from pathlib import Path
+ from typing import Any
+
+ import pandas as pd
+ from mne_bids import BIDSPath
+
+ from .const import ALLOWED_QUERY_FIELDS
+ from .const import config as data_config
+ from .logging import logger
+
+ __all__ = [
+     "build_query_from_kwargs",
+     "load_eeg_attrs_from_bids_file",
+     "merge_participants_fields",
+     "normalize_key",
+     "participants_row_for_subject",
+     "participants_extras_from_tsv",
+     "attach_participants_extras",
+     "enrich_from_participants",
+ ]
+
+
+ def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
+     """Build and validate a MongoDB query from keyword arguments.
+
+     This function converts user-friendly keyword arguments into a valid
+     MongoDB query dictionary. It handles scalar values as exact matches and
+     list-like values as ``$in`` queries. It also performs validation to
+     reject unsupported fields and empty values.
+
+     Parameters
+     ----------
+     **kwargs
+         Keyword arguments representing query filters. Allowed keys are defined
+         in ``eegdash.const.ALLOWED_QUERY_FIELDS``.
+
+     Returns
+     -------
+     dict
+         A MongoDB query dictionary.
+
+     Raises
+     ------
+     ValueError
+         If an unsupported query field is provided, or if a value is None or
+         an empty string/list.
+
+     """
+     # 1. Validate that all provided keys are allowed for querying
+     unknown_fields = set(kwargs.keys()) - ALLOWED_QUERY_FIELDS
+     if unknown_fields:
+         raise ValueError(
+             f"Unsupported query field(s): {', '.join(sorted(unknown_fields))}. "
+             f"Allowed fields are: {', '.join(sorted(ALLOWED_QUERY_FIELDS))}"
+         )
+
+     # 2. Construct the query dictionary
+     query = {}
+     for key, value in kwargs.items():
+         # None is not a valid constraint
+         if value is None:
+             raise ValueError(
+                 f"Received None for query parameter '{key}'. Provide a concrete value."
+             )
+
+         # Handle list-like values as multi-value constraints
+         if isinstance(value, (list, tuple, set)):
+             cleaned: list[Any] = []
+             for item in value:
+                 if item is None:
+                     continue
+                 if isinstance(item, str):
+                     item = item.strip()
+                     if not item:
+                         continue
+                 cleaned.append(item)
+             # Deduplicate while preserving order
+             cleaned = list(dict.fromkeys(cleaned))
+             if not cleaned:
+                 raise ValueError(
+                     f"Received an empty list for query parameter '{key}'. This is not supported."
+                 )
+             query[key] = {"$in": cleaned}
+         else:
+             # Scalars: trim strings and validate
+             if isinstance(value, str):
+                 value = value.strip()
+                 if not value:
+                     raise ValueError(
+                         f"Received an empty string for query parameter '{key}'."
+                     )
+             query[key] = value
+
+     return query
+
+
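A minimal usage sketch (the accession ID is a hypothetical example, and the keys used here must be members of ALLOWED_QUERY_FIELDS for the call to succeed):

    from eegdash.bids_eeg_metadata import build_query_from_kwargs

    # Scalars become exact matches; list-likes become "$in" constraints.
    query = build_query_from_kwargs(
        dataset="ds002718",            # hypothetical accession
        subject=["01", "02", " 02 "],  # trimmed and de-duplicated, order kept
        task="RestingState",
    )
    # query == {"dataset": "ds002718",
    #           "subject": {"$in": ["01", "02"]},
    #           "task": "RestingState"}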
+ def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any]:
+     """Build a metadata record for a BIDS file.
+
+     Extracts metadata attributes from a single BIDS EEG file within a given
+     BIDS dataset. The extracted attributes include BIDS entities, file paths,
+     and technical metadata required for database indexing.
+
+     Parameters
+     ----------
+     bids_dataset : EEGBIDSDataset
+         The BIDS dataset object containing the file.
+     bids_file : str
+         The path to the BIDS file to process.
+
+     Returns
+     -------
+     dict
+         A dictionary of metadata attributes for the file, suitable for
+         insertion into the database.
+
+     Raises
+     ------
+     ValueError
+         If ``bids_file`` is not found in the ``bids_dataset``.
+
+     """
+     if bids_file not in bids_dataset.files:
+         raise ValueError(f"{bids_file} not in {bids_dataset.dataset}")
+
+     # Initialize attrs with None values for all expected fields
+     attrs = {field: None for field in data_config["attributes"].keys()}
+
+     file = Path(bids_file).name
+     dsnumber = bids_dataset.dataset
+     # Extract the OpenNeuro path: find the first occurrence of the dataset
+     # name in the file path and drop everything before it
+     openneuro_path = dsnumber + bids_file.split(dsnumber)[1]
+
+     # Update with actual values where available
+     try:
+         participants_tsv = bids_dataset.subject_participant_tsv(bids_file)
+     except Exception as e:
+         logger.error("Error getting participants_tsv: %s", str(e))
+         participants_tsv = None
+
+     try:
+         eeg_json = bids_dataset.eeg_json(bids_file)
+     except Exception as e:
+         logger.error("Error getting eeg_json: %s", str(e))
+         eeg_json = None
+
+     bids_dependencies_files = data_config["bids_dependencies_files"]
+     bidsdependencies: list[str] = []
+     for extension in bids_dependencies_files:
+         try:
+             dep_path = bids_dataset.get_bids_metadata_files(bids_file, extension)
+             dep_path = [
+                 str(bids_dataset.get_relative_bidspath(dep)) for dep in dep_path
+             ]
+             bidsdependencies.extend(dep_path)
+         except Exception:
+             pass
+
+     bids_path = BIDSPath(
+         subject=bids_dataset.get_bids_file_attribute("subject", bids_file),
+         session=bids_dataset.get_bids_file_attribute("session", bids_file),
+         task=bids_dataset.get_bids_file_attribute("task", bids_file),
+         run=bids_dataset.get_bids_file_attribute("run", bids_file),
+         root=bids_dataset.bidsdir,
+         datatype=bids_dataset.get_bids_file_attribute("modality", bids_file),
+         suffix="eeg",
+         extension=Path(bids_file).suffix,
+         check=False,
+     )
+
+     # Recording formats whose data are split across companion files
+     sidecars_map = {
+         ".set": [".fdt"],
+         ".vhdr": [".eeg", ".vmrk", ".dat", ".raw"],
+     }
+     for ext in sidecars_map.get(bids_path.extension, []):
+         sidecar = bids_path.find_matching_sidecar(extension=ext, on_error="ignore")
+         if sidecar is not None:
+             bidsdependencies.append(str(bids_dataset._get_relative_bidspath(sidecar)))
+
+     # Define field extraction functions with error handling
+     field_extractors = {
+         "data_name": lambda: f"{bids_dataset.dataset}_{file}",
+         "dataset": lambda: bids_dataset.dataset,
+         "bidspath": lambda: openneuro_path,
+         "subject": lambda: bids_dataset.get_bids_file_attribute("subject", bids_file),
+         "task": lambda: bids_dataset.get_bids_file_attribute("task", bids_file),
+         "session": lambda: bids_dataset.get_bids_file_attribute("session", bids_file),
+         "run": lambda: bids_dataset.get_bids_file_attribute("run", bids_file),
+         "modality": lambda: bids_dataset.get_bids_file_attribute("modality", bids_file),
+         "sampling_frequency": lambda: bids_dataset.get_bids_file_attribute(
+             "sfreq", bids_file
+         ),
+         "nchans": lambda: bids_dataset.get_bids_file_attribute("nchans", bids_file),
+         "ntimes": lambda: bids_dataset.get_bids_file_attribute("ntimes", bids_file),
+         "participant_tsv": lambda: participants_tsv,
+         "eeg_json": lambda: eeg_json,
+         "bidsdependencies": lambda: bidsdependencies,
+     }
+
+     # Dynamically populate attrs with error handling
+     for field, extractor in field_extractors.items():
+         try:
+             attrs[field] = extractor()
+         except Exception as e:
+             logger.error("Error extracting %s : %s", field, str(e))
+             attrs[field] = None
+
+     return attrs
+
+
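For context, a sketch of how this record builder might be driven during indexing. The EEGBIDSDataset interface is assumed from its usage above (a .files listing and a .dataset accession); the import path and constructor arguments here are assumptions, not the package's confirmed API:

    from eegdash.dataset.bids_dataset import EEGBIDSDataset  # assumed location

    ds = EEGBIDSDataset(data_dir="/data/ds002718", dataset="ds002718")  # hypothetical
    records = [load_eeg_attrs_from_bids_file(ds, f) for f in ds.files]
    # Each record maps the fields above ("data_name", "dataset", "bidspath", ...)
    # to extracted values, with None wherever extraction failed.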
+ def normalize_key(key: str) -> str:
+     """Normalize a string key for robust matching.
+
+     Converts the key to lowercase, replaces non-alphanumeric characters with
+     underscores, and removes leading/trailing underscores. This allows for
+     tolerant matching of keys that may have different capitalization or
+     separators (e.g., "p-factor" becomes "p_factor").
+
+     Parameters
+     ----------
+     key : str
+         The key to normalize.
+
+     Returns
+     -------
+     str
+         The normalized key.
+
+     """
+     return re.sub(r"[^a-z0-9]+", "_", str(key).lower()).strip("_")
+
+
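Because normalize_key is pure, its behavior is easy to pin down with a few examples:

    assert normalize_key("p-factor") == "p_factor"
    assert normalize_key("Participant ID") == "participant_id"
    assert normalize_key("__Age (years)__") == "age_years"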
+ def merge_participants_fields(
+     description: dict[str, Any],
+     participants_row: dict[str, Any] | None,
+     description_fields: list[str] | None = None,
+ ) -> dict[str, Any]:
+     """Merge fields from a participants.tsv row into a description dict.
+
+     Enriches a description dictionary with data from a subject's row in
+     ``participants.tsv``. It avoids overwriting existing keys in the
+     description.
+
+     Parameters
+     ----------
+     description : dict
+         The description dictionary to enrich.
+     participants_row : dict or None
+         A dictionary representing a row from ``participants.tsv``. If None,
+         the original description is returned unchanged.
+     description_fields : list of str, optional
+         A list of specific fields to include in the description. Matching is
+         done using normalized keys.
+
+     Returns
+     -------
+     dict
+         The enriched description dictionary.
+
+     """
+     if not isinstance(description, dict) or not isinstance(participants_row, dict):
+         return description
+
+     # Normalize participants keys and keep first non-None value per normalized key
+     norm_map: dict[str, Any] = {}
+     for part_key, part_value in participants_row.items():
+         norm_key = normalize_key(part_key)
+         if norm_key not in norm_map and part_value is not None:
+             norm_map[norm_key] = part_value
+
+     # Ensure description_fields is a list for matching
+     requested = list(description_fields or [])
+
+     # 1) Fill requested fields first using normalized matching, preserving names
+     for key in requested:
+         if key in description:
+             continue
+         requested_norm_key = normalize_key(key)
+         if requested_norm_key in norm_map:
+             description[key] = norm_map[requested_norm_key]
+
+     # 2) Add remaining participants columns generically under normalized names,
+     #    unless a requested field already captured them
+     requested_norm = {normalize_key(k) for k in requested}
+     for norm_key, part_value in norm_map.items():
+         if norm_key in requested_norm:
+             continue
+         if norm_key not in description:
+             description[norm_key] = part_value
+     return description
+
+
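A small illustration with plain dicts (all values hypothetical). The requested field keeps its requested spelling, the remaining participants column is added under its normalized name, and existing keys and None values are left alone:

    description = {"subject": "01"}
    row = {"p-factor": "0.52", "Age": "12", "sex": None}

    merged = merge_participants_fields(description, row, description_fields=["p_factor"])
    # merged == {"subject": "01", "p_factor": "0.52", "age": "12"}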
+ def participants_row_for_subject(
+     bids_root: str | Path,
+     subject: str,
+     id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
+ ) -> pd.Series | None:
+     """Load participants.tsv and return the row for a specific subject.
+
+     Searches for a subject's data in the ``participants.tsv`` file within a
+     BIDS dataset. It can identify the subject with or without the "sub-"
+     prefix.
+
+     Parameters
+     ----------
+     bids_root : str or Path
+         The root directory of the BIDS dataset.
+     subject : str
+         The subject identifier (e.g., "01" or "sub-01").
+     id_columns : tuple of str, default ("participant_id", "participant", "subject")
+         A tuple of column names to search for the subject identifier.
+
+     Returns
+     -------
+     pandas.Series or None
+         A pandas Series containing the subject's data if found, otherwise None.
+
+     """
+     try:
+         participants_tsv = Path(bids_root) / "participants.tsv"
+         if not participants_tsv.exists():
+             return None
+
+         df = pd.read_csv(
+             participants_tsv, sep="\t", dtype="string", keep_default_na=False
+         )
+         if df.empty:
+             return None
+
+         candidates = {str(subject), f"sub-{subject}"}
+         present_cols = [c for c in id_columns if c in df.columns]
+         if not present_cols:
+             return None
+
+         mask = pd.Series(False, index=df.index)
+         for col in present_cols:
+             mask |= df[col].isin(candidates)
+         match = df.loc[mask]
+         if match.empty:
+             return None
+         return match.iloc[0]
+     except Exception:
+         return None
+
+
+ def participants_extras_from_tsv(
+     bids_root: str | Path,
+     subject: str,
+     *,
+     id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
+     na_like: tuple[str, ...] = ("", "n/a", "na", "nan", "unknown", "none"),
+ ) -> dict[str, Any]:
+     """Extract additional participant information from participants.tsv.
+
+     Retrieves all non-identifier and non-empty fields for a subject from
+     the ``participants.tsv`` file.
+
+     Parameters
+     ----------
+     bids_root : str or Path
+         The root directory of the BIDS dataset.
+     subject : str
+         The subject identifier.
+     id_columns : tuple of str, default ("participant_id", "participant", "subject")
+         Column names to be treated as identifiers and excluded from the
+         output.
+     na_like : tuple of str, default ("", "n/a", "na", "nan", "unknown", "none")
+         Values to be considered "not available" and excluded.
+
+     Returns
+     -------
+     dict
+         A dictionary of extra participant information.
+
+     """
+     row = participants_row_for_subject(bids_root, subject, id_columns=id_columns)
+     if row is None:
+         return {}
+
+     # Drop identifier columns and clean values
+     extras = row.drop(labels=[c for c in id_columns if c in row.index], errors="ignore")
+     s = extras.astype("string").str.strip()
+     valid = ~s.isna() & ~s.str.lower().isin(na_like)
+     return s[valid].to_dict()
+
+
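A self-contained sketch of both lookups against a throwaway participants.tsv (file contents hypothetical):

    import tempfile
    from pathlib import Path

    root = Path(tempfile.mkdtemp())
    (root / "participants.tsv").write_text(
        "participant_id\tage\tsex\nsub-01\t12\tF\nsub-02\tn/a\tM\n"
    )

    row = participants_row_for_subject(root, "01")  # "01" also matches "sub-01"
    extras = participants_extras_from_tsv(root, "02")
    # row["age"] == "12"; extras == {"sex": "M"} -- the "n/a" age is filtered out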
+ def attach_participants_extras(
+     raw: Any,
+     description: Any,
+     extras: dict[str, Any],
+ ) -> None:
+     """Attach extra participant data to a raw object and its description.
+
+     Updates ``raw.info['subject_info']`` and the description object
+     (dict or pandas Series) with extra data from ``participants.tsv``.
+     It does not overwrite existing keys.
+
+     Parameters
+     ----------
+     raw : mne.io.Raw
+         The MNE Raw object to be updated.
+     description : dict or pandas.Series
+         The description object to be updated.
+     extras : dict
+         A dictionary of extra participant information to attach.
+
+     """
+     if not extras:
+         return
+
+     # Raw.info enrichment
+     try:
+         subject_info = raw.info.get("subject_info") or {}
+         if not isinstance(subject_info, dict):
+             subject_info = {}
+         pe = subject_info.get("participants_extras") or {}
+         if not isinstance(pe, dict):
+             pe = {}
+         for k, v in extras.items():
+             pe.setdefault(k, v)
+         subject_info["participants_extras"] = pe
+         raw.info["subject_info"] = subject_info
+     except Exception:
+         pass
+
+     # Description enrichment
+     try:
+         import pandas as _pd  # local import to avoid hard dependency at import time
+
+         if isinstance(description, dict):
+             for k, v in extras.items():
+                 description.setdefault(k, v)
+         elif isinstance(description, _pd.Series):
+             missing = [k for k in extras.keys() if k not in description.index]
+             if missing:
+                 description.loc[missing] = [extras[m] for m in missing]
+     except Exception:
+         pass
+
+
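Because failures on the raw side are swallowed, the merge semantics can be exercised with a stand-in object whose info attribute is a plain dict; a real mne.io.Raw goes through the same code path, with any validation failure silently ignored:

    from types import SimpleNamespace

    raw = SimpleNamespace(info={})  # stand-in for mne.io.Raw
    description = {"subject": "01"}

    attach_participants_extras(raw, description, {"age": "12", "subject": "99"})
    # description == {"subject": "01", "age": "12"}  (existing key kept)
    # raw.info["subject_info"]["participants_extras"] == {"age": "12", "subject": "99"}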
+ def enrich_from_participants(
+     bids_root: str | Path,
+     bidspath: BIDSPath,
+     raw: Any,
+     description: Any,
+ ) -> dict[str, Any]:
+     """Read participants.tsv and attach extra info for the subject.
+
+     This is a convenience function that finds the subject from the
+     ``bidspath``, retrieves extra information from ``participants.tsv``,
+     and attaches it to the raw object and its description.
+
+     Parameters
+     ----------
+     bids_root : str or Path
+         The root directory of the BIDS dataset.
+     bidspath : mne_bids.BIDSPath
+         The BIDSPath object for the current data file.
+     raw : mne.io.Raw
+         The MNE Raw object to be updated.
+     description : dict or pandas.Series
+         The description object to be updated.
+
+     Returns
+     -------
+     dict
+         The dictionary of extras that were attached.
+
+     """
+     subject = getattr(bidspath, "subject", None)
+     if not subject:
+         return {}
+     extras = participants_extras_from_tsv(bids_root, subject)
+     attach_participants_extras(raw, description, extras)
+     return extras
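Finally, a hedged end-to-end sketch: load a recording with the standard mne-bids loader, then enrich it. The root path and entities are hypothetical:

    from mne_bids import BIDSPath, read_raw_bids

    bids_root = "/data/ds002718"  # hypothetical local BIDS dataset
    bp = BIDSPath(subject="01", task="RestingState", datatype="eeg",
                  suffix="eeg", root=bids_root)
    raw = read_raw_bids(bp)

    description = {"subject": "01"}
    extras = enrich_from_participants(bids_root, bp, raw, description)
    # "extras" holds the participants.tsv columns now attached to both
    # raw.info["subject_info"] and the description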