eegdash 0.3.6.dev183416654__py3-none-any.whl → 0.3.7.dev105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -0,0 +1,184 @@
+import logging
+from pathlib import Path
+from typing import Any
+
+from .const import ALLOWED_QUERY_FIELDS
+from .const import config as data_config
+from .data_utils import EEGBIDSDataset
+
+logger = logging.getLogger("eegdash")
+
+__all__ = [
+    "build_query_from_kwargs",
+    "load_eeg_attrs_from_bids_file",
+]
+
+
+def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
+    """Build and validate a MongoDB query from user-friendly keyword arguments.
+
+    Improvements:
+    - Reject None values and empty/whitespace-only strings
+    - For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
+    - Preserve scalars as exact matches
+    """
+    # 1. Validate that all provided keys are allowed for querying
+    unknown_fields = set(kwargs.keys()) - ALLOWED_QUERY_FIELDS
+    if unknown_fields:
+        raise ValueError(
+            f"Unsupported query field(s): {', '.join(sorted(unknown_fields))}. "
+            f"Allowed fields are: {', '.join(sorted(ALLOWED_QUERY_FIELDS))}"
+        )
+
+    # 2. Construct the query dictionary
+    query = {}
+    for key, value in kwargs.items():
+        # None is not a valid constraint
+        if value is None:
+            raise ValueError(
+                f"Received None for query parameter '{key}'. Provide a concrete value."
+            )
+
+        # Handle list-like values as multi-constraints
+        if isinstance(value, (list, tuple, set)):
+            cleaned: list[Any] = []
+            for item in value:
+                if item is None:
+                    continue
+                if isinstance(item, str):
+                    item = item.strip()
+                    if not item:
+                        continue
+                cleaned.append(item)
+            # Deduplicate while preserving order
+            cleaned = list(dict.fromkeys(cleaned))
+            if not cleaned:
+                raise ValueError(
+                    f"Received an empty list for query parameter '{key}'. This is not supported."
+                )
+            query[key] = {"$in": cleaned}
+        else:
+            # Scalars: trim strings and validate
+            if isinstance(value, str):
+                value = value.strip()
+                if not value:
+                    raise ValueError(
+                        f"Received an empty string for query parameter '{key}'."
+                    )
+            query[key] = value
+
+    return query
+
+
+def _get_raw_extensions(bids_file: str, bids_dataset: EEGBIDSDataset) -> list[str]:
+    """Helper to find paths to additional "sidecar" files that may be associated
+    with a given main data file in a BIDS dataset; paths are returned relative to
+    the parent dataset path.
+
+    For example, if the input file is a .set file, this will return the relative path
+    to a corresponding .fdt file (if any).
+    """
+    bids_file = Path(bids_file)
+    extensions = {
+        ".set": [".set", ".fdt"],  # eeglab
+        ".edf": [".edf"],  # european
+        ".vhdr": [".eeg", ".vhdr", ".vmrk", ".dat", ".raw"],  # brainvision
+        ".bdf": [".bdf"],  # biosemi
+    }
+    return [
+        str(bids_dataset._get_relative_bidspath(bids_file.with_suffix(suffix)))
+        for suffix in extensions[bids_file.suffix]
+        if bids_file.with_suffix(suffix).exists()
+    ]
+
+
+def load_eeg_attrs_from_bids_file(
+    bids_dataset: EEGBIDSDataset, bids_file: str
+) -> dict[str, Any]:
+    """Build the metadata record for a given BIDS file (single recording) in a BIDS dataset.
+
+    Attributes are at least the ones defined in the data_config attributes (set to None
+    if missing), but are typically a superset, and include, among others, the paths to
+    relevant metadata files needed to load and interpret the file in question.
+
+    Parameters
+    ----------
+    bids_dataset : EEGBIDSDataset
+        The BIDS dataset object containing the file.
+    bids_file : str
+        The path to the BIDS file within the dataset.
+
+    Returns
+    -------
+    dict
+        A dictionary representing the metadata record for the given file. This is the
+        same format as the records stored in the database.
+
+    """
+    if bids_file not in bids_dataset.files:
+        raise ValueError(f"{bids_file} not in {bids_dataset.dataset}")
+
+    # Initialize attrs with None values for all expected fields
+    attrs = {field: None for field in data_config["attributes"].keys()}
+
+    file = Path(bids_file).name
+    dsnumber = bids_dataset.dataset
+    # extract the OpenNeuro path by finding the first occurrence of the dataset name in the filename and removing the path before it
+    openneuro_path = dsnumber + bids_file.split(dsnumber)[1]
+
+    # Update with actual values where available
+    try:
+        participants_tsv = bids_dataset.subject_participant_tsv(bids_file)
+    except Exception as e:
+        logger.error("Error getting participants_tsv: %s", str(e))
+        participants_tsv = None
+
+    try:
+        eeg_json = bids_dataset.eeg_json(bids_file)
+    except Exception as e:
+        logger.error("Error getting eeg_json: %s", str(e))
+        eeg_json = None
+
+    bids_dependencies_files = data_config["bids_dependencies_files"]
+    bidsdependencies = []
+    for extension in bids_dependencies_files:
+        try:
+            dep_path = bids_dataset.get_bids_metadata_files(bids_file, extension)
+            dep_path = [
+                str(bids_dataset._get_relative_bidspath(dep)) for dep in dep_path
+            ]
+            bidsdependencies.extend(dep_path)
+        except Exception:
+            pass
+
+    bidsdependencies.extend(_get_raw_extensions(bids_file, bids_dataset))
+
+    # Define field extraction functions with error handling
+    field_extractors = {
+        "data_name": lambda: f"{bids_dataset.dataset}_{file}",
+        "dataset": lambda: bids_dataset.dataset,
+        "bidspath": lambda: openneuro_path,
+        "subject": lambda: bids_dataset.get_bids_file_attribute("subject", bids_file),
+        "task": lambda: bids_dataset.get_bids_file_attribute("task", bids_file),
+        "session": lambda: bids_dataset.get_bids_file_attribute("session", bids_file),
+        "run": lambda: bids_dataset.get_bids_file_attribute("run", bids_file),
+        "modality": lambda: bids_dataset.get_bids_file_attribute("modality", bids_file),
+        "sampling_frequency": lambda: bids_dataset.get_bids_file_attribute(
+            "sfreq", bids_file
+        ),
+        "nchans": lambda: bids_dataset.get_bids_file_attribute("nchans", bids_file),
+        "ntimes": lambda: bids_dataset.get_bids_file_attribute("ntimes", bids_file),
+        "participant_tsv": lambda: participants_tsv,
+        "eeg_json": lambda: eeg_json,
+        "bidsdependencies": lambda: bidsdependencies,
+    }
+
+    # Dynamically populate attrs with error handling
+    for field, extractor in field_extractors.items():
+        try:
+            attrs[field] = extractor()
+        except Exception as e:
+            logger.error("Error extracting %s: %s", field, str(e))
+            attrs[field] = None
+
+    return attrs
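
For orientation, here is a minimal usage sketch of the new query builder, following the function body above: scalars become exact matches, and collections are cleaned (None and empty items dropped, strings stripped, duplicates removed) into $in constraints. The import path is an assumption, since this diff does not show the new file's name.

    from eegdash.bids_eeg_metadata import build_query_from_kwargs  # path assumed

    query = build_query_from_kwargs(dataset="ds005516", subject=["01", " 02 ", "01", None])
    print(query)  # {'dataset': 'ds005516', 'subject': {'$in': ['01', '02']}}

    build_query_from_kwargs(foo=1)  # raises ValueError: Unsupported query field(s): foo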
@@ -1,7 +1,15 @@
-from pathlib import Path
-
-from .api import EEGDashDataset
-from .registry import register_openneuro_datasets
+ALLOWED_QUERY_FIELDS = {
+    "data_name",
+    "dataset",
+    "subject",
+    "task",
+    "session",
+    "run",
+    "modality",
+    "sampling_frequency",
+    "nchans",
+    "ntimes",
+}
 
 RELEASE_TO_OPENNEURO_DATASET_MAP = {
     "R11": "ds005516",
@@ -262,92 +270,37 @@ SUBJECT_MINI_RELEASE_MAP = {
     ],
 }
 
-
-class EEGChallengeDataset(EEGDashDataset):
-    def __init__(
-        self,
-        release: str,
-        cache_dir: str,
-        mini: bool = True,
-        query: dict | None = None,
-        s3_bucket: str | None = "s3://nmdatasets/NeurIPS25",
-        **kwargs,
-    ):
-        """Create a new EEGDashDataset from a given query or local BIDS dataset directory
-        and dataset name. An EEGDashDataset is a pooled collection of EEGDashBaseDataset
-        instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.
-
-        Parameters
-        ----------
-        release: str
-            Release name. Can be one of ["R1", ..., "R11"]
-        mini: bool, default True
-            Whether to use the mini-release version of the dataset. It is recommended
-            to use the mini version for faster training and evaluation.
-        query : dict | None
-            Optionally a dictionary that specifies a query to be executed,
-            in addition to the dataset (automatically inferred from the release argument).
-            See EEGDash.find() for details on the query format.
-        cache_dir : str
-            A directory where the dataset will be cached locally.
-        s3_bucket : str | None
-            An optional S3 bucket URI to use instead of the
-            default OpenNeuro bucket for loading data files.
-        kwargs : dict
-            Additional keyword arguments to be passed to the EEGDashDataset
-            constructor.
-
-        """
-        self.release = release
-        self.mini = mini
-
-        if release not in RELEASE_TO_OPENNEURO_DATASET_MAP:
-            raise ValueError(
-                f"Unknown release: {release}, expected one of {list(RELEASE_TO_OPENNEURO_DATASET_MAP.keys())}"
-            )
-
-        dataset_parameters = []
-        if isinstance(release, str):
-            dataset_parameters.append(RELEASE_TO_OPENNEURO_DATASET_MAP[release])
-        else:
-            raise ValueError(
-                f"Unknown release type: {type(release)}, the expected type is str."
-            )
-
-        if query and "dataset" in query:
-            raise ValueError(
-                "Querying with the parameter `dataset` is not possible with the class EEGChallengeDataset. "
-                "Please use the release argument instead, or use EEGDashDataset instead."
-            )
-
-        if self.mini:
-            # Disallow mixing subject selection with mini=True since mini already
-            # applies a predefined subject subset.
-            if (query and "subject" in query) or ("subject" in kwargs):
-                raise ValueError(
-                    "Querying with the parameter `subject` is not possible with the class EEGChallengeDataset when `mini=True`. "
-                    "Please don't use the `subject` selection twice. "
-                    "Set `mini=False` to use the `subject` selection."
-                )
-            kwargs["subject"] = SUBJECT_MINI_RELEASE_MAP[release]
-            s3_bucket = f"{s3_bucket}/{release}_mini_L100_bdf"
-        else:
-            s3_bucket = f"{s3_bucket}/{release}_L100_bdf"
-
-        super().__init__(
-            dataset=RELEASE_TO_OPENNEURO_DATASET_MAP[release],
-            query=query,
-            cache_dir=cache_dir,
-            s3_bucket=s3_bucket,
-            **kwargs,
-        )
-
-
-registered_classes = register_openneuro_datasets(
-    summary_file=Path(__file__).with_name("dataset_summary.csv"),
-    base_class=EEGDashDataset,
-    namespace=globals(),
-)
-
-
-__all__ = ["EEGChallengeDataset"] + list(registered_classes.keys())
+config = {
+    "required_fields": ["data_name"],
+    # Default set of user-facing primary record attributes expected in the database. Records
+    # where any of these are missing will be loaded with the respective attribute set to None.
+    # Additional fields may be returned if they are present in the database, notably bidsdependencies.
+    "attributes": {
+        "data_name": "str",
+        "dataset": "str",
+        "bidspath": "str",
+        "subject": "str",
+        "task": "str",
+        "session": "str",
+        "run": "str",
+        "sampling_frequency": "float",
+        "modality": "str",
+        "nchans": "int",
+        "ntimes": "int",  # note: this is really the number of seconds in the data, rounded down
+    },
+    # queryable descriptive fields for a given recording
+    "description_fields": ["subject", "session", "run", "task", "age", "gender", "sex"],
+    # list of filenames that may be present in the BIDS dataset directory and are used
+    # to load and interpret a given BIDS recording.
+    "bids_dependencies_files": [
+        "dataset_description.json",
+        "participants.tsv",
+        "events.tsv",
+        "events.json",
+        "eeg.json",
+        "electrodes.tsv",
+        "channels.tsv",
+        "coordsystem.json",
+    ],
+    "accepted_query_fields": ["data_name", "dataset"],
+}
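
These constants are what the new metadata module earlier in this diff consumes: build_query_from_kwargs validates keys against ALLOWED_QUERY_FIELDS, and load_eeg_attrs_from_bids_file seeds its record from config["attributes"]. A short sketch using only names from this diff (the record value is made up):

    # Every expected attribute starts as None, then is filled where available.
    attrs = {field: None for field in config["attributes"]}
    attrs["data_name"] = "ds005516_sub-01_task-rest_eeg.bdf"  # hypothetical value

    # The declared minimal requirement then holds for this record:
    assert all(attrs[f] is not None for f in config["required_fields"])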
eegdash/data_utils.py CHANGED
@@ -57,7 +57,7 @@ class EEGDashBaseDataset(BaseDataset):
         super().__init__(None, **kwargs)
         self.record = record
         self.cache_dir = Path(cache_dir)
-        self.bids_kwargs = self.get_raw_bids_args()
+        self.bids_kwargs = self._get_raw_bids_args()
 
         if s3_bucket:
             self.s3_bucket = s3_bucket
@@ -66,16 +66,46 @@ class EEGDashBaseDataset(BaseDataset):
             self.s3_bucket = self._AWS_BUCKET
             self.s3_open_neuro = True
 
-        self.filecache = self.cache_dir / record["bidspath"]
-        self.bids_root = self.cache_dir / record["dataset"]
+        # Compute a dataset folder name under cache_dir that encodes preprocessing
+        # (e.g., bdf, mini) to avoid overlapping with the original dataset cache.
+        self.dataset_folder = record.get("dataset", "")
+        if s3_bucket:
+            suffixes: list[str] = []
+            bucket_lower = str(s3_bucket).lower()
+            if "bdf" in bucket_lower:
+                suffixes.append("bdf")
+            if "mini" in bucket_lower:
+                suffixes.append("mini")
+            if suffixes:
+                self.dataset_folder = f"{self.dataset_folder}-{'-'.join(suffixes)}"
+
+        # Place files under the dataset-specific folder (with suffix if any)
+        rel = Path(record["bidspath"])  # usually starts with the dataset id
+        if rel.parts and rel.parts[0] == record.get("dataset"):
+            rel = Path(self.dataset_folder, *rel.parts[1:])
+        else:
+            rel = Path(self.dataset_folder) / rel
+        self.filecache = self.cache_dir / rel
+        self.bids_root = self.cache_dir / self.dataset_folder
         self.bidspath = BIDSPath(
             root=self.bids_root,
             datatype="eeg",
             suffix="eeg",
+            # extension='.bdf',
             **self.bids_kwargs,
         )
+        # TO-DO: remove this once we find a better solution using mne-bids or update the competition dataset
+        try:
+            _ = str(self.bidspath)
+        except RuntimeError:
+            try:
+                self.bidspath = self.bidspath.update(extension=".bdf")
+                self.filecache = self.filecache.with_suffix(".bdf")
+            except Exception as e:
+                logger.error(f"Error while updating BIDS path: {e}")
+                raise e
 
-        self.s3file = self.get_s3path(record["bidspath"])
+        self.s3file = self._get_s3path(record["bidspath"])
         self.bids_dependencies = record["bidsdependencies"]
         # Temporary fix for BIDS dependencies path
         # just to release to the competition
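
The net effect of the cache-folder change above can be restated as a standalone sketch (this mirrors the logic in the diff, it is not the library API; the example values reuse names visible elsewhere in this diff):

    from pathlib import Path

    def derive_cache_rel_path(dataset: str, bidspath: str, s3_bucket: str | None) -> Path:
        """Mirror of the suffix logic: encode preprocessing hints from the bucket name."""
        folder = dataset
        if s3_bucket:
            bucket_lower = s3_bucket.lower()
            suffixes = [s for s in ("bdf", "mini") if s in bucket_lower]
            if suffixes:
                folder = f"{folder}-{'-'.join(suffixes)}"
        rel = Path(bidspath)
        if rel.parts and rel.parts[0] == dataset:
            return Path(folder, *rel.parts[1:])
        return Path(folder) / rel

    print(derive_cache_rel_path(
        "ds005516",
        "ds005516/sub-01/eeg/sub-01_task-rest_eeg.bdf",
        "s3://nmdatasets/NeurIPS25/R11_mini_L100_bdf",
    ))
    # -> ds005516-bdf-mini/sub-01/eeg/sub-01_task-rest_eeg.bdf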
@@ -87,7 +117,7 @@ class EEGDashBaseDataset(BaseDataset):
 
         self._raw = None
 
-    def get_s3path(self, filepath: str) -> str:
+    def _get_s3path(self, filepath: str) -> str:
         """Helper to form an AWS S3 URI for the given relative filepath."""
         return f"{self.s3_bucket}/{filepath}"
 
@@ -141,11 +171,16 @@ class EEGDashBaseDataset(BaseDataset):
                 if dep.endswith(".set"):
                     dep = dep[:-4] + ".bdf"
 
-            s3path = self.get_s3path(dep)
+            s3path = self._get_s3path(dep)
             if not self.s3_open_neuro:
                 dep = self.bids_dependencies_original[i]
 
-            filepath = self.cache_dir / dep
+            dep_path = Path(dep)
+            if dep_path.parts and dep_path.parts[0] == self.record.get("dataset"):
+                dep_local = Path(self.dataset_folder, *dep_path.parts[1:])
+            else:
+                dep_local = Path(self.dataset_folder) / dep_path
+            filepath = self.cache_dir / dep_local
             if not self.s3_open_neuro:
                 if self.filecache.suffix == ".set":
                     self.filecache = self.filecache.with_suffix(".bdf")
@@ -174,14 +209,14 @@ class EEGDashBaseDataset(BaseDataset):
             )
             filesystem.get(s3path, filepath, callback=callback)
 
-    def get_raw_bids_args(self) -> dict[str, Any]:
+    def _get_raw_bids_args(self) -> dict[str, Any]:
         """Helper to restrict the metadata record to the fields needed to locate a BIDS
         recording.
         """
         desired_fields = ["subject", "session", "task", "run"]
         return {k: self.record[k] for k in desired_fields if self.record[k]}
 
-    def check_and_get_raw(self) -> None:
+    def _ensure_raw(self) -> None:
         """Download the S3 file and BIDS dependencies if not already cached."""
         if not os.path.exists(self.filecache):  # not preload
             if self.bids_dependencies:
@@ -195,7 +230,6 @@ class EEGDashBaseDataset(BaseDataset):
             # TO-DO: remove this once this is fixed on our side
             if not self.s3_open_neuro:
                 self.bidspath = self.bidspath.update(extension=".bdf")
-
             self._raw = mne_bids.read_raw_bids(
                 bids_path=self.bidspath, verbose="ERROR"
             )
@@ -242,7 +276,7 @@ class EEGDashBaseDataset(BaseDataset):
         retrieval if not yet done so.
         """
         if self._raw is None:
-            self.check_and_get_raw()
+            self._ensure_raw()
         return self._raw
 
     @raw.setter
@@ -300,7 +334,7 @@ class EEGDashBaseRaw(BaseRaw):
                 chtype = "eog"
             ch_types.append(chtype)
         info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
-        self.s3file = self.get_s3path(input_fname)
+        self.s3file = self._get_s3path(input_fname)
         self.cache_dir = Path(cache_dir)
         self.filecache = self.cache_dir / input_fname
         self.bids_dependencies = bids_dependencies
@@ -317,7 +351,7 @@ class EEGDashBaseRaw(BaseRaw):
             verbose=verbose,
         )
 
-    def get_s3path(self, filepath):
+    def _get_s3path(self, filepath):
         return f"{self._AWS_BUCKET}/{filepath}"
 
     def _download_s3(self) -> None:
@@ -333,7 +367,7 @@ class EEGDashBaseRaw(BaseRaw):
             anon=True, client_kwargs={"region_name": "us-east-2"}
         )
         for dep in self.bids_dependencies:
-            s3path = self.get_s3path(dep)
+            s3path = self._get_s3path(dep)
             filepath = self.cache_dir / dep
             if not filepath.exists():
                 filepath.parent.mkdir(parents=True, exist_ok=True)
@@ -394,11 +428,17 @@ class EEGBIDSDataset:
             raise ValueError("data_dir must be specified and must exist")
         self.bidsdir = Path(data_dir)
         self.dataset = dataset
-        assert str(self.bidsdir).endswith(self.dataset)
+        # Accept the exact dataset folder or a variant with informative suffixes
+        # (e.g., dsXXXXX-bdf, dsXXXXX-bdf-mini) to avoid collisions.
+        dir_name = self.bidsdir.name
+        if not (dir_name == self.dataset or dir_name.startswith(self.dataset + "-")):
+            raise AssertionError(
+                f"BIDS directory '{dir_name}' does not correspond to dataset '{self.dataset}'"
+            )
         self.layout = BIDSLayout(data_dir)
 
         # get all recording files in the bids directory
-        self.files = self.get_recordings(self.layout)
+        self.files = self._get_recordings(self.layout)
         assert len(self.files) > 0, ValueError(
             "Unable to construct EEG dataset. No EEG recordings found."
         )
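
The relaxed directory check admits exactly the suffixed cache folders produced by the logic earlier in this diff; a minimal sketch of the accepted shapes:

    def folder_matches(dir_name: str, dataset: str) -> bool:
        # Same condition as above: exact name, or name plus "-"-separated suffixes.
        return dir_name == dataset or dir_name.startswith(dataset + "-")

    assert folder_matches("ds005516", "ds005516")
    assert folder_matches("ds005516-bdf-mini", "ds005516")
    assert not folder_matches("ds005516x", "ds005516")  # no "-" separator: rejected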
@@ -408,7 +448,7 @@ class EEGBIDSDataset:
         """Check if the dataset is EEG."""
         return self.get_bids_file_attribute("modality", self.files[0]).lower() == "eeg"
 
-    def get_recordings(self, layout: BIDSLayout) -> list[str]:
+    def _get_recordings(self, layout: BIDSLayout) -> list[str]:
         """Get a list of all EEG recording files in the BIDS layout."""
         files = []
         for ext, exts in self.RAW_EXTENSIONS.items():
@@ -417,12 +457,12 @@ class EEGBIDSDataset:
                 break
         return files
 
-    def get_relative_bidspath(self, filename: str) -> str:
+    def _get_relative_bidspath(self, filename: str) -> str:
         """Make the given file path relative to the BIDS directory."""
         bids_parent_dir = self.bidsdir.parent.absolute()
         return str(Path(filename).relative_to(bids_parent_dir))
 
-    def get_property_from_filename(self, property: str, filename: str) -> str:
+    def _get_property_from_filename(self, property: str, filename: str) -> str:
         """Parse a property out of a BIDS-compliant filename. Returns an empty string
         if not found.
         """
@@ -434,7 +474,7 @@ class EEGBIDSDataset:
             lookup = re.search(rf"{property}-(.*?)[_\/]", filename)
         return lookup.group(1) if lookup else ""
 
-    def merge_json_inheritance(self, json_files: list[str | Path]) -> dict:
+    def _merge_json_inheritance(self, json_files: list[str | Path]) -> dict:
         """Internal helper to merge the list of json files found by get_bids_file_inheritance,
         expecting the order (from left to right) to run from the lowest
         level to the highest level, and return a merged dictionary
@@ -445,7 +485,7 @@ class EEGBIDSDataset:
             json_dict.update(json.load(open(f)))  # FIXME: should close file
         return json_dict
 
-    def get_bids_file_inheritance(
+    def _get_bids_file_inheritance(
         self, path: str | Path, basename: str, extension: str
     ) -> list[Path]:
         """Get all file paths that apply to the basename file in the specified directory
@@ -492,7 +532,7 @@ class EEGBIDSDataset:
         else:
             # call get_bids_file_inheritance recursively with parent directory
             bids_files.extend(
-                self.get_bids_file_inheritance(path.parent, basename, extension)
+                self._get_bids_file_inheritance(path.parent, basename, extension)
             )
         return bids_files
 
@@ -523,12 +563,12 @@ class EEGBIDSDataset:
         path, filename = os.path.split(filepath)
         basename = filename[: filename.rfind("_")]
         # metadata files
-        meta_files = self.get_bids_file_inheritance(
+        meta_files = self._get_bids_file_inheritance(
             path, basename, metadata_file_extension
         )
         return meta_files
 
-    def scan_directory(self, directory: str, extension: str) -> list[Path]:
+    def _scan_directory(self, directory: str, extension: str) -> list[Path]:
         """Return a list of file paths that end with the given extension in the specified
         directory. Ignores certain special directories like .git, .datalad, derivatives,
         and code.
@@ -545,7 +585,7 @@ class EEGBIDSDataset:
                     result_files.append(entry.path)  # Add directory to scan later
         return result_files
 
-    def get_files_with_extension_parallel(
+    def _get_files_with_extension_parallel(
         self, directory: str, extension: str = ".set", max_workers: int = -1
     ) -> list[Path]:
         """Efficiently scan a directory and its subdirectories for files that end with
@@ -577,7 +617,7 @@ class EEGBIDSDataset:
         )
         # Run the scan_directory function in parallel across directories
         results = Parallel(n_jobs=max_workers, prefer="threads", verbose=1)(
-            delayed(self.scan_directory)(d, extension) for d in dirs_to_scan
+            delayed(self._scan_directory)(d, extension) for d in dirs_to_scan
         )
 
         # Reset the directories to scan and process the results
@@ -682,7 +722,7 @@ class EEGBIDSDataset:
     def num_times(self, data_filepath: str) -> int:
         """Get the approximate number of time points in the EEG recording based on the BIDS metadata."""
         eeg_jsons = self.get_bids_metadata_files(data_filepath, "eeg.json")
-        eeg_json_dict = self.merge_json_inheritance(eeg_jsons)
+        eeg_json_dict = self._merge_json_inheritance(eeg_jsons)
         return int(
             eeg_json_dict["SamplingFrequency"] * eeg_json_dict["RecordingDuration"]
         )
@@ -705,7 +745,7 @@ class EEGBIDSDataset:
     def eeg_json(self, data_filepath: str) -> dict[str, Any]:
         """Get BIDS eeg.json metadata for the given data file path."""
         eeg_jsons = self.get_bids_metadata_files(data_filepath, "eeg.json")
-        eeg_json_dict = self.merge_json_inheritance(eeg_jsons)
+        eeg_json_dict = self._merge_json_inheritance(eeg_jsons)
         return eeg_json_dict
 
     def channel_tsv(self, data_filepath: str) -> dict[str, Any]:
@@ -0,0 +1,4 @@
+from .dataset import EEGChallengeDataset
+from .registry import register_openneuro_datasets
+
+__all__ = ["EEGChallengeDataset", "register_openneuro_datasets"]