eegdash 0.3.4.dev70__tar.gz → 0.3.5.dev77__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash might be problematic. Click here for more details.

Files changed (54) hide show
  1. {eegdash-0.3.4.dev70/eegdash.egg-info → eegdash-0.3.5.dev77}/PKG-INFO +2 -1
  2. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/docs/source/conf.py +1 -1
  3. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/docs/source/index.rst +1 -0
  4. eegdash-0.3.5.dev77/docs/source/overview.rst +37 -0
  5. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/__init__.py +1 -1
  6. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/api.py +177 -54
  7. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77/eegdash.egg-info}/PKG-INFO +2 -1
  8. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash.egg-info/SOURCES.txt +5 -1
  9. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash.egg-info/requires.txt +1 -0
  10. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/pyproject.toml +1 -0
  11. eegdash-0.3.5.dev77/tests/test_api.py +63 -0
  12. eegdash-0.3.5.dev77/tests/test_offline.py +49 -0
  13. eegdash-0.3.5.dev77/tests/test_query.py +85 -0
  14. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/LICENSE +0 -0
  15. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/MANIFEST.in +0 -0
  16. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/README.md +0 -0
  17. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/docs/Makefile +0 -0
  18. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/docs/source/dataset_summary.rst +0 -0
  19. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/docs/source/install/install.rst +0 -0
  20. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/docs/source/install/install_pip.rst +0 -0
  21. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/docs/source/install/install_source.rst +0 -0
  22. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/data_config.py +0 -0
  23. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/data_utils.py +0 -0
  24. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/dataset.py +0 -0
  25. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/dataset_summary.csv +0 -0
  26. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/__init__.py +0 -0
  27. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/datasets.py +0 -0
  28. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/decorators.py +0 -0
  29. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/extractors.py +0 -0
  30. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/feature_bank/__init__.py +0 -0
  31. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/feature_bank/complexity.py +0 -0
  32. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/feature_bank/connectivity.py +0 -0
  33. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/feature_bank/csp.py +0 -0
  34. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/feature_bank/dimensionality.py +0 -0
  35. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/feature_bank/signal.py +0 -0
  36. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/feature_bank/spectral.py +0 -0
  37. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/feature_bank/utils.py +0 -0
  38. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/inspect.py +0 -0
  39. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/serialization.py +0 -0
  40. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/features/utils.py +0 -0
  41. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/mongodb.py +0 -0
  42. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/preprocessing.py +0 -0
  43. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/registry.py +0 -0
  44. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash/utils.py +0 -0
  45. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash.egg-info/dependency_links.txt +0 -0
  46. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/eegdash.egg-info/top_level.txt +0 -0
  47. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/setup.cfg +0 -0
  48. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/tests/test_correctness.py +0 -0
  49. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/tests/test_dataset.py +0 -0
  50. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/tests/test_dataset_registration.py +0 -0
  51. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/tests/test_eegdash.py +0 -0
  52. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/tests/test_functional.py +0 -0
  53. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/tests/test_init.py +0 -0
  54. {eegdash-0.3.4.dev70 → eegdash-0.3.5.dev77}/tests/test_mongo_connection.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.3.4.dev70
3
+ Version: 0.3.5.dev77
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
6
6
  License-Expression: GPL-3.0-only
@@ -38,6 +38,7 @@ Requires-Dist: tqdm
38
38
  Requires-Dist: xarray
39
39
  Requires-Dist: h5io>=0.2.4
40
40
  Requires-Dist: pymatreader
41
+ Requires-Dist: eeglabio
41
42
  Requires-Dist: tabulate
42
43
  Provides-Extra: tests
43
44
  Requires-Dist: pytest; extra == "tests"
@@ -123,7 +123,7 @@ sphinx_gallery_conf = {
123
123
  "subsection_order": ExplicitOrder(
124
124
  [
125
125
  f"{EX_DIR}/core",
126
- f"{EX_DIR}/eeg2025",
126
+ # f"{EX_DIR}/eeg2025",
127
127
  "*",
128
128
  ]
129
129
  ),
@@ -49,6 +49,7 @@ research for machine learning and deep learning applications.
49
49
  .. toctree::
50
50
  :hidden:
51
51
 
52
+ Overview <overview>
52
53
  Install <install/install>
53
54
  API <api/modules.rst>
54
55
  Dataset Summary <dataset_summary>
@@ -0,0 +1,37 @@
1
+ .. _overview:
2
+
3
+ ========
4
+ Overview
5
+ ========
6
+
7
+ eegdash is an interface designed to streamline access to and use of EEG data for machine learning applications. It is composed of three main components that work together to provide a seamless experience for researchers and developers.
8
+
9
+ The architecture of eegdash can be visualized as follows:
10
+
11
+ .. code-block:: text
12
+
13
+ +-----------------+
14
+ | MongoDB |
15
+ | (Metadata) |
16
+ +-----------------+
17
+ |
18
+ |
19
+ +-----------v-----------+ +-----------------+
20
+ | eegdash |<---->| S3 Filesystem |
21
+ | Interface | | (Raw Data) |
22
+ +-----------------------+ +-----------------+
23
+ |
24
+ |
25
+ +-----------v-----------+
26
+ | BIDS Parser |
27
+ +-----------------------+
28
+
29
+
30
+
31
+ The components are:
32
+
33
+ * **MongoDB**: This is a NoSQL database that centralizes all the metadata related to the EEG datasets. It stores information about subjects, sessions, tasks, and other relevant details, allowing for fast and efficient querying.
34
+
35
+ * **S3 Filesystem**: The raw EEG data is stored in S3-compatible object storage. This allows for scalable and reliable storage of large datasets. eegdash interacts with the S3 filesystem to download the data when it is needed.
36
+
37
+ * **BIDS Parser**: The BIDS (Brain Imaging Data Structure) parser is responsible for interpreting the structure of the datasets. It ensures that the data is organized in a standardized way, making it easier to work with and understand.
@@ -7,4 +7,4 @@ __init__mongo_client()
7
7
 
8
8
  __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset"]
9
9
 
10
- __version__ = "0.3.4.dev70"
10
+ __version__ = "0.3.5.dev77"
@@ -9,6 +9,7 @@ import numpy as np
9
9
  import xarray as xr
10
10
  from dotenv import load_dotenv
11
11
  from joblib import Parallel, delayed
12
+ from mne_bids import get_bids_path_from_fname, read_raw_bids
12
13
  from pymongo import InsertOne, UpdateOne
13
14
  from s3fs import S3FileSystem
14
15
 
@@ -34,6 +35,19 @@ class EEGDash:
34
35
 
35
36
  """
36
37
 
38
+ _ALLOWED_QUERY_FIELDS = {
39
+ "data_name",
40
+ "dataset",
41
+ "subject",
42
+ "task",
43
+ "session",
44
+ "run",
45
+ "modality",
46
+ "sampling_frequency",
47
+ "nchans",
48
+ "ntimes",
49
+ }
50
+
37
51
  def __init__(self, *, is_public: bool = True, is_staging: bool = False) -> None:
38
52
  """Create new instance of the EEGDash Database client.
39
53
 
@@ -71,34 +85,59 @@ class EEGDash:
71
85
  anon=True, client_kwargs={"region_name": "us-east-2"}
72
86
  )
73
87
 
74
- def find(self, query: dict[str, Any], *args, **kwargs) -> list[Mapping[str, Any]]:
75
- """Find records in the MongoDB collection that satisfy the given query.
88
+ def find(
89
+ self, query: dict[str, Any] = None, /, **kwargs
90
+ ) -> list[Mapping[str, Any]]:
91
+ """Find records in the MongoDB collection.
92
+
93
+ This method can be called in two ways:
94
+ 1. With a pre-built MongoDB query dictionary (positional argument):
95
+ >>> eegdash.find({"dataset": "ds002718", "subject": {"$in": ["012", "013"]}})
96
+ 2. With user-friendly keyword arguments for simple and multi-value queries:
97
+ >>> eegdash.find(dataset="ds002718", subject="012")
98
+ >>> eegdash.find(dataset="ds002718", subject=["012", "013"])
76
99
 
77
100
  Parameters
78
101
  ----------
79
- query: dict
80
- A dictionary that specifies the query to be executed; this is a reference
81
- document that is used to match records in the MongoDB collection.
82
- args:
83
- Additional positional arguments for the MongoDB find() method; see
84
- https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find
85
- kwargs:
86
- Additional keyword arguments for the MongoDB find() method.
102
+ query: dict, optional
103
+ A complete MongoDB query dictionary. This is a positional-only argument.
104
+ **kwargs:
105
+ Keyword arguments representing field-value pairs for the query.
106
+ Values can be single items (str, int) or non-empty lists/tuples of items, which are matched with MongoDB's `$in` operator.
87
107
 
88
108
  Returns
89
109
  -------
90
110
  list:
91
111
  A list of DB records (string-keyed dictionaries) that match the query.
92
112
 
93
- Example
94
- -------
95
- >>> eegdash = EEGDash()
96
- >>> eegdash.find({"dataset": "ds002718", "subject": "012"})
113
+ Raises
114
+ ------
115
+ ValueError
116
+ If both a `query` dictionary and keyword arguments are provided.
97
117
 
98
118
  """
99
- results = self.__collection.find(query, *args, **kwargs)
119
+ if query is not None and kwargs:
120
+ raise ValueError(
121
+ "Provide either a positional 'query' dictionary or keyword arguments, not both."
122
+ )
100
123
 
101
- return [result for result in results]
124
+ final_query = {}
125
+ if query is not None:
126
+ final_query = query
127
+ elif kwargs:
128
+ final_query = self._build_query_from_kwargs(**kwargs)
129
+ else:
130
+ # By default, an empty query {} returns all documents.
131
+ # This can be dangerous, so we can either allow it or raise an error.
132
+ # Let's require an explicit query for safety.
133
+ raise ValueError(
134
+ "find() requires a query dictionary or at least one keyword argument. "
135
+ "To find all documents, use find({})."
136
+ )
137
+
138
+ results = self.__collection.find(final_query)
139
+
140
+ return list(results)
102
141
 
103
142
  def exist(self, query: dict[str, Any]) -> bool:
104
143
  """Return True if at least one record matches the query, else False.
@@ -184,6 +223,35 @@ class EEGDash:
184
223
 
185
224
  return record
186
225
 
226
+ def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
227
+ """Builds and validates a MongoDB query from user-friendly keyword arguments.
228
+
229
+ Translates list values into MongoDB's `$in` operator.
230
+ """
231
+ # 1. Validate that all provided keys are allowed for querying
232
+ unknown_fields = set(kwargs.keys()) - self._ALLOWED_QUERY_FIELDS
233
+ if unknown_fields:
234
+ raise ValueError(
235
+ f"Unsupported query field(s): {', '.join(sorted(unknown_fields))}. "
236
+ f"Allowed fields are: {', '.join(sorted(self._ALLOWED_QUERY_FIELDS))}"
237
+ )
238
+
239
+ # 2. Construct the query dictionary
240
+ query = {}
241
+ for key, value in kwargs.items():
242
+ if isinstance(value, (list, tuple)):
243
+ if not value:
244
+ raise ValueError(
245
+ f"Received an empty list for query parameter '{key}'. This is not supported."
246
+ )
247
+ # If the value is a list, use the `$in` operator for multi-search
248
+ query[key] = {"$in": value}
249
+ else:
250
+ # Otherwise, it's a direct match
251
+ query[key] = value
252
+
253
+ return query
254
+
187
255
  def load_eeg_data_from_s3(self, s3path: str) -> xr.DataArray:
188
256
  """Load an EEGLAB .set file from an AWS S3 URI and return it as an xarray DataArray.
189
257
 
@@ -218,14 +286,15 @@ class EEGDash:
218
286
  Parameters
219
287
  ----------
220
288
  bids_file : str
221
- Path to the file on the local filesystem.
289
+ Path to the BIDS-compliant file on the local filesystem.
222
290
 
223
291
  Notes
224
292
  -----
225
293
  Currently, only non-epoched .set files are supported.
226
294
 
227
295
  """
228
- raw_object = mne.io.read_raw(bids_file)
296
+ bids_path = get_bids_path_from_fname(bids_file, verbose=False)
297
+ raw_object = read_raw_bids(bids_path=bids_path, verbose=False)
229
298
  eeg_data = raw_object.get_data()
230
299
 
231
300
  fs = raw_object.info["sfreq"]
@@ -521,8 +590,8 @@ class EEGDashDataset(BaseConcatDataset):
521
590
  def __init__(
522
591
  self,
523
592
  query: dict | None = None,
524
- data_dir: str | list | None = None,
525
- dataset: str | list | None = None,
593
+ cache_dir: str = "~/eegdash_cache",
594
+ dataset: str | None = None,
526
595
  description_fields: list[str] = [
527
596
  "subject",
528
597
  "session",
@@ -532,36 +601,55 @@ class EEGDashDataset(BaseConcatDataset):
532
601
  "gender",
533
602
  "sex",
534
603
  ],
535
- cache_dir: str = "~/eegdash_cache",
536
604
  s3_bucket: str | None = None,
605
+ data_dir: str | None = None,
537
606
  eeg_dash_instance=None,
607
+ records: list[dict] | None = None,
538
608
  **kwargs,
539
609
  ):
540
610
  """Create a new EEGDashDataset from a given query or local BIDS dataset directory
541
611
  and dataset name. An EEGDashDataset is a pooled collection of EEGDashBaseDataset
542
612
  instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.
543
613
 
614
+
615
+ Querying Examples:
616
+ ------------------
617
+ # Find by single subject
618
+ >>> ds = EEGDashDataset(dataset="ds005505", subject="NDARCA153NKE")
619
+
620
+ # Find by a list of subjects and a specific task
621
+ >>> subjects = ["NDARCA153NKE", "NDARXT792GY8"]
622
+ >>> ds = EEGDashDataset(dataset="ds005505", subject=subjects, task="RestingState")
623
+
624
+ # Use a raw MongoDB query for advanced filtering
625
+ >>> raw_query = {"dataset": "ds005505", "subject": {"$in": subjects}}
626
+ >>> ds = EEGDashDataset(query=raw_query)
627
+
544
628
  Parameters
545
629
  ----------
546
630
  query : dict | None
547
- Optionally a dictionary that specifies the query to be executed; see
548
- EEGDash.find() for details on the query format.
549
- data_dir : str | list[str] | None
550
- Optionally a string or a list of strings specifying one or more local
551
- BIDS dataset directories from which to load the EEG data files. Exactly one
631
+ A raw MongoDB query dictionary. Cannot be combined with keyword arguments for filtering; a ValueError is raised if both are given.
632
+ **kwargs : dict
633
+ Keyword arguments for filtering (e.g., `subject="X"`, `task=["T1", "T2"]`) and/or
634
+ arguments to be passed to the EEGDashBaseDataset constructor (any keyword that is not a recognized query field).
635
+ cache_dir : str
636
+ A directory where the dataset will be cached locally.
637
+ data_dir : str | None
638
+ Optionally a string specifying a local BIDS dataset directory from which to load the EEG data files. Exactly one
552
639
  of query or data_dir must be provided.
553
- dataset : str | list[str] | None
554
- If data_dir is given, a name or list of names for for the dataset(s) to be loaded.
640
+ dataset : str | None
641
+ If data_dir is given, a name for the dataset to be loaded.
555
642
  description_fields : list[str]
556
643
  A list of fields to be extracted from the dataset records
557
644
  and included in the returned data description(s). Examples are typical
558
645
  subject metadata fields such as "subject", "session", "run", "task", etc.;
559
646
  see also data_config.description_fields for the default set of fields.
560
- cache_dir : str
561
- A directory where the dataset will be cached locally.
562
647
  s3_bucket : str | None
563
648
  An optional S3 bucket URI (e.g., "s3://mybucket") to use instead of the
564
649
  default OpenNeuro bucket for loading data files
650
+ records : list[dict] | None
651
+ Optional list of pre-fetched metadata records. If provided, the dataset is
652
+ constructed directly from these records without querying MongoDB.
565
653
  kwargs : dict
566
654
  Additional keyword arguments to be passed to the EEGDashBaseDataset
567
655
  constructor.
@@ -569,20 +657,47 @@ class EEGDashDataset(BaseConcatDataset):
569
657
  """
570
658
  self.cache_dir = cache_dir
571
659
  self.s3_bucket = s3_bucket
572
- self.eeg_dash = eeg_dash_instance or EEGDash()
573
- _owns_client = eeg_dash_instance is None
660
+ self.eeg_dash = eeg_dash_instance
661
+ _owns_client = False
662
+ if self.eeg_dash is None and records is None:
663
+ self.eeg_dash = EEGDash()
664
+ _owns_client = True
665
+
666
+ # Separate query kwargs from other kwargs passed to the BaseDataset constructor
667
+ query_kwargs = {
668
+ k: v for k, v in kwargs.items() if k in EEGDash._ALLOWED_QUERY_FIELDS
669
+ }
670
+ base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in query_kwargs}
671
+
672
+ if query and query_kwargs:
673
+ raise ValueError(
674
+ "Provide either a 'query' dictionary or keyword arguments for filtering, not both."
675
+ )
574
676
 
575
677
  try:
576
- if query:
577
- datasets = self.find_datasets(query, description_fields, **kwargs)
678
+ if records is not None:
679
+ datasets = [
680
+ EEGDashBaseDataset(
681
+ record,
682
+ self.cache_dir,
683
+ self.s3_bucket,
684
+ **base_dataset_kwargs,
685
+ )
686
+ for record in records
687
+ ]
578
688
  elif data_dir:
689
+ # This path loads from a local directory and is not affected by DB query logic
579
690
  if isinstance(data_dir, str):
580
691
  datasets = self.load_bids_dataset(
581
- dataset, data_dir, description_fields, s3_bucket, **kwargs
692
+ dataset,
693
+ data_dir,
694
+ description_fields,
695
+ s3_bucket,
696
+ **base_dataset_kwargs,
582
697
  )
583
698
  else:
584
699
  assert len(data_dir) == len(dataset), (
585
- "Number of datasets and their directories must match"
700
+ "Number of datasets and directories must match"
586
701
  )
587
702
  datasets = []
588
703
  for i, _ in enumerate(data_dir):
@@ -592,27 +707,28 @@ class EEGDashDataset(BaseConcatDataset):
592
707
  data_dir[i],
593
708
  description_fields,
594
709
  s3_bucket,
595
- **kwargs,
710
+ **base_dataset_kwargs,
596
711
  )
597
712
  )
713
+ elif query or query_kwargs:
714
+ # DB query path: look up matching records in MongoDB via find_datasets
715
+ datasets = self.find_datasets(
716
+ query=query,
717
+ description_fields=description_fields,
718
+ query_kwargs=query_kwargs,
719
+ base_dataset_kwargs=base_dataset_kwargs,
720
+ )
721
+ # We only need filesystem if we need to access S3
722
+ self.filesystem = S3FileSystem(
723
+ anon=True, client_kwargs={"region_name": "us-east-2"}
724
+ )
598
725
  else:
599
726
  raise ValueError(
600
- "Exactly one of 'query' or 'data_dir' must be provided."
727
+ "You must provide either 'records', a 'data_dir', or a query/keyword arguments for filtering."
601
728
  )
602
729
  finally:
603
- # If we created the client, close it now that construction is done.
604
- if _owns_client:
605
- try:
606
- self.eeg_dash.close()
607
- except Exception:
608
- # Don't let close errors break construction
609
- pass
610
-
611
- self.filesystem = S3FileSystem(
612
- anon=True, client_kwargs={"region_name": "us-east-2"}
613
- )
614
-
615
- self.eeg_dash.close()
730
+ if _owns_client and self.eeg_dash is not None:
731
+ self.eeg_dash.close()
616
732
 
617
733
  super().__init__(datasets)
618
734
 
@@ -630,7 +746,11 @@ class EEGDashDataset(BaseConcatDataset):
630
746
  return None
631
747
 
632
748
  def find_datasets(
633
- self, query: dict[str, Any], description_fields: list[str], **kwargs
749
+ self,
750
+ query: dict[str, Any],
751
+ description_fields: list[str],
752
+ query_kwargs: dict,
753
+ base_dataset_kwargs: dict,
634
754
  ) -> list[EEGDashBaseDataset]:
635
755
  """Helper method to find datasets in the MongoDB collection that satisfy the
636
756
  given query and return them as a list of EEGDashBaseDataset objects.
@@ -652,7 +772,10 @@ class EEGDashDataset(BaseConcatDataset):
652
772
 
653
773
  """
654
774
  datasets: list[EEGDashBaseDataset] = []
655
- for record in self.eeg_dash.find(query):
775
+
776
+ records = self.eeg_dash.find(query, **query_kwargs)
777
+
778
+ for record in records:
656
779
  description = {}
657
780
  for field in description_fields:
658
781
  value = self.find_key_in_nested_dict(record, field)
@@ -664,7 +787,7 @@ class EEGDashDataset(BaseConcatDataset):
664
787
  self.cache_dir,
665
788
  self.s3_bucket,
666
789
  description=description,
667
- **kwargs,
790
+ **base_dataset_kwargs,
668
791
  )
669
792
  )
670
793
  return datasets
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.3.4.dev70
3
+ Version: 0.3.5.dev77
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
6
6
  License-Expression: GPL-3.0-only
@@ -38,6 +38,7 @@ Requires-Dist: tqdm
38
38
  Requires-Dist: xarray
39
39
  Requires-Dist: h5io>=0.2.4
40
40
  Requires-Dist: pymatreader
41
+ Requires-Dist: eeglabio
41
42
  Requires-Dist: tabulate
42
43
  Provides-Extra: tests
43
44
  Requires-Dist: pytest; extra == "tests"
@@ -6,6 +6,7 @@ docs/Makefile
6
6
  docs/source/conf.py
7
7
  docs/source/dataset_summary.rst
8
8
  docs/source/index.rst
9
+ docs/source/overview.rst
9
10
  docs/source/install/install.rst
10
11
  docs/source/install/install_pip.rst
11
12
  docs/source/install/install_source.rst
@@ -39,10 +40,13 @@ eegdash/features/feature_bank/dimensionality.py
39
40
  eegdash/features/feature_bank/signal.py
40
41
  eegdash/features/feature_bank/spectral.py
41
42
  eegdash/features/feature_bank/utils.py
43
+ tests/test_api.py
42
44
  tests/test_correctness.py
43
45
  tests/test_dataset.py
44
46
  tests/test_dataset_registration.py
45
47
  tests/test_eegdash.py
46
48
  tests/test_functional.py
47
49
  tests/test_init.py
48
- tests/test_mongo_connection.py
50
+ tests/test_mongo_connection.py
51
+ tests/test_offline.py
52
+ tests/test_query.py
@@ -12,6 +12,7 @@ tqdm
12
12
  xarray
13
13
  h5io>=0.2.4
14
14
  pymatreader
15
+ eeglabio
15
16
  tabulate
16
17
 
17
18
  [all]
@@ -49,6 +49,7 @@ dependencies = [
49
49
  "xarray",
50
50
  "h5io >= 0.2.4",
51
51
  "pymatreader",
52
+ "eeglabio",
52
53
  "tabulate",
53
54
  ]
54
55
 
@@ -0,0 +1,63 @@
1
+ import mne
2
+ import numpy as np
3
+ import pytest
4
+ import xarray as xr
5
+ from mne_bids import BIDSPath, write_raw_bids
6
+
7
+ from eegdash.api import EEGDash
8
+
9
+
10
+ # Fixture to create a dummy BIDS dataset for testing
11
+ @pytest.fixture(scope="module")
12
+ def dummy_bids_dataset(tmpdir_factory):
13
+ bids_root = tmpdir_factory.mktemp("bids")
14
+ # Create a simple MNE Raw object
15
+ ch_names = ["EEG 001", "EEG 002", "EEG 003"]
16
+ ch_types = ["eeg"] * 3
17
+ sfreq = 100
18
+ n_times = 100
19
+ data = np.random.randn(len(ch_names), n_times)
20
+ info = mne.create_info(ch_names=ch_names, sfreq=sfreq, ch_types=ch_types)
21
+ raw = mne.io.RawArray(data, info)
22
+
23
+ # Define BIDS path
24
+ subject_id = "01"
25
+ session_id = "01"
26
+ task_name = "test"
27
+ run_id = "01"
28
+ bids_path = BIDSPath(
29
+ subject=subject_id,
30
+ session=session_id,
31
+ task=task_name,
32
+ run=run_id,
33
+ root=bids_root,
34
+ datatype="eeg",
35
+ )
36
+
37
+ # Write BIDS data
38
+ write_raw_bids(raw, bids_path, overwrite=True, format="EEGLAB", allow_preload=True)
39
+
40
+ return str(bids_path.fpath)
41
+
42
+
43
+ def test_load_eeg_data_from_bids_file(dummy_bids_dataset):
44
+ eegdash = EEGDash()
45
+ data = eegdash.load_eeg_data_from_bids_file(dummy_bids_dataset)
46
+ assert isinstance(data, xr.DataArray)
47
+
48
+
49
+ def test_load_eeg_data_from_bids_file_content(dummy_bids_dataset):
50
+ eegdash = EEGDash()
51
+ data = eegdash.load_eeg_data_from_bids_file(dummy_bids_dataset)
52
+
53
+ # Check dimensions
54
+ assert data.dims == ("channel", "time")
55
+
56
+ # Check shape
57
+ assert data.shape == (3, 100)
58
+
59
+ # Check channel names
60
+ assert list(data.channel.values) == ["EEG 001", "EEG 002", "EEG 003"]
61
+
62
+ # Check time values
63
+ assert len(data.time.values) == 100
@@ -0,0 +1,49 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from eegdash import EEGDash, EEGDashDataset
6
+
7
+ CACHE_DIR = (Path.home() / "mne_data" / "eeg_challenge_cache").resolve()
8
+ CACHE_DIR.mkdir(parents=True, exist_ok=True)
9
+
10
+
11
+ def test_dataset_loads_without_eegdash(monkeypatch):
12
+ """Dataset should load from records without contacting network resources."""
13
+ eeg_dash = EEGDash()
14
+
15
+ records = eeg_dash.find(subject="NDARAC350XUM", task="RestingState")
16
+
17
+ # test with internet
18
+ dataset_internet = EEGDashDataset(
19
+ query=dict(task="RestingState", subject="NDARAC350XUM", dataset="ds005509"),
20
+ cache_dir=CACHE_DIR,
21
+ eeg_dash_instance=eeg_dash,
22
+ )
23
+
24
+ # Monkeypatch any network calls inside EEGDashDataset to raise if called
25
+ monkeypatch.setattr(
26
+ EEGDashDataset,
27
+ "find_datasets",
28
+ lambda *args, **kwargs: pytest.skip(
29
+ "Skipping network download in offline test"
30
+ ),
31
+ )
32
+ monkeypatch.setattr(
33
+ EEGDashDataset,
34
+ "find_datasets",
35
+ lambda *args, **kwargs: pytest.skip(
36
+ "Skipping network download in offline test"
37
+ ),
38
+ )
39
+ # TODO: find a way to do this with pytest
40
+
41
+ dataset_without_internet = EEGDashDataset(
42
+ records=records, cache_dir=CACHE_DIR, eeg_dash_instance=None
43
+ )
44
+
45
+ assert dataset_internet.datasets[0].raw == dataset_without_internet.datasets[0].raw
46
+ assert (
47
+ dataset_internet.datasets[0].record
48
+ == dataset_without_internet.datasets[0].record
49
+ )
@@ -0,0 +1,85 @@
1
+ from unittest.mock import MagicMock, patch
2
+
3
+ import pytest
4
+
5
+ from eegdash import EEGDash
6
+
7
+
8
+ # Mock the MongoConnectionManager to prevent actual DB connections during tests
9
+ @pytest.fixture(autouse=True)
10
+ def mock_mongo_connection():
11
+ """Automatically mocks the MongoDB connection for all tests."""
12
+ with patch("eegdash.mongodb.MongoConnectionManager.get_client") as mock_get_client:
13
+ mock_collection = MagicMock()
14
+ mock_db = MagicMock()
15
+ mock_client = MagicMock()
16
+ mock_get_client.return_value = (mock_client, mock_db, mock_collection)
17
+ yield mock_collection
18
+
19
+
20
+ @pytest.fixture
21
+ def eegdash_instance(mock_mongo_connection):
22
+ """Provides a clean instance of EEGDash for each test."""
23
+ return EEGDash(is_public=True)
24
+
25
+
26
+ def test_build_query_with_single_values(eegdash_instance):
27
+ """Test 1: Validates that the query builder correctly handles simple
28
+ key-value pairs.
29
+ """
30
+ kwargs = {"dataset": "ds001", "subject": "sub-01"}
31
+ expected_query = {"dataset": "ds001", "subject": "sub-01"}
32
+
33
+ # _build_query_from_kwargs is a protected method, but we test it
34
+ # to ensure the core logic is sound.
35
+ query = eegdash_instance._build_query_from_kwargs(**kwargs)
36
+
37
+ assert query == expected_query
38
+
39
+
40
+ def test_build_query_with_list_value(eegdash_instance):
41
+ """Test 2: Validates that the query builder correctly translates a list
42
+ of values into a MongoDB `$in` operator.
43
+ """
44
+ kwargs = {"dataset": "ds002", "subject": ["sub-01", "sub-02", "sub-03"]}
45
+ expected_query = {
46
+ "dataset": "ds002",
47
+ "subject": {"$in": ["sub-01", "sub-02", "sub-03"]},
48
+ }
49
+
50
+ query = eegdash_instance._build_query_from_kwargs(**kwargs)
51
+
52
+ assert query == expected_query
53
+
54
+
55
+ def test_build_query_with_invalid_field(eegdash_instance):
56
+ """Test 3: Ensures the query builder raises a ValueError when an unsupported
57
+ query field is provided.
58
+ """
59
+ kwargs = {"dataset": "ds003", "invalid_field": "some_value"}
60
+
61
+ with pytest.raises(
62
+ ValueError, match="Unsupported query field\\(s\\): invalid_field"
63
+ ):
64
+ eegdash_instance._build_query_from_kwargs(**kwargs)
65
+
66
+
67
+ def test_find_method_with_kwargs(eegdash_instance, mock_mongo_connection):
68
+ """Test 4: Verifies that the `find` method correctly uses the query builder
69
+ and calls the underlying database collection with the constructed query.
70
+ """
71
+ # Mock the return value of the collection's find method
72
+ mock_mongo_connection.find.return_value = [{"_id": "123", "dataset": "ds004"}]
73
+
74
+ # Call the method with user-friendly kwargs
75
+ results = eegdash_instance.find(dataset="ds004", subject=["sub-05", "sub-06"])
76
+
77
+ # Define the query we expect to be built and passed to the DB
78
+ expected_db_query = {"dataset": "ds004", "subject": {"$in": ["sub-05", "sub-06"]}}
79
+
80
+ # Assert that the collection's find method was called once with the correct query
81
+ mock_mongo_connection.find.assert_called_once_with(expected_db_query)
82
+
83
+ # Assert that the method returned the mocked data
84
+ assert len(results) == 1
85
+ assert results[0]["dataset"] == "ds004"
File without changes
File without changes
File without changes
File without changes