eegdash 0.3.6.dev182011805__tar.gz → 0.3.6.dev183416654__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of eegdash might be problematic.

Files changed (58)
  1. {eegdash-0.3.6.dev182011805/eegdash.egg-info → eegdash-0.3.6.dev183416654}/PKG-INFO +4 -1
  2. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/docs/source/conf.py +14 -0
  3. eegdash-0.3.6.dev183416654/docs/source/dataset_summary.rst +201 -0
  4. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/__init__.py +1 -1
  5. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/api.py +38 -57
  6. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/dataset.py +1 -1
  7. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/registry.py +13 -3
  8. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654/eegdash.egg-info}/PKG-INFO +4 -1
  9. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash.egg-info/SOURCES.txt +1 -0
  10. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash.egg-info/requires.txt +3 -0
  11. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/pyproject.toml +4 -1
  12. eegdash-0.3.6.dev183416654/tests/test_database.py +0 -0
  13. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_dataset_registration.py +3 -3
  14. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_offline.py +6 -4
  15. eegdash-0.3.6.dev182011805/docs/source/dataset_summary.rst +0 -85
  16. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/LICENSE +0 -0
  17. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/MANIFEST.in +0 -0
  18. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/README.md +0 -0
  19. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/docs/Makefile +0 -0
  20. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/docs/source/index.rst +0 -0
  21. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/docs/source/install/install.rst +0 -0
  22. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/docs/source/install/install_pip.rst +0 -0
  23. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/docs/source/install/install_source.rst +0 -0
  24. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/docs/source/overview.rst +0 -0
  25. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/data_config.py +0 -0
  26. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/data_utils.py +0 -0
  27. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/dataset_summary.csv +0 -0
  28. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/__init__.py +0 -0
  29. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/datasets.py +0 -0
  30. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/decorators.py +0 -0
  31. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/extractors.py +0 -0
  32. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/feature_bank/__init__.py +0 -0
  33. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/feature_bank/complexity.py +0 -0
  34. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/feature_bank/connectivity.py +0 -0
  35. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/feature_bank/csp.py +0 -0
  36. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/feature_bank/dimensionality.py +0 -0
  37. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/feature_bank/signal.py +0 -0
  38. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/feature_bank/spectral.py +0 -0
  39. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/feature_bank/utils.py +0 -0
  40. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/inspect.py +0 -0
  41. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/serialization.py +0 -0
  42. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/features/utils.py +0 -0
  43. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/mongodb.py +0 -0
  44. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/preprocessing.py +0 -0
  45. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash/utils.py +0 -0
  46. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash.egg-info/dependency_links.txt +0 -0
  47. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/eegdash.egg-info/top_level.txt +0 -0
  48. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/setup.cfg +0 -0
  49. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_api.py +0 -0
  50. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_challenge_kwargs.py +0 -0
  51. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_correctness.py +0 -0
  52. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_dataset.py +0 -0
  53. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_eegdash.py +0 -0
  54. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_functional.py +0 -0
  55. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_init.py +0 -0
  56. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_minirelease.py +0 -0
  57. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_mongo_connection.py +0 -0
  58. {eegdash-0.3.6.dev182011805 → eegdash-0.3.6.dev183416654}/tests/test_query.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: eegdash
-Version: 0.3.6.dev182011805
+Version: 0.3.6.dev183416654
 Summary: EEG data for machine learning
 Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
 License-Expression: GPL-3.0-only
@@ -48,6 +48,7 @@ Requires-Dist: pytest_cases; extra == "tests"
 Requires-Dist: pytest-benchmark; extra == "tests"
 Provides-Extra: dev
 Requires-Dist: pre-commit; extra == "dev"
+Requires-Dist: ipykernel; extra == "dev"
 Provides-Extra: docs
 Requires-Dist: sphinx; extra == "docs"
 Requires-Dist: sphinx_design; extra == "docs"
@@ -55,10 +56,12 @@ Requires-Dist: sphinx_gallery; extra == "docs"
 Requires-Dist: sphinx_rtd_theme; extra == "docs"
 Requires-Dist: pydata-sphinx-theme; extra == "docs"
 Requires-Dist: sphinx-autobuild; extra == "docs"
+Requires-Dist: sphinx-sitemap; extra == "docs"
 Requires-Dist: numpydoc; extra == "docs"
 Requires-Dist: memory_profiler; extra == "docs"
 Requires-Dist: ipython; extra == "docs"
 Requires-Dist: lightgbm; extra == "docs"
+Requires-Dist: plotly; extra == "docs"
 Provides-Extra: all
 Requires-Dist: eegdash[docs]; extra == "all"
 Requires-Dist: eegdash[dev]; extra == "all"
docs/source/conf.py
@@ -31,6 +31,8 @@ extensions = [
     # "autoapi.extension",
     "numpydoc",
     "sphinx_gallery.gen_gallery",
+    # Generate sitemap.xml for search engines
+    "sphinx_sitemap",
 ]

 templates_path = ["_templates"]
@@ -45,6 +47,11 @@ html_favicon = "_static/eegdash_icon.png"
 html_title = "EEG Dash"
 html_short_title = "EEG Dash"
 html_css_files = ["custom.css"]
+html_js_files = []
+
+# Required for sphinx-sitemap: set the canonical base URL of the site
+# Make sure this matches the actual published docs URL and ends with '/'
+html_baseurl = "https://sccn.github.io/eegdash/"

 html_theme_options = {
     "icon_links_label": "External Links",  # for screen reader
@@ -94,6 +101,9 @@ html_theme_options = {

 html_sidebars = {"api": [], "dataset_summary": [], "installation": []}

+# Copy extra files (e.g., robots.txt) to the output root
+html_extra_path = ["_extra"]
+

 # -- Extension configurations ------------------------------------------------
 autoclass_content = "both"
@@ -140,3 +150,7 @@ def setup(app):
     )
     if not os.path.exists(backreferences_dir):
         os.makedirs(backreferences_dir)
+
+
+# Configure sitemap URL format (omit .html where possible)
+sitemap_url_scheme = "{link}"
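Taken together, the conf.py hunks above wire up sphinx-sitemap. A minimal consolidated sketch of just the sitemap-related settings (illustrative only; it assumes the rest of conf.py stays as shown in the diff, and that sitemap_url_scheme sits at module level where sphinx-sitemap reads it):

    # docs/source/conf.py -- sitemap-related settings only (sketch)
    extensions = [
        # ... existing extensions ...
        "sphinx_sitemap",  # emits sitemap.xml at build time
    ]

    # sphinx-sitemap needs the canonical published URL, ending with '/'
    html_baseurl = "https://sccn.github.io/eegdash/"

    # copy robots.txt (and anything else under _extra/) into the output root
    html_extra_path = ["_extra"]

    # URL format inside the sitemap; "{link}" drops the default lang/version prefix
    sitemap_url_scheme = "{link}"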
eegdash-0.3.6.dev183416654/docs/source/dataset_summary.rst (new file)
@@ -0,0 +1,201 @@
+.. meta::
+   :hide_sidebar: true
+
+:html_theme.sidebar_secondary.remove:
+:html_theme.sidebar_primary.remove:
+
+.. _data_summary:
+
+EEGDash
+========
+
+To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
+
+The archive is currently still in :bdg-danger:`beta testing` mode, so be kind.
+
+.. raw:: html
+
+   <figure class="eegdash-figure" style="margin: 0 0 1.25rem 0;">
+
+.. raw:: html
+   :file: ../build/dataset_bubble.html
+
+.. raw:: html
+
+   <figcaption class="eegdash-caption">
+   Figure: Dataset landscape. Each bubble represents a dataset: the x-axis shows the number of records,
+   the y-axis the number of subjects, bubble area encodes on-disk size, and color indicates sampling frequency band.
+   Hover for details and use the legend to highlight groups.
+   </figcaption>
+   </figure>
+
+
+.. raw:: html
+
+   <figure class="eegdash-figure" style="margin: 1.0rem 0 0 0;">
+
+
+MEEG Datasets Table
+===================
+
+The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs,
+involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks.
+
+In addition, EEG-DaSh will incorporate a subset of the data converted from `NEMAR <https://nemar.org/>`__, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
+
+.. raw:: html
+   :file: ../build/dataset_summary_table.html
+
+.. raw:: html
+
+   <figcaption class="eegdash-caption">
+   Table: Sortable catalogue of EEG-DaSh datasets. Use the “Filters” button to open column filters;
+   click a column header to jump directly to a filter pane. The Total row is pinned at the bottom.
+   * indicates the median value across multiple recordings in the dataset; cells are left empty
+   when the metadata has not been extracted yet.
+   </figcaption>
+   </figure>
+
+.. raw:: html
+
+   <!-- jQuery + DataTables core -->
+   <script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
+   <link rel="stylesheet" href="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.css"/>
+   <script src="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.js"></script>
+
+   <!-- Buttons + SearchPanes (+ Select required by SearchPanes) -->
+   <link rel="stylesheet" href="https://cdn.datatables.net/buttons/2.4.2/css/buttons.dataTables.min.css">
+   <script src="https://cdn.datatables.net/buttons/2.4.2/js/dataTables.buttons.min.js"></script>
+   <link rel="stylesheet" href="https://cdn.datatables.net/select/1.7.0/css/select.dataTables.min.css">
+   <link rel="stylesheet" href="https://cdn.datatables.net/searchpanes/2.3.1/css/searchPanes.dataTables.min.css">
+   <script src="https://cdn.datatables.net/select/1.7.0/js/dataTables.select.min.js"></script>
+   <script src="https://cdn.datatables.net/searchpanes/2.3.1/js/dataTables.searchPanes.min.js"></script>
+
+   <style>
+   /* Styling for the Total row (placed in tfoot) */
+   table.sd-table tfoot td {
+     font-weight: 600;
+     border-top: 2px solid rgba(0,0,0,0.2);
+     background: #f9fafb;
+     /* Match body cell padding to keep perfect alignment */
+     padding: 8px 10px !important;
+     vertical-align: middle;
+   }
+
+   /* Right-align numeric-like columns (2..8) consistently for body & footer */
+   table.sd-table tbody td:nth-child(n+2),
+   table.sd-table tfoot td:nth-child(n+2) {
+     text-align: right;
+   }
+   /* Keep first column (Dataset/Total) left-aligned */
+   table.sd-table tbody td:first-child,
+   table.sd-table tfoot td:first-child {
+     text-align: left;
+   }
+   </style>
+
+   <script>
+   // Helper: robustly extract values for SearchPanes when needed
+   function tagsArrayFromHtml(html) {
+     if (html == null) return [];
+     // If it's numeric or plain text, just return as a single value
+     if (typeof html === 'number') return [String(html)];
+     if (typeof html === 'string' && html.indexOf('<') === -1) return [html.trim()];
+     // Else parse any .tag elements inside HTML
+     var tmp = document.createElement('div');
+     tmp.innerHTML = html;
+     var tags = Array.from(tmp.querySelectorAll('.tag')).map(function(el){
+       return (el.textContent || '').trim();
+     });
+     return tags.length ? tags : [tmp.textContent.trim()];
+   }
+
+   // Helper: parse human-readable sizes like "4.31 GB" into bytes (number)
+   function parseSizeToBytes(text) {
+     if (!text) return 0;
+     var s = String(text).trim();
+     var m = s.match(/([\d,.]+)\s*(TB|GB|MB|KB|B)/i);
+     if (!m) return 0;
+     var value = parseFloat(m[1].replace(/,/g, ''));
+     var unit = m[2].toUpperCase();
+     var factor = { B:1, KB:1024, MB:1024**2, GB:1024**3, TB:1024**4 }[unit] || 1;
+     return value * factor;
+   }
+
+   $(function () {
+     // 1) Move the "Total" row into <tfoot> so sorting/filtering never moves it
+     $('.sortable').each(function(){
+       var $t = $(this);
+       var $tbody = $t.find('tbody');
+       var $total = $tbody.find('tr').filter(function(){
+         return $(this).find('td').eq(0).text().trim() === 'Total';
+       });
+       if ($total.length) {
+         var $tfoot = $t.find('tfoot');
+         if (!$tfoot.length) $tfoot = $('<tfoot/>').appendTo($t);
+         $total.appendTo($tfoot);
+       }
+     });
+
+     // 2) Initialize DataTable with SearchPanes button
+     var FILTER_COLS = [1,2,3,4,5,6];
+     // Detect the index of the size column by header text
+     var sizeIdx = (function(){
+       var idx = -1;
+       $('.sortable thead th').each(function(i){
+         var t = $(this).text().trim().toLowerCase();
+         if (t === 'size on disk' || t === 'size') idx = i;
+       });
+       return idx;
+     })();
+
+     var table = $('.sortable').DataTable({
+       dom: 'Blfrtip',
+       paging: false,
+       searching: true,
+       info: false,
+       language: {
+         search: 'Filter dataset:',
+         searchPanes: { collapse: { 0: 'Filters', _: 'Filters (%d)' } }
+       },
+       buttons: [{
+         extend: 'searchPanes',
+         text: 'Filters',
+         config: { cascadePanes: true, viewTotal: true, layout: 'columns-4', initCollapsed: false }
+       }],
+       columnDefs: (function(){
+         var defs = [
+           { searchPanes: { show: true }, targets: FILTER_COLS }
+         ];
+         if (sizeIdx !== -1) {
+           defs.push({
+             targets: sizeIdx,
+             render: function(data, type) {
+               if (type === 'sort' || type === 'type') {
+                 return parseSizeToBytes(data);
+               }
+               return data;
+             }
+           });
+         }
+         return defs;
+       })()
+     });
+
+     // 3) UX: click a header to open the relevant filter pane
+     $('.sortable thead th').each(function (i) {
+       if ([1,2,3,4].indexOf(i) === -1) return;
+       $(this).css('cursor','pointer').attr('title','Click to filter this column');
+       $(this).on('click', function () {
+         table.button('.buttons-searchPanes').trigger();
+         setTimeout(function () {
+           var idx = [1,2,3,4].indexOf(i);
+           var $container = $(table.searchPanes.container());
+           var $pane = $container.find('.dtsp-pane').eq(idx);
+           var $title = $pane.find('.dtsp-title');
+           if ($title.length) $title.trigger('click');
+         }, 0);
+       });
+     });
+   });
+   </script>
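The size column in the new table sorts numerically because the render callback converts the human-readable size strings to bytes before comparing. A Python mirror of that parseSizeToBytes() helper, just to make the conversion rule explicit (the page itself runs the JavaScript above):

    import re

    # "4.31 GB" -> 4.31 * 1024**3; unknown or empty strings sort as 0
    _FACTORS = {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3, "TB": 1024**4}

    def parse_size_to_bytes(text) -> float:
        m = re.search(r"([\d,.]+)\s*(TB|GB|MB|KB|B)", str(text or ""), re.IGNORECASE)
        if not m:
            return 0.0
        value = float(m.group(1).replace(",", ""))
        return value * _FACTORS[m.group(2).upper()]

    assert parse_size_to_bytes("1.2GB") == 1.2 * 1024**3
    assert parse_size_to_bytes("100MB") == 100 * 1024**2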
eegdash/__init__.py
@@ -7,4 +7,4 @@ __init__mongo_client()

 __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset"]

-__version__ = "0.3.6.dev182011805"
+__version__ = "0.3.6.dev183416654"
eegdash/api.py
@@ -6,9 +6,11 @@ from typing import Any, Mapping

 import mne
 import numpy as np
+import platformdirs
 import xarray as xr
 from dotenv import load_dotenv
 from joblib import Parallel, delayed
+from mne.utils import warn
 from mne_bids import get_bids_path_from_fname, read_raw_bids
 from pymongo import InsertOne, UpdateOne
 from s3fs import S3FileSystem
@@ -693,9 +695,8 @@ class EEGDash:
 class EEGDashDataset(BaseConcatDataset):
     def __init__(
         self,
-        query: dict | None = None,
-        cache_dir: str = "~/eegdash_cache",
-        dataset: str | list[str] | None = None,
+        cache_dir: str | Path,
+        query: dict[str, Any] = None,
         description_fields: list[str] = [
             "subject",
             "session",
@@ -706,9 +707,9 @@ class EEGDashDataset(BaseConcatDataset):
             "sex",
         ],
         s3_bucket: str | None = None,
-        data_dir: str | None = None,
         eeg_dash_instance=None,
         records: list[dict] | None = None,
+        offline_mode: bool = False,
         **kwargs,
     ):
         """Create a new EEGDashDataset from a given query or local BIDS dataset directory
@@ -754,34 +755,36 @@ class EEGDashDataset(BaseConcatDataset):
         records : list[dict] | None
             Optional list of pre-fetched metadata records. If provided, the dataset is
             constructed directly from these records without querying MongoDB.
+        offline_mode : bool
+            If True, do not attempt to query MongoDB at all. This is useful if you want to
+            work with a local cache only, or if you are offline.
         kwargs : dict
             Additional keyword arguments to be passed to the EEGDashBaseDataset
             constructor.

         """
-        self.cache_dir = cache_dir
+        self.cache_dir = Path(cache_dir or platformdirs.user_cache_dir("EEGDash"))
+        if not self.cache_dir.exists():
+            warn(f"Cache directory does not exist, creating it: {self.cache_dir}")
+            self.cache_dir.mkdir(exist_ok=True, parents=True)
         self.s3_bucket = s3_bucket
         self.eeg_dash = eeg_dash_instance
-        _owns_client = False
-        if self.eeg_dash is None and records is None:
-            self.eeg_dash = EEGDash()
-            _owns_client = True

         # Separate query kwargs from other kwargs passed to the BaseDataset constructor
-        query_kwargs = {
-            k: v for k, v in kwargs.items() if k in EEGDash._ALLOWED_QUERY_FIELDS
-        }
-        base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in query_kwargs}
+        self.query = query or {}
+        self.query.update(
+            {k: v for k, v in kwargs.items() if k in EEGDash._ALLOWED_QUERY_FIELDS}
+        )
+        base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in self.query}
+        if "dataset" not in self.query:
+            raise ValueError("You must provide a 'dataset' argument")

-        # If user provided a dataset name via the dedicated parameter (and we're not
-        # loading from a local directory), treat it as a query filter. Accept str or list.
-        if data_dir is None and dataset is not None:
-            # Allow callers to pass a single dataset id (str) or a list of them.
-            # If list is provided, let _build_query_from_kwargs turn it into $in later.
-            query_kwargs.setdefault("dataset", dataset)
+        self.data_dir = self.cache_dir / self.query["dataset"]

-        # Allow mixing raw DB query with additional keyword filters. Both will be
-        # merged by EEGDash.find() (logical AND), so we do not raise here.
+        _owns_client = False
+        if self.eeg_dash is None and records is None:
+            self.eeg_dash = EEGDash()
+            _owns_client = True

         try:
             if records is not None:
@@ -795,42 +798,25 @@ class EEGDashDataset(BaseConcatDataset):
                     )
                     for record in self.records
                 ]
-            elif data_dir:
-                # This path loads from a local directory and is not affected by DB query logic
-                if isinstance(data_dir, (str, Path)):
+            elif offline_mode:  # only assume local data is complete if in offline mode
+                if self.data_dir.exists():
+                    # This path loads from a local directory and is not affected by DB query logic
                     datasets = self.load_bids_dataset(
-                        dataset=dataset
-                        if isinstance(dataset, str)
-                        else (dataset[0] if dataset else None),
-                        data_dir=data_dir,
+                        dataset=self.query["dataset"],
+                        data_dir=self.data_dir,
                         description_fields=description_fields,
                         s3_bucket=s3_bucket,
                         **base_dataset_kwargs,
                     )
                 else:
-                    assert dataset is not None, (
-                        "dataset must be provided when passing multiple data_dir"
-                    )
-                    assert len(data_dir) == len(dataset), (
-                        "Number of datasets and directories must match"
+                    raise ValueError(
+                        f"Offline mode is enabled, but local data_dir {self.data_dir} does not exist."
                     )
-                    datasets = []
-                    for i, _ in enumerate(data_dir):
-                        datasets.extend(
-                            self.load_bids_dataset(
-                                dataset=dataset[i],
-                                data_dir=data_dir[i],
-                                description_fields=description_fields,
-                                s3_bucket=s3_bucket,
-                                **base_dataset_kwargs,
-                            )
-                        )
-            elif query is not None or query_kwargs:
+            elif self.query:
                 # This is the DB query path that we are improving
-                datasets = self.find_datasets(
-                    query=query,
+                datasets = self._find_datasets(
+                    query=self.eeg_dash._build_query_from_kwargs(**self.query),
                     description_fields=description_fields,
-                    query_kwargs=query_kwargs,
                     base_dataset_kwargs=base_dataset_kwargs,
                 )
             # We only need filesystem if we need to access S3
@@ -860,11 +846,10 @@
             return result
         return None

-    def find_datasets(
+    def _find_datasets(
         self,
         query: dict[str, Any] | None,
         description_fields: list[str],
-        query_kwargs: dict,
         base_dataset_kwargs: dict,
     ) -> list[EEGDashBaseDataset]:
         """Helper method to find datasets in the MongoDB collection that satisfy the
@@ -888,11 +873,7 @@
         """
         datasets: list[EEGDashBaseDataset] = []

-        # Build records using either a raw query OR keyword filters, but not both.
-        # Note: callers may accidentally pass an empty dict for `query` along with
-        # kwargs. In that case, treat it as if no query was provided and rely on kwargs.
-        # Always delegate merging of raw query + kwargs to EEGDash.find
-        self.records = self.eeg_dash.find(query, **query_kwargs)
+        self.records = self.eeg_dash.find(query)

         for record in self.records:
             description = {}
@@ -903,8 +884,8 @@
             datasets.append(
                 EEGDashBaseDataset(
                     record,
-                    self.cache_dir,
-                    self.s3_bucket,
+                    cache_dir=self.cache_dir,
+                    s3_bucket=self.s3_bucket,
                     description=description,
                     **base_dataset_kwargs,
                 )
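The net effect of the api.py changes on callers: cache_dir is now the first, required parameter; the separate dataset and data_dir parameters are gone, with the dataset id passed as a (now mandatory) query filter; and offline_mode replaces the old data_dir code path. A hedged usage sketch, with the dataset/subject/task values borrowed from the updated test_offline.py rather than prescribed by the API:

    from eegdash import EEGDashDataset

    # Online path: 'dataset' is now required and becomes part of the Mongo query
    ds = EEGDashDataset(
        cache_dir="eegdash_cache",   # first positional argument, required
        dataset="ds005509",
        subject="NDARAC350XUM",
        task="RestingState",
    )

    # Offline path: MongoDB is never contacted; <cache_dir>/<dataset> must
    # already exist locally, otherwise a ValueError is raised
    ds_local = EEGDashDataset(
        cache_dir="eegdash_cache",
        dataset="ds005509",
        offline_mode=True,
    )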
eegdash/dataset.py
@@ -335,7 +335,7 @@ class EEGChallengeDataset(EEGDashDataset):
         s3_bucket = f"{s3_bucket}/{release}_L100_bdf"

         super().__init__(
-            dataset=dataset_parameters,
+            dataset=RELEASE_TO_OPENNEURO_DATASET_MAP[release],
             query=query,
             cache_dir=cache_dir,
             s3_bucket=s3_bucket,
eegdash/registry.py
@@ -57,7 +57,7 @@ def register_openneuro_datasets(

     init = make_init(dataset_id)

-    doc = f"""Create an instance for OpenNeuro dataset ``{dataset_id}``.
+    doc = f"""OpenNeuro dataset ``{dataset_id}``.

     {markdown_table(row_series)}

@@ -69,11 +69,15 @@
         Extra Mongo query merged with ``{{'dataset': '{dataset_id}'}}``.
     s3_bucket : str | None
         Optional S3 bucket name.
+    subject : str | None
+        Optional subject identifier.
+    task : str | None
+        Optional task identifier.
     **kwargs
         Passed through to {base_class.__name__}.
     """

-    init.__doc__ = doc
+    # init.__doc__ = doc

     cls = type(
         class_name,
@@ -101,6 +105,7 @@ def markdown_table(row_series: pd.Series) -> str:
     """Create a reStructuredText grid table from a pandas Series."""
     if row_series.empty:
         return ""
+    dataset_id = row_series["dataset"]

     # Prepare the dataframe with user's suggested logic
     df = (
@@ -112,6 +117,7 @@
                 "n_tasks": "#Classes",
                 "sampling_freqs": "Freq(Hz)",
                 "duration_hours_total": "Duration(H)",
+                "size": "Size",
             }
         )
         .reindex(
@@ -122,6 +128,7 @@
                 "#Classes",
                 "Freq(Hz)",
                 "Duration(H)",
+                "Size",
             ]
         )
         .infer_objects(copy=False)
@@ -131,6 +138,9 @@
     # Use tabulate for the final rst formatting
     table = tabulate(df, headers="keys", tablefmt="rst", showindex=False)

+    # Add a caption for the table
+    caption = f"Short overview of dataset {dataset_id}; more details in the `Nemar documentation <https://nemar.org/dataexplorer/detail?dataset_id={dataset_id}>`_."
+    # adding caption below the table
     # Indent the table to fit within the admonition block
     indented_table = "\n".join(" " + line for line in table.split("\n"))
-    return f"\n\n{indented_table}"
+    return f"\n\n{indented_table}\n\n{caption}"
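For orientation, markdown_table() now keys on a "dataset" column and appends the Nemar caption after the reST table. A sketch of a call, assuming a summary row with the columns visible in this diff (the real dataset_summary.csv carries more, and the exact required columns depend on code outside this hunk):

    import pandas as pd
    from eegdash.registry import markdown_table

    # hypothetical row; column names taken from the rename map above
    row = pd.Series({
        "dataset": "ds002718",
        "n_tasks": 1,
        "sampling_freqs": 250,
        "duration_hours_total": 14.844,
        "size": "1.2GB",
    })

    # returns an indented reST grid table followed by a caption linking to
    # https://nemar.org/dataexplorer/detail?dataset_id=ds002718
    print(markdown_table(row))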
eegdash.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: eegdash
-Version: 0.3.6.dev182011805
+Version: 0.3.6.dev183416654
 Summary: EEG data for machine learning
 Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
 License-Expression: GPL-3.0-only
@@ -48,6 +48,7 @@ Requires-Dist: pytest_cases; extra == "tests"
 Requires-Dist: pytest-benchmark; extra == "tests"
 Provides-Extra: dev
 Requires-Dist: pre-commit; extra == "dev"
+Requires-Dist: ipykernel; extra == "dev"
 Provides-Extra: docs
 Requires-Dist: sphinx; extra == "docs"
 Requires-Dist: sphinx_design; extra == "docs"
@@ -55,10 +56,12 @@ Requires-Dist: sphinx_gallery; extra == "docs"
 Requires-Dist: sphinx_rtd_theme; extra == "docs"
 Requires-Dist: pydata-sphinx-theme; extra == "docs"
 Requires-Dist: sphinx-autobuild; extra == "docs"
+Requires-Dist: sphinx-sitemap; extra == "docs"
 Requires-Dist: numpydoc; extra == "docs"
 Requires-Dist: memory_profiler; extra == "docs"
 Requires-Dist: ipython; extra == "docs"
 Requires-Dist: lightgbm; extra == "docs"
+Requires-Dist: plotly; extra == "docs"
 Provides-Extra: all
 Requires-Dist: eegdash[docs]; extra == "all"
 Requires-Dist: eegdash[dev]; extra == "all"
eegdash.egg-info/SOURCES.txt
@@ -43,6 +43,7 @@ eegdash/features/feature_bank/utils.py
 tests/test_api.py
 tests/test_challenge_kwargs.py
 tests/test_correctness.py
+tests/test_database.py
 tests/test_dataset.py
 tests/test_dataset_registration.py
 tests/test_eegdash.py
eegdash.egg-info/requires.txt
@@ -22,6 +22,7 @@ eegdash[tests]

 [dev]
 pre-commit
+ipykernel

 [docs]
 sphinx
@@ -30,10 +31,12 @@ sphinx_gallery
 sphinx_rtd_theme
 pydata-sphinx-theme
 sphinx-autobuild
+sphinx-sitemap
 numpydoc
 memory_profiler
 ipython
 lightgbm
+plotly

 [tests]
 pytest
pyproject.toml
@@ -67,7 +67,8 @@ tests = [
     'pytest-benchmark',
 ]
 dev = [
-    "pre-commit"
+    "pre-commit",
+    "ipykernel"
 ]

 docs = [
@@ -77,10 +78,12 @@ docs = [
     "sphinx_rtd_theme",
     "pydata-sphinx-theme",
     "sphinx-autobuild",
+    "sphinx-sitemap",
     "numpydoc",
     "memory_profiler",
     "ipython",
     "lightgbm",
+    "plotly"
 ]

 all = [
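In practice the two new documentation dependencies arrive through the existing extras, e.g.:

    pip install "eegdash[docs]"   # now also pulls in sphinx-sitemap and plotly
    pip install "eegdash[all]"    # docs + dev extras, including the new ipykernel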
tests/test_database.py
File without changes (new, empty file)
tests/test_dataset_registration.py
@@ -16,9 +16,9 @@ def test_register_openneuro_datasets(tmp_path: Path):
     summary.write_text(
         "\n".join(
             [
-                "dataset_id,num_subjects,num_sessions,num_runs,num_channels,sampling_rate,duration",
-                "ds002718,18,18,1,74,250,14.844",
-                "ds000001,1,1,1,1,1,1",
+                "dataset,num_subjects,num_sessions,num_runs,num_channels,sampling_rate,duration,size",
+                "ds002718,18,18,1,74,250,14.844,1.2GB",
+                "ds000001,1,1,1,1,1,1,100MB",
             ]
         )
     )
tests/test_offline.py
@@ -12,7 +12,9 @@ def test_dataset_loads_without_eegdash(monkeypatch):
     """Dataset should load from records without contacting network resources."""
     eeg_dash = EEGDash()

-    records = eeg_dash.find(subject="NDARAC350XUM", task="RestingState")
+    records = eeg_dash.find(
+        dataset="ds005509", subject="NDARAC350XUM", task="RestingState"
+    )

     # test with internet
     dataset_internet = EEGDashDataset(
@@ -24,14 +26,14 @@ def test_dataset_loads_without_eegdash(monkeypatch):
     # Monkeypatch any network calls inside EEGDashDataset to raise if called
     monkeypatch.setattr(
         EEGDashDataset,
-        "find_datasets",
+        "_find_datasets",
         lambda *args, **kwargs: pytest.skip(
             "Skipping network download in offline test"
         ),
     )
     monkeypatch.setattr(
         EEGDashDataset,
-        "find_datasets",
+        "_find_datasets",
         lambda *args, **kwargs: pytest.skip(
             "Skipping network download in offline test"
         ),
@@ -39,7 +41,7 @@
     # TO-DO: discover way to do this pytest

     dataset_without_internet = EEGDashDataset(
-        records=records, cache_dir=CACHE_DIR, eeg_dash_instance=None
+        dataset="ds005509", records=records, cache_dir=CACHE_DIR, eeg_dash_instance=None
     )

     assert dataset_internet.datasets[0].raw == dataset_without_internet.datasets[0].raw
eegdash-0.3.6.dev182011805/docs/source/dataset_summary.rst (deleted)
@@ -1,85 +0,0 @@
-.. meta::
-   :hide_sidebar: true
-
-:html_theme.sidebar_secondary.remove:
-:html_theme.sidebar_primary.remove:
-
-.. _data_summary:
-.. automodule:: eegdash.dataset
-
-.. currentmodule:: eegdash.dataset
-
-To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
-
-The archive is currently still in :bdg-danger:`beta testing` mode, so be kind.
-
-EEG Dash Datasets
-==================
-
-The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
-
-Columns definitions for the table below:
-  - **dataset**: Name of the dataset.
-  - **n_records**: Number of EEG records in the dataset.
-  - **n_subjects**: Number of subjects in the dataset.
-  - **n_tasks**: Number of experimental tasks in the dataset.
-  - **nchans_set**: Set of EEG channel counts used in the dataset.
-  - **sampling_freqs**: Set of sampling frequencies used in the dataset.
-  - **duration_hours_total**: Total duration of all recordings in hours.
-
-
-Datasets
-======================
-
-.. csv-table::
-   :file: ../build/dataset_summary.csv
-   :header-rows: 1
-   :class: sortable
-
-
-
-.. raw:: html
-   <style>
-   /* Make this page full-width and remove side padding */
-   :root {
-     --pst-page-max-width: 100%;
-     --pst-content-max-width: 100%;
-   }
-   .bd-main .bd-content .bd-article-container {
-     max-width: 100%;
-     padding-left: 0;
-     padding-right: 0;
-   }
-   /* Ensure the DataTable uses the full width */
-   table.sortable { width: 100% !important; }
-   </style>
-
-   <link href="https://cdn.datatables.net/v/bm/jq-3.7.0/dt-2.3.2/af-2.7.0/b-3.2.4/b-html5-3.2.4/cr-2.1.1/fh-4.0.3/r-3.0.5/datatables.min.css"
-         rel="stylesheet"
-         integrity="sha384-aemAM3yl2c0KAZZkR1b1AwMH2u3r1NHOppsl5i6Ny1L5pfqn7SDH52qdaa1TbyN9"
-         crossorigin="anonymous">
-
-   <script src="https://cdn.datatables.net/v/bm/jq-3.7.0/dt-2.3.2/af-2.7.0/b-3.2.4/b-html5-3.2.4/cr-2.1.1/fh-4.0.3/r-3.0.5/datatables.min.js"
-           integrity="sha384-CKcCNsP1rMRsJFtrN6zMWK+KIK/FjYiV/d8uOp0LZtbEVzbidk105YcuVncAhBR8"
-           crossorigin="anonymous"></script>
-
-   <script>
-   document.addEventListener('DOMContentLoaded', function () {
-     const tables = document.querySelectorAll('table.sortable');
-     tables.forEach(function (tbl) {
-       // Use the jQuery plugin that ships in the bundle
-       $(tbl).DataTable({
-         paging: false,
-         searching: false,
-         info: false,
-         ordering: true,
-         responsive: true,
-         fixedHeader: true,
-         // Avoid re-initialization if this script runs more than once
-         retrieve: true,
-         scrollX: true
-       });
-     });
-   });
-   </script>
-