eegdash 0.3.6.dev182011805__tar.gz → 0.3.7.dev177024734__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of eegdash might be problematic. Click here for more details.
- {eegdash-0.3.6.dev182011805/eegdash.egg-info → eegdash-0.3.7.dev177024734}/PKG-INFO +4 -1
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/docs/source/conf.py +14 -0
- eegdash-0.3.7.dev177024734/docs/source/dataset_summary.rst +201 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/__init__.py +1 -1
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/api.py +65 -59
- eegdash-0.3.6.dev182011805/eegdash/dataset.py → eegdash-0.3.7.dev177024734/eegdash/const.py +0 -95
- eegdash-0.3.7.dev177024734/eegdash/dataset.py +118 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/registry.py +13 -3
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734/eegdash.egg-info}/PKG-INFO +4 -1
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash.egg-info/SOURCES.txt +2 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash.egg-info/requires.txt +3 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/pyproject.toml +4 -1
- eegdash-0.3.7.dev177024734/tests/test_database.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_dataset_registration.py +3 -3
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_offline.py +6 -4
- eegdash-0.3.6.dev182011805/docs/source/dataset_summary.rst +0 -85
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/LICENSE +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/MANIFEST.in +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/README.md +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/docs/Makefile +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/docs/source/index.rst +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/docs/source/install/install.rst +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/docs/source/install/install_pip.rst +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/docs/source/install/install_source.rst +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/docs/source/overview.rst +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/data_config.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/data_utils.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/dataset_summary.csv +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/__init__.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/datasets.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/decorators.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/extractors.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/__init__.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/complexity.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/connectivity.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/csp.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/dimensionality.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/signal.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/spectral.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/utils.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/inspect.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/serialization.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/utils.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/mongodb.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/preprocessing.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/utils.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash.egg-info/dependency_links.txt +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash.egg-info/top_level.txt +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/setup.cfg +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_api.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_challenge_kwargs.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_correctness.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_dataset.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_eegdash.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_functional.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_init.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_minirelease.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_mongo_connection.py +0 -0
- {eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_query.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.3.6.dev182011805
|
|
3
|
+
Version: 0.3.7.dev177024734
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
6
6
|
License-Expression: GPL-3.0-only
|
|
@@ -48,6 +48,7 @@ Requires-Dist: pytest_cases; extra == "tests"
|
|
|
48
48
|
Requires-Dist: pytest-benchmark; extra == "tests"
|
|
49
49
|
Provides-Extra: dev
|
|
50
50
|
Requires-Dist: pre-commit; extra == "dev"
|
|
51
|
+
Requires-Dist: ipykernel; extra == "dev"
|
|
51
52
|
Provides-Extra: docs
|
|
52
53
|
Requires-Dist: sphinx; extra == "docs"
|
|
53
54
|
Requires-Dist: sphinx_design; extra == "docs"
|
|
@@ -55,10 +56,12 @@ Requires-Dist: sphinx_gallery; extra == "docs"
|
|
|
55
56
|
Requires-Dist: sphinx_rtd_theme; extra == "docs"
|
|
56
57
|
Requires-Dist: pydata-sphinx-theme; extra == "docs"
|
|
57
58
|
Requires-Dist: sphinx-autobuild; extra == "docs"
|
|
59
|
+
Requires-Dist: sphinx-sitemap; extra == "docs"
|
|
58
60
|
Requires-Dist: numpydoc; extra == "docs"
|
|
59
61
|
Requires-Dist: memory_profiler; extra == "docs"
|
|
60
62
|
Requires-Dist: ipython; extra == "docs"
|
|
61
63
|
Requires-Dist: lightgbm; extra == "docs"
|
|
64
|
+
Requires-Dist: plotly; extra == "docs"
|
|
62
65
|
Provides-Extra: all
|
|
63
66
|
Requires-Dist: eegdash[docs]; extra == "all"
|
|
64
67
|
Requires-Dist: eegdash[dev]; extra == "all"
|
|
@@ -31,6 +31,8 @@ extensions = [
|
|
|
31
31
|
# "autoapi.extension",
|
|
32
32
|
"numpydoc",
|
|
33
33
|
"sphinx_gallery.gen_gallery",
|
|
34
|
+
# Generate sitemap.xml for search engines
|
|
35
|
+
"sphinx_sitemap",
|
|
34
36
|
]
|
|
35
37
|
|
|
36
38
|
templates_path = ["_templates"]
|
|
@@ -45,6 +47,11 @@ html_favicon = "_static/eegdash_icon.png"
|
|
|
45
47
|
html_title = "EEG Dash"
|
|
46
48
|
html_short_title = "EEG Dash"
|
|
47
49
|
html_css_files = ["custom.css"]
|
|
50
|
+
html_js_files = []
|
|
51
|
+
|
|
52
|
+
# Required for sphinx-sitemap: set the canonical base URL of the site
|
|
53
|
+
# Make sure this matches the actual published docs URL and ends with '/'
|
|
54
|
+
html_baseurl = "https://sccn.github.io/eegdash/"
|
|
48
55
|
|
|
49
56
|
html_theme_options = {
|
|
50
57
|
"icon_links_label": "External Links", # for screen reader
|
|
@@ -94,6 +101,9 @@ html_theme_options = {
|
|
|
94
101
|
|
|
95
102
|
html_sidebars = {"api": [], "dataset_summary": [], "installation": []}
|
|
96
103
|
|
|
104
|
+
# Copy extra files (e.g., robots.txt) to the output root
|
|
105
|
+
html_extra_path = ["_extra"]
|
|
106
|
+
|
|
97
107
|
|
|
98
108
|
# -- Extension configurations ------------------------------------------------
|
|
99
109
|
autoclass_content = "both"
|
|
@@ -140,3 +150,7 @@ def setup(app):
|
|
|
140
150
|
)
|
|
141
151
|
if not os.path.exists(backreferences_dir):
|
|
142
152
|
os.makedirs(backreferences_dir)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# Configure sitemap URL format (omit .html where possible)
|
|
156
|
+
sitemap_url_scheme = "{link}"
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
.. meta::
|
|
2
|
+
:hide_sidebar: true
|
|
3
|
+
|
|
4
|
+
:html_theme.sidebar_secondary.remove:
|
|
5
|
+
:html_theme.sidebar_primary.remove:
|
|
6
|
+
|
|
7
|
+
.. _data_summary:
|
|
8
|
+
|
|
9
|
+
EEGDash
|
|
10
|
+
========
|
|
11
|
+
|
|
12
|
+
To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
|
|
13
|
+
|
|
14
|
+
The archive is currently still in :bdg-danger:`beta testing` mode, so be kind.
|
|
15
|
+
|
|
16
|
+
.. raw:: html
|
|
17
|
+
|
|
18
|
+
<figure class="eegdash-figure" style="margin: 0 0 1.25rem 0;">
|
|
19
|
+
|
|
20
|
+
.. raw:: html
|
|
21
|
+
:file: ../build/dataset_bubble.html
|
|
22
|
+
|
|
23
|
+
.. raw:: html
|
|
24
|
+
|
|
25
|
+
<figcaption class="eegdash-caption">
|
|
26
|
+
Figure: Dataset landscape. Each bubble represents a dataset: x-axis shows the number of records,
|
|
27
|
+
y-axis the number of subjects, bubble area encodes on-disk size, and color indicates sampling frequency band.
|
|
28
|
+
Hover for details and use the legend to highlight groups.
|
|
29
|
+
</figcaption>
|
|
30
|
+
</figure>
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
.. raw:: html
|
|
34
|
+
|
|
35
|
+
<figure class="eegdash-figure" style="margin: 1.0rem 0 0 0;">
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
MEEG Datasets Table
|
|
39
|
+
===================
|
|
40
|
+
|
|
41
|
+
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs,
|
|
42
|
+
involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks.
|
|
43
|
+
|
|
44
|
+
In addition, EEG-DaSh will incorporate a subset of the data converted from `NEMAR <https://nemar.org/>`__, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
|
|
45
|
+
|
|
46
|
+
.. raw:: html
|
|
47
|
+
:file: ../build/dataset_summary_table.html
|
|
48
|
+
|
|
49
|
+
.. raw:: html
|
|
50
|
+
|
|
51
|
+
<figcaption class="eegdash-caption">
|
|
52
|
+
Table: Sortable catalogue of EEG‑DaSh datasets. Use the “Filters” button to open column filters;
|
|
53
|
+
click a column header to jump directly to a filter pane. The Total row is pinned at the bottom.
|
|
54
|
+
* means that we use the median value across multiple recordings in the dataset, and empty cells
|
|
55
|
+
indicate that the metadata has not been extracted yet.
|
|
56
|
+
</figcaption>
|
|
57
|
+
</figure>
|
|
58
|
+
|
|
59
|
+
.. raw:: html
|
|
60
|
+
|
|
61
|
+
<!-- jQuery + DataTables core -->
|
|
62
|
+
<script src="https://code.jquery.com/jquery-3.7.1.min.js"></script>
|
|
63
|
+
<link rel="stylesheet" href="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.css"/>
|
|
64
|
+
<script src="https://cdn.datatables.net/v/bm/dt-1.13.4/datatables.min.js"></script>
|
|
65
|
+
|
|
66
|
+
<!-- Buttons + SearchPanes (+ Select required by SearchPanes) -->
|
|
67
|
+
<link rel="stylesheet" href="https://cdn.datatables.net/buttons/2.4.2/css/buttons.dataTables.min.css">
|
|
68
|
+
<script src="https://cdn.datatables.net/buttons/2.4.2/js/dataTables.buttons.min.js"></script>
|
|
69
|
+
<link rel="stylesheet" href="https://cdn.datatables.net/select/1.7.0/css/select.dataTables.min.css">
|
|
70
|
+
<link rel="stylesheet" href="https://cdn.datatables.net/searchpanes/2.3.1/css/searchPanes.dataTables.min.css">
|
|
71
|
+
<script src="https://cdn.datatables.net/select/1.7.0/js/dataTables.select.min.js"></script>
|
|
72
|
+
<script src="https://cdn.datatables.net/searchpanes/2.3.1/js/dataTables.searchPanes.min.js"></script>
|
|
73
|
+
|
|
74
|
+
<style>
|
|
75
|
+
/* Styling for the Total row (placed in tfoot) */
|
|
76
|
+
table.sd-table tfoot td {
|
|
77
|
+
font-weight: 600;
|
|
78
|
+
border-top: 2px solid rgba(0,0,0,0.2);
|
|
79
|
+
background: #f9fafb;
|
|
80
|
+
/* Match body cell padding to keep perfect alignment */
|
|
81
|
+
padding: 8px 10px !important;
|
|
82
|
+
vertical-align: middle;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/* Right-align numeric-like columns (2..8) consistently for body & footer */
|
|
86
|
+
table.sd-table tbody td:nth-child(n+2),
|
|
87
|
+
table.sd-table tfoot td:nth-child(n+2) {
|
|
88
|
+
text-align: right;
|
|
89
|
+
}
|
|
90
|
+
/* Keep first column (Dataset/Total) left-aligned */
|
|
91
|
+
table.sd-table tbody td:first-child,
|
|
92
|
+
table.sd-table tfoot td:first-child {
|
|
93
|
+
text-align: left;
|
|
94
|
+
}
|
|
95
|
+
</style>
|
|
96
|
+
|
|
97
|
+
<script>
|
|
98
|
+
// Helper: robustly extract values for SearchPanes when needed
|
|
99
|
+
function tagsArrayFromHtml(html) {
|
|
100
|
+
if (html == null) return [];
|
|
101
|
+
// If it's numeric or plain text, just return as a single value
|
|
102
|
+
if (typeof html === 'number') return [String(html)];
|
|
103
|
+
if (typeof html === 'string' && html.indexOf('<') === -1) return [html.trim()];
|
|
104
|
+
// Else parse any .tag elements inside HTML
|
|
105
|
+
var tmp = document.createElement('div');
|
|
106
|
+
tmp.innerHTML = html;
|
|
107
|
+
var tags = Array.from(tmp.querySelectorAll('.tag')).map(function(el){
|
|
108
|
+
return (el.textContent || '').trim();
|
|
109
|
+
});
|
|
110
|
+
return tags.length ? tags : [tmp.textContent.trim()];
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Helper: parse human-readable sizes like "4.31 GB" into bytes (number)
|
|
114
|
+
function parseSizeToBytes(text) {
|
|
115
|
+
if (!text) return 0;
|
|
116
|
+
var s = String(text).trim();
|
|
117
|
+
var m = s.match(/([\d,.]+)\s*(TB|GB|MB|KB|B)/i);
|
|
118
|
+
if (!m) return 0;
|
|
119
|
+
var value = parseFloat(m[1].replace(/,/g, ''));
|
|
120
|
+
var unit = m[2].toUpperCase();
|
|
121
|
+
var factor = { B:1, KB:1024, MB:1024**2, GB:1024**3, TB:1024**4 }[unit] || 1;
|
|
122
|
+
return value * factor;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
$(function () {
|
|
126
|
+
// 1) Move the "Total" row into <tfoot> so sorting/filtering never moves it
|
|
127
|
+
$('.sortable').each(function(){
|
|
128
|
+
var $t = $(this);
|
|
129
|
+
var $tbody = $t.find('tbody');
|
|
130
|
+
var $total = $tbody.find('tr').filter(function(){
|
|
131
|
+
return $(this).find('td').eq(0).text().trim() === 'Total';
|
|
132
|
+
});
|
|
133
|
+
if ($total.length) {
|
|
134
|
+
var $tfoot = $t.find('tfoot');
|
|
135
|
+
if (!$tfoot.length) $tfoot = $('<tfoot/>').appendTo($t);
|
|
136
|
+
$total.appendTo($tfoot);
|
|
137
|
+
}
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
// 2) Initialize DataTable with SearchPanes button
|
|
141
|
+
var FILTER_COLS = [1,2,3,4,5,6];
|
|
142
|
+
// Detect the index of the size column by header text
|
|
143
|
+
var sizeIdx = (function(){
|
|
144
|
+
var idx = -1;
|
|
145
|
+
$('.sortable thead th').each(function(i){
|
|
146
|
+
var t = $(this).text().trim().toLowerCase();
|
|
147
|
+
if (t === 'size on disk' || t === 'size') idx = i;
|
|
148
|
+
});
|
|
149
|
+
return idx;
|
|
150
|
+
})();
|
|
151
|
+
|
|
152
|
+
var table = $('.sortable').DataTable({
|
|
153
|
+
dom: 'Blfrtip',
|
|
154
|
+
paging: false,
|
|
155
|
+
searching: true,
|
|
156
|
+
info: false,
|
|
157
|
+
language: {
|
|
158
|
+
search: 'Filter dataset:',
|
|
159
|
+
searchPanes: { collapse: { 0: 'Filters', _: 'Filters (%d)' } }
|
|
160
|
+
},
|
|
161
|
+
buttons: [{
|
|
162
|
+
extend: 'searchPanes',
|
|
163
|
+
text: 'Filters',
|
|
164
|
+
config: { cascadePanes: true, viewTotal: true, layout: 'columns-4', initCollapsed: false }
|
|
165
|
+
}],
|
|
166
|
+
columnDefs: (function(){
|
|
167
|
+
var defs = [
|
|
168
|
+
{ searchPanes: { show: true }, targets: FILTER_COLS }
|
|
169
|
+
];
|
|
170
|
+
if (sizeIdx !== -1) {
|
|
171
|
+
defs.push({
|
|
172
|
+
targets: sizeIdx,
|
|
173
|
+
render: function(data, type) {
|
|
174
|
+
if (type === 'sort' || type === 'type') {
|
|
175
|
+
return parseSizeToBytes(data);
|
|
176
|
+
}
|
|
177
|
+
return data;
|
|
178
|
+
}
|
|
179
|
+
});
|
|
180
|
+
}
|
|
181
|
+
return defs;
|
|
182
|
+
})()
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
// 3) UX: click a header to open the relevant filter pane
|
|
186
|
+
$('.sortable thead th').each(function (i) {
|
|
187
|
+
if ([1,2,3,4].indexOf(i) === -1) return;
|
|
188
|
+
$(this).css('cursor','pointer').attr('title','Click to filter this column');
|
|
189
|
+
$(this).on('click', function () {
|
|
190
|
+
table.button('.buttons-searchPanes').trigger();
|
|
191
|
+
setTimeout(function () {
|
|
192
|
+
var idx = [1,2,3,4].indexOf(i);
|
|
193
|
+
var $container = $(table.searchPanes.container());
|
|
194
|
+
var $pane = $container.find('.dtsp-pane').eq(idx);
|
|
195
|
+
var $title = $pane.find('.dtsp-title');
|
|
196
|
+
if ($title.length) $title.trigger('click');
|
|
197
|
+
}, 0);
|
|
198
|
+
});
|
|
199
|
+
});
|
|
200
|
+
});
|
|
201
|
+
</script>
|
|
@@ -6,15 +6,18 @@ from typing import Any, Mapping
|
|
|
6
6
|
|
|
7
7
|
import mne
|
|
8
8
|
import numpy as np
|
|
9
|
+
import platformdirs
|
|
9
10
|
import xarray as xr
|
|
10
11
|
from dotenv import load_dotenv
|
|
11
12
|
from joblib import Parallel, delayed
|
|
13
|
+
from mne.utils import warn
|
|
12
14
|
from mne_bids import get_bids_path_from_fname, read_raw_bids
|
|
13
15
|
from pymongo import InsertOne, UpdateOne
|
|
14
16
|
from s3fs import S3FileSystem
|
|
15
17
|
|
|
16
18
|
from braindecode.datasets import BaseConcatDataset
|
|
17
19
|
|
|
20
|
+
from .const import RELEASE_TO_OPENNEURO_DATASET_MAP
|
|
18
21
|
from .data_config import config as data_config
|
|
19
22
|
from .data_utils import EEGBIDSDataset, EEGDashBaseDataset
|
|
20
23
|
from .mongodb import MongoConnectionManager
|
|
@@ -693,9 +696,8 @@ class EEGDash:
|
|
|
693
696
|
class EEGDashDataset(BaseConcatDataset):
|
|
694
697
|
def __init__(
|
|
695
698
|
self,
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
dataset: str | list[str] | None = None,
|
|
699
|
+
cache_dir: str | Path,
|
|
700
|
+
query: dict[str, Any] = None,
|
|
699
701
|
description_fields: list[str] = [
|
|
700
702
|
"subject",
|
|
701
703
|
"session",
|
|
@@ -706,9 +708,10 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
706
708
|
"sex",
|
|
707
709
|
],
|
|
708
710
|
s3_bucket: str | None = None,
|
|
709
|
-
data_dir: str | None = None,
|
|
710
711
|
eeg_dash_instance=None,
|
|
711
712
|
records: list[dict] | None = None,
|
|
713
|
+
offline_mode: bool = False,
|
|
714
|
+
n_jobs: int = -1,
|
|
712
715
|
**kwargs,
|
|
713
716
|
):
|
|
714
717
|
"""Create a new EEGDashDataset from a given query or local BIDS dataset directory
|
|
@@ -754,35 +757,54 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
754
757
|
records : list[dict] | None
|
|
755
758
|
Optional list of pre-fetched metadata records. If provided, the dataset is
|
|
756
759
|
constructed directly from these records without querying MongoDB.
|
|
760
|
+
offline_mode : bool
|
|
761
|
+
If True, do not attempt to query MongoDB at all. This is useful if you want to
|
|
762
|
+
work with a local cache only, or if you are offline.
|
|
763
|
+
n_jobs : int
|
|
764
|
+
The number of jobs to run in parallel (default is -1, meaning using all processors).
|
|
757
765
|
kwargs : dict
|
|
758
766
|
Additional keyword arguments to be passed to the EEGDashBaseDataset
|
|
759
767
|
constructor.
|
|
760
768
|
|
|
761
769
|
"""
|
|
762
|
-
self.cache_dir = cache_dir
|
|
770
|
+
self.cache_dir = Path(cache_dir or platformdirs.user_cache_dir("EEGDash"))
|
|
771
|
+
if not self.cache_dir.exists():
|
|
772
|
+
warn(f"Cache directory does not exist, creating it: {self.cache_dir}")
|
|
773
|
+
self.cache_dir.mkdir(exist_ok=True, parents=True)
|
|
763
774
|
self.s3_bucket = s3_bucket
|
|
764
775
|
self.eeg_dash = eeg_dash_instance
|
|
776
|
+
|
|
777
|
+
# Separate query kwargs from other kwargs passed to the BaseDataset constructor
|
|
778
|
+
self.query = query or {}
|
|
779
|
+
self.query.update(
|
|
780
|
+
{k: v for k, v in kwargs.items() if k in EEGDash._ALLOWED_QUERY_FIELDS}
|
|
781
|
+
)
|
|
782
|
+
base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in self.query}
|
|
783
|
+
if "dataset" not in self.query:
|
|
784
|
+
raise ValueError("You must provide a 'dataset' argument")
|
|
785
|
+
|
|
786
|
+
self.data_dir = self.cache_dir / self.query["dataset"]
|
|
787
|
+
if self.query["dataset"] in RELEASE_TO_OPENNEURO_DATASET_MAP.values():
|
|
788
|
+
warn(
|
|
789
|
+
"If you are not participating in the competition, you can ignore this warning!"
|
|
790
|
+
"\n\n"
|
|
791
|
+
"[EEGChallengeDataset] EEG 2025 Competition Data Notice:\n"
|
|
792
|
+
"-------------------------------------------------------\n"
|
|
793
|
+
" You are loading the dataset that is used in the EEG 2025 Competition:\n"
|
|
794
|
+
"IMPORTANT: The data accessed via `EEGDashDataset` is NOT identical to what you get from `EEGChallengeDataset` directly.\n"
|
|
795
|
+
"and it is not what you will use for the competition. Downsampling and filtering were applied to the data"
|
|
796
|
+
"to allow more people to participate.\n"
|
|
797
|
+
"\n",
|
|
798
|
+
"If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data.\n"
|
|
799
|
+
"\n",
|
|
800
|
+
UserWarning,
|
|
801
|
+
module="eegdash",
|
|
802
|
+
)
|
|
765
803
|
_owns_client = False
|
|
766
804
|
if self.eeg_dash is None and records is None:
|
|
767
805
|
self.eeg_dash = EEGDash()
|
|
768
806
|
_owns_client = True
|
|
769
807
|
|
|
770
|
-
# Separate query kwargs from other kwargs passed to the BaseDataset constructor
|
|
771
|
-
query_kwargs = {
|
|
772
|
-
k: v for k, v in kwargs.items() if k in EEGDash._ALLOWED_QUERY_FIELDS
|
|
773
|
-
}
|
|
774
|
-
base_dataset_kwargs = {k: v for k, v in kwargs.items() if k not in query_kwargs}
|
|
775
|
-
|
|
776
|
-
# If user provided a dataset name via the dedicated parameter (and we're not
|
|
777
|
-
# loading from a local directory), treat it as a query filter. Accept str or list.
|
|
778
|
-
if data_dir is None and dataset is not None:
|
|
779
|
-
# Allow callers to pass a single dataset id (str) or a list of them.
|
|
780
|
-
# If list is provided, let _build_query_from_kwargs turn it into $in later.
|
|
781
|
-
query_kwargs.setdefault("dataset", dataset)
|
|
782
|
-
|
|
783
|
-
# Allow mixing raw DB query with additional keyword filters. Both will be
|
|
784
|
-
# merged by EEGDash.find() (logical AND), so we do not raise here.
|
|
785
|
-
|
|
786
808
|
try:
|
|
787
809
|
if records is not None:
|
|
788
810
|
self.records = records
|
|
@@ -795,42 +817,26 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
795
817
|
)
|
|
796
818
|
for record in self.records
|
|
797
819
|
]
|
|
798
|
-
elif
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
datasets = self.
|
|
802
|
-
dataset=dataset
|
|
803
|
-
|
|
804
|
-
else (dataset[0] if dataset else None),
|
|
805
|
-
data_dir=data_dir,
|
|
820
|
+
elif offline_mode: # only assume local data is complete if in offline mode
|
|
821
|
+
if self.data_dir.exists():
|
|
822
|
+
# This path loads from a local directory and is not affected by DB query logic
|
|
823
|
+
datasets = self.load_bids_daxtaset(
|
|
824
|
+
dataset=self.query["dataset"],
|
|
825
|
+
data_dir=self.data_dir,
|
|
806
826
|
description_fields=description_fields,
|
|
807
827
|
s3_bucket=s3_bucket,
|
|
828
|
+
n_jobs=n_jobs,
|
|
808
829
|
**base_dataset_kwargs,
|
|
809
830
|
)
|
|
810
831
|
else:
|
|
811
|
-
|
|
812
|
-
"
|
|
813
|
-
)
|
|
814
|
-
assert len(data_dir) == len(dataset), (
|
|
815
|
-
"Number of datasets and directories must match"
|
|
832
|
+
raise ValueError(
|
|
833
|
+
f"Offline mode is enabled, but local data_dir {self.data_dir} does not exist."
|
|
816
834
|
)
|
|
817
|
-
|
|
818
|
-
for i, _ in enumerate(data_dir):
|
|
819
|
-
datasets.extend(
|
|
820
|
-
self.load_bids_dataset(
|
|
821
|
-
dataset=dataset[i],
|
|
822
|
-
data_dir=data_dir[i],
|
|
823
|
-
description_fields=description_fields,
|
|
824
|
-
s3_bucket=s3_bucket,
|
|
825
|
-
**base_dataset_kwargs,
|
|
826
|
-
)
|
|
827
|
-
)
|
|
828
|
-
elif query is not None or query_kwargs:
|
|
835
|
+
elif self.query:
|
|
829
836
|
# This is the DB query path that we are improving
|
|
830
|
-
datasets = self.
|
|
831
|
-
query=query,
|
|
837
|
+
datasets = self._find_datasets(
|
|
838
|
+
query=self.eeg_dash._build_query_from_kwargs(**self.query),
|
|
832
839
|
description_fields=description_fields,
|
|
833
|
-
query_kwargs=query_kwargs,
|
|
834
840
|
base_dataset_kwargs=base_dataset_kwargs,
|
|
835
841
|
)
|
|
836
842
|
# We only need filesystem if we need to access S3
|
|
@@ -860,11 +866,10 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
860
866
|
return result
|
|
861
867
|
return None
|
|
862
868
|
|
|
863
|
-
def
|
|
869
|
+
def _find_datasets(
|
|
864
870
|
self,
|
|
865
871
|
query: dict[str, Any] | None,
|
|
866
872
|
description_fields: list[str],
|
|
867
|
-
query_kwargs: dict,
|
|
868
873
|
base_dataset_kwargs: dict,
|
|
869
874
|
) -> list[EEGDashBaseDataset]:
|
|
870
875
|
"""Helper method to find datasets in the MongoDB collection that satisfy the
|
|
@@ -888,11 +893,7 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
888
893
|
"""
|
|
889
894
|
datasets: list[EEGDashBaseDataset] = []
|
|
890
895
|
|
|
891
|
-
|
|
892
|
-
# Note: callers may accidentally pass an empty dict for `query` along with
|
|
893
|
-
# kwargs. In that case, treat it as if no query was provided and rely on kwargs.
|
|
894
|
-
# Always delegate merging of raw query + kwargs to EEGDash.find
|
|
895
|
-
self.records = self.eeg_dash.find(query, **query_kwargs)
|
|
896
|
+
self.records = self.eeg_dash.find(query)
|
|
896
897
|
|
|
897
898
|
for record in self.records:
|
|
898
899
|
description = {}
|
|
@@ -903,8 +904,8 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
903
904
|
datasets.append(
|
|
904
905
|
EEGDashBaseDataset(
|
|
905
906
|
record,
|
|
906
|
-
self.cache_dir,
|
|
907
|
-
self.s3_bucket,
|
|
907
|
+
cache_dir=self.cache_dir,
|
|
908
|
+
s3_bucket=self.s3_bucket,
|
|
908
909
|
description=description,
|
|
909
910
|
**base_dataset_kwargs,
|
|
910
911
|
)
|
|
@@ -917,6 +918,7 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
917
918
|
data_dir: str | Path,
|
|
918
919
|
description_fields: list[str],
|
|
919
920
|
s3_bucket: str | None = None,
|
|
921
|
+
n_jobs: int = -1,
|
|
920
922
|
**kwargs,
|
|
921
923
|
):
|
|
922
924
|
"""Helper method to load a single local BIDS dataset and return it as a list of
|
|
@@ -931,13 +933,17 @@ class EEGDashDataset(BaseConcatDataset):
|
|
|
931
933
|
description_fields : list[str]
|
|
932
934
|
A list of fields to be extracted from the dataset records
|
|
933
935
|
and included in the returned dataset description(s).
|
|
936
|
+
s3_bucket : str | None
|
|
937
|
+
The S3 bucket to upload the dataset files to (if any).
|
|
938
|
+
n_jobs : int
|
|
939
|
+
The number of jobs to run in parallel (default is -1, meaning using all processors).
|
|
934
940
|
|
|
935
941
|
"""
|
|
936
942
|
bids_dataset = EEGBIDSDataset(
|
|
937
943
|
data_dir=data_dir,
|
|
938
944
|
dataset=dataset,
|
|
939
945
|
)
|
|
940
|
-
datasets = Parallel(n_jobs
|
|
946
|
+
datasets = Parallel(n_jobs=n_jobs, prefer="threads", verbose=1)(
|
|
941
947
|
delayed(self.get_base_dataset_from_bids_file)(
|
|
942
948
|
bids_dataset=bids_dataset,
|
|
943
949
|
bids_file=bids_file,
|
|
@@ -1,8 +1,3 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
from .api import EEGDashDataset
|
|
4
|
-
from .registry import register_openneuro_datasets
|
|
5
|
-
|
|
6
1
|
RELEASE_TO_OPENNEURO_DATASET_MAP = {
|
|
7
2
|
"R11": "ds005516",
|
|
8
3
|
"R10": "ds005515",
|
|
@@ -261,93 +256,3 @@ SUBJECT_MINI_RELEASE_MAP = {
|
|
|
261
256
|
"NDARFW972KFQ",
|
|
262
257
|
],
|
|
263
258
|
}
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
class EEGChallengeDataset(EEGDashDataset):
|
|
267
|
-
def __init__(
|
|
268
|
-
self,
|
|
269
|
-
release: str,
|
|
270
|
-
cache_dir: str,
|
|
271
|
-
mini: bool = True,
|
|
272
|
-
query: dict | None = None,
|
|
273
|
-
s3_bucket: str | None = "s3://nmdatasets/NeurIPS25",
|
|
274
|
-
**kwargs,
|
|
275
|
-
):
|
|
276
|
-
"""Create a new EEGDashDataset from a given query or local BIDS dataset directory
|
|
277
|
-
and dataset name. An EEGDashDataset is pooled collection of EEGDashBaseDataset
|
|
278
|
-
instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.
|
|
279
|
-
|
|
280
|
-
Parameters
|
|
281
|
-
----------
|
|
282
|
-
release: str
|
|
283
|
-
Release name. Can be one of ["R1", ..., "R11"]
|
|
284
|
-
mini: bool, default True
|
|
285
|
-
Whether to use the mini-release version of the dataset. It is recommended
|
|
286
|
-
to use the mini version for faster training and evaluation.
|
|
287
|
-
query : dict | None
|
|
288
|
-
Optionally a dictionary that specifies a query to be executed,
|
|
289
|
-
in addition to the dataset (automatically inferred from the release argument).
|
|
290
|
-
See EEGDash.find() for details on the query format.
|
|
291
|
-
cache_dir : str
|
|
292
|
-
A directory where the dataset will be cached locally.
|
|
293
|
-
s3_bucket : str | None
|
|
294
|
-
An optional S3 bucket URI to use instead of the
|
|
295
|
-
default OpenNeuro bucket for loading data files.
|
|
296
|
-
kwargs : dict
|
|
297
|
-
Additional keyword arguments to be passed to the EEGDashDataset
|
|
298
|
-
constructor.
|
|
299
|
-
|
|
300
|
-
"""
|
|
301
|
-
self.release = release
|
|
302
|
-
self.mini = mini
|
|
303
|
-
|
|
304
|
-
if release not in RELEASE_TO_OPENNEURO_DATASET_MAP:
|
|
305
|
-
raise ValueError(
|
|
306
|
-
f"Unknown release: {release}, expected one of {list(RELEASE_TO_OPENNEURO_DATASET_MAP.keys())}"
|
|
307
|
-
)
|
|
308
|
-
|
|
309
|
-
dataset_parameters = []
|
|
310
|
-
if isinstance(release, str):
|
|
311
|
-
dataset_parameters.append(RELEASE_TO_OPENNEURO_DATASET_MAP[release])
|
|
312
|
-
else:
|
|
313
|
-
raise ValueError(
|
|
314
|
-
f"Unknown release type: {type(release)}, the expected type is str."
|
|
315
|
-
)
|
|
316
|
-
|
|
317
|
-
if query and "dataset" in query:
|
|
318
|
-
raise ValueError(
|
|
319
|
-
"Query using the parameters `dataset` with the class EEGChallengeDataset is not possible."
|
|
320
|
-
"Please use the release argument instead, or the object EEGDashDataset instead."
|
|
321
|
-
)
|
|
322
|
-
|
|
323
|
-
if self.mini:
|
|
324
|
-
# Disallow mixing subject selection with mini=True since mini already
|
|
325
|
-
# applies a predefined subject subset.
|
|
326
|
-
if (query and "subject" in query) or ("subject" in kwargs):
|
|
327
|
-
raise ValueError(
|
|
328
|
-
"Query using the parameters `subject` with the class EEGChallengeDataset and `mini==True` is not possible."
|
|
329
|
-
"Please don't use the `subject` selection twice."
|
|
330
|
-
"Set `mini=False` to use the `subject` selection."
|
|
331
|
-
)
|
|
332
|
-
kwargs["subject"] = SUBJECT_MINI_RELEASE_MAP[release]
|
|
333
|
-
s3_bucket = f"{s3_bucket}/{release}_mini_L100_bdf"
|
|
334
|
-
else:
|
|
335
|
-
s3_bucket = f"{s3_bucket}/{release}_L100_bdf"
|
|
336
|
-
|
|
337
|
-
super().__init__(
|
|
338
|
-
dataset=dataset_parameters,
|
|
339
|
-
query=query,
|
|
340
|
-
cache_dir=cache_dir,
|
|
341
|
-
s3_bucket=s3_bucket,
|
|
342
|
-
**kwargs,
|
|
343
|
-
)
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
registered_classes = register_openneuro_datasets(
|
|
347
|
-
summary_file=Path(__file__).with_name("dataset_summary.csv"),
|
|
348
|
-
base_class=EEGDashDataset,
|
|
349
|
-
namespace=globals(),
|
|
350
|
-
)
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
__all__ = ["EEGChallengeDataset"] + list(registered_classes.keys())
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from mne.utils import warn
|
|
5
|
+
|
|
6
|
+
from .api import EEGDashDataset
|
|
7
|
+
from .const import RELEASE_TO_OPENNEURO_DATASET_MAP, SUBJECT_MINI_RELEASE_MAP
|
|
8
|
+
from .registry import register_openneuro_datasets
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger("eegdash")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class EEGChallengeDataset(EEGDashDataset):
|
|
14
|
+
def __init__(
|
|
15
|
+
self,
|
|
16
|
+
release: str,
|
|
17
|
+
cache_dir: str,
|
|
18
|
+
mini: bool = True,
|
|
19
|
+
query: dict | None = None,
|
|
20
|
+
s3_bucket: str | None = "s3://nmdatasets/NeurIPS25",
|
|
21
|
+
**kwargs,
|
|
22
|
+
):
|
|
23
|
+
"""Create a new EEGDashDataset from a given query or local BIDS dataset directory
|
|
24
|
+
and dataset name. An EEGDashDataset is pooled collection of EEGDashBaseDataset
|
|
25
|
+
instances (individual recordings) and is a subclass of braindecode's BaseConcatDataset.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
release: str
|
|
30
|
+
Release name. Can be one of ["R1", ..., "R11"]
|
|
31
|
+
mini: bool, default True
|
|
32
|
+
Whether to use the mini-release version of the dataset. It is recommended
|
|
33
|
+
to use the mini version for faster training and evaluation.
|
|
34
|
+
query : dict | None
|
|
35
|
+
Optionally a dictionary that specifies a query to be executed,
|
|
36
|
+
in addition to the dataset (automatically inferred from the release argument).
|
|
37
|
+
See EEGDash.find() for details on the query format.
|
|
38
|
+
cache_dir : str
|
|
39
|
+
A directory where the dataset will be cached locally.
|
|
40
|
+
s3_bucket : str | None
|
|
41
|
+
An optional S3 bucket URI to use instead of the
|
|
42
|
+
default OpenNeuro bucket for loading data files.
|
|
43
|
+
kwargs : dict
|
|
44
|
+
Additional keyword arguments to be passed to the EEGDashDataset
|
|
45
|
+
constructor.
|
|
46
|
+
|
|
47
|
+
"""
|
|
48
|
+
self.release = release
|
|
49
|
+
self.mini = mini
|
|
50
|
+
|
|
51
|
+
if release not in RELEASE_TO_OPENNEURO_DATASET_MAP:
|
|
52
|
+
raise ValueError(
|
|
53
|
+
f"Unknown release: {release}, expected one of {list(RELEASE_TO_OPENNEURO_DATASET_MAP.keys())}"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
dataset_parameters = []
|
|
57
|
+
if isinstance(release, str):
|
|
58
|
+
dataset_parameters.append(RELEASE_TO_OPENNEURO_DATASET_MAP[release])
|
|
59
|
+
else:
|
|
60
|
+
raise ValueError(
|
|
61
|
+
f"Unknown release type: {type(release)}, the expected type is str."
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
if query and "dataset" in query:
|
|
65
|
+
raise ValueError(
|
|
66
|
+
"Query using the parameters `dataset` with the class EEGChallengeDataset is not possible."
|
|
67
|
+
"Please use the release argument instead, or the object EEGDashDataset instead."
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
if self.mini:
|
|
71
|
+
# Disallow mixing subject selection with mini=True since mini already
|
|
72
|
+
# applies a predefined subject subset.
|
|
73
|
+
if (query and "subject" in query) or ("subject" in kwargs):
|
|
74
|
+
raise ValueError(
|
|
75
|
+
"Query using the parameters `subject` with the class EEGChallengeDataset and `mini==True` is not possible."
|
|
76
|
+
"Please don't use the `subject` selection twice."
|
|
77
|
+
"Set `mini=False` to use the `subject` selection."
|
|
78
|
+
)
|
|
79
|
+
kwargs["subject"] = SUBJECT_MINI_RELEASE_MAP[release]
|
|
80
|
+
s3_bucket = f"{s3_bucket}/{release}_mini_L100_bdf"
|
|
81
|
+
else:
|
|
82
|
+
s3_bucket = f"{s3_bucket}/{release}_L100_bdf"
|
|
83
|
+
|
|
84
|
+
warn(
|
|
85
|
+
"\n\n"
|
|
86
|
+
"[EEGChallengeDataset] EEG 2025 Competition Data Notice:\n"
|
|
87
|
+
"-------------------------------------------------------\n"
|
|
88
|
+
"This object loads the HBN dataset that has been preprocessed for the EEG Challenge:\n"
|
|
89
|
+
" - Downsampled from 500Hz to 100Hz\n"
|
|
90
|
+
" - Bandpass filtered (0.5–50 Hz)\n"
|
|
91
|
+
"\n"
|
|
92
|
+
"For full preprocessing details, see:\n"
|
|
93
|
+
" https://github.com/eeg2025/downsample-datasets\n"
|
|
94
|
+
"\n"
|
|
95
|
+
"IMPORTANT: The data accessed via `EEGChallengeDataset` is NOT identical to what you get from `EEGDashDataset` directly.\n"
|
|
96
|
+
"If you are participating in the competition, always use `EEGChallengeDataset` to ensure consistency with the challenge data.\n"
|
|
97
|
+
"\n",
|
|
98
|
+
UserWarning,
|
|
99
|
+
module="eegdash",
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
super().__init__(
|
|
103
|
+
dataset=RELEASE_TO_OPENNEURO_DATASET_MAP[release],
|
|
104
|
+
query=query,
|
|
105
|
+
cache_dir=cache_dir,
|
|
106
|
+
s3_bucket=s3_bucket,
|
|
107
|
+
**kwargs,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
registered_classes = register_openneuro_datasets(
|
|
112
|
+
summary_file=Path(__file__).with_name("dataset_summary.csv"),
|
|
113
|
+
base_class=EEGDashDataset,
|
|
114
|
+
namespace=globals(),
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
__all__ = ["EEGChallengeDataset"] + list(registered_classes.keys())
|
|
@@ -57,7 +57,7 @@ def register_openneuro_datasets(
|
|
|
57
57
|
|
|
58
58
|
init = make_init(dataset_id)
|
|
59
59
|
|
|
60
|
-
doc = f"""
|
|
60
|
+
doc = f"""OpenNeuro dataset ``{dataset_id}``.
|
|
61
61
|
|
|
62
62
|
{markdown_table(row_series)}
|
|
63
63
|
|
|
@@ -69,11 +69,15 @@ def register_openneuro_datasets(
|
|
|
69
69
|
Extra Mongo query merged with ``{{'dataset': '{dataset_id}'}}``.
|
|
70
70
|
s3_bucket : str | None
|
|
71
71
|
Optional S3 bucket name.
|
|
72
|
+
subject : str | None
|
|
73
|
+
Optional subject identifier.
|
|
74
|
+
task : str | None
|
|
75
|
+
Optional task identifier.
|
|
72
76
|
**kwargs
|
|
73
77
|
Passed through to {base_class.__name__}.
|
|
74
78
|
"""
|
|
75
79
|
|
|
76
|
-
init.__doc__ = doc
|
|
80
|
+
# init.__doc__ = doc
|
|
77
81
|
|
|
78
82
|
cls = type(
|
|
79
83
|
class_name,
|
|
@@ -101,6 +105,7 @@ def markdown_table(row_series: pd.Series) -> str:
|
|
|
101
105
|
"""Create a reStructuredText grid table from a pandas Series."""
|
|
102
106
|
if row_series.empty:
|
|
103
107
|
return ""
|
|
108
|
+
dataset_id = row_series["dataset"]
|
|
104
109
|
|
|
105
110
|
# Prepare the dataframe with user's suggested logic
|
|
106
111
|
df = (
|
|
@@ -112,6 +117,7 @@ def markdown_table(row_series: pd.Series) -> str:
|
|
|
112
117
|
"n_tasks": "#Classes",
|
|
113
118
|
"sampling_freqs": "Freq(Hz)",
|
|
114
119
|
"duration_hours_total": "Duration(H)",
|
|
120
|
+
"size": "Size",
|
|
115
121
|
}
|
|
116
122
|
)
|
|
117
123
|
.reindex(
|
|
@@ -122,6 +128,7 @@ def markdown_table(row_series: pd.Series) -> str:
|
|
|
122
128
|
"#Classes",
|
|
123
129
|
"Freq(Hz)",
|
|
124
130
|
"Duration(H)",
|
|
131
|
+
"Size",
|
|
125
132
|
]
|
|
126
133
|
)
|
|
127
134
|
.infer_objects(copy=False)
|
|
@@ -131,6 +138,9 @@ def markdown_table(row_series: pd.Series) -> str:
|
|
|
131
138
|
# Use tabulate for the final rst formatting
|
|
132
139
|
table = tabulate(df, headers="keys", tablefmt="rst", showindex=False)
|
|
133
140
|
|
|
141
|
+
# Add a caption for the table
|
|
142
|
+
caption = f"Short overview of dataset {dataset_id} more details in the `Nemar documentation <https://nemar.org/dataexplorer/detail?dataset_id={dataset_id}>`_."
|
|
143
|
+
# adding caption below the table
|
|
134
144
|
# Indent the table to fit within the admonition block
|
|
135
145
|
indented_table = "\n".join(" " + line for line in table.split("\n"))
|
|
136
|
-
return f"\n\n{indented_table}"
|
|
146
|
+
return f"\n\n{indented_table}\n\n{caption}"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eegdash
|
|
3
|
-
Version: 0.3.6.dev182011805
|
|
3
|
+
Version: 0.3.7.dev177024734
|
|
4
4
|
Summary: EEG data for machine learning
|
|
5
5
|
Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
|
|
6
6
|
License-Expression: GPL-3.0-only
|
|
@@ -48,6 +48,7 @@ Requires-Dist: pytest_cases; extra == "tests"
|
|
|
48
48
|
Requires-Dist: pytest-benchmark; extra == "tests"
|
|
49
49
|
Provides-Extra: dev
|
|
50
50
|
Requires-Dist: pre-commit; extra == "dev"
|
|
51
|
+
Requires-Dist: ipykernel; extra == "dev"
|
|
51
52
|
Provides-Extra: docs
|
|
52
53
|
Requires-Dist: sphinx; extra == "docs"
|
|
53
54
|
Requires-Dist: sphinx_design; extra == "docs"
|
|
@@ -55,10 +56,12 @@ Requires-Dist: sphinx_gallery; extra == "docs"
|
|
|
55
56
|
Requires-Dist: sphinx_rtd_theme; extra == "docs"
|
|
56
57
|
Requires-Dist: pydata-sphinx-theme; extra == "docs"
|
|
57
58
|
Requires-Dist: sphinx-autobuild; extra == "docs"
|
|
59
|
+
Requires-Dist: sphinx-sitemap; extra == "docs"
|
|
58
60
|
Requires-Dist: numpydoc; extra == "docs"
|
|
59
61
|
Requires-Dist: memory_profiler; extra == "docs"
|
|
60
62
|
Requires-Dist: ipython; extra == "docs"
|
|
61
63
|
Requires-Dist: lightgbm; extra == "docs"
|
|
64
|
+
Requires-Dist: plotly; extra == "docs"
|
|
62
65
|
Provides-Extra: all
|
|
63
66
|
Requires-Dist: eegdash[docs]; extra == "all"
|
|
64
67
|
Requires-Dist: eegdash[dev]; extra == "all"
|
|
@@ -12,6 +12,7 @@ docs/source/install/install_pip.rst
|
|
|
12
12
|
docs/source/install/install_source.rst
|
|
13
13
|
eegdash/__init__.py
|
|
14
14
|
eegdash/api.py
|
|
15
|
+
eegdash/const.py
|
|
15
16
|
eegdash/data_config.py
|
|
16
17
|
eegdash/data_utils.py
|
|
17
18
|
eegdash/dataset.py
|
|
@@ -43,6 +44,7 @@ eegdash/features/feature_bank/utils.py
|
|
|
43
44
|
tests/test_api.py
|
|
44
45
|
tests/test_challenge_kwargs.py
|
|
45
46
|
tests/test_correctness.py
|
|
47
|
+
tests/test_database.py
|
|
46
48
|
tests/test_dataset.py
|
|
47
49
|
tests/test_dataset_registration.py
|
|
48
50
|
tests/test_eegdash.py
|
|
@@ -22,6 +22,7 @@ eegdash[tests]
|
|
|
22
22
|
|
|
23
23
|
[dev]
|
|
24
24
|
pre-commit
|
|
25
|
+
ipykernel
|
|
25
26
|
|
|
26
27
|
[docs]
|
|
27
28
|
sphinx
|
|
@@ -30,10 +31,12 @@ sphinx_gallery
|
|
|
30
31
|
sphinx_rtd_theme
|
|
31
32
|
pydata-sphinx-theme
|
|
32
33
|
sphinx-autobuild
|
|
34
|
+
sphinx-sitemap
|
|
33
35
|
numpydoc
|
|
34
36
|
memory_profiler
|
|
35
37
|
ipython
|
|
36
38
|
lightgbm
|
|
39
|
+
plotly
|
|
37
40
|
|
|
38
41
|
[tests]
|
|
39
42
|
pytest
|
|
@@ -67,7 +67,8 @@ tests = [
|
|
|
67
67
|
'pytest-benchmark',
|
|
68
68
|
]
|
|
69
69
|
dev = [
|
|
70
|
-
"pre-commit"
|
|
70
|
+
"pre-commit",
|
|
71
|
+
"ipykernel"
|
|
71
72
|
]
|
|
72
73
|
|
|
73
74
|
docs = [
|
|
@@ -77,10 +78,12 @@ docs = [
|
|
|
77
78
|
"sphinx_rtd_theme",
|
|
78
79
|
"pydata-sphinx-theme",
|
|
79
80
|
"sphinx-autobuild",
|
|
81
|
+
"sphinx-sitemap",
|
|
80
82
|
"numpydoc",
|
|
81
83
|
"memory_profiler",
|
|
82
84
|
"ipython",
|
|
83
85
|
"lightgbm",
|
|
86
|
+
"plotly"
|
|
84
87
|
]
|
|
85
88
|
|
|
86
89
|
all = [
|
|
File without changes
|
{eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/tests/test_dataset_registration.py
RENAMED
|
@@ -16,9 +16,9 @@ def test_register_openneuro_datasets(tmp_path: Path):
|
|
|
16
16
|
summary.write_text(
|
|
17
17
|
"\n".join(
|
|
18
18
|
[
|
|
19
|
-
"
|
|
20
|
-
"ds002718,18,18,1,74,250,14.844",
|
|
21
|
-
"ds000001,1,1,1,1,1,1",
|
|
19
|
+
"dataset,num_subjects,num_sessions,num_runs,num_channels,sampling_rate,duration,size",
|
|
20
|
+
"ds002718,18,18,1,74,250,14.844,1.2GB",
|
|
21
|
+
"ds000001,1,1,1,1,1,1,100MB",
|
|
22
22
|
]
|
|
23
23
|
)
|
|
24
24
|
)
|
|
@@ -12,7 +12,9 @@ def test_dataset_loads_without_eegdash(monkeypatch):
|
|
|
12
12
|
"""Dataset should load from records without contacting network resources."""
|
|
13
13
|
eeg_dash = EEGDash()
|
|
14
14
|
|
|
15
|
-
records = eeg_dash.find(
|
|
15
|
+
records = eeg_dash.find(
|
|
16
|
+
dataset="ds005509", subject="NDARAC350XUM", task="RestingState"
|
|
17
|
+
)
|
|
16
18
|
|
|
17
19
|
# test with internet
|
|
18
20
|
dataset_internet = EEGDashDataset(
|
|
@@ -24,14 +26,14 @@ def test_dataset_loads_without_eegdash(monkeypatch):
|
|
|
24
26
|
# Monkeypatch any network calls inside EEGDashDataset to raise if called
|
|
25
27
|
monkeypatch.setattr(
|
|
26
28
|
EEGDashDataset,
|
|
27
|
-
"
|
|
29
|
+
"_find_datasets",
|
|
28
30
|
lambda *args, **kwargs: pytest.skip(
|
|
29
31
|
"Skipping network download in offline test"
|
|
30
32
|
),
|
|
31
33
|
)
|
|
32
34
|
monkeypatch.setattr(
|
|
33
35
|
EEGDashDataset,
|
|
34
|
-
"
|
|
36
|
+
"_find_datasets",
|
|
35
37
|
lambda *args, **kwargs: pytest.skip(
|
|
36
38
|
"Skipping network download in offline test"
|
|
37
39
|
),
|
|
@@ -39,7 +41,7 @@ def test_dataset_loads_without_eegdash(monkeypatch):
|
|
|
39
41
|
# TO-DO: discover way to do this pytest
|
|
40
42
|
|
|
41
43
|
dataset_without_internet = EEGDashDataset(
|
|
42
|
-
records=records, cache_dir=CACHE_DIR, eeg_dash_instance=None
|
|
44
|
+
dataset="ds005509", records=records, cache_dir=CACHE_DIR, eeg_dash_instance=None
|
|
43
45
|
)
|
|
44
46
|
|
|
45
47
|
assert dataset_internet.datasets[0].raw == dataset_without_internet.datasets[0].raw
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
.. meta::
|
|
2
|
-
:hide_sidebar: true
|
|
3
|
-
|
|
4
|
-
:html_theme.sidebar_secondary.remove:
|
|
5
|
-
:html_theme.sidebar_primary.remove:
|
|
6
|
-
|
|
7
|
-
.. _data_summary:
|
|
8
|
-
.. automodule:: eegdash.dataset
|
|
9
|
-
|
|
10
|
-
.. currentmodule:: eegdash.dataset
|
|
11
|
-
|
|
12
|
-
To leverage recent and ongoing advancements in large-scale computational methods and to ensure the preservation of scientific data generated from publicly funded research, the EEG-DaSh data archive will create a data-sharing resource for MEEG (EEG, MEG) data contributed by collaborators for machine learning (ML) and deep learning (DL) applications.
|
|
13
|
-
|
|
14
|
-
The archive is currently still in :bdg-danger:`beta testing` mode, so be kind.
|
|
15
|
-
|
|
16
|
-
EEG Dash Datasets
|
|
17
|
-
==================
|
|
18
|
-
|
|
19
|
-
The data in EEG-DaSh originates from a collaboration involving 25 laboratories, encompassing 27,053 participants. This extensive collection includes MEEG data, which is a combination of EEG and MEG signals. The data is sourced from various studies conducted by these labs, involving both healthy subjects and clinical populations with conditions such as ADHD, depression, schizophrenia, dementia, autism, and psychosis. Additionally, data spans different mental states like sleep, meditation, and cognitive tasks. In addition, EEG-DaSh will incorporate a subset of the data converted from NEMAR, which includes 330 MEEG BIDS-formatted datasets, further expanding the archive with well-curated, standardized neuroelectromagnetic data.
|
|
20
|
-
|
|
21
|
-
Columns definitions for the table below:
|
|
22
|
-
- **dataset**: Name of the dataset.
|
|
23
|
-
- **n_records**: Number of EEG records in the dataset.
|
|
24
|
-
- **n_subjects**: Number of subjects in the dataset.
|
|
25
|
-
- **n_tasks**: Number of experimental tasks in the dataset.
|
|
26
|
-
- **nchans_set**: Set of EEG channel counts used in the dataset.
|
|
27
|
-
- **sampling_freqs**: Set of sampling frequencies used in the dataset.
|
|
28
|
-
- **duration_hours_total**: Total duration of all recordings in hours.
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
Datasets
|
|
32
|
-
======================
|
|
33
|
-
|
|
34
|
-
.. csv-table::
|
|
35
|
-
:file: ../build/dataset_summary.csv
|
|
36
|
-
:header-rows: 1
|
|
37
|
-
:class: sortable
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
.. raw:: html
|
|
42
|
-
<style>
|
|
43
|
-
/* Make this page full-width and remove side padding */
|
|
44
|
-
:root {
|
|
45
|
-
--pst-page-max-width: 100%;
|
|
46
|
-
--pst-content-max-width: 100%;
|
|
47
|
-
}
|
|
48
|
-
.bd-main .bd-content .bd-article-container {
|
|
49
|
-
max-width: 100%;
|
|
50
|
-
padding-left: 0;
|
|
51
|
-
padding-right: 0;
|
|
52
|
-
}
|
|
53
|
-
/* Ensure the DataTable uses the full width */
|
|
54
|
-
table.sortable { width: 100% !important; }
|
|
55
|
-
</style>
|
|
56
|
-
|
|
57
|
-
<link href="https://cdn.datatables.net/v/bm/jq-3.7.0/dt-2.3.2/af-2.7.0/b-3.2.4/b-html5-3.2.4/cr-2.1.1/fh-4.0.3/r-3.0.5/datatables.min.css"
|
|
58
|
-
rel="stylesheet"
|
|
59
|
-
integrity="sha384-aemAM3yl2c0KAZZkR1b1AwMH2u3r1NHOppsl5i6Ny1L5pfqn7SDH52qdaa1TbyN9"
|
|
60
|
-
crossorigin="anonymous">
|
|
61
|
-
|
|
62
|
-
<script src="https://cdn.datatables.net/v/bm/jq-3.7.0/dt-2.3.2/af-2.7.0/b-3.2.4/b-html5-3.2.4/cr-2.1.1/fh-4.0.3/r-3.0.5/datatables.min.js"
|
|
63
|
-
integrity="sha384-CKcCNsP1rMRsJFtrN6zMWK+KIK/FjYiV/d8uOp0LZtbEVzbidk105YcuVncAhBR8"
|
|
64
|
-
crossorigin="anonymous"></script>
|
|
65
|
-
|
|
66
|
-
<script>
|
|
67
|
-
document.addEventListener('DOMContentLoaded', function () {
|
|
68
|
-
const tables = document.querySelectorAll('table.sortable');
|
|
69
|
-
tables.forEach(function (tbl) {
|
|
70
|
-
// Use the jQuery plugin that ships in the bundle
|
|
71
|
-
$(tbl).DataTable({
|
|
72
|
-
paging: false,
|
|
73
|
-
searching: false,
|
|
74
|
-
info: false,
|
|
75
|
-
ordering: true,
|
|
76
|
-
responsive: true,
|
|
77
|
-
fixedHeader: true,
|
|
78
|
-
// Avoid re-initialization if this script runs more than once
|
|
79
|
-
retrieve: true,
|
|
80
|
-
scrollX: true
|
|
81
|
-
});
|
|
82
|
-
});
|
|
83
|
-
});
|
|
84
|
-
</script>
|
|
85
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/docs/source/install/install_pip.rst
RENAMED
|
File without changes
|
{eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/docs/source/install/install_source.rst
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/csp.py
RENAMED
|
File without changes
|
|
File without changes
|
{eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/signal.py
RENAMED
|
File without changes
|
{eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/spectral.py
RENAMED
|
File without changes
|
{eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash/features/feature_bank/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eegdash-0.3.6.dev182011805 → eegdash-0.3.7.dev177024734}/eegdash.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|