nimare 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/__init__.py +0 -0
- benchmarks/bench_cbma.py +57 -0
- nimare/__init__.py +45 -0
- nimare/_version.py +21 -0
- nimare/annotate/__init__.py +21 -0
- nimare/annotate/cogat.py +213 -0
- nimare/annotate/gclda.py +924 -0
- nimare/annotate/lda.py +147 -0
- nimare/annotate/text.py +75 -0
- nimare/annotate/utils.py +87 -0
- nimare/base.py +217 -0
- nimare/cli.py +124 -0
- nimare/correct.py +462 -0
- nimare/dataset.py +685 -0
- nimare/decode/__init__.py +33 -0
- nimare/decode/base.py +115 -0
- nimare/decode/continuous.py +462 -0
- nimare/decode/discrete.py +753 -0
- nimare/decode/encode.py +110 -0
- nimare/decode/utils.py +44 -0
- nimare/diagnostics.py +510 -0
- nimare/estimator.py +139 -0
- nimare/extract/__init__.py +19 -0
- nimare/extract/extract.py +466 -0
- nimare/extract/utils.py +295 -0
- nimare/generate.py +331 -0
- nimare/io.py +667 -0
- nimare/meta/__init__.py +39 -0
- nimare/meta/cbma/__init__.py +6 -0
- nimare/meta/cbma/ale.py +951 -0
- nimare/meta/cbma/base.py +947 -0
- nimare/meta/cbma/mkda.py +1361 -0
- nimare/meta/cbmr.py +970 -0
- nimare/meta/ibma.py +1683 -0
- nimare/meta/kernel.py +501 -0
- nimare/meta/models.py +1199 -0
- nimare/meta/utils.py +494 -0
- nimare/nimads.py +492 -0
- nimare/reports/__init__.py +24 -0
- nimare/reports/base.py +664 -0
- nimare/reports/default.yml +123 -0
- nimare/reports/figures.py +651 -0
- nimare/reports/report.tpl +160 -0
- nimare/resources/__init__.py +1 -0
- nimare/resources/atlases/Harvard-Oxford-LICENSE +93 -0
- nimare/resources/atlases/HarvardOxford-cort-maxprob-thr25-2mm.nii.gz +0 -0
- nimare/resources/database_file_manifest.json +142 -0
- nimare/resources/english_spellings.csv +1738 -0
- nimare/resources/filenames.json +32 -0
- nimare/resources/neurosynth_laird_studies.json +58773 -0
- nimare/resources/neurosynth_stoplist.txt +396 -0
- nimare/resources/nidm_pain_dset.json +1349 -0
- nimare/resources/references.bib +541 -0
- nimare/resources/semantic_knowledge_children.txt +325 -0
- nimare/resources/semantic_relatedness_children.txt +249 -0
- nimare/resources/templates/MNI152_2x2x2_brainmask.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_T1w.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_T1w.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz +0 -0
- nimare/results.py +225 -0
- nimare/stats.py +276 -0
- nimare/tests/__init__.py +1 -0
- nimare/tests/conftest.py +229 -0
- nimare/tests/data/amygdala_roi.nii.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_coordinates.tsv.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_metadata.tsv.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz +0 -0
- nimare/tests/data/data-neurosynth_version-7_vocab-terms_vocabulary.txt +100 -0
- nimare/tests/data/neurosynth_dset.json +2868 -0
- nimare/tests/data/neurosynth_laird_studies.json +58773 -0
- nimare/tests/data/nidm_pain_dset.json +1349 -0
- nimare/tests/data/nimads_annotation.json +1 -0
- nimare/tests/data/nimads_studyset.json +1 -0
- nimare/tests/data/test_baseline.txt +2 -0
- nimare/tests/data/test_pain_dataset.json +1278 -0
- nimare/tests/data/test_pain_dataset_multiple_contrasts.json +1242 -0
- nimare/tests/data/test_sleuth_file.txt +18 -0
- nimare/tests/data/test_sleuth_file2.txt +10 -0
- nimare/tests/data/test_sleuth_file3.txt +5 -0
- nimare/tests/data/test_sleuth_file4.txt +5 -0
- nimare/tests/data/test_sleuth_file5.txt +5 -0
- nimare/tests/test_annotate_cogat.py +32 -0
- nimare/tests/test_annotate_gclda.py +86 -0
- nimare/tests/test_annotate_lda.py +27 -0
- nimare/tests/test_dataset.py +99 -0
- nimare/tests/test_decode_continuous.py +132 -0
- nimare/tests/test_decode_discrete.py +92 -0
- nimare/tests/test_diagnostics.py +168 -0
- nimare/tests/test_estimator_performance.py +385 -0
- nimare/tests/test_extract.py +46 -0
- nimare/tests/test_generate.py +247 -0
- nimare/tests/test_io.py +294 -0
- nimare/tests/test_meta_ale.py +298 -0
- nimare/tests/test_meta_cbmr.py +295 -0
- nimare/tests/test_meta_ibma.py +240 -0
- nimare/tests/test_meta_kernel.py +209 -0
- nimare/tests/test_meta_mkda.py +234 -0
- nimare/tests/test_nimads.py +21 -0
- nimare/tests/test_reports.py +110 -0
- nimare/tests/test_stats.py +101 -0
- nimare/tests/test_transforms.py +272 -0
- nimare/tests/test_utils.py +200 -0
- nimare/tests/test_workflows.py +221 -0
- nimare/tests/utils.py +126 -0
- nimare/transforms.py +907 -0
- nimare/utils.py +1367 -0
- nimare/workflows/__init__.py +14 -0
- nimare/workflows/base.py +189 -0
- nimare/workflows/cbma.py +165 -0
- nimare/workflows/ibma.py +108 -0
- nimare/workflows/macm.py +77 -0
- nimare/workflows/misc.py +65 -0
- nimare-0.4.2.dist-info/LICENSE +21 -0
- nimare-0.4.2.dist-info/METADATA +124 -0
- nimare-0.4.2.dist-info/RECORD +119 -0
- nimare-0.4.2.dist-info/WHEEL +5 -0
- nimare-0.4.2.dist-info/entry_points.txt +2 -0
- nimare-0.4.2.dist-info/top_level.txt +2 -0
nimare/extract/utils.py
ADDED
@@ -0,0 +1,295 @@
|
|
1
|
+
"""Utility functions for the extract module."""
|
2
|
+
|
3
|
+
from __future__ import division
|
4
|
+
|
5
|
+
import logging
|
6
|
+
import os
|
7
|
+
import os.path as op
|
8
|
+
|
9
|
+
import numpy as np
|
10
|
+
import pandas as pd
|
11
|
+
import requests
|
12
|
+
from fuzzywuzzy import fuzz
|
13
|
+
|
14
|
+
from nimare.utils import _uk_to_us
|
15
|
+
|
16
|
+
LGR = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
def get_data_dirs(data_dir=None):
    """Return the directories in which NiMARE looks for data.

    .. versionadded:: 0.0.2

    This is typically useful for the end-user to check where the data is
    downloaded and stored.

    Parameters
    ----------
    data_dir: :obj:`pathlib.Path` or :obj:`str`, optional
        Path of the data directory. Used to force data storage in a specified
        location. Default: None

    Returns
    -------
    paths : :obj:`list` of :obj:`str`
        Paths of the dataset directories.

    Notes
    -----
    Taken from Nilearn.
    This function retrieves the datasets directories using the following
    priority :

    1. defaults system paths
    2. the keyword argument data_dir
    3. the global environment variable NIMARE_SHARED_DATA
    4. the user environment variable NIMARE_DATA
    5. nimare_data in the user home folder
    """
    search_paths = []

    # An explicit data_dir takes precedence and suppresses the env-var lookup.
    if data_dir is not None:
        search_paths.extend(str(data_dir).split(os.pathsep))
    else:
        # Shared (system-wide) location is consulted before the per-user one.
        for env_var in ("NIMARE_SHARED_DATA", "NIMARE_DATA"):
            env_value = os.getenv(env_var)
            if env_value is not None:
                search_paths.extend(env_value.split(os.pathsep))

    # The home-folder fallback is always appended last.
    search_paths.append(os.path.expanduser("~/.nimare"))
    return search_paths
|
71
|
+
|
72
|
+
|
73
|
+
def _get_dataset_dir(dataset_name, data_dir=None, default_paths=None):
    """Create if necessary and returns data directory of given dataset.

    .. versionadded:: 0.0.2

    Parameters
    ----------
    dataset_name : :obj:`str`
        The unique name of the dataset.
    data_dir : :obj:`pathlib.Path` or :obj:`str`, optional
        Path of the data directory. Used to force data storage in a specified
        location. Default: None
    default_paths : :obj:`list` of :obj:`str`, optional
        Default system paths in which the dataset may already have been
        installed by a third party software. They will be checked first.

    Returns
    -------
    data_dir : :obj:`str`
        Path of the given dataset directory.

    Notes
    -----
    Taken from Nilearn.
    This function retrieves the datasets directory (or data directory) using
    the following priority :
    1. defaults system paths
    2. the keyword argument data_dir
    3. the global environment variable NIMARE_SHARED_DATA
    4. the user environment variable NIMARE_DATA
    5. nimare_data in the user home folder
    """
    # Candidate list of (path, is_pre_dir) pairs; when is_pre_dir is True the
    # path is used as-is and the dataset name is NOT appended to it.
    candidates = []
    if default_paths is not None:
        for default_path in default_paths:
            candidates.extend((d, True) for d in str(default_path).split(os.pathsep))

    candidates.extend((d, False) for d in get_data_dirs(data_dir=data_dir))

    LGR.debug(f"Dataset search paths: {candidates}")

    # First pass: return the dataset directory if it already exists anywhere.
    for candidate, is_pre_dir in candidates:
        full_path = candidate if is_pre_dir else os.path.join(candidate, dataset_name)

        if os.path.islink(full_path):
            # Resolve path
            full_path = readlinkabs(full_path)

        if os.path.exists(full_path) and os.path.isdir(full_path):
            LGR.info(f"Dataset found in {full_path}\n")
            return full_path

    # Second pass: create the directory in the first writeable location.
    errors = []
    for candidate, is_pre_dir in candidates:
        full_path = candidate if is_pre_dir else os.path.join(candidate, dataset_name)

        if not os.path.exists(full_path):
            try:
                os.makedirs(full_path)
                LGR.info(f"Dataset created in {full_path}")
                return full_path
            except Exception as exc:
                short_error_message = getattr(exc, "strerror", str(exc))
                errors.append(f"\n -{full_path} ({short_error_message})")

    raise OSError(
        "NiMARE tried to store the dataset in the following directories, but: " + "".join(errors)
    )
|
146
|
+
|
147
|
+
|
148
|
+
def readlinkabs(link):
    """Return an absolute path for the destination of a symlink.

    .. versionadded:: 0.0.2

    From nilearn.
    """
    target = os.readlink(link)
    # Relative targets are resolved against the directory containing the link.
    return target if os.path.isabs(target) else os.path.join(os.path.dirname(link), target)
|
159
|
+
|
160
|
+
|
161
|
+
def _download_zipped_file(url, filename=None):
    """Download from a URL to a file.

    .. versionadded:: 0.0.2

    Parameters
    ----------
    url : :obj:`str`
        URL to download from.
    filename : :obj:`str` or None, optional
        Output path for the downloaded file. If None, the file is saved in
        the current working directory under the last component of the URL.

    Returns
    -------
    filename : :obj:`str`
        Path to the downloaded file.
    """
    if filename is None:
        # BUG FIX: os.path (imported as ``op``) has no getcwd(); the previous
        # ``op.getcwd()`` raised AttributeError whenever filename was None.
        data_dir = op.abspath(os.getcwd())
        filename = op.join(data_dir, url.split("/")[-1])
    # NOTE the stream=True parameter: the response is consumed in chunks
    # instead of being loaded into memory all at once.
    req = requests.get(url, stream=True)
    with open(filename, "wb") as f_obj:
        for chunk in req.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f_obj.write(chunk)
    return filename
|
177
|
+
|
178
|
+
|
179
|
+
def _longify(df):
|
180
|
+
"""Expand comma-separated lists of aliases in DataFrame into separate rows.
|
181
|
+
|
182
|
+
.. versionadded:: 0.0.2
|
183
|
+
|
184
|
+
"""
|
185
|
+
reduced = df[["id", "name", "alias"]]
|
186
|
+
rows = []
|
187
|
+
for index, row in reduced.iterrows():
|
188
|
+
if isinstance(row["alias"], str) and "," in row["alias"]:
|
189
|
+
aliases = row["alias"].split(", ") + [row["name"]]
|
190
|
+
else:
|
191
|
+
aliases = [row["name"]]
|
192
|
+
|
193
|
+
for alias in aliases:
|
194
|
+
rows.append([row["id"], row["name"].lower(), alias.lower()])
|
195
|
+
out_df = pd.DataFrame(columns=["id", "name", "alias"], data=rows)
|
196
|
+
out_df = out_df.replace("", np.nan)
|
197
|
+
return out_df
|
198
|
+
|
199
|
+
|
200
|
+
def _get_ratio(tup):
|
201
|
+
"""Get fuzzy ratio.
|
202
|
+
|
203
|
+
.. versionadded:: 0.0.2
|
204
|
+
|
205
|
+
"""
|
206
|
+
if all(isinstance(t, str) for t in tup):
|
207
|
+
return fuzz.ratio(tup[0], tup[1])
|
208
|
+
else:
|
209
|
+
return 100
|
210
|
+
|
211
|
+
|
212
|
+
def _gen_alt_forms(term):
|
213
|
+
"""Generate a list of alternate forms for a given term.
|
214
|
+
|
215
|
+
.. versionadded:: 0.0.2
|
216
|
+
|
217
|
+
"""
|
218
|
+
if not isinstance(term, str) or len(term) == 0:
|
219
|
+
return [None]
|
220
|
+
|
221
|
+
alt_forms = []
|
222
|
+
# For one alternate form, put contents of parentheses at beginning of term
|
223
|
+
if "(" in term:
|
224
|
+
prefix = term[term.find("(") + 1 : term.find(")")]
|
225
|
+
temp_term = term.replace(f"({prefix})", "").replace(" ", " ")
|
226
|
+
alt_forms.append(temp_term)
|
227
|
+
alt_forms.append(f"{prefix} {temp_term}")
|
228
|
+
else:
|
229
|
+
prefix = ""
|
230
|
+
|
231
|
+
# Remove extra spaces
|
232
|
+
alt_forms = [s.strip() for s in alt_forms]
|
233
|
+
|
234
|
+
# Allow plurals
|
235
|
+
# temp = [s+'s' for s in alt_forms]
|
236
|
+
# temp += [s+'es' for s in alt_forms]
|
237
|
+
# alt_forms += temp
|
238
|
+
|
239
|
+
# Remove words "task" and/or "paradigm"
|
240
|
+
alt_forms += [term.replace(" task", "") for term in alt_forms]
|
241
|
+
alt_forms += [term.replace(" paradigm", "") for term in alt_forms]
|
242
|
+
|
243
|
+
# Remove duplicates
|
244
|
+
alt_forms = list(set(alt_forms))
|
245
|
+
return alt_forms
|
246
|
+
|
247
|
+
|
248
|
+
def _get_concept_reltype(relationship, direction):
|
249
|
+
"""Convert two-part relationship info to more parsimonious representation.
|
250
|
+
|
251
|
+
.. versionadded:: 0.0.2
|
252
|
+
|
253
|
+
The two part representation includes relationship type and direction.
|
254
|
+
"""
|
255
|
+
new_rel = None
|
256
|
+
if relationship == "PARTOF":
|
257
|
+
if direction == "child":
|
258
|
+
new_rel = "hasPart"
|
259
|
+
elif direction == "parent":
|
260
|
+
new_rel = "isPartOf"
|
261
|
+
elif relationship == "KINDOF":
|
262
|
+
if direction == "child":
|
263
|
+
new_rel = "hasKind"
|
264
|
+
elif direction == "parent":
|
265
|
+
new_rel = "isKindOf"
|
266
|
+
return new_rel
|
267
|
+
|
268
|
+
|
269
|
+
def _expand_df(df):
    """Add alternate forms to DataFrame, then sort DataFrame by alias length and similarity.

    .. versionadded:: 0.0.2

    Sorting by alias length is done for order of extraction from text. Sorting by similarity to
    original name is done in order to select most appropriate term to associate with alias.
    """
    out_df = df.copy()
    out_df["alias"] = out_df["alias"].apply(_uk_to_us)

    # Generate one new row per alternate form of each alias.
    generated_rows = []
    for _, row in out_df.iterrows():
        for alt_form in _gen_alt_forms(row["alias"]):
            expanded_row = row.copy()
            expanded_row["alias"] = alt_form
            generated_rows.append(expanded_row.tolist())

    alt_df = pd.DataFrame(columns=out_df.columns, data=generated_rows)
    out_df = pd.concat((out_df, alt_df), axis=0)

    # Sort by name length and similarity of alternate form to preferred term
    # For example, "task switching" the concept should take priority over the
    # "task switching" version of the "task-switching" task.
    out_df["length"] = out_df["alias"].str.len()
    out_df["ratio"] = out_df[["alias", "name"]].apply(_get_ratio, axis=1)
    return out_df.sort_values(by=["length", "ratio"], ascending=[False, False])
|
nimare/generate.py
ADDED
@@ -0,0 +1,331 @@
|
|
1
|
+
"""Utilities for generating data for testing."""
|
2
|
+
|
3
|
+
from itertools import zip_longest
|
4
|
+
|
5
|
+
import numpy as np
|
6
|
+
import sparse
|
7
|
+
|
8
|
+
from nimare.dataset import Dataset
|
9
|
+
from nimare.io import convert_neurovault_to_dataset
|
10
|
+
from nimare.meta.utils import compute_ale_ma, get_ale_kernel
|
11
|
+
from nimare.transforms import ImageTransformer
|
12
|
+
from nimare.utils import get_template, mm2vox, vox2mm
|
13
|
+
|
14
|
+
# defaults for creating a neurovault dataset
|
15
|
+
NEUROVAULT_IDS = (8836, 8838, 8893, 8895, 8892, 8891, 8962, 8894, 8956, 8854, 9000)
|
16
|
+
CONTRAST_OF_INTEREST = {"animal": "as-Animal"}
|
17
|
+
|
18
|
+
|
19
|
+
def create_coordinate_dataset(
    foci=1,
    foci_percentage="100%",
    fwhm=10,
    sample_size=30,
    n_studies=30,
    n_noise_foci=0,
    seed=None,
    space="MNI",
):
    """Generate coordinate based dataset for meta analysis.

    .. versionadded:: 0.0.4

    Parameters
    ----------
    foci : :obj:`int` or :obj:`list`
        The number of foci to be generated per study or the
        x,y,z coordinates of the ground truth foci. (Default=1)
    foci_percentage : :obj:`float`
        Percentage of studies where the foci appear. (Default="100%")
    fwhm : :obj:`float`
        Full width at half maximum (fwhm) to define the probability
        spread of the foci. (Default=10)
    sample_size : :obj:`int` or :obj:`list`
        Either mean number of participants in each study
        or a list specifying the sample size for each
        study. If a list of two numbers and n_studies is
        not two, then the first number will represent a lower
        bound and the second number will represent an upper bound
        of a uniform sample. (Default=30)
    n_studies : :obj:`int`
        Number of studies to generate. (Default=30)
    n_noise_foci : :obj:`int`
        Number of foci considered to be noise in each study. (Default=0)
    seed : :obj:`int` or None
        Random state to reproducibly initialize random numbers.
        If seed is None, then the random state will try to be initialized
        with data from /dev/urandom (or the Windows analogue) if available
        or will initialize from the clock otherwise. (Default=None)
    space : :obj:`str`
        The template space the coordinates are reported in. (Default='MNI')

    Returns
    -------
    ground_truth_foci : :obj:`list`
        generated foci in xyz (mm) coordinates
    dataset : :class:`~nimare.dataset.Dataset`
    """
    rng = np.random.RandomState(seed=seed)

    # --- validate arguments -------------------------------------------------
    if not isinstance(foci, int) and not _array_like(foci):
        raise ValueError("foci must be a positive integer or array like")

    # foci_percentage must be either a "NN%"-style string or a float in [0, 1].
    percentage_invalid = not isinstance(foci_percentage, (float, str))
    if not percentage_invalid and isinstance(foci_percentage, str):
        percentage_invalid = foci_percentage[-1] != "%"
    if not percentage_invalid and isinstance(foci_percentage, float):
        percentage_invalid = not (0.0 <= foci_percentage <= 1.0)
    if percentage_invalid:
        raise ValueError(
            "foci_percentage must be a string (example '96%') or a float between 0 and 1"
        )

    if _array_like(sample_size):
        if len(sample_size) != n_studies and len(sample_size) != 2:
            raise ValueError("sample_size must be the same length as n_studies or list of 2 items")
    elif not isinstance(sample_size, int):
        raise ValueError("sample_size must be array like or integer")

    if space != "MNI":
        raise NotImplementedError("Only coordinates for the MNI atlas has been defined")

    # --- normalize arguments ------------------------------------------------
    # "NN%" strings become a fraction in [0, 1].
    if isinstance(foci_percentage, str) and foci_percentage[-1] == "%":
        foci_percentage = float(foci_percentage[:-1]) / 100

    # sample_size becomes one value per study; a 2-item list (when n_studies
    # is not 2) is treated as uniform-sampling bounds.
    if isinstance(sample_size, int):
        sample_size = [sample_size] * n_studies
    elif _array_like(sample_size) and len(sample_size) == 2 and n_studies != 2:
        lower_bound, upper_bound = sample_size[0], sample_size[1]
        sample_size = rng.randint(lower_bound, upper_bound, size=n_studies)

    # --- build the dataset --------------------------------------------------
    ground_truth_foci, foci_dict = _create_foci(
        foci, foci_percentage, fwhm, n_studies, n_noise_foci, rng, space
    )
    dataset = Dataset(_create_source(foci_dict, sample_size, space))

    return ground_truth_foci, dataset
|
115
|
+
|
116
|
+
|
117
|
+
def create_neurovault_dataset(
    collection_ids=NEUROVAULT_IDS,
    contrasts=CONTRAST_OF_INTEREST,
    img_dir=None,
    map_type_conversion=None,
    **dset_kwargs,
):
    """Download images from NeuroVault and use them to create a dataset.

    .. versionadded:: 0.0.8

    This function will also attempt to generate Z images for any contrasts
    for which this is possible.

    Parameters
    ----------
    collection_ids : :obj:`list` of :obj:`int` or :obj:`dict`, optional
        A list of collections on neurovault specified by their id.
        The collection ids can accessed through the neurovault API
        (i.e., https://neurovault.org/api/collections) or
        their main website (i.e., https://neurovault.org/collections).
        For example, in this URL https://neurovault.org/collections/8836/,
        `8836` is the collection id.
        collection_ids can also be a dictionary whose keys are the informative
        study name and the values are collection ids to give the collections
        human readable names in the dataset.
    contrasts : :obj:`dict`, optional
        Dictionary whose keys represent the name of the contrast in
        the dataset and whose values represent a regular expression that would
        match the names represented in NeuroVault.
        For example, under the ``Name`` column in this URL
        https://neurovault.org/collections/8836/,
        a valid contrast could be "as-Animal", which will be called "animal" in the created
        dataset if the contrasts argument is ``{'animal': "as-Animal"}``.
    img_dir : :obj:`str` or None, optional
        Base path to save all the downloaded images, by default the images
        will be saved to a temporary directory with the prefix "neurovault"
    map_type_conversion : :obj:`dict` or None, optional
        Dictionary whose keys are what you expect the `map_type` name to
        be in neurovault and the values are the name of the respective
        statistic map in a nimare dataset. Default = None.
    **dset_kwargs : keyword arguments passed to Dataset
        Keyword arguments to pass in when creating the Dataset object.
        see :obj:`~nimare.dataset.Dataset` for details.

    Returns
    -------
    :obj:`~nimare.dataset.Dataset`
        Dataset object containing experiment information from neurovault.
    """
    raw_dataset = convert_neurovault_to_dataset(
        collection_ids, contrasts, img_dir, map_type_conversion, **dset_kwargs
    )
    # Derive Z-statistic images wherever the downloaded maps allow it.
    z_transformer = ImageTransformer(target="z")
    return z_transformer.transform(raw_dataset)
|
174
|
+
|
175
|
+
|
176
|
+
def _create_source(foci, sample_sizes, space="MNI"):
|
177
|
+
"""Create dictionary according to nimads(ish) specification.
|
178
|
+
|
179
|
+
.. versionadded:: 0.0.4
|
180
|
+
|
181
|
+
Parameters
|
182
|
+
----------
|
183
|
+
foci : :obj:`dict`
|
184
|
+
A dictionary of foci in xyz (mm) coordinates whose keys represent
|
185
|
+
different studies.
|
186
|
+
sample_sizes : :obj:`list`
|
187
|
+
The sample size for each study
|
188
|
+
space : :obj:`str`
|
189
|
+
The template space the coordinates are reported in. (Default='MNI')
|
190
|
+
|
191
|
+
Returns
|
192
|
+
-------
|
193
|
+
source : :obj:`dict`
|
194
|
+
study information in nimads format
|
195
|
+
"""
|
196
|
+
source = {}
|
197
|
+
for sample_size, (study, study_foci) in zip(sample_sizes, foci.items()):
|
198
|
+
source[f"study-{study}"] = {
|
199
|
+
"contrasts": {
|
200
|
+
"1": {
|
201
|
+
"coords": {
|
202
|
+
"space": space,
|
203
|
+
"x": [c[0] for c in study_foci],
|
204
|
+
"y": [c[1] for c in study_foci],
|
205
|
+
"z": [c[2] for c in study_foci],
|
206
|
+
},
|
207
|
+
"metadata": {"sample_sizes": [sample_size]},
|
208
|
+
}
|
209
|
+
}
|
210
|
+
}
|
211
|
+
|
212
|
+
return source
|
213
|
+
|
214
|
+
|
215
|
+
def _create_foci(foci, foci_percentage, fwhm, n_studies, n_noise_foci, rng, space):
    """Generate study specific foci.

    .. versionadded:: 0.0.4

    Parameters
    ----------
    foci : :obj:`int` or :obj:`list`
        The number of foci to be generated per study or the
        x,y,z coordinates of the ground truth foci.
    foci_percentage : :obj:`float`
        Percentage of studies where the foci appear.
    fwhm : :obj:`float`
        Full width at half maximum (fwhm) to define the probability
        spread of the foci.
    n_studies : :obj:`int`
        Number of n_studies to generate.
    n_noise_foci : :obj:`int`
        Number of foci considered to be noise in each study.
    rng : :class:`numpy.random.RandomState`
        Random state to reproducibly initialize random numbers.
    space : :obj:`str`
        The template space the coordinates are reported in.

    Returns
    -------
    ground_truth_foci : :obj:`list`
        List of 3-item tuples containing x, y, z coordinates
        of the ground truth foci or an empty list if
        there are no ground_truth_foci.
    foci_dict : :obj:`dict`
        Dictionary with keys representing the study, and
        whose values represent the study specific foci.
    """
    # convert foci_percentage to float between 0 and 1
    if isinstance(foci_percentage, str) and foci_percentage[-1] == "%":
        foci_percentage = float(foci_percentage[:-1]) / 100

    if space == "MNI":
        template_img = get_template(space="mni152_2mm", mask="brain")
    else:
        # BUG FIX: previously template_img was simply left undefined for
        # non-MNI spaces, producing a confusing NameError further down.
        raise NotImplementedError("Only coordinates for the MNI atlas has been defined")

    # use a template to find all "valid" coordinates
    template_data = template_img.get_fdata()
    possible_ijks = np.argwhere(template_data)

    # number of "convergent" foci each study should report
    if isinstance(foci, int):
        foci_idxs = np.unique(rng.choice(range(possible_ijks.shape[0]), foci, replace=True))
        # if there are no foci_idxs, give a dummy coordinate (0, 0, 0)
        ground_truth_foci_ijks = possible_ijks[foci_idxs] if foci_idxs.size else np.array([[]])
    else:
        # BUG FIX: accept any array-like input (list, tuple, or ndarray), to
        # match the `_array_like` validation in create_coordinate_dataset.
        # Previously only `list` was handled, so tuples/arrays fell through
        # both branches and triggered a NameError below.
        ground_truth_foci_ijks = np.array([mm2vox(coord, template_img.affine) for coord in foci])

    # create a probability map for each peak
    kernel = get_ale_kernel(template_img, fwhm)[1]
    foci_prob_maps = {
        tuple(peak): compute_ale_ma(template_img, np.atleast_2d(peak), kernel=kernel).reshape(
            template_data.shape
        )
        for peak in ground_truth_foci_ijks
        if peak.size
    }

    # get study specific instances of each focus, sampled within each peak's
    # probability map (weighted by the kernel values)
    signal_studies = int(round(foci_percentage * n_studies))
    signal_ijks = {
        peak: sparse.argwhere(prob_map)[
            rng.choice(
                sparse.argwhere(prob_map).shape[0],
                size=signal_studies,
                replace=True,
                p=(prob_map[prob_map.nonzero()] / sum(prob_map[prob_map.nonzero()])).todense(),
            )
        ]
        for peak, prob_map in foci_prob_maps.items()
    }

    # reshape foci coordinates to be study specific
    paired_signal_ijks = (
        np.transpose(np.array(list(signal_ijks.values())), axes=(1, 0, 2))
        if signal_ijks
        else (None,)
    )

    foci_dict = {}
    for study_signal_ijks, study in zip_longest(paired_signal_ijks, range(n_studies)):
        if study_signal_ijks is None:
            study_signal_ijks = np.array([[]])
            # a study with no signal foci must still report at least one
            # (noise) coordinate
            n_noise_foci = max(1, n_noise_foci)

        if n_noise_foci > 0:
            noise_ijks = possible_ijks[
                rng.choice(possible_ijks.shape[0], n_noise_foci, replace=True)
            ]

            # add the noise foci ijks to the existing signal ijks
            foci_ijks = (
                np.unique(np.vstack([study_signal_ijks, noise_ijks]), axis=0)
                if np.any(study_signal_ijks)
                else noise_ijks
            )
        else:
            foci_ijks = study_signal_ijks

        # transform ijk voxel coordinates to xyz mm coordinates
        foci_xyzs = [vox2mm(ijk, template_img.affine) for ijk in foci_ijks]
        foci_dict[study] = foci_xyzs

    ground_truth_foci_xyz = [
        tuple(vox2mm(ijk, template_img.affine)) for ijk in ground_truth_foci_ijks if np.any(ijk)
    ]
    return ground_truth_foci_xyz, foci_dict
|
327
|
+
|
328
|
+
|
329
|
+
def _array_like(obj):
|
330
|
+
"""Test if obj is array-like."""
|
331
|
+
return isinstance(obj, (list, tuple, np.ndarray))
|