nimare 0.4.2rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/__init__.py +0 -0
- benchmarks/bench_cbma.py +57 -0
- nimare/__init__.py +45 -0
- nimare/_version.py +21 -0
- nimare/annotate/__init__.py +21 -0
- nimare/annotate/cogat.py +213 -0
- nimare/annotate/gclda.py +924 -0
- nimare/annotate/lda.py +147 -0
- nimare/annotate/text.py +75 -0
- nimare/annotate/utils.py +87 -0
- nimare/base.py +217 -0
- nimare/cli.py +124 -0
- nimare/correct.py +462 -0
- nimare/dataset.py +685 -0
- nimare/decode/__init__.py +33 -0
- nimare/decode/base.py +115 -0
- nimare/decode/continuous.py +462 -0
- nimare/decode/discrete.py +753 -0
- nimare/decode/encode.py +110 -0
- nimare/decode/utils.py +44 -0
- nimare/diagnostics.py +510 -0
- nimare/estimator.py +139 -0
- nimare/extract/__init__.py +19 -0
- nimare/extract/extract.py +466 -0
- nimare/extract/utils.py +295 -0
- nimare/generate.py +331 -0
- nimare/io.py +635 -0
- nimare/meta/__init__.py +39 -0
- nimare/meta/cbma/__init__.py +6 -0
- nimare/meta/cbma/ale.py +951 -0
- nimare/meta/cbma/base.py +947 -0
- nimare/meta/cbma/mkda.py +1361 -0
- nimare/meta/cbmr.py +970 -0
- nimare/meta/ibma.py +1683 -0
- nimare/meta/kernel.py +501 -0
- nimare/meta/models.py +1199 -0
- nimare/meta/utils.py +494 -0
- nimare/nimads.py +492 -0
- nimare/reports/__init__.py +24 -0
- nimare/reports/base.py +664 -0
- nimare/reports/default.yml +123 -0
- nimare/reports/figures.py +651 -0
- nimare/reports/report.tpl +160 -0
- nimare/resources/__init__.py +1 -0
- nimare/resources/atlases/Harvard-Oxford-LICENSE +93 -0
- nimare/resources/atlases/HarvardOxford-cort-maxprob-thr25-2mm.nii.gz +0 -0
- nimare/resources/database_file_manifest.json +142 -0
- nimare/resources/english_spellings.csv +1738 -0
- nimare/resources/filenames.json +32 -0
- nimare/resources/neurosynth_laird_studies.json +58773 -0
- nimare/resources/neurosynth_stoplist.txt +396 -0
- nimare/resources/nidm_pain_dset.json +1349 -0
- nimare/resources/references.bib +541 -0
- nimare/resources/semantic_knowledge_children.txt +325 -0
- nimare/resources/semantic_relatedness_children.txt +249 -0
- nimare/resources/templates/MNI152_2x2x2_brainmask.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_T1w.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_T1w.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz +0 -0
- nimare/results.py +225 -0
- nimare/stats.py +276 -0
- nimare/tests/__init__.py +1 -0
- nimare/tests/conftest.py +229 -0
- nimare/tests/data/amygdala_roi.nii.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_coordinates.tsv.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_metadata.tsv.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz +0 -0
- nimare/tests/data/data-neurosynth_version-7_vocab-terms_vocabulary.txt +100 -0
- nimare/tests/data/neurosynth_dset.json +2868 -0
- nimare/tests/data/neurosynth_laird_studies.json +58773 -0
- nimare/tests/data/nidm_pain_dset.json +1349 -0
- nimare/tests/data/nimads_annotation.json +1 -0
- nimare/tests/data/nimads_studyset.json +1 -0
- nimare/tests/data/test_baseline.txt +2 -0
- nimare/tests/data/test_pain_dataset.json +1278 -0
- nimare/tests/data/test_pain_dataset_multiple_contrasts.json +1242 -0
- nimare/tests/data/test_sleuth_file.txt +18 -0
- nimare/tests/data/test_sleuth_file2.txt +10 -0
- nimare/tests/data/test_sleuth_file3.txt +5 -0
- nimare/tests/data/test_sleuth_file4.txt +5 -0
- nimare/tests/data/test_sleuth_file5.txt +5 -0
- nimare/tests/test_annotate_cogat.py +32 -0
- nimare/tests/test_annotate_gclda.py +86 -0
- nimare/tests/test_annotate_lda.py +27 -0
- nimare/tests/test_dataset.py +99 -0
- nimare/tests/test_decode_continuous.py +132 -0
- nimare/tests/test_decode_discrete.py +92 -0
- nimare/tests/test_diagnostics.py +168 -0
- nimare/tests/test_estimator_performance.py +385 -0
- nimare/tests/test_extract.py +46 -0
- nimare/tests/test_generate.py +247 -0
- nimare/tests/test_io.py +240 -0
- nimare/tests/test_meta_ale.py +298 -0
- nimare/tests/test_meta_cbmr.py +295 -0
- nimare/tests/test_meta_ibma.py +240 -0
- nimare/tests/test_meta_kernel.py +209 -0
- nimare/tests/test_meta_mkda.py +234 -0
- nimare/tests/test_nimads.py +21 -0
- nimare/tests/test_reports.py +110 -0
- nimare/tests/test_stats.py +101 -0
- nimare/tests/test_transforms.py +272 -0
- nimare/tests/test_utils.py +200 -0
- nimare/tests/test_workflows.py +221 -0
- nimare/tests/utils.py +126 -0
- nimare/transforms.py +907 -0
- nimare/utils.py +1367 -0
- nimare/workflows/__init__.py +14 -0
- nimare/workflows/base.py +189 -0
- nimare/workflows/cbma.py +165 -0
- nimare/workflows/ibma.py +108 -0
- nimare/workflows/macm.py +77 -0
- nimare/workflows/misc.py +65 -0
- nimare-0.4.2rc4.dist-info/LICENSE +21 -0
- nimare-0.4.2rc4.dist-info/METADATA +124 -0
- nimare-0.4.2rc4.dist-info/RECORD +119 -0
- nimare-0.4.2rc4.dist-info/WHEEL +5 -0
- nimare-0.4.2rc4.dist-info/entry_points.txt +2 -0
- nimare-0.4.2rc4.dist-info/top_level.txt +2 -0
@@ -0,0 +1,753 @@
|
|
1
|
+
"""Methods for decoding subsets of voxels or experiments into text."""
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
import pandas as pd
|
5
|
+
from nilearn._utils import load_niimg
|
6
|
+
from pymare.stats import bonferroni, fdr
|
7
|
+
from scipy import special
|
8
|
+
from scipy.stats import binom
|
9
|
+
|
10
|
+
from nimare.decode.base import Decoder
|
11
|
+
from nimare.decode.utils import weight_priors
|
12
|
+
from nimare.meta.kernel import KernelTransformer, MKDAKernel
|
13
|
+
from nimare.stats import one_way, pearson, two_way
|
14
|
+
from nimare.transforms import p_to_z
|
15
|
+
from nimare.utils import _check_type, get_masker
|
16
|
+
|
17
|
+
|
18
|
+
def gclda_decode_roi(model, roi, topic_priors=None, prior_weight=1.0):
    r"""Decode a binary region of interest into weighted terms with a fitted GCLDA model.

    The method used in this function was originally described in :footcite:t:`rubin2017decoding`.

    Parameters
    ----------
    model : :obj:`~nimare.annotate.gclda.GCLDAModel`
        Model object needed for decoding. This function reads ``model.mask``,
        ``model.p_topic_g_voxel_``, ``model.p_word_g_topic_`` and ``model.vocabulary``.
    roi : :obj:`nibabel.nifti1.Nifti1Image` or :obj:`str`
        Binary image to decode into text. If string, path to a file with
        the binary image. Its affine must equal the affine of ``model.mask``.
    topic_priors : :obj:`numpy.ndarray` of :obj:`float`, optional
        A 1d array of size (n_topics) with values for topic weighting.
        If None, no weighting is done. Default is None.
    prior_weight : :obj:`float`, optional
        The weight by which the prior will affect the decoding.
        Only used when ``topic_priors`` is given. Default is 1.

    Returns
    -------
    decoded_df : :obj:`pandas.DataFrame`
        A DataFrame indexed by term (index name "Term") with a single "Weight" column
        holding the weight associated with each word-token.
    topic_weights : :obj:`numpy.ndarray` of :obj:`float`
        The weights of the topics used in decoding.

    Raises
    ------
    ValueError
        If the affine of ``roi`` differs from the affine of ``model.mask``.

    Notes
    -----
    ======================    ==============================================================
    Notation                  Meaning
    ======================    ==============================================================
    :math:`v`                 Voxel
    :math:`t`                 Topic
    :math:`w`                 Word type
    :math:`r`                 Region of interest (ROI)
    :math:`p(t|v)`            Probability of topic given voxel (``p_topic_g_voxel``)
    :math:`\tau_{t}`          Topic weight vector (``topic_weights``)
    :math:`p(w|t)`            Probability of word type given topic (``p_word_g_topic``)
    ======================    ==============================================================

    1.  Take :math:`p(t|v)` from the fitted model (``model.p_topic_g_voxel_``).

    2.  Compute topic weight vector (:math:`\tau_{t}`) by adding across voxels within ROI.

            - :math:`\tau_{t} = \sum_{i} {p(t|v_{i})}`

    3.  If ``topic_priors`` is given, multiply :math:`\tau_{t}` element-wise by the
        weighted priors.

    4.  Multiply :math:`\tau_{t}` by :math:`p(w|t)`.

            - :math:`p(w|r) \propto \tau_{t} \cdot p(w|t)`

    5.  The resulting vector (``word_weights``) reflects arbitrarily scaled term weights for the
        ROI.

    See Also
    --------
    :class:`~nimare.annotate.gclda.GCLDAModel`
    :func:`~nimare.decode.continuous.gclda_decode_map`
    :func:`~nimare.decode.encode.gclda_encode`

    References
    ----------
    .. footbibliography::
    """
    roi_img = load_niimg(roi)

    # The ROI is indexed with the model's mask below, so both must share a voxel grid.
    mask_affine = model.mask.affine
    if not np.array_equal(roi_img.affine, mask_affine):
        raise ValueError(
            "Input roi must have same affine as mask img:\n"
            f"{np.array2string(roi_img.affine)}\n{np.array2string(mask_affine)}"
        )

    # Restrict the flattened ROI to in-mask voxels, then find which of those are in the ROI.
    in_mask = model.mask.get_fdata().ravel().astype(bool)
    in_roi = roi_img.get_fdata().astype(bool).ravel()[in_mask]
    roi_voxels = np.flatnonzero(in_roi)

    # Sum p(t|v) over the ROI voxels to get one weight per topic.
    topic_weights = np.sum(model.p_topic_g_voxel_[roi_voxels, :], axis=0)
    if topic_priors is not None:
        topic_weights *= weight_priors(topic_priors, prior_weight)

    # Project the topic weights onto the vocabulary through p(w|t).
    word_weights = np.dot(model.p_word_g_topic_, topic_weights)

    decoded_df = pd.DataFrame(data=word_weights, index=model.vocabulary, columns=["Weight"])
    decoded_df.index.name = "Term"
    return decoded_df, topic_weights
|
112
|
+
|
113
|
+
|
114
|
+
class BrainMapDecoder(Decoder):
    """Decode a discrete selection of studies into labels using the BrainMap method.

    This method was described in :footcite:t:`amft2015definition`.

    .. versionadded:: 0.0.3

    Parameters
    ----------
    feature_group : :obj:`str`, optional
        Feature group name used to select labels from a specific source.
        Feature groups are stored as prefixes to feature name columns in
        Dataset.annotations, with the format ``[source]_[valuetype]__``.
        Input may or may not include the trailing underscore.
        Default is None, which uses all feature groups available.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        If feature_group is provided, then features should not include the
        feature group prefix.
        If feature_group is *not* provided, then features *should* include the
        prefix.
        Default is None, which uses all features available.
    frequency_threshold : :obj:`float`, optional
        Threshold to apply to dataset annotations. Values greater than or
        equal to the threshold are assigned as label+, while values below
        the threshold are considered label-. Default is 0.001.
    u : :obj:`float`, optional
        Alpha level for multiple comparisons correction. Default is 0.05.
    correction : :obj:`str` or None, optional
        Multiple comparisons correction method to apply. The value is stored and
        forwarded unchanged to :func:`~nimare.decode.discrete.brainmap_decode`,
        which decides how to interpret it. The signature default is ``"fdr_bh"``.

    See Also
    --------
    :func:`~nimare.decode.discrete.brainmap_decode`: The associated function for this method.

    References
    ----------
    .. footbibliography::
    """

    # Both the coordinates and the annotations tables are read by ``transform``.
    _required_inputs = {
        "coordinates": ("coordinates", None),
        "annotations": ("annotations", None),
    }

    def __init__(
        self,
        feature_group=None,
        features=None,
        frequency_threshold=0.001,
        u=0.05,
        correction="fdr_bh",
    ):
        # Feature selection settings.
        self.feature_group = feature_group
        self.features = features
        self.frequency_threshold = frequency_threshold
        # Significance-testing settings.
        self.u = u
        self.correction = correction

    def _fit(self, dataset):
        """Do nothing; this decoder has no fitting step of its own."""

    def transform(self, ids, ids2=None):
        """Apply the decoding method to a Dataset.

        Reads ``self.inputs_`` and ``self.features_``, which are not set in this
        class (presumably populated when the decoder is fitted - confirm against
        the ``Decoder`` base class).

        Parameters
        ----------
        ids : :obj:`list`
            Subset of studies in coordinates/annotations dataframes indicating
            target for decoding. Examples include studies reporting at least one
            peak in an ROI, or studies selected from a clustering analysis.
        ids2 : :obj:`list` or None, optional
            Second subset of studies, representing "unselected" studies. If None,
            then all studies in coordinates/annotations dataframes **not** in
            ``ids`` will be used.

        Returns
        -------
        results : :class:`pandas.DataFrame`
            Table with each label and the following values associated with each
            label: 'pForward', 'zForward', 'likelihoodForward', 'pReverse',
            'zReverse', and 'probReverse'.
        """
        return brainmap_decode(
            coordinates=self.inputs_["coordinates"],
            annotations=self.inputs_["annotations"],
            ids=ids,
            ids2=ids2,
            features=self.features_,
            frequency_threshold=self.frequency_threshold,
            u=self.u,
            correction=self.correction,
        )
|
210
|
+
|
211
|
+
|
212
|
+
def brainmap_decode(
    coordinates,
    annotations,
    ids,
    ids2=None,
    features=None,
    frequency_threshold=0.001,
    u=0.05,
    correction="fdr_bh",
):
    """Perform image-to-text decoding for discrete inputs according to the BrainMap method.

    This method was described in :footcite:t:`amft2015definition`.

    Parameters
    ----------
    coordinates : :class:`pandas.DataFrame`
        DataFrame containing coordinates. Must include a column named 'id' and
        must have a separate row for each reported peak coordinate for each
        study (i.e., there are multiple rows per ID).
        IDs from ``coordinates`` must match those from ``annotations``.
    annotations : :class:`pandas.DataFrame`
        DataFrame containing labels. Must include a column named 'id' and each
        row must correspond to a study. Other columns may correspond to
        individual labels.
        IDs from ``annotations`` must match those from ``coordinates``.
    ids : :obj:`list`
        Subset of studies in coordinates/annotations dataframes indicating
        target for decoding. Examples include studies reporting at least one
        peak in an ROI, or studies selected from a clustering analysis.
    ids2 : :obj:`list` or None, optional
        Second subset of studies, representing "unselected" studies. If None,
        then all studies in ``coordinates`` **not** in ``ids`` will be used.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        Default is None, which uses every numeric or boolean column of
        ``annotations`` other than 'id'.
    frequency_threshold : :obj:`float`, optional
        Threshold to apply to dataset annotations. Values greater than or
        equal to the threshold are assigned as label+, while values below
        the threshold are considered label-. Default is 0.001.
    u : :obj:`float`, optional
        Alpha level for multiple comparisons correction. Only passed on to the
        FDR correction. Default is 0.05.
    correction : {None, "bh", "by", "bonferroni", "fdr_bh", "fdr_by"}, optional
        Multiple comparisons correction method to apply.
        ``"fdr_bh"`` and ``"fdr_by"`` are accepted as aliases of ``"bh"`` and ``"by"``.
        Any other value (including None) leaves the p-values uncorrected.
        Default is 'fdr_bh' (Benjamini-Hochberg FDR correction).

    Returns
    -------
    out_df : :class:`pandas.DataFrame`
        Table indexed by label (index name "Term") with the following values
        associated with each label: 'pForward', 'zForward', 'likelihoodForward',
        'pReverse', 'zReverse', and 'probReverse'.

    Notes
    -----
    Labels attached to fewer than five selected studies have both their forward
    and reverse p-values set to 1 before multiple comparisons correction.

    See Also
    --------
    :func:`~nimare.decode.discrete.BrainMapDecoder`: The associated class for this method.

    References
    ----------
    .. footbibliography::
    """
    dataset_ids = sorted(set(coordinates["id"].values))
    if ids2 is None:
        unselected = sorted(set(dataset_ids) - set(ids))
    else:
        unselected = ids2[:]

    features_df = annotations.set_index("id", drop=True)
    if features is None:
        # Honor the documented default. Only columns that can be compared with the
        # numeric threshold are usable as labels, so non-numeric columns are skipped.
        features = features_df.select_dtypes(include=["number", "bool"]).columns.tolist()

    # Binarize with frequency threshold
    features_df = features_df[features].ge(frequency_threshold)

    sel_array = features_df.loc[ids].values
    unsel_array = features_df.loc[unselected].values

    n_selected = len(ids)
    n_unselected = len(unselected)

    # Total number of label assignments over all studies and labels
    # (a study carrying two labels is counted twice).
    n_exps_across_terms = features_df.values.sum()

    n_selected_term = np.sum(sel_array, axis=0)
    n_unselected_term = np.sum(unsel_array, axis=0)

    n_selected_noterm = n_selected - n_selected_term
    n_unselected_noterm = n_unselected - n_unselected_term

    n_term = n_selected_term + n_unselected_term
    p_term = n_term / n_exps_across_terms

    n_foci_in_database = coordinates.shape[0]
    p_selected = n_selected / n_foci_in_database

    # Count, per label, the coordinate rows belonging to studies with / without the label.
    coordinate_ids = coordinates["id"]
    n_term_foci = np.zeros(len(features))
    n_noterm_foci = np.zeros(len(features))
    for i, term in enumerate(features):
        term_ids = features_df.loc[features_df[term] == 1].index.values
        noterm_ids = features_df.loc[features_df[term] == 0].index.values
        n_term_foci[i] = coordinate_ids.isin(term_ids).sum()
        n_noterm_foci[i] = coordinate_ids.isin(noterm_ids).sum()

    p_selected_g_term = n_selected_term / n_term_foci  # probForward
    l_selected_g_term = p_selected_g_term / p_selected  # likelihoodForward
    p_selected_g_noterm = n_selected_noterm / n_noterm_foci

    p_term_g_selected = p_selected_g_term * p_term / p_selected  # probReverse
    p_term_g_selected = p_term_g_selected / np.nansum(p_term_g_selected)  # Normalize

    # Significance testing
    # Forward inference significance is determined with a binomial distribution
    p_fi = 1 - binom.cdf(k=n_selected_term, n=n_term_foci, p=p_selected)
    sign_fi = np.sign(n_selected_term - np.mean(n_selected_term)).ravel()

    # Two-way chi-square test for association of activation
    cells = np.array(
        [
            [n_selected_term, n_selected_noterm],
            [n_unselected_term, n_unselected_noterm],
        ]
    ).T
    chi2_ri = two_way(cells)
    p_ri = special.chdtrc(1, chi2_ri)
    sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()

    # Ignore rare features
    p_fi[n_selected_term < 5] = 1.0
    p_ri[n_selected_term < 5] = 1.0

    # The signature default ("fdr_bh") uses the longer FDR names; map them onto the
    # short names tested below so the default really applies a correction.
    if isinstance(correction, str):
        correction = {"fdr_bh": "bh", "fdr_by": "by"}.get(correction, correction)

    # Multiple comparisons correction across features. Separately done for FI and RI.
    if correction in ("bh", "by"):
        p_corr_fi = fdr(p_fi, alpha=u, method=correction)
        p_corr_ri = fdr(p_ri, alpha=u, method=correction)
    elif correction == "bonferroni":
        p_corr_fi = bonferroni(p_fi)
        p_corr_ri = bonferroni(p_ri)
    else:
        p_corr_fi = p_fi
        p_corr_ri = p_ri

    # Compute signed z-values from the (corrected) two-sided p-values
    z_corr_fi = p_to_z(p_corr_fi, "two") * sign_fi
    z_corr_ri = p_to_z(p_corr_ri, "two") * sign_ri

    # Assemble one row per label; column order matches ``columns`` below.
    arr = np.array(
        [
            p_corr_fi,
            z_corr_fi,
            l_selected_g_term,
            p_corr_ri,
            z_corr_ri,
            p_term_g_selected,
        ]
    ).T

    out_df = pd.DataFrame(
        data=arr,
        index=features,
        columns=[
            "pForward",
            "zForward",
            "likelihoodForward",
            "pReverse",
            "zReverse",
            "probReverse",
        ],
    )
    out_df.index.name = "Term"
    return out_df
|
385
|
+
|
386
|
+
|
387
|
+
class NeurosynthDecoder(Decoder):
    """Decode a discrete selection of studies using Neurosynth's meta-analytic method.

    Neurosynth was described in :footcite:t:`yarkoni2011large`.

    .. versionadded:: 0.0.3

    This does not employ correlations between unthresholded maps, which are the
    method of choice for decoding within Neurosynth and Neurovault.
    Metadata (i.e., feature labels) for studies within the selected sample
    (`ids`) are compared to the unselected studies remaining in the database
    (`dataset`).

    Parameters
    ----------
    feature_group : :obj:`str`, optional
        Feature group name used to select labels from a specific source.
        Feature groups are stored as prefixes to feature name columns in
        Dataset.annotations, with the format ``[source]_[valuetype]__``.
        Input may or may not include the trailing underscore.
        Default is None, which uses all feature groups available.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        If feature_group is provided, then features should not include the
        feature group prefix.
        If feature_group is *not* provided, then features *should* include the
        prefix.
        Default is None, which uses all features available.
    frequency_threshold : :obj:`float`, optional
        Threshold to apply to dataset annotations. Values greater than or
        equal to the threshold are assigned as label+, while values below
        the threshold are considered label-. Default is 0.001.
    prior : :obj:`float`, optional
        Uniform prior probability of each label being active in a study in
        the absence of evidence (labels or selection) from the study.
        Default is 0.5 (50%).
    u : :obj:`float`, optional
        Alpha level for multiple comparisons correction. Default is 0.05.
    correction : :obj:`str` or None, optional
        Multiple comparisons correction method to apply. The value is stored and
        forwarded unchanged to :func:`~nimare.decode.discrete.neurosynth_decode`,
        which decides how to interpret it. The signature default is ``"fdr_bh"``.

    See Also
    --------
    :func:`~nimare.decode.discrete.neurosynth_decode`: The associated function for this method.

    References
    ----------
    .. footbibliography::
    """

    # Both the coordinates and the annotations tables are read by ``transform``.
    _required_inputs = {
        "coordinates": ("coordinates", None),
        "annotations": ("annotations", None),
    }

    def __init__(
        self,
        feature_group=None,
        features=None,
        frequency_threshold=0.001,
        prior=0.5,
        u=0.05,
        correction="fdr_bh",
    ):
        # Feature selection settings.
        self.feature_group = feature_group
        self.features = features
        self.frequency_threshold = frequency_threshold
        # Effect-size and significance-testing settings.
        self.prior = prior
        self.u = u
        self.correction = correction

    def _fit(self, dataset):
        """Do nothing; this decoder has no fitting step of its own."""

    def transform(self, ids, ids2=None):
        """Apply the decoding method to a Dataset.

        Reads ``self.inputs_`` and ``self.features_``, which are not set in this
        class (presumably populated when the decoder is fitted - confirm against
        the ``Decoder`` base class). ``self.feature_group`` is not forwarded.

        Parameters
        ----------
        ids : :obj:`list`
            Subset of studies in coordinates/annotations dataframes indicating
            target for decoding. Examples include studies reporting at least one
            peak in an ROI, or studies selected from a clustering analysis.
        ids2 : :obj:`list` or None, optional
            Second subset of studies, representing "unselected" studies. If None,
            then all studies in Dataset **not** in
            ``ids`` will be used.

        Returns
        -------
        results : :class:`pandas.DataFrame`
            Table with each label and the following values associated with each
            label: 'pForward', 'zForward', 'probForward', 'pReverse', 'zReverse',
            and 'probReverse'.
        """
        return neurosynth_decode(
            coordinates=self.inputs_["coordinates"],
            annotations=self.inputs_["annotations"],
            ids=ids,
            ids2=ids2,
            features=self.features_,
            frequency_threshold=self.frequency_threshold,
            prior=self.prior,
            u=self.u,
            correction=self.correction,
        )
|
495
|
+
|
496
|
+
|
497
|
+
def neurosynth_decode(
    coordinates,
    annotations,
    ids,
    ids2=None,
    feature_group=None,
    features=None,
    frequency_threshold=0.001,
    prior=0.5,
    u=0.05,
    correction="fdr_bh",
):
    """Perform discrete functional decoding according to Neurosynth's meta-analytic method.

    This does not employ correlations between unthresholded maps, which are the
    method of choice for decoding within Neurosynth and Neurovault.
    Metadata (i.e., feature labels) for studies within the selected sample
    (`ids`) are compared to the unselected studies remaining in the database
    (`dataset`).

    Neurosynth was described in :footcite:t:`yarkoni2011large`.

    Parameters
    ----------
    coordinates : :class:`pandas.DataFrame`
        DataFrame containing coordinates. Must include a column named 'id' and
        must have a separate row for each reported peak coordinate for each
        study (i.e., there are multiple rows per ID).
        IDs from ``coordinates`` must match those from ``annotations``.
    annotations : :class:`pandas.DataFrame`
        DataFrame containing labels. Must include a column named 'id' and each
        row must correspond to a study. Other columns may correspond to
        individual labels.
        IDs from ``annotations`` must match those from ``coordinates``.
    ids : :obj:`list`
        Subset of studies in coordinates/annotations dataframes indicating
        target for decoding. Examples include studies reporting at least one
        peak in an ROI, or studies selected from a clustering analysis.
    ids2 : :obj:`list` or None, optional
        Second subset of studies, representing "unselected" studies. If None,
        then all studies in ``coordinates`` **not** in ``ids`` will be used.
    feature_group : :obj:`str`, optional
        Accepted for signature compatibility; this function does not use it.
        Select the desired columns with ``features`` instead. Default is None.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        Default is None, which uses every numeric or boolean column of
        ``annotations`` other than 'id'.
    frequency_threshold : :obj:`float`, optional
        Threshold to apply to dataset annotations. Values greater than or
        equal to the threshold are assigned as label+, while values below
        the threshold are considered label-. Default is 0.001.
    prior : :obj:`float` or None, optional
        Uniform prior probability of each label being active in a study in
        the absence of evidence (labels or selection) from the study.
        If None, the empirical frequency of each label is used instead.
        Default is 0.5 (50%).
    u : :obj:`float`, optional
        Alpha level for multiple comparisons correction. Only passed on to the
        FDR correction. Default is 0.05.
    correction : {None, "bh", "by", "bonferroni", "fdr_bh", "fdr_by"}, optional
        Multiple comparisons correction method to apply.
        ``"fdr_bh"`` and ``"fdr_by"`` are accepted as aliases of ``"bh"`` and ``"by"``.
        Any other value (including None) leaves the p-values uncorrected.
        Default is 'fdr_bh' (Benjamini-Hochberg FDR correction).

    Returns
    -------
    out_df : :class:`pandas.DataFrame`
        Table indexed by label (index name "Term") with the following values
        associated with each label: 'pForward', 'zForward', 'probForward',
        'pReverse', 'zReverse', and 'probReverse'.

    See Also
    --------
    :class:`~nimare.decode.discrete.NeurosynthDecoder`: The associated class for this method.
    :func:`~nimare.decode.continuous.CorrelationDecoder`: The correlation-based decoding
        method employed in Neurosynth and NeuroVault.

    References
    ----------
    .. footbibliography::
    """
    dataset_ids = sorted(set(coordinates["id"].values))
    if ids2 is None:
        unselected = sorted(set(dataset_ids) - set(ids))
    else:
        unselected = ids2[:]

    features_df = annotations.set_index("id", drop=True)
    if features is None:
        # Honor the documented default. Only columns that can be compared with the
        # numeric threshold are usable as labels, so non-numeric columns are skipped.
        features = features_df.select_dtypes(include=["number", "bool"]).columns.tolist()

    # Binarize with frequency threshold
    features_df = features_df[features].ge(frequency_threshold)

    sel_array = features_df.loc[ids].values
    unsel_array = features_df.loc[unselected].values

    n_selected = len(ids)
    n_unselected = len(unselected)

    # Per-label study counts, split by selection status and label presence.
    n_selected_term = np.sum(sel_array, axis=0)
    n_unselected_term = np.sum(unsel_array, axis=0)

    n_selected_noterm = n_selected - n_selected_term
    n_unselected_noterm = n_unselected - n_unselected_term

    n_term = n_selected_term + n_unselected_term
    n_noterm = n_selected_noterm + n_unselected_noterm

    p_term = n_term / (n_term + n_noterm)

    p_selected_g_term = n_selected_term / n_term
    p_selected_g_noterm = n_selected_noterm / n_noterm

    # Recompute conditions with empirically derived prior (or inputted one)
    if prior is None:
        # With this choice the forward probability computed below reduces to the
        # overall proportion of selected studies, regardless of term.
        prior = p_term

    # Significance testing
    # One-way chi-square test for uniformity of term frequency across terms
    chi2_fi = one_way(n_selected_term, n_term)
    p_fi = special.chdtrc(1, chi2_fi)
    sign_fi = np.sign(n_selected_term - np.mean(n_selected_term)).ravel()

    # Two-way chi-square test for association
    cells = np.array(
        [
            [n_selected_term, n_selected_noterm],
            [n_unselected_term, n_unselected_noterm],
        ]
    ).T
    chi2_ri = two_way(cells)
    p_ri = special.chdtrc(1, chi2_ri)
    sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()

    # The signature default ("fdr_bh") uses the longer FDR names; map them onto the
    # short names tested below so the default really applies a correction.
    if isinstance(correction, str):
        correction = {"fdr_bh": "bh", "fdr_by": "by"}.get(correction, correction)

    # Multiple comparisons correction across terms. Separately done for FI and RI.
    if correction in ("bh", "by"):
        p_corr_fi = fdr(p_fi, alpha=u, method=correction)
        p_corr_ri = fdr(p_ri, alpha=u, method=correction)
    elif correction == "bonferroni":
        p_corr_fi = bonferroni(p_fi)
        p_corr_ri = bonferroni(p_ri)
    else:
        p_corr_fi = p_fi
        p_corr_ri = p_ri

    # Compute signed z-values from the (corrected) two-sided p-values
    z_corr_fi = p_to_z(p_corr_fi, "two") * sign_fi
    z_corr_ri = p_to_z(p_corr_ri, "two") * sign_ri

    # Effect size
    # est. prob. of brain state described by term finding activation in ROI
    p_selected_g_term_g_prior = prior * p_selected_g_term + (1 - prior) * p_selected_g_noterm

    # est. prob. of activation in ROI reflecting brain state described by term
    p_term_g_selected_g_prior = p_selected_g_term * prior / p_selected_g_term_g_prior

    # Assemble one row per label; column order matches ``columns`` below.
    arr = np.array(
        [
            p_corr_fi,
            z_corr_fi,
            p_selected_g_term_g_prior,
            p_corr_ri,
            z_corr_ri,
            p_term_g_selected_g_prior,
        ]
    ).T

    out_df = pd.DataFrame(
        data=arr,
        index=features,
        columns=["pForward", "zForward", "probForward", "pReverse", "zReverse", "probReverse"],
    )
    out_df.index.name = "Term"
    return out_df
|
667
|
+
|
668
|
+
|
669
|
+
class ROIAssociationDecoder(Decoder):
    """Perform discrete functional decoding according to Neurosynth's ROI association method.

    Neurosynth was described in :footcite:t:`yarkoni2011large`.

    Parameters
    ----------
    masker : :class:`~nilearn.input_data.NiftiMasker`, img_like, or similar
        Masker for region of interest.
    kernel_transformer : :obj:`~nimare.meta.kernel.KernelTransformer`, optional
        Kernel with which to create modeled activation maps. Default is MKDAKernel.
    feature_group : :obj:`str`, optional
        Feature group name used to select labels from a specific source.
        Feature groups are stored as prefixes to feature name columns in
        Dataset.annotations, with the format ``[source]_[valuetype]__``.
        Input may or may not include the trailing underscore.
        Default is None, which uses all feature groups available.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        If feature_group is provided, then features should not include the feature group prefix.
        If feature_group is *not* provided, then features *should* include the prefix.
        Default is None, which uses all features available.
    **kwargs
        Keyword arguments whose names start with ``kernel__`` are forwarded to the
        kernel transformer with that leading prefix removed
        (e.g., ``kernel__r=10`` is forwarded as ``r=10``).
        Keyword arguments without the prefix are not used by this constructor.

    Attributes
    ----------
    roi_values_ : array
        Mean over axis 1 of the kernel transformer's array output. Set by ``_fit``.

    Notes
    -----
    The general approach in this method is:

    1.  Define ROI.
    2.  Generate MA maps for all studies in Dataset.
    3.  Average MA values within ROI to get study-wise MA regressor.
    4.  Correlate MA regressor with study-wise annotation values (e.g., tf-idf values).

    References
    ----------
    .. footbibliography::
    """

    _required_inputs = {
        "coordinates": ("coordinates", None),
        "annotations": ("annotations", None),
    }

    def __init__(
        self,
        masker,
        kernel_transformer=MKDAKernel,
        feature_group=None,
        features=None,
        **kwargs,
    ):
        self.masker = get_masker(masker)

        # Get kernel transformer.
        # Strip only the *leading* "kernel__" from each matching key. Splitting on
        # every occurrence of the prefix would truncate argument names that happen
        # to contain "kernel__" again (e.g. "kernel__my_kernel__r" -> "my_").
        prefix = "kernel__"
        kernel_args = {
            key[len(prefix):]: value for key, value in kwargs.items() if key.startswith(prefix)
        }
        kernel_transformer = _check_type(kernel_transformer, KernelTransformer, **kernel_args)
        self.kernel_transformer = kernel_transformer

        self.feature_group = feature_group
        self.features = features
        # NOTE(review): presumably read by the base class when selecting features;
        # confirm against Decoder.
        self.frequency_threshold = 0

    def _fit(self, dataset):
        """Compute the study-wise ROI regressor from the coordinates input.

        Parameters
        ----------
        dataset
            Not used directly in this method; the coordinates are read from
            ``self.inputs_["coordinates"]``, which is populated outside this class
            (presumably by the base class before ``_fit`` is called -- confirm).
        """
        roi_values = self.kernel_transformer.transform(
            self.inputs_["coordinates"],
            self.masker,
            return_type="array",
        )
        # Collapse axis 1 of the array returned by the kernel transformer
        # (assumed to be the within-ROI voxel axis, giving one value per study).
        self.roi_values_ = roi_values.mean(axis=1)

    def transform(self):
        """Apply the decoding method to a Dataset.

        Requires ``self.roi_values_`` (set by ``_fit``) as well as ``self.inputs_``
        and ``self.features_``, which are set outside this class.

        Returns
        -------
        results : :class:`pandas.DataFrame`
            Table with each label and the following values associated with each
            label: 'r'.
        """
        feature_values = self.inputs_["annotations"][self.features_].values
        # Transpose so that each row passed to ``pearson`` holds one feature's values.
        corrs = pearson(self.roi_values_, feature_values.T)
        out_df = pd.DataFrame(index=self.features_, columns=["r"], data=corrs)
        out_df.index.name = "feature"
        return out_df
|