nimare-0.4.2rc4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. benchmarks/__init__.py +0 -0
  2. benchmarks/bench_cbma.py +57 -0
  3. nimare/__init__.py +45 -0
  4. nimare/_version.py +21 -0
  5. nimare/annotate/__init__.py +21 -0
  6. nimare/annotate/cogat.py +213 -0
  7. nimare/annotate/gclda.py +924 -0
  8. nimare/annotate/lda.py +147 -0
  9. nimare/annotate/text.py +75 -0
  10. nimare/annotate/utils.py +87 -0
  11. nimare/base.py +217 -0
  12. nimare/cli.py +124 -0
  13. nimare/correct.py +462 -0
  14. nimare/dataset.py +685 -0
  15. nimare/decode/__init__.py +33 -0
  16. nimare/decode/base.py +115 -0
  17. nimare/decode/continuous.py +462 -0
  18. nimare/decode/discrete.py +753 -0
  19. nimare/decode/encode.py +110 -0
  20. nimare/decode/utils.py +44 -0
  21. nimare/diagnostics.py +510 -0
  22. nimare/estimator.py +139 -0
  23. nimare/extract/__init__.py +19 -0
  24. nimare/extract/extract.py +466 -0
  25. nimare/extract/utils.py +295 -0
  26. nimare/generate.py +331 -0
  27. nimare/io.py +635 -0
  28. nimare/meta/__init__.py +39 -0
  29. nimare/meta/cbma/__init__.py +6 -0
  30. nimare/meta/cbma/ale.py +951 -0
  31. nimare/meta/cbma/base.py +947 -0
  32. nimare/meta/cbma/mkda.py +1361 -0
  33. nimare/meta/cbmr.py +970 -0
  34. nimare/meta/ibma.py +1683 -0
  35. nimare/meta/kernel.py +501 -0
  36. nimare/meta/models.py +1199 -0
  37. nimare/meta/utils.py +494 -0
  38. nimare/nimads.py +492 -0
  39. nimare/reports/__init__.py +24 -0
  40. nimare/reports/base.py +664 -0
  41. nimare/reports/default.yml +123 -0
  42. nimare/reports/figures.py +651 -0
  43. nimare/reports/report.tpl +160 -0
  44. nimare/resources/__init__.py +1 -0
  45. nimare/resources/atlases/Harvard-Oxford-LICENSE +93 -0
  46. nimare/resources/atlases/HarvardOxford-cort-maxprob-thr25-2mm.nii.gz +0 -0
  47. nimare/resources/database_file_manifest.json +142 -0
  48. nimare/resources/english_spellings.csv +1738 -0
  49. nimare/resources/filenames.json +32 -0
  50. nimare/resources/neurosynth_laird_studies.json +58773 -0
  51. nimare/resources/neurosynth_stoplist.txt +396 -0
  52. nimare/resources/nidm_pain_dset.json +1349 -0
  53. nimare/resources/references.bib +541 -0
  54. nimare/resources/semantic_knowledge_children.txt +325 -0
  55. nimare/resources/semantic_relatedness_children.txt +249 -0
  56. nimare/resources/templates/MNI152_2x2x2_brainmask.nii.gz +0 -0
  57. nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_T1w.nii.gz +0 -0
  58. nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz +0 -0
  59. nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_T1w.nii.gz +0 -0
  60. nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz +0 -0
  61. nimare/results.py +225 -0
  62. nimare/stats.py +276 -0
  63. nimare/tests/__init__.py +1 -0
  64. nimare/tests/conftest.py +229 -0
  65. nimare/tests/data/amygdala_roi.nii.gz +0 -0
  66. nimare/tests/data/data-neurosynth_version-7_coordinates.tsv.gz +0 -0
  67. nimare/tests/data/data-neurosynth_version-7_metadata.tsv.gz +0 -0
  68. nimare/tests/data/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz +0 -0
  69. nimare/tests/data/data-neurosynth_version-7_vocab-terms_vocabulary.txt +100 -0
  70. nimare/tests/data/neurosynth_dset.json +2868 -0
  71. nimare/tests/data/neurosynth_laird_studies.json +58773 -0
  72. nimare/tests/data/nidm_pain_dset.json +1349 -0
  73. nimare/tests/data/nimads_annotation.json +1 -0
  74. nimare/tests/data/nimads_studyset.json +1 -0
  75. nimare/tests/data/test_baseline.txt +2 -0
  76. nimare/tests/data/test_pain_dataset.json +1278 -0
  77. nimare/tests/data/test_pain_dataset_multiple_contrasts.json +1242 -0
  78. nimare/tests/data/test_sleuth_file.txt +18 -0
  79. nimare/tests/data/test_sleuth_file2.txt +10 -0
  80. nimare/tests/data/test_sleuth_file3.txt +5 -0
  81. nimare/tests/data/test_sleuth_file4.txt +5 -0
  82. nimare/tests/data/test_sleuth_file5.txt +5 -0
  83. nimare/tests/test_annotate_cogat.py +32 -0
  84. nimare/tests/test_annotate_gclda.py +86 -0
  85. nimare/tests/test_annotate_lda.py +27 -0
  86. nimare/tests/test_dataset.py +99 -0
  87. nimare/tests/test_decode_continuous.py +132 -0
  88. nimare/tests/test_decode_discrete.py +92 -0
  89. nimare/tests/test_diagnostics.py +168 -0
  90. nimare/tests/test_estimator_performance.py +385 -0
  91. nimare/tests/test_extract.py +46 -0
  92. nimare/tests/test_generate.py +247 -0
  93. nimare/tests/test_io.py +240 -0
  94. nimare/tests/test_meta_ale.py +298 -0
  95. nimare/tests/test_meta_cbmr.py +295 -0
  96. nimare/tests/test_meta_ibma.py +240 -0
  97. nimare/tests/test_meta_kernel.py +209 -0
  98. nimare/tests/test_meta_mkda.py +234 -0
  99. nimare/tests/test_nimads.py +21 -0
  100. nimare/tests/test_reports.py +110 -0
  101. nimare/tests/test_stats.py +101 -0
  102. nimare/tests/test_transforms.py +272 -0
  103. nimare/tests/test_utils.py +200 -0
  104. nimare/tests/test_workflows.py +221 -0
  105. nimare/tests/utils.py +126 -0
  106. nimare/transforms.py +907 -0
  107. nimare/utils.py +1367 -0
  108. nimare/workflows/__init__.py +14 -0
  109. nimare/workflows/base.py +189 -0
  110. nimare/workflows/cbma.py +165 -0
  111. nimare/workflows/ibma.py +108 -0
  112. nimare/workflows/macm.py +77 -0
  113. nimare/workflows/misc.py +65 -0
  114. nimare-0.4.2rc4.dist-info/LICENSE +21 -0
  115. nimare-0.4.2rc4.dist-info/METADATA +124 -0
  116. nimare-0.4.2rc4.dist-info/RECORD +119 -0
  117. nimare-0.4.2rc4.dist-info/WHEEL +5 -0
  118. nimare-0.4.2rc4.dist-info/entry_points.txt +2 -0
  119. nimare-0.4.2rc4.dist-info/top_level.txt +2 -0
nimare/decode/discrete.py
@@ -0,0 +1,753 @@
+ """Methods for decoding subsets of voxels or experiments into text."""
+
+ import numpy as np
+ import pandas as pd
+ from nilearn._utils import load_niimg
+ from pymare.stats import bonferroni, fdr
+ from scipy import special
+ from scipy.stats import binom
+
+ from nimare.decode.base import Decoder
+ from nimare.decode.utils import weight_priors
+ from nimare.meta.kernel import KernelTransformer, MKDAKernel
+ from nimare.stats import one_way, pearson, two_way
+ from nimare.transforms import p_to_z
+ from nimare.utils import _check_type, get_masker
+
+
+ def gclda_decode_roi(model, roi, topic_priors=None, prior_weight=1.0):
+     r"""Perform image-to-text decoding for discrete inputs using the method from Rubin et al. (2017).
+
+     The method used in this function was originally described in :footcite:t:`rubin2017decoding`.
+
+     Parameters
+     ----------
+     model : :obj:`~nimare.annotate.gclda.GCLDAModel`
+         Model object needed for decoding.
+     roi : :obj:`nibabel.nifti1.Nifti1Image` or :obj:`str`
+         Binary image to decode into text. If string, path to a file with
+         the binary image.
+     topic_priors : :obj:`numpy.ndarray` of :obj:`float`, optional
+         A 1d array of size (n_topics) with values for topic weighting.
+         If None, no weighting is done. Default is None.
+     prior_weight : :obj:`float`, optional
+         The weight by which the prior will affect the decoding.
+         Default is 1.
+
+     Returns
+     -------
+     decoded_df : :obj:`pandas.DataFrame`
+         A DataFrame with the word-tokens and their associated weights.
+     topic_weights : :obj:`numpy.ndarray` of :obj:`float`
+         The weights of the topics used in decoding.
+
+     Notes
+     -----
+     ====================== ==============================================================
+     Notation               Meaning
+     ====================== ==============================================================
+     :math:`v`              Voxel
+     :math:`t`              Topic
+     :math:`w`              Word type
+     :math:`r`              Region of interest (ROI)
+     :math:`p(t|v)`         Probability of topic given voxel (``p_topic_g_voxel``)
+     :math:`\tau_{t}`       Topic weight vector (``topic_weights``)
+     :math:`p(w|t)`         Probability of word type given topic (``p_word_g_topic``)
+     ====================== ==============================================================
+
+     1. Compute :math:`p(t|v)`.
+
+        - Obtained from the model's ``p_topic_g_voxel_`` attribute.
+
+     2. Compute the topic weight vector (:math:`\tau_{t}`) by summing :math:`p(t|v)` across
+        voxels within the ROI.
+
+        - :math:`\tau_{t} = \sum_{i} {p(t|v_{i})}`
+
+     3. Multiply :math:`\tau_{t}` by :math:`p(w|t)`.
+
+        - :math:`p(w|r) \propto \tau_{t} \cdot p(w|t)`
+
+     4. The resulting vector (``word_weights``) reflects arbitrarily scaled term weights for
+        the ROI.
+
+     See Also
+     --------
+     :class:`~nimare.annotate.gclda.GCLDAModel`
+     :func:`~nimare.decode.continuous.gclda_decode_map`
+     :func:`~nimare.decode.encode.gclda_encode`
+
+     References
+     ----------
+     .. footbibliography::
+     """
+     roi = load_niimg(roi)
+
+     dset_aff = model.mask.affine
+     if not np.array_equal(roi.affine, dset_aff):
+         raise ValueError(
+             "Input roi must have same affine as mask img:\n"
+             f"{np.array2string(roi.affine)}\n{np.array2string(dset_aff)}"
+         )
+
+     # Load ROI file and get ROI voxels overlapping with brain mask
+     mask_vec = model.mask.get_fdata().ravel().astype(bool)
+     roi_vec = roi.get_fdata().astype(bool).ravel()
+     roi_vec = roi_vec[mask_vec]
+     roi_idx = np.where(roi_vec)[0]
+     p_topic_g_roi = model.p_topic_g_voxel_[roi_idx, :]  # p(T|V) for voxels in ROI only
+     topic_weights = np.sum(p_topic_g_roi, axis=0)  # Sum across voxels
+     if topic_priors is not None:
+         weighted_priors = weight_priors(topic_priors, prior_weight)
+         topic_weights *= weighted_priors
+
+     # Multiply topic_weights by topic-by-word matrix (p_word_g_topic).
+     # n_word_tokens_per_topic = np.sum(model.n_word_tokens_word_by_topic, axis=0)
+     # p_word_g_topic = model.n_word_tokens_word_by_topic / n_word_tokens_per_topic[None, :]
+     # p_word_g_topic = np.nan_to_num(p_word_g_topic, 0)
+     word_weights = np.dot(model.p_word_g_topic_, topic_weights)
+
+     decoded_df = pd.DataFrame(index=model.vocabulary, columns=["Weight"], data=word_weights)
+     decoded_df.index.name = "Term"
+     return decoded_df, topic_weights
+
+
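For orientation, a minimal usage sketch of `gclda_decode_roi`. This is hedged: `model` stands in for a `GCLDAModel` assumed to have been trained elsewhere, and "roi.nii.gz" is a hypothetical binary mask sharing the model mask's affine.

```python
from nimare.decode.discrete import gclda_decode_roi

# `model` (a trained GCLDAModel) and "roi.nii.gz" are assumed inputs.
decoded_df, topic_weights = gclda_decode_roi(model, "roi.nii.gz")

# The highest-weighted terms best describe the ROI; weights are arbitrarily scaled.
print(decoded_df.sort_values(by="Weight", ascending=False).head(10))
```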
+ class BrainMapDecoder(Decoder):
+     """Perform image-to-text decoding for discrete inputs according to the BrainMap method.
+
+     This method was described in :footcite:t:`amft2015definition`.
+
+     .. versionadded:: 0.0.3
+
+     Parameters
+     ----------
+     feature_group : :obj:`str`, optional
+         Feature group name used to select labels from a specific source.
+         Feature groups are stored as prefixes to feature name columns in
+         Dataset.annotations, with the format ``[source]_[valuetype]__``.
+         Input may or may not include the trailing underscore.
+         Default is None, which uses all feature groups available.
+     features : :obj:`list`, optional
+         List of features in dataset annotations to use for decoding.
+         If feature_group is provided, then features should not include the
+         feature group prefix.
+         If feature_group is *not* provided, then features *should* include the
+         prefix.
+         Default is None, which uses all features available.
+     frequency_threshold : :obj:`float`, optional
+         Threshold to apply to dataset annotations. Values greater than or
+         equal to the threshold are assigned as label+, while values below
+         the threshold are considered label-. Default is 0.001.
+     u : :obj:`float`, optional
+         Alpha level for multiple comparisons correction. Default is 0.05.
+     correction : {None, "bh", "by", "bonferroni"}, optional
+         Multiple comparisons correction method to apply.
+         Default is 'bh' (Benjamini-Hochberg FDR correction).
+
+     See Also
+     --------
+     :func:`~nimare.decode.discrete.brainmap_decode`: The associated function for this method.
+
+     References
+     ----------
+     .. footbibliography::
+     """
+
+     _required_inputs = {
+         "coordinates": ("coordinates", None),
+         "annotations": ("annotations", None),
+     }
+
+     def __init__(
+         self,
+         feature_group=None,
+         features=None,
+         frequency_threshold=0.001,
+         u=0.05,
+         correction="bh",
+     ):
+         self.feature_group = feature_group
+         self.features = features
+         self.frequency_threshold = frequency_threshold
+         self.u = u
+         self.correction = correction
+
+     def _fit(self, dataset):
+         pass
+
+     def transform(self, ids, ids2=None):
+         """Apply the decoding method to a Dataset.
+
+         Parameters
+         ----------
+         ids : :obj:`list`
+             Subset of studies in coordinates/annotations dataframes indicating
+             target for decoding. Examples include studies reporting at least one
+             peak in an ROI, or studies selected from a clustering analysis.
+         ids2 : :obj:`list` or None, optional
+             Second subset of studies, representing "unselected" studies. If None,
+             then all studies in coordinates/annotations dataframes **not** in
+             ``ids`` will be used.
+
+         Returns
+         -------
+         results : :class:`pandas.DataFrame`
+             Table with each label and the following values associated with each
+             label: 'pForward', 'zForward', 'likelihoodForward', 'pReverse',
+             'zReverse', and 'probReverse'.
+         """
+         results = brainmap_decode(
+             self.inputs_["coordinates"],
+             self.inputs_["annotations"],
+             ids=ids,
+             ids2=ids2,
+             features=self.features_,
+             frequency_threshold=self.frequency_threshold,
+             u=self.u,
+             correction=self.correction,
+         )
+
+         return results
+
+
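A hedged usage sketch of the decoder class. It assumes `dset` is a `nimare.dataset.Dataset` with coordinates and annotations, and `roi_ids` is a hypothetical list of selected study IDs (e.g., from `Dataset.get_studies_by_mask`); neither is defined here.

```python
from nimare.decode.discrete import BrainMapDecoder

# `dset` and `roi_ids` are assumed to be defined elsewhere.
decoder = BrainMapDecoder(frequency_threshold=0.001, u=0.05, correction="bh")
decoder.fit(dset)  # resolves the feature list and caches coordinates/annotations
results = decoder.transform(ids=roi_ids)

# The largest probReverse values mark terms most specific to the selected studies.
print(results.sort_values(by="probReverse", ascending=False).head(10))
```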
+ def brainmap_decode(
+     coordinates,
+     annotations,
+     ids,
+     ids2=None,
+     features=None,
+     frequency_threshold=0.001,
+     u=0.05,
+     correction="bh",
+ ):
+     """Perform image-to-text decoding for discrete inputs according to the BrainMap method.
+
+     This method was described in :footcite:t:`amft2015definition`.
+
+     Parameters
+     ----------
+     coordinates : :class:`pandas.DataFrame`
+         DataFrame containing coordinates. Must include a column named 'id' and
+         must have a separate row for each reported peak coordinate for each
+         study (i.e., there are multiple rows per ID).
+         IDs from ``coordinates`` must match those from ``annotations``.
+     annotations : :class:`pandas.DataFrame`
+         DataFrame containing labels. Must include a column named 'id' and each
+         row must correspond to a study. Other columns may correspond to
+         individual labels.
+         IDs from ``annotations`` must match those from ``coordinates``.
+     ids : :obj:`list`
+         Subset of studies in coordinates/annotations dataframes indicating
+         target for decoding. Examples include studies reporting at least one
+         peak in an ROI, or studies selected from a clustering analysis.
+     ids2 : :obj:`list` or None, optional
+         Second subset of studies, representing "unselected" studies. If None,
+         then all studies in coordinates/annotations dataframes **not** in
+         ``ids`` will be used.
+     features : :obj:`list`, optional
+         List of features in dataset annotations to use for decoding.
+         Default is None, which uses all features available.
+     frequency_threshold : :obj:`float`, optional
+         Threshold to apply to dataset annotations. Values greater than or
+         equal to the threshold are assigned as label+, while values below
+         the threshold are considered label-. Default is 0.001.
+     u : :obj:`float`, optional
+         Alpha level for multiple comparisons correction. Default is 0.05.
+     correction : {None, "bh", "by", "bonferroni"}, optional
+         Multiple comparisons correction method to apply.
+         Default is 'bh' (Benjamini-Hochberg FDR correction).
+
+     Returns
+     -------
+     out_df : :class:`pandas.DataFrame`
+         Table with each label and the following values associated with each
+         label: 'pForward', 'zForward', 'likelihoodForward', 'pReverse',
+         'zReverse', and 'probReverse'.
+
+     See Also
+     --------
+     :class:`~nimare.decode.discrete.BrainMapDecoder`: The associated class for this method.
+
+     References
+     ----------
+     .. footbibliography::
+     """
+     dataset_ids = sorted(list(set(coordinates["id"].values)))
+     if ids2 is None:
+         unselected = sorted(list(set(dataset_ids) - set(ids)))
+     else:
+         unselected = ids2[:]
+
+     # Binarize with frequency threshold
+     features_df = annotations.set_index("id", drop=True)
+     if features is None:
+         features = features_df.columns.tolist()
+     features_df = features_df[features].ge(frequency_threshold)
+
+     sel_array = features_df.loc[ids].values
+     unsel_array = features_df.loc[unselected].values
+
+     n_selected = len(ids)
+     n_unselected = len(unselected)
+
+     # Total number of study-term pairings across all terms (a study labeled with
+     # two terms counts twice), used to normalize the term probabilities below
+     n_exps_across_terms = np.sum(np.sum(features_df))
+
+     n_selected_term = np.sum(sel_array, axis=0)
+     n_unselected_term = np.sum(unsel_array, axis=0)
+
+     n_selected_noterm = n_selected - n_selected_term
+     n_unselected_noterm = n_unselected - n_unselected_term
+
+     n_term = n_selected_term + n_unselected_term
+     p_term = n_term / n_exps_across_terms
+
+     n_foci_in_database = coordinates.shape[0]
+     p_selected = n_selected / n_foci_in_database
+
+     # TODO: vectorize this loop if possible
+     n_term_foci = np.zeros(len(features))
+     n_noterm_foci = np.zeros(len(features))
+     for i, term in enumerate(features):
+         term_ids = features_df.loc[features_df[term] == 1].index.values
+         noterm_ids = features_df.loc[features_df[term] == 0].index.values
+         n_term_foci[i] = coordinates["id"].isin(term_ids).sum()
+         n_noterm_foci[i] = coordinates["id"].isin(noterm_ids).sum()
+
+     p_selected_g_term = n_selected_term / n_term_foci  # probForward
+     l_selected_g_term = p_selected_g_term / p_selected  # likelihoodForward
+     p_selected_g_noterm = n_selected_noterm / n_noterm_foci
+
+     p_term_g_selected = p_selected_g_term * p_term / p_selected  # probReverse
+     p_term_g_selected = p_term_g_selected / np.nansum(p_term_g_selected)  # Normalize
+
+     # Significance testing
+     # Forward inference significance is determined with a binomial distribution
+     p_fi = 1 - binom.cdf(k=n_selected_term, n=n_term_foci, p=p_selected)
+     sign_fi = np.sign(n_selected_term - np.mean(n_selected_term)).ravel()
+
+     # Two-way chi-square test for association of activation
+     cells = np.array(
+         [
+             [n_selected_term, n_selected_noterm],
+             [n_unselected_term, n_unselected_noterm],
+         ]
+     ).T
+     chi2_ri = two_way(cells)
+     p_ri = special.chdtrc(1, chi2_ri)
+     sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()
+
+     # Ignore rare features
+     p_fi[n_selected_term < 5] = 1.0
+     p_ri[n_selected_term < 5] = 1.0
+
+     # Multiple comparisons correction across features. Separately done for FI and RI.
+     if correction in ("bh", "by"):
+         p_corr_fi = fdr(p_fi, alpha=u, method=correction)
+         p_corr_ri = fdr(p_ri, alpha=u, method=correction)
+     elif correction == "bonferroni":
+         p_corr_fi = bonferroni(p_fi)
+         p_corr_ri = bonferroni(p_ri)
+     else:
+         p_corr_fi = p_fi
+         p_corr_ri = p_ri
+
+     # Compute z-values
+     z_corr_fi = p_to_z(p_corr_fi, "two") * sign_fi
+     z_corr_ri = p_to_z(p_corr_ri, "two") * sign_ri
+
+     # Effect size
+     arr = np.array(
+         [
+             p_corr_fi,
+             z_corr_fi,
+             l_selected_g_term,
+             p_corr_ri,
+             z_corr_ri,
+             p_term_g_selected,
+         ]
+     ).T
+
+     out_df = pd.DataFrame(
+         data=arr,
+         index=features,
+         columns=[
+             "pForward",
+             "zForward",
+             "likelihoodForward",
+             "pReverse",
+             "zReverse",
+             "probReverse",
+         ],
+     )
+     out_df.index.name = "Term"
+     return out_df
+
+
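The reverse-inference test above hinges on the stacked 2x2 contingency tables passed to `two_way`. A self-contained sketch of that layout, using purely illustrative counts:

```python
import numpy as np

# Toy counts for three terms (purely illustrative, not real data).
n_selected_term = np.array([12, 3, 40])       # selected studies with the term
n_selected_noterm = np.array([88, 97, 60])    # selected studies without it
n_unselected_term = np.array([50, 10, 300])   # unselected studies with the term
n_unselected_noterm = np.array([850, 890, 600])

cells = np.array(
    [
        [n_selected_term, n_selected_noterm],
        [n_unselected_term, n_unselected_noterm],
    ]
).T  # shape (3, 2, 2): one 2x2 contingency table per term
print(cells.shape)

# The chi-square test of association is invariant to transposing a 2x2 table,
# so the exact row/column orientation within each per-term table does not matter.
```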
+ class NeurosynthDecoder(Decoder):
+     """Perform discrete functional decoding according to Neurosynth's meta-analytic method.
+
+     Neurosynth was described in :footcite:t:`yarkoni2011large`.
+
+     .. versionadded:: 0.0.3
+
+     This does not employ correlations between unthresholded maps, which is the
+     method of choice for decoding within Neurosynth and NeuroVault.
+     Metadata (i.e., feature labels) for studies within the selected sample
+     (``ids``) are compared to those of the unselected studies remaining in the
+     database (``dataset``).
+
+     Parameters
+     ----------
+     feature_group : :obj:`str`, optional
+         Feature group name used to select labels from a specific source.
+         Feature groups are stored as prefixes to feature name columns in
+         Dataset.annotations, with the format ``[source]_[valuetype]__``.
+         Input may or may not include the trailing underscore.
+         Default is None, which uses all feature groups available.
+     features : :obj:`list`, optional
+         List of features in dataset annotations to use for decoding.
+         If feature_group is provided, then features should not include the
+         feature group prefix.
+         If feature_group is *not* provided, then features *should* include the
+         prefix.
+         Default is None, which uses all features available.
+     frequency_threshold : :obj:`float`, optional
+         Threshold to apply to dataset annotations. Values greater than or
+         equal to the threshold are assigned as label+, while values below
+         the threshold are considered label-. Default is 0.001.
+     prior : :obj:`float`, optional
+         Uniform prior probability of each label being active in a study in
+         the absence of evidence (labels or selection) from the study.
+         Default is 0.5 (50%).
+     u : :obj:`float`, optional
+         Alpha level for multiple comparisons correction. Default is 0.05.
+     correction : {None, "bh", "by", "bonferroni"}, optional
+         Multiple comparisons correction method to apply.
+         Default is 'bh' (Benjamini-Hochberg FDR correction).
+
+     See Also
+     --------
+     :func:`~nimare.decode.discrete.neurosynth_decode`: The associated function for this method.
+
+     References
+     ----------
+     .. footbibliography::
+     """
+
+     _required_inputs = {
+         "coordinates": ("coordinates", None),
+         "annotations": ("annotations", None),
+     }
+
+     def __init__(
+         self,
+         feature_group=None,
+         features=None,
+         frequency_threshold=0.001,
+         prior=0.5,
+         u=0.05,
+         correction="bh",
+     ):
+         self.feature_group = feature_group
+         self.features = features
+         self.frequency_threshold = frequency_threshold
+         self.prior = prior
+         self.u = u
+         self.correction = correction
+
+     def _fit(self, dataset):
+         pass
+
+     def transform(self, ids, ids2=None):
+         """Apply the decoding method to a Dataset.
+
+         Parameters
+         ----------
+         ids : :obj:`list`
+             Subset of studies in coordinates/annotations dataframes indicating
+             target for decoding. Examples include studies reporting at least one
+             peak in an ROI, or studies selected from a clustering analysis.
+         ids2 : :obj:`list` or None, optional
+             Second subset of studies, representing "unselected" studies. If None,
+             then all studies in coordinates/annotations dataframes **not** in
+             ``ids`` will be used.
+
+         Returns
+         -------
+         results : :class:`pandas.DataFrame`
+             Table with each label and the following values associated with each
+             label: 'pForward', 'zForward', 'probForward', 'pReverse', 'zReverse',
+             and 'probReverse'.
+         """
+         results = neurosynth_decode(
+             self.inputs_["coordinates"],
+             self.inputs_["annotations"],
+             ids=ids,
+             ids2=ids2,
+             features=self.features_,
+             frequency_threshold=self.frequency_threshold,
+             prior=self.prior,
+             u=self.u,
+             correction=self.correction,
+         )
+         return results
+
+
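As with the BrainMap decoder, a hedged usage sketch; `dset` and `roi_ids` are the same assumed Dataset and hypothetical ID list as in the earlier example.

```python
from nimare.decode.discrete import NeurosynthDecoder

# `dset` and `roi_ids` are assumed to be defined elsewhere.
decoder = NeurosynthDecoder(frequency_threshold=0.001, prior=0.5, u=0.05, correction="bh")
decoder.fit(dset)
results = decoder.transform(ids=roi_ids)

# zReverse ranks terms by the strength of the selection-term association.
print(results.sort_values(by="zReverse", ascending=False).head(10))
```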
+ def neurosynth_decode(
+     coordinates,
+     annotations,
+     ids,
+     ids2=None,
+     feature_group=None,
+     features=None,
+     frequency_threshold=0.001,
+     prior=0.5,
+     u=0.05,
+     correction="bh",
+ ):
+     """Perform discrete functional decoding according to Neurosynth's meta-analytic method.
+
+     This does not employ correlations between unthresholded maps, which is the
+     method of choice for decoding within Neurosynth and NeuroVault.
+     Metadata (i.e., feature labels) for studies within the selected sample
+     (``ids``) are compared to those of the unselected studies remaining in the
+     database (``dataset``).
+
+     Neurosynth was described in :footcite:t:`yarkoni2011large`.
+
+     Parameters
+     ----------
+     coordinates : :class:`pandas.DataFrame`
+         DataFrame containing coordinates. Must include a column named 'id' and
+         must have a separate row for each reported peak coordinate for each
+         study (i.e., there are multiple rows per ID).
+         IDs from ``coordinates`` must match those from ``annotations``.
+     annotations : :class:`pandas.DataFrame`
+         DataFrame containing labels. Must include a column named 'id' and each
+         row must correspond to a study. Other columns may correspond to
+         individual labels.
+         IDs from ``annotations`` must match those from ``coordinates``.
+     ids : :obj:`list`
+         Subset of studies in coordinates/annotations dataframes indicating
+         target for decoding. Examples include studies reporting at least one
+         peak in an ROI, or studies selected from a clustering analysis.
+     ids2 : :obj:`list` or None, optional
+         Second subset of studies, representing "unselected" studies. If None,
+         then all studies in coordinates/annotations dataframes **not** in
+         ``ids`` will be used.
+     features : :obj:`list`, optional
+         List of features in dataset annotations to use for decoding.
+         Default is None, which uses all features available.
+     frequency_threshold : :obj:`float`, optional
+         Threshold to apply to dataset annotations. Values greater than or
+         equal to the threshold are assigned as label+, while values below
+         the threshold are considered label-. Default is 0.001.
+     prior : :obj:`float`, optional
+         Uniform prior probability of each label being active in a study in
+         the absence of evidence (labels or selection) from the study.
+         Default is 0.5 (50%).
+     u : :obj:`float`, optional
+         Alpha level for multiple comparisons correction. Default is 0.05.
+     correction : {None, "bh", "by", "bonferroni"}, optional
+         Multiple comparisons correction method to apply.
+         Default is 'bh' (Benjamini-Hochberg FDR correction).
+
+     Returns
+     -------
+     out_df : :class:`pandas.DataFrame`
+         Table with each label and the following values associated with each
+         label: 'pForward', 'zForward', 'probForward', 'pReverse', 'zReverse',
+         and 'probReverse'.
+
+     See Also
+     --------
+     :class:`~nimare.decode.discrete.NeurosynthDecoder`: The associated class for this method.
+     :class:`~nimare.decode.continuous.CorrelationDecoder`: The correlation-based decoding
+         method employed in Neurosynth and NeuroVault.
+
+     References
+     ----------
+     .. footbibliography::
+     """
+     dataset_ids = sorted(list(set(coordinates["id"].values)))
+     if ids2 is None:
+         unselected = sorted(list(set(dataset_ids) - set(ids)))
+     else:
+         unselected = ids2[:]
+
+     # Binarize with frequency threshold
+     features_df = annotations.set_index("id", drop=True)
+     if features is None:
+         features = features_df.columns.tolist()
+     features_df = features_df[features].ge(frequency_threshold)
+
+     sel_array = features_df.loc[ids].values
+     unsel_array = features_df.loc[unselected].values
+
+     n_selected = len(ids)
+     n_unselected = len(unselected)
+
+     n_selected_term = np.sum(sel_array, axis=0)
+     n_unselected_term = np.sum(unsel_array, axis=0)
+
+     n_selected_noterm = n_selected - n_selected_term
+     n_unselected_noterm = n_unselected - n_unselected_term
+
+     n_term = n_selected_term + n_unselected_term
+     n_noterm = n_selected_noterm + n_unselected_noterm
+
+     p_term = n_term / (n_term + n_noterm)
+
+     p_selected_g_term = n_selected_term / n_term
+     p_selected_g_noterm = n_selected_noterm / n_noterm
+
+     # Recompute conditions with empirically derived prior (or the provided one)
+     if prior is None:
+         # If this is used, p_term_g_selected_prior = p_selected (regardless of term)
+         prior = p_term
+
+     # Significance testing
+     # One-way chi-square test for uniformity of term frequency across terms
+     chi2_fi = one_way(n_selected_term, n_term)
+     p_fi = special.chdtrc(1, chi2_fi)
+     sign_fi = np.sign(n_selected_term - np.mean(n_selected_term)).ravel()
+
+     # Two-way chi-square test for association
+     cells = np.array(
+         [
+             [n_selected_term, n_selected_noterm],
+             [n_unselected_term, n_unselected_noterm],
+         ]
+     ).T
+     chi2_ri = two_way(cells)
+     p_ri = special.chdtrc(1, chi2_ri)
+     sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()
+
+     # Multiple comparisons correction across terms. Separately done for FI and RI.
+     if correction in ("bh", "by"):
+         p_corr_fi = fdr(p_fi, alpha=u, method=correction)
+         p_corr_ri = fdr(p_ri, alpha=u, method=correction)
+     elif correction == "bonferroni":
+         p_corr_fi = bonferroni(p_fi)
+         p_corr_ri = bonferroni(p_ri)
+     else:
+         p_corr_fi = p_fi
+         p_corr_ri = p_ri
+
+     # Compute z-values
+     z_corr_fi = p_to_z(p_corr_fi, "two") * sign_fi
+     z_corr_ri = p_to_z(p_corr_ri, "two") * sign_ri
+
+     # Effect size
+     # Estimated probability that a brain state described by the term produces
+     # activation in the ROI
+     p_selected_g_term_g_prior = prior * p_selected_g_term + (1 - prior) * p_selected_g_noterm
+
+     # Estimated probability that activation in the ROI reflects the brain state
+     # described by the term
+     p_term_g_selected_g_prior = p_selected_g_term * prior / p_selected_g_term_g_prior
+
+     arr = np.array(
+         [
+             p_corr_fi,
+             z_corr_fi,
+             p_selected_g_term_g_prior,
+             p_corr_ri,
+             z_corr_ri,
+             p_term_g_selected_g_prior,
+         ]
+     ).T
+
+     out_df = pd.DataFrame(
+         data=arr,
+         index=features,
+         columns=["pForward", "zForward", "probForward", "pReverse", "zReverse", "probReverse"],
+     )
+     out_df.index.name = "Term"
+     return out_df
+
+
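The two effect-size lines near the end of `neurosynth_decode` are just Bayes' rule under the uniform prior. A quick self-contained numeric check with toy values (assumptions only, no real data):

```python
import numpy as np

prior = 0.5
p_selected_g_term = np.array([0.4])    # toy P(selected | term)
p_selected_g_noterm = np.array([0.1])  # toy P(selected | no term)

# Marginal P(selected) implied by the prior (the probForward column).
p_selected_g_prior = prior * p_selected_g_term + (1 - prior) * p_selected_g_noterm

# Bayes' rule: P(term | selected) = P(selected | term) * P(term) / P(selected).
p_term_g_selected_g_prior = p_selected_g_term * prior / p_selected_g_prior
print(p_selected_g_prior, p_term_g_selected_g_prior)  # [0.25] [0.8]
```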
+ class ROIAssociationDecoder(Decoder):
+     """Perform discrete functional decoding according to Neurosynth's ROI association method.
+
+     Neurosynth was described in :footcite:t:`yarkoni2011large`.
+
+     Parameters
+     ----------
+     masker : :class:`~nilearn.input_data.NiftiMasker`, img_like, or similar
+         Masker for region of interest.
+     kernel_transformer : :obj:`~nimare.meta.kernel.KernelTransformer`, optional
+         Kernel with which to create modeled activation maps. Default is MKDAKernel.
+     feature_group : :obj:`str`, optional
+         Feature group name used to select labels from a specific source.
+         Feature groups are stored as prefixes to feature name columns in
+         Dataset.annotations, with the format ``[source]_[valuetype]__``.
+         Input may or may not include the trailing underscore.
+         Default is None, which uses all feature groups available.
+     features : :obj:`list`, optional
+         List of features in dataset annotations to use for decoding.
+         If feature_group is provided, then features should not include the feature group prefix.
+         If feature_group is *not* provided, then features *should* include the prefix.
+         Default is None, which uses all features available.
+
+     Notes
+     -----
+     The general approach in this method is:
+
+     1. Define ROI.
+     2. Generate MA maps for all studies in Dataset.
+     3. Average MA values within ROI to get study-wise MA regressor.
+     4. Correlate MA regressor with study-wise annotation values (e.g., tf-idf values),
+        as shown in the sketch after this class.
+
+     References
+     ----------
+     .. footbibliography::
+     """
+
+     _required_inputs = {
+         "coordinates": ("coordinates", None),
+         "annotations": ("annotations", None),
+     }
+
+     def __init__(
+         self,
+         masker,
+         kernel_transformer=MKDAKernel,
+         feature_group=None,
+         features=None,
+         **kwargs,
+     ):
+         self.masker = get_masker(masker)
+
+         # Get kernel transformer
+         kernel_args = {
+             k.split("kernel__")[1]: v for k, v in kwargs.items() if k.startswith("kernel__")
+         }
+         kernel_transformer = _check_type(kernel_transformer, KernelTransformer, **kernel_args)
+         self.kernel_transformer = kernel_transformer
+
+         self.feature_group = feature_group
+         self.features = features
+         self.frequency_threshold = 0
+
+     def _fit(self, dataset):
+         roi_values = self.kernel_transformer.transform(
+             self.inputs_["coordinates"],
+             self.masker,
+             return_type="array",
+         )
+         self.roi_values_ = roi_values.mean(axis=1)
+
+     def transform(self):
+         """Apply the decoding method to a Dataset.
+
+         Returns
+         -------
+         results : :class:`pandas.DataFrame`
+             Table with each label and the following values associated with each
+             label: 'r'.
+         """
+         feature_values = self.inputs_["annotations"][self.features_].values
+         corrs = pearson(self.roi_values_, feature_values.T)
+         out_df = pd.DataFrame(index=self.features_, columns=["r"], data=corrs)
+         out_df.index.name = "feature"
+         return out_df
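Finally, a hedged usage sketch for the ROI association decoder. It assumes `dset` is a nimare Dataset and "roi.nii.gz" is a hypothetical binary mask in the dataset's space; `get_masker` is expected to accept an image or path directly.

```python
from nimare.decode.discrete import ROIAssociationDecoder

# `dset` and "roi.nii.gz" are assumed inputs; any img_like mask should work.
decoder = ROIAssociationDecoder("roi.nii.gz")
decoder.fit(dset)              # builds MA maps and averages them within the ROI
results = decoder.transform()  # one Pearson r per feature
print(results.sort_values(by="r", ascending=False).head(10))
```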