nimare-0.4.2rc4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. benchmarks/__init__.py +0 -0
  2. benchmarks/bench_cbma.py +57 -0
  3. nimare/__init__.py +45 -0
  4. nimare/_version.py +21 -0
  5. nimare/annotate/__init__.py +21 -0
  6. nimare/annotate/cogat.py +213 -0
  7. nimare/annotate/gclda.py +924 -0
  8. nimare/annotate/lda.py +147 -0
  9. nimare/annotate/text.py +75 -0
  10. nimare/annotate/utils.py +87 -0
  11. nimare/base.py +217 -0
  12. nimare/cli.py +124 -0
  13. nimare/correct.py +462 -0
  14. nimare/dataset.py +685 -0
  15. nimare/decode/__init__.py +33 -0
  16. nimare/decode/base.py +115 -0
  17. nimare/decode/continuous.py +462 -0
  18. nimare/decode/discrete.py +753 -0
  19. nimare/decode/encode.py +110 -0
  20. nimare/decode/utils.py +44 -0
  21. nimare/diagnostics.py +510 -0
  22. nimare/estimator.py +139 -0
  23. nimare/extract/__init__.py +19 -0
  24. nimare/extract/extract.py +466 -0
  25. nimare/extract/utils.py +295 -0
  26. nimare/generate.py +331 -0
  27. nimare/io.py +635 -0
  28. nimare/meta/__init__.py +39 -0
  29. nimare/meta/cbma/__init__.py +6 -0
  30. nimare/meta/cbma/ale.py +951 -0
  31. nimare/meta/cbma/base.py +947 -0
  32. nimare/meta/cbma/mkda.py +1361 -0
  33. nimare/meta/cbmr.py +970 -0
  34. nimare/meta/ibma.py +1683 -0
  35. nimare/meta/kernel.py +501 -0
  36. nimare/meta/models.py +1199 -0
  37. nimare/meta/utils.py +494 -0
  38. nimare/nimads.py +492 -0
  39. nimare/reports/__init__.py +24 -0
  40. nimare/reports/base.py +664 -0
  41. nimare/reports/default.yml +123 -0
  42. nimare/reports/figures.py +651 -0
  43. nimare/reports/report.tpl +160 -0
  44. nimare/resources/__init__.py +1 -0
  45. nimare/resources/atlases/Harvard-Oxford-LICENSE +93 -0
  46. nimare/resources/atlases/HarvardOxford-cort-maxprob-thr25-2mm.nii.gz +0 -0
  47. nimare/resources/database_file_manifest.json +142 -0
  48. nimare/resources/english_spellings.csv +1738 -0
  49. nimare/resources/filenames.json +32 -0
  50. nimare/resources/neurosynth_laird_studies.json +58773 -0
  51. nimare/resources/neurosynth_stoplist.txt +396 -0
  52. nimare/resources/nidm_pain_dset.json +1349 -0
  53. nimare/resources/references.bib +541 -0
  54. nimare/resources/semantic_knowledge_children.txt +325 -0
  55. nimare/resources/semantic_relatedness_children.txt +249 -0
  56. nimare/resources/templates/MNI152_2x2x2_brainmask.nii.gz +0 -0
  57. nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_T1w.nii.gz +0 -0
  58. nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz +0 -0
  59. nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_T1w.nii.gz +0 -0
  60. nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz +0 -0
  61. nimare/results.py +225 -0
  62. nimare/stats.py +276 -0
  63. nimare/tests/__init__.py +1 -0
  64. nimare/tests/conftest.py +229 -0
  65. nimare/tests/data/amygdala_roi.nii.gz +0 -0
  66. nimare/tests/data/data-neurosynth_version-7_coordinates.tsv.gz +0 -0
  67. nimare/tests/data/data-neurosynth_version-7_metadata.tsv.gz +0 -0
  68. nimare/tests/data/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz +0 -0
  69. nimare/tests/data/data-neurosynth_version-7_vocab-terms_vocabulary.txt +100 -0
  70. nimare/tests/data/neurosynth_dset.json +2868 -0
  71. nimare/tests/data/neurosynth_laird_studies.json +58773 -0
  72. nimare/tests/data/nidm_pain_dset.json +1349 -0
  73. nimare/tests/data/nimads_annotation.json +1 -0
  74. nimare/tests/data/nimads_studyset.json +1 -0
  75. nimare/tests/data/test_baseline.txt +2 -0
  76. nimare/tests/data/test_pain_dataset.json +1278 -0
  77. nimare/tests/data/test_pain_dataset_multiple_contrasts.json +1242 -0
  78. nimare/tests/data/test_sleuth_file.txt +18 -0
  79. nimare/tests/data/test_sleuth_file2.txt +10 -0
  80. nimare/tests/data/test_sleuth_file3.txt +5 -0
  81. nimare/tests/data/test_sleuth_file4.txt +5 -0
  82. nimare/tests/data/test_sleuth_file5.txt +5 -0
  83. nimare/tests/test_annotate_cogat.py +32 -0
  84. nimare/tests/test_annotate_gclda.py +86 -0
  85. nimare/tests/test_annotate_lda.py +27 -0
  86. nimare/tests/test_dataset.py +99 -0
  87. nimare/tests/test_decode_continuous.py +132 -0
  88. nimare/tests/test_decode_discrete.py +92 -0
  89. nimare/tests/test_diagnostics.py +168 -0
  90. nimare/tests/test_estimator_performance.py +385 -0
  91. nimare/tests/test_extract.py +46 -0
  92. nimare/tests/test_generate.py +247 -0
  93. nimare/tests/test_io.py +240 -0
  94. nimare/tests/test_meta_ale.py +298 -0
  95. nimare/tests/test_meta_cbmr.py +295 -0
  96. nimare/tests/test_meta_ibma.py +240 -0
  97. nimare/tests/test_meta_kernel.py +209 -0
  98. nimare/tests/test_meta_mkda.py +234 -0
  99. nimare/tests/test_nimads.py +21 -0
  100. nimare/tests/test_reports.py +110 -0
  101. nimare/tests/test_stats.py +101 -0
  102. nimare/tests/test_transforms.py +272 -0
  103. nimare/tests/test_utils.py +200 -0
  104. nimare/tests/test_workflows.py +221 -0
  105. nimare/tests/utils.py +126 -0
  106. nimare/transforms.py +907 -0
  107. nimare/utils.py +1367 -0
  108. nimare/workflows/__init__.py +14 -0
  109. nimare/workflows/base.py +189 -0
  110. nimare/workflows/cbma.py +165 -0
  111. nimare/workflows/ibma.py +108 -0
  112. nimare/workflows/macm.py +77 -0
  113. nimare/workflows/misc.py +65 -0
  114. nimare-0.4.2rc4.dist-info/LICENSE +21 -0
  115. nimare-0.4.2rc4.dist-info/METADATA +124 -0
  116. nimare-0.4.2rc4.dist-info/RECORD +119 -0
  117. nimare-0.4.2rc4.dist-info/WHEEL +5 -0
  118. nimare-0.4.2rc4.dist-info/entry_points.txt +2 -0
  119. nimare-0.4.2rc4.dist-info/top_level.txt +2 -0
nimare/utils.py ADDED
@@ -0,0 +1,1367 @@
1
+ """Utility functions for NiMARE."""
2
+
3
+ import datetime
4
+ import inspect
5
+ import json
6
+ import logging
7
+ import multiprocessing as mp
8
+ import os
9
+ import os.path as op
10
+ import re
11
+ from functools import wraps
12
+ from tempfile import mkstemp
13
+
14
+ import joblib
15
+ import nibabel as nib
16
+ import numpy as np
17
+ import pandas as pd
18
+ import sparse
19
+ from nilearn.input_data import NiftiMasker
20
+
21
+ LGR = logging.getLogger(__name__)
22
+
23
+
24
+ def _check_ncores(n_cores):
25
+ """Check number of cores used for method.
26
+
27
+ .. versionadded:: 0.0.12
28
+ Moved from Estimator._check_ncores into its own function.
29
+ """
30
+ if n_cores <= 0:
31
+ n_cores = mp.cpu_count()
32
+ elif n_cores > mp.cpu_count():
33
+ LGR.warning(
34
+ f"Desired number of cores ({n_cores}) greater than number "
35
+ f"available ({mp.cpu_count()}). Setting to {mp.cpu_count()}."
36
+ )
37
+ n_cores = mp.cpu_count()
38
+ return n_cores
39
+
40
+
41
+ def get_resource_path():
42
+ """Return the path to general resources, terminated with separator.
43
+
44
+ Resources are kept inside the package folder, in "resources".
45
+ Based on function by Yaroslav Halchenko used in Neurosynth Python package.
46
+ """
47
+ return op.abspath(op.join(op.dirname(__file__), "resources") + op.sep)
48
+
49
+
50
+ def get_template(space="mni152_2mm", mask=None):
51
+ """Load template file.
52
+
53
+ .. versionchanged:: 0.0.11
54
+
55
+ - Remove the ``mask="gm"`` option.
56
+ - Replace the nilearn templates with ones downloaded directly from TemplateFlow.
57
+
58
+ Parameters
59
+ ----------
60
+ space : {'mni152_1mm', 'mni152_2mm', 'ale_2mm'}, optional
61
+ Template to load. Default is 'mni152_2mm'.
62
+ The options are:
63
+
64
+ - mni152_1mm: The MNI152NLin6Asym template at 1mm3 resolution,
65
+ downloaded from TemplateFlow. The shape of this template is 182x218x182 voxels.
66
+ - mni152_2mm: The MNI152NLin6Asym template at 2mm3 resolution,
67
+ downloaded from TemplateFlow. The shape of this template is 91x109x91 voxels.
68
+ - ale_2mm: The template used is the MNI152NLin6Asym template at 2mm3 resolution,
69
+ but if ``mask='brain'``, then a brain mask taken from GingerALE will be used.
70
+ The brain mask corresponds to GingerALE's "more conservative" mask.
71
+ The shape of this template is 91x109x91 voxels.
72
+ mask : {None, 'brain'}, optional
73
+ Whether to return the raw T1w template (None) or a brain mask ('brain').
74
+ Default is None.
75
+
76
+ Returns
77
+ -------
78
+ img : :obj:`~nibabel.nifti1.Nifti1Image`
79
+ Template image object.
80
+ """
81
+ template_dir = op.join(get_resource_path(), "templates")
82
+ if space == "mni152_1mm":
83
+ if mask is None:
84
+ img = nib.load(op.join(template_dir, "tpl-MNI152NLin6Asym_res-01_T1w.nii.gz"))
85
+ elif mask == "brain":
86
+ img = nib.load(
87
+ op.join(template_dir, "tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz")
88
+ )
89
+ else:
90
+ raise ValueError(f"Mask option '{mask}' not supported")
91
+ elif space == "mni152_2mm":
92
+ if mask is None:
93
+ img = nib.load(op.join(template_dir, "tpl-MNI152NLin6Asym_res-02_T1w.nii.gz"))
94
+ elif mask == "brain":
95
+ img = nib.load(
96
+ op.join(template_dir, "tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz")
97
+ )
98
+ else:
99
+ raise ValueError(f"Mask option '{mask}' not supported")
100
+ elif space == "ale_2mm":
101
+ if mask is None:
102
+ img = nib.load(op.join(template_dir, "tpl-MNI152NLin6Asym_res-02_T1w.nii.gz"))
103
+ elif mask == "brain":
104
+ # Not the same as the nilearn brain mask, but should correspond to
105
+ # the default "more conservative" MNI152 mask in GingerALE.
106
+ img = nib.load(op.join(template_dir, "MNI152_2x2x2_brainmask.nii.gz"))
107
+ else:
108
+ raise ValueError(f"Mask option '{mask}' not supported")
109
+ else:
110
+ raise ValueError(f"Space '{space}' not supported")
111
+
112
+ # Coerce to array-image
113
+ img = nib.Nifti1Image(img.get_fdata(), affine=img.affine, header=img.header)
114
+ return img
115
+
116
+
117
+ def get_masker(mask, memory=joblib.Memory(location=None, verbose=0), memory_level=1):
118
+ """Get an initialized, fitted nilearn Masker instance from passed argument.
119
+
120
+ Parameters
121
+ ----------
122
+ mask : str, :class:`nibabel.nifti1.Nifti1Image`, or any nilearn Masker
123
+ memory : instance of :class:`joblib.Memory`, :obj:`str`, or :class:`pathlib.Path`
124
+ Used to cache the output of a function. By default, no caching is done.
125
+ If a :obj:`str` is given, it is the path to the caching directory.
126
+ memory_level : :obj:`int`, default=1
127
+ Rough estimator of the amount of memory used by caching.
128
+ Higher value means more memory for caching. Zero means no caching.
129
+
130
+ Returns
131
+ -------
132
+ masker : an initialized, fitted instance of a subclass of
133
+ `nilearn.input_data.base_masker.BaseMasker`
134
+ """
135
+ if isinstance(mask, str):
136
+ mask = nib.load(mask)
137
+
138
+ if isinstance(mask, nib.nifti1.Nifti1Image):
139
+ # Coerce to array-image
140
+ mask = nib.Nifti1Image(mask.get_fdata(), affine=mask.affine, header=mask.header)
141
+
142
+ mask = NiftiMasker(mask, memory=memory, memory_level=memory_level)
143
+
144
+ if not (hasattr(mask, "transform") and hasattr(mask, "inverse_transform")):
145
+ raise ValueError(
146
+ "mask argument must be a string, a nibabel image, or a Nilearn Masker instance."
147
+ )
148
+
149
+ # Fit the masker if needed
150
+ if not hasattr(mask, "mask_img_"):
151
+ mask.fit()
152
+
153
+ return mask
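A minimal usage sketch for the two helpers above, assuming NiMARE is installed (the expected shape comes from the template description in the get_template docstring):

    from nimare.utils import get_masker, get_template

    mask_img = get_template(space="mni152_2mm", mask="brain")  # bundled MNI152NLin6Asym brain mask
    masker = get_masker(mask_img)                              # fitted NiftiMasker
    print(masker.mask_img_.shape)                              # (91, 109, 91) for the 2 mm template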
154
+
155
+
156
+ def vox2mm(ijk, affine):
157
+ """Convert matrix subscripts to coordinates.
158
+
159
+ .. versionchanged:: 0.0.8
160
+
161
+ * [ENH] This function was part of `nimare.transforms` in previous versions (0.0.3-0.0.7)
162
+
163
+ Parameters
164
+ ----------
165
+ ijk : (X, 3) :obj:`numpy.ndarray`
166
+ Matrix subscripts for coordinates being transformed.
167
+ One row for each coordinate, with three columns: i, j, and k.
168
+ affine : (4, 4) :obj:`numpy.ndarray`
169
+ Affine matrix from image.
170
+
171
+ Returns
172
+ -------
173
+ xyz : (X, 3) :obj:`numpy.ndarray`
174
+ Coordinates in image-space.
175
+
176
+ Notes
177
+ -----
178
+ From here:
179
+ http://blog.chrisgorgolewski.org/2014/12/how-to-convert-between-voxel-and-mm.html
180
+ """
181
+ xyz = nib.affines.apply_affine(affine, ijk)
182
+ return xyz
183
+
184
+
185
+ def mm2vox(xyz, affine):
186
+ """Convert coordinates to matrix subscripts.
187
+
188
+ .. versionchanged:: 0.0.8
189
+
190
+ * [ENH] This function was part of `nimare.transforms` in previous versions (0.0.3-0.0.7)
191
+
192
+ Parameters
193
+ ----------
194
+ xyz : (X, 3) :obj:`numpy.ndarray`
195
+ Coordinates in image-space.
196
+ One row for each coordinate, with three columns: x, y, and z.
197
+ affine : (4, 4) :obj:`numpy.ndarray`
198
+ Affine matrix from image.
199
+
200
+ Returns
201
+ -------
202
+ ijk : (X, 3) :obj:`numpy.ndarray`
203
+ Matrix subscripts for coordinates being transformed.
204
+
205
+ Notes
206
+ -----
207
+ From here:
208
+ http://blog.chrisgorgolewski.org/2014/12/how-to-convert-between-voxel-and-mm.html
209
+ """
210
+ ijk = nib.affines.apply_affine(np.linalg.inv(affine), xyz).astype(int)
211
+ return ijk
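A round-trip sketch for vox2mm and mm2vox using an illustrative 2 mm affine (the affine below is made up for the example, not taken from the package):

    import numpy as np

    from nimare.utils import mm2vox, vox2mm

    # Illustrative 2 mm affine: voxel (0, 0, 0) sits at (-90, -126, -72) mm.
    affine = np.array(
        [
            [2.0, 0.0, 0.0, -90.0],
            [0.0, 2.0, 0.0, -126.0],
            [0.0, 0.0, 2.0, -72.0],
            [0.0, 0.0, 0.0, 1.0],
        ]
    )
    ijk = np.array([[45, 63, 36]])
    xyz = vox2mm(ijk, affine)   # [[0., 0., 0.]]
    ijk2 = mm2vox(xyz, affine)  # [[45, 63, 36]]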
212
+
213
+
214
+ def tal2mni(coords):
215
+ """Convert coordinates from Talairach space to MNI space.
216
+
217
+ .. versionchanged:: 0.0.8
218
+
219
+ * [ENH] This function was part of `nimare.transforms` in previous versions (0.0.3-0.0.7)
220
+
221
+ Parameters
222
+ ----------
223
+ coords : (X, 3) :obj:`numpy.ndarray`
224
+ Coordinates in Talairach space to convert.
225
+ Each row is a coordinate, with three columns.
226
+
227
+ Returns
228
+ -------
229
+ coords : (X, 3) :obj:`numpy.ndarray`
230
+ Coordinates in MNI space.
231
+ Each row is a coordinate, with three columns.
232
+
233
+ Notes
234
+ -----
235
+ Python version of BrainMap's tal2icbm_other.m.
236
+
237
+ This function converts coordinates from Talairach space to MNI
238
+ space (normalized using templates other than those contained
239
+ in SPM and FSL) using the tal2icbm transform developed and
240
+ validated by Jack Lancaster at the Research Imaging Center in
241
+ San Antonio, Texas.
242
+ http://www3.interscience.wiley.com/cgi-bin/abstract/114104479/ABSTRACT
243
+ """
244
+ # Find which dimensions are of size 3
245
+ shape = np.array(coords.shape)
246
+ if all(shape == 3):
247
+ LGR.info("Input is an ambiguous 3x3 matrix.\nAssuming coords are row vectors (Nx3).")
248
+ use_dim = 1
249
+ elif not any(shape == 3):
250
+ raise AttributeError("Input must be an Nx3 or 3xN matrix.")
251
+ else:
252
+ use_dim = np.where(shape == 3)[0][0]
253
+
254
+ # Transpose if necessary
255
+ if use_dim == 1:
256
+ coords = coords.transpose()
257
+
258
+ # Transformation matrices, different for each software package
259
+ icbm_other = np.array(
260
+ [
261
+ [0.9357, 0.0029, -0.0072, -1.0423],
262
+ [-0.0065, 0.9396, -0.0726, -1.3940],
263
+ [0.0103, 0.0752, 0.8967, 3.6475],
264
+ [0.0000, 0.0000, 0.0000, 1.0000],
265
+ ]
266
+ )
267
+
268
+ # Invert the transformation matrix
269
+ icbm_other = np.linalg.inv(icbm_other)
270
+
271
+ # Apply the transformation matrix
272
+ coords = np.concatenate((coords, np.ones((1, coords.shape[1]))))
273
+ coords = np.dot(icbm_other, coords)
274
+
275
+ # Format the output, transpose if necessary
276
+ out_coords = coords[:3, :]
277
+ if use_dim == 1:
278
+ out_coords = out_coords.transpose()
279
+ return out_coords
280
+
281
+
282
+ def mni2tal(coords):
283
+ """Convert coordinates from MNI space Talairach space.
284
+
285
+ .. versionchanged:: 0.0.8
286
+
287
+ * [ENH] This function was part of `nimare.transforms` in previous versions (0.0.3-0.0.7)
288
+
289
+ Parameters
290
+ ----------
291
+ coords : (X, 3) :obj:`numpy.ndarray`
292
+ Coordinates in MNI space to convert.
293
+ Each row is a coordinate, with three columns.
294
+
295
+ Returns
296
+ -------
297
+ coords : (X, 3) :obj:`numpy.ndarray`
298
+ Coordinates in Talairach space.
299
+ Each row is a coordinate, with three columns.
300
+
301
+ Notes
302
+ -----
303
+ Python version of BrainMap's icbm_other2tal.m.
304
+ This function converts coordinates from MNI space (normalized using
305
+ templates other than those contained in SPM and FSL) to Talairach space
306
+ using the icbm2tal transform developed and validated by Jack Lancaster at
307
+ the Research Imaging Center in San Antonio, Texas.
308
+ http://www3.interscience.wiley.com/cgi-bin/abstract/114104479/ABSTRACT
309
+ """
310
+ # Find which dimensions are of size 3
311
+ shape = np.array(coords.shape)
312
+ if all(shape == 3):
313
+ LGR.info("Input is an ambiguous 3x3 matrix.\nAssuming coords are row vectors (Nx3).")
314
+ use_dim = 1
315
+ elif not any(shape == 3):
316
+ raise AttributeError("Input must be an Nx3 or 3xN matrix.")
317
+ else:
318
+ use_dim = np.where(shape == 3)[0][0]
319
+
320
+ # Transpose if necessary
321
+ if use_dim == 1:
322
+ coords = coords.transpose()
323
+
324
+ # Transformation matrices, different for each software package
325
+ icbm_other = np.array(
326
+ [
327
+ [0.9357, 0.0029, -0.0072, -1.0423],
328
+ [-0.0065, 0.9396, -0.0726, -1.3940],
329
+ [0.0103, 0.0752, 0.8967, 3.6475],
330
+ [0.0000, 0.0000, 0.0000, 1.0000],
331
+ ]
332
+ )
333
+
334
+ # Apply the transformation matrix
335
+ coords = np.concatenate((coords, np.ones((1, coords.shape[1]))))
336
+ coords = np.dot(icbm_other, coords)
337
+
338
+ # Format the output, transpose if necessary
339
+ out_coords = coords[:3, :]
340
+ if use_dim == 1:
341
+ out_coords = out_coords.transpose()
342
+ return out_coords
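A short sketch of the two converters above; because mni2tal applies the Lancaster matrix and tal2mni applies its inverse, the round trip recovers the input up to floating-point error:

    import numpy as np

    from nimare.utils import mni2tal, tal2mni

    tal_coords = np.array([[10, -20, 30], [-42, 8, 0]])
    mni_coords = tal2mni(tal_coords)   # (2, 3) array of MNI-space coordinates
    roundtrip = mni2tal(mni_coords)    # approximately equal to tal_coords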
343
+
344
+
345
+ def _dict_to_df(id_df, data, key="labels"):
346
+ """Load a given data type in NIMADS-format dictionary into DataFrame.
347
+
348
+ Parameters
349
+ ----------
350
+ id_df : :obj:`pandas.DataFrame`
351
+ DataFrame with columns for identifiers. Index is [studyid]-[expid].
352
+ data : :obj:`dict`
353
+ NIMADS-format dictionary storing the raw dataset, from which
354
+ relevant data are loaded into DataFrames.
355
+ key : {'labels', 'metadata', 'text', 'images'}
356
+ Which data type to load.
357
+
358
+ Returns
359
+ -------
360
+ df : :obj:`pandas.DataFrame`
361
+ DataFrame with id columns from id_df and new columns for the
362
+ requested data type.
363
+ """
364
+ exp_dict = {}
365
+ for pid in data.keys():
366
+ for expid in data[pid]["contrasts"].keys():
367
+ exp = data[pid]["contrasts"][expid]
368
+ id_ = f"{pid}-{expid}"
369
+
370
+ if key not in data[pid]["contrasts"][expid].keys():
371
+ continue
372
+ exp_dict[id_] = exp[key]
373
+
374
+ temp_df = pd.DataFrame.from_dict(exp_dict, orient="index")
375
+ df = pd.merge(id_df, temp_df, left_index=True, right_index=True, how="outer")
376
+ df = df.reset_index(drop=True)
377
+ df = df.replace(to_replace="None", value=np.nan)
378
+ # replace nan with none
379
+ df = df.where(pd.notnull(df), None)
380
+ return df
381
+
382
+
383
+ def _dict_to_coordinates(data, masker, space):
384
+ """Load coordinates in NIMADS-format dictionary into DataFrame."""
385
+ # Required columns
386
+ columns = ["id", "study_id", "contrast_id", "x", "y", "z", "space"]
387
+ core_columns = columns.copy() # Used in contrast for loop
388
+
389
+ all_dfs = []
390
+ for pid in data.keys():
391
+ for expid in data[pid]["contrasts"].keys():
392
+ if "coords" not in data[pid]["contrasts"][expid].keys():
393
+ continue
394
+
395
+ exp_columns = core_columns.copy()
396
+ exp = data[pid]["contrasts"][expid]
397
+
398
+ # Required info (ids, x, y, z, space)
399
+ n_coords = len(exp["coords"]["x"])
400
+ rep_id = np.array([[f"{pid}-{expid}", pid, expid]] * n_coords).T
401
+
402
+ space_arr = exp["coords"].get("space")
403
+ space_arr = np.array([space_arr] * n_coords)
404
+ temp_data = np.vstack(
405
+ (
406
+ rep_id,
407
+ np.array(exp["coords"]["x"]),
408
+ np.array(exp["coords"]["y"]),
409
+ np.array(exp["coords"]["z"]),
410
+ space_arr,
411
+ )
412
+ )
413
+
414
+ # Optional information
415
+ for k in list(set(exp["coords"].keys()) - set(core_columns)):
416
+ k_data = exp["coords"][k]
417
+ if not isinstance(k_data, list):
418
+ k_data = np.array([k_data] * n_coords)
419
+ exp_columns.append(k)
420
+
421
+ if k not in columns:
422
+ columns.append(k)
423
+ temp_data = np.vstack((temp_data, k_data))
424
+
425
+ # Place data in list of dataframes to merge
426
+ con_df = pd.DataFrame(temp_data.T, columns=exp_columns)
427
+ all_dfs.append(con_df)
428
+
429
+ if not all_dfs:
430
+ return pd.DataFrame(
431
+ {
432
+ "id": [],
433
+ "study_id": [],
434
+ "contrast_id": [],
435
+ "x": [],
436
+ "y": [],
437
+ "z": [],
438
+ "space": [],
439
+ },
440
+ )
441
+
442
+ df = pd.concat(all_dfs, axis=0, join="outer", sort=False)
443
+ df = df[columns].reset_index(drop=True)
444
+ df = df.replace(to_replace="None", value=np.nan)
445
+ # replace nan with none
446
+ df = df.where(pd.notnull(df), None)
447
+ df[["x", "y", "z"]] = df[["x", "y", "z"]].astype(float)
448
+ df = _transform_coordinates_to_space(df, masker, space)
449
+ return df
450
+
451
+
452
+ def _transform_coordinates_to_space(df, masker, space):
453
+ """Convert xyz coordinates in a DataFrame to ijk indices for a given target space.
454
+
455
+ Parameters
456
+ ----------
457
+ df : :obj:`pandas.DataFrame`
458
+ masker : :class:`~nilearn.input_data.NiftiMasker` or similar
459
+ Masker object defining the space and location of the area of interest
460
+ (e.g., 'brain').
461
+ space : :obj:`str`
462
+ String describing the stereotactic space and resolution of the masker.
463
+
464
+ Returns
465
+ -------
466
+ df : :obj:`pandas.DataFrame`
467
+ DataFrame with IJK columns either added or overwritten.
468
+ """
469
+ # Now to apply transformations!
470
+ if "mni" in space.lower() or "ale" in space.lower():
471
+ transform = {"MNI": None, "TAL": tal2mni, "Talairach": tal2mni}
472
+ elif "tal" in space.lower():
473
+ transform = {"MNI": mni2tal, "TAL": None, "Talairach": None}
474
+ else:
475
+ raise ValueError(f"Unrecognized space: {space}")
476
+
477
+ found_spaces = df["space"].unique()
478
+ for found_space in found_spaces:
479
+ if found_space not in transform.keys():
480
+ LGR.warning(
481
+ f"Not applying transforms to coordinates in unrecognized space '{found_space}'"
482
+ )
483
+ alg = transform.get(found_space, None)
484
+ idx = df["space"] == found_space
485
+ if alg:
486
+ df.loc[idx, ["x", "y", "z"]] = alg(df.loc[idx, ["x", "y", "z"]].values)
487
+ df.loc[idx, "space"] = space
488
+
489
+ return df
490
+
491
+
492
+ def _validate_df(df):
493
+ """Check that an input is a DataFrame and has a column for 'id'."""
494
+ assert isinstance(df, pd.DataFrame)
495
+ assert "id" in df.columns
496
+
497
+
498
+ def _validate_images_df(image_df):
499
+ """Check and update image paths in DataFrame.
500
+
501
+ Parameters
502
+ ----------
503
+ image_df : :class:`pandas.DataFrame`
504
+ DataFrame with one row for each study and one column for each image
505
+ type. Cells contain paths to image files.
506
+
507
+ Returns
508
+ -------
509
+ image_df : :class:`pandas.DataFrame`
510
+ DataFrame with updated paths and columns.
511
+ """
512
+ valid_suffixes = [".brik", ".head", ".nii", ".img", ".hed"]
513
+ id_columns = set(["id", "study_id", "contrast_id"])
514
+ # Find columns in the DataFrame with images
515
+ file_cols = []
516
+ for col in set(image_df.columns) - id_columns:
517
+ vals = [v for v in image_df[col].values if isinstance(v, str)]
518
+ fc = any([any([vs in v for vs in valid_suffixes]) for v in vals])
519
+ if fc:
520
+ file_cols.append(col)
521
+
522
+ # Clean up DataFrame
523
+ # Find out which columns have full paths and which have relative paths
524
+ abs_cols = []
525
+ for col in file_cols:
526
+ files = image_df[col].tolist()
527
+ abspaths = [f == op.abspath(f) for f in files if isinstance(f, str)]
528
+ if all(abspaths):
529
+ abs_cols.append(col)
530
+ elif not any(abspaths):
531
+ if not col.endswith("__relative"):
532
+ image_df = image_df.rename(columns={col: col + "__relative"})
533
+ else:
534
+ raise ValueError(
535
+ f"Mix of absolute and relative paths detected for images in column '{col}'"
536
+ )
537
+
538
+ # Set relative paths from absolute ones
539
+ if len(abs_cols):
540
+ all_files = list(np.ravel(image_df[abs_cols].values))
541
+ all_files = [f for f in all_files if isinstance(f, str)]
542
+
543
+ if len(all_files) == 1:
544
+ # In the odd case where there's only one absolute path
545
+ shared_path = op.dirname(all_files[0]) + op.sep
546
+ else:
547
+ shared_path = _find_stem(all_files)
548
+
549
+ # Get parent *directory* if shared path includes common prefix.
550
+ if not shared_path.endswith(op.sep):
551
+ shared_path = op.dirname(shared_path) + op.sep
552
+ LGR.info(f"Shared path detected: '{shared_path}'")
553
+
554
+ image_df_out = image_df.copy() # To avoid SettingWithCopyWarning
555
+ for abs_col in abs_cols:
556
+ image_df_out[abs_col + "__relative"] = image_df[abs_col].apply(
557
+ lambda x: x.split(shared_path)[1] if isinstance(x, str) else x
558
+ )
559
+
560
+ image_df = image_df_out
561
+
562
+ return image_df
563
+
564
+
565
+ def _listify(obj):
566
+ """Wrap all non-list or tuple objects in a list.
567
+
568
+ This provides a simple way to accept flexible arguments.
569
+ """
570
+ return obj if isinstance(obj, (list, tuple, type(None), np.ndarray)) else [obj]
571
+
572
+
573
+ def _round2(ndarray):
574
+ """Round X.5 to the nearest integer away from zero.
575
+
576
+ Numpy rounds X.5 values to nearest even integer.
577
+ """
578
+ onedarray = ndarray.flatten()
579
+ signs = np.sign(onedarray) # pylint: disable=no-member
580
+ idx = np.where(np.abs(onedarray - np.round(onedarray)) == 0.5)[0]
581
+ x = np.abs(onedarray)
582
+ y = np.round(x)
583
+ y[idx] = np.ceil(x[idx])
584
+ y *= signs
585
+ rounded = y.reshape(ndarray.shape)
586
+ return rounded.astype(int)
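A worked example of the half-integer rounding behavior (a private helper, shown here only to illustrate how it differs from numpy's round-half-to-even):

    import numpy as np

    from nimare.utils import _round2

    _round2(np.array([0.5, 1.5, -0.5, 2.4]))
    # array([ 1,  2, -1,  2])  -- X.5 rounds away from zero; np.round would give [0., 2., -0., 2.]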
587
+
588
+
589
+ def _try_prepend(value, prefix):
590
+ """Try to prepend a value to a string with a separator ('/').
591
+
592
+ If not a string, will just return the original value.
593
+ """
594
+ if isinstance(value, str):
595
+ return op.join(prefix, value)
596
+ else:
597
+ return value
598
+
599
+
600
+ def _find_stem(arr):
601
+ """Find longest common substring in array of strings.
602
+
603
+ From https://www.geeksforgeeks.org/longest-common-substring-array-strings/
604
+ """
605
+ # Determine size of the array
606
+ n_items_in_array = len(arr)
607
+
608
+ # Take first word from array as reference
609
+ reference_string = arr[0]
610
+ n_chars_in_first_item = len(reference_string)
611
+
612
+ res = ""
613
+ for i_char in range(n_chars_in_first_item):
614
+ # Generate all starting substrings of our reference string
615
+ stem = reference_string[:i_char]
616
+
617
+ j_item = 1 # Retained in case of an array with only one item
618
+ for j_item in range(1, n_items_in_array):
619
+ # Check if the generated stem is common to all words
620
+ if not arr[j_item].startswith(stem):
621
+ break
622
+
623
+ # If current substring is present in all strings and its length is
624
+ # greater than current result
625
+ if (j_item + 1 == n_items_in_array) and (len(res) < len(stem)):
626
+ res = stem
627
+
628
+ return res
629
+
630
+
631
+ def _uk_to_us(text):
632
+ """Convert UK spellings to US based on a converter.
633
+
634
+ .. versionadded:: 0.0.2
635
+
636
+ Parameters
637
+ ----------
638
+ text : :obj:`str`
639
+
640
+ Returns
641
+ -------
642
+ text : :obj:`str`
643
+
644
+ Notes
645
+ -----
646
+ The english_spellings.csv file is from http://www.tysto.com/uk-us-spelling-list.html.
647
+ """
648
+ SPELL_DF = pd.read_csv(op.join(get_resource_path(), "english_spellings.csv"), index_col="UK")
649
+ SPELL_DICT = SPELL_DF["US"].to_dict()
650
+
651
+ if isinstance(text, str):
652
+ # Convert British to American English
653
+ pattern = re.compile(r"\b(" + "|".join(SPELL_DICT.keys()) + r")\b")
654
+ text = pattern.sub(lambda x: SPELL_DICT[x.group()], text)
655
+ return text
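A small sketch of the spelling converter; the exact substitutions depend on the bundled english_spellings.csv, but common pairs such as colour/color and behavioural/behavioral are expected to be covered:

    from nimare.utils import _uk_to_us

    _uk_to_us("the colour of the behavioural signal")
    # 'the color of the behavioral signal' (assuming both terms appear in the bundled mapping)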
656
+
657
+
658
+ def use_memmap(logger, n_files=1):
659
+ """Memory-map array to a file, and perform cleanup after.
660
+
661
+ .. versionadded:: 0.0.8
662
+
663
+ Parameters
664
+ ----------
665
+ logger : :obj:`logging.Logger`
666
+ A Logger with which to log information about the function.
667
+ n_files : :obj:`int`, optional
668
+ Number of memory-mapped files to create and manage.
669
+
670
+ Notes
671
+ -----
672
+ This function is used as a decorator to methods in which memory-mapped arrays may be used.
673
+ It will only be triggered if the class to which the method belongs has a ``memory_limit``
674
+ attribute that is set to something other than ``None``.
675
+
676
+ It will set an attribute within the method's class named ``memmap_filenames``, which is a list
677
+ of filename strings, with ``n_files`` elements.
678
+ If ``memory_limit`` is None, then it will be a list of ``Nones``.
679
+
680
+ Files generated by this function will be stored in the NiMARE data directory and will be
681
+ removed after the wrapped method finishes.
682
+ """
683
+
684
+ def inner_function(function):
685
+ @wraps(function)
686
+ def memmap_context(self, *args, **kwargs):
687
+ if hasattr(self, "memory_limit") and self.memory_limit:
688
+ self.memmap_filenames, filenames = [], []
689
+ for i_file in range(n_files):
690
+ start_time = datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
691
+ _, filename = mkstemp(prefix=self.__class__.__name__, suffix=start_time)
692
+ logger.debug(f"Temporary file written to {filename}")
693
+ self.memmap_filenames.append(filename)
694
+ filenames.append(filename)
695
+ else:
696
+ filenames = self.memmap_filenames = [None] * n_files
697
+
698
+ try:
699
+ return function(self, *args, **kwargs)
700
+ except:
701
+ for filename in filenames:
702
+ logger.error(f"{function.__name__} failed, removing {filename}")
703
+ raise
704
+ finally:
705
+ if hasattr(self, "memory_limit") and self.memory_limit:
706
+ for filename in filenames:
707
+ if os.path.isfile(filename):
708
+ logger.debug(f"Removing temporary file: {filename}")
709
+ os.remove(filename)
710
+ else:
711
+ logger.debug(f"Temporary file DNE: {filename}")
712
+
713
+ return memmap_context
714
+
715
+ return inner_function
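A minimal sketch of how the decorator is meant to be used on a class with a memory_limit attribute; MyEstimator is a made-up class for illustration only:

    import logging

    from nimare.utils import use_memmap

    LGR = logging.getLogger(__name__)

    class MyEstimator:
        memory_limit = "100mb"  # any non-None value triggers creation of the temporary files

        @use_memmap(LGR, n_files=2)
        def _fit(self):
            # Two temporary file paths are available here; they are removed once _fit returns.
            print(self.memmap_filenames)

    MyEstimator()._fit()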
716
+
717
+
718
+ BYTE = 2
719
+ KILOBYTE = BYTE**10
720
+ BYTE_CONVERSION = {
721
+ "kb": KILOBYTE,
722
+ "mb": KILOBYTE**2,
723
+ "gb": KILOBYTE**3,
724
+ "tb": KILOBYTE**4,
725
+ }
726
+
727
+
728
+ def _determine_chunk_size(limit, arr, multiplier=1):
729
+ """Determine how many arrays can be read into memory at once.
730
+
731
+ Parameters
732
+ ----------
733
+ limit : :obj:`str`
734
+ String representation of memory limit, can use:
735
+ kb, mb, gb, and tb as suffix (e.g., "4gb").
736
+ arr : :obj:`numpy.array`
737
+ Representative numpy array.
738
+ multiplier : :obj:`int`
739
+ Adjustment for processes that have more or
740
+ less overhead than expected.
741
+ """
742
+ limit = limit.lower()
743
+ size, representation = re.search(r"([0-9]+)([a-z]+)", limit).groups()
744
+
745
+ limit_bytes = float(size) * BYTE_CONVERSION[representation] * multiplier
746
+
747
+ arr_bytes = arr.size * arr.itemsize
748
+
749
+ chunk_size = int(limit_bytes // arr_bytes)
750
+
751
+ if chunk_size == 0:
752
+ arr_size = arr_bytes // BYTE_CONVERSION["mb"]
753
+ raise RuntimeError(f"memory limit: {limit} too small for array with size {arr_size}mb")
754
+
755
+ return chunk_size
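A worked example of the chunk-size arithmetic: with the constants above, "1gb" is 1024**3 bytes, so an 8,000,000-byte array yields floor(1024**3 / 8,000,000) = 134 arrays per chunk:

    import numpy as np

    from nimare.utils import _determine_chunk_size

    arr = np.zeros(1_000_000, dtype=np.float64)  # 8,000,000 bytes
    _determine_chunk_size("1gb", arr)            # 134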
756
+
757
+
758
+ def _safe_transform(imgs, masker, memory_limit="1gb", dtype="auto", memfile=None):
759
+ """Apply a masker with limited memory usage.
760
+
761
+ Parameters
762
+ ----------
763
+ imgs : list of niimgs
764
+ List of images upon which to apply the masker.
765
+ masker : nilearn masker
766
+ Masker object to apply to images.
767
+ memory_limit : :obj:`str`, optional
768
+ String representation of memory limit, can use:
769
+ kb, mb, gb, and tb as suffix (e.g., "4gb").
770
+ dtype : :obj:`str`, optional
771
+ Target datatype of masked array.
772
+ Default is "auto", which uses the datatype of the niimgs.
773
+ memfile : :obj:`str` or None, optional
774
+ Name of a memory-mapped file. If None, memory-mapping will not be used.
775
+
776
+ Returns
777
+ -------
778
+ masked_data : :obj:`numpy.ndarray` or :obj:`numpy.memmap`
779
+ Masked data in a 2D array.
780
+ Either an ndarray (if memfile is None) or a memmap array (if memfile is a string).
781
+ """
782
+ assert isinstance(memfile, (type(None), str))
783
+
784
+ first_img_data = masker.transform(imgs[0])
785
+ masked_shape = (len(imgs), first_img_data.size)
786
+ if memfile:
787
+ masked_data = np.memmap(
788
+ memfile,
789
+ dtype=first_img_data.dtype if dtype == "auto" else dtype,
790
+ mode="w+",
791
+ shape=masked_shape,
792
+ )
793
+ else:
794
+ masked_data = np.empty(
795
+ masked_shape,
796
+ dtype=first_img_data.dtype if dtype == "auto" else dtype,
797
+ )
798
+
799
+ # perform transform on chunks of the input maps
800
+ chunk_size = _determine_chunk_size(memory_limit, first_img_data)
801
+ map_chunks = [imgs[i : i + chunk_size] for i in range(0, len(imgs), chunk_size)]
802
+ idx = 0
803
+ for map_chunk in map_chunks:
804
+ end_idx = idx + len(map_chunk)
805
+ map_chunk_data = masker.transform(map_chunk)
806
+ masked_data[idx:end_idx, :] = map_chunk_data
807
+ idx = end_idx
808
+
809
+ return masked_data
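A hedged sketch of _safe_transform using the helpers defined earlier in this file; the image paths below are hypothetical placeholders:

    from nimare.utils import _safe_transform, get_masker, get_template

    masker = get_masker(get_template(space="mni152_2mm", mask="brain"))
    imgs = ["/data/study01_z.nii.gz", "/data/study02_z.nii.gz"]  # hypothetical z-statistic maps
    masked = _safe_transform(imgs, masker, memory_limit="1gb")
    # masked has shape (2, n_voxels_in_mask), filled chunk by chunk under the memory limit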
810
+
811
+
812
+ def _add_metadata_to_dataframe(
813
+ dataset,
814
+ dataframe,
815
+ metadata_field,
816
+ target_column,
817
+ filter_func=np.mean,
818
+ ):
819
+ """Add metadata from a Dataset to a DataFrame.
820
+
821
+ .. versionadded:: 0.0.8
822
+
823
+ This is particularly useful for kernel transformers or estimators where a given metadata field
824
+ is necessary (e.g., ALEKernel with "sample_size"), but we want to just use the coordinates
825
+ DataFrame instead of passing the full Dataset.
826
+
827
+ Parameters
828
+ ----------
829
+ dataset : :obj:`~nimare.dataset.Dataset`
830
+ Dataset containing study IDs and metadata to feed into dataframe.
831
+ dataframe : :obj:`pandas.DataFrame`
832
+ DataFrame containing study IDs, into which Dataset metadata will be merged.
833
+ metadata_field : :obj:`str`
834
+ Metadata field in ``dataset``.
835
+ target_column : :obj:`str`
836
+ Name of the column that will be added to ``dataframe``, containing information from the
837
+ Dataset.
838
+ filter_func : :obj:`function`, optional
839
+ Function to apply to the metadata so that it fits as a column in a DataFrame.
840
+ Default is ``numpy.mean``.
841
+
842
+ Returns
843
+ -------
844
+ dataframe : :obj:`pandas.DataFrame`
845
+ Updated DataFrame with ``target_column`` added.
846
+ """
847
+ dataframe = dataframe.copy()
848
+
849
+ if metadata_field in dataset.get_metadata():
850
+ # Collect metadata from Dataset
851
+ metadata = dataset.get_metadata(field=metadata_field, ids=dataset.ids)
852
+ metadata = [[m] for m in metadata]
853
+ # Create a DataFrame with the metadata
854
+ metadata = pd.DataFrame(
855
+ index=dataset.ids,
856
+ data=metadata,
857
+ columns=[metadata_field],
858
+ )
859
+ # Reduce the metadata (if in list/array format) to single values
860
+ metadata[target_column] = metadata[metadata_field].apply(
861
+ lambda x: None if x is None else filter_func(x)
862
+ )
863
+ # Merge metadata df into coordinates df
864
+ dataframe = dataframe.merge(
865
+ right=metadata,
866
+ left_on="id",
867
+ right_index=True,
868
+ sort=False,
869
+ validate="many_to_one",
870
+ suffixes=(False, False),
871
+ how="left",
872
+ )
873
+ else:
874
+ LGR.warning(
875
+ f"Metadata field '{metadata_field}' not found. "
876
+ "Set a constant value for this field as an argument, if possible."
877
+ )
878
+
879
+ return dataframe
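A hedged sketch of the intended call pattern, mirroring how kernel transformers use this helper; the Dataset path is hypothetical, and it assumes the dataset stores a "sample_sizes" metadata field:

    import numpy as np

    from nimare.dataset import Dataset
    from nimare.utils import _add_metadata_to_dataframe

    dset = Dataset("/path/to/nidm_pain_dset.json")  # hypothetical path to a NiMARE dataset file
    coords = _add_metadata_to_dataframe(
        dset,
        dset.coordinates,
        metadata_field="sample_sizes",
        target_column="sample_size",
        filter_func=np.mean,
    )
    # coords gains a "sample_size" column holding the mean of each study's reported sample sizes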
880
+
881
+
882
+ def _check_type(obj, clss, **kwargs):
883
+ """Check variable type and initialize if necessary.
884
+
885
+ .. versionadded:: 0.0.8
886
+
887
+ Parameters
888
+ ----------
889
+ obj
890
+ Object to check and initialize if necessary.
891
+ clss
892
+ Target class of the object.
893
+ kwargs
894
+ Dictionary of keyword arguments that can be used when initializing the object.
895
+
896
+ Returns
897
+ -------
898
+ obj
899
+ Initialized version of the object.
900
+ """
901
+ # Allow both instances and classes for the input.
902
+ if not issubclass(type(obj), clss) and not issubclass(obj, clss):
903
+ raise ValueError(f"Argument {type(obj)} must be a kind of {clss}")
904
+ elif not inspect.isclass(obj) and kwargs:
905
+ LGR.warning(
906
+ f"Argument {type(obj)} has already been initialized, so arguments "
907
+ f"will be ignored: {', '.join(kwargs.keys())}"
908
+ )
909
+ elif inspect.isclass(obj):
910
+ obj = obj(**kwargs)
911
+ return obj
912
+
913
+
914
+ def _boolean_unmask(data_array, bool_array):
915
+ """Unmask data based on a boolean array, with NaNs in empty voxels.
916
+
917
+ Parameters
918
+ ----------
919
+ data_array : 1D or 2D :obj:`numpy.ndarray`
920
+ Masked data array.
921
+ bool_array : 1D :obj:`numpy.ndarray`
922
+ Boolean mask array. Must have the same number of ``True`` entries as elements in the
923
+ second dimension of ``data_array``.
924
+
925
+ Returns
926
+ -------
927
+ unmasked_data : 1D or 2D :obj:`numpy.ndarray`
928
+ Unmasked data array.
929
+ If 1D, first dimension is the same size as the first (and only) dimension of
930
+ ``bool_array``.
931
+ If 2D, first dimension is the same size as the first dimension of ``data_array``, while
932
+ second dimension is the same size as the first (and only) dimension of ``bool_array``.
933
+ All elements corresponding to ``False`` values in ``bool_array`` will have NaNs.
934
+ """
935
+ assert data_array.ndim in (1, 2)
936
+ assert bool_array.ndim == 1
937
+ assert bool_array.sum() == data_array.shape[-1]
938
+
939
+ unmasked_data = np.full(
940
+ shape=bool_array.shape + data_array.T.shape[1:],
941
+ fill_value=np.nan,
942
+ dtype=data_array.dtype,
943
+ )
944
+ unmasked_data[bool_array] = data_array
945
+ unmasked_data = unmasked_data.T
946
+ return unmasked_data
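A small worked example of unmasking a 1D data array back into a 5-element space, with NaNs outside the mask:

    import numpy as np

    from nimare.utils import _boolean_unmask

    data = np.array([1.0, 2.0, 3.0])                    # values for the three in-mask elements
    mask = np.array([True, False, True, False, True])
    _boolean_unmask(data, mask)
    # array([ 1., nan,  2., nan,  3.])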
947
+
948
+
949
+ def unique_rows(ar, return_counts=False):
950
+ """Remove repeated rows from a 2D array.
951
+
952
+ In particular, if given an array of coordinates of shape
953
+ (Npoints, Ndim), it will remove repeated points.
954
+
955
+ Parameters
956
+ ----------
957
+ ar : 2-D ndarray
958
+ The input array.
959
+ return_counts : :obj:`bool`, optional
960
+ If True, also return the number of times each unique item appears in ar.
961
+
962
+ Returns
963
+ -------
964
+ ar_out : 2-D ndarray
965
+ A copy of the input array with repeated rows removed.
966
+ unique_counts : :obj:`np.ndarray`, optional
967
+ The number of times each of the unique values comes up in the original array.
968
+ Only provided if return_counts is True.
969
+
970
+ Raises
971
+ ------
972
+ ValueError : if `ar` is not two-dimensional.
973
+
974
+ Notes
975
+ -----
976
+ The function will generate a copy of `ar` if it is not
977
+ C-contiguous, which will negatively affect performance for large
978
+ input arrays.
979
+
980
+ This is taken from skimage. See :func:`skimage.util.unique_rows`.
981
+
982
+ Examples
983
+ --------
984
+ >>> ar = np.array([[1, 0, 1],
985
+ ... [0, 1, 0],
986
+ ... [1, 0, 1]], np.uint8)
987
+ >>> unique_rows(ar)
988
+ array([[0, 1, 0],
989
+ [1, 0, 1]], dtype=uint8)
990
+
991
+ License
992
+ -------
993
+ Copyright (C) 2019, the scikit-image team
994
+ All rights reserved.
995
+ """
996
+ if ar.ndim != 2:
997
+ raise ValueError("unique_rows() only makes sense for 2D arrays, " "got %dd" % ar.ndim)
998
+ # the view in the next line only works if the array is C-contiguous
999
+ ar = np.ascontiguousarray(ar)
1000
+ # np.unique() finds identical items in a raveled array. To make it
1001
+ # see each row as a single item, we create a view of each row as a
1002
+ # byte string of length itemsize times number of columns in `ar`
1003
+ ar_row_view = ar.view("|S%d" % (ar.itemsize * ar.shape[1]))
1004
+ if return_counts:
1005
+ _, unique_row_indices, counts = np.unique(
1006
+ ar_row_view, return_index=True, return_counts=True
1007
+ )
1008
+
1009
+ return ar[unique_row_indices], counts
1010
+ else:
1011
+ _, unique_row_indices = np.unique(ar_row_view, return_index=True)
1012
+
1013
+ return ar[unique_row_indices]
1014
+
1015
+
1016
+ def find_braces(string):
1017
+ """Search a string for matched braces.
1018
+
1019
+ This is used to identify pairs of braces in BibTeX files.
1020
+ The outside-most pairs should correspond to BibTeX entries.
1021
+
1022
+ Parameters
1023
+ ----------
1024
+ string : :obj:`str`
1025
+ A long string to search for paired braces.
1026
+
1027
+ Returns
1028
+ -------
1029
+ :obj:`list` of :obj:`tuple` of :obj:`int`
1030
+ A list of two-element tuples of indices of matched braces.
1031
+ """
1032
+ toret = {}
1033
+ pstack = []
1034
+
1035
+ for idx, char in enumerate(string):
1036
+ if char == "{":
1037
+ pstack.append(idx)
1038
+ elif char == "}":
1039
+ if len(pstack) == 0:
1040
+ raise IndexError(f"No matching closing parens at: {idx}")
1041
+
1042
+ toret[pstack.pop()] = idx
1043
+
1044
+ if len(pstack) > 0:
1045
+ raise IndexError(f"No matching opening parens at: {pstack.pop()}")
1046
+
1047
+ toret = list(toret.items())
1048
+ return toret
1049
+
1050
+
1051
+ def reduce_idx(idx_list):
1052
+ """Identify outermost brace indices in list of indices.
1053
+
1054
+ The purpose here is to find the brace pairs that correspond to BibTeX entries,
1055
+ while discarding brace pairs that appear within the entries
1056
+ (e.g., braces around article titles).
1057
+
1058
+ Parameters
1059
+ ----------
1060
+ idx_list : :obj:`list` of :obj:`tuple` of :obj:`int`
1061
+ A list of two-element tuples of indices of matched braces.
1062
+
1063
+ Returns
1064
+ -------
1065
+ reduced_idx_list : :obj:`list` of :obj:`tuple` of :obj:`int`
1066
+ A list of two-element tuples of indices of matched braces corresponding to BibTeX entries.
1067
+ """
1068
+ idx_list2 = [idx_item[0] for idx_item in idx_list]
1069
+ idx = np.argsort(idx_list2)
1070
+ idx_list = [idx_list[i] for i in idx]
1071
+
1072
+ df = pd.DataFrame(data=idx_list, columns=["start", "end"])
1073
+
1074
+ good_idx = []
1075
+ df["within"] = False
1076
+ for i, row in df.iterrows():
1077
+ df["within"] = df["within"] | ((df["start"] > row["start"]) & (df["end"] < row["end"]))
1078
+ if not df.iloc[i]["within"]:
1079
+ good_idx.append(i)
1080
+
1081
+ idx_list = [idx_list[i] for i in good_idx]
1082
+ return idx_list
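A small example of the two helpers above on a single made-up BibTeX entry: find_braces returns every matched brace pair, and reduce_idx keeps only the outermost one:

    from nimare.utils import find_braces, reduce_idx

    entry = "@article{smith, title={Brain {mapping}}}"
    pairs = find_braces(entry)  # [(29, 37), (22, 38), (8, 39)]
    reduce_idx(pairs)           # [(8, 39)] -- the braces delimiting the whole entry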
1083
+
1084
+
1085
+ def index_bibtex_identifiers(string, idx_list):
1086
+ """Identify the BibTeX entry identifier before each entry.
1087
+
1088
+ The purpose of this function is to take the raw BibTeX string and a list of indices of entries,
1089
+ starting and ending with the braces of each entry, and then extract the identifier before each.
1090
+
1091
+ Parameters
1092
+ ----------
1093
+ string : :obj:`str`
1094
+ The full BibTeX file, as a string.
1095
+ idx_list : :obj:`list` of :obj:`tuple` of :obj:`int`
1096
+ A list of two-element tuples of indices of matched braces corresponding to BibTeX entries.
1097
+
1098
+ Returns
1099
+ -------
1100
+ idx_list : :obj:`list` of :obj:`tuple` of :obj:`int`
1101
+ A list of two-element tuples of indices of BibTeX entries,
1102
+ from the starting @ to the final }.
1103
+ """
1104
+ at_idx = [(a.start(), a.end() - 1) for a in re.finditer("@[a-zA-Z0-9]+{", string)]
1105
+ df = pd.DataFrame(at_idx, columns=["real_start", "false_start"])
1106
+ df2 = pd.DataFrame(idx_list, columns=["false_start", "end"])
1107
+ df = pd.merge(left=df, right=df2, left_on="false_start", right_on="false_start")
1108
+ new_idx_list = list(zip(df.real_start, df.end))
1109
+ return new_idx_list
1110
+
1111
+
1112
+ def find_citations(description):
1113
+ r"""Find citations in a text description.
1114
+
1115
+ It looks for cases of \\citep{}, \\cite{}, and \\citealt{} in a string.
1116
+
1117
+ Parameters
1118
+ ----------
1119
+ description : :obj:`str`
1120
+ Description of a method, optionally with citations.
1121
+
1122
+ Returns
1123
+ -------
1124
+ all_citations : :obj:`list` of :obj:`str`
1125
+ A list of all identifiers for citations.
1126
+ """
1127
+ paren_citations = re.findall(r"\\citep{([a-zA-Z0-9,/\.]+)}", description)
1128
+ intext_citations = re.findall(r"\\cite{([a-zA-Z0-9,/\.]+)}", description)
1129
+ inparen_citations = re.findall(r"\\citealt{([a-zA-Z0-9,/\.]+)}", description)
1130
+ all_citations = ",".join(paren_citations + intext_citations + inparen_citations)
1131
+ all_citations = all_citations.split(",")
1132
+ all_citations = sorted(list(set(all_citations)))
1133
+ return all_citations
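A short example with made-up citation keys, showing that \citep and \cite commands are both parsed and the extracted keys are deduplicated and sorted:

    from nimare.utils import find_citations

    description = (
        "ALE was performed \\citep{turkeltaub2002,eickhoff2012} "
        "as implemented in NiMARE \\cite{salo2023}."
    )
    find_citations(description)
    # ['eickhoff2012', 'salo2023', 'turkeltaub2002']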
1134
+
1135
+
1136
+ def reduce_references(citations, reference_list):
1137
+ """Reduce the list of references to only include ones associated with requested citations.
1138
+
1139
+ Parameters
1140
+ ----------
1141
+ citations : :obj:`list` of :obj:`str`
1142
+ A list of all identifiers for citations.
1143
+ reference_list : :obj:`list` of :obj:`str`
1144
+ List of all available BibTeX entries.
1145
+
1146
+ Returns
1147
+ -------
1148
+ reduced_reference_list : :obj:`list` of :obj:`str`
1149
+ List of BibTeX entries for citations only.
1150
+ """
1151
+ reduced_reference_list = []
1152
+ for citation in citations:
1153
+ citation_found = False
1154
+ for reference in reference_list:
1155
+ check_string = "@[a-zA-Z]+{" + citation + ","
1156
+ if re.match(check_string, reference):
1157
+ reduced_reference_list.append(reference)
1158
+ citation_found = True
1159
+ continue
1160
+
1161
+ if not citation_found:
1162
+ LGR.warning(f"Citation {citation} not found.")
1163
+
1164
+ return reduced_reference_list
1165
+
1166
+
1167
+ def get_description_references(description):
1168
+ """Find BibTeX references for citations in a methods description.
1169
+
1170
+ Parameters
1171
+ ----------
1172
+ description : :obj:`str`
1173
+ Description of a method, optionally with citations.
1174
+
1175
+ Returns
1176
+ -------
1177
+ bibtex_string : :obj:`str`
1178
+ A string containing BibTeX entries, limited only to the citations in the description.
1179
+ """
1180
+ bibtex_file = op.join(get_resource_path(), "references.bib")
1181
+ with open(bibtex_file, "r") as fo:
1182
+ bibtex_string = fo.read()
1183
+
1184
+ braces_idx = find_braces(bibtex_string)
1185
+ red_braces_idx = reduce_idx(braces_idx)
1186
+ bibtex_idx = index_bibtex_identifiers(bibtex_string, red_braces_idx)
1187
+ citations = find_citations(description)
1188
+ reference_list = [bibtex_string[start : end + 1] for start, end in bibtex_idx]
1189
+ reduced_reference_list = reduce_references(citations, reference_list)
1190
+
1191
+ bibtex_string = "\n".join(reduced_reference_list)
1192
+ return bibtex_string
1193
+
1194
+
1195
+ def _create_name(resource):
1196
+ """Take study/analysis object and try to create dataframe friendly/readable name."""
1197
+ return "_".join(resource.name.split()) if resource.name else resource.id
1198
+
1199
+
1200
+ def load_nimads(studyset, annotation=None):
1201
+ """Load a studyset object from a dictionary, json file, or studyset object."""
1202
+ from nimare.nimads import Studyset
1203
+
1204
+ if isinstance(studyset, dict):
1205
+ studyset = Studyset(studyset)
1206
+ elif isinstance(studyset, str):
1207
+ with open(studyset, "r") as f:
1208
+ studyset = Studyset(json.load(f))
1209
+ elif isinstance(studyset, Studyset):
1210
+ pass
1211
+ else:
1212
+ raise ValueError(
1213
+ "studyset must be: a dictionary, a path to a json file, or studyset object"
1214
+ )
1215
+
1216
+ if annotation:
1217
+ studyset.annotations = annotation
1218
+ return studyset
1219
+
1220
+
1221
+ def coef_spline_bases(axis_coords, spacing, margin):
1222
+ """
1223
+ Coefficient of cubic B-spline bases in any x/y/z direction.
1224
+
1225
+ Parameters
1226
+ ----------
1227
+ axis_coords : value range in x/y/z direction
1228
+ spacing: (equally spaced) knots spacing in x/y/z direction,
1229
+ margin: extend the region where B-splines are constructed (min-margin, max_margin)
1230
+ to avoid weakly-supported B-spline on the edge
1231
+ Returns
1232
+ -------
1233
+ coef_spline : 2-D ndarray (n_points x n_spline_bases)
1234
+ """
1235
+ import patsy
1236
+
1237
+ # create B-spline basis for x/y/z coordinate
1238
+ wider_axis_coords = np.arange(np.min(axis_coords) - margin, np.max(axis_coords) + margin)
1239
+ knots = np.arange( # noqa: F841
1240
+ np.min(axis_coords) - margin, np.max(axis_coords) + margin, step=spacing
1241
+ )
1242
+ design_matrix = patsy.dmatrix(
1243
+ "bs(x, knots=knots, degree=3,include_intercept=False)",
1244
+ data={"x": wider_axis_coords},
1245
+ return_type="matrix",
1246
+ )
1247
+ design_array = np.array(design_matrix)[:, 1:] # remove the first column (every element is 1)
1248
+ coef_spline = design_array[margin : -margin + 1, :]
1249
+ # remove the basis with no/weakly support from the square
1250
+ supported_basis = np.sum(coef_spline, axis=0) != 0
1251
+ coef_spline = coef_spline[:, supported_basis]
1252
+
1253
+ return coef_spline
1254
+
1255
+
1256
+ def b_spline_bases(masker_voxels, spacing, margin=10):
1257
+ """Cubic B-spline bases for spatial intensity.
1258
+
1259
+ The whole coefficient matrix is constructed by taking tensor product of
1260
+ all B-spline bases coefficient matrix in three direction.
1261
+
1262
+ Parameters
1263
+ ----------
1264
+ masker_voxels : :obj:`numpy.ndarray`
1265
+ matrix with element either 0 or 1, indicating if it's within brain mask,
1266
+ spacing : :obj:`int`
1267
+ (equally spaced) knots spacing in x/y/z direction,
1268
+ margin : :obj:`int`
1269
+ extend the region where B-splines are constructed (min-margin, max_margin)
1270
+ to avoid weakly-supported B-spline on the edge
1271
+ Returns
1272
+ -------
1273
+ X : :obj:`numpy.ndarray`
1274
+ 2-D ndarray (n_voxel x n_spline_bases) only keeps with within-brain voxels
1275
+ """
1276
+ # dim_mask = masker_voxels.shape
1277
+ # n_brain_voxel = np.sum(masker_voxels)
1278
+ # remove the blank space around the brain mask
1279
+ xx = np.where(np.apply_over_axes(np.sum, masker_voxels, [1, 2]) > 0)[0]
1280
+ yy = np.where(np.apply_over_axes(np.sum, masker_voxels, [0, 2]) > 0)[1]
1281
+ zz = np.where(np.apply_over_axes(np.sum, masker_voxels, [0, 1]) > 0)[2]
1282
+
1283
+ x_spline = coef_spline_bases(xx, spacing, margin)
1284
+ y_spline = coef_spline_bases(yy, spacing, margin)
1285
+ z_spline = coef_spline_bases(zz, spacing, margin)
1286
+ x_spline_coords = x_spline.nonzero()
1287
+ y_spline_coords = y_spline.nonzero()
1288
+ z_spline_coords = z_spline.nonzero()
1289
+ x_spline_sparse = sparse.COO(x_spline_coords, x_spline[x_spline_coords])
1290
+ y_spline_sparse = sparse.COO(y_spline_coords, y_spline[y_spline_coords])
1291
+ z_spline_sparse = sparse.COO(z_spline_coords, z_spline[z_spline_coords])
1292
+
1293
+ # create spatial design matrix by tensor product of spline bases in 3 dimensions
1294
+ # Row sums of X are all 1=> There is no need to re-normalise X
1295
+ X = np.kron(np.kron(x_spline_sparse, y_spline_sparse), z_spline_sparse)
1296
+ # remove the voxels outside brain mask
1297
+ axis_dim = [xx.shape[0], yy.shape[0], zz.shape[0]]
1298
+ brain_voxels_index = [
1299
+ (z - np.min(zz))
1300
+ + axis_dim[2] * (y - np.min(yy))
1301
+ + axis_dim[1] * axis_dim[2] * (x - np.min(xx))
1302
+ for x in xx
1303
+ for y in yy
1304
+ for z in zz
1305
+ if masker_voxels[x, y, z] == 1
1306
+ ]
1307
+ X = X[brain_voxels_index, :].todense()
1308
+ # remove tensor product basis that have no support in the brain
1309
+ x_df, y_df, z_df = x_spline.shape[1], y_spline.shape[1], z_spline.shape[1]
1310
+ support_basis = []
1311
+ # find and remove weakly supported B-spline bases
1312
+ for bx in range(x_df):
1313
+ for by in range(y_df):
1314
+ for bz in range(z_df):
1315
+ basis_index = bz + z_df * by + z_df * y_df * bx
1316
+ basis_coef = X[:, basis_index]
1317
+ if np.max(basis_coef) >= 0.1:
1318
+ support_basis.append(basis_index)
1319
+ X = X[:, support_basis]
1320
+
1321
+ return X
1322
+
1323
+
1324
+ def dummy_encoding_moderators(dataset_annotations, moderators):
1325
+ """Convert categorical moderators to dummy encoded variables.
1326
+
1327
+ Parameters
1328
+ ----------
1329
+ dataset_annotations : :obj:`pandas.DataFrame`
1330
+ Annotations of the dataset.
1331
+ moderators : :obj:`list`
1332
+ Study-level moderators to be considered into CBMR framework.
1333
+
1334
+ Returns
1335
+ -------
1336
+ dataset_annotations : :obj:`pandas.DataFrame`
1337
+ Annotations of the dataset with dummy encoded moderator columns.
1338
+ new_moderators : :obj:`list`
1339
+ List of study-level moderators after dummy encoding.
1340
+ """
1341
+ new_moderators = []
1342
+ for moderator in moderators.copy():
1343
+ if len(moderator.split(":reference=")) == 2:
1344
+ moderator, reference_subtype = moderator.split(":reference=")
1345
+ if np.array_equal(
1346
+ dataset_annotations[moderator], dataset_annotations[moderator].astype(str)
1347
+ ):
1348
+ categories_unique = dataset_annotations[moderator].unique().tolist()
1349
+ # sort categories alphabetically
1350
+ categories_unique = sorted(categories_unique, key=str.lower)
1351
+ if "reference_subtype" in locals():
1352
+ # remove reference subgroup from list and add it to the first position
1353
+ categories_unique.remove(reference_subtype)
1354
+ categories_unique.insert(0, reference_subtype)
1355
+ for category in categories_unique:
1356
+ dataset_annotations[category] = (
1357
+ dataset_annotations[moderator] == category
1358
+ ).astype(int)
1359
+ # remove the reference category column, since it is encoded
1360
+ # by the other dummy columns all being zero
1361
+ dataset_annotations = dataset_annotations.drop([categories_unique[0]], axis=1)
1362
+ new_moderators.extend(
1363
+ categories_unique[1:]
1364
+ ) # add dummy encoded moderators (except from the reference subgroup)
1365
+ else:
1366
+ new_moderators.append(moderator)
1367
+ return dataset_annotations, new_moderators
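A worked example of the dummy encoding, using a made-up annotations table: the categorical "diagnosis" moderator is expanded into a 0/1 "patient" column with "control" as the dropped reference level, while the numeric "sample_size" moderator passes through unchanged:

    import pandas as pd

    from nimare.utils import dummy_encoding_moderators

    annotations = pd.DataFrame(
        {
            "study_id": ["s1", "s2", "s3"],
            "diagnosis": ["control", "patient", "control"],
            "sample_size": [20, 25, 30],
        }
    )
    annotations, moderators = dummy_encoding_moderators(
        annotations, ["diagnosis:reference=control", "sample_size"]
    )
    # moderators == ["patient", "sample_size"]; annotations now has a binary "patient" column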