nimare 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/__init__.py +0 -0
- benchmarks/bench_cbma.py +57 -0
- nimare/__init__.py +45 -0
- nimare/_version.py +21 -0
- nimare/annotate/__init__.py +21 -0
- nimare/annotate/cogat.py +213 -0
- nimare/annotate/gclda.py +924 -0
- nimare/annotate/lda.py +147 -0
- nimare/annotate/text.py +75 -0
- nimare/annotate/utils.py +87 -0
- nimare/base.py +217 -0
- nimare/cli.py +124 -0
- nimare/correct.py +462 -0
- nimare/dataset.py +685 -0
- nimare/decode/__init__.py +33 -0
- nimare/decode/base.py +115 -0
- nimare/decode/continuous.py +462 -0
- nimare/decode/discrete.py +753 -0
- nimare/decode/encode.py +110 -0
- nimare/decode/utils.py +44 -0
- nimare/diagnostics.py +510 -0
- nimare/estimator.py +139 -0
- nimare/extract/__init__.py +19 -0
- nimare/extract/extract.py +466 -0
- nimare/extract/utils.py +295 -0
- nimare/generate.py +331 -0
- nimare/io.py +667 -0
- nimare/meta/__init__.py +39 -0
- nimare/meta/cbma/__init__.py +6 -0
- nimare/meta/cbma/ale.py +951 -0
- nimare/meta/cbma/base.py +947 -0
- nimare/meta/cbma/mkda.py +1361 -0
- nimare/meta/cbmr.py +970 -0
- nimare/meta/ibma.py +1683 -0
- nimare/meta/kernel.py +501 -0
- nimare/meta/models.py +1199 -0
- nimare/meta/utils.py +494 -0
- nimare/nimads.py +492 -0
- nimare/reports/__init__.py +24 -0
- nimare/reports/base.py +664 -0
- nimare/reports/default.yml +123 -0
- nimare/reports/figures.py +651 -0
- nimare/reports/report.tpl +160 -0
- nimare/resources/__init__.py +1 -0
- nimare/resources/atlases/Harvard-Oxford-LICENSE +93 -0
- nimare/resources/atlases/HarvardOxford-cort-maxprob-thr25-2mm.nii.gz +0 -0
- nimare/resources/database_file_manifest.json +142 -0
- nimare/resources/english_spellings.csv +1738 -0
- nimare/resources/filenames.json +32 -0
- nimare/resources/neurosynth_laird_studies.json +58773 -0
- nimare/resources/neurosynth_stoplist.txt +396 -0
- nimare/resources/nidm_pain_dset.json +1349 -0
- nimare/resources/references.bib +541 -0
- nimare/resources/semantic_knowledge_children.txt +325 -0
- nimare/resources/semantic_relatedness_children.txt +249 -0
- nimare/resources/templates/MNI152_2x2x2_brainmask.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_T1w.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_T1w.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz +0 -0
- nimare/results.py +225 -0
- nimare/stats.py +276 -0
- nimare/tests/__init__.py +1 -0
- nimare/tests/conftest.py +229 -0
- nimare/tests/data/amygdala_roi.nii.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_coordinates.tsv.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_metadata.tsv.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz +0 -0
- nimare/tests/data/data-neurosynth_version-7_vocab-terms_vocabulary.txt +100 -0
- nimare/tests/data/neurosynth_dset.json +2868 -0
- nimare/tests/data/neurosynth_laird_studies.json +58773 -0
- nimare/tests/data/nidm_pain_dset.json +1349 -0
- nimare/tests/data/nimads_annotation.json +1 -0
- nimare/tests/data/nimads_studyset.json +1 -0
- nimare/tests/data/test_baseline.txt +2 -0
- nimare/tests/data/test_pain_dataset.json +1278 -0
- nimare/tests/data/test_pain_dataset_multiple_contrasts.json +1242 -0
- nimare/tests/data/test_sleuth_file.txt +18 -0
- nimare/tests/data/test_sleuth_file2.txt +10 -0
- nimare/tests/data/test_sleuth_file3.txt +5 -0
- nimare/tests/data/test_sleuth_file4.txt +5 -0
- nimare/tests/data/test_sleuth_file5.txt +5 -0
- nimare/tests/test_annotate_cogat.py +32 -0
- nimare/tests/test_annotate_gclda.py +86 -0
- nimare/tests/test_annotate_lda.py +27 -0
- nimare/tests/test_dataset.py +99 -0
- nimare/tests/test_decode_continuous.py +132 -0
- nimare/tests/test_decode_discrete.py +92 -0
- nimare/tests/test_diagnostics.py +168 -0
- nimare/tests/test_estimator_performance.py +385 -0
- nimare/tests/test_extract.py +46 -0
- nimare/tests/test_generate.py +247 -0
- nimare/tests/test_io.py +294 -0
- nimare/tests/test_meta_ale.py +298 -0
- nimare/tests/test_meta_cbmr.py +295 -0
- nimare/tests/test_meta_ibma.py +240 -0
- nimare/tests/test_meta_kernel.py +209 -0
- nimare/tests/test_meta_mkda.py +234 -0
- nimare/tests/test_nimads.py +21 -0
- nimare/tests/test_reports.py +110 -0
- nimare/tests/test_stats.py +101 -0
- nimare/tests/test_transforms.py +272 -0
- nimare/tests/test_utils.py +200 -0
- nimare/tests/test_workflows.py +221 -0
- nimare/tests/utils.py +126 -0
- nimare/transforms.py +907 -0
- nimare/utils.py +1367 -0
- nimare/workflows/__init__.py +14 -0
- nimare/workflows/base.py +189 -0
- nimare/workflows/cbma.py +165 -0
- nimare/workflows/ibma.py +108 -0
- nimare/workflows/macm.py +77 -0
- nimare/workflows/misc.py +65 -0
- nimare-0.4.2.dist-info/LICENSE +21 -0
- nimare-0.4.2.dist-info/METADATA +124 -0
- nimare-0.4.2.dist-info/RECORD +119 -0
- nimare-0.4.2.dist-info/WHEEL +5 -0
- nimare-0.4.2.dist-info/entry_points.txt +2 -0
- nimare-0.4.2.dist-info/top_level.txt +2 -0
nimare/utils.py
ADDED
@@ -0,0 +1,1367 @@
"""Utility functions for NiMARE."""

import datetime
import inspect
import json
import logging
import multiprocessing as mp
import os
import os.path as op
import re
from functools import wraps
from tempfile import mkstemp

import joblib
import nibabel as nib
import numpy as np
import pandas as pd
import sparse
from nilearn.input_data import NiftiMasker

LGR = logging.getLogger(__name__)


def _check_ncores(n_cores):
    """Check number of cores used for method.

    .. versionadded:: 0.0.12
        Moved from Estimator._check_ncores into its own function.
    """
    if n_cores <= 0:
        n_cores = mp.cpu_count()
    elif n_cores > mp.cpu_count():
        LGR.warning(
            f"Desired number of cores ({n_cores}) greater than number "
            f"available ({mp.cpu_count()}). Setting to {mp.cpu_count()}."
        )
        n_cores = mp.cpu_count()
    return n_cores


def get_resource_path():
    """Return the path to general resources, terminated with separator.

    Resources are kept outside package folder in "datasets".
    Based on function by Yaroslav Halchenko used in Neurosynth Python package.
    """
    return op.abspath(op.join(op.dirname(__file__), "resources") + op.sep)


def get_template(space="mni152_2mm", mask=None):
    """Load template file.

    .. versionchanged:: 0.0.11

        - Remove the ``mask="gm"`` option.
        - Replace the nilearn templates with ones downloaded directly from TemplateFlow.

    Parameters
    ----------
    space : {'mni152_1mm', 'mni152_2mm', 'ale_2mm'}, optional
        Template to load. Default is 'mni152_2mm'.
        The options are:

        - mni152_1mm: The MNI152NLin6Asym template at 1mm3 resolution,
          downloaded from TemplateFlow. The shape of this template is 182x218x182 voxels.
        - mni152_2mm: The MNI152NLin6Asym template at 2mm3 resolution,
          downloaded from TemplateFlow. The shape of this template is 91x109x91 voxels.
        - ale_2mm: The template used is the MNI152NLin6Asym template at 2mm3 resolution,
          but if ``mask='brain'``, then a brain mask taken from GingerALE will be used.
          The brain mask corresponds to GingerALE's "more conservative" mask.
          The shape of this template is 91x109x91 voxels.
    mask : {None, 'brain'}, optional
        Whether to return the raw T1w template (None) or a brain mask ('brain').
        Default is None.

    Returns
    -------
    img : :obj:`~nibabel.nifti1.Nifti1Image`
        Template image object.
    """
    template_dir = op.join(get_resource_path(), "templates")
    if space == "mni152_1mm":
        if mask is None:
            img = nib.load(op.join(template_dir, "tpl-MNI152NLin6Asym_res-01_T1w.nii.gz"))
        elif mask == "brain":
            img = nib.load(
                op.join(template_dir, "tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz")
            )
        else:
            raise ValueError(f"Mask option '{mask}' not supported")
    elif space == "mni152_2mm":
        if mask is None:
            img = nib.load(op.join(template_dir, "tpl-MNI152NLin6Asym_res-02_T1w.nii.gz"))
        elif mask == "brain":
            img = nib.load(
                op.join(template_dir, "tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz")
            )
        else:
            raise ValueError(f"Mask option '{mask}' not supported")
    elif space == "ale_2mm":
        if mask is None:
            img = nib.load(op.join(template_dir, "tpl-MNI152NLin6Asym_res-02_T1w.nii.gz"))
        elif mask == "brain":
            # Not the same as the nilearn brain mask, but should correspond to
            # the default "more conservative" MNI152 mask in GingerALE.
            img = nib.load(op.join(template_dir, "MNI152_2x2x2_brainmask.nii.gz"))
        else:
            raise ValueError(f"Mask option '{mask}' not supported")
    else:
        raise ValueError(f"Space '{space}' not supported")

    # Coerce to array-image
    img = nib.Nifti1Image(img.get_fdata(), affine=img.affine, header=img.header)
    return img


def get_masker(mask, memory=joblib.Memory(location=None, verbose=0), memory_level=1):
    """Get an initialized, fitted nilearn Masker instance from passed argument.

    Parameters
    ----------
    mask : str, :class:`nibabel.nifti1.Nifti1Image`, or any nilearn Masker
    memory : instance of :class:`joblib.Memory`, :obj:`str`, or :class:`pathlib.Path`
        Used to cache the output of a function. By default, no caching is done.
        If a :obj:`str` is given, it is the path to the caching directory.
    memory_level : :obj:`int`, default=1
        Rough estimator of the amount of memory used by caching.
        Higher value means more memory for caching. Zero means no caching.

    Returns
    -------
    masker : an initialized, fitted instance of a subclass of
        `nilearn.input_data.base_masker.BaseMasker`
    """
    if isinstance(mask, str):
        mask = nib.load(mask)

    if isinstance(mask, nib.nifti1.Nifti1Image):
        # Coerce to array-image
        mask = nib.Nifti1Image(mask.get_fdata(), affine=mask.affine, header=mask.header)

        mask = NiftiMasker(mask, memory=memory, memory_level=memory_level)

    if not (hasattr(mask, "transform") and hasattr(mask, "inverse_transform")):
        raise ValueError(
            "mask argument must be a string, a nibabel image, or a Nilearn Masker instance."
        )

    # Fit the masker if needed
    if not hasattr(mask, "mask_img_"):
        mask.fit()

    return mask


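# --- Illustrative sketch (added by the editor, not part of the original module) ---
# A minimal example of how get_template() and get_masker() are typically combined,
# assuming the bundled template resources are available on disk; the function name
# below is hypothetical.
def _example_get_masker():
    """Sketch: build a fitted masker from the packaged MNI152 2 mm brain mask."""
    mask_img = get_template(space="mni152_2mm", mask="brain")
    masker = get_masker(mask_img)
    # The fitted masker can now vectorize images into (n_images, n_voxels) arrays.
    return masker

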
def vox2mm(ijk, affine):
    """Convert matrix subscripts to coordinates.

    .. versionchanged:: 0.0.8

        * [ENH] This function was part of `nimare.transforms` in previous versions (0.0.3-0.0.7)

    Parameters
    ----------
    ijk : (X, 3) :obj:`numpy.ndarray`
        Matrix subscripts for coordinates being transformed.
        One row for each coordinate, with three columns: i, j, and k.
    affine : (4, 4) :obj:`numpy.ndarray`
        Affine matrix from image.

    Returns
    -------
    xyz : (X, 3) :obj:`numpy.ndarray`
        Coordinates in image-space.

    Notes
    -----
    From here:
    http://blog.chrisgorgolewski.org/2014/12/how-to-convert-between-voxel-and-mm.html
    """
    xyz = nib.affines.apply_affine(affine, ijk)
    return xyz


def mm2vox(xyz, affine):
    """Convert coordinates to matrix subscripts.

    .. versionchanged:: 0.0.8

        * [ENH] This function was part of `nimare.transforms` in previous versions (0.0.3-0.0.7)

    Parameters
    ----------
    xyz : (X, 3) :obj:`numpy.ndarray`
        Coordinates in image-space.
        One row for each coordinate, with three columns: x, y, and z.
    affine : (4, 4) :obj:`numpy.ndarray`
        Affine matrix from image.

    Returns
    -------
    ijk : (X, 3) :obj:`numpy.ndarray`
        Matrix subscripts for coordinates being transformed.

    Notes
    -----
    From here:
    http://blog.chrisgorgolewski.org/2014/12/how-to-convert-between-voxel-and-mm.html
    """
    ijk = nib.affines.apply_affine(np.linalg.inv(affine), xyz).astype(int)
    return ijk


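# --- Illustrative sketch (added by the editor, not part of the original module) ---
# vox2mm() and mm2vox() are inverses up to integer truncation. A hedged example with a
# typical 2 mm MNI-style affine (values chosen here for illustration only):
def _example_vox2mm_roundtrip():
    """Sketch: convert a voxel index to mm coordinates and back."""
    affine = np.array(
        [[2.0, 0.0, 0.0, -90.0],
         [0.0, 2.0, 0.0, -126.0],
         [0.0, 0.0, 2.0, -72.0],
         [0.0, 0.0, 0.0, 1.0]]
    )
    ijk = np.array([[45, 63, 36]])
    xyz = vox2mm(ijk, affine)  # -> [[0., 0., 0.]]
    return mm2vox(xyz, affine)  # -> [[45, 63, 36]]

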
def tal2mni(coords):
    """Convert coordinates from Talairach space to MNI space.

    .. versionchanged:: 0.0.8

        * [ENH] This function was part of `nimare.transforms` in previous versions (0.0.3-0.0.7)

    Parameters
    ----------
    coords : (X, 3) :obj:`numpy.ndarray`
        Coordinates in Talairach space to convert.
        Each row is a coordinate, with three columns.

    Returns
    -------
    coords : (X, 3) :obj:`numpy.ndarray`
        Coordinates in MNI space.
        Each row is a coordinate, with three columns.

    Notes
    -----
    Python version of BrainMap's tal2icbm_other.m.

    This function converts coordinates from Talairach space to MNI
    space (normalized using templates other than those contained
    in SPM and FSL) using the tal2icbm transform developed and
    validated by Jack Lancaster at the Research Imaging Center in
    San Antonio, Texas.
    http://www3.interscience.wiley.com/cgi-bin/abstract/114104479/ABSTRACT
    """
    # Find which dimensions are of size 3
    shape = np.array(coords.shape)
    if all(shape == 3):
        LGR.info("Input is an ambiguous 3x3 matrix.\nAssuming coords are row vectors (Nx3).")
        use_dim = 1
    elif not any(shape == 3):
        raise AttributeError("Input must be an Nx3 or 3xN matrix.")
    else:
        use_dim = np.where(shape == 3)[0][0]

    # Transpose if necessary
    if use_dim == 1:
        coords = coords.transpose()

    # Transformation matrices, different for each software package
    icbm_other = np.array(
        [
            [0.9357, 0.0029, -0.0072, -1.0423],
            [-0.0065, 0.9396, -0.0726, -1.3940],
            [0.0103, 0.0752, 0.8967, 3.6475],
            [0.0000, 0.0000, 0.0000, 1.0000],
        ]
    )

    # Invert the transformation matrix
    icbm_other = np.linalg.inv(icbm_other)

    # Apply the transformation matrix
    coords = np.concatenate((coords, np.ones((1, coords.shape[1]))))
    coords = np.dot(icbm_other, coords)

    # Format the output, transpose if necessary
    out_coords = coords[:3, :]
    if use_dim == 1:
        out_coords = out_coords.transpose()
    return out_coords


def mni2tal(coords):
    """Convert coordinates from MNI space to Talairach space.

    .. versionchanged:: 0.0.8

        * [ENH] This function was part of `nimare.transforms` in previous versions (0.0.3-0.0.7)

    Parameters
    ----------
    coords : (X, 3) :obj:`numpy.ndarray`
        Coordinates in MNI space to convert.
        Each row is a coordinate, with three columns.

    Returns
    -------
    coords : (X, 3) :obj:`numpy.ndarray`
        Coordinates in Talairach space.
        Each row is a coordinate, with three columns.

    Notes
    -----
    Python version of BrainMap's icbm_other2tal.m.
    This function converts coordinates from MNI space (normalized using
    templates other than those contained in SPM and FSL) to Talairach space
    using the icbm2tal transform developed and validated by Jack Lancaster at
    the Research Imaging Center in San Antonio, Texas.
    http://www3.interscience.wiley.com/cgi-bin/abstract/114104479/ABSTRACT
    """
    # Find which dimensions are of size 3
    shape = np.array(coords.shape)
    if all(shape == 3):
        LGR.info("Input is an ambiguous 3x3 matrix.\nAssuming coords are row vectors (Nx3).")
        use_dim = 1
    elif not any(shape == 3):
        raise AttributeError("Input must be an Nx3 or 3xN matrix.")
    else:
        use_dim = np.where(shape == 3)[0][0]

    # Transpose if necessary
    if use_dim == 1:
        coords = coords.transpose()

    # Transformation matrices, different for each software package
    icbm_other = np.array(
        [
            [0.9357, 0.0029, -0.0072, -1.0423],
            [-0.0065, 0.9396, -0.0726, -1.3940],
            [0.0103, 0.0752, 0.8967, 3.6475],
            [0.0000, 0.0000, 0.0000, 1.0000],
        ]
    )

    # Apply the transformation matrix
    coords = np.concatenate((coords, np.ones((1, coords.shape[1]))))
    coords = np.dot(icbm_other, coords)

    # Format the output, transpose if necessary
    out_coords = coords[:3, :]
    if use_dim == 1:
        out_coords = out_coords.transpose()
    return out_coords


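# --- Illustrative sketch (added by the editor, not part of the original module) ---
# tal2mni() and mni2tal() apply the Lancaster tal2icbm/icbm2tal affine, so round-tripping
# a coordinate should approximately recover the input (only small numerical differences
# remain). The function name and coordinate below are for illustration only.
def _example_tal2mni_roundtrip():
    """Sketch: push a Talairach coordinate to MNI space and back."""
    tal_coords = np.array([[-44.0, -58.0, -16.0]])
    mni_coords = tal2mni(tal_coords)
    return mni2tal(mni_coords)  # approximately equal to tal_coords

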
def _dict_to_df(id_df, data, key="labels"):
    """Load a given data type in NIMADS-format dictionary into DataFrame.

    Parameters
    ----------
    id_df : :obj:`pandas.DataFrame`
        DataFrame with columns for identifiers. Index is [studyid]-[expid].
    data : :obj:`dict`
        NIMADS-format dictionary storing the raw dataset, from which
        relevant data are loaded into DataFrames.
    key : {'labels', 'metadata', 'text', 'images'}
        Which data type to load.

    Returns
    -------
    df : :obj:`pandas.DataFrame`
        DataFrame with id columns from id_df and new columns for the
        requested data type.
    """
    exp_dict = {}
    for pid in data.keys():
        for expid in data[pid]["contrasts"].keys():
            exp = data[pid]["contrasts"][expid]
            id_ = f"{pid}-{expid}"

            if key not in data[pid]["contrasts"][expid].keys():
                continue
            exp_dict[id_] = exp[key]

    temp_df = pd.DataFrame.from_dict(exp_dict, orient="index")
    df = pd.merge(id_df, temp_df, left_index=True, right_index=True, how="outer")
    df = df.reset_index(drop=True)
    df = df.replace(to_replace="None", value=np.nan)
    # replace nan with none
    df = df.where(pd.notnull(df), None)
    return df


def _dict_to_coordinates(data, masker, space):
    """Load coordinates in NIMADS-format dictionary into DataFrame."""
    # Required columns
    columns = ["id", "study_id", "contrast_id", "x", "y", "z", "space"]
    core_columns = columns.copy()  # Used in contrast for loop

    all_dfs = []
    for pid in data.keys():
        for expid in data[pid]["contrasts"].keys():
            if "coords" not in data[pid]["contrasts"][expid].keys():
                continue

            exp_columns = core_columns.copy()
            exp = data[pid]["contrasts"][expid]

            # Required info (ids, x, y, z, space)
            n_coords = len(exp["coords"]["x"])
            rep_id = np.array([[f"{pid}-{expid}", pid, expid]] * n_coords).T

            space_arr = exp["coords"].get("space")
            space_arr = np.array([space_arr] * n_coords)
            temp_data = np.vstack(
                (
                    rep_id,
                    np.array(exp["coords"]["x"]),
                    np.array(exp["coords"]["y"]),
                    np.array(exp["coords"]["z"]),
                    space_arr,
                )
            )

            # Optional information
            for k in list(set(exp["coords"].keys()) - set(core_columns)):
                k_data = exp["coords"][k]
                if not isinstance(k_data, list):
                    k_data = np.array([k_data] * n_coords)
                exp_columns.append(k)

                if k not in columns:
                    columns.append(k)
                temp_data = np.vstack((temp_data, k_data))

            # Place data in list of dataframes to merge
            con_df = pd.DataFrame(temp_data.T, columns=exp_columns)
            all_dfs.append(con_df)

    if not all_dfs:
        return pd.DataFrame(
            {
                "id": [],
                "study_id": [],
                "contrast_id": [],
                "x": [],
                "y": [],
                "z": [],
                "space": [],
            },
        )

    df = pd.concat(all_dfs, axis=0, join="outer", sort=False)
    df = df[columns].reset_index(drop=True)
    df = df.replace(to_replace="None", value=np.nan)
    # replace nan with none
    df = df.where(pd.notnull(df), None)
    df[["x", "y", "z"]] = df[["x", "y", "z"]].astype(float)
    df = _transform_coordinates_to_space(df, masker, space)
    return df


def _transform_coordinates_to_space(df, masker, space):
    """Convert xyz coordinates in a DataFrame to ijk indices for a given target space.

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
    masker : :class:`~nilearn.input_data.NiftiMasker` or similar
        Masker object defining the space and location of the area of interest
        (e.g., 'brain').
    space : :obj:`str`
        String describing the stereotactic space and resolution of the masker.

    Returns
    -------
    df : :obj:`pandas.DataFrame`
        DataFrame with IJK columns either added or overwritten.
    """
    # Now to apply transformations!
    if "mni" in space.lower() or "ale" in space.lower():
        transform = {"MNI": None, "TAL": tal2mni, "Talairach": tal2mni}
    elif "tal" in space.lower():
        transform = {"MNI": mni2tal, "TAL": None, "Talairach": None}
    else:
        raise ValueError(f"Unrecognized space: {space}")

    found_spaces = df["space"].unique()
    for found_space in found_spaces:
        if found_space not in transform.keys():
            LGR.warning(
                f"Not applying transforms to coordinates in unrecognized space '{found_space}'"
            )
        alg = transform.get(found_space, None)
        idx = df["space"] == found_space
        if alg:
            df.loc[idx, ["x", "y", "z"]] = alg(df.loc[idx, ["x", "y", "z"]].values)
        df.loc[idx, "space"] = space

    return df


def _validate_df(df):
    """Check that an input is a DataFrame and has a column for 'id'."""
    assert isinstance(df, pd.DataFrame)
    assert "id" in df.columns


def _validate_images_df(image_df):
    """Check and update image paths in DataFrame.

    Parameters
    ----------
    image_df : :class:`pandas.DataFrame`
        DataFrame with one row for each study and one column for each image
        type. Cells contain paths to image files.

    Returns
    -------
    image_df : :class:`pandas.DataFrame`
        DataFrame with updated paths and columns.
    """
    valid_suffixes = [".brik", ".head", ".nii", ".img", ".hed"]
    id_columns = set(["id", "study_id", "contrast_id"])
    # Find columns in the DataFrame with images
    file_cols = []
    for col in set(image_df.columns) - id_columns:
        vals = [v for v in image_df[col].values if isinstance(v, str)]
        fc = any([any([vs in v for vs in valid_suffixes]) for v in vals])
        if fc:
            file_cols.append(col)

    # Clean up DataFrame
    # Find out which columns have full paths and which have relative paths
    abs_cols = []
    for col in file_cols:
        files = image_df[col].tolist()
        abspaths = [f == op.abspath(f) for f in files if isinstance(f, str)]
        if all(abspaths):
            abs_cols.append(col)
        elif not any(abspaths):
            if not col.endswith("__relative"):
                image_df = image_df.rename(columns={col: col + "__relative"})
        else:
            raise ValueError(
                f"Mix of absolute and relative paths detected for images in column '{col}'"
            )

    # Set relative paths from absolute ones
    if len(abs_cols):
        all_files = list(np.ravel(image_df[abs_cols].values))
        all_files = [f for f in all_files if isinstance(f, str)]

        if len(all_files) == 1:
            # In the odd case where there's only one absolute path
            shared_path = op.dirname(all_files[0]) + op.sep
        else:
            shared_path = _find_stem(all_files)

        # Get parent *directory* if shared path includes common prefix.
        if not shared_path.endswith(op.sep):
            shared_path = op.dirname(shared_path) + op.sep
        LGR.info(f"Shared path detected: '{shared_path}'")

        image_df_out = image_df.copy()  # To avoid SettingWithCopyWarning
        for abs_col in abs_cols:
            image_df_out[abs_col + "__relative"] = image_df[abs_col].apply(
                lambda x: x.split(shared_path)[1] if isinstance(x, str) else x
            )

        image_df = image_df_out

    return image_df


def _listify(obj):
    """Wrap all non-list or tuple objects in a list.

    This provides a simple way to accept flexible arguments.
    """
    return obj if isinstance(obj, (list, tuple, type(None), np.ndarray)) else [obj]


def _round2(ndarray):
    """Round X.5 to the nearest integer away from zero.

    Numpy rounds X.5 values to nearest even integer.
    """
    onedarray = ndarray.flatten()
    signs = np.sign(onedarray)  # pylint: disable=no-member
    idx = np.where(np.abs(onedarray - np.round(onedarray)) == 0.5)[0]
    x = np.abs(onedarray)
    y = np.round(x)
    y[idx] = np.ceil(x[idx])
    y *= signs
    rounded = y.reshape(ndarray.shape)
    return rounded.astype(int)


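# --- Illustrative sketch (added by the editor, not part of the original module) ---
# NumPy's banker's rounding maps 0.5 and -2.5 to 0 and -2; _round2() instead rounds
# halves away from zero (MATLAB-style rounding). The function name below is hypothetical.
def _example_round2():
    """Sketch: np.round([0.5, 1.5, -2.5]) -> [0., 2., -2.], while _round2 -> [1, 2, -3]."""
    return _round2(np.array([0.5, 1.5, -2.5]))

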
def _try_prepend(value, prefix):
    """Try to prepend a value to a string with a separator ('/').

    If not a string, will just return the original value.
    """
    if isinstance(value, str):
        return op.join(prefix, value)
    else:
        return value


def _find_stem(arr):
    """Find longest common substring in array of strings.

    From https://www.geeksforgeeks.org/longest-common-substring-array-strings/
    """
    # Determine size of the array
    n_items_in_array = len(arr)

    # Take first word from array as reference
    reference_string = arr[0]
    n_chars_in_first_item = len(reference_string)

    res = ""
    for i_char in range(n_chars_in_first_item):
        # Generate all starting substrings of our reference string
        stem = reference_string[:i_char]

        j_item = 1  # Retained in case of an array with only one item
        for j_item in range(1, n_items_in_array):
            # Check if the generated stem is common to all words
            if not arr[j_item].startswith(stem):
                break

        # If current substring is present in all strings and its length is
        # greater than current result
        if (j_item + 1 == n_items_in_array) and (len(res) < len(stem)):
            res = stem

    return res


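# --- Illustrative sketch (added by the editor, not part of the original module) ---
# Despite the docstring's "substring" wording, _find_stem() returns the longest common
# *prefix*, which is how _validate_images_df() uses it to find a shared parent path.
# Note that the completion check can misfire when the first mismatch occurs at the last
# item of the list, so this sketch uses three paths (all names below are hypothetical).
def _example_find_stem():
    """Sketch: shared prefix of three absolute image paths."""
    paths = [
        "/data/sub-01_T1w.nii.gz",
        "/data/sub-02_T1w.nii.gz",
        "/data/sub-03_T1w.nii.gz",
    ]
    return _find_stem(paths)  # -> "/data/sub-0"

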
def _uk_to_us(text):
    """Convert UK spellings to US based on a converter.

    .. versionadded:: 0.0.2

    Parameters
    ----------
    text : :obj:`str`

    Returns
    -------
    text : :obj:`str`

    Notes
    -----
    The english_spellings.csv file is from http://www.tysto.com/uk-us-spelling-list.html.
    """
    SPELL_DF = pd.read_csv(op.join(get_resource_path(), "english_spellings.csv"), index_col="UK")
    SPELL_DICT = SPELL_DF["US"].to_dict()

    if isinstance(text, str):
        # Convert British to American English
        pattern = re.compile(r"\b(" + "|".join(SPELL_DICT.keys()) + r")\b")
        text = pattern.sub(lambda x: SPELL_DICT[x.group()], text)
    return text


def use_memmap(logger, n_files=1):
    """Memory-map array to a file, and perform cleanup after.

    .. versionadded:: 0.0.8

    Parameters
    ----------
    logger : :obj:`logging.Logger`
        A Logger with which to log information about the function.
    n_files : :obj:`int`, optional
        Number of memory-mapped files to create and manage.

    Notes
    -----
    This function is used as a decorator to methods in which memory-mapped arrays may be used.
    It will only be triggered if the class to which the method belongs has a ``memory_limit``
    attribute that is set to something other than ``None``.

    It will set an attribute within the method's class named ``memmap_filenames``, which is a list
    of filename strings, with ``n_files`` elements.
    If ``memory_limit`` is None, then it will be a list of ``Nones``.

    Files generated by this function will be stored in the NiMARE data directory and will be
    removed after the wrapped method finishes.
    """

    def inner_function(function):
        @wraps(function)
        def memmap_context(self, *args, **kwargs):
            if hasattr(self, "memory_limit") and self.memory_limit:
                self.memmap_filenames, filenames = [], []
                for i_file in range(n_files):
                    start_time = datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
                    _, filename = mkstemp(prefix=self.__class__.__name__, suffix=start_time)
                    logger.debug(f"Temporary file written to {filename}")
                    self.memmap_filenames.append(filename)
                    filenames.append(filename)
            else:
                filenames = self.memmap_filenames = [None] * n_files

            try:
                return function(self, *args, **kwargs)
            except:
                for filename in filenames:
                    logger.error(f"{function.__name__} failed, removing {filename}")
                raise
            finally:
                if hasattr(self, "memory_limit") and self.memory_limit:
                    for filename in filenames:
                        if os.path.isfile(filename):
                            logger.debug(f"Removing temporary file: {filename}")
                            os.remove(filename)
                        else:
                            logger.debug(f"Temporary file DNE: {filename}")

        return memmap_context

    return inner_function


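# --- Illustrative sketch (added by the editor, not part of the original module) ---
# use_memmap() decorates *methods*. The hypothetical class below only illustrates the
# contract: a ``memory_limit`` attribute switches memory-mapping on, and the decorator
# exposes the temporary file names through ``self.memmap_filenames``.
class _ExampleMemmapUser:
    """Sketch of a class whose method opts into the memory-mapping decorator."""

    def __init__(self, memory_limit="1gb"):
        self.memory_limit = memory_limit

    @use_memmap(LGR, n_files=1)
    def compute(self, n_values=1000):
        # When memory_limit is set, this is a path to a temporary file; otherwise None.
        memfile = self.memmap_filenames[0]
        if memfile is not None:
            data = np.memmap(memfile, dtype=float, mode="w+", shape=(n_values,))
        else:
            data = np.zeros(n_values)
        data[:] = 1.0
        total = float(data.sum())
        del data  # release the memmap handle before the decorator removes the file
        return total

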
BYTE = 2
KILOBYTE = BYTE**10
BYTE_CONVERSION = {
    "kb": KILOBYTE,
    "mb": KILOBYTE**2,
    "gb": KILOBYTE**3,
    "tb": KILOBYTE**4,
}


def _determine_chunk_size(limit, arr, multiplier=1):
    """Determine how many arrays can be read into memory at once.

    Parameters
    ----------
    limit : :obj:`str`
        String representation of memory limit, can use:
        kb, mb, gb, and tb as suffix (e.g., "4gb").
    arr : :obj:`numpy.array`
        Representative numpy array.
    multiplier : :obj:`int`
        Adjustment for processes that have more or
        less overhead than expected.
    """
    limit = limit.lower()
    size, representation = re.search(r"([0-9]+)([a-z]+)", limit).groups()

    limit_bytes = float(size) * BYTE_CONVERSION[representation] * multiplier

    arr_bytes = arr.size * arr.itemsize

    chunk_size = int(limit_bytes // arr_bytes)

    if chunk_size == 0:
        arr_size = arr_bytes // BYTE_CONVERSION["mb"]
        raise RuntimeError(f"memory limit: {limit} too small for array with size {arr_size}mb")

    return chunk_size


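# --- Illustrative sketch (added by the editor, not part of the original module) ---
# Worked example of the chunk-size arithmetic: with the constants above, "1gb" is
# 1024**3 = 1,073,741,824 bytes, and a float64 array with 100,000 elements occupies
# 800,000 bytes, so roughly 1,342 such arrays fit under the limit.
def _example_chunk_size():
    """Sketch: a 1 GB limit divided by an 800 kB representative array."""
    arr = np.zeros(100_000, dtype=np.float64)
    return _determine_chunk_size("1gb", arr)  # -> 1342

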
def _safe_transform(imgs, masker, memory_limit="1gb", dtype="auto", memfile=None):
    """Apply a masker with limited memory usage.

    Parameters
    ----------
    imgs : list of niimgs
        List of images upon which to apply the masker.
    masker : nilearn masker
        Masker object to apply to images.
    memory_limit : :obj:`str`, optional
        String representation of memory limit, can use:
        kb, mb, gb, and tb as suffix (e.g., "4gb").
    dtype : :obj:`str`, optional
        Target datatype of masked array.
        Default is "auto", which uses the datatype of the niimgs.
    memfile : :obj:`str` or None, optional
        Name of a memory-mapped file. If None, memory-mapping will not be used.

    Returns
    -------
    masked_data : :obj:`numpy.ndarray` or :obj:`numpy.memmap`
        Masked data in a 2D array.
        Either an ndarray (if memfile is None) or a memmap array (if memfile is a string).
    """
    assert isinstance(memfile, (type(None), str))

    first_img_data = masker.transform(imgs[0])
    masked_shape = (len(imgs), first_img_data.size)
    if memfile:
        masked_data = np.memmap(
            memfile,
            dtype=first_img_data.dtype if dtype == "auto" else dtype,
            mode="w+",
            shape=masked_shape,
        )
    else:
        masked_data = np.empty(
            masked_shape,
            dtype=first_img_data.dtype if dtype == "auto" else dtype,
        )

    # perform transform on chunks of the input maps
    chunk_size = _determine_chunk_size(memory_limit, first_img_data)
    map_chunks = [imgs[i : i + chunk_size] for i in range(0, len(imgs), chunk_size)]
    idx = 0
    for map_chunk in map_chunks:
        end_idx = idx + len(map_chunk)
        map_chunk_data = masker.transform(map_chunk)
        masked_data[idx:end_idx, :] = map_chunk_data
        idx = end_idx

    return masked_data


def _add_metadata_to_dataframe(
    dataset,
    dataframe,
    metadata_field,
    target_column,
    filter_func=np.mean,
):
    """Add metadata from a Dataset to a DataFrame.

    .. versionadded:: 0.0.8

    This is particularly useful for kernel transformers or estimators where a given metadata field
    is necessary (e.g., ALEKernel with "sample_size"), but we want to just use the coordinates
    DataFrame instead of passing the full Dataset.

    Parameters
    ----------
    dataset : :obj:`~nimare.dataset.Dataset`
        Dataset containing study IDs and metadata to feed into dataframe.
    dataframe : :obj:`pandas.DataFrame`
        DataFrame containing study IDs, into which Dataset metadata will be merged.
    metadata_field : :obj:`str`
        Metadata field in ``dataset``.
    target_column : :obj:`str`
        Name of the column that will be added to ``dataframe``, containing information from the
        Dataset.
    filter_func : :obj:`function`, optional
        Function to apply to the metadata so that it fits as a column in a DataFrame.
        Default is ``numpy.mean``.

    Returns
    -------
    dataframe : :obj:`pandas.DataFrame`
        Updated DataFrame with ``target_column`` added.
    """
    dataframe = dataframe.copy()

    if metadata_field in dataset.get_metadata():
        # Collect metadata from Dataset
        metadata = dataset.get_metadata(field=metadata_field, ids=dataset.ids)
        metadata = [[m] for m in metadata]
        # Create a DataFrame with the metadata
        metadata = pd.DataFrame(
            index=dataset.ids,
            data=metadata,
            columns=[metadata_field],
        )
        # Reduce the metadata (if in list/array format) to single values
        metadata[target_column] = metadata[metadata_field].apply(
            lambda x: None if x is None else filter_func(x)
        )
        # Merge metadata df into coordinates df
        dataframe = dataframe.merge(
            right=metadata,
            left_on="id",
            right_index=True,
            sort=False,
            validate="many_to_one",
            suffixes=(False, False),
            how="left",
        )
    else:
        LGR.warning(
            f"Metadata field '{metadata_field}' not found. "
            "Set a constant value for this field as an argument, if possible."
        )

    return dataframe


def _check_type(obj, clss, **kwargs):
    """Check variable type and initialize if necessary.

    .. versionadded:: 0.0.8

    Parameters
    ----------
    obj
        Object to check and initialize if necessary.
    clss
        Target class of the object.
    kwargs
        Dictionary of keyword arguments that can be used when initializing the object.

    Returns
    -------
    obj
        Initialized version of the object.
    """
    # Allow both instances and classes for the input.
    if not issubclass(type(obj), clss) and not issubclass(obj, clss):
        raise ValueError(f"Argument {type(obj)} must be a kind of {clss}")
    elif not inspect.isclass(obj) and kwargs:
        LGR.warning(
            f"Argument {type(obj)} has already been initialized, so arguments "
            f"will be ignored: {', '.join(kwargs.keys())}"
        )
    elif inspect.isclass(obj):
        obj = obj(**kwargs)
    return obj


def _boolean_unmask(data_array, bool_array):
    """Unmask data based on a boolean array, with NaNs in empty voxels.

    Parameters
    ----------
    data_array : 1D or 2D :obj:`numpy.ndarray`
        Masked data array.
    bool_array : 1D :obj:`numpy.ndarray`
        Boolean mask array. Must have the same number of ``True`` entries as elements in the
        second dimension of ``data_array``.

    Returns
    -------
    unmasked_data : 1D or 2D :obj:`numpy.ndarray`
        Unmasked data array.
        If 1D, first dimension is the same size as the first (and only) dimension of
        ``boolean_array``.
        If 2D, first dimension is the same size as the first dimension of ``data_array``, while
        second dimension is the same size as the first (and only) dimension of ``boolean_array``.
        All elements corresponding to ``False`` values in ``boolean_array`` will have NaNs.
    """
    assert data_array.ndim in (1, 2)
    assert bool_array.ndim == 1
    assert bool_array.sum() == data_array.shape[-1]

    unmasked_data = np.full(
        shape=bool_array.shape + data_array.T.shape[1:],
        fill_value=np.nan,
        dtype=data_array.dtype,
    )
    unmasked_data[bool_array] = data_array
    unmasked_data = unmasked_data.T
    return unmasked_data


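# --- Illustrative sketch (added by the editor, not part of the original module) ---
# _boolean_unmask() scatters masked values back into a full-length array, filling the
# positions that were False in the mask with NaN. The function name below is hypothetical.
def _example_boolean_unmask():
    """Sketch: two masked values placed back into a four-voxel array."""
    bool_array = np.array([True, False, True, False])
    data_array = np.array([1.0, 2.0])
    return _boolean_unmask(data_array, bool_array)  # -> [1., nan, 2., nan]

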
def unique_rows(ar, return_counts=False):
    """Remove repeated rows from a 2D array.

    In particular, if given an array of coordinates of shape
    (Npoints, Ndim), it will remove repeated points.

    Parameters
    ----------
    ar : 2-D ndarray
        The input array.
    return_counts : :obj:`bool`, optional
        If True, also return the number of times each unique item appears in ar.

    Returns
    -------
    ar_out : 2-D ndarray
        A copy of the input array with repeated rows removed.
    unique_counts : :obj:`np.ndarray`, optional
        The number of times each of the unique values comes up in the original array.
        Only provided if return_counts is True.

    Raises
    ------
    ValueError : if `ar` is not two-dimensional.

    Notes
    -----
    The function will generate a copy of `ar` if it is not
    C-contiguous, which will negatively affect performance for large
    input arrays.

    This is taken from skimage. See :func:`skimage.util.unique_rows`.

    Examples
    --------
    >>> ar = np.array([[1, 0, 1],
    ...                [0, 1, 0],
    ...                [1, 0, 1]], np.uint8)
    >>> unique_rows(ar)
    array([[0, 1, 0],
           [1, 0, 1]], dtype=uint8)

    License
    -------
    Copyright (C) 2019, the scikit-image team
    All rights reserved.
    """
    if ar.ndim != 2:
        raise ValueError("unique_rows() only makes sense for 2D arrays, " "got %dd" % ar.ndim)
    # the view in the next line only works if the array is C-contiguous
    ar = np.ascontiguousarray(ar)
    # np.unique() finds identical items in a raveled array. To make it
    # see each row as a single item, we create a view of each row as a
    # byte string of length itemsize times number of columns in `ar`
    ar_row_view = ar.view("|S%d" % (ar.itemsize * ar.shape[1]))
    if return_counts:
        _, unique_row_indices, counts = np.unique(
            ar_row_view, return_index=True, return_counts=True
        )

        return ar[unique_row_indices], counts
    else:
        _, unique_row_indices = np.unique(ar_row_view, return_index=True)

        return ar[unique_row_indices]


def find_braces(string):
    """Search a string for matched braces.

    This is used to identify pairs of braces in BibTeX files.
    The outside-most pairs should correspond to BibTeX entries.

    Parameters
    ----------
    string : :obj:`str`
        A long string to search for paired braces.

    Returns
    -------
    :obj:`list` of :obj:`tuple` of :obj:`int`
        A list of two-element tuples of indices of matched braces.
    """
    toret = {}
    pstack = []

    for idx, char in enumerate(string):
        if char == "{":
            pstack.append(idx)
        elif char == "}":
            if len(pstack) == 0:
                raise IndexError(f"No matching closing parens at: {idx}")

            toret[pstack.pop()] = idx

    if len(pstack) > 0:
        raise IndexError(f"No matching opening parens at: {pstack.pop()}")

    toret = list(toret.items())
    return toret


def reduce_idx(idx_list):
    """Identify outermost brace indices in list of indices.

    The purpose here is to find the brace pairs that correspond to BibTeX entries,
    while discarding brace pairs that appear within the entries
    (e.g., braces around article titles).

    Parameters
    ----------
    idx_list : :obj:`list` of :obj:`tuple` of :obj:`int`
        A list of two-element tuples of indices of matched braces.

    Returns
    -------
    reduced_idx_list : :obj:`list` of :obj:`tuple` of :obj:`int`
        A list of two-element tuples of indices of matched braces corresponding to BibTeX entries.
    """
    idx_list2 = [idx_item[0] for idx_item in idx_list]
    idx = np.argsort(idx_list2)
    idx_list = [idx_list[i] for i in idx]

    df = pd.DataFrame(data=idx_list, columns=["start", "end"])

    good_idx = []
    df["within"] = False
    for i, row in df.iterrows():
        df["within"] = df["within"] | ((df["start"] > row["start"]) & (df["end"] < row["end"]))
        if not df.iloc[i]["within"]:
            good_idx.append(i)

    idx_list = [idx_list[i] for i in good_idx]
    return idx_list


def index_bibtex_identifiers(string, idx_list):
    """Identify the BibTeX entry identifier before each entry.

    The purpose of this function is to take the raw BibTeX string and a list of indices of entries,
    starting and ending with the braces of each entry, and then extract the identifier before each.

    Parameters
    ----------
    string : :obj:`str`
        The full BibTeX file, as a string.
    idx_list : :obj:`list` of :obj:`tuple` of :obj:`int`
        A list of two-element tuples of indices of matched braces corresponding to BibTeX entries.

    Returns
    -------
    idx_list : :obj:`list` of :obj:`tuple` of :obj:`int`
        A list of two-element tuples of indices of BibTeX entries,
        from the starting @ to the final }.
    """
    at_idx = [(a.start(), a.end() - 1) for a in re.finditer("@[a-zA-Z0-9]+{", string)]
    df = pd.DataFrame(at_idx, columns=["real_start", "false_start"])
    df2 = pd.DataFrame(idx_list, columns=["false_start", "end"])
    df = pd.merge(left=df, right=df2, left_on="false_start", right_on="false_start")
    new_idx_list = list(zip(df.real_start, df.end))
    return new_idx_list


def find_citations(description):
    r"""Find citations in a text description.

    It looks for cases of \\citep{} and \\cite{} in a string.

    Parameters
    ----------
    description : :obj:`str`
        Description of a method, optionally with citations.

    Returns
    -------
    all_citations : :obj:`list` of :obj:`str`
        A list of all identifiers for citations.
    """
    paren_citations = re.findall(r"\\citep{([a-zA-Z0-9,/\.]+)}", description)
    intext_citations = re.findall(r"\\cite{([a-zA-Z0-9,/\.]+)}", description)
    inparen_citations = re.findall(r"\\citealt{([a-zA-Z0-9,/\.]+)}", description)
    all_citations = ",".join(paren_citations + intext_citations + inparen_citations)
    all_citations = all_citations.split(",")
    all_citations = sorted(list(set(all_citations)))
    return all_citations


def reduce_references(citations, reference_list):
    """Reduce the list of references to only include ones associated with requested citations.

    Parameters
    ----------
    citations : :obj:`list` of :obj:`str`
        A list of all identifiers for citations.
    reference_list : :obj:`list` of :obj:`str`
        List of all available BibTeX entries.

    Returns
    -------
    reduced_reference_list : :obj:`list` of :obj:`str`
        List of BibTeX entries for citations only.
    """
    reduced_reference_list = []
    for citation in citations:
        citation_found = False
        for reference in reference_list:
            check_string = "@[a-zA-Z]+{" + citation + ","
            if re.match(check_string, reference):
                reduced_reference_list.append(reference)
                citation_found = True
                continue

        if not citation_found:
            LGR.warning(f"Citation {citation} not found.")

    return reduced_reference_list


def get_description_references(description):
    """Find BibTeX references for citations in a methods description.

    Parameters
    ----------
    description : :obj:`str`
        Description of a method, optionally with citations.

    Returns
    -------
    bibtex_string : :obj:`str`
        A string containing BibTeX entries, limited only to the citations in the description.
    """
    bibtex_file = op.join(get_resource_path(), "references.bib")
    with open(bibtex_file, "r") as fo:
        bibtex_string = fo.read()

    braces_idx = find_braces(bibtex_string)
    red_braces_idx = reduce_idx(braces_idx)
    bibtex_idx = index_bibtex_identifiers(bibtex_string, red_braces_idx)
    citations = find_citations(description)
    reference_list = [bibtex_string[start : end + 1] for start, end in bibtex_idx]
    reduced_reference_list = reduce_references(citations, reference_list)

    bibtex_string = "\n".join(reduced_reference_list)
    return bibtex_string


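# --- Illustrative sketch (added by the editor, not part of the original module) ---
# The BibTeX helpers above form a small pipeline: find_braces() pairs up braces,
# reduce_idx() keeps only the outermost pairs (whole entries), index_bibtex_identifiers()
# extends each span back to its "@type{" prefix, and get_description_references() ties it
# all together for a methods description. The citation key below is hypothetical; unknown
# keys simply trigger a warning and are omitted from the returned string.
def _example_description_references():
    """Sketch: collect BibTeX entries for the citations in a description string."""
    description = r"ALE was performed as described in \citep{Turkeltaub2002}."
    return get_description_references(description)

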
def _create_name(resource):
    """Take study/analysis object and try to create dataframe friendly/readable name."""
    return "_".join(resource.name.split()) if resource.name else resource.id


def load_nimads(studyset, annotation=None):
    """Load a studyset object from a dictionary, json file, or studyset object."""
    from nimare.nimads import Studyset

    if isinstance(studyset, dict):
        studyset = Studyset(studyset)
    elif isinstance(studyset, str):
        with open(studyset, "r") as f:
            studyset = Studyset(json.load(f))
    elif isinstance(studyset, Studyset):
        pass
    else:
        raise ValueError(
            "studyset must be: a dictionary, a path to a json file, or studyset object"
        )

    if annotation:
        studyset.annotations = annotation
    return studyset


def coef_spline_bases(axis_coords, spacing, margin):
    """
    Coefficient of cubic B-spline bases in any x/y/z direction.

    Parameters
    ----------
    axis_coords : value range in x/y/z direction
    spacing : (equally spaced) knots spacing in x/y/z direction
    margin : extend the region where B-splines are constructed (min-margin, max_margin)
        to avoid weakly-supported B-spline on the edge

    Returns
    -------
    coef_spline : 2-D ndarray (n_points x n_spline_bases)
    """
    import patsy

    # create B-spline basis for x/y/z coordinate
    wider_axis_coords = np.arange(np.min(axis_coords) - margin, np.max(axis_coords) + margin)
    knots = np.arange(  # noqa: F841
        np.min(axis_coords) - margin, np.max(axis_coords) + margin, step=spacing
    )
    design_matrix = patsy.dmatrix(
        "bs(x, knots=knots, degree=3,include_intercept=False)",
        data={"x": wider_axis_coords},
        return_type="matrix",
    )
    design_array = np.array(design_matrix)[:, 1:]  # remove the first column (every element is 1)
    coef_spline = design_array[margin : -margin + 1, :]
    # remove the basis with no/weakly support from the square
    supported_basis = np.sum(coef_spline, axis=0) != 0
    coef_spline = coef_spline[:, supported_basis]

    return coef_spline


def b_spline_bases(masker_voxels, spacing, margin=10):
    """Cubic B-spline bases for spatial intensity.

    The whole coefficient matrix is constructed by taking the tensor product of
    the B-spline bases coefficient matrices in the three directions.

    Parameters
    ----------
    masker_voxels : :obj:`numpy.ndarray`
        matrix with element either 0 or 1, indicating if it's within brain mask,
    spacing : :obj:`int`
        (equally spaced) knots spacing in x/y/z direction,
    margin : :obj:`int`
        extend the region where B-splines are constructed (min-margin, max_margin)
        to avoid weakly-supported B-spline on the edge

    Returns
    -------
    X : :obj:`numpy.ndarray`
        2-D ndarray (n_voxel x n_spline_bases) only keeps with within-brain voxels
    """
    # dim_mask = masker_voxels.shape
    # n_brain_voxel = np.sum(masker_voxels)
    # remove the blank space around the brain mask
    xx = np.where(np.apply_over_axes(np.sum, masker_voxels, [1, 2]) > 0)[0]
    yy = np.where(np.apply_over_axes(np.sum, masker_voxels, [0, 2]) > 0)[1]
    zz = np.where(np.apply_over_axes(np.sum, masker_voxels, [0, 1]) > 0)[2]

    x_spline = coef_spline_bases(xx, spacing, margin)
    y_spline = coef_spline_bases(yy, spacing, margin)
    z_spline = coef_spline_bases(zz, spacing, margin)
    x_spline_coords = x_spline.nonzero()
    y_spline_coords = y_spline.nonzero()
    z_spline_coords = z_spline.nonzero()
    x_spline_sparse = sparse.COO(x_spline_coords, x_spline[x_spline_coords])
    y_spline_sparse = sparse.COO(y_spline_coords, y_spline[y_spline_coords])
    z_spline_sparse = sparse.COO(z_spline_coords, z_spline[z_spline_coords])

    # create spatial design matrix by tensor product of spline bases in 3 dimensions
    # Row sums of X are all 1 => there is no need to re-normalise X
    X = np.kron(np.kron(x_spline_sparse, y_spline_sparse), z_spline_sparse)
    # remove the voxels outside brain mask
    axis_dim = [xx.shape[0], yy.shape[0], zz.shape[0]]
    brain_voxels_index = [
        (z - np.min(zz))
        + axis_dim[2] * (y - np.min(yy))
        + axis_dim[1] * axis_dim[2] * (x - np.min(xx))
        for x in xx
        for y in yy
        for z in zz
        if masker_voxels[x, y, z] == 1
    ]
    X = X[brain_voxels_index, :].todense()
    # remove tensor product basis that have no support in the brain
    x_df, y_df, z_df = x_spline.shape[1], y_spline.shape[1], z_spline.shape[1]
    support_basis = []
    # find and remove weakly supported B-spline bases
    for bx in range(x_df):
        for by in range(y_df):
            for bz in range(z_df):
                basis_index = bz + z_df * by + z_df * y_df * bx
                basis_coef = X[:, basis_index]
                if np.max(basis_coef) >= 0.1:
                    support_basis.append(basis_index)
    X = X[:, support_basis]

    return X


def dummy_encoding_moderators(dataset_annotations, moderators):
    """Convert categorical moderators to dummy encoded variables.

    Parameters
    ----------
    dataset_annotations : :obj:`pandas.DataFrame`
        Annotations of the dataset.
    moderators : :obj:`list`
        Study-level moderators to be included in the CBMR framework.

    Returns
    -------
    dataset_annotations : :obj:`pandas.DataFrame`
        Annotations of the dataset with dummy encoded moderator columns.
    new_moderators : :obj:`list`
        List of study-level moderators after dummy encoding.
    """
    new_moderators = []
    for moderator in moderators.copy():
        if len(moderator.split(":reference=")) == 2:
            moderator, reference_subtype = moderator.split(":reference=")
        if np.array_equal(
            dataset_annotations[moderator], dataset_annotations[moderator].astype(str)
        ):
            categories_unique = dataset_annotations[moderator].unique().tolist()
            # sort categories alphabetically
            categories_unique = sorted(categories_unique, key=str.lower)
            if "reference_subtype" in locals():
                # remove reference subgroup from list and add it to the first position
                categories_unique.remove(reference_subtype)
                categories_unique.insert(0, reference_subtype)
            for category in categories_unique:
                dataset_annotations[category] = (
                    dataset_annotations[moderator] == category
                ).astype(int)
            # drop the reference (first) category column, as it is encoded implicitly
            # by the other dummy-encoded columns all being zero
            dataset_annotations = dataset_annotations.drop([categories_unique[0]], axis=1)
            new_moderators.extend(
                categories_unique[1:]
            )  # add dummy encoded moderators (except from the reference subgroup)
        else:
            new_moderators.append(moderator)
    return dataset_annotations, new_moderators
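

# --- Illustrative sketch (added by the editor, not part of the original module) ---
# A hedged example of the dummy-encoding convention: a categorical "diagnosis" moderator
# with an explicit reference level drops the reference column and keeps the others.
# All names and values below are chosen for illustration only.
def _example_dummy_encoding():
    """Sketch: dummy-encode a categorical moderator with 'control' as the reference."""
    annotations = pd.DataFrame({"diagnosis": ["control", "patient", "patient", "control"]})
    annotations, moderators = dummy_encoding_moderators(
        annotations, ["diagnosis:reference=control"]
    )
    return annotations, moderators  # moderators -> ["patient"]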