nimare 0.4.2rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/__init__.py +0 -0
- benchmarks/bench_cbma.py +57 -0
- nimare/__init__.py +45 -0
- nimare/_version.py +21 -0
- nimare/annotate/__init__.py +21 -0
- nimare/annotate/cogat.py +213 -0
- nimare/annotate/gclda.py +924 -0
- nimare/annotate/lda.py +147 -0
- nimare/annotate/text.py +75 -0
- nimare/annotate/utils.py +87 -0
- nimare/base.py +217 -0
- nimare/cli.py +124 -0
- nimare/correct.py +462 -0
- nimare/dataset.py +685 -0
- nimare/decode/__init__.py +33 -0
- nimare/decode/base.py +115 -0
- nimare/decode/continuous.py +462 -0
- nimare/decode/discrete.py +753 -0
- nimare/decode/encode.py +110 -0
- nimare/decode/utils.py +44 -0
- nimare/diagnostics.py +510 -0
- nimare/estimator.py +139 -0
- nimare/extract/__init__.py +19 -0
- nimare/extract/extract.py +466 -0
- nimare/extract/utils.py +295 -0
- nimare/generate.py +331 -0
- nimare/io.py +635 -0
- nimare/meta/__init__.py +39 -0
- nimare/meta/cbma/__init__.py +6 -0
- nimare/meta/cbma/ale.py +951 -0
- nimare/meta/cbma/base.py +947 -0
- nimare/meta/cbma/mkda.py +1361 -0
- nimare/meta/cbmr.py +970 -0
- nimare/meta/ibma.py +1683 -0
- nimare/meta/kernel.py +501 -0
- nimare/meta/models.py +1199 -0
- nimare/meta/utils.py +494 -0
- nimare/nimads.py +492 -0
- nimare/reports/__init__.py +24 -0
- nimare/reports/base.py +664 -0
- nimare/reports/default.yml +123 -0
- nimare/reports/figures.py +651 -0
- nimare/reports/report.tpl +160 -0
- nimare/resources/__init__.py +1 -0
- nimare/resources/atlases/Harvard-Oxford-LICENSE +93 -0
- nimare/resources/atlases/HarvardOxford-cort-maxprob-thr25-2mm.nii.gz +0 -0
- nimare/resources/database_file_manifest.json +142 -0
- nimare/resources/english_spellings.csv +1738 -0
- nimare/resources/filenames.json +32 -0
- nimare/resources/neurosynth_laird_studies.json +58773 -0
- nimare/resources/neurosynth_stoplist.txt +396 -0
- nimare/resources/nidm_pain_dset.json +1349 -0
- nimare/resources/references.bib +541 -0
- nimare/resources/semantic_knowledge_children.txt +325 -0
- nimare/resources/semantic_relatedness_children.txt +249 -0
- nimare/resources/templates/MNI152_2x2x2_brainmask.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_T1w.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_T1w.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz +0 -0
- nimare/results.py +225 -0
- nimare/stats.py +276 -0
- nimare/tests/__init__.py +1 -0
- nimare/tests/conftest.py +229 -0
- nimare/tests/data/amygdala_roi.nii.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_coordinates.tsv.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_metadata.tsv.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz +0 -0
- nimare/tests/data/data-neurosynth_version-7_vocab-terms_vocabulary.txt +100 -0
- nimare/tests/data/neurosynth_dset.json +2868 -0
- nimare/tests/data/neurosynth_laird_studies.json +58773 -0
- nimare/tests/data/nidm_pain_dset.json +1349 -0
- nimare/tests/data/nimads_annotation.json +1 -0
- nimare/tests/data/nimads_studyset.json +1 -0
- nimare/tests/data/test_baseline.txt +2 -0
- nimare/tests/data/test_pain_dataset.json +1278 -0
- nimare/tests/data/test_pain_dataset_multiple_contrasts.json +1242 -0
- nimare/tests/data/test_sleuth_file.txt +18 -0
- nimare/tests/data/test_sleuth_file2.txt +10 -0
- nimare/tests/data/test_sleuth_file3.txt +5 -0
- nimare/tests/data/test_sleuth_file4.txt +5 -0
- nimare/tests/data/test_sleuth_file5.txt +5 -0
- nimare/tests/test_annotate_cogat.py +32 -0
- nimare/tests/test_annotate_gclda.py +86 -0
- nimare/tests/test_annotate_lda.py +27 -0
- nimare/tests/test_dataset.py +99 -0
- nimare/tests/test_decode_continuous.py +132 -0
- nimare/tests/test_decode_discrete.py +92 -0
- nimare/tests/test_diagnostics.py +168 -0
- nimare/tests/test_estimator_performance.py +385 -0
- nimare/tests/test_extract.py +46 -0
- nimare/tests/test_generate.py +247 -0
- nimare/tests/test_io.py +240 -0
- nimare/tests/test_meta_ale.py +298 -0
- nimare/tests/test_meta_cbmr.py +295 -0
- nimare/tests/test_meta_ibma.py +240 -0
- nimare/tests/test_meta_kernel.py +209 -0
- nimare/tests/test_meta_mkda.py +234 -0
- nimare/tests/test_nimads.py +21 -0
- nimare/tests/test_reports.py +110 -0
- nimare/tests/test_stats.py +101 -0
- nimare/tests/test_transforms.py +272 -0
- nimare/tests/test_utils.py +200 -0
- nimare/tests/test_workflows.py +221 -0
- nimare/tests/utils.py +126 -0
- nimare/transforms.py +907 -0
- nimare/utils.py +1367 -0
- nimare/workflows/__init__.py +14 -0
- nimare/workflows/base.py +189 -0
- nimare/workflows/cbma.py +165 -0
- nimare/workflows/ibma.py +108 -0
- nimare/workflows/macm.py +77 -0
- nimare/workflows/misc.py +65 -0
- nimare-0.4.2rc4.dist-info/LICENSE +21 -0
- nimare-0.4.2rc4.dist-info/METADATA +124 -0
- nimare-0.4.2rc4.dist-info/RECORD +119 -0
- nimare-0.4.2rc4.dist-info/WHEEL +5 -0
- nimare-0.4.2rc4.dist-info/entry_points.txt +2 -0
- nimare-0.4.2rc4.dist-info/top_level.txt +2 -0
nimare/dataset.py
ADDED
@@ -0,0 +1,685 @@
|
|
1
|
+
"""Classes for representing datasets of images and/or coordinates."""
|
2
|
+
|
3
|
+
import copy
|
4
|
+
import inspect
|
5
|
+
import json
|
6
|
+
import logging
|
7
|
+
import os.path as op
|
8
|
+
import warnings
|
9
|
+
|
10
|
+
import numpy as np
|
11
|
+
import pandas as pd
|
12
|
+
from nilearn._utils import load_niimg
|
13
|
+
|
14
|
+
from nimare.base import NiMAREBase
|
15
|
+
from nimare.utils import (
|
16
|
+
_dict_to_coordinates,
|
17
|
+
_dict_to_df,
|
18
|
+
_listify,
|
19
|
+
_transform_coordinates_to_space,
|
20
|
+
_try_prepend,
|
21
|
+
_validate_df,
|
22
|
+
_validate_images_df,
|
23
|
+
get_masker,
|
24
|
+
get_template,
|
25
|
+
mm2vox,
|
26
|
+
)
|
27
|
+
|
28
|
+
LGR = logging.getLogger(__name__)
|
29
|
+
|
30
|
+
|
31
|
+
class Dataset(NiMAREBase):
|
32
|
+
"""Storage container for a coordinate- and/or image-based meta-analytic dataset/database.
|
33
|
+
|
34
|
+
.. versionchanged:: 0.0.9
|
35
|
+
|
36
|
+
* [ENH] Add merge method to Dataset class
|
37
|
+
|
38
|
+
.. versionchanged:: 0.0.8
|
39
|
+
|
40
|
+
* [FIX] Set ``nimare.dataset.Dataset.basepath`` in :func:`update_path` using absolute path.
|
41
|
+
|
42
|
+
Parameters
|
43
|
+
----------
|
44
|
+
source : :obj:`str` or :obj:`dict`
|
45
|
+
JSON file containing dictionary with database information or the dict()
|
46
|
+
object
|
47
|
+
|
48
|
+
target : :obj:`str`, optional
|
49
|
+
Desired coordinate space for coordinates. Names follow NIDM convention.
|
50
|
+
Default is 'mni152_2mm' (MNI space with 2x2x2 voxels).
|
51
|
+
This parameter has no impact on images.
|
52
|
+
|
53
|
+
mask : :obj:`str`, :class:`~nibabel.nifti1.Nifti1Image`, \
|
54
|
+
:class:`~nilearn.input_data.NiftiMasker` or similar, or None, optional
|
55
|
+
Mask(er) to use. If None, uses the target space image, with all
|
56
|
+
non-zero voxels included in the mask.
|
57
|
+
|
58
|
+
Attributes
|
59
|
+
----------
|
60
|
+
space : :obj:`str`
|
61
|
+
Standard space. Same as ``target`` parameter.
|
62
|
+
|
63
|
+
Notes
|
64
|
+
-----
|
65
|
+
Images loaded into a Dataset are assumed to be in the same space.
|
66
|
+
If images have different resolutions or affines from the Dataset's masker,
|
67
|
+
then they will be resampled automatically, at the point where they're used,
|
68
|
+
by :obj:`Dataset.masker`.
|
69
|
+
"""
|
70
|
+
|
71
|
+
_id_cols = ["id", "study_id", "contrast_id"]
|
72
|
+
|
73
|
+
def __init__(self, source, target="mni152_2mm", mask=None):
    """Build the Dataset from a JSON file or an already-loaded dictionary.

    Parameters
    ----------
    source : :obj:`str`, path-like, or :obj:`dict`
        Path to a JSON file holding the dataset dictionary, or the dictionary itself.
        The dictionary is keyed by study ID; each study has a ``"contrasts"`` mapping
        keyed by contrast ID.
    target : :obj:`str`, optional
        Name of the coordinate space. Stored as ``self.space``.
    mask : optional
        Mask(er) to use. If None, the brain mask of the ``target`` template is used.

    Raises
    ------
    TypeError
        If ``source`` is neither a file path nor a dictionary.
    """
    if isinstance(source, dict):
        data = source
    elif isinstance(source, str) or hasattr(source, "__fspath__"):
        # JSON is UTF-8 by specification, so do not depend on the locale's default encoding.
        with open(source, "r", encoding="utf-8") as f_obj:
            data = json.load(f_obj)
    else:
        raise TypeError("`source` needs to be a file path or a dictionary")

    # Datasets are organized by study, then experiment (contrast).
    # A unique row ID is built by combining the study ID with the contrast ID.
    all_ids = [
        [f"{pid}-{expid}", pid, expid]
        for pid, study in data.items()
        for expid in study["contrasts"]
    ]
    id_df = pd.DataFrame(columns=list(self._id_cols), data=all_ids)
    id_df = id_df.set_index("id", drop=False)
    self._ids = id_df.index.values

    # Set up Masker
    if mask is None:
        mask = get_template(target, mask="brain")
    self.masker = mask
    self.space = target

    self.annotations = _dict_to_df(id_df, data, key="labels")
    self.coordinates = _dict_to_coordinates(data, masker=self.masker, space=self.space)
    self.images = _dict_to_df(id_df, data, key="images")
    self.metadata = _dict_to_df(id_df, data, key="metadata")
    self.texts = _dict_to_df(id_df, data, key="text")
    self.basepath = None

    if "z_stat" in self.coordinates.columns:
        # "z_stat" column may contain Nones; only cast and inspect it when fully populated
        if not self.coordinates["z_stat"].isna().any():
            # Ensure z_stat is treated as float
            self.coordinates["z_stat"] = self.coordinates["z_stat"].astype(float)

            # Raise warning if coordinates dataset contains both positive and negative z_stats
            z_values = self.coordinates["z_stat"].values
            if (z_values >= 0).any() and (z_values < 0).any():
                warnings.warn(
                    "Coordinates dataset contains both positive and negative z_stats. "
                    "The algorithms currently implemented in NiMARE are designed for "
                    "one-sided tests. This might lead to unexpected results."
                )
|
123
|
+
|
124
|
+
def __repr__(self):
    """Return a short textual summary of the Dataset.

    The format is ``ClassName(<n> experiments, key=value, ...)``: the number of
    experiments always comes first, followed by the parameters whose values differ
    from the ``__init__`` defaults.
    """
    # Default values declared in the constructor signature
    init_defaults = {}
    for name, parameter in inspect.signature(self.__init__).parameters.items():
        if parameter.default is not inspect.Parameter.empty:
            init_defaults[name] = parameter.default

    # Drop nested sub-parameters (their names contain a double underscore)
    shown = {name: value for name, value in self.get_params().items() if "__" not in name}
    # The "target" parameter lives on the instance as "space"; report it under that
    # name. It has no entry in the defaults, so it survives the filter below.
    shown["space"] = self.space
    shown.pop("target")
    shown = {name: value for name, value in shown.items() if init_defaults.get(name) != value}

    # Strings get single quotes; everything else is formatted as-is
    pieces = [
        f"{name}='{value}'" if isinstance(value, str) else f"{name}={value}"
        for name, value in shown.items()
    ]
    params_str = ", ".join(pieces)
    separator = ", " if params_str else ""
    params_str = f"{len(self.ids)} experiments{separator}{params_str}"
    return f"{self.__class__.__name__}({params_str})"
|
163
|
+
|
164
|
+
@property
def ids(self):
    """numpy.ndarray: 1D array of identifiers in Dataset.

    There is no public setter for this property; values are assigned through the
    private ``_ids`` property, so ``Dataset.ids`` is treated as immutable.
    """
    return self.__ids

@ids.setter
def _ids(self, ids):
    # Coerce to an array and keep the identifiers in sorted order.
    sorted_ids = np.sort(np.asarray(ids))
    assert isinstance(sorted_ids, np.ndarray) and sorted_ids.ndim == 1
    self.__ids = sorted_ids
|
177
|
+
|
178
|
+
@property
def masker(self):
    """:class:`nilearn.input_data.NiftiMasker` or similar: Masker object.

    Defines the space and location of the area of interest (e.g., 'brain').
    """
    return self.__masker

@masker.setter
def masker(self, mask):
    new_masker = get_masker(mask)
    if hasattr(self, "masker"):
        # A masker is already set: compare the affines of the old and new mask images.
        same_affine = np.array_equal(self.masker.mask_img.affine, new_masker.mask_img.affine)
        if not same_affine:
            # This message does not have an associated effect,
            # since matrix indices are calculated as necessary
            LGR.warning("New masker does not match old masker. Space is assumed to be the same.")

    self.__masker = new_masker
|
197
|
+
|
198
|
+
@property
def annotations(self):
    """:class:`pandas.DataFrame`: Labels describing studies in the dataset.

    One row per study/experiment. Each column is an individual label
    (e.g., 'emotion'), optionally prefixed with a feature group and two
    underscores (e.g., 'Neurosynth_TFIDF__emotion').
    """
    return self.__annotations

@annotations.setter
def annotations(self, df):
    # Validate first, then store the rows ordered by ID.
    _validate_df(df)
    ordered = df.sort_values(by="id")
    self.__annotations = ordered
|
213
|
+
|
214
|
+
@property
def coordinates(self):
    """:class:`pandas.DataFrame`: Coordinates in the dataset.

    .. versionchanged:: 0.0.10

        The coordinates attribute no longer includes the associated matrix indices
        (columns 'i', 'j', and 'k'). These columns are calculated as needed.

    One row per peak. Columns include ['x', 'y', 'z'] (peak locations in mm)
    and 'space' (Dataset's space).
    """
    return self.__coordinates

@coordinates.setter
def coordinates(self, df):
    # Validate first, then store the rows ordered by ID.
    _validate_df(df)
    ordered = df.sort_values(by="id")
    self.__coordinates = ordered
|
232
|
+
|
233
|
+
@property
def images(self):
    """:class:`pandas.DataFrame`: Images in the dataset.

    One row per study. Each image type has its own column (e.g., 'z') holding
    absolute file paths; the matching relative paths are kept in columns with
    the suffix '__relative' (e.g., 'z__relative').

    Warnings
    --------
    Images are assumed to be in the same space, although they may have
    different resolutions and affines. Images will be resampled as needed
    at the point where they are used, via :obj:`Dataset.masker`.
    """
    return self.__images

@images.setter
def images(self, df):
    # Generic validation, then image-specific validation, then order by ID.
    _validate_df(df)
    checked = _validate_images_df(df)
    self.__images = checked.sort_values(by="id")
|
254
|
+
|
255
|
+
@property
def metadata(self):
    """:class:`pandas.DataFrame`: Metadata describing studies in the dataset.

    One row per study and one column per metadata field (e.g., 'sample_sizes').
    """
    return self.__metadata

@metadata.setter
def metadata(self, df):
    # Validate first, then store the rows ordered by ID.
    _validate_df(df)
    ordered = df.sort_values(by="id")
    self.__metadata = ordered
|
268
|
+
|
269
|
+
@property
def texts(self):
    """:class:`pandas.DataFrame`: Texts in the dataset.

    One row per study and one column per text type (e.g., 'abstract').
    """
    return self.__texts

@texts.setter
def texts(self, df):
    # Validate first, then store the rows ordered by ID.
    _validate_df(df)
    ordered = df.sort_values(by="id")
    self.__texts = ordered
|
282
|
+
|
283
|
+
def slice(self, ids):
    """Return a reduced copy of the Dataset restricted to the requested IDs.

    Parameters
    ----------
    ids : array_like
        Study IDs to keep in the new dataset.

    Returns
    -------
    new_dset : :obj:`~nimare.dataset.Dataset`
        Deep copy of this Dataset in which every DataFrame attribute only
        contains rows whose 'id' is in ``ids``.
    """
    subset = copy.deepcopy(self)
    subset._ids = ids

    frame_names = ("annotations", "coordinates", "images", "metadata", "texts")
    for frame_name in frame_names:
        frame = getattr(subset, frame_name)
        keep_rows = frame["id"].isin(ids)
        setattr(subset, frame_name, frame.loc[keep_rows])

    return subset
|
304
|
+
|
305
|
+
def merge(self, right):
    """Combine this Dataset with another one into a new Dataset.

    .. versionadded:: 0.0.9

    Parameters
    ----------
    right : :obj:`~nimare.dataset.Dataset`
        Dataset to merge with. It must not share any ID with this Dataset.

    Returns
    -------
    :obj:`~nimare.dataset.Dataset`
        A Dataset of the two merged Datasets. It is a deep copy of ``self`` whose
        DataFrame attributes hold the rows of both inputs, sorted by 'id'.
    """
    assert isinstance(right, Dataset)
    if np.intersect1d(self.ids, right.ids).size:
        raise Exception("Duplicate IDs detected in both datasets.")

    merged = copy.deepcopy(self)
    merged._ids = np.concatenate((self.ids, right.ids))

    for frame_name in ("annotations", "coordinates", "images", "metadata", "texts"):
        frames = [getattr(self, frame_name), getattr(right, frame_name)]
        combined = pd.concat(frames, ignore_index=True, sort=False)
        combined = combined.sort_values(by="id").reset_index(drop=True)
        # Replace missing values introduced by non-shared columns with None
        combined = combined.where(~combined.isna(), None)
        setattr(merged, frame_name, combined)

    # Re-express the combined coordinates using this Dataset's masker and space
    merged.coordinates = _transform_coordinates_to_space(
        merged.coordinates,
        self.masker,
        self.space,
    )

    return merged
|
345
|
+
|
346
|
+
def update_path(self, new_path):
    """Point the image paths at a new base directory.

    The absolute version of ``new_path`` is stored in ``self.basepath`` and is
    prepended to every '<type>__relative' column of ``Dataset.images`` to fill
    (or overwrite) the corresponding '<type>' column.

    Parameters
    ----------
    new_path : :obj:`str`
        Path to prepend to relative paths of files in Dataset.images.
    """
    self.basepath = op.abspath(new_path)
    images_df = self.images

    # Collect the names up front: new columns may be added inside the loop.
    relative_cols = [name for name in images_df if name.endswith("__relative")]
    for relative_col in relative_cols:
        target_col = relative_col.replace("__relative", "")
        if target_col in images_df.columns:
            LGR.info(f"Overwriting images column {target_col}")
        images_df[target_col] = images_df[relative_col].apply(_try_prepend, prefix=self.basepath)

    self.images = images_df
|
365
|
+
|
366
|
+
def copy(self):
    """Return an independent deep copy of the Dataset."""
    duplicate = copy.deepcopy(self)
    return duplicate
|
369
|
+
|
370
|
+
def get(self, dict_, drop_invalid=True):
    """Retrieve files and/or metadata from the current Dataset.

    Parameters
    ----------
    dict_ : :obj:`dict`
        Dictionary specifying images or metadata to collect.
        Keys should be variables to be used as keys for results dictionary.
        Values should be tuples with two values:
        type ('image', 'metadata', 'coordinates', or 'annotations') and specific
        field corresponding to column of type-specific DataFrame
        (e.g., 'z' or 'sample_sizes'). The field is not used for 'coordinates'
        and 'annotations'.
    drop_invalid : :obj:`bool`, optional
        Whether to automatically ignore any studies without the required data or not.
        Default is True.

    Returns
    -------
    results : :obj:`dict`
        A dictionary of requested data. Keys correspond to the keys in ``dict_``,
        plus ``"id"`` with the retained IDs. 'image' and 'metadata' requests yield
        lists; 'coordinates' and 'annotations' requests yield a single DataFrame
        (empty if no study is retained).

    Raises
    ------
    ValueError
        If a requested type is not one of the supported types.
    Exception
        If some studies lack the requested data and ``drop_invalid`` is False.

    Examples
    --------
    >>> dset.get({'z_maps': ('image', 'z'), 'sample_sizes': ('metadata', 'sample_sizes')})
    >>> dset.get({'coordinates': ('coordinates', None)})
    """
    results = {"id": self.ids}
    keep_idx = np.arange(len(self.ids), dtype=int)
    for k, vals in dict_.items():
        kind = vals[0]
        if kind == "image":
            temp = self.get_images(imtype=vals[1])
        elif kind == "metadata":
            temp = self.get_metadata(field=vals[1])
        elif kind in ("coordinates", "annotations"):
            # One sub-DataFrame per ID; IDs without any row map to None
            frames_by_id = dict(iter(getattr(self, kind).groupby("id")))
            temp = [frames_by_id.get(id_) for id_ in self.ids]
        else:
            raise ValueError(f"Input '{kind}' not understood.")

        results[k] = temp
        temp_keep_idx = np.where([t is not None for t in temp])[0]
        keep_idx = np.intersect1d(keep_idx, temp_keep_idx)

    # reduce
    if drop_invalid and (len(keep_idx) != len(self.ids)):
        LGR.info(f"Retaining {len(keep_idx)}/{len(self.ids)} studies")
    elif len(keep_idx) != len(self.ids):
        raise Exception(
            f"Only {len(keep_idx)}/{len(self.ids)} in Dataset contain the necessary data. "
            "If you want to analyze the subset of studies with required data, "
            "set `drop_invalid` to True."
        )

    for k in results:
        results[k] = [results[k][i] for i in keep_idx]
        kind = dict_.get(k, [None])[0]
        if kind in ("coordinates", "annotations"):
            if results[k]:
                results[k] = pd.concat(results[k])
            else:
                # pd.concat rejects an empty list; return an empty frame with the same columns
                results[k] = getattr(self, kind).iloc[:0]

    return results
|
438
|
+
|
439
|
+
def _generic_column_getter(self, attr, ids=None, column=None, ignore_columns=None):
    """Extract information from DataFrame-based attributes.

    Parameters
    ----------
    attr : :obj:`str`
        The name of the DataFrame-format Dataset attribute to search.
    ids : :obj:`list`, :obj:`str`, or None, optional
        Study ID(s) within which to extract values.
        If None, extract values for all studies in the Dataset.
        Default is None.
    column : :obj:`str` or None, optional
        The column from which to extract values.
        If None, a list of column names is returned instead of values.
        Must be a column within Dataset.[attr].
    ignore_columns : :obj:`list` or None, optional
        Columns to leave out of the returned column names. Only used if ``column``
        is None. The ID columns are always left out. The list is not modified.

    Returns
    -------
    result : :obj:`list` or single value
        If ``column`` is given: the list of values in that column for the requested
        IDs, or only the first value when ``ids`` is a single string.
        If ``column`` is None: the list of available column names; when ``ids`` is
        given, only columns with at least one truthy value for those IDs.

    Raises
    ------
    ValueError
        If ``column`` is not an available column of the attribute.
    """
    # Build a new list: ``+=`` would extend the caller's list in place.
    ignore_columns = list(ignore_columns or []) + list(self._id_cols)

    df = getattr(self, attr)
    # A single string ID combined with a specific column yields a scalar result
    return_first = isinstance(ids, str) and column is not None
    ids = _listify(ids)

    available_types = [c for c in df.columns if c not in self._id_cols]

    if column is not None:
        if column not in available_types:
            raise ValueError(
                f"{column} not found in {attr}.\nAvailable types: {', '.join(available_types)}"
            )

        values = df[column] if ids is None else df[column].loc[df["id"].isin(ids)]
        result = values.tolist()
        return result[0] if return_first else result

    # No column requested: report column names, honoring ``ignore_columns``
    result = [c for c in available_types if c not in ignore_columns]
    if ids is not None:
        selected = df.loc[df["id"].isin(ids)]
        result = [c for c in result if any(selected[c].tolist())]

    return result
|
497
|
+
|
498
|
+
def get_labels(self, ids=None):
    """List the annotation labels available in the Dataset.

    Parameters
    ----------
    ids : :obj:`list`, optional
        IDs in the Dataset for which to find labels. Default is None, in which
        case every label column is returned.

    Returns
    -------
    labels : :obj:`list`
        Label column names. When ``ids`` is given, only labels with at least one
        truthy value among the selected studies are returned.
    """
    label_columns = [col for col in self.annotations.columns if col not in self._id_cols]
    if ids is None:
        return label_columns

    if not isinstance(ids, list):
        ids = _listify(ids)

    selected_rows = self.annotations.loc[self.annotations["id"].isin(ids)]
    has_value = selected_rows[label_columns].any(axis=0)
    return has_value.loc[has_value].index.tolist()
|
522
|
+
|
523
|
+
def get_texts(self, ids=None, text_type=None):
    """Get texts of a given type for selected IDs.

    Thin wrapper that queries the ``texts`` DataFrame through
    ``_generic_column_getter``.

    Parameters
    ----------
    ids : :obj:`list`, optional
        IDs in the Dataset for which to find texts. Default is None, in which
        case all studies are considered.
    text_type : :obj:`str`, optional
        Type of text to extract. Corresponds to column name in
        Dataset.texts DataFrame. Default is None.

    Returns
    -------
    texts : :obj:`list`
        List of texts of requested type for selected IDs.
    """
    return self._generic_column_getter("texts", ids=ids, column=text_type)
|
542
|
+
|
543
|
+
def get_metadata(self, ids=None, field=None):
    """Get metadata values for selected IDs.

    Thin wrapper that queries the ``metadata`` DataFrame through
    ``_generic_column_getter``.

    Parameters
    ----------
    ids : :obj:`list`, optional
        IDs in the Dataset for which to find metadata. Default is None, in which
        case all studies are considered.
    field : :obj:`str`, optional
        Metadata field to extract. Corresponds to column name in
        Dataset.metadata DataFrame. Default is None.

    Returns
    -------
    metadata : :obj:`list`
        List of values of requested type for selected IDs.
    """
    return self._generic_column_getter("metadata", ids=ids, column=field)
|
562
|
+
|
563
|
+
def get_images(self, ids=None, imtype=None):
    """Get images of a certain type for a subset of studies in the dataset.

    Queries the ``images`` DataFrame through ``_generic_column_getter``, passing
    the 'space' column and every '__relative' column as columns to ignore.

    Parameters
    ----------
    ids : :obj:`list`, optional
        IDs in the Dataset for which to find images. Default is None, in which
        case all studies are considered.
    imtype : :obj:`str`, optional
        Type of image to extract. Corresponds to column name in
        Dataset.images DataFrame. Default is None.

    Returns
    -------
    images : :obj:`list`
        List of images of requested type for selected IDs.
    """
    relative_columns = [name for name in self.images.columns if name.endswith("__relative")]
    skipped = ["space"] + relative_columns
    return self._generic_column_getter(
        "images",
        ids=ids,
        column=imtype,
        ignore_columns=skipped,
    )
|
589
|
+
|
590
|
+
def get_studies_by_label(self, labels=None, label_threshold=0.001):
    """Extract list of studies with a given label.

    .. versionchanged:: 0.0.10

        Fix bug in which all IDs were returned when a label wasn't present in the Dataset.

    .. versionchanged:: 0.0.9

        Default value for label_threshold changed to 0.001.

    Parameters
    ----------
    labels : :obj:`str` or :obj:`list`
        Label or list of labels to use to search Dataset. If a contrast has all of
        the labels at or above the threshold, it will be returned.
        Although the signature default is None, a value must be provided:
        anything other than a string or a list raises a ValueError.
    label_threshold : :obj:`float`, optional
        Minimum annotation value (inclusive) for a label to count as present.
        Default is 0.001.

    Returns
    -------
    found_ids : :obj:`list`
        A list of IDs from the Dataset found by the search criteria.

    Raises
    ------
    ValueError
        If ``labels`` is neither a string nor a list, or if any requested label is
        not a column of ``Dataset.annotations``.
    """
    if isinstance(labels, str):
        labels = [labels]
    elif not isinstance(labels, list):
        raise ValueError(f"Argument 'labels' cannot be {type(labels)}")

    missing_labels = [label for label in labels if label not in self.annotations.columns]
    if missing_labels:
        raise ValueError(f"Missing label(s): {', '.join(missing_labels)}")

    # A study matches only if every requested label reaches the threshold.
    found_rows = (self.annotations[labels] >= label_threshold).all(axis=1)
    # An all-False selection naturally yields an empty list.
    return self.annotations.loc[found_rows, "id"].tolist()
|
632
|
+
|
633
|
+
def get_studies_by_mask(self, mask):
    """Extract list of studies with at least one coordinate in mask.

    Parameters
    ----------
    mask : img_like
        Mask across which to search for coordinates.

    Returns
    -------
    found_ids : :obj:`list`
        A list of IDs from the Dataset with at least one focus in the mask.
    """
    mask = load_niimg(mask)
    dset_mask = self.masker.mask_img

    if not np.array_equal(dset_mask.affine, mask.affine):
        LGR.warning("Mask affine does not match Dataset affine. Assuming same space.")

    # Voxel indices of every focus in the grid of the supplied mask
    dset_ijk = np.asarray(mm2vox(self.coordinates[["x", "y", "z"]].values, mask.affine))
    mask_data = mask.get_fdata()

    # Look each focus up directly in the mask volume instead of comparing every
    # focus against every mask voxel, which needs n_foci x n_voxels memory.
    voxel_idx = dset_ijk.astype(int)
    inside = np.all(
        # Only exact integer indices within the volume bounds can hit a voxel
        (voxel_idx == dset_ijk) & (voxel_idx >= 0) & (voxel_idx < np.array(mask_data.shape)),
        axis=1,
    )
    in_mask = np.zeros(len(voxel_idx), dtype=bool)
    in_mask[inside] = mask_data[tuple(voxel_idx[inside].T)].astype(bool)

    found_ids = list(self.coordinates.loc[in_mask, "id"].unique())

    return found_ids
|
661
|
+
|
662
|
+
def get_studies_by_coordinate(self, xyz, r=20):
    """Extract list of studies with at least one focus within radius of requested coordinates.

    Parameters
    ----------
    xyz : (X x 3) array_like
        List of coordinates against which to find studies.
        A single ``[x, y, z]`` coordinate is also accepted.
    r : :obj:`float`, optional
        Radius (in mm) within which to find studies. Default is 20mm.

    Returns
    -------
    found_ids : :obj:`list`
        A list of IDs from the Dataset with at least one focus within
        radius r (inclusive) of requested coordinates.

    Raises
    ------
    ValueError
        If ``xyz`` cannot be interpreted as an (X x 3) array.
    """
    from scipy.spatial.distance import cdist

    # Promote a single coordinate to a one-row array
    xyz = np.atleast_2d(np.array(xyz))
    if xyz.ndim != 2 or xyz.shape[1] != 3:
        raise ValueError(f"xyz must be an (X x 3) array, but has shape {xyz.shape}")

    # distances[i, j] is the distance from requested coordinate i to focus j
    distances = cdist(xyz, self.coordinates[["x", "y", "z"]].values)
    within_radius = np.any(distances <= r, axis=0)
    found_ids = list(self.coordinates.loc[within_radius, "id"].unique())
    return found_ids
|