nimare-0.4.2rc4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. benchmarks/__init__.py +0 -0
  2. benchmarks/bench_cbma.py +57 -0
  3. nimare/__init__.py +45 -0
  4. nimare/_version.py +21 -0
  5. nimare/annotate/__init__.py +21 -0
  6. nimare/annotate/cogat.py +213 -0
  7. nimare/annotate/gclda.py +924 -0
  8. nimare/annotate/lda.py +147 -0
  9. nimare/annotate/text.py +75 -0
  10. nimare/annotate/utils.py +87 -0
  11. nimare/base.py +217 -0
  12. nimare/cli.py +124 -0
  13. nimare/correct.py +462 -0
  14. nimare/dataset.py +685 -0
  15. nimare/decode/__init__.py +33 -0
  16. nimare/decode/base.py +115 -0
  17. nimare/decode/continuous.py +462 -0
  18. nimare/decode/discrete.py +753 -0
  19. nimare/decode/encode.py +110 -0
  20. nimare/decode/utils.py +44 -0
  21. nimare/diagnostics.py +510 -0
  22. nimare/estimator.py +139 -0
  23. nimare/extract/__init__.py +19 -0
  24. nimare/extract/extract.py +466 -0
  25. nimare/extract/utils.py +295 -0
  26. nimare/generate.py +331 -0
  27. nimare/io.py +635 -0
  28. nimare/meta/__init__.py +39 -0
  29. nimare/meta/cbma/__init__.py +6 -0
  30. nimare/meta/cbma/ale.py +951 -0
  31. nimare/meta/cbma/base.py +947 -0
  32. nimare/meta/cbma/mkda.py +1361 -0
  33. nimare/meta/cbmr.py +970 -0
  34. nimare/meta/ibma.py +1683 -0
  35. nimare/meta/kernel.py +501 -0
  36. nimare/meta/models.py +1199 -0
  37. nimare/meta/utils.py +494 -0
  38. nimare/nimads.py +492 -0
  39. nimare/reports/__init__.py +24 -0
  40. nimare/reports/base.py +664 -0
  41. nimare/reports/default.yml +123 -0
  42. nimare/reports/figures.py +651 -0
  43. nimare/reports/report.tpl +160 -0
  44. nimare/resources/__init__.py +1 -0
  45. nimare/resources/atlases/Harvard-Oxford-LICENSE +93 -0
  46. nimare/resources/atlases/HarvardOxford-cort-maxprob-thr25-2mm.nii.gz +0 -0
  47. nimare/resources/database_file_manifest.json +142 -0
  48. nimare/resources/english_spellings.csv +1738 -0
  49. nimare/resources/filenames.json +32 -0
  50. nimare/resources/neurosynth_laird_studies.json +58773 -0
  51. nimare/resources/neurosynth_stoplist.txt +396 -0
  52. nimare/resources/nidm_pain_dset.json +1349 -0
  53. nimare/resources/references.bib +541 -0
  54. nimare/resources/semantic_knowledge_children.txt +325 -0
  55. nimare/resources/semantic_relatedness_children.txt +249 -0
  56. nimare/resources/templates/MNI152_2x2x2_brainmask.nii.gz +0 -0
  57. nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_T1w.nii.gz +0 -0
  58. nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz +0 -0
  59. nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_T1w.nii.gz +0 -0
  60. nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz +0 -0
  61. nimare/results.py +225 -0
  62. nimare/stats.py +276 -0
  63. nimare/tests/__init__.py +1 -0
  64. nimare/tests/conftest.py +229 -0
  65. nimare/tests/data/amygdala_roi.nii.gz +0 -0
  66. nimare/tests/data/data-neurosynth_version-7_coordinates.tsv.gz +0 -0
  67. nimare/tests/data/data-neurosynth_version-7_metadata.tsv.gz +0 -0
  68. nimare/tests/data/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz +0 -0
  69. nimare/tests/data/data-neurosynth_version-7_vocab-terms_vocabulary.txt +100 -0
  70. nimare/tests/data/neurosynth_dset.json +2868 -0
  71. nimare/tests/data/neurosynth_laird_studies.json +58773 -0
  72. nimare/tests/data/nidm_pain_dset.json +1349 -0
  73. nimare/tests/data/nimads_annotation.json +1 -0
  74. nimare/tests/data/nimads_studyset.json +1 -0
  75. nimare/tests/data/test_baseline.txt +2 -0
  76. nimare/tests/data/test_pain_dataset.json +1278 -0
  77. nimare/tests/data/test_pain_dataset_multiple_contrasts.json +1242 -0
  78. nimare/tests/data/test_sleuth_file.txt +18 -0
  79. nimare/tests/data/test_sleuth_file2.txt +10 -0
  80. nimare/tests/data/test_sleuth_file3.txt +5 -0
  81. nimare/tests/data/test_sleuth_file4.txt +5 -0
  82. nimare/tests/data/test_sleuth_file5.txt +5 -0
  83. nimare/tests/test_annotate_cogat.py +32 -0
  84. nimare/tests/test_annotate_gclda.py +86 -0
  85. nimare/tests/test_annotate_lda.py +27 -0
  86. nimare/tests/test_dataset.py +99 -0
  87. nimare/tests/test_decode_continuous.py +132 -0
  88. nimare/tests/test_decode_discrete.py +92 -0
  89. nimare/tests/test_diagnostics.py +168 -0
  90. nimare/tests/test_estimator_performance.py +385 -0
  91. nimare/tests/test_extract.py +46 -0
  92. nimare/tests/test_generate.py +247 -0
  93. nimare/tests/test_io.py +240 -0
  94. nimare/tests/test_meta_ale.py +298 -0
  95. nimare/tests/test_meta_cbmr.py +295 -0
  96. nimare/tests/test_meta_ibma.py +240 -0
  97. nimare/tests/test_meta_kernel.py +209 -0
  98. nimare/tests/test_meta_mkda.py +234 -0
  99. nimare/tests/test_nimads.py +21 -0
  100. nimare/tests/test_reports.py +110 -0
  101. nimare/tests/test_stats.py +101 -0
  102. nimare/tests/test_transforms.py +272 -0
  103. nimare/tests/test_utils.py +200 -0
  104. nimare/tests/test_workflows.py +221 -0
  105. nimare/tests/utils.py +126 -0
  106. nimare/transforms.py +907 -0
  107. nimare/utils.py +1367 -0
  108. nimare/workflows/__init__.py +14 -0
  109. nimare/workflows/base.py +189 -0
  110. nimare/workflows/cbma.py +165 -0
  111. nimare/workflows/ibma.py +108 -0
  112. nimare/workflows/macm.py +77 -0
  113. nimare/workflows/misc.py +65 -0
  114. nimare-0.4.2rc4.dist-info/LICENSE +21 -0
  115. nimare-0.4.2rc4.dist-info/METADATA +124 -0
  116. nimare-0.4.2rc4.dist-info/RECORD +119 -0
  117. nimare-0.4.2rc4.dist-info/WHEEL +5 -0
  118. nimare-0.4.2rc4.dist-info/entry_points.txt +2 -0
  119. nimare-0.4.2rc4.dist-info/top_level.txt +2 -0
nimare/dataset.py ADDED
@@ -0,0 +1,685 @@
"""Classes for representing datasets of images and/or coordinates."""

import copy
import inspect
import json
import logging
import os.path as op
import warnings

import numpy as np
import pandas as pd
from nilearn._utils import load_niimg

from nimare.base import NiMAREBase
from nimare.utils import (
    _dict_to_coordinates,
    _dict_to_df,
    _listify,
    _transform_coordinates_to_space,
    _try_prepend,
    _validate_df,
    _validate_images_df,
    get_masker,
    get_template,
    mm2vox,
)

LGR = logging.getLogger(__name__)

class Dataset(NiMAREBase):
    """Storage container for a coordinate- and/or image-based meta-analytic dataset/database.

    .. versionchanged:: 0.0.9

        * [ENH] Add merge method to Dataset class

    .. versionchanged:: 0.0.8

        * [FIX] Set ``nimare.dataset.Dataset.basepath`` in :func:`update_path` using absolute path.

    Parameters
    ----------
    source : :obj:`str` or :obj:`dict`
        Path to a JSON file containing a dictionary with database information,
        or the dictionary itself.
    target : :obj:`str`, optional
        Desired coordinate space for coordinates. Names follow NIDM convention.
        Default is 'mni152_2mm' (MNI space with 2x2x2mm voxels).
        This parameter has no impact on images.
    mask : :obj:`str`, :class:`~nibabel.nifti1.Nifti1Image`, \
           :class:`~nilearn.input_data.NiftiMasker` or similar, or None, optional
        Mask(er) to use. If None, uses the target space image, with all
        non-zero voxels included in the mask.

    Attributes
    ----------
    space : :obj:`str`
        Standard space. Same as ``target`` parameter.

    Notes
    -----
    Images loaded into a Dataset are assumed to be in the same space.
    If images have different resolutions or affines from the Dataset's masker,
    they will be resampled automatically, at the point where they are used,
    by :obj:`Dataset.masker`.
    """

    _id_cols = ["id", "study_id", "contrast_id"]

    def __init__(self, source, target="mni152_2mm", mask=None):
        if isinstance(source, str):
            with open(source, "r") as f_obj:
                data = json.load(f_obj)
        elif isinstance(source, dict):
            data = source
        else:
            raise Exception("`source` needs to be a file path or a dictionary")

        # Datasets are organized by study, then experiment.
        # To generate unique IDs, we combine study ID with experiment ID.
        # Build list of IDs.
        id_columns = ["id", "study_id", "contrast_id"]
        all_ids = []
        for pid in data.keys():
            for expid in data[pid]["contrasts"].keys():
                id_ = f"{pid}-{expid}"
                all_ids.append([id_, pid, expid])
        id_df = pd.DataFrame(columns=id_columns, data=all_ids)
        id_df = id_df.set_index("id", drop=False)
        self._ids = id_df.index.values

        # Set up Masker
        if mask is None:
            mask = get_template(target, mask="brain")
        self.masker = mask
        self.space = target

        self.annotations = _dict_to_df(id_df, data, key="labels")
        self.coordinates = _dict_to_coordinates(data, masker=self.masker, space=self.space)
        self.images = _dict_to_df(id_df, data, key="images")
        self.metadata = _dict_to_df(id_df, data, key="metadata")
        self.texts = _dict_to_df(id_df, data, key="text")
        self.basepath = None

        if "z_stat" in self.coordinates.columns:
            # The "z_stat" column may contain Nones
            if not self.coordinates["z_stat"].isna().any():
                # Ensure z_stat is treated as float
                self.coordinates["z_stat"] = self.coordinates["z_stat"].astype(float)

                # Raise warning if coordinates dataset contains both positive and negative z_stats
                if ((self.coordinates["z_stat"].values >= 0).any()) and (
                    (self.coordinates["z_stat"].values < 0).any()
                ):
                    warnings.warn(
                        "Coordinates dataset contains both positive and negative z_stats. "
                        "The algorithms currently implemented in NiMARE are designed for "
                        "one-sided tests. This might lead to unexpected results."
                    )

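The ``source`` mapping mirrors the ID-building loop above: studies at the top level, each containing a "contrasts" dictionary. A minimal sketch follows; the "coords" and "metadata" field names are assumptions based on NiMARE's bundled example files (e.g., nidm_pain_dset.json), not guaranteed by this module alone:

    from nimare.dataset import Dataset

    source = {
        "study-01": {
            "contrasts": {
                "1": {
                    "coords": {"space": "MNI", "x": [-38.0], "y": [-22.0], "z": [52.0]},
                    "metadata": {"sample_sizes": [25]},
                },
            },
        },
    }
    dset = Dataset(source)
    print(dset.ids)  # ['study-01-1']
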
    def __repr__(self):
        """Show basic Dataset representation.

        It's basically the same as the NiMAREBase representation, but with the number of
        experiments in the Dataset represented as well.
        """
        # Get default parameter values for the object
        signature = inspect.signature(self.__init__)
        defaults = {
            k: v.default
            for k, v in signature.parameters.items()
            if v.default is not inspect.Parameter.empty
        }

        # Eliminate any sub-parameters (e.g., parameters for an Estimator's KernelTransformer),
        # as well as default values
        params = self.get_params()
        params = {k: v for k, v in params.items() if "__" not in k}
        # Parameter "target" is stored as attribute "space",
        # and we want to show it regardless of whether it's the default or not
        params["space"] = self.space
        params.pop("target")
        params = {k: v for k, v in params.items() if defaults.get(k) != v}

        # Convert to strings
        param_strs = []
        for k, v in params.items():
            if isinstance(v, str):
                # Wrap string values in single quotes
                param_str = f"{k}='{v}'"
            else:
                # Keep everything else as-is based on its own repr
                param_str = f"{k}={v}"
            param_strs.append(param_str)

        params_str = ", ".join(param_strs)
        params_str = f"{len(self.ids)} experiments{', ' if params_str else ''}{params_str}"
        rep = f"{self.__class__.__name__}({params_str})"
        return rep

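Given the format string above, the representation always leads with the experiment count, and ``space`` survives the default-value filtering because ``defaults`` is keyed on ``target``. A hypothetical example:

    >>> dset
    Dataset(21 experiments, space='mni152_2mm')
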
    @property
    def ids(self):
        """numpy.ndarray: 1D array of identifiers in Dataset.

        The associated setter for this property is private, as ``Dataset.ids`` is immutable.
        """
        return self.__ids

    @ids.setter
    def _ids(self, ids):
        ids = np.sort(np.asarray(ids))
        assert isinstance(ids, np.ndarray) and ids.ndim == 1
        self.__ids = ids

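Because the setter is bound to the name ``_ids`` rather than ``ids``, public assignment fails while internal code (e.g., ``slice`` and ``merge`` below) can still replace the array. A sketch with a hypothetical ``dset`` (the exact AttributeError message varies by Python version):

    dset.ids = ["study-99-1"]    # AttributeError: can't set attribute
    dset._ids = ["b-1", "a-1"]   # allowed; stored sorted
    dset.ids                     # array(['a-1', 'b-1'], dtype='<U3')
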
    @property
    def masker(self):
        """:class:`nilearn.input_data.NiftiMasker` or similar: Masker object.

        Defines the space and location of the area of interest (e.g., 'brain').
        """
        return self.__masker

    @masker.setter
    def masker(self, mask):
        mask = get_masker(mask)
        if hasattr(self, "masker") and not np.array_equal(
            self.masker.mask_img.affine, mask.mask_img.affine
        ):
            # This message does not have an associated effect,
            # since matrix indices are calculated as necessary
            LGR.warning("New masker does not match old masker. Space is assumed to be the same.")

        self.__masker = mask

    @property
    def annotations(self):
        """:class:`pandas.DataFrame`: Labels describing studies in the dataset.

        Each study/experiment has its own row.
        Columns correspond to individual labels (e.g., 'emotion'), and may
        be prefixed with a feature group followed by two underscores
        (e.g., 'Neurosynth_TFIDF__emotion').
        """
        return self.__annotations

    @annotations.setter
    def annotations(self, df):
        _validate_df(df)
        self.__annotations = df.sort_values(by="id")

    @property
    def coordinates(self):
        """:class:`pandas.DataFrame`: Coordinates in the dataset.

        .. versionchanged:: 0.0.10

            The coordinates attribute no longer includes the associated matrix indices
            (columns 'i', 'j', and 'k'). These columns are calculated as needed.

        Each study has one row for each peak.
        Columns include ['x', 'y', 'z'] (peak locations in mm) and 'space' (Dataset's space).
        """
        return self.__coordinates

    @coordinates.setter
    def coordinates(self, df):
        _validate_df(df)
        self.__coordinates = df.sort_values(by="id")

    @property
    def images(self):
        """:class:`pandas.DataFrame`: Images in the dataset.

        Each image type has its own column (e.g., 'z') with absolute paths to
        files, and each study has its own row.
        Additionally, relative paths to image files are stored in columns with
        the suffix '__relative' (e.g., 'z__relative').

        Warnings
        --------
        Images are assumed to be in the same space, although they may have
        different resolutions and affines. Images will be resampled as needed
        at the point where they are used, via :obj:`Dataset.masker`.
        """
        return self.__images

    @images.setter
    def images(self, df):
        _validate_df(df)
        self.__images = _validate_images_df(df).sort_values(by="id")

    @property
    def metadata(self):
        """:class:`pandas.DataFrame`: Metadata describing studies in the dataset.

        Each metadata field has its own column (e.g., 'sample_sizes') and each study
        has its own row.
        """
        return self.__metadata

    @metadata.setter
    def metadata(self, df):
        _validate_df(df)
        self.__metadata = df.sort_values(by="id")

    @property
    def texts(self):
        """:class:`pandas.DataFrame`: Texts in the dataset.

        Each text type has its own column (e.g., 'abstract') and each study
        has its own row.
        """
        return self.__texts

    @texts.setter
    def texts(self, df):
        _validate_df(df)
        self.__texts = df.sort_values(by="id")

    def slice(self, ids):
        """Create a new dataset with only requested IDs.

        Parameters
        ----------
        ids : array_like
            List of study IDs to include in new dataset

        Returns
        -------
        new_dset : :obj:`~nimare.dataset.Dataset`
            Reduced Dataset containing only requested studies.
        """
        new_dset = copy.deepcopy(self)
        new_dset._ids = ids
        for attribute in ("annotations", "coordinates", "images", "metadata", "texts"):
            df = getattr(new_dset, attribute)
            df = df.loc[df["id"].isin(ids)]
            setattr(new_dset, attribute, df)

        return new_dset

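A usage sketch for ``slice``, assuming ``dset`` is an existing Dataset:

    first_ten = dset.slice(dset.ids[:10])
    print(first_ten)  # e.g., Dataset(10 experiments, space='mni152_2mm')
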
    def merge(self, right):
        """Merge two Datasets.

        .. versionadded:: 0.0.9

        Parameters
        ----------
        right : :obj:`~nimare.dataset.Dataset`
            Dataset to merge with.

        Returns
        -------
        :obj:`~nimare.dataset.Dataset`
            A new Dataset combining the studies of the two input Datasets.
        """
        assert isinstance(right, Dataset)
        shared_ids = np.intersect1d(self.ids, right.ids)
        if shared_ids.size:
            raise Exception("Duplicate IDs detected in both datasets.")

        all_ids = np.concatenate((self.ids, right.ids))
        new_dset = copy.deepcopy(self)
        new_dset._ids = all_ids

        for attribute in ("annotations", "coordinates", "images", "metadata", "texts"):
            df1 = getattr(self, attribute)
            df2 = getattr(right, attribute)
            new_df = pd.concat([df1, df2], ignore_index=True, sort=False)
            new_df.sort_values(by="id", inplace=True)
            new_df.reset_index(drop=True, inplace=True)
            new_df = new_df.where(~new_df.isna(), None)
            setattr(new_dset, attribute, new_df)

        new_dset.coordinates = _transform_coordinates_to_space(
            new_dset.coordinates,
            self.masker,
            self.space,
        )

        return new_dset

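A usage sketch; note that the merged coordinates are transformed into the left-hand (``self``) Dataset's space:

    combined = dset1.merge(dset2)  # raises if the two Datasets share any study IDs
    assert len(combined.ids) == len(dset1.ids) + len(dset2.ids)
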
    def update_path(self, new_path):
        """Update paths to images.

        Prepends new path to the relative path for files in Dataset.images.

        Parameters
        ----------
        new_path : :obj:`str`
            Path to prepend to relative paths of files in Dataset.images.
        """
        self.basepath = op.abspath(new_path)
        df = self.images
        relative_path_cols = [c for c in df if c.endswith("__relative")]
        for col in relative_path_cols:
            abs_col = col.replace("__relative", "")
            if abs_col in df.columns:
                LGR.info(f"Overwriting images column {abs_col}")
            df[abs_col] = df[col].apply(_try_prepend, prefix=self.basepath)
        self.images = df

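A sketch of the resulting column behavior, assuming an images table with a hypothetical 'z__relative' column:

    dset.update_path("/data/nimare_images")
    # A 'z__relative' value of 'study01/z.nii.gz' now produces
    # dset.images['z'] == '/data/nimare_images/study01/z.nii.gz'
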
    def copy(self):
        """Create a copy of the Dataset."""
        return copy.deepcopy(self)

    def get(self, dict_, drop_invalid=True):
        """Retrieve files and/or metadata from the current Dataset.

        Parameters
        ----------
        dict_ : :obj:`dict`
            Dictionary specifying images or metadata to collect.
            Keys should be variables to be used as keys for the results dictionary.
            Values should be tuples with two values:
            type (e.g., 'image' or 'metadata') and specific field corresponding
            to column of type-specific DataFrame (e.g., 'z' or 'sample_sizes').
        drop_invalid : :obj:`bool`, optional
            Whether to automatically ignore any studies without the required data or not.
            Default is True.

        Returns
        -------
        results : :obj:`dict`
            A dictionary of lists of requested data. Keys correspond to the keys in ``dict_``.

        Examples
        --------
        >>> dset.get({'z_maps': ('image', 'z'), 'sample_sizes': ('metadata', 'sample_sizes')})
        >>> dset.get({'coordinates': ('coordinates', None)})
        """
        results = {}
        results["id"] = self.ids
        keep_idx = np.arange(len(self.ids), dtype=int)
        for k, vals in dict_.items():
            if vals[0] == "image":
                temp = self.get_images(imtype=vals[1])
            elif vals[0] == "metadata":
                temp = self.get_metadata(field=vals[1])
            elif vals[0] == "coordinates":
                dset_coord_groupby_id = dict(iter(self.coordinates.groupby("id")))
                temp = [
                    dset_coord_groupby_id[id_] if id_ in dset_coord_groupby_id.keys() else None
                    for id_ in self.ids
                ]
            elif vals[0] == "annotations":
                dset_annot_groupby_id = dict(iter(self.annotations.groupby("id")))
                temp = [
                    dset_annot_groupby_id[id_] if id_ in dset_annot_groupby_id.keys() else None
                    for id_ in self.ids
                ]
            else:
                raise ValueError(f"Input '{vals[0]}' not understood.")

            results[k] = temp
            temp_keep_idx = np.where([t is not None for t in temp])[0]
            keep_idx = np.intersect1d(keep_idx, temp_keep_idx)

        # reduce
        if drop_invalid and (len(keep_idx) != len(self.ids)):
            LGR.info(f"Retaining {len(keep_idx)}/{len(self.ids)} studies")
        elif len(keep_idx) != len(self.ids):
            raise Exception(
                f"Only {len(keep_idx)}/{len(self.ids)} in Dataset contain the necessary data. "
                "If you want to analyze the subset of studies with required data, "
                "set `drop_invalid` to True."
            )

        for k in results:
            results[k] = [results[k][i] for i in keep_idx]
            if dict_.get(k, [None])[0] in ("coordinates", "annotations"):
                results[k] = pd.concat(results[k])

        return results

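A sketch of a combined request, assuming the Dataset has z maps and sample sizes for at least some studies:

    res = dset.get(
        {"z_maps": ("image", "z"), "sample_sizes": ("metadata", "sample_sizes")},
        drop_invalid=True,  # drop studies missing either field, with a log message
    )
    # res["id"], res["z_maps"], and res["sample_sizes"] are aligned lists,
    # restricted to studies that have both requested fields.
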
    def _generic_column_getter(self, attr, ids=None, column=None, ignore_columns=None):
        """Extract information from DataFrame-based attributes.

        Parameters
        ----------
        attr : :obj:`str`
            The name of the DataFrame-format Dataset attribute to search.
        ids : :obj:`list` or None, optional
            A list of study IDs within which to extract values.
            If None, extract values for all studies in the Dataset.
            Default is None.
        column : :obj:`str` or None, optional
            The column from which to extract values.
            If None, a list of all columns with valid values will be returned.
            Must be a column within Dataset.[attr].
        ignore_columns : :obj:`list` or None, optional
            A list of columns to ignore. Only used if ``column`` is None.

        Returns
        -------
        result : :obj:`list` or :obj:`str`
            A list of values or a string, depending on whether ``ids`` is a list (or None)
            or a single string.
        """
        if ignore_columns is None:
            ignore_columns = self._id_cols
        else:
            ignore_columns += self._id_cols

        df = getattr(self, attr)
        return_first = False

        if isinstance(ids, str) and column is not None:
            return_first = True
        ids = _listify(ids)

        available_types = [c for c in df.columns if c not in self._id_cols]
        if (column is not None) and (column not in available_types):
            raise ValueError(
                f"{column} not found in {attr}.\nAvailable types: {', '.join(available_types)}"
            )

        if column is not None:
            if ids is not None:
                result = df[column].loc[df["id"].isin(ids)].tolist()
            else:
                result = df[column].tolist()
        else:
            # Per the docstring, ``ignore_columns`` only applies when ``column`` is None
            usable_types = [c for c in available_types if c not in ignore_columns]
            if ids is not None:
                result = {v: df[v].loc[df["id"].isin(ids)].tolist() for v in usable_types}
                result = {k: v for k, v in result.items() if any(v)}
            else:
                result = list(usable_types)

        if return_first:
            return result[0]
        else:
            return result

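The dispatch above yields four result shapes depending on the ``ids``/``column`` combination; ``get_texts`` (below) illustrates them with hypothetical IDs and columns:

    dset.get_texts(ids="pain01-1", text_type="abstract")  # single string
    dset.get_texts(text_type="abstract")                  # list, one entry per study
    dset.get_texts(ids=["pain01-1"])                      # dict of {text_type: values}
    dset.get_texts()                                      # list of available text types
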
    def get_labels(self, ids=None):
        """Extract list of labels for which studies in Dataset have annotations.

        Parameters
        ----------
        ids : :obj:`list`, optional
            A list of IDs in the Dataset for which to find labels. Default is
            None, in which case all labels are returned.

        Returns
        -------
        labels : :obj:`list`
            List of labels for which there are annotations in the Dataset.
        """
        if not isinstance(ids, list) and ids is not None:
            ids = _listify(ids)

        result = [c for c in self.annotations.columns if c not in self._id_cols]
        if ids is not None:
            temp_annotations = self.annotations.loc[self.annotations["id"].isin(ids)]
            res = temp_annotations[result].any(axis=0)
            result = res.loc[res].index.tolist()

        return result

    def get_texts(self, ids=None, text_type=None):
        """Extract list of texts of a given type for selected IDs.

        Parameters
        ----------
        ids : :obj:`list`, optional
            A list of IDs in the Dataset for which to find texts. Default is
            None, in which case all texts of requested type are returned.
        text_type : :obj:`str`, optional
            Type of text to extract. Corresponds to column name in
            Dataset.texts DataFrame. Default is None.

        Returns
        -------
        texts : :obj:`list`
            List of texts of requested type for selected IDs.
        """
        result = self._generic_column_getter("texts", ids=ids, column=text_type)
        return result

    def get_metadata(self, ids=None, field=None):
        """Get metadata from Dataset.

        Parameters
        ----------
        ids : :obj:`list`, optional
            A list of IDs in the Dataset for which to find metadata. Default is
            None, in which case all metadata of requested type are returned.
        field : :obj:`str`, optional
            Metadata field to extract. Corresponds to column name in
            Dataset.metadata DataFrame. Default is None.

        Returns
        -------
        metadata : :obj:`list`
            List of values of requested type for selected IDs.
        """
        result = self._generic_column_getter("metadata", ids=ids, column=field)
        return result

    def get_images(self, ids=None, imtype=None):
        """Get images of a certain type for a subset of studies in the dataset.

        Parameters
        ----------
        ids : :obj:`list`, optional
            A list of IDs in the Dataset for which to find images. Default is
            None, in which case all images of requested type are returned.
        imtype : :obj:`str`, optional
            Type of image to extract. Corresponds to column name in
            Dataset.images DataFrame. Default is None.

        Returns
        -------
        images : :obj:`list`
            List of images of requested type for selected IDs.
        """
        ignore_columns = ["space"]
        ignore_columns += [c for c in self.images.columns if c.endswith("__relative")]
        result = self._generic_column_getter(
            "images",
            ids=ids,
            column=imtype,
            ignore_columns=ignore_columns,
        )
        return result

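Usage sketch for the three thin wrappers above; the column names ('z', 'abstract', 'sample_sizes') depend on what the Dataset actually contains:

    z_paths = dset.get_images(imtype="z")              # absolute paths (None where missing)
    abstracts = dset.get_texts(text_type="abstract")
    sample_sizes = dset.get_metadata(field="sample_sizes")
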
    def get_studies_by_label(self, labels=None, label_threshold=0.001):
        """Extract list of studies with a given label.

        .. versionchanged:: 0.0.10

            Fix bug in which all IDs were returned when a label wasn't present in the Dataset.

        .. versionchanged:: 0.0.9

            Default value for label_threshold changed to 0.001.

        Parameters
        ----------
        labels : :obj:`list`, optional
            List of labels to use to search Dataset. If a contrast has all of
            the labels above the threshold, it will be returned.
            Default is None.
        label_threshold : :obj:`float`, optional
            Minimum value a label must take for a study to count as a match.
            Default is 0.001.

        Returns
        -------
        found_ids : :obj:`list`
            A list of IDs from the Dataset found by the search criteria.
        """
        if isinstance(labels, str):
            labels = [labels]
        elif not isinstance(labels, list):
            raise ValueError(f"Argument 'labels' cannot be {type(labels)}")

        missing_labels = [label for label in labels if label not in self.annotations.columns]
        if missing_labels:
            raise ValueError(f"Missing label(s): {', '.join(missing_labels)}")

        temp_annotations = self.annotations[self._id_cols + labels]
        found_rows = (temp_annotations[labels] >= label_threshold).all(axis=1)
        if any(found_rows):
            found_ids = temp_annotations.loc[found_rows, "id"].tolist()
        else:
            found_ids = []

        return found_ids

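A usage sketch with a hypothetical Neurosynth-style label column:

    found = dset.get_studies_by_label(
        labels=["Neurosynth_TFIDF__emotion"],
        label_threshold=0.001,  # TF-IDF weights are small, hence the low default
    )
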
    def get_studies_by_mask(self, mask):
        """Extract list of studies with at least one coordinate in mask.

        Parameters
        ----------
        mask : img_like
            Mask across which to search for coordinates.

        Returns
        -------
        found_ids : :obj:`list`
            A list of IDs from the Dataset with at least one focus in the mask.
        """
        mask = load_niimg(mask)
        dset_mask = self.masker.mask_img

        if not np.array_equal(dset_mask.affine, mask.affine):
            LGR.warning("Mask affine does not match Dataset affine. Assuming same space.")

        dset_ijk = mm2vox(self.coordinates[["x", "y", "z"]].values, mask.affine)
        mask_data = mask.get_fdata()
        mask_coords = np.vstack(np.where(mask_data)).T

        # Check for presence of coordinates in mask
        in_mask = np.any(np.all(dset_ijk[:, None] == mask_coords[None, :], axis=-1), axis=-1)
        found_ids = list(self.coordinates.loc[in_mask, "id"].unique())

        return found_ids

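A usage sketch; any img_like accepted by nilearn's ``load_niimg`` works, and the ROI filename here is illustrative:

    amygdala_ids = dset.get_studies_by_mask("amygdala_roi.nii.gz")
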
    def get_studies_by_coordinate(self, xyz, r=20):
        """Extract list of studies with at least one focus within radius of requested coordinates.

        Parameters
        ----------
        xyz : (X x 3) array_like
            List of coordinates against which to find studies.
        r : :obj:`float`, optional
            Radius (in mm) within which to find studies. Default is 20mm.

        Returns
        -------
        found_ids : :obj:`list`
            A list of IDs from the Dataset with at least one focus within
            radius r of requested coordinates.
        """
        from scipy.spatial.distance import cdist

        xyz = np.array(xyz)
        # Check ndim before shape so a 1D input fails the assertion cleanly
        assert xyz.ndim == 2 and xyz.shape[1] == 3
        distances = cdist(xyz, self.coordinates[["x", "y", "z"]].values)
        distances = np.any(distances <= r, axis=0)
        found_ids = list(self.coordinates.loc[distances, "id"].unique())
        return found_ids
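
A usage sketch; the seed must be passed as a 2D array-like, one row per coordinate:

    # Studies reporting a focus within 10 mm of a hypothetical left-amygdala peak
    ids = dset.get_studies_by_coordinate([[-24, -4, -20]], r=10)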