nimare-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. benchmarks/__init__.py +0 -0
  2. benchmarks/bench_cbma.py +57 -0
  3. nimare/__init__.py +45 -0
  4. nimare/_version.py +21 -0
  5. nimare/annotate/__init__.py +21 -0
  6. nimare/annotate/cogat.py +213 -0
  7. nimare/annotate/gclda.py +924 -0
  8. nimare/annotate/lda.py +147 -0
  9. nimare/annotate/text.py +75 -0
  10. nimare/annotate/utils.py +87 -0
  11. nimare/base.py +217 -0
  12. nimare/cli.py +124 -0
  13. nimare/correct.py +462 -0
  14. nimare/dataset.py +685 -0
  15. nimare/decode/__init__.py +33 -0
  16. nimare/decode/base.py +115 -0
  17. nimare/decode/continuous.py +462 -0
  18. nimare/decode/discrete.py +753 -0
  19. nimare/decode/encode.py +110 -0
  20. nimare/decode/utils.py +44 -0
  21. nimare/diagnostics.py +510 -0
  22. nimare/estimator.py +139 -0
  23. nimare/extract/__init__.py +19 -0
  24. nimare/extract/extract.py +466 -0
  25. nimare/extract/utils.py +295 -0
  26. nimare/generate.py +331 -0
  27. nimare/io.py +667 -0
  28. nimare/meta/__init__.py +39 -0
  29. nimare/meta/cbma/__init__.py +6 -0
  30. nimare/meta/cbma/ale.py +951 -0
  31. nimare/meta/cbma/base.py +947 -0
  32. nimare/meta/cbma/mkda.py +1361 -0
  33. nimare/meta/cbmr.py +970 -0
  34. nimare/meta/ibma.py +1683 -0
  35. nimare/meta/kernel.py +501 -0
  36. nimare/meta/models.py +1199 -0
  37. nimare/meta/utils.py +494 -0
  38. nimare/nimads.py +492 -0
  39. nimare/reports/__init__.py +24 -0
  40. nimare/reports/base.py +664 -0
  41. nimare/reports/default.yml +123 -0
  42. nimare/reports/figures.py +651 -0
  43. nimare/reports/report.tpl +160 -0
  44. nimare/resources/__init__.py +1 -0
  45. nimare/resources/atlases/Harvard-Oxford-LICENSE +93 -0
  46. nimare/resources/atlases/HarvardOxford-cort-maxprob-thr25-2mm.nii.gz +0 -0
  47. nimare/resources/database_file_manifest.json +142 -0
  48. nimare/resources/english_spellings.csv +1738 -0
  49. nimare/resources/filenames.json +32 -0
  50. nimare/resources/neurosynth_laird_studies.json +58773 -0
  51. nimare/resources/neurosynth_stoplist.txt +396 -0
  52. nimare/resources/nidm_pain_dset.json +1349 -0
  53. nimare/resources/references.bib +541 -0
  54. nimare/resources/semantic_knowledge_children.txt +325 -0
  55. nimare/resources/semantic_relatedness_children.txt +249 -0
  56. nimare/resources/templates/MNI152_2x2x2_brainmask.nii.gz +0 -0
  57. nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_T1w.nii.gz +0 -0
  58. nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz +0 -0
  59. nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_T1w.nii.gz +0 -0
  60. nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz +0 -0
  61. nimare/results.py +225 -0
  62. nimare/stats.py +276 -0
  63. nimare/tests/__init__.py +1 -0
  64. nimare/tests/conftest.py +229 -0
  65. nimare/tests/data/amygdala_roi.nii.gz +0 -0
  66. nimare/tests/data/data-neurosynth_version-7_coordinates.tsv.gz +0 -0
  67. nimare/tests/data/data-neurosynth_version-7_metadata.tsv.gz +0 -0
  68. nimare/tests/data/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz +0 -0
  69. nimare/tests/data/data-neurosynth_version-7_vocab-terms_vocabulary.txt +100 -0
  70. nimare/tests/data/neurosynth_dset.json +2868 -0
  71. nimare/tests/data/neurosynth_laird_studies.json +58773 -0
  72. nimare/tests/data/nidm_pain_dset.json +1349 -0
  73. nimare/tests/data/nimads_annotation.json +1 -0
  74. nimare/tests/data/nimads_studyset.json +1 -0
  75. nimare/tests/data/test_baseline.txt +2 -0
  76. nimare/tests/data/test_pain_dataset.json +1278 -0
  77. nimare/tests/data/test_pain_dataset_multiple_contrasts.json +1242 -0
  78. nimare/tests/data/test_sleuth_file.txt +18 -0
  79. nimare/tests/data/test_sleuth_file2.txt +10 -0
  80. nimare/tests/data/test_sleuth_file3.txt +5 -0
  81. nimare/tests/data/test_sleuth_file4.txt +5 -0
  82. nimare/tests/data/test_sleuth_file5.txt +5 -0
  83. nimare/tests/test_annotate_cogat.py +32 -0
  84. nimare/tests/test_annotate_gclda.py +86 -0
  85. nimare/tests/test_annotate_lda.py +27 -0
  86. nimare/tests/test_dataset.py +99 -0
  87. nimare/tests/test_decode_continuous.py +132 -0
  88. nimare/tests/test_decode_discrete.py +92 -0
  89. nimare/tests/test_diagnostics.py +168 -0
  90. nimare/tests/test_estimator_performance.py +385 -0
  91. nimare/tests/test_extract.py +46 -0
  92. nimare/tests/test_generate.py +247 -0
  93. nimare/tests/test_io.py +294 -0
  94. nimare/tests/test_meta_ale.py +298 -0
  95. nimare/tests/test_meta_cbmr.py +295 -0
  96. nimare/tests/test_meta_ibma.py +240 -0
  97. nimare/tests/test_meta_kernel.py +209 -0
  98. nimare/tests/test_meta_mkda.py +234 -0
  99. nimare/tests/test_nimads.py +21 -0
  100. nimare/tests/test_reports.py +110 -0
  101. nimare/tests/test_stats.py +101 -0
  102. nimare/tests/test_transforms.py +272 -0
  103. nimare/tests/test_utils.py +200 -0
  104. nimare/tests/test_workflows.py +221 -0
  105. nimare/tests/utils.py +126 -0
  106. nimare/transforms.py +907 -0
  107. nimare/utils.py +1367 -0
  108. nimare/workflows/__init__.py +14 -0
  109. nimare/workflows/base.py +189 -0
  110. nimare/workflows/cbma.py +165 -0
  111. nimare/workflows/ibma.py +108 -0
  112. nimare/workflows/macm.py +77 -0
  113. nimare/workflows/misc.py +65 -0
  114. nimare-0.4.2.dist-info/LICENSE +21 -0
  115. nimare-0.4.2.dist-info/METADATA +124 -0
  116. nimare-0.4.2.dist-info/RECORD +119 -0
  117. nimare-0.4.2.dist-info/WHEEL +5 -0
  118. nimare-0.4.2.dist-info/entry_points.txt +2 -0
  119. nimare-0.4.2.dist-info/top_level.txt +2 -0
nimare/estimator.py ADDED
@@ -0,0 +1,139 @@
+ """Base class for estimators."""
+
+ from abc import abstractmethod
+
+ from joblib import Memory
+
+ from nimare.base import NiMAREBase
+ from nimare.results import MetaResult
+
+
+ class Estimator(NiMAREBase):
+     """Estimators take in Datasets and return MetaResults.
+
+     All Estimators must have a ``_fit`` method implemented, which applies algorithm-specific
+     methods to a Dataset and returns a dictionary of arrays to be converted into a MetaResult.
+
+     Users will interact with the ``_fit`` method by calling the user-facing ``fit`` method.
+     ``fit`` takes in a ``Dataset``, calls ``_collect_inputs``, then ``_preprocess_input``,
+     then ``_fit``, and finally converts the dictionary returned by ``_fit`` into a ``MetaResult``.
+     """
+
+     # Inputs that must be available in input Dataset. Keys are names of
+     # attributes to set; values are strings indicating location in Dataset.
+     _required_inputs = {}
+
+     def __init__(self, memory=Memory(location=None, verbose=0), memory_level=0):
+         self.memory = memory
+         self.memory_level = memory_level
+
+     def _collect_inputs(self, dataset, drop_invalid=True):
+         """Search for, and validate, required inputs as necessary.
+
+         This method populates the ``inputs_`` attribute.
+
+         .. versionchanged:: 0.0.12
+
+             Renamed from ``_validate_input``.
+
+         Parameters
+         ----------
+         dataset : :obj:`~nimare.dataset.Dataset`
+         drop_invalid : :obj:`bool`, default=True
+             Whether to automatically drop any studies in the Dataset without valid data or not.
+             Default is True.
+
+         Attributes
+         ----------
+         inputs_ : :obj:`dict`
+             A dictionary of required inputs for the Estimator, extracted from the Dataset.
+             The actual inputs collected in this attribute are determined by the
+             ``_required_inputs`` variable that should be specified in each child class.
+         """
+         if not hasattr(dataset, "slice"):
+             raise ValueError(
+                 f"Argument 'dataset' must be a valid Dataset object, not a {type(dataset)}."
+             )
+
+         if self._required_inputs:
+             data = dataset.get(self._required_inputs, drop_invalid=drop_invalid)
+             # Do not overwrite existing inputs_ attribute.
+             # This is necessary for PairwiseCBMAEstimator, which validates two sets of coordinates
+             # in the same object.
+             # It makes the *strong* assumption that required inputs will not change within an
+             # Estimator across fit calls, so all fields of inputs_ will be overwritten instead of
+             # retaining outdated fields from previous fit calls.
+             if not hasattr(self, "inputs_"):
+                 self.inputs_ = {}
+
+             for k, v in data.items():
+                 if v is None:
+                     raise ValueError(
+                         f"Estimator {self.__class__.__name__} requires input dataset to contain "
+                         f"{k}, but no matching data were found."
+                     )
+                 self.inputs_[k] = v
+
+     @abstractmethod
+     def _generate_description(self):
+         """Generate a text description of the Estimator."""
+         pass
+
+     @abstractmethod
+     def _preprocess_input(self, dataset):
+         """Perform any additional preprocessing steps on data in self.inputs_.
+
+         Parameters
+         ----------
+         dataset : :obj:`~nimare.dataset.Dataset`
+             The Dataset
+         """
+         pass
+
+     @abstractmethod
+     def _fit(self, dataset):
+         """Apply estimation to dataset and output results.
+
+         Must return a dictionary of results, where keys are names of images
+         and values are ndarrays.
+         """
+         pass
+
+     def fit(self, dataset, drop_invalid=True):
+         """Fit Estimator to Dataset.
+
+         Parameters
+         ----------
+         dataset : :obj:`~nimare.dataset.Dataset`
+             Dataset object to analyze.
+         drop_invalid : :obj:`bool`, optional
+             Whether to automatically ignore any studies without the required data or not.
+             Default is True.
+
+         Returns
+         -------
+         :obj:`~nimare.results.MetaResult`
+             Results of Estimator fitting.
+
+         Attributes
+         ----------
+         inputs_ : :obj:`dict`
+             Inputs used in _fit.
+
+         Notes
+         -----
+         The `fit` method is a light wrapper that runs input validation and
+         preprocessing before fitting the actual model. Estimators' individual
+         "fitting" methods are implemented as `_fit`, although users should
+         call `fit`.
+         """
+         self._collect_inputs(dataset, drop_invalid=drop_invalid)
+         self._preprocess_input(dataset)
+         maps, tables, description = self._cache(self._fit, func_memory_level=1)(dataset)
+
+         if hasattr(self, "masker") and self.masker is not None:
+             masker = self.masker
+         else:
+             masker = dataset.masker
+
+         return MetaResult(self, mask=masker, maps=maps, tables=tables, description=description)
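
As a concrete illustration of the ``fit`` workflow documented above (``_collect_inputs``, then ``_preprocess_input``, then ``_fit``, with the result wrapped in a ``MetaResult``), the following editorial sketch runs one of the concrete Estimator subclasses shipped in this wheel against the bundled 21-study pain dataset. It is not part of the diff; it assumes the ``ALE`` estimator from ``nimare/meta/cbma/ale.py`` and the ``nidm_pain_dset.json`` resource listed in the file manifest above, and the exact map names depend on the estimator.

# Editorial usage sketch (not part of nimare 0.4.2): exercising Estimator.fit
# through the ALE subclass and the bundled NIDM pain dataset.
import os.path as op

from nimare.dataset import Dataset
from nimare.meta.cbma.ale import ALE
from nimare.utils import get_resource_path

dset = Dataset(op.join(get_resource_path(), "nidm_pain_dset.json"))

meta = ALE()              # concrete Estimator subclass
results = meta.fit(dset)  # runs _collect_inputs, _preprocess_input, then _fit
print(type(results))                # <class 'nimare.results.MetaResult'>
print(sorted(results.maps.keys()))  # map names returned by _fit, e.g. "stat", "z"
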
nimare/extract/__init__.py ADDED
@@ -0,0 +1,19 @@
+ """Dataset and trained model downloading functions."""
+
+ from . import utils
+ from .extract import (
+     download_abstracts,
+     download_cognitive_atlas,
+     download_nidm_pain,
+     fetch_neuroquery,
+     fetch_neurosynth,
+ )
+
+ __all__ = [
+     "download_nidm_pain",
+     "download_cognitive_atlas",
+     "download_abstracts",
+     "fetch_neuroquery",
+     "fetch_neurosynth",
+     "utils",
+ ]
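
Because of the re-exports above, the download helpers can be imported directly from ``nimare.extract``. A minimal editorial sketch (not part of the diff):

from nimare.extract import download_nidm_pain, fetch_neurosynth

nidm_dir = download_nidm_pain()            # returns the local data directory
databases = fetch_neurosynth(version="7")  # returns a list of found-database dicts
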
nimare/extract/extract.py ADDED
@@ -0,0 +1,466 @@
+ """Tools for downloading datasets."""
+
+ import itertools
+ import json
+ import logging
+ import os
+ import os.path as op
+ import shutil
+ import time
+ import zipfile
+ from glob import glob
+ from urllib.request import urlopen
+
+ import numpy as np
+ import pandas as pd
+
+ from nimare.dataset import Dataset
+ from nimare.extract.utils import (
+     _download_zipped_file,
+     _expand_df,
+     _get_concept_reltype,
+     _get_dataset_dir,
+     _longify,
+ )
+ from nimare.utils import get_resource_path
+
+ LGR = logging.getLogger(__name__)
+
+ VALID_ENTITIES = {
+     "coordinates.tsv.gz": ["data", "version"],
+     "metadata.tsv.gz": ["data", "version"],
+     "features.npz": ["data", "version", "vocab", "source", "type"],
+     "vocabulary.txt": ["data", "version", "vocab"],
+     "metadata.json": ["data", "version", "vocab"],
+     "keys.tsv": ["data", "version", "vocab"],
+ }
+
+
+ def _find_entities(filename, search_pairs, log=False):
+     """Search file for any matching patterns of entities."""
+     # Convert all string-based kwargs to lists
+     search_pairs = {k: [v] if isinstance(v, str) else v for k, v in search_pairs.items()}
+     search_pairs = [[f"{k}-{v_i}" for v_i in v] for k, v in search_pairs.items()]
+     searches = list(itertools.product(*search_pairs))
+
+     if log:
+         LGR.info(f"Searching for any feature files matching the following criteria: {searches}")
+
+     file_parts = filename.split("_")
+     suffix = file_parts[-1]
+     valid_entities_for_suffix = VALID_ENTITIES[suffix]
+     for search in searches:
+         temp_search = [term for term in search if term.split("-")[0] in valid_entities_for_suffix]
+         if all(term in file_parts for term in temp_search):
+             return True
+
+     return False
+
+
+ def _fetch_database(search_pairs, database_url, out_dir, overwrite=False):
+     """Fetch generic database."""
+     res_dir = get_resource_path()
+     with open(op.join(res_dir, "database_file_manifest.json"), "r") as fo:
+         database_file_manifest = json.load(fo)
+
+     out_dir = op.abspath(out_dir)
+     os.makedirs(out_dir, exist_ok=True)
+
+     found_databases = []
+     found_files = []
+     log = True
+     for database in database_file_manifest:
+         coordinates_file = database["coordinates"]
+         metadata_file = database["metadata"]
+         if not _find_entities(coordinates_file, search_pairs, log=log):
+             log = False
+             continue
+
+         log = False
+
+         feature_dicts = database["features"]
+         for feature_dict in feature_dicts:
+             features_file = feature_dict["features"]
+             # Other files associated with features have subset of entities,
+             # so unnecessary to search them if we assume that the hard-coded manifest is valid.
+             if not _find_entities(features_file, search_pairs):
+                 continue
+             else:
+                 out_coordinates_file = op.join(out_dir, coordinates_file)
+                 out_metadata_file = op.join(out_dir, metadata_file)
+                 out_feature_dict = {k: op.join(out_dir, v) for k, v in feature_dict.items()}
+
+                 db_found = [
+                     i_db
+                     for i_db, db_dct in enumerate(found_databases)
+                     if db_dct["coordinates"] == out_coordinates_file
+                 ]
+                 if len(db_found):
+                     assert len(db_found) == 1
+
+                     found_databases[db_found[0]]["features"].append(out_feature_dict)
+                 else:
+                     found_databases.append(
+                         {
+                             "coordinates": out_coordinates_file,
+                             "metadata": out_metadata_file,
+                             "features": [out_feature_dict],
+                         }
+                     )
+                 found_files += [coordinates_file, metadata_file, *feature_dict.values()]
+
+     found_files = sorted(list(set(found_files)))
+     for found_file in found_files:
+         print(f"Downloading {found_file}", flush=True)
+
+         url = op.join(database_url, found_file + "?raw=true")
+         out_file = op.join(out_dir, found_file)
+
+         if op.isfile(out_file) and not overwrite:
+             print("File exists and overwrite is False. Skipping.")
+             continue
+
+         with open(out_file, "wb") as fo:
+             u = urlopen(url)
+
+             block_size = 8192
+             while True:
+                 buffer = u.read(block_size)
+                 if not buffer:
+                     break
+                 fo.write(buffer)
+
+     return found_databases
+
+
+ def fetch_neurosynth(data_dir=None, version="7", overwrite=False, **kwargs):
+     """Download the latest data files from NeuroSynth.
+
+     .. versionchanged:: 0.0.10
+
+         * Use new format for Neurosynth and NeuroQuery files.
+         * Change "path" parameter to "data_dir".
+
+     .. versionadded:: 0.0.4
+
+     Parameters
+     ----------
+     data_dir : :obj:`pathlib.Path` or :obj:`str`, optional
+         Path where data should be downloaded. By default, files are downloaded in home directory.
+         A subfolder, named ``neurosynth``, will be created in ``data_dir``, which is where the
+         files will be located.
+     version : str or list, optional
+         The version to fetch. The default is "7" (Neurosynth's latest version).
+     overwrite : bool, optional
+         Whether to overwrite existing files or not. Default is False.
+     kwargs : dict, optional
+         Keyword arguments to select relevant feature files.
+         Valid kwargs include: source, vocab, type.
+         Each kwarg may be a string or a list of strings.
+         If no kwargs are provided, all feature files for the specified database version will be
+         downloaded.
+
+     Returns
+     -------
+     found_databases : :obj:`list` of :obj:`dict`
+         List of dictionaries indicating datasets downloaded.
+         Each list entry is a different database, containing a dictionary with three keys:
+         "coordinates", "metadata", and "features". "coordinates" and "metadata" will be filenames.
+         "features" will be a list of dictionaries, each containing "id", "vocab", and "features"
+         keys with associated files.
+
+     Notes
+     -----
+     This function was adapted from neurosynth.base.dataset.download().
+
+     Warnings
+     --------
+     Starting in version 0.0.10, this function operates on the new Neurosynth/NeuroQuery file
+     format. Old code using this function **will not work** with the new version.
+     """
+     URL = (
+         "https://github.com/neurosynth/neurosynth-data/blob/"
+         "209c33cd009d0b069398a802198b41b9c488b9b7/"
+     )
+     dataset_name = "neurosynth"
+
+     data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir)
+
+     kwargs["data"] = dataset_name
+     kwargs["version"] = version
+
+     found_databases = _fetch_database(kwargs, URL, data_dir, overwrite=overwrite)
+
+     return found_databases
+
+
+ def fetch_neuroquery(data_dir=None, version="1", overwrite=False, **kwargs):
+     """Download the latest data files from NeuroQuery.
+
+     .. versionadded:: 0.0.10
+
+     Parameters
+     ----------
+     data_dir : :obj:`pathlib.Path` or :obj:`str`, optional
+         Path where data should be downloaded. By default, files are downloaded in home directory.
+     version : str or list, optional
+         The version to fetch. The default is "1" (NeuroQuery's latest version).
+     url : None or str, optional
+         Specific URL to download. If not None, overrides URL to current data.
+         If you want to fetch Neurosynth's data from *before* the 2021 reorganization,
+         you will need to use this argument.
+     kwargs
+         Keyword arguments to select relevant feature files.
+         Valid kwargs include: source, vocab, type.
+         Each kwarg may be a string or a list of strings.
+         If no kwargs are provided, all feature files for the specified database version will be
+         downloaded.
+
+     Returns
+     -------
+     found_databases : :obj:`list` of :obj:`dict`
+         List of dictionaries indicating datasets downloaded.
+         Each list entry is a different database, containing a dictionary with three keys:
+         "coordinates", "metadata", and "features". "coordinates" and "metadata" will be filenames.
+         "features" will be a list of dictionaries, each containing "id", "vocab", and "features"
+         keys with associated files.
+
+     Notes
+     -----
+     This function was adapted from neurosynth.base.dataset.download().
+     """
+     URL = (
+         "https://github.com/neuroquery/neuroquery_data/blob/"
+         "4580f86267fb7c14ac1f601e298cbed898d79f2d/data/"
+     )
+     dataset_name = "neuroquery"
+
+     data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir)
+
+     kwargs["data"] = dataset_name
+     kwargs["version"] = version
+
+     found_databases = _fetch_database(kwargs, URL, data_dir, overwrite=overwrite)
+
+     return found_databases
+
+
+ def download_nidm_pain(data_dir=None, overwrite=False):
+     """Download NIDM Results for 21 pain studies from NeuroVault for tests.
+
+     .. versionadded:: 0.0.2
+
+     Parameters
+     ----------
+     data_dir : :obj:`pathlib.Path` or :obj:`str`, optional
+         Path where data should be downloaded. By default, files are downloaded in home directory.
+         A subfolder, named ``nidm_21pain``, will be created in ``data_dir``, which is where the
+         files will be located.
+     overwrite : :obj:`bool`, optional
+         Whether to overwrite existing files or not. Default is False.
+
+     Returns
+     -------
+     data_dir : :obj:`str`
+         Updated data directory pointing to dataset files.
+     """
+     url = "https://neurovault.org/collections/1425/download"
+
+     dataset_name = "nidm_21pain"
+
+     data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir)
+     desc_file = op.join(data_dir, "description.txt")
+     if op.isfile(desc_file) and overwrite is False:
+         return data_dir
+
+     # Download
+     fname = op.join(data_dir, url.split("/")[-1])
+     _download_zipped_file(url, filename=fname)
+
+     # Unzip
+     with zipfile.ZipFile(fname, "r") as zip_ref:
+         zip_ref.extractall(data_dir)
+
+     collection_folders = [f for f in glob(op.join(data_dir, "*")) if ".nidm" not in f]
+     collection_folders = [f for f in collection_folders if op.isdir(f)]
+     if len(collection_folders) > 1:
+         raise Exception(f"More than one folder found: {', '.join(collection_folders)}")
+     else:
+         folder = collection_folders[0]
+     zip_files = glob(op.join(folder, "*.zip"))
+     for zf in zip_files:
+         fn = op.splitext(op.basename(zf))[0]
+         with zipfile.ZipFile(zf, "r") as zip_ref:
+             zip_ref.extractall(op.join(data_dir, fn))
+
+     os.remove(fname)
+     shutil.rmtree(folder)
+
+     with open(desc_file, "w") as fo:
+         fo.write("21 pain studies in NIDM-results packs.")
+     return data_dir
+
+
+ def download_cognitive_atlas(data_dir=None, overwrite=False):
+     """Download Cognitive Atlas ontology and extract IDs and relationships.
+
+     .. versionadded:: 0.0.2
+
+     Parameters
+     ----------
+     data_dir : :obj:`pathlib.Path` or :obj:`str`, optional
+         Path where data should be downloaded. By default, files are downloaded in home directory.
+     overwrite : :obj:`bool`, optional
+         Whether to overwrite existing files or not. Default is False.
+
+     Returns
+     -------
+     out_dict : :obj:`dict`
+         Dictionary with two keys: 'ids' and 'relationships'. Each points to a
+         csv file. The 'ids' file contains CogAt identifiers, canonical names,
+         and aliases, sorted by alias length (number of characters).
+         The 'relationships' file contains associations between CogAt items,
+         with three columns: input, output, and rel_type (relationship type).
+     """
+     from cognitiveatlas.api import get_concept, get_disorder, get_task
+
+     dataset_name = "cognitive_atlas"
+     data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir)
+
+     ids_file = op.join(data_dir, "cogat_aliases.csv")
+     rels_file = op.join(data_dir, "cogat_relationships.csv")
+     if overwrite or not all([op.isfile(f) for f in [ids_file, rels_file]]):
+         concepts = get_concept(silent=True).pandas
+         tasks = get_task(silent=True).pandas
+         disorders = get_disorder(silent=True).pandas
+
+         # Identifiers and aliases
+         long_concepts = _longify(concepts)
+         long_tasks = _longify(tasks)
+
+         # Disorders currently lack aliases
+         disorders["name"] = disorders["name"].str.lower()
+         disorders = disorders.assign(alias=disorders["name"])
+         disorders = disorders[["id", "name", "alias"]]
+
+         # Combine into aliases DataFrame
+         aliases = pd.concat((long_concepts, long_tasks, disorders), axis=0)
+         aliases = _expand_df(aliases)
+         aliases = aliases.replace("", np.nan)
+         aliases = aliases.dropna(axis=0)
+         aliases = aliases.reset_index(drop=True)
+
+         # Relationships
+         relationship_list = []
+         for i, id_ in enumerate(concepts["id"].unique()):
+             if i % 100 == 0:
+                 time.sleep(5)
+             row = [id_, id_, "isSelf"]
+             relationship_list.append(row)
+             concept = get_concept(id=id_, silent=True).json
+             for rel in concept["relationships"]:
+                 reltype = _get_concept_reltype(rel["relationship"], rel["direction"])
+                 if reltype is not None:
+                     row = [id_, rel["id"], reltype]
+                     relationship_list.append(row)
+
+         for i, id_ in enumerate(tasks["id"].unique()):
+             if i % 100 == 0:
+                 time.sleep(5)
+             row = [id_, id_, "isSelf"]
+             relationship_list.append(row)
+             task = get_task(id=id_, silent=True).json
+             for rel in task["concepts"]:
+                 row = [id_, rel["concept_id"], "measures"]
+                 relationship_list.append(row)
+                 row = [rel["concept_id"], id_, "measuredBy"]
+                 relationship_list.append(row)
+
+         for i, id_ in enumerate(disorders["id"].unique()):
+             if i % 100 == 0:
+                 time.sleep(5)
+             row = [id_, id_, "isSelf"]
+             relationship_list.append(row)
+             disorder = get_disorder(id=id_, silent=True).json
+             for rel in disorder["disorders"]:
+                 if rel["relationship"] == "ISA":
+                     rel_type = "isA"
+                 else:
+                     rel_type = rel["relationship"]
+                 row = [id_, rel["id"], rel_type]
+                 relationship_list.append(row)
+
+         relationships = pd.DataFrame(
+             columns=["input", "output", "rel_type"], data=relationship_list
+         )
+         ctp_df = concepts[["id", "id_concept_class"]]
+         ctp_df = ctp_df.assign(rel_type="inCategory")
+         ctp_df.columns = ["input", "output", "rel_type"]
+         ctp_df["output"].replace("", np.nan, inplace=True)
+         ctp_df.dropna(axis=0, inplace=True)
+         relationships = pd.concat((ctp_df, relationships))
+         relationships = relationships.reset_index(drop=True)
+         aliases.to_csv(ids_file, index=False)
+         relationships.to_csv(rels_file, index=False)
+     out_dict = {"ids": ids_file, "relationships": rels_file}
+
+     return out_dict
+
+
+ def download_abstracts(dataset, email):
+     """Download the abstracts for a list of PubMed IDs.
+
+     Uses the BioPython package.
+
+     .. versionadded:: 0.0.2
+
+     Parameters
+     ----------
+     dataset : :obj:`~nimare.dataset.Dataset`
+         A Dataset object where IDs are in the form PMID-EXPID
+     email : :obj:`str`
+         Email address to use to call the PubMed API
+
+     Returns
+     -------
+     dataset : :obj:`~nimare.dataset.Dataset`
+
+     Warnings
+     --------
+     This function assumes that the dataset uses identifiers in the format
+     [PMID-EXPID]. Thus, the ``study_id`` column of the
+     :py:attr:`~nimare.dataset.Dataset.texts` DataFrame should correspond to PMID.
+     """
+     try:
+         from Bio import Entrez, Medline
+     except ImportError:
+         raise Exception("Module biopython is required for downloading abstracts from PubMed.")
+
+     Entrez.email = email
+
+     if isinstance(dataset, Dataset):
+         pmids = dataset.texts["study_id"].astype(str).tolist()
+         pmids = sorted(list(set(pmids)))
+     elif isinstance(dataset, list):
+         pmids = [str(pmid) for pmid in dataset]
+     else:
+         raise Exception(f"Dataset type not recognized: {type(dataset)}")
+
+     records = []
+     # PubMed only allows you to search ~1000 at a time. I chose 900 to be safe.
+     chunks = [pmids[x : x + 900] for x in range(0, len(pmids), 900)]
+     for i, chunk in enumerate(chunks):
+         LGR.info(f"Downloading chunk {i + 1} of {len(chunks)}")
+         h = Entrez.efetch(db="pubmed", id=chunk, rettype="medline", retmode="text")
+         records += list(Medline.parse(h))
+
+     # Pull data for studies with abstracts
+     data = [[study["PMID"], study["AB"]] for study in records if study.get("AB", None)]
+     df = pd.DataFrame(columns=["study_id", "abstract"], data=data)
+     if not isinstance(dataset, Dataset):
+         return df
+
+     dataset.texts = pd.merge(
+         dataset.texts, df, left_on="study_id", right_on="study_id", how="left"
+     )
+     return dataset
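
To make the entity-based file selection in ``_fetch_database`` and ``_find_entities`` concrete: each keyword argument passed to ``fetch_neurosynth`` or ``fetch_neuroquery`` becomes a ``key-value`` entity filter (e.g. ``vocab-terms``) that is matched against the filenames listed in ``database_file_manifest.json``. The following editorial sketch is not part of the package; the ``data_dir`` path is an arbitrary example.

# Editorial usage sketch (not part of nimare 0.4.2).
from pprint import pprint

from nimare.extract import fetch_neurosynth

# Restrict the download to the abstract-derived TF-IDF "terms" features;
# each kwarg becomes an entity filter on the manifest filenames
# (source-abstract, vocab-terms, type-tfidf).
found_databases = fetch_neurosynth(
    data_dir="/tmp/nimare_data",  # arbitrary example path
    version="7",
    source="abstract",
    vocab="terms",
    type="tfidf",
)
pprint(found_databases)
# -> [{"coordinates": ".../data-neurosynth_version-7_coordinates.tsv.gz",
#      "metadata": ".../data-neurosynth_version-7_metadata.tsv.gz",
#      "features": [{...}]}]

# Abstracts for a Dataset whose study IDs are PMID-based could then be added with:
# dset = download_abstracts(dset, email="you@example.com")
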