nimare 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- benchmarks/__init__.py +0 -0
- benchmarks/bench_cbma.py +57 -0
- nimare/__init__.py +45 -0
- nimare/_version.py +21 -0
- nimare/annotate/__init__.py +21 -0
- nimare/annotate/cogat.py +213 -0
- nimare/annotate/gclda.py +924 -0
- nimare/annotate/lda.py +147 -0
- nimare/annotate/text.py +75 -0
- nimare/annotate/utils.py +87 -0
- nimare/base.py +217 -0
- nimare/cli.py +124 -0
- nimare/correct.py +462 -0
- nimare/dataset.py +685 -0
- nimare/decode/__init__.py +33 -0
- nimare/decode/base.py +115 -0
- nimare/decode/continuous.py +462 -0
- nimare/decode/discrete.py +753 -0
- nimare/decode/encode.py +110 -0
- nimare/decode/utils.py +44 -0
- nimare/diagnostics.py +510 -0
- nimare/estimator.py +139 -0
- nimare/extract/__init__.py +19 -0
- nimare/extract/extract.py +466 -0
- nimare/extract/utils.py +295 -0
- nimare/generate.py +331 -0
- nimare/io.py +667 -0
- nimare/meta/__init__.py +39 -0
- nimare/meta/cbma/__init__.py +6 -0
- nimare/meta/cbma/ale.py +951 -0
- nimare/meta/cbma/base.py +947 -0
- nimare/meta/cbma/mkda.py +1361 -0
- nimare/meta/cbmr.py +970 -0
- nimare/meta/ibma.py +1683 -0
- nimare/meta/kernel.py +501 -0
- nimare/meta/models.py +1199 -0
- nimare/meta/utils.py +494 -0
- nimare/nimads.py +492 -0
- nimare/reports/__init__.py +24 -0
- nimare/reports/base.py +664 -0
- nimare/reports/default.yml +123 -0
- nimare/reports/figures.py +651 -0
- nimare/reports/report.tpl +160 -0
- nimare/resources/__init__.py +1 -0
- nimare/resources/atlases/Harvard-Oxford-LICENSE +93 -0
- nimare/resources/atlases/HarvardOxford-cort-maxprob-thr25-2mm.nii.gz +0 -0
- nimare/resources/database_file_manifest.json +142 -0
- nimare/resources/english_spellings.csv +1738 -0
- nimare/resources/filenames.json +32 -0
- nimare/resources/neurosynth_laird_studies.json +58773 -0
- nimare/resources/neurosynth_stoplist.txt +396 -0
- nimare/resources/nidm_pain_dset.json +1349 -0
- nimare/resources/references.bib +541 -0
- nimare/resources/semantic_knowledge_children.txt +325 -0
- nimare/resources/semantic_relatedness_children.txt +249 -0
- nimare/resources/templates/MNI152_2x2x2_brainmask.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_T1w.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-01_desc-brain_mask.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_T1w.nii.gz +0 -0
- nimare/resources/templates/tpl-MNI152NLin6Asym_res-02_desc-brain_mask.nii.gz +0 -0
- nimare/results.py +225 -0
- nimare/stats.py +276 -0
- nimare/tests/__init__.py +1 -0
- nimare/tests/conftest.py +229 -0
- nimare/tests/data/amygdala_roi.nii.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_coordinates.tsv.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_metadata.tsv.gz +0 -0
- nimare/tests/data/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz +0 -0
- nimare/tests/data/data-neurosynth_version-7_vocab-terms_vocabulary.txt +100 -0
- nimare/tests/data/neurosynth_dset.json +2868 -0
- nimare/tests/data/neurosynth_laird_studies.json +58773 -0
- nimare/tests/data/nidm_pain_dset.json +1349 -0
- nimare/tests/data/nimads_annotation.json +1 -0
- nimare/tests/data/nimads_studyset.json +1 -0
- nimare/tests/data/test_baseline.txt +2 -0
- nimare/tests/data/test_pain_dataset.json +1278 -0
- nimare/tests/data/test_pain_dataset_multiple_contrasts.json +1242 -0
- nimare/tests/data/test_sleuth_file.txt +18 -0
- nimare/tests/data/test_sleuth_file2.txt +10 -0
- nimare/tests/data/test_sleuth_file3.txt +5 -0
- nimare/tests/data/test_sleuth_file4.txt +5 -0
- nimare/tests/data/test_sleuth_file5.txt +5 -0
- nimare/tests/test_annotate_cogat.py +32 -0
- nimare/tests/test_annotate_gclda.py +86 -0
- nimare/tests/test_annotate_lda.py +27 -0
- nimare/tests/test_dataset.py +99 -0
- nimare/tests/test_decode_continuous.py +132 -0
- nimare/tests/test_decode_discrete.py +92 -0
- nimare/tests/test_diagnostics.py +168 -0
- nimare/tests/test_estimator_performance.py +385 -0
- nimare/tests/test_extract.py +46 -0
- nimare/tests/test_generate.py +247 -0
- nimare/tests/test_io.py +294 -0
- nimare/tests/test_meta_ale.py +298 -0
- nimare/tests/test_meta_cbmr.py +295 -0
- nimare/tests/test_meta_ibma.py +240 -0
- nimare/tests/test_meta_kernel.py +209 -0
- nimare/tests/test_meta_mkda.py +234 -0
- nimare/tests/test_nimads.py +21 -0
- nimare/tests/test_reports.py +110 -0
- nimare/tests/test_stats.py +101 -0
- nimare/tests/test_transforms.py +272 -0
- nimare/tests/test_utils.py +200 -0
- nimare/tests/test_workflows.py +221 -0
- nimare/tests/utils.py +126 -0
- nimare/transforms.py +907 -0
- nimare/utils.py +1367 -0
- nimare/workflows/__init__.py +14 -0
- nimare/workflows/base.py +189 -0
- nimare/workflows/cbma.py +165 -0
- nimare/workflows/ibma.py +108 -0
- nimare/workflows/macm.py +77 -0
- nimare/workflows/misc.py +65 -0
- nimare-0.4.2.dist-info/LICENSE +21 -0
- nimare-0.4.2.dist-info/METADATA +124 -0
- nimare-0.4.2.dist-info/RECORD +119 -0
- nimare-0.4.2.dist-info/WHEEL +5 -0
- nimare-0.4.2.dist-info/entry_points.txt +2 -0
- nimare-0.4.2.dist-info/top_level.txt +2 -0
nimare/annotate/lda.py
ADDED
@@ -0,0 +1,147 @@
"""Topic modeling with latent Dirichlet allocation."""

import numpy as np
import pandas as pd
from sklearn.decomposition import LatentDirichletAllocation

from nimare.annotate.text import generate_counts
from nimare.base import NiMAREBase
from nimare.utils import _check_ncores


class LDAModel(NiMAREBase):
    """Generate a latent Dirichlet allocation (LDA) topic model.

    This class is a light wrapper around scikit-learn tools for tokenization and LDA.

    Parameters
    ----------
    n_topics : :obj:`int`
        Number of topics for topic model. This corresponds to the model's ``n_components``
        parameter. Must be an integer >= 1.
    max_iter : :obj:`int`, optional
        Maximum number of iterations to use during model fitting. Default = 1000.
    alpha : :obj:`float` or None, optional
        The ``alpha`` value for the model. This corresponds to the model's ``doc_topic_prior``
        parameter. Default is None, which evaluates to ``1 / n_topics``,
        as was used in :footcite:t:`poldrack2012discovering`.
    beta : :obj:`float` or None, optional
        The ``beta`` value for the model. This corresponds to the model's ``topic_word_prior``
        parameter. If None, it evaluates to ``1 / n_topics``.
        Default is 0.001, which was used in :footcite:t:`poldrack2012discovering`.
    text_column : :obj:`str`, optional
        The source of text to use for the model. This should correspond to an existing column
        in the :py:attr:`~nimare.dataset.Dataset.texts` attribute. Default is "abstract".
    n_cores : :obj:`int`, optional
        Number of cores to use for parallelization.
        If <=0, defaults to using all available cores.
        Default is 1.

    Attributes
    ----------
    model : :obj:`~sklearn.decomposition.LatentDirichletAllocation`

    Notes
    -----
    Latent Dirichlet allocation was first developed in :footcite:t:`blei2003latent`,
    and was first applied to neuroimaging articles in :footcite:t:`poldrack2012discovering`.

    References
    ----------
    .. footbibliography::

    See Also
    --------
    :class:`~sklearn.feature_extraction.text.CountVectorizer`: Used to build a vocabulary of terms
        and their associated counts from texts in the ``self.text_column`` of the Dataset's
        ``texts`` attribute.
    :class:`~sklearn.decomposition.LatentDirichletAllocation`: Used to train the LDA model.
    """

    def __init__(
        self, n_topics, max_iter=1000, alpha=None, beta=0.001, text_column="abstract", n_cores=1
    ):
        self.n_topics = n_topics
        self.max_iter = max_iter
        self.alpha = alpha
        self.beta = beta
        self.text_column = text_column
        self.n_cores = _check_ncores(n_cores)

        self.model = LatentDirichletAllocation(
            n_components=n_topics,
            max_iter=max_iter,
            learning_method="batch",
            doc_topic_prior=alpha,
            topic_word_prior=beta,
            n_jobs=n_cores,
        )

    def fit(self, dset):
        """Fit the LDA topic model to text from a Dataset.

        Parameters
        ----------
        dset : :obj:`~nimare.dataset.Dataset`
            A Dataset with, at minimum, text available in the ``self.text_column`` column of its
            :py:attr:`~nimare.dataset.Dataset.texts` attribute.

        Returns
        -------
        dset : :obj:`~nimare.dataset.Dataset`
            A new Dataset with an updated :py:attr:`~nimare.dataset.Dataset.annotations` attribute.

        Attributes
        ----------
        distributions_ : :obj:`dict`
            A dictionary containing additional distributions produced by the model, including:

            - ``p_topic_g_word``: :obj:`numpy.ndarray` of shape (n_topics, n_tokens)
              containing the topic-term weights for the model.
            - ``p_topic_g_word_df``: :obj:`pandas.DataFrame` of shape (n_topics, n_tokens)
              containing the topic-term weights for the model.
        """
        counts_df = generate_counts(
            dset.texts,
            text_column=self.text_column,
            tfidf=False,
            max_df=len(dset.ids) - 2,
            min_df=2,
        )
        vocabulary = counts_df.columns.to_numpy()
        count_values = counts_df.values
        study_ids = counts_df.index.tolist()

        doc_topic_weights = self.model.fit_transform(count_values)
        topic_word_weights = self.model.components_

        # Get top 3 words for each topic for annotation
        sorted_weights_idxs = np.argsort(-topic_word_weights, axis=1)
        top_tokens = [
            "_".join(vocabulary[sorted_weights_idxs[topic_i, :]][:3])
            for topic_i in range(self.n_topics)
        ]
        topic_names = [
            f"LDA{self.n_topics}__{i + 1}_{top_tokens[i]}" for i in range(self.n_topics)
        ]

        doc_topic_weights_df = pd.DataFrame(
            index=study_ids,
            columns=topic_names,
            data=doc_topic_weights,
        )
        topic_word_weights_df = pd.DataFrame(
            index=topic_names,
            columns=vocabulary,
            data=topic_word_weights,
        )
        self.distributions_ = {
            "p_topic_g_word": topic_word_weights,
            "p_topic_g_word_df": topic_word_weights_df,
        }

        annotations = dset.annotations.copy()
        annotations = pd.merge(annotations, doc_topic_weights_df, left_on="id", right_index=True)
        new_dset = dset.copy()
        new_dset.annotations = annotations
        return new_dset
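
A minimal usage sketch for the class above (the dataset path, topic count, and printed columns are illustrative; it assumes a NiMARE Dataset whose texts table includes an "abstract" column):

from nimare.annotate.lda import LDAModel
from nimare.dataset import Dataset

# Hypothetical NiMARE dataset file whose texts table has an "abstract" column.
dset = Dataset("dataset_with_abstracts.json")

model = LDAModel(n_topics=50, max_iter=1000, text_column="abstract")
new_dset = model.fit(dset)

# Per-study topic loadings are merged into the annotations under names like
# "LDA50__1_<top_three_terms>"; topic-term weights live in model.distributions_.
print(new_dset.annotations.filter(like="LDA50__").head())
print(model.distributions_["p_topic_g_word_df"].shape)  # (n_topics, n_tokens)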
nimare/annotate/text.py
ADDED
@@ -0,0 +1,75 @@
"""Text extraction tools."""

import logging
import os.path as op

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from nimare.utils import get_resource_path

LGR = logging.getLogger(__name__)


def generate_counts(text_df, text_column="abstract", tfidf=True, min_df=50, max_df=0.5):
    """Generate term count or tf-idf weights for unigrams/bigrams derived from textual data.

    Parameters
    ----------
    text_df : (D x 2) :obj:`pandas.DataFrame`
        A DataFrame with an 'id' column and the text column named by ``text_column``.
        D = document.

    Returns
    -------
    weights_df : (D x T) :obj:`pandas.DataFrame`
        A DataFrame where the index is 'id' and the columns are the
        unigrams/bigrams derived from the data. D = document. T = term.
    """
    if text_column not in text_df.columns:
        raise ValueError(f"Column '{text_column}' not found in DataFrame")

    # Remove rows with empty text cells
    orig_ids = text_df["id"].tolist()
    text_df = text_df.fillna("")
    keep_ids = text_df.loc[text_df[text_column] != "", "id"]
    text_df = text_df.loc[text_df["id"].isin(keep_ids)]

    if len(keep_ids) != len(orig_ids):
        LGR.info(f"Retaining {len(keep_ids)}/{len(orig_ids)} studies")

    ids = text_df["id"].tolist()
    text = text_df[text_column].tolist()
    stoplist = op.join(get_resource_path(), "neurosynth_stoplist.txt")
    with open(stoplist, "r") as fo:
        stop_words = fo.read().splitlines()

    if tfidf:
        vectorizer = TfidfVectorizer(
            min_df=min_df,
            max_df=max_df,
            ngram_range=(1, 2),
            vocabulary=None,
            stop_words=stop_words,
        )
    else:
        vectorizer = CountVectorizer(
            min_df=min_df,
            max_df=max_df,
            ngram_range=(1, 2),
            vocabulary=None,
            stop_words=stop_words,
        )
    weights = vectorizer.fit_transform(text).toarray()

    if hasattr(vectorizer, "get_feature_names_out"):
        # scikit-learn >= 1.0.0
        names = vectorizer.get_feature_names_out()
    else:
        # scikit-learn < 1.0.0
        # To remove when we drop support for 3.6 and increase minimum sklearn version to 1.0.0.
        names = vectorizer.get_feature_names()

    names = [str(name) for name in names]
    weights_df = pd.DataFrame(weights, columns=names, index=ids)
    weights_df.index.name = "id"
    return weights_df
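
A minimal sketch of calling generate_counts directly on a toy DataFrame (the study IDs and abstracts are made up, and min_df/max_df are relaxed because the corpus is tiny):

import pandas as pd

from nimare.annotate.text import generate_counts

text_df = pd.DataFrame(
    {
        "id": ["study-01", "study-02", "study-03"],
        "abstract": [
            "working memory task with fmri",
            "episodic memory retrieval task",
            "pain stimulation during fmri",
        ],
    }
)

# tfidf=False returns raw term counts instead of tf-idf weights.
counts_df = generate_counts(text_df, text_column="abstract", tfidf=False, min_df=1, max_df=1.0)
print(counts_df.shape)  # (3 documents, n_terms)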
nimare/annotate/utils.py
ADDED
@@ -0,0 +1,87 @@
"""Utility functions for ontology tools."""

import numpy as np
import pandas as pd


def _generate_weights(rel_df, weights):
    """Create an IDxID DataFrame linking weight value to each relationship type.

    .. versionadded:: 0.0.2

    Parameters
    ----------
    rel_df : (X x 3) :obj:`pandas.DataFrame`
        DataFrame with three columns: input, output, and rel_type
        (relationship type).
    weights : :obj:`dict`
        Dictionary defining relationship weights. Each relationship type is a
        key and the associated value is the weight to use for that kind of
        relationship.

    Returns
    -------
    expanded_df : :obj:`pandas.DataFrame`
        Square DataFrame where rows correspond to input items, columns
        correspond to output items, and cells have the weights associated with
        the particular input/output relationship.

    Notes
    -----
    For example, if weights is {'partOf': 1}, the resulting expanded_df will
    have a value of 1 for all cells where the input item (row) is a part of the
    output item (column), and will have zeroes for all other cells.
    """
    # Override isSelf weight
    weights["isSelf"] = 1

    # Hierarchical expansion
    def get_weight(rel_type):
        weight = weights.get(rel_type, 0)
        return weight

    t_df = rel_df.copy()
    t_df["rel_type"] = t_df["rel_type"].apply(get_weight)
    weights_df = t_df.pivot_table(
        index="input", columns="output", values="rel_type", aggfunc=np.max
    )
    weights_df = weights_df.fillna(0)
    out_not_in = list(set(t_df["output"].values) - set(t_df["input"].values))
    in_not_out = list(set(t_df["input"].values) - set(t_df["output"].values))

    new_cols = pd.DataFrame(
        columns=in_not_out,
        index=weights_df.index,
        data=np.zeros((weights_df.shape[0], len(in_not_out))),
    )
    weights_df = pd.concat((weights_df, new_cols), axis=1)
    new_rows = pd.DataFrame(
        columns=weights_df.columns,
        index=out_not_in,
        data=np.zeros((len(out_not_in), weights_df.shape[1])),
    )
    weights_df = pd.concat((weights_df, new_rows), axis=0)
    all_cols = sorted(weights_df.columns.tolist())
    weights_df = weights_df.loc[all_cols, :]
    weights_df = weights_df.loc[:, all_cols]

    # Expand the hierarchical expansion to all related terms.
    # This way, a single dot product will apply counts to all layers.
    expanded_df = weights_df.copy()
    mat = weights_df.values

    for i, val in enumerate(weights_df.index):
        row = np.zeros((1, weights_df.shape[0]))
        row[0, i] = 1  # identity
        temp = np.zeros((1, weights_df.shape[0]))

        while not np.array_equal(temp != 0, row != 0):
            temp = np.copy(row)
            row = np.dot(row, mat)

            # Constrain weights to <=1.
            # Hopefully this won't mess with weights <1,
            # but will also prevent weights from adding to one another.
            row[row > 1] = 1
        expanded_df.loc[val] = np.squeeze(row)
    return expanded_df
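
A minimal sketch of the private helper above, using a made-up relationship table (the anatomical terms and the partOf relationship type are purely illustrative):

import pandas as pd

from nimare.annotate.utils import _generate_weights

rel_df = pd.DataFrame(
    {
        "input": ["amygdala", "amygdala", "hippocampus"],
        "output": ["amygdala", "limbic system", "limbic system"],
        "rel_type": ["isSelf", "partOf", "partOf"],
    }
)

# Square DataFrame with a 1 wherever the row item is, or is part of, the column item.
expanded_df = _generate_weights(rel_df, weights={"partOf": 1})
print(expanded_df)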
nimare/base.py
ADDED
@@ -0,0 +1,217 @@
"""Base classes for NiMARE."""

import gzip
import inspect
import logging
import pickle
from abc import ABCMeta
from collections import defaultdict

from nilearn._utils import CacheMixin

LGR = logging.getLogger(__name__)


class NiMAREBase(CacheMixin, metaclass=ABCMeta):
    """Base class for NiMARE.

    This class contains a few features that are useful throughout the library:

    - Custom __repr__ method for printing the object.
    - get_params from scikit-learn, with which parameters provided at __init__ can be viewed.
    - set_params from scikit-learn, with which parameters provided at __init__ can be overwritten.
      I'm not sure that this is actually used or useable in NiMARE.
    - save to save the object to a Pickle file.
    - load to load an instance of the object from a Pickle file.

    TODO: Actually write/refactor class methods. They mostly come directly from sklearn
    https://github.com/scikit-learn/scikit-learn/blob/
    2a1e9686eeb203f5fddf44fd06414db8ab6a554a/sklearn/base.py#L141
    """

    def __init__(self):
        pass

    def __repr__(self):
        """Show basic NiMARE class representation.

        Specifically, this shows the name of the class, along with any parameters
        that are **not** set to the default.
        """
        # Get default parameter values for the object
        signature = inspect.signature(self.__init__)
        defaults = {
            k: v.default
            for k, v in signature.parameters.items()
            if v.default is not inspect.Parameter.empty
        }

        # Eliminate any sub-parameters (e.g., parameters for an Estimator's KernelTransformer),
        # as well as default values
        params = self.get_params()
        params = {k: v for k, v in params.items() if "__" not in k}
        params = {k: v for k, v in params.items() if defaults.get(k) != v}

        # Convert to strings
        param_strs = []
        for k, v in params.items():
            if isinstance(v, str):
                # Wrap string values in single quotes
                param_str = f"{k}='{v}'"
            else:
                # Keep everything else as-is based on its own repr
                param_str = f"{k}={v}"
            param_strs.append(param_str)

        rep = f"{self.__class__.__name__}({', '.join(param_strs)})"
        return rep

    @classmethod
    def _get_param_names(cls):
        """Get parameter names for the estimator."""
        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, "deprecated_original", cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = inspect.signature(init)
        # Consider the constructor parameters excluding 'self'
        parameters = [
            p
            for p in init_signature.parameters.values()
            if p.name != "self" and p.kind != p.VAR_KEYWORD
        ]
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError(
                    "scikit-learn estimators should always "
                    "specify their parameters in the signature"
                    " of their __init__ (no varargs)."
                    " %s with constructor %s doesn't "
                    " follow this convention." % (cls, init_signature)
                )
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Parameters
        ----------
        deep : :obj:`bool`, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : :obj:`dict`
            Parameter names mapped to their values.
        """
        out = dict()
        for key in self._get_param_names():
            value = getattr(self, key, None)
            if deep and hasattr(value, "get_params"):
                deep_items = value.get_params().items()
                out.update((key + "__" + k, val) for k, val in deep_items)
            out[key] = value
        return out

    def set_params(self, **params):
        """Set the parameters of this estimator.

        The method works on simple estimators as well as on nested objects
        (such as pipelines). The latter have parameters of the form
        ``<component>__<parameter>`` so that it's possible to update each
        component of a nested object.

        Returns
        -------
        self
        """
        if not params:
            # Simple optimization to gain speed (inspect is slow)
            return self
        valid_params = self.get_params(deep=True)

        nested_params = defaultdict(dict)  # grouped by prefix
        for key, value in params.items():
            key, delim, sub_key = key.partition("__")
            if key not in valid_params:
                raise ValueError(
                    "Invalid parameter %s for estimator %s. "
                    "Check the list of available parameters "
                    "with `estimator.get_params().keys()`." % (key, self)
                )

            if delim:
                nested_params[key][sub_key] = value
            else:
                setattr(self, key, value)
                valid_params[key] = value

        for key, sub_params in nested_params.items():
            valid_params[key].set_params(**sub_params)

        return self

    def save(self, filename, compress=True):
        """Pickle the class instance to the provided file.

        Parameters
        ----------
        filename : :obj:`str`
            File to which object will be saved.
        compress : :obj:`bool`, optional
            If True, the file will be compressed with gzip. Otherwise, the
            uncompressed version will be saved. Default = True.
        """
        if compress:
            with gzip.GzipFile(filename, "wb") as file_object:
                pickle.dump(self, file_object)
        else:
            with open(filename, "wb") as file_object:
                pickle.dump(self, file_object)

    @classmethod
    def load(cls, filename, compressed=True):
        """Load a pickled class instance from file.

        Parameters
        ----------
        filename : :obj:`str`
            Name of file containing object.
        compressed : :obj:`bool`, default=True
            If True, the file is assumed to be compressed and gzip will be used
            to load it. Otherwise, it will assume that the file is not
            compressed. Default = True.

        Returns
        -------
        obj : class object
            Loaded class object.
        """
        if compressed:
            try:
                with gzip.GzipFile(filename, "rb") as file_object:
                    obj = pickle.load(file_object)
            except UnicodeDecodeError:
                # Need to try this for python3
                with gzip.GzipFile(filename, "rb") as file_object:
                    obj = pickle.load(file_object, encoding="latin")
        else:
            try:
                with open(filename, "rb") as file_object:
                    obj = pickle.load(file_object)
            except UnicodeDecodeError:
                # Need to try this for python3
                with open(filename, "rb") as file_object:
                    obj = pickle.load(file_object, encoding="latin")

        if not isinstance(obj, cls):
            raise IOError(f"Pickled object must be {cls}, not {type(obj)}")

        return obj
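
A minimal sketch of the shared behaviors above, using a concrete subclass from this package (the output file name is arbitrary):

from nimare.annotate.lda import LDAModel

model = LDAModel(n_topics=10, max_iter=500)
print(model)  # repr shows only parameters that differ from their defaults

print(model.get_params()["max_iter"])  # 500
model.set_params(max_iter=200)  # updates the attribute; the sklearn model built in __init__ is not rebuilt

model.save("lda_model.pkl.gz")  # gzipped pickle by default (arbitrary file name)
same_model = LDAModel.load("lda_model.pkl.gz")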
nimare/cli.py
ADDED
@@ -0,0 +1,124 @@
"""Command-line interfaces for common workflows."""

import argparse
import os.path as op

from nimare.io import convert_neurosynth_to_json, convert_sleuth_to_json
from nimare.workflows.macm import macm_workflow


def _is_valid_file(parser, arg):
    """Check if argument is existing file."""
    if not op.isfile(arg) and arg is not None:
        parser.error(f"The file {arg} does not exist!")

    return arg


def _get_parser():
    """Build the command-line parser for NiMARE.

    Returns
    -------
    parser : :obj:`argparse.ArgumentParser`
    """
    parser = argparse.ArgumentParser(prog="nimare")
    subparsers = parser.add_subparsers(help="NiMARE workflows")

    # MACM
    macm_parser = subparsers.add_parser(
        "macm",
        help=(
            "Run a meta-analytic coactivation modeling (MACM) "
            "analysis using activation likelihood estimation "
            "(ALE) on a NiMARE dataset file and a target mask."
        ),
    )
    macm_parser.set_defaults(func=macm_workflow)
    macm_parser.add_argument(
        "dataset_file", type=lambda x: _is_valid_file(parser, x), help=("Dataset file to analyze.")
    )
    macm_parser.add_argument(
        "--mask",
        "--mask_file",
        dest="mask_file",
        type=lambda x: _is_valid_file(parser, x),
        help=("Mask file"),
        required=True,
    )
    macm_parser.add_argument(
        "--output_dir",
        dest="output_dir",
        metavar="PATH",
        type=str,
        help=("Output directory."),
        default=".",
    )
    macm_parser.add_argument(
        "--prefix", dest="prefix", type=str, help=("Common prefix for output maps."), default=""
    )
    macm_parser.add_argument(
        "--n_iters",
        dest="n_iters",
        type=int,
        help=("Number of iterations for permutation testing."),
        default=5000,
    )
    macm_parser.add_argument(
        "--v_thr",
        dest="v_thr",
        type=float,
        help=("Voxel p-value threshold used to create clusters."),
        default=0.001,
    )
    macm_parser.add_argument(
        "--n_cores",
        dest="n_cores",
        type=int,
        default=1,
        help=("Number of processes to use for meta-analysis. If -1, use all available cores."),
    )

    # Conversion workflows
    sleuth2nimare_parser = subparsers.add_parser(
        "sleuth2nimare", help=("Convert a Sleuth text file to a NiMARE json file.")
    )
    sleuth2nimare_parser.set_defaults(func=convert_sleuth_to_json)
    sleuth2nimare_parser.add_argument(
        "text_file",
        type=lambda x: _is_valid_file(parser, x),
        help=("Sleuth text file to convert."),
    )
    sleuth2nimare_parser.add_argument("out_file", type=str, help=("Output file."))

    neurosynth2nimare_parser = subparsers.add_parser(
        "neurosynth2nimare", help=("Convert a Neurosynth text file to a NiMARE json file.")
    )
    neurosynth2nimare_parser.set_defaults(func=convert_neurosynth_to_json)
    neurosynth2nimare_parser.add_argument(
        "text_file",
        type=lambda x: _is_valid_file(parser, x),
        help=("Neurosynth text file to convert."),
    )
    neurosynth2nimare_parser.add_argument("out_file", type=str, help=("Output file."))
    neurosynth2nimare_parser.add_argument(
        "--annotations_file",
        metavar="FILE",
        type=lambda x: _is_valid_file(parser, x),
        help=("Optional annotations (features) file."),
        default=None,
    )

    return parser


def _main(argv=None):
    """Run NiMARE CLI entrypoint."""
    options = _get_parser().parse_args(argv)
    args = vars(options).copy()
    args.pop("func")
    options.func(**args)


if __name__ == "__main__":
    _main()
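
A minimal sketch of driving the same workflows from Python by passing an argv list to the CLI entry point (the wheel's entry_points.txt presumably registers the corresponding nimare console script). The input and output file names below are hypothetical, and the Sleuth text file must already exist:

from nimare.cli import _get_parser, _main

# List the available workflows and their options.
_get_parser().print_help()

# Roughly equivalent to `nimare sleuth2nimare my_foci.txt my_foci.json` on the command line;
# "my_foci.txt" is a hypothetical Sleuth-format coordinates file.
_main(["sleuth2nimare", "my_foci.txt", "my_foci.json"])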