pcntoolkit 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pcntoolkit/__init__.py +4 -0
- pcntoolkit/configs.py +9 -0
- pcntoolkit/dataio/__init__.py +1 -0
- pcntoolkit/dataio/fileio.py +608 -0
- pcntoolkit/model/KnuOp.py +48 -0
- pcntoolkit/model/NP.py +88 -0
- pcntoolkit/model/NPR.py +86 -0
- pcntoolkit/model/SHASH.py +509 -0
- pcntoolkit/model/__init__.py +6 -0
- pcntoolkit/model/architecture.py +219 -0
- pcntoolkit/model/bayesreg.py +585 -0
- pcntoolkit/model/core.21290 +0 -0
- pcntoolkit/model/gp.py +489 -0
- pcntoolkit/model/hbr.py +1584 -0
- pcntoolkit/model/rfa.py +245 -0
- pcntoolkit/normative.py +1647 -0
- pcntoolkit/normative_NP.py +336 -0
- pcntoolkit/normative_model/__init__.py +6 -0
- pcntoolkit/normative_model/norm_base.py +62 -0
- pcntoolkit/normative_model/norm_blr.py +303 -0
- pcntoolkit/normative_model/norm_gpr.py +112 -0
- pcntoolkit/normative_model/norm_hbr.py +752 -0
- pcntoolkit/normative_model/norm_np.py +333 -0
- pcntoolkit/normative_model/norm_rfa.py +109 -0
- pcntoolkit/normative_model/norm_utils.py +29 -0
- pcntoolkit/normative_parallel.py +1420 -0
- pcntoolkit/regression_model/blr/warp.py +1 -0
- pcntoolkit/trendsurf.py +315 -0
- pcntoolkit/util/__init__.py +1 -0
- pcntoolkit/util/bspline.py +149 -0
- pcntoolkit/util/hbr_utils.py +242 -0
- pcntoolkit/util/utils.py +1698 -0
- pcntoolkit-0.32.0.dist-info/LICENSE +674 -0
- pcntoolkit-0.32.0.dist-info/METADATA +134 -0
- pcntoolkit-0.32.0.dist-info/RECORD +37 -0
- pcntoolkit-0.32.0.dist-info/WHEEL +4 -0
- pcntoolkit-0.32.0.dist-info/entry_points.txt +5 -0
pcntoolkit/normative.py
ADDED
|
@@ -0,0 +1,1647 @@
|
|
|
1
|
+
#!/opt/conda/bin/python
|
|
2
|
+
|
|
3
|
+
# ------------------------------------------------------------------------------
|
|
4
|
+
# Usage:
|
|
5
|
+
# python normative.py -m [maskfile] -k [number of CV folds] -c <covariates>
|
|
6
|
+
# -t [test covariates] -r [test responses] <infile>
|
|
7
|
+
#
|
|
8
|
+
# Either the -k switch or -t switch should be specified, but not both.
|
|
9
|
+
# If -t is selected, a set of responses should be provided with the -r switch
|
|
10
|
+
#
|
|
11
|
+
# Written by A. Marquand
|
|
12
|
+
# ------------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
from __future__ import division, print_function
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import glob
|
|
18
|
+
import os
|
|
19
|
+
import pickle
|
|
20
|
+
import sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
import numpy as np
|
|
24
|
+
from sklearn.model_selection import KFold
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
import nutpie
|
|
28
|
+
except ImportError:
|
|
29
|
+
# warnings.warn("Nutpie not installed. For fitting HBR models with the nutpie backend, install it with `conda install nutpie numba`")
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
try: # run as a package if installed
|
|
34
|
+
from pcntoolkit import configs
|
|
35
|
+
from pcntoolkit.dataio import fileio
|
|
36
|
+
from pcntoolkit.normative_model.norm_utils import norm_init
|
|
37
|
+
from pcntoolkit.util.utils import (
|
|
38
|
+
CustomCV,
|
|
39
|
+
compute_MSLL,
|
|
40
|
+
compute_pearsonr,
|
|
41
|
+
explained_var,
|
|
42
|
+
get_package_versions,
|
|
43
|
+
scaler,
|
|
44
|
+
)
|
|
45
|
+
except ImportError:
|
|
46
|
+
pass
|
|
47
|
+
|
|
48
|
+
path = os.path.abspath(os.path.dirname(__file__))
|
|
49
|
+
if path not in sys.path:
|
|
50
|
+
sys.path.append(path)
|
|
51
|
+
# sys.path.append(os.path.join(path,'normative_model'))
|
|
52
|
+
del path
|
|
53
|
+
|
|
54
|
+
import configs
|
|
55
|
+
from dataio import fileio
|
|
56
|
+
from normative_model.norm_utils import norm_init
|
|
57
|
+
from util.utils import (
|
|
58
|
+
CustomCV,
|
|
59
|
+
compute_MSLL,
|
|
60
|
+
compute_pearsonr,
|
|
61
|
+
explained_var,
|
|
62
|
+
get_package_versions,
|
|
63
|
+
scaler,
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
PICKLE_PROTOCOL = configs.PICKLE_PROTOCOL
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def load_response_vars(datafile, maskfile=None, vol=True):
    """
    Load response variables from file. This will load the data and mask it if
    necessary. If the data is in ascii format it will be converted into a numpy
    array. If the data is in neuroimaging format it will be reshaped into a
    2D array (subjects x variables) and a mask will be created if necessary.

    :param datafile: File containing the response variables
    :param maskfile: Mask file (nifti only)
    :param vol: If True, load the data as a 4D volume (nifti only)
    :returns Y: Response variables
    :returns volmask: Mask file (nifti only)
    """

    # Only nifti input produces a volumetric mask; everything else returns None.
    volmask = None
    if fileio.file_type(datafile) == 'nifti':
        vol_data = fileio.load_nifti(datafile, vol=vol)
        volmask = fileio.create_mask(vol_data, mask=maskfile)
        # flatten volume to (subjects x voxels)
        Y = fileio.vol2vec(vol_data, volmask).T
    else:
        Y = fileio.load(datafile)
        # cifti data comes in transposed; bring it to (subjects x variables)
        if fileio.file_type(datafile) == 'cifti':
            Y = Y.T

    return Y, volmask
|
+
|
|
96
|
+
def get_args(*args):
    """
    Parse command line arguments for normative modeling

    :param args: command line arguments
    :returns respfile: response variables for the normative model
    :returns maskfile: mask used to apply to the data (nifti only)
    :returns covfile: covariates used to predict the response variable
    :returns cvfolds: Number of cross-validation folds
    :returns testcov: Test covariates
    :returns testresp: Test responses
    :returns func: Function to call
    :returns alg: Algorithm for normative model
    :returns configparam: Parameters controlling the estimation algorithm
    :returns kw_args: Additional keyword arguments
    """
    # caller passes a single nested argv list, e.g. get_args([sys.argv[1:]])
    args = args[0][0]
    # parse arguments
    parser = argparse.ArgumentParser(description="Normative Modeling")
    parser.add_argument("respfile", help="Response variables for the normative model")
    parser.add_argument("-f", help="Function to call", dest="func", default="estimate")
    parser.add_argument("-m", help="mask file", dest="maskfile", default=None)
    parser.add_argument("-c", help="covariates file", dest="covfile", default=None)
    parser.add_argument("-k", help="cross-validation folds", dest="cvfolds", default=None)
    parser.add_argument("-t", help="covariates (test data)", dest="testcov", default=None)
    parser.add_argument("-r", help="responses (test data)", dest="testresp", default=None)
    parser.add_argument("-a", help="algorithm", dest="alg", default="gpr")
    parser.add_argument("-x", help="algorithm specific config options", dest="configparam", default=None)
    parsed_args, keyword_args = parser.parse_known_args(args)

    # Process required arguments
    wdir = os.path.realpath(os.path.curdir)
    respfile = os.path.join(wdir, parsed_args.respfile)
    if parsed_args.covfile is None:
        raise ValueError("No covariates specified")
    else:
        covfile = parsed_args.covfile

    # Process optional arguments
    if parsed_args.maskfile is None:
        maskfile = None
    else:
        maskfile = os.path.join(wdir, parsed_args.maskfile)
    if parsed_args.testcov is None and parsed_args.cvfolds is not None:
        testcov = None
        testresp = None
        cvfolds = int(parsed_args.cvfolds)
        print("Running under " + str(cvfolds) + " fold cross-validation.")
    else:
        print("Test covariates specified")
        testcov = parsed_args.testcov
        cvfolds = None
        if parsed_args.testresp is None:
            testresp = None
            print("No test response variables specified")
        else:
            testresp = parsed_args.testresp
        if parsed_args.cvfolds is not None:
            print("Ignoring cross-valdation specification (test data given)")

    # Process additional keyword arguments. These are always added as strings.
    # NOTE: the previous implementation built the dict with exec() on raw
    # command-line text, which was a code-injection hazard and broke on values
    # containing quotes or '='. str.partition splits on the first '=' only,
    # so values may themselves contain '='; a bare token yields an empty value.
    kw_args = {}
    for kw in keyword_args:
        key, _, value = kw.partition('=')
        kw_args[key] = value

    return respfile, maskfile, covfile, cvfolds, \
        testcov, testresp, parsed_args.func, parsed_args.alg, \
        parsed_args.configparam, kw_args
|
+
|
|
168
|
+
|
|
169
|
+
def evaluate(Y, Yhat, S2=None, mY=None, sY=None, nlZ=None, nm=None, Xz_tr=None, alg=None,
             metrics=None):
    ''' Compute error metrics
    This function will compute error metrics based on a set of predictions Yhat
    and a set of true response variables Y, namely:

    * Rho: Pearson correlation
    * RMSE: root mean squared error
    * SMSE: standardized mean squared error
    * EXPV: explained variance

    If the predictive variance is also specified the log loss will be computed
    (which also takes into account the predictive variance). If the mean and
    standard deviation are also specified these will be used to standardize
    this, yielding the mean standardized log loss

    :param Y: N x P array of true response variables
    :param Yhat: N x P array of predicted response variables
    :param S2: predictive variance
    :param mY: mean of the training set
    :param sY: standard deviation of the training set
    :param metrics: list of metrics to compute; defaults to
                    ['Rho', 'RMSE', 'SMSE', 'EXPV', 'MSLL']. NOTE: if Y has a
                    single row, 'MSLL' and 'SMSE' are removed from this list
                    (in place, when a list is passed explicitly).

    :returns metrics: evaluation metrics

    '''
    # BUG FIX: the default used to be a mutable list literal that this function
    # mutates below, so removals leaked across successive calls. Use a None
    # sentinel and create a fresh default list on every call instead.
    if metrics is None:
        metrics = ['Rho', 'RMSE', 'SMSE', 'EXPV', 'MSLL']

    feature_num = Y.shape[1]

    # Remove metrics that cannot be computed with only a single data point
    # (intentionally mutates an explicitly passed list: callers aggregating
    # per-fold results rely on seeing the reduced metric set).
    if Y.shape[0] == 1:
        if 'MSLL' in metrics:
            metrics.remove('MSLL')
        if 'SMSE' in metrics:
            metrics.remove('SMSE')

    # find and remove bad variables from the response variables
    # (non-finite everywhere or zero variance)
    nz = np.where(np.bitwise_and(np.isfinite(Y).any(axis=0),
                                 np.var(Y, axis=0) != 0))[0]

    MSE = np.mean((Y - Yhat)**2, axis=0)

    results = dict()

    if 'RMSE' in metrics:
        RMSE = np.sqrt(MSE)
        results['RMSE'] = RMSE

    if 'Rho' in metrics:
        Rho = np.zeros(feature_num)
        pRho = np.ones(feature_num)
        Rho[nz], pRho[nz] = compute_pearsonr(Y[:, nz], Yhat[:, nz])
        results['Rho'] = Rho
        results['pRho'] = pRho

    if 'SMSE' in metrics:
        SMSE = np.zeros_like(MSE)
        SMSE[nz] = MSE[nz] / np.var(Y[:, nz], axis=0)
        results['SMSE'] = SMSE

    if 'EXPV' in metrics:
        EXPV = np.zeros(feature_num)
        EXPV[nz] = explained_var(Y[:, nz], Yhat[:, nz])
        results['EXPV'] = EXPV

    if 'MSLL' in metrics:
        # MSLL needs the predictive variance plus training mean/std
        if ((S2 is not None) and (mY is not None) and (sY is not None)):
            MSLL = np.zeros(feature_num)
            MSLL[nz] = compute_MSLL(Y[:, nz], Yhat[:, nz], S2[:, nz],
                                    mY.reshape(-1, 1).T,
                                    (sY**2).reshape(-1, 1).T)
            results['MSLL'] = MSLL

    if 'NLL' in metrics:
        results['NLL'] = nlZ

    if 'BIC' in metrics:
        # BIC = k*log(n) + 2*nlZ, only when the regressor exposes hyperparameters
        if hasattr(getattr(nm, alg), 'hyp'):
            n = Xz_tr.shape[0]
            k = len(getattr(nm, alg).hyp)
            BIC = k * np.log(n) + 2 * nlZ
            results['BIC'] = BIC

    return results
|
+
|
|
253
|
+
|
|
254
|
+
def save_results(respfile, Yhat, S2, maskvol, Z=None, Y=None, outputsuffix=None,
                 results=None, save_path=''):
    """
    Writes the results of the normative model to disk.

    Parameters:
        respfile (str): The response variables file.
        Yhat (np.array): The predicted response variables.
        S2 (np.array): The predictive variance.
        maskvol (np.array): The mask volume.
        Z (np.array, optional): The latent variable. Defaults to None.
        Y (np.array, optional): The observed response variables. Defaults to None.
        outputsuffix (str, optional): The suffix to append to the output files. Defaults to None.
        results (dict, optional): The results of the normative model. Defaults to None.
        save_path (str, optional): The directory to save the results to. Defaults to ''.

    Returns:
        None
    """

    print("Writing outputs ...")

    # Decide on the example file (used by fileio.save for neuroimaging
    # formats) and the extension of the output files.
    if respfile is None:
        exfile = None
        file_ext = '.pkl'
    else:
        ftype = fileio.file_type(respfile)
        exfile = respfile if ftype in ('cifti', 'nifti') else None
        file_ext = fileio.file_extension(respfile)

    ext = file_ext if outputsuffix is None else str(outputsuffix) + file_ext

    # Mandatory outputs: predictive mean and variance
    fileio.save(Yhat, os.path.join(save_path, 'yhat' + ext), example=exfile,
                mask=maskvol)
    fileio.save(S2, os.path.join(save_path, 'ys2' + ext), example=exfile,
                mask=maskvol)

    # Optional outputs: deviance scores and (transformed) responses
    for arr, stem in ((Z, 'Z'), (Y, 'Y')):
        if arr is not None:
            fileio.save(arr, os.path.join(save_path, stem + ext),
                        example=exfile, mask=maskvol)

    if results is not None:
        for metric in list(results.keys()):
            # NLL/BIC are per-fold matrices, not images: pickle them rather
            # than trying to write them in nifti format
            if (metric == 'NLL' or metric == 'BIC') and file_ext == '.nii.gz':
                target = os.path.join(save_path,
                                      metric + str(outputsuffix) + '.pkl')
            else:
                target = os.path.join(save_path, metric + ext)
            fileio.save(results[metric], target, example=exfile, mask=maskvol)
|
+
|
|
311
|
+
def estimate(covfile, respfile, **kwargs):
    """ Estimate a normative model

    This will estimate a model in one of two settings according to
    the particular parameters specified (see below)

    * under k-fold cross-validation.
      requires respfile, covfile and cvfolds>=2
    * estimating a training dataset then applying to a second test dataset.
      requires respfile, covfile, testcov and testresp.
    * estimating on a training dataset output of forward maps mean and se.
      requires respfile, covfile and testcov

    The models are estimated on the basis of data stored on disk in ascii or
    neuroimaging data formats (nifti or cifti). Ascii data should be in
    tab or space delimited format with the number of subjects in rows and the
    number of variables in columns. Neuroimaging data will be reshaped
    into the appropriate format

    Basic usage::

        estimate(covfile, respfile, [extra_arguments])

    where the variables are defined below. Note that either the cvfolds
    parameter or (testcov, testresp) should be specified, but not both.

    :param respfile: response variables for the normative model
    :param covfile: covariates used to predict the response variable
    :param maskfile: mask used to apply to the data (nifti only)
    :param cvfolds: Number of cross-validation folds
    :param testcov: Test covariates
    :param testresp: Test responses
    :param alg: Algorithm for normative model
    :param configparam: Parameters controlling the estimation algorithm
    :param saveoutput: Save the output to disk? Otherwise returned as arrays
    :param outputsuffix: Text string to add to the output filenames
    :param inscaler: Scaling approach for input covariates, could be 'None' (Default),
                     'standardize', 'minmax', or 'robminmax'.
    :param outscaler: Scaling approach for output responses, could be 'None' (Default),
                      'standardize', 'minmax', or 'robminmax'.

    All outputs are written to disk in the same format as the input. These are:

    :outputs: * yhat - predictive mean
              * ys2 - predictive variance
              * nm - normative model
              * Z - deviance scores
              * Rho - Pearson correlation between true and predicted responses
              * pRho - parametric p-value for this correlation
              * rmse - root mean squared error between true/predicted responses
              * smse - standardised mean squared error

    The outputsuffix may be useful to estimate multiple normative models in the
    same directory (e.g. for custom cross-validation schemes)
    """

    # parse keyword arguments
    maskfile = kwargs.pop('maskfile', None)
    cvfolds = kwargs.pop('cvfolds', None)
    testcov = kwargs.pop('testcov', None)
    testresp = kwargs.pop('testresp', None)
    alg = kwargs.pop('alg', 'gpr')
    outputsuffix = kwargs.pop('outputsuffix', 'estimate')
    # Making sure there is only one
    outputsuffix = "_" + outputsuffix.replace("_", "")
    # '_' is in the outputsuffix to
    # avoid file name parsing problem.
    inscaler = kwargs.pop('inscaler', 'None')
    print(f"inscaler: {inscaler}")
    outscaler = kwargs.pop('outscaler', 'None')
    print(f"outscaler: {outscaler}")
    # NOTE: warp is read with get (not pop) so it remains visible to the
    # regression backend through **kwargs
    warp = kwargs.get('warp', None)

    # convert from strings if necessary (flags may arrive as 'True'/'False'
    # when called from the command line)
    saveoutput = kwargs.pop('saveoutput', 'True')
    if type(saveoutput) is str:
        saveoutput = saveoutput == 'True'
    savemodel = kwargs.pop('savemodel', 'False')
    if type(savemodel) is str:
        savemodel = savemodel == 'True'

    if savemodel and not os.path.isdir('Models'):
        os.mkdir('Models')

    # which output metrics to compute
    metrics = ['Rho', 'RMSE', 'SMSE', 'EXPV', 'MSLL', 'NLL', 'BIC']

    # load data
    print("Processing data in " + respfile)
    X = fileio.load(covfile)
    Y, maskvol = load_response_vars(respfile, maskfile)
    # ensure 2D (subjects x variables)
    if len(Y.shape) == 1:
        Y = Y[:, np.newaxis]
    if len(X.shape) == 1:
        X = X[:, np.newaxis]
    Nmod = Y.shape[1]

    if (testcov is not None) and (cvfolds is None):  # a separate test dataset

        run_cv = False
        cvfolds = 1
        Xte = fileio.load(testcov)
        if len(Xte.shape) == 1:
            Xte = Xte[:, np.newaxis]
        if testresp is not None:
            Yte, testmask = load_response_vars(testresp, maskfile)
            if len(Yte.shape) == 1:
                Yte = Yte[:, np.newaxis]
        else:
            # forward-prediction mode: no test responses, use zeros as
            # placeholders so array shapes line up
            sub_te = Xte.shape[0]
            Yte = np.zeros([sub_te, Nmod])

        # treat as a single train-test split: test rows are appended after
        # the training rows, so their indices start at X.shape[0]
        testids = range(X.shape[0], X.shape[0]+Xte.shape[0])
        splits = CustomCV((range(0, X.shape[0]),), (testids,))

        Y = np.concatenate((Y, Yte), axis=0)
        X = np.concatenate((X, Xte), axis=0)

    else:
        run_cv = True
        # we are running under cross-validation
        splits = KFold(n_splits=cvfolds, shuffle=True)
        testids = range(0, X.shape[0])
        if alg == 'hbr':
            trbefile = kwargs.get('trbefile', None)
            if trbefile is not None:
                be = fileio.load(trbefile)
                if len(be.shape) == 1:
                    be = be[:, np.newaxis]
            else:
                print('No batch-effects file! Initilizing all as zeros!')
                be = np.zeros([X.shape[0], 1])

    # find and remove bad variables from the response variables
    # note: the covariates are assumed to have already been checked
    nz = np.where(np.bitwise_and(np.isfinite(Y).any(axis=0),
                                 np.var(Y, axis=0) != 0))[0]

    # run cross-validation loop
    Yhat = np.zeros_like(Y)
    S2 = np.zeros_like(Y)
    Z = np.zeros_like(Y)
    nlZ = np.zeros((Nmod, cvfolds))

    scaler_resp = []
    scaler_cov = []
    mean_resp = []  # this is just for computing MSLL
    std_resp = []  # this is just for computing MSLL

    if warp is not None:
        Ywarp = np.zeros_like(Yhat)

        # for warping we need to compute metrics separately for each fold
        results_folds = dict()
        for m in metrics:
            results_folds[m] = np.zeros((Nmod, cvfolds))

    for idx in enumerate(splits.split(X)):

        # idx is (fold_number, (train_indices, test_indices))
        fold = idx[0]
        tr = idx[1][0]
        ts = idx[1][1]

        # standardize responses and covariates, ignoring invalid entries
        iy_tr, jy_tr = np.ix_(tr, nz)
        iy_ts, jy_ts = np.ix_(ts, nz)
        mY = np.mean(Y[iy_tr, jy_tr], axis=0)
        sY = np.std(Y[iy_tr, jy_tr], axis=0)
        mean_resp.append(mY)
        std_resp.append(sY)

        if inscaler in ['standardize', 'minmax', 'robminmax']:
            # scaler is fit on the training fold only to avoid leakage
            X_scaler = scaler(inscaler)
            Xz_tr = X_scaler.fit_transform(X[tr, :])
            Xz_ts = X_scaler.transform(X[ts, :])
            scaler_cov.append(X_scaler)
        else:
            Xz_tr = X[tr, :]
            Xz_ts = X[ts, :]

        if outscaler in ['standardize', 'minmax', 'robminmax']:
            Y_scaler = scaler(outscaler)
            Yz_tr = Y_scaler.fit_transform(Y[iy_tr, jy_tr])
            scaler_resp.append(Y_scaler)
        else:
            Yz_tr = Y[iy_tr, jy_tr]

        if (run_cv == True and alg == 'hbr'):
            # HBR reads batch effects from files; write per-fold temp files
            # and point the kwargs at them
            fileio.save(be[tr, :], 'be_kfold_tr_tempfile.pkl')
            fileio.save(be[ts, :], 'be_kfold_ts_tempfile.pkl')
            kwargs['trbefile'] = 'be_kfold_tr_tempfile.pkl'
            kwargs['tsbefile'] = 'be_kfold_ts_tempfile.pkl'

        # estimate the models for all response variables
        for i in range(0, len(nz)):
            print("Estimating model ", i+1, "of", len(nz))
            nm = norm_init(Xz_tr, Yz_tr[:, i], alg=alg, **kwargs)

            try:
                nm = nm.estimate(Xz_tr, Yz_tr[:, i], **kwargs)
                yhat, s2 = nm.predict(Xz_ts, Xz_tr, Yz_tr[:, i], **kwargs)

                if savemodel:
                    nm.save('Models/NM_' + str(fold) + '_' + str(nz[i]) +
                            outputsuffix + '.pkl')

                # map predictions back to the original response scale
                if outscaler == 'standardize':
                    Yhat[ts, nz[i]] = Y_scaler.inverse_transform(yhat, index=i)
                    S2[ts, nz[i]] = s2 * sY[i]**2
                elif outscaler in ['minmax', 'robminmax']:
                    Yhat[ts, nz[i]] = Y_scaler.inverse_transform(yhat, index=i)
                    S2[ts, nz[i]] = s2 * (Y_scaler.max[i] - Y_scaler.min[i])**2
                else:
                    Yhat[ts, nz[i]] = yhat
                    S2[ts, nz[i]] = s2

                nlZ[nz[i], fold] = nm.neg_log_lik

                if (run_cv or testresp is not None):
                    if warp is not None:
                        # TODO: Warping for scaled data
                        if outscaler is not None and outscaler != 'None':
                            raise ValueError(
                                "outscaler not yet supported warping")
                        # evaluate in the warped (Gaussian) space
                        warp_param = nm.blr.hyp[1:nm.blr.warp.get_n_params()+1]
                        Ywarp[ts, nz[i]] = nm.blr.warp.f(
                            Y[ts, nz[i]], warp_param)
                        Ytest = Ywarp[ts, nz[i]]

                        # Save warped mean of the training data (for MSLL)
                        yw = nm.blr.warp.f(Y[tr, nz[i]], warp_param)

                        # create arrays for evaluation
                        Yhati = Yhat[ts, nz[i]]
                        Yhati = Yhati[:, np.newaxis]
                        S2i = S2[ts, nz[i]]
                        S2i = S2i[:, np.newaxis]

                        # evaluate and save results
                        mf = evaluate(Ytest[:, np.newaxis], Yhati, S2=S2i,
                                      mY=np.mean(yw), sY=np.std(yw),
                                      nlZ=nm.neg_log_lik, nm=nm, Xz_tr=Xz_tr,
                                      alg=alg, metrics=metrics)
                        for k in metrics:
                            results_folds[k][nz[i]][fold] = mf[k]
                    else:
                        Ytest = Y[ts, nz[i]]

                    if alg == 'hbr':
                        # HBR computes z-scores from MCMC samples rather than
                        # from the Gaussian predictive distribution
                        if outscaler in ['standardize', 'minmax', 'robminmax']:
                            Ytestz = Y_scaler.transform(
                                Ytest.reshape(-1, 1), index=i)
                        else:
                            Ytestz = Ytest.reshape(-1, 1)
                        Z[ts, nz[i]] = nm.get_mcmc_zscores(
                            Xz_ts, Ytestz, **kwargs)
                    else:
                        Z[ts, nz[i]] = (Ytest - Yhat[ts, nz[i]]) / \
                            np.sqrt(S2[ts, nz[i]])

            except Exception as e:
                # a failed model must not abort the whole run: report the
                # failure and mark this variable's outputs as NaN
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                print("Model ", i+1, "of", len(nz),
                      "FAILED!..skipping and writing NaN to outputs")
                print("Exception:")
                print(e)
                print(exc_type, fname, exc_tb.tb_lineno)

                Yhat[ts, nz[i]] = float('nan')
                S2[ts, nz[i]] = float('nan')
                nlZ[nz[i], fold] = float('nan')
                if testcov is None:
                    Z[ts, nz[i]] = float('nan')
                else:
                    if testresp is not None:
                        Z[ts, nz[i]] = float('nan')

    if savemodel:
        print('Saving model meta-data...')
        v = get_package_versions()
        with open('Models/meta_data.md', 'wb') as file:
            pickle.dump({'valid_voxels': nz, 'fold_num': cvfolds,
                         'mean_resp': mean_resp, 'std_resp': std_resp,
                         'scaler_cov': scaler_cov, 'scaler_resp': scaler_resp,
                         'regressor': alg, 'inscaler': inscaler,
                         'outscaler': outscaler, 'versions': v},
                        file, protocol=PICKLE_PROTOCOL)

    # compute performance metrics
    if (run_cv or testresp is not None):
        print("Evaluating the model ...")
        if warp is None:
            results = evaluate(Y[testids, :], Yhat[testids, :],
                               S2=S2[testids, :], mY=mean_resp[0],
                               sY=std_resp[0], nlZ=nlZ, nm=nm, Xz_tr=Xz_tr, alg=alg,
                               metrics=metrics)
        else:
            # for warped data we just aggregate across folds
            results = dict()
            for m in ['Rho', 'RMSE', 'SMSE', 'EXPV', 'MSLL']:
                results[m] = np.mean(results_folds[m], axis=1)
            results['NLL'] = results_folds['NLL']
            results['BIC'] = results_folds['BIC']

    # Set writing options
    if saveoutput:
        if (run_cv or testresp is not None):
            save_results(respfile, Yhat[testids, :], S2[testids, :], maskvol,
                         Z=Z[testids, :], results=results,
                         outputsuffix=outputsuffix)

        else:
            save_results(respfile, Yhat[testids, :], S2[testids, :], maskvol,
                         outputsuffix=outputsuffix)

    else:
        # return arrays instead of writing to disk; note nothing is returned
        # when saveoutput is True
        if (run_cv or testresp is not None):
            output = (Yhat[testids, :], S2[testids, :], nm, Z[testids, :],
                      results)
        else:
            output = (Yhat[testids, :], S2[testids, :], nm)

        return output
636
|
+
|
|
637
|
+
|
|
638
|
+
def fit(covfile, respfile, **kwargs):
    """
    Fits a normative model to the data (training only: no cross-validation
    and no prediction).

    Parameters:
        covfile (str): The path to the covariates file.
        respfile (str): The path to the response variables file.
        maskfile (str, optional): The path to the mask file. Defaults to None.
        alg (str, optional): The algorithm to use. Defaults to 'gpr'.
        savemodel (bool, optional): Whether to save the model. Defaults to True.
        outputsuffix (str, optional): The suffix to append to the output files. Defaults to 'fit'.
        inscaler (str, optional): The scaler to use for the input data. Defaults to 'None'.
        outscaler (str, optional): The scaler to use for the output data. Defaults to 'None'.

    Returns:
        The last fitted normative model (one model is estimated per response
        variable; models are saved to disk when savemodel is True).
    """

    # parse keyword arguments
    maskfile = kwargs.pop('maskfile', None)
    alg = kwargs.pop('alg', 'gpr')
    # flag may arrive as the string 'True'/'False' from the command line
    savemodel = kwargs.pop('savemodel', 'True') == 'True'
    outputsuffix = kwargs.pop('outputsuffix', 'fit')
    # normalize to exactly one leading '_' to avoid file-name parsing problems
    outputsuffix = "_" + outputsuffix.replace("_", "")
    inscaler = kwargs.pop('inscaler', 'None')
    outscaler = kwargs.pop('outscaler', 'None')
    print(f"inscaler: {inscaler}")
    print(f"outscaler: {outscaler}")

    if savemodel and not os.path.isdir('Models'):
        os.mkdir('Models')

    # load data
    print("Processing data in " + respfile)
    X = fileio.load(covfile)
    Y, maskvol = load_response_vars(respfile, maskfile)
    # ensure 2D (subjects x variables)
    if len(Y.shape) == 1:
        Y = Y[:, np.newaxis]
    if len(X.shape) == 1:
        X = X[:, np.newaxis]

    scaler_resp = []
    scaler_cov = []
    mean_resp = []  # this is just for computing MSLL
    std_resp = []  # this is just for computing MSLL

    # standardize responses and covariates, ignoring invalid entries
    mY = np.mean(Y, axis=0)
    sY = np.std(Y, axis=0)
    mean_resp.append(mY)
    std_resp.append(sY)

    if inscaler in ['standardize', 'minmax', 'robminmax']:
        X_scaler = scaler(inscaler)
        Xz = X_scaler.fit_transform(X)
        scaler_cov.append(X_scaler)
    else:
        Xz = X

    if outscaler in ['standardize', 'minmax', 'robminmax']:
        # (a dead `Yz = np.zeros_like(Y)` that was immediately overwritten
        # has been removed here)
        Y_scaler = scaler(outscaler)
        Yz = Y_scaler.fit_transform(Y)
        scaler_resp.append(Y_scaler)
    else:
        Yz = Y

    # estimate one model per response variable
    for i in range(Y.shape[1]):
        print("Estimating model ", i+1, "of", Y.shape[1])
        nm = norm_init(Xz, Yz[:, i], alg=alg, **kwargs)
        nm = nm.estimate(Xz, Yz[:, i], **kwargs)

        if savemodel:
            # fold index is always 0 here (no cross-validation in fit)
            nm.save('Models/NM_' + str(0) + '_' + str(i) + outputsuffix +
                    '.pkl')

    if savemodel:
        print('Saving model meta-data...')
        v = get_package_versions()
        with open('Models/meta_data.md', 'wb') as file:
            pickle.dump({'mean_resp': mean_resp, 'std_resp': std_resp,
                         'scaler_cov': scaler_cov, 'scaler_resp': scaler_resp,
                         'regressor': alg, 'inscaler': inscaler,
                         'outscaler': outscaler, 'versions': v},
                        file, protocol=PICKLE_PROTOCOL)

    return nm
727
|
+
|
|
728
|
+
def predict(covfile, respfile, maskfile=None, **kwargs):
    '''
    Make predictions on the basis of a pre-estimated normative model
    If only the covariates are specified then only predicted mean and variance
    will be returned. If the test responses are also specified then quantities
    That depend on those will also be returned (Z scores and error metrics)

    Basic usage::

        predict(covfile, [extra_arguments])

    where the variables are defined below.

    :param covfile: test covariates used to predict the response variable
    :param respfile: test response variables for the normative model
    :param maskfile: mask used to apply to the data (nifti only)
    :param model_path: Directory containing the normative model and metadata.
        When using parallel prediction, do not pass the model path. It will be
        automatically decided.
    :param outputsuffix: Text string to add to the output filenames
    :param batch_size: batch size (for use with normative_parallel)
    :param job_id: batch id, 'None' when non-parallel module is used.
    :param fold: which cross-validation fold to use (default = 0)
    :param models: list of model IDs to predict (if not specified all are computed)
    :param return_y: return the (transformed) response variable (default = False)

    All outputs are written to disk in the same format as the input. These are:

    :outputs: * Yhat - predictive mean
              * S2 - predictive variance
              * Z - Z scores
              * Y - response variable (if return_y is True)
    '''

    # Pop the keyword arguments consumed here; whatever remains in kwargs is
    # forwarded verbatim to the underlying model's predict() further below.
    model_path = kwargs.pop('model_path', 'Models')
    job_id = kwargs.pop('job_id', None)
    batch_size = kwargs.pop('batch_size', None)
    outputsuffix = kwargs.pop('outputsuffix', 'predict')
    outputsuffix = "_" + outputsuffix.replace("_", "")
    inputsuffix = kwargs.pop('inputsuffix', 'estimate')
    inputsuffix = "_" + inputsuffix.replace("_", "")
    alg = kwargs.pop('alg')
    models = kwargs.pop('models', None)
    fold = kwargs.pop('fold', 0)
    return_y = kwargs.pop('return_y', False)

    if alg == 'gpr':
        raise ValueError("gpr is not supported with predict()")

    # A missing response file degrades gracefully to prediction-only mode.
    if respfile is not None and not os.path.exists(respfile):
        print("Response file does not exist. Only returning predictions")
        respfile = None
    if not os.path.isdir(model_path):
        print('Models directory does not exist!')
        return
    else:
        if os.path.exists(os.path.join(model_path, 'meta_data.md')):
            with open(os.path.join(model_path, 'meta_data.md'), 'rb') as file:
                meta_data = pickle.load(file)
            inscaler = meta_data['inscaler']
            outscaler = meta_data['outscaler']
            mY = meta_data['mean_resp']
            sY = meta_data['std_resp']
            scaler_cov = meta_data['scaler_cov']
            scaler_resp = meta_data['scaler_resp']
            # NOTE: meta_data is deliberately overwritten with a bool flag here;
            # downstream code only checks whether metadata was available.
            meta_data = True
        else:
            print("No meta-data file is found!")
            inscaler = 'None'
            outscaler = 'None'
            meta_data = False

    if batch_size is not None:
        batch_size = int(batch_size)

    # job ids are 1-based on the command line; convert to a 0-based index
    # used to select the per-batch scalers below.
    if job_id is not None:
        job_id = int(job_id) - 1
        parallel = True
    else:
        parallel = False
        job_id = 0

    # load data
    print("Loading data ...")
    X = fileio.load(covfile)
    if len(X.shape) == 1:
        X = X[:, np.newaxis]
    if respfile is not None:
        Y, maskvol = load_response_vars(respfile, maskfile)
        if len(Y.shape) == 1:
            Y = Y[:, np.newaxis]

    sample_num = X.shape[0]
    # Determine how many response variables (models) to predict: either an
    # explicit list of model ids, or every saved model matching the suffix.
    if models is not None:
        feature_num = len(models)
    else:
        feature_num = len(glob.glob(os.path.join(model_path, 'NM_' + str(fold) + '_' +
                                                 '*' + inputsuffix + '.pkl')))
        models = range(feature_num)

    Yhat = np.zeros([sample_num, feature_num])
    S2 = np.zeros([sample_num, feature_num])
    Z = np.zeros([sample_num, feature_num])

    # Apply the scalers that were fit at estimation time (loaded from metadata).
    if inscaler in ['standardize', 'minmax', 'robminmax']:
        Xz = scaler_cov[job_id].transform(X)
    else:
        Xz = X
    if respfile is not None:
        if outscaler in ['standardize', 'minmax', 'robminmax']:
            Yz = scaler_resp[job_id].transform(Y)
        else:
            Yz = Y

    for i, m in enumerate(models):
        print("Prediction by model ", i+1, "of", feature_num)
        nm = norm_init(Xz)
        nm = nm.load(os.path.join(model_path, 'NM_' + str(fold) + '_' +
                                  str(m) + inputsuffix + '.pkl'))
        if (alg != 'hbr' or nm.configs['transferred'] == False):
            yhat, s2 = nm.predict(Xz, **kwargs)
        else:  # only for hbr and in the transfer scenario
            tsbefile = kwargs.get('tsbefile')
            batch_effects_test = fileio.load(tsbefile)
            yhat, s2 = nm.predict_on_new_sites(Xz, batch_effects_test)

        # Undo the response scaling; the variance scales with the square of
        # the (de)standardisation factor.
        if outscaler == 'standardize':
            Yhat[:, i] = scaler_resp[job_id].inverse_transform(yhat, index=i)
            S2[:, i] = s2.squeeze() * scaler_resp[job_id].s[i]**2
        elif outscaler in ['minmax', 'robminmax']:
            Yhat[:, i] = scaler_resp[job_id].inverse_transform(yhat, index=i)
            S2[:, i] = s2 * (scaler_resp[job_id].max[i] -
                             scaler_resp[job_id].min[i])**2
        else:
            Yhat[:, i] = yhat.squeeze()
            S2[:, i] = s2.squeeze()
        if respfile is not None:
            if alg == 'hbr':
                # Z scores for HBR must be computed independently for each model
                Z[:, i] = nm.get_mcmc_zscores(Xz, Yz[:, i:i+1], **kwargs)
            else:
                # NOTE(review): Yz here is in scaled space while Yhat/S2 were
                # inverse-transformed above; this Z is recomputed from
                # unscaled quantities later for non-HBR models — confirm.
                Z[:, i] = np.squeeze((Yz[:, i:i+1] - Yhat[:, i:i+1]) / np.sqrt(S2[:, i:i+1]))

    if respfile is None:
        # Prediction-only mode: no Z scores or evaluation metrics possible.
        save_results(None, Yhat, S2, None, outputsuffix=outputsuffix)

        return (Yhat, S2)

    else:
        if models is not None and len(Y.shape) > 1:
            Y = Y[:, models]
            # TODO: Needs simplification
            if meta_data:
                if type(mY) is list:  # This happens when non-parallel or when using meta data from batches
                    mY = mY[0][models]
                    sY = sY[0][models]
                else:  # This happens when parallel on collected metadata
                    mY = mY[models]
                    sY = sY[models]

        if len(Y.shape) == 1:
            Y = Y[:, np.newaxis]

        # warp the targets?
        # NOTE: nm is the last model loaded in the loop above; this assumes
        # all models share the same warp configuration — TODO confirm.
        if alg == 'blr' and nm.blr.warp is not None:
            warp = True
            Yw = np.zeros_like(Y)
            for i, m in enumerate(models):
                nm = norm_init(Xz)
                nm = nm.load(os.path.join(model_path, 'NM_0_' +
                                          str(m) + inputsuffix + '.pkl'))

                # hyp[0] is the noise parameter; warp parameters follow it.
                warp_param = nm.blr.hyp[1:nm.blr.warp.get_n_params()+1]
                Yw[:, i] = nm.blr.warp.f(Y[:, i], warp_param)
            Y = Yw
        else:
            warp = False

        if alg != 'hbr':
            # For HBR the Z scores are already computed
            Z = (Y - Yhat) / np.sqrt(S2)

        print("Evaluating the model ...")
        # MSLL needs the training mean/std (mY, sY), which are only known
        # when metadata was found and the targets were not warped.
        if meta_data and not warp:

            results = evaluate(Y, Yhat, S2=S2, mY=mY, sY=sY)
        else:
            results = evaluate(Y, Yhat, S2=S2,
                               metrics=['Rho', 'RMSE', 'SMSE', 'EXPV'])

        print("Evaluations Writing outputs ...")

        if return_y:
            save_results(respfile, Yhat, S2, maskvol, Z=Z, Y=Y,
                         outputsuffix=outputsuffix, results=results)
            return (Yhat, S2, Z, Y)
        else:
            save_results(respfile, Yhat, S2, maskvol, Z=Z,
                         outputsuffix=outputsuffix, results=results)
            return (Yhat, S2, Z)
|
+
def transfer(covfile, respfile, testcov=None, testresp=None, maskfile=None,
             **kwargs):
    '''
    Transfer learning on the basis of a pre-estimated normative model by using
    the posterior distribution over the parameters as an informed prior for
    new data. currently only supported for HBR.

    Basic usage::

        transfer(covfile, respfile, trbefile, model_path, output_path, inputsuffix [extra_arguments])

    where the variables are defined below.

    :param covfile: transfer covariates used to predict the response variable
    :param respfile: transfer response variables for the normative model
    :param maskfile: mask used to apply to the data (nifti only)
    :param trbefile: Training batch effects file
    :param testcov: Test covariates
    :param testresp: Test responses
    :param model_path: Directory containing the normative model and metadata
    :param output_path: Address to output directory to save the transferred models
    :param inputsuffix: The suffix for the inout models (default='estimate')
    :param batch_size: batch size (for use with normative_parallel)
    :param job_id: batch id
    :param log_path: directory for the ".jobsdone" completion markers
    :param count_jobsdone: 'True'/'False'; touch a completion marker on success

    All outputs are written to disk in the same format as the input. These are:

    :outputs: * Yhat - predictive mean
              * S2 - predictive variance
              * Z - Z scores
    '''
    alg = kwargs.pop('alg').lower()

    if alg != 'hbr' and alg != 'blr':
        print('Model transfer function is only possible for HBR and BLR models.')
        return
    # testing should not be obligatory for HBR,
    # but should be for BLR (since it doesn't produce transfer models)
    elif ('model_path' not in list(kwargs.keys())) or \
            ('trbefile' not in list(kwargs.keys())):
        print('InputError: model_path or trbefile are missing.')
        return
    # hbr has one additional mandatory arguments
    elif alg == 'hbr':
        if ('output_path' not in list(kwargs.keys())):
            print('InputError: output_path is missing.')
            return
        else:
            output_path = kwargs.pop('output_path', None)
            if not os.path.isdir(output_path):
                os.mkdir(output_path)

    # for hbr, testing is not mandatory, for blr's predict/transfer it is. This will be an architectural choice.
    # or (testresp==None)
    elif alg == 'blr':
        # use identity comparison for None (PEP 8); '== None' is incorrect
        # idiom and can misbehave for array-like arguments
        if (testcov is None) or \
                ('tsbefile' not in list(kwargs.keys())):
            print('InputError: Some mandatory arguments for blr are missing.')
            return
    # general arguments
    log_path = kwargs.pop('log_path', None)
    model_path = kwargs.pop('model_path')
    outputsuffix = kwargs.pop('outputsuffix', 'transfer')
    outputsuffix = "_" + outputsuffix.replace("_", "")
    inputsuffix = kwargs.pop('inputsuffix', 'estimate')
    inputsuffix = "_" + inputsuffix.replace("_", "")
    tsbefile = kwargs.pop('tsbefile', None)
    trbefile = kwargs.pop('trbefile', None)
    job_id = kwargs.pop('job_id', None)
    batch_size = kwargs.pop('batch_size', None)
    fold = kwargs.pop('fold', 0)  # This is almost always 0 in the transfer scenario.

    # for PCNonline automated parallel jobs loop: the flag may arrive as the
    # string 'True'/'False' from the command line; coerce it to a bool.
    count_jobsdone = kwargs.pop('count_jobsdone', 'False')
    if isinstance(count_jobsdone, str):
        count_jobsdone = count_jobsdone == 'True'

    if batch_size is not None:
        batch_size = int(batch_size)

    # job ids are 1-based on the command line; convert to 0-based index
    if job_id is not None:
        job_id = int(job_id) - 1
        parallel = True
    else:
        parallel = False
        job_id = 0

    if not os.path.isdir(model_path):
        print('Models directory does not exist!')
        return
    else:
        if os.path.exists(os.path.join(model_path, 'meta_data.md')):
            with open(os.path.join(model_path, 'meta_data.md'), 'rb') as file:
                my_meta_data = pickle.load(file)
            inscaler = my_meta_data['inscaler']
            outscaler = my_meta_data['outscaler']
            scaler_cov = my_meta_data['scaler_cov']
            scaler_resp = my_meta_data['scaler_resp']
            meta_data = True
        else:
            print("No meta-data file is found!")
            inscaler = 'None'
            outscaler = 'None'
            meta_data = False

    # load adaptation data
    print("Loading data ...")
    X = fileio.load(covfile)
    Y, maskvol = load_response_vars(respfile, maskfile)
    if len(Y.shape) == 1:
        Y = Y[:, np.newaxis]
    if len(X.shape) == 1:
        X = X[:, np.newaxis]

    # Extend the previously-fit scalers with the adaptation data, then
    # transform it. In parallel mode scalers are nested per job and fold.
    if inscaler in ['standardize', 'minmax', 'robminmax']:
        if parallel:
            scaler_cov[job_id][fold].extend(X)
            X = scaler_cov[job_id][fold].transform(X)
        else:
            scaler_cov[fold].extend(X)
            X = scaler_cov[fold].transform(X)

    if outscaler in ['standardize', 'minmax', 'robminmax']:
        if parallel:
            scaler_resp[job_id][fold].extend(Y)
            Y = scaler_resp[job_id][fold].transform(Y)
        else:
            scaler_resp[fold].extend(Y)
            Y = scaler_resp[fold].transform(Y)

    feature_num = Y.shape[1]

    # mean and std of training data only used for calculating the MSLL
    mY = np.mean(Y, axis=0)
    sY = np.std(Y, axis=0)

    batch_effects_train = fileio.load(trbefile)

    # load test data
    if testcov is not None:
        # we have a separate test dataset
        Xte = fileio.load(testcov)
        if len(Xte.shape) == 1:
            Xte = Xte[:, np.newaxis]
        ts_sample_num = Xte.shape[0]

        if inscaler in ['standardize', 'minmax', 'robminmax']:
            if parallel:
                Xte = scaler_cov[job_id][fold].transform(Xte)
            else:
                Xte = scaler_cov[fold].transform(Xte)

        if testresp is not None:
            Yte, testmask = load_response_vars(testresp, maskfile)
            if len(Yte.shape) == 1:
                Yte = Yte[:, np.newaxis]
            if outscaler in ['standardize', 'minmax', 'robminmax']:
                if parallel:
                    Yte = scaler_resp[job_id][fold].transform(Yte)
                else:
                    Yte = scaler_resp[fold].transform(Yte)

        else:
            Yte = np.zeros([ts_sample_num, feature_num])

        if tsbefile is not None:
            batch_effects_test = fileio.load(tsbefile)
        else:
            # assume two all-zero batch-effect columns when none are given
            batch_effects_test = np.zeros([Xte.shape[0], 2])
    else:
        ts_sample_num = 0

    Yhat = np.zeros([ts_sample_num, feature_num])
    S2 = np.zeros([ts_sample_num, feature_num])
    Z = np.zeros([ts_sample_num, feature_num])

    # Persist the updated metadata (scalers extended with the new data).
    if meta_data:
        my_meta_data['mean_resp'] = mY
        my_meta_data['std_resp'] = sY
        if inscaler not in ['None']:
            my_meta_data['scaler_cov'] = scaler_cov
        if outscaler not in ['None']:
            my_meta_data['scaler_resp'] = scaler_resp
        # use context managers so the file handles are closed deterministically
        if parallel:
            with open(os.path.join('Models', 'meta_data.md'), 'wb') as file:
                pickle.dump(my_meta_data, file)
        else:
            with open(os.path.join(output_path, 'meta_data.md'), 'wb') as file:
                pickle.dump(my_meta_data, file)

    # estimate the models for all subjects
    for i in range(feature_num):

        if alg == 'hbr':
            print("Using HBR transform...")
            nm = norm_init(X)
            if batch_size is not None:  # when using normative_parallel
                print("Transferring model ", job_id*batch_size+i)
                nm = nm.load(os.path.join(model_path, 'NM_0_' +
                                          str(job_id*batch_size+i) + inputsuffix +
                                          '.pkl'))
            else:
                print("Transferring model ", i+1, "of", feature_num)
                nm = nm.load(os.path.join(model_path, 'NM_0_' + str(i) +
                                          inputsuffix + '.pkl'))

            nm = nm.transfer(X, Y[:, i], batch_effects_train)

            if batch_size is not None:
                nm.save(os.path.join(output_path, 'NM_0_' +
                                     str(job_id*batch_size+i) + outputsuffix + '.pkl'))
            else:
                nm.save(os.path.join(output_path, 'NM_0_' +
                                     str(i) + outputsuffix + '.pkl'))

            if testcov is not None:
                yhat, s2 = nm.predict_on_new_sites(Xte, batch_effects_test)
                if testresp is not None:
                    # Z scores for HBR are computed per model via MCMC
                    Z[:, i] = nm.get_mcmc_zscores(Xte, Yte[:, i:i+1], **kwargs)

        # We basically use normative.predict script here.
        if alg == 'blr':
            print("Using BLR transform...")
            print("Transferring model ", i+1, "of", feature_num)
            nm = norm_init(X)
            nm = nm.load(os.path.join(model_path, 'NM_' + str(fold) + '_' +
                                      str(i) + inputsuffix + '.pkl'))

            # translate the syntax to what blr understands
            # first strip existing blr keyword arguments to avoid redundancy
            adapt_cov = kwargs.pop('adaptcovfile', None)
            adapt_res = kwargs.pop('adaptrespfile', None)
            adapt_vg = kwargs.pop('adaptvargroupfile', None)
            test_vg = kwargs.pop('testvargroupfile', None)
            if adapt_cov is not None or adapt_res is not None \
                    or adapt_vg is not None or test_vg is not None:
                print(
                    "Warning: redundant batch effect parameterisation. Using HBR syntax")

            yhat, s2 = nm.predict(Xte, X, Y[:, i],
                                  adaptcov=X,
                                  adaptresp=Y[:, i],
                                  adaptvargroup=batch_effects_train,
                                  testvargroup=batch_effects_test,
                                  **kwargs)

        if testcov is not None:
            # Undo response scaling; variance scales with the square of the
            # (de)standardisation factor.
            if outscaler == 'standardize':
                if parallel:
                    Yhat[:, i] = scaler_resp[job_id][fold].inverse_transform(
                        yhat.squeeze(), index=i)
                    S2[:, i] = s2.squeeze() * scaler_resp[job_id][fold].s[i]**2
                else:
                    Yhat[:, i] = scaler_resp[fold].inverse_transform(
                        yhat.squeeze(), index=i)
                    S2[:, i] = s2.squeeze() * scaler_resp[fold].s[i]**2

            elif outscaler in ['minmax', 'robminmax']:
                if parallel:
                    Yhat[:, i] = scaler_resp[job_id][fold].inverse_transform(yhat, index=i)
                    S2[:, i] = s2 * (scaler_resp[job_id][fold].max[i] -
                                     scaler_resp[job_id][fold].min[i])**2
                else:
                    Yhat[:, i] = scaler_resp[fold].inverse_transform(yhat, index=i)
                    S2[:, i] = s2 * (scaler_resp[fold].max[i] -
                                     scaler_resp[fold].min[i])**2
            else:
                Yhat[:, i] = yhat.squeeze()
                S2[:, i] = s2.squeeze()

    if testresp is None:
        save_results(respfile, Yhat, S2, maskvol, outputsuffix=outputsuffix)
        return (Yhat, S2)
    else:
        # warp the targets?
        if alg == 'blr' and nm.blr.warp is not None:
            warp = True
            Yw = np.zeros_like(Yte)
            for i in range(feature_num):
                nm = norm_init(Xte)
                nm = nm.load(os.path.join(model_path, 'NM_' + str(fold) + '_' +
                                          str(i) + inputsuffix + '.pkl'))

                # hyp[0] is the noise parameter; warp parameters follow it
                warp_param = nm.blr.hyp[1:nm.blr.warp.get_n_params()+1]
                Yw[:, i] = nm.blr.warp.f(Yte[:, i], warp_param)
            Yte = Yw
        else:
            warp = False
        # For HBR the Z scores are already computed
        if alg != 'hbr':
            Z = (Yte - Yhat) / np.sqrt(S2)

        print("Evaluating the model ...")
        # MSLL needs training mean/std, only valid without target warping
        if meta_data and not warp:
            results = evaluate(Yte, Yhat, S2=S2, mY=mY, sY=sY)
        else:
            results = evaluate(Yte, Yhat, S2=S2,
                               metrics=['Rho', 'RMSE', 'SMSE', 'EXPV'])

        save_results(respfile, Yhat, S2, maskvol, Z=Z, results=results,
                     outputsuffix=outputsuffix)

        # Creates a file for every job succesfully completed (for tracking failed jobs).
        if count_jobsdone:
            done_path = os.path.join(log_path, str(job_id)+".jobsdone")
            Path(done_path).touch()

        return (Yhat, S2, Z)
    # NOTE: a second, unreachable copy of the jobsdone block used to follow
    # here (both branches above return); it has been removed.
1245
|
+
def extend(covfile, respfile, maskfile=None, **kwargs):
    '''
    This function extends an existing HBR model with data from new sites/scanners.

    Basic usage::

        extend(covfile, respfile, trbefile, model_path, output_path, inputsuffix [extra_arguments])

    where the variables are defined below.

    :param covfile: covariates for new data
    :param respfile: response variables for new data
    :param maskfile: mask used to apply to the data (nifti only)
    :param model_path: Directory containing the normative model and metadata
    :param trbefile: file address to batch effects file for new data
    :param batch_size: batch size (for use with normative_parallel)
    :param job_id: batch id
    :param output_path: the path for saving the the extended model
    :param inputsuffix: The suffix for the input models (default='extend')
    :param informative_prior: use initial model prior or learn from scratch (default is False).
    :param generation_factor: generation factor refers to the number of samples generated for each
        combination of covariates and batch effects. Default is 10.


    All outputs are written to disk in the same format as the input.

    '''

    alg = kwargs.pop('alg')
    if alg != 'hbr':
        print('Model extention is only possible for HBR models.')
        return
    elif ('model_path' not in list(kwargs.keys())) or \
            ('output_path' not in list(kwargs.keys())) or \
            ('trbefile' not in list(kwargs.keys())):
        print('InputError: Please specify model_path, output_path, and trbefile.')
        return
    else:
        model_path = kwargs.pop('model_path')
        output_path = kwargs.pop('output_path')
        trbefile = kwargs.pop('trbefile')

    outputsuffix = kwargs.pop('outputsuffix', 'extend')
    outputsuffix = "_" + outputsuffix.replace("_", "")
    inputsuffix = kwargs.pop('inputsuffix', 'extend')
    inputsuffix = "_" + inputsuffix.replace("_", "")
    # these two may arrive as command-line strings, hence the string parsing
    informative_prior = kwargs.pop('informative_prior', 'False') == 'True'
    generation_factor = int(kwargs.pop('generation_factor', '10'))
    job_id = kwargs.pop('job_id', None)
    batch_size = kwargs.pop('batch_size', None)
    fold = kwargs.pop('fold', 0)  # This is almost always 0 in the extend scenario.



    if batch_size is not None:
        batch_size = int(batch_size)

    # job ids are 1-based on the command line; convert to a 0-based index
    if job_id is not None:
        job_id = int(job_id) - 1
        parallel = True
    else:
        parallel = False
        job_id = 0

    if not os.path.isdir(model_path):
        print('Models directory does not exist!')
        return
    else:
        if os.path.exists(os.path.join(model_path, 'meta_data.md')):
            with open(os.path.join(model_path, 'meta_data.md'), 'rb') as file:
                my_meta_data = pickle.load(file)
            inscaler = my_meta_data['inscaler']
            outscaler = my_meta_data['outscaler']
            scaler_cov = my_meta_data['scaler_cov']
            scaler_resp = my_meta_data['scaler_resp']
            meta_data = True
        else:
            print("No meta-data file is found!")
            inscaler = 'None'
            outscaler = 'None'
            meta_data = False

    if not os.path.isdir(output_path):
        os.mkdir(output_path)

    # load data
    print("Loading data ...")
    X = fileio.load(covfile)
    Y, maskvol = load_response_vars(respfile, maskfile)
    batch_effects_train = fileio.load(trbefile)

    if len(Y.shape) == 1:
        Y = Y[:, np.newaxis]
    if len(X.shape) == 1:
        X = X[:, np.newaxis]

    # Extend the previously-fit scalers with the new data, then transform.
    # In parallel mode scalers are nested per job and fold.
    if inscaler in ['standardize', 'minmax', 'robminmax']:
        if parallel:
            scaler_cov[job_id][fold].extend(X)
            X = scaler_cov[job_id][fold].transform(X)
        else:
            scaler_cov[fold].extend(X)
            X = scaler_cov[fold].transform(X)

    if outscaler in ['standardize', 'minmax', 'robminmax']:
        if parallel:
            scaler_resp[job_id][fold].extend(Y)
            Y = scaler_resp[job_id][fold].transform(Y)
        else:
            scaler_resp[fold].extend(Y)
            Y = scaler_resp[fold].transform(Y)

    feature_num = Y.shape[1]

    # persist metadata updated with the extended scalers
    if meta_data:
        if inscaler not in ['None']:
            my_meta_data['scaler_cov'] = scaler_cov
        if outscaler not in ['None']:
            my_meta_data['scaler_resp'] = scaler_resp
        if parallel:
            pickle.dump(my_meta_data, open(os.path.join('Models', 'meta_data.md'), 'wb'))
        else:
            pickle.dump(my_meta_data, open(os.path.join(output_path, 'meta_data.md'), 'wb'))


    # estimate the models for all subjects
    for i in range(feature_num):

        nm = norm_init(X)
        if parallel:  # when using normative_parallel
            # NOTE(review): this assumes batch_size was also provided in
            # parallel mode (job_id*batch_size) — confirm callers guarantee it.
            print("Extending model ", job_id*batch_size+i)
            nm = nm.load(os.path.join(model_path, 'NM_0_' +
                                      str(job_id*batch_size+i) + inputsuffix +
                                      '.pkl'))
        else:
            print("Extending model ", i+1, "of", feature_num)
            nm = nm.load(os.path.join(model_path, 'NM_0_' + str(i) +
                                      inputsuffix + '.pkl'))

        nm = nm.extend(X, Y[:, i:i+1], batch_effects_train,
                       samples=generation_factor,
                       informative_prior=informative_prior)

        if parallel:  # The model is save into both output_path and temporary parallel folders
            nm.save(os.path.join(output_path, 'NM_0_' +
                                 str(job_id*batch_size+i) + outputsuffix + '.pkl'))
            nm.save(os.path.join('Models', 'NM_0_' +
                                 str(i) + outputsuffix + '.pkl'))
        else:
            nm.save(os.path.join(output_path, 'NM_0_' +
                                 str(i) + outputsuffix + '.pkl'))
|
1398
|
+
def tune(covfile, respfile, maskfile=None, **kwargs):
    '''
    This function tunes an existing HBR model with real data.

    Basic usage::

        tune(covfile, respfile [extra_arguments])

    where the variables are defined below.

    :param covfile: covariates for new data
    :param respfile: response variables for new data
    :param maskfile: mask used to apply to the data (nifti only)
    :param model_path: Directory containing the normative model and metadata
    :param trbefile: file address to batch effects file for new data
    :param batch_size: batch size (for use with normative_parallel)
    :param job_id: batch id
    :param output_path: the path for saving the the extended model
    :param informative_prior: use initial model prior or learn from scracth (default is False).
    :param generation_factor: see below


    generation factor refers to the number of samples generated for each
    combination of covariates and batch effects. Default is 10.


    All outputs are written to disk in the same format as the input.

    '''

    alg = kwargs.pop('alg')
    if alg != 'hbr':
        # fixed copy-pasted message from extend(): this is the tuning entry point
        print('Model tuning is only possible for HBR models.')
        return
    elif ('model_path' not in list(kwargs.keys())) or \
            ('output_path' not in list(kwargs.keys())) or \
            ('trbefile' not in list(kwargs.keys())):
        print('InputError: Some mandatory arguments are missing.')
        return
    else:
        model_path = kwargs.pop('model_path')
        output_path = kwargs.pop('output_path')
        trbefile = kwargs.pop('trbefile')

    outputsuffix = kwargs.pop('outputsuffix', 'tuned')
    outputsuffix = "_" + outputsuffix.replace("_", "")
    inputsuffix = kwargs.pop('inputsuffix', 'estimate')
    inputsuffix = "_" + inputsuffix.replace("_", "")
    # these two may arrive as command-line strings, hence the string parsing
    informative_prior = kwargs.pop('informative_prior', 'False') == 'True'
    generation_factor = int(kwargs.pop('generation_factor', '10'))
    job_id = kwargs.pop('job_id', None)
    batch_size = kwargs.pop('batch_size', None)
    if batch_size is not None:
        batch_size = int(batch_size)
        # job ids are 1-based on the command line; convert to 0-based
        job_id = int(job_id) - 1

    if not os.path.isdir(model_path):
        print('Models directory does not exist!')
        return
    else:
        if os.path.exists(os.path.join(model_path, 'meta_data.md')):
            with open(os.path.join(model_path, 'meta_data.md'), 'rb') as file:
                meta_data = pickle.load(file)
            # tuning is only supported on unscaled data
            if (meta_data['inscaler'] != 'None' or
                    meta_data['outscaler'] != 'None'):
                # fixed copy-pasted message from extend()
                print('Model tuning on scaled data is not possible!')
                return

    if not os.path.isdir(output_path):
        os.mkdir(output_path)

    # load data
    print("Loading data ...")
    X = fileio.load(covfile)
    Y, maskvol = load_response_vars(respfile, maskfile)
    batch_effects_train = fileio.load(trbefile)

    if len(Y.shape) == 1:
        Y = Y[:, np.newaxis]
    if len(X.shape) == 1:
        X = X[:, np.newaxis]
    feature_num = Y.shape[1]

    # estimate the models for all subjects
    for i in range(feature_num):

        nm = norm_init(X)
        if batch_size is not None:  # when using normative_parallel
            print("Tuning model ", job_id*batch_size+i)
            nm = nm.load(os.path.join(model_path, 'NM_0_' +
                                      str(job_id*batch_size+i) + inputsuffix +
                                      '.pkl'))
        else:
            print("Tuning model ", i+1, "of", feature_num)
            nm = nm.load(os.path.join(model_path, 'NM_0_' + str(i) +
                                      inputsuffix + '.pkl'))

        nm = nm.tune(X, Y[:, i:i+1], batch_effects_train,
                     samples=generation_factor,
                     informative_prior=informative_prior)

        # in parallel mode the model is saved to both the output path and the
        # temporary 'Models' folder used by normative_parallel
        if batch_size is not None:
            nm.save(os.path.join(output_path, 'NM_0_' +
                                 str(job_id*batch_size+i) + outputsuffix + '.pkl'))
            nm.save(os.path.join('Models', 'NM_0_' +
                                 str(i) + outputsuffix + '.pkl'))
        else:
            nm.save(os.path.join(output_path, 'NM_0_' +
                                 str(i) + outputsuffix + '.pkl'))
|
1509
|
+
def merge(covfile=None, respfile=None, **kwargs):
    '''
    This function merges two existing (fitted) HBR models into a single
    model that covers the sites/scanners of both.

    Basic usage::

        merge(model_path1, model_path2 [extra_arguments])

    where the variables are defined below.

    :param covfile: Not required. Always set to None.
    :param respfile: Not required. Always set to None.
    :param model_path1: Directory containing the model and metadata (1st model)
    :param model_path2: Directory containing the model and metadata (2nd model)
    :param batch_size: batch size (for use with normative_parallel)
    :param job_id: batch id
    :param output_path: the path for saving the merged model
    :param generation_factor: see below

    The generation factor refers to the number of samples generated for each
    combination of covariates and batch effects. Default is 10.

    All outputs are written to disk in the same format as the input.
    '''

    alg = kwargs.pop('alg')
    if alg != 'hbr':
        print('Merging models is only possible for HBR models.')
        return
    elif ('model_path1' not in list(kwargs.keys())) or \
        ('model_path2' not in list(kwargs.keys())) or \
            ('output_path' not in list(kwargs.keys())):
        print('InputError: Some mandatory arguments are missing.')
        return
    else:
        model_path1 = kwargs.pop('model_path1')
        model_path2 = kwargs.pop('model_path2')
        output_path = kwargs.pop('output_path')

    # Normalize suffixes used to locate input models and name the outputs
    # (stored as '_<suffix>' with embedded underscores stripped).
    outputsuffix = kwargs.pop('outputsuffix', 'merge')
    outputsuffix = "_" + outputsuffix.replace("_", "")
    inputsuffix = kwargs.pop('inputsuffix', 'estimate')
    inputsuffix = "_" + inputsuffix.replace("_", "")
    generation_factor = int(kwargs.pop('generation_factor', '10'))
    job_id = kwargs.pop('job_id', None)
    batch_size = kwargs.pop('batch_size', None)
    if batch_size is not None:
        batch_size = int(batch_size)
        job_id = int(job_id) - 1  # job ids are 1-based; convert to 0-based

    if (not os.path.isdir(model_path1)) or (not os.path.isdir(model_path2)):
        print('Models directory does not exist!')
        return
    else:
        if batch_size is None:
            # Without batching, infer the number of models from metadata;
            # both models must have been trained on the same set of features.
            with open(os.path.join(model_path1, 'meta_data.md'), 'rb') as file:
                meta_data1 = pickle.load(file)
            with open(os.path.join(model_path2, 'meta_data.md'), 'rb') as file:
                meta_data2 = pickle.load(file)
            if meta_data1['valid_voxels'].shape[0] != meta_data2['valid_voxels'].shape[0]:
                print('Two models are trained on different features!')
                return
            else:
                feature_num = meta_data1['valid_voxels'].shape[0]
        else:
            feature_num = batch_size

    if not os.path.isdir(output_path):
        os.mkdir(output_path)

    # merging the models
    for i in range(feature_num):

        # The random covariates passed to norm_init are placeholders only;
        # the real model state is restored by load() below.
        nm1 = norm_init(np.random.rand(100, 10))
        nm2 = norm_init(np.random.rand(100, 10))
        if batch_size is not None:  # when using normative_parallel
            print("Merging model ", job_id*batch_size+i)
            nm1 = nm1.load(os.path.join(model_path1, 'NM_0_' +
                                        str(job_id*batch_size+i) + inputsuffix +
                                        '.pkl'))
            nm2 = nm2.load(os.path.join(model_path2, 'NM_0_' +
                                        str(job_id*batch_size+i) + inputsuffix +
                                        '.pkl'))
        else:
            print("Merging model ", i+1, "of", feature_num)
            nm1 = nm1.load(os.path.join(model_path1, 'NM_0_' + str(i) +
                                        inputsuffix + '.pkl'))
            # BUGFIX: load the second model via nm2 (was nm1.load, a typo
            # inconsistent with the batch branch above).
            nm2 = nm2.load(os.path.join(model_path2, 'NM_0_' + str(i) +
                                        inputsuffix + '.pkl'))

        nm_merged = nm1.merge(nm2, samples=generation_factor)

        if batch_size is not None:
            # Save under the global model index, plus a local copy in
            # 'Models' for normative_parallel collection.
            nm_merged.save(os.path.join(output_path, 'NM_0_' +
                                        str(job_id*batch_size+i) + outputsuffix + '.pkl'))
            nm_merged.save(os.path.join('Models', 'NM_0_' +
                                        str(i) + outputsuffix + '.pkl'))
        else:
            nm_merged.save(os.path.join(output_path, 'NM_0_' +
                                        str(i) + outputsuffix + '.pkl'))
|
|
1611
|
+
|
|
1612
|
+
|
|
1613
|
+
def main(*args):
    """ Parse arguments and estimate model

    Command-line dispatcher: parses the argument list, builds a call string
    from the parsed options and executes the requested target function
    (whatever name ``get_args`` returns in ``func``).
    """

    # Silence warnings from invalid floating-point operations (e.g. NaNs).
    np.seterr(invalid='ignore')

    # get_args is defined elsewhere in this module; `kw` holds any extra
    # key=value pairs not covered by the named options.
    rfile, mfile, cfile, cv, tcfile, trfile, func, alg, cfg, kw = get_args(
        args)

    # collect required arguments — these strings name the local variables
    # above and are resolved by name inside the exec() call below, so the
    # local variable names must not change
    pos_args = ['cfile', 'rfile']

    # collect basic keyword arguments controlling model estimation
    kw_args = ['maskfile=mfile',
               'cvfolds=cv',
               'testcov=tcfile',
               'testresp=trfile',
               'alg=alg',
               'configparam=cfg']

    # add additional keyword arguments, passed through as quoted strings
    for k in kw:
        kw_args.append(k + '=' + "'" + kw[k] + "'")
    all_args = ', '.join(pos_args + kw_args)

    # Executing the target function
    # NOTE(review): exec() on a string assembled from CLI input will run
    # arbitrary code if `func` or the kw values are untrusted — presumably
    # only trusted command lines reach this point; confirm before exposing.
    exec(func + '(' + all_args + ')')
|
|
1640
|
+
|
|
1641
|
+
def entrypoint():
    """Console-script entry point: forward the CLI arguments to main()."""
    cli_args = sys.argv[1:]
    main(cli_args)
|
|
1643
|
+
|
|
1644
|
+
|
|
1645
|
+
# For running from the command line:
if __name__ == "__main__":
    # Delegate to main() with the raw CLI arguments (program name stripped).
    main(sys.argv[1:])
|