pcntoolkit 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pcntoolkit/__init__.py +4 -0
- pcntoolkit/configs.py +9 -0
- pcntoolkit/dataio/__init__.py +1 -0
- pcntoolkit/dataio/fileio.py +608 -0
- pcntoolkit/model/KnuOp.py +48 -0
- pcntoolkit/model/NP.py +88 -0
- pcntoolkit/model/NPR.py +86 -0
- pcntoolkit/model/SHASH.py +509 -0
- pcntoolkit/model/__init__.py +6 -0
- pcntoolkit/model/architecture.py +219 -0
- pcntoolkit/model/bayesreg.py +585 -0
- pcntoolkit/model/core.21290 +0 -0
- pcntoolkit/model/gp.py +489 -0
- pcntoolkit/model/hbr.py +1584 -0
- pcntoolkit/model/rfa.py +245 -0
- pcntoolkit/normative.py +1647 -0
- pcntoolkit/normative_NP.py +336 -0
- pcntoolkit/normative_model/__init__.py +6 -0
- pcntoolkit/normative_model/norm_base.py +62 -0
- pcntoolkit/normative_model/norm_blr.py +303 -0
- pcntoolkit/normative_model/norm_gpr.py +112 -0
- pcntoolkit/normative_model/norm_hbr.py +752 -0
- pcntoolkit/normative_model/norm_np.py +333 -0
- pcntoolkit/normative_model/norm_rfa.py +109 -0
- pcntoolkit/normative_model/norm_utils.py +29 -0
- pcntoolkit/normative_parallel.py +1420 -0
- pcntoolkit/regression_model/blr/warp.py +1 -0
- pcntoolkit/trendsurf.py +315 -0
- pcntoolkit/util/__init__.py +1 -0
- pcntoolkit/util/bspline.py +149 -0
- pcntoolkit/util/hbr_utils.py +242 -0
- pcntoolkit/util/utils.py +1698 -0
- pcntoolkit-0.32.0.dist-info/LICENSE +674 -0
- pcntoolkit-0.32.0.dist-info/METADATA +134 -0
- pcntoolkit-0.32.0.dist-info/RECORD +37 -0
- pcntoolkit-0.32.0.dist-info/WHEEL +4 -0
- pcntoolkit-0.32.0.dist-info/entry_points.txt +5 -0
|
@@ -0,0 +1,1420 @@
|
|
|
1
|
+
#!/opt/conda/bin/python
|
|
2
|
+
|
|
3
|
+
# -----------------------------------------------------------------------------
|
|
4
|
+
# Run parallel normative modelling.
|
|
5
|
+
# All processing takes place in the processing directory (processing_dir)
|
|
6
|
+
# All inputs should be text files or binaries and space separated
|
|
7
|
+
#
|
|
8
|
+
# It is possible to run these functions using...
|
|
9
|
+
#
|
|
10
|
+
# * k-fold cross-validation
|
|
11
|
+
# * estimating a training dataset then applying to a second test dataset
|
|
12
|
+
#
|
|
13
|
+
# First, the data is split for parallel processing.
|
|
14
|
+
# Second, the splits are submitted to the cluster.
|
|
15
|
+
# Third, the output is collected and combined.
|
|
16
|
+
#
|
|
17
|
+
# written by (primarily) T Wolfers, (adapted) SM Kia, H Huijsdens, L Parks,
|
|
18
|
+
# S Rutherford, AF Marquand
|
|
19
|
+
# -----------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
from __future__ import division, print_function
|
|
22
|
+
|
|
23
|
+
import fileinput
|
|
24
|
+
import glob
|
|
25
|
+
import os
|
|
26
|
+
import pickle
|
|
27
|
+
import shutil
|
|
28
|
+
import sys
|
|
29
|
+
import time
|
|
30
|
+
from datetime import datetime
|
|
31
|
+
from subprocess import check_output, run
|
|
32
|
+
|
|
33
|
+
import numpy as np
|
|
34
|
+
import pandas as pd
|
|
35
|
+
|
|
36
|
+
try:
|
|
37
|
+
import pcntoolkit as ptk
|
|
38
|
+
import pcntoolkit.dataio.fileio as fileio
|
|
39
|
+
from pcntoolkit import configs
|
|
40
|
+
from pcntoolkit.util.utils import yes_or_no
|
|
41
|
+
ptkpath = ptk.__path__[0]
|
|
42
|
+
except ImportError:
|
|
43
|
+
pass
|
|
44
|
+
ptkpath = os.path.abspath(os.path.dirname(__file__))
|
|
45
|
+
if ptkpath not in sys.path:
|
|
46
|
+
sys.path.append(ptkpath)
|
|
47
|
+
import configs
|
|
48
|
+
import dataio.fileio as fileio
|
|
49
|
+
from util.utils import yes_or_no
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
PICKLE_PROTOCOL = configs.PICKLE_PROTOCOL
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def execute_nm(processing_dir,
               python_path,
               job_name,
               covfile_path,
               respfile_path,
               batch_size,
               memory,
               duration,
               normative_path=None,
               func='estimate',
               interactive=False,
               **kwargs):
    ''' Execute parallel normative models

    This function is a mother function that executes all parallel normative
    modelling routines: it splits the response file into batches, writes one
    job script per batch, submits the scripts to the cluster and, optionally,
    waits for the jobs and collects the results.

    Basic usage::

        execute_nm(processing_dir, python_path, job_name, covfile_path, respfile_path, batch_size, memory, duration)

    :param processing_dir: Full path to the processing dir. It is concatenated directly with 'batch_<n>', so it should end with a path separator.
    :param python_path: Full path to the python distribution
    :param normative_path: Full path to the normative.py. If None (default) it is retrieved automatically from the installed package.
    :param job_name: Name for the bash script that is the output of this function
    :param covfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the response file
    :param respfile_path: Full path to a .txt that contains all features (subjects x features)
    :param batch_size: Number of features in each batch
    :param memory: Memory requirements written as string for example 4gb or 500mb
    :param duration: The approximate duration of the job, a string with HH:MM:SS for example 01:01:01
    :param func: Name of the normative function to run, forwarded to the job wrapper (default 'estimate')
    :param cv_folds: Number of cross validations
    :param testcovfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the test response file
    :param testrespfile_path: Full path to a .txt file that contains all test features
    :param outputsuffix: Suffix of the output files (default 'estimate')
    :param log_path: Path for saving log files
    :param binary: If True uses binary format for response file otherwise it is text
    :param cluster_spec: 'torque' for PBS Torque and 'slurm' for Slurm clusters.
    :param n_cores_per_batch: Number of cores requested per batch (default '1'); only passed on for torque submissions
    :param interactive: If False (default) the user should manually
                        rerun the failed jobs or collect the results.
                        If 'auto' the job status are checked until all
                        jobs are completed then the failed jobs are rerun
                        and the results are automatically collected.
                        Using 'query' is similar to 'auto' except that it
                        asks for user verification, thus it is immune to
                        endless loops in the case of bugs in the code.

    :raises ValueError: if both testrespfile_path and cv_folds are specified

    written by (primarily) T Wolfers, (adapted) SM Kia
    The documentation is adapted by S Rutherford.
    '''

    cv_folds = kwargs.get('cv_folds', None)
    testrespfile_path = kwargs.get('testrespfile_path', None)
    outputsuffix = kwargs.get('outputsuffix', 'estimate')
    outputsuffix = "_" + outputsuffix.replace("_", "")
    log_path = kwargs.get('log_path', None)
    # These options are consumed here and are not forwarded to the job
    # wrappers through kwargs.
    cluster_spec = kwargs.pop('cluster_spec', 'torque')
    binary = kwargs.pop('binary', False)
    cores = kwargs.pop('n_cores_per_batch', '1')

    # Reject the inconsistent configuration before anything is written to
    # disk or submitted to the cluster.
    train_test = testrespfile_path is not None
    if train_test and cv_folds is not None:
        raise ValueError("If the test response file is specified "
                         "cv_folds must be equal to None")

    if normative_path is None:
        normative_path = ptkpath + '/normative.py'

    split_nm(processing_dir,
             respfile_path,
             batch_size,
             binary,
             **kwargs)

    number_of_batches = len(glob.glob(processing_dir + 'batch_*'))
    file_extentions = '.pkl' if binary else '.txt'

    kwargs.update({'batch_size': str(batch_size)})
    job_ids = []
    start_time = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

    # The train/test, forward-model and cross-validation set-ups are
    # submitted in the same way; the options that distinguish them
    # (cv_folds, testcovfile_path, ...) travel to the job inside kwargs.
    # Only the train/test set-up needs the per-batch test response file.
    for n in range(1, number_of_batches + 1):
        kwargs.update({'job_id': str(n)})
        batch_processing_dir = processing_dir + 'batch_' + str(n) + '/'
        batch_job_name = job_name + '_' + str(n) + '.sh'
        batch_respfile_path = (batch_processing_dir + 'resp_batch_' +
                               str(n) + file_extentions)
        batch_job_path = batch_processing_dir + batch_job_name

        if train_test and cluster_spec in ('torque', 'slurm'):
            # point the job at this batch's slice of the test responses
            kwargs.update({'testrespfile_path':
                           batch_processing_dir + 'testresp_batch_' +
                           str(n) + file_extentions})

        if cluster_spec == 'torque':
            bashwrap_nm(batch_processing_dir,
                        python_path,
                        normative_path,
                        batch_job_name,
                        covfile_path,
                        batch_respfile_path,
                        func=func,
                        **kwargs)
            job_ids.append(qsub_nm(job_path=batch_job_path,
                                   log_path=log_path,
                                   memory=memory,
                                   duration=duration,
                                   cores=cores))
        elif cluster_spec == 'slurm':
            sbatchwrap_nm(batch_processing_dir,
                          python_path,
                          normative_path,
                          batch_job_name,
                          covfile_path,
                          batch_respfile_path,
                          func=func,
                          memory=memory,
                          duration=duration,
                          **kwargs)
            job_ids.append(sbatch_nm(job_path=batch_job_path))
        elif cluster_spec == 'new':
            # this part requires addition in different environment [
            # NOTE(review): template calls kept exactly as in the original;
            # they look like placeholders to be adapted - confirm against
            # the wrapper signatures before relying on this branch.
            if train_test:
                sbatchwrap_nm(processing_dir=batch_processing_dir,
                              func=func, **kwargs)
                sbatch_nm(processing_dir=batch_processing_dir)
            else:
                bashwrap_nm(processing_dir=batch_processing_dir, func=func,
                            **kwargs)
                qsub_nm(processing_dir=batch_processing_dir)
            # ]
        # any other cluster_spec submits nothing (unchanged behaviour)

    if not interactive:
        return

    def _collect(collect):
        # collect=False only checks for failed batches, collect=True also
        # combines the batch outputs.
        return collect_nm(processing_dir,
                          job_name,
                          func=func,
                          collect=collect,
                          binary=binary,
                          batch_size=batch_size,
                          outputsuffix=outputsuffix)

    def _rerun_failed():
        if cluster_spec == 'torque':
            rerun_nm(processing_dir, log_path=log_path, memory=memory,
                     duration=duration, binary=binary,
                     interactive=interactive, cores=cores)
        elif cluster_spec == 'slurm':
            sbatchrerun_nm(processing_dir,
                           memory=memory,
                           duration=duration,
                           binary=binary,
                           log_path=log_path,
                           interactive=interactive)

    check_jobs(job_ids, cluster_spec, start_time, delay=60)

    # Keep re-submitting failed batches until the check passes; in 'query'
    # mode the user can stop the loop by declining the rerun.
    while not _collect(collect=False):
        if interactive == 'query':
            if not yes_or_no('Rerun the failed jobs?'):
                break
        else:
            print('Reruning the failed jobs ...')
        _rerun_failed()

    if interactive == 'query':
        if yes_or_no('Collect the results?'):
            _collect(collect=True)
    else:
        print('Collecting the results ...')
        _collect(collect=True)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
"""routines that are environment independent"""
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def split_nm(processing_dir,
             respfile_path,
             batch_size,
             binary,
             **kwargs):
    ''' This function prepares the input files for normative_parallel.

    The response file (and, if given, the test response file) is cut
    column-wise into consecutive blocks of batch_size features. Block n is
    written to processing_dir + 'batch_<n>/' as 'resp_batch_<n>' (and
    'testresp_batch_<n>'), next to an empty 'Models' folder.

    Basic usage::

        split_nm(processing_dir, respfile_path, batch_size, binary, testrespfile_path)

    :param processing_dir: Full path to the processing dir. It is concatenated directly with 'batch_<n>', so it should end with a path separator.
    :param respfile_path: Full path to the responsefile.txt (subjects x features)
    :param batch_size: Number of features in each batch
    :param testrespfile_path: Full path to the test responsefile.txt (subjects x features)
    :param binary: If True the files are pickles (.pkl), otherwise text (.txt)

    :outputs: The creation of a folder structure for batch-wise processing.
              The working directory of the process is changed to
              processing_dir as a side effect.

    :raises ValueError: if the extension of respfile_path or
                        testrespfile_path does not match binary

    written by (primarily) T Wolfers (adapted) SM Kia, (adapted) S Rutherford.
    '''

    testrespfile_path = kwargs.pop('testrespfile_path', None)

    # Truthiness is used throughout so that this function agrees with
    # execute_nm and collect_nm on which extension belongs to `binary`.
    extension = '.pkl' if binary else '.txt'

    def _check_extension(path, label):
        if os.path.splitext(path)[1] != extension:
            raise ValueError("If binary is " + str(bool(binary)) +
                             " the file format for the " + label +
                             " file must be " + extension)

    def _load(path):
        if binary:
            data = pd.read_pickle(path)
        else:
            data = fileio.load_ascii(path)
        return pd.DataFrame(data)

    def _save(frame, path_stem):
        if binary:
            frame.to_pickle(path_stem + '.pkl', protocol=PICKLE_PROTOCOL)
        else:
            fileio.save_pd(frame, path_stem + '.txt')

    # Validate every file name before anything is read from disk.
    _check_extension(respfile_path, 'respfile')
    if testrespfile_path is not None:
        _check_extension(testrespfile_path, 'testrespfile')

    respfile = _load(respfile_path)
    testrespfile = None
    if testrespfile_path is not None:
        testrespfile = _load(testrespfile_path)

    # Column boundaries of the batches: 0, batch_size, 2*batch_size, ...,
    # closed by the total number of features so the last batch may be short.
    numfeatures = respfile.shape[1]
    batch_vec = np.append(np.arange(0, numfeatures, batch_size),
                          numfeatures)

    for n in range(len(batch_vec) - 1):
        start, stop = batch_vec[n], batch_vec[n + 1]
        os.chdir(processing_dir)
        batch_dir = processing_dir + 'batch_' + str(n + 1)
        # exist_ok makes a repeated split safe and also restores a missing
        # 'Models' folder inside an already existing batch directory.
        os.makedirs(batch_dir + '/Models/', exist_ok=True)

        _save(respfile.iloc[:, start:stop],
              batch_dir + '/' + 'resp_batch_' + str(n + 1))
        if testrespfile is not None:
            _save(testrespfile.iloc[:, start:stop],
                  batch_dir + '/' + 'testresp_batch_' + str(n + 1))
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def collect_nm(processing_dir,
|
|
472
|
+
job_name,
|
|
473
|
+
func='estimate',
|
|
474
|
+
collect=False,
|
|
475
|
+
binary=False,
|
|
476
|
+
batch_size=None,
|
|
477
|
+
outputsuffix='estimate'):
|
|
478
|
+
'''Function to checks and collects all batches.
|
|
479
|
+
|
|
480
|
+
Basic usage::
|
|
481
|
+
|
|
482
|
+
collect_nm(processing_dir, job_name)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
:param processing_dir: Full path to the processing directory
|
|
486
|
+
:param collect: If True data is checked for failed batches and collected; if False data is just checked
|
|
487
|
+
:param binary: Results in pkl format
|
|
488
|
+
|
|
489
|
+
:outputs: Text or pkl files containing all results accross all batches the combined output (written to disk).
|
|
490
|
+
|
|
491
|
+
:returns 0: if batches fail
|
|
492
|
+
:returns 1: if bathches complete successfully
|
|
493
|
+
|
|
494
|
+
written by (primarily) T Wolfers, (adapted) SM Kia, (adapted) S Rutherford.
|
|
495
|
+
'''
|
|
496
|
+
|
|
497
|
+
outputsuffix = "_" + outputsuffix.replace("_", "")
|
|
498
|
+
|
|
499
|
+
if binary:
|
|
500
|
+
file_extentions = '.pkl'
|
|
501
|
+
else:
|
|
502
|
+
file_extentions = '.txt'
|
|
503
|
+
|
|
504
|
+
# detect number of subjects, batches, hyperparameters and CV
|
|
505
|
+
batches = glob.glob(processing_dir + 'batch_*/')
|
|
506
|
+
|
|
507
|
+
count = 0
|
|
508
|
+
batch_fail = []
|
|
509
|
+
|
|
510
|
+
if (func != 'fit' and func != 'extend' and func != 'merge' and func != 'tune'):
|
|
511
|
+
# TODO: Collect_nm only depends on yhat, thus does not work when no
|
|
512
|
+
# prediction is made (when test cov is not specified).
|
|
513
|
+
files = glob.glob(processing_dir + 'batch_*/' + 'yhat' + outputsuffix
|
|
514
|
+
+ file_extentions)
|
|
515
|
+
if len(files) > 0:
|
|
516
|
+
file_example = fileio.load(files[0])
|
|
517
|
+
else:
|
|
518
|
+
raise ValueError(f"Missing output files (yhats at: {processing_dir + 'batch_*/' + 'yhat' + outputsuffix + file_extentions}")
|
|
519
|
+
|
|
520
|
+
numsubjects = file_example.shape[0]
|
|
521
|
+
try:
|
|
522
|
+
# doesn't exist if size=1, and txt file
|
|
523
|
+
batch_size = file_example.shape[1]
|
|
524
|
+
except:
|
|
525
|
+
batch_size = 1
|
|
526
|
+
|
|
527
|
+
# artificially creates files for batches that were not executed
|
|
528
|
+
batch_dirs = glob.glob(processing_dir + 'batch_*/')
|
|
529
|
+
batch_dirs = fileio.sort_nicely(batch_dirs)
|
|
530
|
+
for batch in batch_dirs:
|
|
531
|
+
filepath = glob.glob(batch + 'yhat' + outputsuffix + '*')
|
|
532
|
+
if filepath == []:
|
|
533
|
+
count = count+1
|
|
534
|
+
batch1 = glob.glob(batch + '/' + job_name + '*.sh')
|
|
535
|
+
print(batch1)
|
|
536
|
+
batch_fail.append(batch1)
|
|
537
|
+
if collect is True:
|
|
538
|
+
pRho = np.ones(batch_size)
|
|
539
|
+
pRho = pRho.transpose()
|
|
540
|
+
pRho = pd.Series(pRho)
|
|
541
|
+
fileio.save(pRho, batch + 'pRho' + outputsuffix +
|
|
542
|
+
file_extentions)
|
|
543
|
+
|
|
544
|
+
Rho = np.zeros(batch_size)
|
|
545
|
+
Rho = Rho.transpose()
|
|
546
|
+
Rho = pd.Series(Rho)
|
|
547
|
+
fileio.save(Rho, batch + 'Rho' + outputsuffix +
|
|
548
|
+
file_extentions)
|
|
549
|
+
|
|
550
|
+
rmse = np.zeros(batch_size)
|
|
551
|
+
rmse = rmse.transpose()
|
|
552
|
+
rmse = pd.Series(rmse)
|
|
553
|
+
fileio.save(rmse, batch + 'RMSE' + outputsuffix +
|
|
554
|
+
file_extentions)
|
|
555
|
+
|
|
556
|
+
smse = np.zeros(batch_size)
|
|
557
|
+
smse = smse.transpose()
|
|
558
|
+
smse = pd.Series(smse)
|
|
559
|
+
fileio.save(smse, batch + 'SMSE' + outputsuffix +
|
|
560
|
+
file_extentions)
|
|
561
|
+
|
|
562
|
+
expv = np.zeros(batch_size)
|
|
563
|
+
expv = expv.transpose()
|
|
564
|
+
expv = pd.Series(expv)
|
|
565
|
+
fileio.save(expv, batch + 'EXPV' + outputsuffix +
|
|
566
|
+
file_extentions)
|
|
567
|
+
|
|
568
|
+
msll = np.zeros(batch_size)
|
|
569
|
+
msll = msll.transpose()
|
|
570
|
+
msll = pd.Series(msll)
|
|
571
|
+
fileio.save(msll, batch + 'MSLL' + outputsuffix +
|
|
572
|
+
file_extentions)
|
|
573
|
+
|
|
574
|
+
yhat = np.zeros([numsubjects, batch_size])
|
|
575
|
+
yhat = pd.DataFrame(yhat)
|
|
576
|
+
fileio.save(yhat, batch + 'yhat' + outputsuffix +
|
|
577
|
+
file_extentions)
|
|
578
|
+
|
|
579
|
+
ys2 = np.zeros([numsubjects, batch_size])
|
|
580
|
+
ys2 = pd.DataFrame(ys2)
|
|
581
|
+
fileio.save(ys2, batch + 'ys2' + outputsuffix +
|
|
582
|
+
file_extentions)
|
|
583
|
+
|
|
584
|
+
Z = np.zeros([numsubjects, batch_size])
|
|
585
|
+
Z = pd.DataFrame(Z)
|
|
586
|
+
fileio.save(Z, batch + 'Z' + outputsuffix +
|
|
587
|
+
file_extentions)
|
|
588
|
+
|
|
589
|
+
nll = np.zeros(batch_size)
|
|
590
|
+
nll = nll.transpose()
|
|
591
|
+
nll = pd.Series(nll)
|
|
592
|
+
fileio.save(nll, batch + 'NLL' + outputsuffix +
|
|
593
|
+
file_extentions)
|
|
594
|
+
|
|
595
|
+
bic = np.zeros(batch_size)
|
|
596
|
+
bic = bic.transpose()
|
|
597
|
+
bic = pd.Series(bic)
|
|
598
|
+
fileio.save(bic, batch + 'BIC' + outputsuffix +
|
|
599
|
+
file_extentions)
|
|
600
|
+
|
|
601
|
+
if not os.path.isdir(batch + 'Models'):
|
|
602
|
+
os.mkdir('Models')
|
|
603
|
+
|
|
604
|
+
else: # if more than 10% of yhat is nan then it is a failed batch
|
|
605
|
+
yhat = fileio.load(filepath[0])
|
|
606
|
+
if np.count_nonzero(~np.isnan(yhat))/(np.prod(yhat.shape)) < 0.9:
|
|
607
|
+
count = count+1
|
|
608
|
+
batch1 = glob.glob(batch + '/' + job_name + '*.sh')
|
|
609
|
+
print('More than 10% nans in ' + batch1[0])
|
|
610
|
+
batch_fail.append(batch1)
|
|
611
|
+
|
|
612
|
+
else:
|
|
613
|
+
batch_dirs = glob.glob(processing_dir + 'batch_*/')
|
|
614
|
+
batch_dirs = fileio.sort_nicely(batch_dirs)
|
|
615
|
+
for batch in batch_dirs:
|
|
616
|
+
filepath = glob.glob(batch + 'Models/' + 'NM_' + '*' + outputsuffix
|
|
617
|
+
+ '*')
|
|
618
|
+
if len(filepath) < batch_size:
|
|
619
|
+
count = count+1
|
|
620
|
+
batch1 = glob.glob(batch + '/' + job_name + '*.sh')
|
|
621
|
+
print(batch1)
|
|
622
|
+
batch_fail.append(batch1)
|
|
623
|
+
|
|
624
|
+
# combines all output files across batches
|
|
625
|
+
if collect is True:
|
|
626
|
+
pRho_filenames = glob.glob(processing_dir + 'batch_*/' + 'pRho' +
|
|
627
|
+
outputsuffix + '*')
|
|
628
|
+
if pRho_filenames:
|
|
629
|
+
pRho_filenames = fileio.sort_nicely(pRho_filenames)
|
|
630
|
+
pRho_dfs = []
|
|
631
|
+
for pRho_filename in pRho_filenames:
|
|
632
|
+
if batch_size == 1 and binary is False: # if batch size = 1 and .txt file
|
|
633
|
+
# from 0d (scalar) to 1d-array
|
|
634
|
+
pRho_dfs.append(pd.DataFrame(
|
|
635
|
+
fileio.load(pRho_filename)[np.newaxis,]))
|
|
636
|
+
else:
|
|
637
|
+
pRho_dfs.append(pd.DataFrame(fileio.load(pRho_filename)))
|
|
638
|
+
pRho_dfs = pd.concat(pRho_dfs, ignore_index=True, axis=0)
|
|
639
|
+
fileio.save(pRho_dfs, processing_dir + 'pRho' + outputsuffix +
|
|
640
|
+
file_extentions)
|
|
641
|
+
del pRho_dfs
|
|
642
|
+
|
|
643
|
+
Rho_filenames = glob.glob(processing_dir + 'batch_*/' + 'Rho' +
|
|
644
|
+
outputsuffix + '*')
|
|
645
|
+
if Rho_filenames:
|
|
646
|
+
Rho_filenames = fileio.sort_nicely(Rho_filenames)
|
|
647
|
+
Rho_dfs = []
|
|
648
|
+
for Rho_filename in Rho_filenames:
|
|
649
|
+
if batch_size == 1 and binary is False: # if batch size = 1 and .txt file
|
|
650
|
+
# from 0d (scalar) to 1d-array
|
|
651
|
+
Rho_dfs.append(pd.DataFrame(
|
|
652
|
+
fileio.load(Rho_filename)[np.newaxis,]))
|
|
653
|
+
else:
|
|
654
|
+
Rho_dfs.append(pd.DataFrame(fileio.load(Rho_filename)))
|
|
655
|
+
Rho_dfs = pd.concat(Rho_dfs, ignore_index=True, axis=0)
|
|
656
|
+
fileio.save(Rho_dfs, processing_dir + 'Rho' + outputsuffix +
|
|
657
|
+
file_extentions)
|
|
658
|
+
del Rho_dfs
|
|
659
|
+
|
|
660
|
+
Z_filenames = glob.glob(processing_dir + 'batch_*/' + 'Z' +
|
|
661
|
+
outputsuffix + '*')
|
|
662
|
+
if Z_filenames:
|
|
663
|
+
Z_filenames = fileio.sort_nicely(Z_filenames)
|
|
664
|
+
Z_dfs = []
|
|
665
|
+
for Z_filename in Z_filenames:
|
|
666
|
+
Z_dfs.append(pd.DataFrame(fileio.load(Z_filename)))
|
|
667
|
+
Z_dfs = pd.concat(Z_dfs, ignore_index=True, axis=1)
|
|
668
|
+
fileio.save(Z_dfs, processing_dir + 'Z' + outputsuffix +
|
|
669
|
+
file_extentions)
|
|
670
|
+
del Z_dfs
|
|
671
|
+
|
|
672
|
+
yhat_filenames = glob.glob(processing_dir + 'batch_*/' + 'yhat' +
|
|
673
|
+
outputsuffix + '*')
|
|
674
|
+
if yhat_filenames:
|
|
675
|
+
yhat_filenames = fileio.sort_nicely(yhat_filenames)
|
|
676
|
+
yhat_dfs = []
|
|
677
|
+
for yhat_filename in yhat_filenames:
|
|
678
|
+
yhat_dfs.append(pd.DataFrame(fileio.load(yhat_filename)))
|
|
679
|
+
yhat_dfs = pd.concat(yhat_dfs, ignore_index=True, axis=1)
|
|
680
|
+
fileio.save(yhat_dfs, processing_dir + 'yhat' + outputsuffix +
|
|
681
|
+
file_extentions)
|
|
682
|
+
del yhat_dfs
|
|
683
|
+
|
|
684
|
+
ys2_filenames = glob.glob(processing_dir + 'batch_*/' + 'ys2' +
|
|
685
|
+
outputsuffix + '*')
|
|
686
|
+
if ys2_filenames:
|
|
687
|
+
ys2_filenames = fileio.sort_nicely(ys2_filenames)
|
|
688
|
+
ys2_dfs = []
|
|
689
|
+
for ys2_filename in ys2_filenames:
|
|
690
|
+
ys2_dfs.append(pd.DataFrame(fileio.load(ys2_filename)))
|
|
691
|
+
ys2_dfs = pd.concat(ys2_dfs, ignore_index=True, axis=1)
|
|
692
|
+
fileio.save(ys2_dfs, processing_dir + 'ys2' + outputsuffix +
|
|
693
|
+
file_extentions)
|
|
694
|
+
del ys2_dfs
|
|
695
|
+
|
|
696
|
+
rmse_filenames = glob.glob(processing_dir + 'batch_*/' + 'RMSE' +
|
|
697
|
+
outputsuffix + '*')
|
|
698
|
+
if rmse_filenames:
|
|
699
|
+
rmse_filenames = fileio.sort_nicely(rmse_filenames)
|
|
700
|
+
rmse_dfs = []
|
|
701
|
+
for rmse_filename in rmse_filenames:
|
|
702
|
+
if batch_size == 1 and binary is False: # if batch size = 1 and .txt file
|
|
703
|
+
# from 0d (scalar) to 1d-array
|
|
704
|
+
rmse_dfs.append(pd.DataFrame(
|
|
705
|
+
fileio.load(rmse_filename)[np.newaxis,]))
|
|
706
|
+
else:
|
|
707
|
+
rmse_dfs.append(pd.DataFrame(fileio.load(rmse_filename)))
|
|
708
|
+
rmse_dfs = pd.concat(rmse_dfs, ignore_index=True, axis=0)
|
|
709
|
+
fileio.save(rmse_dfs, processing_dir + 'RMSE' + outputsuffix +
|
|
710
|
+
file_extentions)
|
|
711
|
+
del rmse_dfs
|
|
712
|
+
|
|
713
|
+
smse_filenames = glob.glob(processing_dir + 'batch_*/' + 'SMSE' +
|
|
714
|
+
outputsuffix + '*')
|
|
715
|
+
if smse_filenames:
|
|
716
|
+
smse_filenames = fileio.sort_nicely(smse_filenames)
|
|
717
|
+
smse_dfs = []
|
|
718
|
+
for smse_filename in smse_filenames:
|
|
719
|
+
if batch_size == 1 and binary is False: # if batch size = 1 and .txt file
|
|
720
|
+
# from 0d (scalar) to 1d-array
|
|
721
|
+
smse_dfs.append(pd.DataFrame(
|
|
722
|
+
fileio.load(smse_filename)[np.newaxis,]))
|
|
723
|
+
else:
|
|
724
|
+
smse_dfs.append(pd.DataFrame(fileio.load(smse_filename)))
|
|
725
|
+
smse_dfs = pd.concat(smse_dfs, ignore_index=True, axis=0)
|
|
726
|
+
fileio.save(smse_dfs, processing_dir + 'SMSE' + outputsuffix +
|
|
727
|
+
file_extentions)
|
|
728
|
+
del smse_dfs
|
|
729
|
+
|
|
730
|
+
expv_filenames = glob.glob(processing_dir + 'batch_*/' + 'EXPV' +
|
|
731
|
+
outputsuffix + '*')
|
|
732
|
+
if expv_filenames:
|
|
733
|
+
expv_filenames = fileio.sort_nicely(expv_filenames)
|
|
734
|
+
expv_dfs = []
|
|
735
|
+
for expv_filename in expv_filenames:
|
|
736
|
+
if batch_size == 1 and binary is False: # if batch size = 1 and .txt file
|
|
737
|
+
# from 0d (scalar) to 1d-array
|
|
738
|
+
expv_dfs.append(pd.DataFrame(
|
|
739
|
+
fileio.load(expv_filename)[np.newaxis,]))
|
|
740
|
+
else:
|
|
741
|
+
expv_dfs.append(pd.DataFrame(fileio.load(expv_filename)))
|
|
742
|
+
expv_dfs = pd.concat(expv_dfs, ignore_index=True, axis=0)
|
|
743
|
+
fileio.save(expv_dfs, processing_dir + 'EXPV' + outputsuffix +
|
|
744
|
+
file_extentions)
|
|
745
|
+
del expv_dfs
|
|
746
|
+
|
|
747
|
+
msll_filenames = glob.glob(processing_dir + 'batch_*/' + 'MSLL' +
|
|
748
|
+
outputsuffix + '*')
|
|
749
|
+
if msll_filenames:
|
|
750
|
+
msll_filenames = fileio.sort_nicely(msll_filenames)
|
|
751
|
+
msll_dfs = []
|
|
752
|
+
for msll_filename in msll_filenames:
|
|
753
|
+
if batch_size == 1 and binary is False: # if batch size = 1 and .txt file
|
|
754
|
+
# from 0d (scalar) to 1d-array
|
|
755
|
+
msll_dfs.append(pd.DataFrame(
|
|
756
|
+
fileio.load(msll_filename)[np.newaxis,]))
|
|
757
|
+
else:
|
|
758
|
+
msll_dfs.append(pd.DataFrame(fileio.load(msll_filename)))
|
|
759
|
+
msll_dfs = pd.concat(msll_dfs, ignore_index=True, axis=0)
|
|
760
|
+
fileio.save(msll_dfs, processing_dir + 'MSLL' + outputsuffix +
|
|
761
|
+
file_extentions)
|
|
762
|
+
del msll_dfs
|
|
763
|
+
|
|
764
|
+
nll_filenames = glob.glob(processing_dir + 'batch_*/' + 'NLL' +
|
|
765
|
+
outputsuffix + '*')
|
|
766
|
+
if nll_filenames:
|
|
767
|
+
nll_filenames = fileio.sort_nicely(nll_filenames)
|
|
768
|
+
nll_dfs = []
|
|
769
|
+
for nll_filename in nll_filenames:
|
|
770
|
+
if batch_size == 1 and binary is False: # if batch size = 1 and .txt file
|
|
771
|
+
# from 0d (scalar) to 1d-array
|
|
772
|
+
nll_dfs.append(pd.DataFrame(
|
|
773
|
+
fileio.load(nll_filename)[np.newaxis,]))
|
|
774
|
+
else:
|
|
775
|
+
nll_dfs.append(pd.DataFrame(fileio.load(nll_filename)))
|
|
776
|
+
nll_dfs = pd.concat(nll_dfs, ignore_index=True, axis=0)
|
|
777
|
+
fileio.save(nll_dfs, processing_dir + 'NLL' + outputsuffix +
|
|
778
|
+
file_extentions)
|
|
779
|
+
del nll_dfs
|
|
780
|
+
|
|
781
|
+
bic_filenames = glob.glob(processing_dir + 'batch_*/' + 'BIC' +
|
|
782
|
+
outputsuffix + '*')
|
|
783
|
+
if bic_filenames:
|
|
784
|
+
bic_filenames = fileio.sort_nicely(bic_filenames)
|
|
785
|
+
bic_dfs = []
|
|
786
|
+
for bic_filename in bic_filenames:
|
|
787
|
+
if batch_size == 1 and binary is False: # if batch size = 1 and .txt file
|
|
788
|
+
# from 0d (scalar) to 1d-array
|
|
789
|
+
bic_dfs.append(pd.DataFrame(
|
|
790
|
+
fileio.load(bic_filename)[np.newaxis,]))
|
|
791
|
+
else:
|
|
792
|
+
bic_dfs.append(pd.DataFrame(fileio.load(bic_filename)))
|
|
793
|
+
bic_dfs = pd.concat(bic_dfs, ignore_index=True, axis=0)
|
|
794
|
+
fileio.save(bic_dfs, processing_dir + 'BIC' + outputsuffix +
|
|
795
|
+
file_extentions)
|
|
796
|
+
del bic_dfs
|
|
797
|
+
|
|
798
|
+
if (func != 'predict' and func != 'extend' and func != 'merge' and func != 'tune'):
|
|
799
|
+
if not os.path.isdir(processing_dir + 'Models') and \
|
|
800
|
+
os.path.exists(os.path.join(batches[0], 'Models')):
|
|
801
|
+
os.mkdir(processing_dir + 'Models')
|
|
802
|
+
|
|
803
|
+
meta_filenames = glob.glob(processing_dir + 'batch_*/Models/' +
|
|
804
|
+
'meta_data.md')
|
|
805
|
+
mY = []
|
|
806
|
+
sY = []
|
|
807
|
+
X_scalers = []
|
|
808
|
+
Y_scalers = []
|
|
809
|
+
if meta_filenames:
|
|
810
|
+
meta_filenames = fileio.sort_nicely(meta_filenames)
|
|
811
|
+
with open(meta_filenames[0], 'rb') as file:
|
|
812
|
+
meta_data = pickle.load(file)
|
|
813
|
+
|
|
814
|
+
for meta_filename in meta_filenames:
|
|
815
|
+
with open(meta_filename, 'rb') as file:
|
|
816
|
+
meta_data = pickle.load(file)
|
|
817
|
+
mY.append(meta_data['mean_resp'])
|
|
818
|
+
sY.append(meta_data['std_resp'])
|
|
819
|
+
if meta_data['inscaler'] in ['standardize', 'minmax',
|
|
820
|
+
'robminmax']:
|
|
821
|
+
X_scalers.append(meta_data['scaler_cov'])
|
|
822
|
+
if meta_data['outscaler'] in ['standardize', 'minmax',
|
|
823
|
+
'robminmax']:
|
|
824
|
+
Y_scalers.append(meta_data['scaler_resp'])
|
|
825
|
+
meta_data['mean_resp'] = np.squeeze(np.column_stack(mY))
|
|
826
|
+
meta_data['std_resp'] = np.squeeze(np.column_stack(sY))
|
|
827
|
+
meta_data['scaler_cov'] = X_scalers
|
|
828
|
+
meta_data['scaler_resp'] = Y_scalers
|
|
829
|
+
|
|
830
|
+
with open(os.path.join(processing_dir, 'Models',
|
|
831
|
+
'meta_data.md'), 'wb') as file:
|
|
832
|
+
pickle.dump(meta_data, file, protocol=PICKLE_PROTOCOL)
|
|
833
|
+
|
|
834
|
+
batch_dirs = glob.glob(processing_dir + 'batch_*/')
|
|
835
|
+
if batch_dirs:
|
|
836
|
+
batch_dirs = fileio.sort_nicely(batch_dirs)
|
|
837
|
+
for b, batch_dir in enumerate(batch_dirs):
|
|
838
|
+
src_files = glob.glob(batch_dir + 'Models/NM*' +
|
|
839
|
+
outputsuffix + '.pkl')
|
|
840
|
+
if src_files:
|
|
841
|
+
src_files = fileio.sort_nicely(src_files)
|
|
842
|
+
for f, full_file_name in enumerate(src_files):
|
|
843
|
+
if os.path.isfile(full_file_name):
|
|
844
|
+
file_name = full_file_name.split('/')[-1]
|
|
845
|
+
n = file_name.split('_')
|
|
846
|
+
n[-2] = str(b * batch_size + f)
|
|
847
|
+
n = '_'.join(n)
|
|
848
|
+
shutil.copy(full_file_name, processing_dir +
|
|
849
|
+
'Models/' + n)
|
|
850
|
+
elif func == 'fit':
|
|
851
|
+
count = count+1
|
|
852
|
+
batch1 = glob.glob(batch_dir + '/' + job_name + '*.sh')
|
|
853
|
+
print('Failed batch: ' + batch1[0])
|
|
854
|
+
batch_fail.append(batch1)
|
|
855
|
+
|
|
856
|
+
# list batches that were not executed
|
|
857
|
+
print('Number of batches that failed:' + str(count))
|
|
858
|
+
batch_fail_df = pd.DataFrame(batch_fail)
|
|
859
|
+
if file_extentions == '.txt':
|
|
860
|
+
fileio.save_pd(batch_fail_df, processing_dir + 'failed_batches' +
|
|
861
|
+
file_extentions)
|
|
862
|
+
else:
|
|
863
|
+
fileio.save(batch_fail_df, processing_dir +
|
|
864
|
+
'failed_batches' +
|
|
865
|
+
file_extentions)
|
|
866
|
+
|
|
867
|
+
if not batch_fail:
|
|
868
|
+
return True
|
|
869
|
+
else:
|
|
870
|
+
return False
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def delete_nm(processing_dir,
              binary=False):
    '''Remove the per-batch working directories and the failed-batch list,
    leaving everything else in the processing directory untouched.

    Basic usage::

        delete_nm(processing_dir)

    :param processing_dir: Full path to the processing directory. It is used
        as a plain string prefix, so it has to end with a path separator.
    :param binary: If True the failed-batch list is looked up with a .pkl
        extension, otherwise with a .txt extension.

    written by (primarily) T Wolfers, (adapted) SM Kia, (adapted) S Rutherford.
    '''
    suffix = '.pkl' if binary else '.txt'
    failed_list = processing_dir + 'failed_batches' + suffix

    # every directory matching batch_* is removed together with its content
    for batch_dir in glob.glob(processing_dir + 'batch_*/'):
        shutil.rmtree(batch_dir)

    # the failed-batch list only exists if it was written earlier
    if os.path.exists(failed_list):
        os.remove(failed_list)
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
# all routines below are environment dependent and require adaptation in novel
|
|
898
|
+
# environments -> copy those routines and adapt them in accordance with your
|
|
899
|
+
# environment
|
|
900
|
+
|
|
901
|
+
def bashwrap_nm(processing_dir,
                python_path,
                normative_path,
                job_name,
                covfile_path,
                respfile_path,
                func='estimate',
                **kwargs):
    '''Write an executable bash script that runs normative modelling, to be
    submitted to a torque cluster system.

    Basic usage::

        bashwrap_nm(processing_dir, python_path, normative_path, job_name, covfile_path, respfile_path)

    :param processing_dir: Full path to the processing dir. It is used as a
        plain string prefix for the script path, so it has to end with a path
        separator. The current working directory is changed to it.
    :param python_path: Full path to the python distribution
    :param normative_path: Full path to the normative.py
    :param job_name: Name for the bash script that is the output of this function
    :param covfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the responsefile
    :param respfile_path: Full path to a .txt that contains all features (subjects x features)
    :param func: Value passed to normative.py with the -f flag
    :param cv_folds: Number of cross validations
    :param testcovfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the testresponse file
    :param testrespfile_path: Full path to a .txt file that contains all test features
    :param alg: which algorithm to use
    :param configparam: configuration parameters for this algorithm (only
        forwarded when alg is given as well)

    Any other keyword argument is appended to the command as ``key=value``.

    :raises ValueError: if func is 'estimate' and neither testcovfile_path
        nor cv_folds is given.

    :outputs: A bash.sh file containing the commands for normative modelling saved to the processing directory (written to disk).

    written by (primarily) T Wolfers, (adapted) S Rutherford.
    '''

    # pop (rather than get) so that only the remaining kwargs are forwarded
    # as key=value arguments further down
    cv_folds = kwargs.pop('cv_folds', None)
    testcovfile_path = kwargs.pop('testcovfile_path', None)
    testrespfile_path = kwargs.pop('testrespfile_path', None)
    alg = kwargs.pop('alg', None)
    configparam = kwargs.pop('configparam', None)

    # change to processing dir
    os.chdir(processing_dir)

    # build the call of normative.py; a test covariate file takes precedence
    # over cross-validation
    command = python_path + ' ' + normative_path + ' -c ' + covfile_path
    if testcovfile_path is not None:
        command += ' -t ' + testcovfile_path
        if testrespfile_path is not None:
            command += ' -r ' + testrespfile_path
    elif cv_folds is not None:
        command += ' -k ' + str(cv_folds)
    elif func == 'estimate':
        raise ValueError("For the 'estimate' function either testcovfile_path "
                         "(optionally with testrespfile_path) or cv_folds "
                         "must be specified.")
    command += ' -f ' + func

    # add algorithm-specific parameters
    if alg is not None:
        command += ' -a ' + alg
        if configparam is not None:
            command += ' -x ' + str(configparam)

    # add responses file
    command += ' ' + respfile_path

    # add in optional arguments
    for key, value in kwargs.items():
        command += ' ' + key + '=' + str(value)

    # write the bash file into the processing dir
    script_path = processing_dir + job_name
    with open(script_path, 'w') as bash_file:
        bash_file.writelines(['#!/bin/bash\n',
                              'export OMP_NUM_THREADS=1\n',
                              'cd ' + processing_dir + '\n',
                              command,
                              '\n'])

    # make the script readable, writable and executable for user and group
    os.chmod(script_path, 0o770)
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
def qsub_nm(job_path,
            log_path,
            memory,
            duration,
            cores):
    '''Submit a job.sh script to the torque cluster using the qsub command.

    Basic usage::

        qsub_nm(job_path, log_path, memory, duration, cores)

    :param job_path: Full path to the job.sh file. It is also used as the
        job name (qsub -N).
    :param log_path: Path passed to both qsub -o and -e; if None, neither
        option is added.
    :param memory: Memory requirements written as string for example 4gb or 500mb.
    :param duration: The approximate duration of the job, a string with HH:MM:SS for example 01:01:01.
    :param cores: Number of cores requested on the node (ppn); a string or
        an integer.

    :outputs: Submission of the job to the (torque) cluster.
    :return: The output printed by the submission command with newlines
        removed.

    written by (primarily) T Wolfers, (adapted) SM Kia, (adapted) S Rutherford.
    '''

    # str() so that numeric values (e.g. cores=1) do not break the
    # concatenation
    resources = ('nodes=1:ppn=' + str(cores) + ',mem=' + str(memory) +
                 ',walltime=' + str(duration))

    # create the qsub command
    qsub_call = ('echo ' + job_path + ' | qsub -N ' + job_path +
                 ' -l ' + resources)
    if log_path is not None:
        qsub_call += ' -o ' + log_path + ' -e ' + log_path

    # submit the job to the cluster and capture what the command prints;
    # fall back to utf-8 when stdout has no encoding attached
    encoding = sys.stdout.encoding or 'utf-8'
    job_id = check_output([qsub_call], shell=True).decode(
        encoding).replace("\n", "")

    return job_id
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
def rerun_nm(processing_dir,
             log_path,
             memory,
             duration,
             cluster_spec,
             cores,
             binary=False,
             interactive=False):
    '''Resubmit, through qsub_nm, every job script listed in the
    failed_batches file of processing_dir.

    Basic usage::

        rerun_nm(processing_dir, log_path, memory, duration, cluster_spec, cores)

    :param processing_dir: Full path to the processing directory; used as a
        plain string prefix for the failed_batches file name.
    :param log_path: Passed on to qsub_nm.
    :param memory: Memory requirements written as string for example 4gb or 500mb.
    :param duration: The approximate duration of the job, a string with HH:MM:SS for example 01:01:01.
    :param cluster_spec: Passed on to check_jobs when interactive is set.
    :param cores: Passed on to qsub_nm.
    :param binary: If True the list is read from failed_batches.pkl with
        fileio.load, otherwise from failed_batches.txt with fileio.load_pd.
    :param interactive: If True, check_jobs is called on the resubmitted
        jobs with a delay of 60.

    written by (primarily) T Wolfers, (adapted) SM Kia, (adapted) S Rutherford.
    '''

    # the two file formats are indexed differently; the job script path is
    # taken from the first column of every row in both cases
    if binary:
        failed_batches = fileio.load(processing_dir +
                                     'failed_batches' + '.pkl')
        n_failed = failed_batches.shape[0]
        job_paths = (failed_batches[row, 0] for row in range(n_failed))
    else:
        failed_batches = fileio.load_pd(processing_dir +
                                        'failed_batches' + '.txt')
        n_failed = failed_batches.shape[0]
        job_paths = (failed_batches.iloc[row, 0] for row in range(n_failed))

    job_ids = []
    for jobpath in job_paths:
        print(jobpath)
        job_ids.append(qsub_nm(job_path=jobpath,
                               log_path=log_path,
                               memory=memory,
                               duration=duration,
                               cores=cores))

    if interactive:
        check_jobs(job_ids, cluster_spec, delay=60)
|
|
1082
|
+
|
|
1083
|
+
|
|
1084
|
+
# COPY the routines above here and adapt those to your cluster
|
|
1085
|
+
# bashwrap_nm; qsub_nm; rerun_nm
|
|
1086
|
+
|
|
1087
|
+
def sbatchwrap_nm(processing_dir,
                  python_path,
                  normative_path,
                  job_name,
                  covfile_path,
                  respfile_path,
                  memory,
                  duration,
                  log_path,
                  func='estimate',
                  **kwargs):
    '''Write an executable bash script with #SBATCH directives that runs
    normative modelling, to be submitted to a slurm cluster system.

    Basic usage::

        sbatchwrap_nm(processing_dir, python_path, normative_path, job_name, covfile_path, respfile_path, memory, duration, log_path)

    :param processing_dir: Full path to the processing dir. It is used as a
        plain string prefix for the script path, so it has to end with a path
        separator. The current working directory is changed to it.
    :param python_path: Full path to the python distribution
    :param normative_path: Full path to the normative.py
    :param job_name: Name for the bash script that is the output of this
        function; also written as the --job-name directive
    :param covfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the responsefile
    :param respfile_path: Full path to a .txt that contains all features (subjects x features)
    :param memory: Value written to the --mem-per-cpu directive
    :param duration: Value written to the --time directive
    :param log_path: String prefix for the -o / -e log files; '%x_%j.out'
        and '%x_%j.err' are appended to it
    :param func: Value passed to normative.py with the -f flag
    :param cv_folds: Number of cross validations
    :param testcovfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the testresponse file
    :param testrespfile_path: Full path to a .txt file that contains all test features
    :param alg: which algorithm to use
    :param configparam: configuration parameters for this algorithm (only
        forwarded when alg is given as well)

    Any other keyword argument is appended to the command as ``key=value``.

    :raises ValueError: if func is 'estimate' and neither testcovfile_path
        nor cv_folds is given.

    :outputs: A bash.sh file containing the commands for normative modelling saved to the processing directory (written to disk).

    written by (primarily) T Wolfers, (adapted) S Rutherford
    '''

    # pop (rather than get) so that only the remaining kwargs are forwarded
    # as key=value arguments further down
    cv_folds = kwargs.pop('cv_folds', None)
    testcovfile_path = kwargs.pop('testcovfile_path', None)
    testrespfile_path = kwargs.pop('testrespfile_path', None)
    alg = kwargs.pop('alg', None)
    configparam = kwargs.pop('configparam', None)

    # change to processing dir
    os.chdir(processing_dir)

    # script header: shebang followed by the #SBATCH directives
    header = ('#!/bin/bash\n' +
              '#SBATCH --job-name=' + job_name + '\n' +
              '#SBATCH --nodes=1\n' +
              '#SBATCH --ntasks=1\n' +
              '#SBATCH --time=' + str(duration) + '\n' +
              '#SBATCH --mem-per-cpu=' + str(memory) + '\n' +
              '#SBATCH -o ' + log_path + '%x_%j.out' + '\n' +
              '#SBATCH -e ' + log_path + '%x_%j.err' + '\n')

    # build the call of normative.py; a test covariate file takes precedence
    # over cross-validation
    command = python_path + ' ' + normative_path + ' -c ' + covfile_path
    if testcovfile_path is not None:
        command += ' -t ' + testcovfile_path
        if testrespfile_path is not None:
            command += ' -r ' + testrespfile_path
    elif cv_folds is not None:
        command += ' -k ' + str(cv_folds)
    elif func == 'estimate':
        raise ValueError("For the 'estimate' function either testcovfile_path "
                         "(optionally with testrespfile_path) or cv_folds "
                         "must be specified.")
    command += ' -f ' + func

    # add algorithm-specific parameters
    if alg is not None:
        command += ' -a ' + alg
        if configparam is not None:
            command += ' -x ' + str(configparam)

    # add responses file
    command += ' ' + respfile_path

    # add in optional arguments; str() so that non-string values (numbers,
    # booleans) can be forwarded as well
    for key, value in kwargs.items():
        command += ' ' + key + '=' + str(value)

    # write the bash file into the processing dir
    # NOTE(review): 'set -o errexit' is written after the job command, so it
    # cannot affect it - confirm whether it was meant to precede the command
    script_path = processing_dir + job_name
    with open(script_path, 'w') as bash_file:
        bash_file.writelines([header,
                              'cd ' + processing_dir + '\n',
                              command,
                              '\n',
                              'set -o errexit\n'])

    # make the script readable, writable and executable for user and group
    os.chmod(script_path, 0o770)
|
|
1200
|
+
|
|
1201
|
+
|
|
1202
|
+
def sbatch_nm(job_path):
    '''Submit a job.sh script to the slurm cluster using the sbatch command.

    Basic usage::

        sbatch_nm(job_path)

    :param job_path: Full path to the job.sh file

    :outputs: Submission of the job to the slurm cluster.
    :return: The output printed by sbatch with newlines removed.

    written by (primarily) T Wolfers, (adapted) S Rutherford.
    '''

    # run 'sbatch <job_path>' through the shell and capture what it prints
    raw_output = check_output(['sbatch ' + job_path], shell=True)

    return raw_output.decode(sys.stdout.encoding).replace("\n", "")
|
|
1225
|
+
|
|
1226
|
+
|
|
1227
|
+
def sbatchrerun_nm(processing_dir,
                   memory,
                   duration,
                   new_memory=False,
                   new_duration=False,
                   binary=False,
                   interactive=False,
                   **kwargs):
    '''Resubmit, through sbatch_nm, every job script listed in the
    failed_batches file of processing_dir, optionally rewriting the memory
    and/or duration request inside each script first.

    Basic usage::

        sbatchrerun_nm(processing_dir, memory, duration)

    :param processing_dir: Full path to the processing directory; used as a
        plain string prefix for the failed_batches file name.
    :param memory: Memory requirements written as string, for example 4gb or 500mb.
        Only used as the text to search for when new_memory is given.
    :param duration: The approximate duration of the job, a string with HH:MM:SS for example 01:01:01.
        Only used as the text to search for when new_duration is given.
    :param new_memory: If you want to change the memory you have to indicate it here.
    :param new_duration: If you want to change the duration you have to indicate it here.
    :param binary: If True the list is read from failed_batches.pkl with
        fileio.load, otherwise from failed_batches.txt with fileio.load_pd.
    :param interactive: If True, check_jobs is called on the resubmitted
        jobs with a delay of 60.
    :param kwargs: Accepted but not used.

    :outputs: Re-runs failed batches.

    written by (primarily) T Wolfers, (adapted) S Rutherford.
    '''

    # taken before resubmission; handed to check_jobs in interactive mode
    start_time = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

    # the two file formats are indexed differently; the job script path is
    # taken from the first column of every row in both cases
    if binary:
        failed_batches = fileio.load(
            processing_dir + 'failed_batches' + '.pkl')
        n_failed = failed_batches.shape[0]
        job_paths = (failed_batches[row, 0] for row in range(n_failed))
    else:
        failed_batches = fileio.load_pd(
            processing_dir + 'failed_batches' + '.txt')
        n_failed = failed_batches.shape[0]
        job_paths = (failed_batches.iloc[row, 0] for row in range(n_failed))

    job_ids = []
    for jobpath in job_paths:
        print(jobpath)

        # Each requested substitution is applied on its own, duration first.
        # This is a plain text replacement over every line of the script.
        for old_text, new_text in ((duration, new_duration),
                                   (memory, new_memory)):
            # '!= False' (not 'is not False') is kept on purpose so that the
            # set of values treated as "not requested" stays unchanged
            if new_text != False:  # noqa: E712
                with fileinput.FileInput(jobpath, inplace=True) as script:
                    for line in script:
                        # inside FileInput(inplace=True) print writes back
                        # into the file being read
                        print(line.replace(old_text, new_text), end='')

        job_ids.append(sbatch_nm(jobpath))

    if interactive:
        check_jobs(job_ids, cluster_spec='slurm',
                   start_time=start_time, delay=60)
|
|
1299
|
+
|
|
1300
|
+
|
|
1301
|
+
def retrieve_jobs(cluster_spec, start_time=None):
    """
    A utility function to retrieve task status from the cluster scheduler
    (qstat for torque, sacct for slurm).

    :param cluster_spec: type of cluster, either 'torque' or 'slurm'.
    :param start_time: only used for 'slurm'; passed to sacct with -S. If
        None, the -S option is left out.

    :return: for 'torque' a dictionary keyed by job id, each value holding
        'name', 'walltime' and 'status'; for 'slurm' the object returned by
        subprocess.run for the sacct call (its stdout holds the
        'JobName|State' rows).

    :raises ValueError: if cluster_spec is neither 'torque' nor 'slurm'.

    """

    if cluster_spec == 'torque':

        output = check_output('qstat', shell=True).decode(sys.stdout.encoding)
        jobs = dict()
        # The first two lines and the last element of the split are skipped;
        # presumably the qstat header and the empty string that follows the
        # trailing newline.
        for line in output.split('\n')[2:-1]:
            (Job_ID, Job_Name, User, Wall_Time, Status, Queue) = line.split()
            jobs[Job_ID] = {'name': Job_Name,
                            'walltime': Wall_Time,
                            'status': Status}
        return jobs

    if cluster_spec == 'slurm':

        end_time = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
        cmd = ['sacct', '-n', '-X', '--parsable2', '--noheader']
        # None cannot be an element of an argv list, so -S is only added
        # when a start time was actually given
        if start_time is not None:
            cmd += ['-S', start_time]
        cmd += ['-E', end_time, '--format=JobName,State']
        return run(cmd, capture_output=True, text=True)

    raise ValueError("Unknown cluster_spec %r; it should be either 'torque' "
                     "or 'slurm'." % (cluster_spec,))
|
|
1331
|
+
|
|
1332
|
+
|
|
1333
|
+
def check_job_status(jobs, cluster_spec, start_time=None):
    """
    A utility function to count the tasks with different status.

    :param jobs: List of job ids.
    :param cluster_spec: type of cluster, either 'torque' or 'slurm'.
    :param start_time: passed on to retrieve_jobs.
    :return: the tuple (q, r, c, u). For 'torque' these are the numbers of
        queued, running, completed and unknown-status jobs among `jobs`,
        where a job that retrieve_jobs does not list is counted as completed.
        For 'slurm' they are the numbers of PENDING, RUNNING, COMPLETED and
        FAILED rows among all rows returned by retrieve_jobs; the rows are
        not filtered by `jobs`.

    """
    running_jobs = retrieve_jobs(cluster_spec, start_time)

    r = 0
    c = 0
    q = 0
    u = 0

    if cluster_spec == 'torque':

        for job in jobs:
            try:
                status = running_jobs[job]['status']
            except KeyError:  # probably meanwhile the job is finished.
                c += 1
                continue

            if status == 'C':
                c += 1
            elif status == 'Q':
                q += 1
            elif status == 'R':
                r += 1
            else:
                u += 1

        print('Total Jobs:%d, Queued:%d, Running:%d, Completed:%d, Unknown:%d'
              % (len(jobs), q, r, c, u))

    elif cluster_spec == 'slurm':

        for line in running_jobs.stdout.strip().split('\n'):
            parts = line.split('|')
            # rows without a state column (including empty lines) are ignored
            if len(parts) < 2:
                continue
            state = parts[1]
            if state == 'PENDING':
                q += 1
            elif state == 'RUNNING':
                r += 1
            elif state == 'COMPLETED':
                c += 1
            elif state == 'FAILED':
                u += 1

        print('Total Jobs:%d, Pending:%d, Running:%d, Completed:%d, Failed:%d'
              % (len(jobs), q, r, c, u))

    return q, r, c, u
|
|
1390
|
+
|
|
1391
|
+
|
|
1392
|
+
def check_jobs(jobs, cluster_spec, start_time=None, delay=60):
    """
    A utility function for checking the status of submitted jobs. It polls
    check_job_status until the number of completed jobs it reports equals
    the number of entries in `jobs`.

    :param jobs: list of job ids.
    :param cluster_spec: type of cluster, either 'torque' or 'slurm'.
    :param start_time: passed on to check_job_status.
    :param delay: the delay (in sec) between two consecutive checks, defaults to 60.

    """

    total = len(jobs)

    _, _, completed, _ = check_job_status(jobs, cluster_spec, start_time)
    while completed != total:
        # wait before asking the scheduler again
        time.sleep(delay)
        _, _, completed, _ = check_job_status(jobs, cluster_spec, start_time)

    print('All jobs are completed!')
|
|
1410
|
+
|
|
1411
|
+
|
|
1412
|
+
def entrypoint(*args):
    '''Forward all positional arguments unchanged to main().'''
    main(*args)
|
|
1414
|
+
|
|
1415
|
+
def main(*args):
    '''Forward all positional arguments unchanged to execute_nm().'''
    execute_nm(*args)
|
|
1417
|
+
|
|
1418
|
+
# when the module is run as a script, the command-line arguments (without
# the script name) are handed to main() as one list
if __name__ == "__main__":
    main(sys.argv[1:])
|
|
1420
|
+
|