pcntoolkit 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1420 @@
1
+ #!/opt/conda/bin/python
2
+
3
+ # -----------------------------------------------------------------------------
4
+ # Run parallel normative modelling.
5
+ # All processing takes place in the processing directory (processing_dir)
6
+ # All inputs should be text files or binaries and space separated
7
+ #
8
+ # It is possible to run these functions using...
9
+ #
10
+ # * k-fold cross-validation
11
+ # * estimating a training dataset then applying to a second test dataset
12
+ #
13
+ # First,the data is split for parallel processing.
14
+ # Second, the splits are submitted to the cluster.
15
+ # Third, the output is collected and combined.
16
+ #
17
+ # written by (primarily) T Wolfers, (adapted) SM Kia, H Huijsdens, L Parks,
18
+ # S Rutherford, AF Marquand
19
+ # -----------------------------------------------------------------------------
20
+
21
+ from __future__ import division, print_function
22
+
23
+ import fileinput
24
+ import glob
25
+ import os
26
+ import pickle
27
+ import shutil
28
+ import sys
29
+ import time
30
+ from datetime import datetime
31
+ from subprocess import check_output, run
32
+
33
+ import numpy as np
34
+ import pandas as pd
35
+
36
+ try:
37
+ import pcntoolkit as ptk
38
+ import pcntoolkit.dataio.fileio as fileio
39
+ from pcntoolkit import configs
40
+ from pcntoolkit.util.utils import yes_or_no
41
+ ptkpath = ptk.__path__[0]
42
+ except ImportError:
43
+ pass
44
+ ptkpath = os.path.abspath(os.path.dirname(__file__))
45
+ if ptkpath not in sys.path:
46
+ sys.path.append(ptkpath)
47
+ import configs
48
+ import dataio.fileio as fileio
49
+ from util.utils import yes_or_no
50
+
51
+
52
+ PICKLE_PROTOCOL = configs.PICKLE_PROTOCOL  # pickle protocol passed to every to_pickle / pickle.dump call in this module
53
+
54
+
55
def execute_nm(processing_dir,
               python_path,
               job_name,
               covfile_path,
               respfile_path,
               batch_size,
               memory,
               duration,
               normative_path=None,
               func='estimate',
               interactive=False,
               **kwargs):
    ''' Execute parallel normative models.

    Top-level driver: splits the response file into batches (split_nm),
    writes one job script per batch, submits every script to the cluster
    and, when requested, waits for the jobs, reruns failed batches and
    collects the results (collect_nm).

    Basic usage::

        execute_nm(processing_dir, python_path, job_name, covfile_path, respfile_path, batch_size, memory, duration)

    :param processing_dir: Full path to the processing dir. Batch paths are
                           built by string concatenation, so it has to end
                           with a path separator.
    :param python_path: Full path to the python distribution
    :param job_name: Name for the bash script that is the output of this function
    :param covfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the responsefile
    :param respfile_path: Full path to a .txt that contains all features (subjects x features)
    :param batch_size: Number of features in each batch
    :param memory: Memory requirements written as string for example 4gb or 500mb
    :param duration: The approximate duration of the job, a string with HH:MM:SS for example 01:01:01
    :param normative_path: Full path to the normative.py. If None (default) the path is taken from the installed package.
    :param func: Forwarded to the job-script writers and to collect_nm (default 'estimate')
    :param interactive: If False (default) the user should manually
                        rerun the failed jobs or collect the results.
                        If 'auto' the job status is checked until all
                        jobs are completed, then the failed jobs are rerun
                        and the results are automatically collected.
                        'query' is similar to 'auto' but asks for user
                        confirmation before rerunning and before
                        collecting, which protects against endless loops.
    :param cv_folds: Number of cross validations
    :param testcovfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the test response file
    :param testrespfile_path: Full path to a .txt file that contains all test features
    :param outputsuffix: Suffix of the output files (default 'estimate'); underscores are removed
    :param log_path: Path for saving log files
    :param binary: If True uses binary format for response file otherwise it is text
    :param cluster_spec: 'torque' for PBS Torque and 'slurm' for Slurm clusters.
    :param n_cores_per_batch: Number of cores requested per torque job (default '1')

    :raises ValueError: if testrespfile_path and cv_folds are both specified

    written by (primarily) T Wolfers, (adapted) SM Kia
    The documentation is adapted by S Rutherford.
    '''

    cv_folds = kwargs.get('cv_folds', None)
    testrespfile_path = kwargs.get('testrespfile_path', None)
    outputsuffix = kwargs.get('outputsuffix', 'estimate')
    outputsuffix = "_" + outputsuffix.replace("_", "")
    # These three are consumed here and must not reach the job scripts.
    cluster_spec = kwargs.pop('cluster_spec', 'torque')
    log_path = kwargs.get('log_path', None)
    binary = kwargs.pop('binary', False)
    cores = kwargs.pop('n_cores_per_batch', '1')

    # Fail fast: an explicit test set and cross-validation are mutually
    # exclusive. Checking before split_nm avoids leaving half-prepared batch
    # folders behind when the arguments are inconsistent.
    if testrespfile_path is not None and cv_folds is not None:
        raise ValueError("If the response file is specified "
                         "cv_folds must be equal to None")

    if normative_path is None:
        normative_path = ptkpath + '/normative.py'

    split_nm(processing_dir,
             respfile_path,
             batch_size,
             binary,
             **kwargs)

    number_of_batches = len(glob.glob(processing_dir + 'batch_*'))
    file_extentions = '.pkl' if binary else '.txt'

    kwargs.update({'batch_size': str(batch_size)})
    job_ids = []
    start_time = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

    # The three run modes (explicit train/test split, forward model with
    # only test covariates, cross-validation) submit jobs identically; the
    # only difference is that the train/test mode points each job at its own
    # test response batch.
    for n in range(1, number_of_batches + 1):
        kwargs.update({'job_id': str(n)})
        batch_processing_dir = processing_dir + 'batch_' + str(n) + '/'
        batch_job_name = job_name + '_' + str(n) + '.sh'
        batch_respfile_path = (batch_processing_dir + 'resp_batch_' +
                               str(n) + file_extentions)
        batch_job_path = batch_processing_dir + batch_job_name

        if (testrespfile_path is not None and
                cluster_spec in ('torque', 'slurm')):
            # update the response file
            kwargs.update({'testrespfile_path':
                           batch_processing_dir + 'testresp_batch_' +
                           str(n) + file_extentions})

        if cluster_spec == 'torque':
            bashwrap_nm(batch_processing_dir,
                        python_path,
                        normative_path,
                        batch_job_name,
                        covfile_path,
                        batch_respfile_path,
                        func=func,
                        **kwargs)
            job_ids.append(qsub_nm(job_path=batch_job_path,
                                   log_path=log_path,
                                   memory=memory,
                                   duration=duration,
                                   cores=cores))
        elif cluster_spec == 'slurm':
            sbatchwrap_nm(batch_processing_dir,
                          python_path,
                          normative_path,
                          batch_job_name,
                          covfile_path,
                          batch_respfile_path,
                          func=func,
                          memory=memory,
                          duration=duration,
                          **kwargs)
            job_ids.append(sbatch_nm(job_path=batch_job_path))
        elif cluster_spec == 'new':
            # Placeholder for a different environment: adapt these calls
            # when adding support for a new cluster type.
            if testrespfile_path is not None:
                sbatchwrap_nm(processing_dir=batch_processing_dir,
                              func=func, **kwargs)
                sbatch_nm(processing_dir=batch_processing_dir)
            else:
                bashwrap_nm(processing_dir=batch_processing_dir, func=func,
                            **kwargs)
                qsub_nm(processing_dir=batch_processing_dir)

    if interactive:

        check_jobs(job_ids, cluster_spec, start_time, delay=60)

        # Check (without collecting) until no batch is reported as failed,
        # rerunning the failed batches in between.
        while not collect_nm(processing_dir,
                             job_name,
                             func=func,
                             collect=False,
                             binary=binary,
                             batch_size=batch_size,
                             outputsuffix=outputsuffix):
            if interactive == 'query':
                if not yes_or_no('Rerun the failed jobs?'):
                    # the user declined: stop retrying
                    break
            else:
                print('Reruning the failed jobs ...')

            if cluster_spec == 'torque':
                rerun_nm(processing_dir, log_path=log_path, memory=memory,
                         duration=duration, binary=binary,
                         interactive=interactive, cores=cores)
            elif cluster_spec == 'slurm':
                sbatchrerun_nm(processing_dir,
                               memory=memory,
                               duration=duration,
                               binary=binary,
                               log_path=log_path,
                               interactive=interactive)

        if interactive == 'query':
            collect_results = yes_or_no('Collect the results?')
        else:
            print('Collecting the results ...')
            collect_results = True

        if collect_results:
            collect_nm(processing_dir,
                       job_name,
                       func=func,
                       collect=True,
                       binary=binary,
                       batch_size=batch_size,
                       outputsuffix=outputsuffix)
351
+
352
+
353
+ """routines that are environment independent"""
354
+
355
+
356
def split_nm(processing_dir,
             respfile_path,
             batch_size,
             binary,
             **kwargs):
    ''' Prepare the input files for normative_parallel.

    The response file (and, if given, the test response file) is cut
    column-wise into chunks of ``batch_size`` columns. Chunk ``k`` is
    written to ``<processing_dir>batch_<k>/resp_batch_<k>.<ext>`` (and
    ``testresp_batch_<k>.<ext>``); a ``Models`` sub-folder is created for
    every newly created batch folder.

    Basic usage::

        split_nm(processing_dir, respfile_path, batch_size, binary, testrespfile_path)

    :param processing_dir: Full path to the processing dir. Paths are built
                           by string concatenation, so it has to end with a
                           path separator.
    :param respfile_path: Full path to the responsefile.txt (subjects x features)
    :param batch_size: Number of features in each batch
    :param binary: If True the inputs are .pkl files, otherwise .txt files
    :param testrespfile_path: Full path to the test responsefile.txt (subjects x features)

    :raises ValueError: if the extension of respfile_path or
                        testrespfile_path does not match ``binary``

    :outputs: The creation of a folder structure for batch-wise processing.

    written by (primarily) T Wolfers (adapted) SM Kia, (adapted) S Rutherford.
    '''

    testrespfile_path = kwargs.pop('testrespfile_path', None)
    expected_extension = '.pkl' if binary else '.txt'

    # Validate every extension before anything is read or written.
    _check_split_extension(respfile_path, expected_extension, binary,
                           'respfile')
    if testrespfile_path is not None:
        _check_split_extension(testrespfile_path, expected_extension, binary,
                               'testrespfile')

    respfile = _load_split_input(respfile_path, binary)
    testrespfile = None
    if testrespfile_path is not None:
        testrespfile = _load_split_input(testrespfile_path, binary)

    # Column boundaries of the batches: 0, batch_size, 2*batch_size, ...,
    # closed by the total number of columns so the last batch may be shorter.
    num_columns = respfile.shape[1]
    batch_vec = np.append(np.arange(0, num_columns, batch_size), num_columns)

    for n in range(len(batch_vec) - 1):
        start, stop = batch_vec[n], batch_vec[n + 1]
        # Kept from the original implementation: the working directory is
        # moved to the processing dir while the batches are written.
        os.chdir(processing_dir)
        batch_dir = processing_dir + 'batch_' + str(n + 1)
        if not os.path.exists(batch_dir):
            os.makedirs(batch_dir)
            os.makedirs(batch_dir + '/Models/')

        _save_split_batch(respfile.iloc[:, start:stop],
                          batch_dir + '/' + 'resp_batch_' + str(n + 1),
                          binary)
        if testrespfile is not None:
            _save_split_batch(testrespfile.iloc[:, start:stop],
                              batch_dir + '/' + 'testresp_batch_' +
                              str(n + 1),
                              binary)


def _check_split_extension(path, expected_extension, binary, label):
    '''Raise ValueError if ``path`` does not end in ``expected_extension``.'''
    extension = os.path.splitext(path)[1]
    if extension != expected_extension:
        raise ValueError('If binary is {} the file format for the {} file '
                         'must be {}'.format(bool(binary), label,
                                             expected_extension))


def _load_split_input(path, binary):
    '''Load a response file (pickle or ascii) and return it as a DataFrame.'''
    if binary:
        # NOTE(security): unpickling executes arbitrary code; only use
        # response files from a trusted source.
        data = pd.read_pickle(path)
    else:
        data = fileio.load_ascii(path)
    return pd.DataFrame(data)


def _save_split_batch(frame, path_without_extension, binary):
    '''Write one batch as .pkl (binary) or .txt next to the given stem.'''
    if binary:
        frame.to_pickle(path_without_extension + '.pkl',
                        protocol=PICKLE_PROTOCOL)
    else:
        fileio.save_pd(frame, path_without_extension + '.txt')
469
+
470
+
471
def collect_nm(processing_dir,
               job_name,
               func='estimate',
               collect=False,
               binary=False,
               batch_size=None,
               outputsuffix='estimate'):
    '''Check all batches for failures and optionally collect their output.

    Basic usage::

        collect_nm(processing_dir, job_name)

    :param processing_dir: Full path to the processing directory. Paths are
                           built by string concatenation, so it has to end
                           with a path separator.
    :param job_name: Name prefix of the batch job scripts; used to list the
                     scripts of the failed batches
    :param func: The function that was run in the batches. For 'fit',
                 'extend', 'merge' and 'tune' a batch is checked by counting
                 its model files against ``batch_size``; otherwise it is
                 checked through its yhat file.
    :param collect: If True data is checked for failed batches and collected; if False data is just checked
    :param binary: Results in pkl format
    :param batch_size: Number of features per batch. It is read from an
                       existing yhat file when the check is yhat based and
                       has to be given for 'fit', 'extend', 'merge' and
                       'tune'.
    :param outputsuffix: Suffix of the output files; underscores are removed

    :outputs: Text or pkl files containing all results across all batches, i.e. the combined output (written to disk), plus a 'failed_batches' file.

    :raises ValueError: if the check is yhat based and no yhat file exists

    :returns: True if no batch failed, False otherwise

    written by (primarily) T Wolfers, (adapted) SM Kia, (adapted) S Rutherford.
    '''

    outputsuffix = "_" + outputsuffix.replace("_", "")
    file_extentions = '.pkl' if binary else '.txt'

    # detect number of subjects, batches, hyperparameters and CV
    batches = glob.glob(processing_dir + 'batch_*/')

    count = 0
    batch_fail = []

    if func not in ('fit', 'extend', 'merge', 'tune'):
        # TODO: Collect_nm only depends on yhat, thus does not work when no
        # prediction is made (when test cov is not specified).
        files = glob.glob(processing_dir + 'batch_*/' + 'yhat' + outputsuffix
                          + file_extentions)
        if len(files) > 0:
            file_example = fileio.load(files[0])
        else:
            raise ValueError(f"Missing output files (yhats at: {processing_dir + 'batch_*/' + 'yhat' + outputsuffix + file_extentions}")

        numsubjects = file_example.shape[0]
        try:
            # doesn't exist if size=1, and txt file
            batch_size = file_example.shape[1]
        except IndexError:
            batch_size = 1

        batch_dirs = fileio.sort_nicely(glob.glob(processing_dir +
                                                  'batch_*/'))
        for batch in batch_dirs:
            filepath = glob.glob(batch + 'yhat' + outputsuffix + '*')
            if filepath == []:
                # batch was not executed
                count = count + 1
                batch1 = glob.glob(batch + '/' + job_name + '*.sh')
                print(batch1)
                batch_fail.append(batch1)
                if collect is True:
                    # artificially creates files for batches that were not
                    # executed so the combined output keeps its shape
                    _write_failed_batch_placeholders(batch, numsubjects,
                                                     batch_size,
                                                     outputsuffix,
                                                     file_extentions)
            else:  # if more than 10% of yhat is nan then it is a failed batch
                yhat = fileio.load(filepath[0])
                if np.count_nonzero(~np.isnan(yhat))/(np.prod(yhat.shape)) < 0.9:
                    count = count + 1
                    batch1 = glob.glob(batch + '/' + job_name + '*.sh')
                    print('More than 10% nans in ' +
                          (batch1[0] if batch1 else batch))
                    batch_fail.append(batch1)

    else:
        batch_dirs = fileio.sort_nicely(glob.glob(processing_dir +
                                                  'batch_*/'))
        for batch in batch_dirs:
            filepath = glob.glob(batch + 'Models/' + 'NM_' + '*' + outputsuffix
                                 + '*')
            # a complete batch holds one model file per feature
            if len(filepath) < batch_size:
                count = count + 1
                batch1 = glob.glob(batch + '/' + job_name + '*.sh')
                print(batch1)
                batch_fail.append(batch1)

    # combines all output files across batches
    if collect is True:
        # Per-feature measures are single values (batch_size == 1, text
        # file) or vectors per batch and are stacked row-wise (axis 0);
        # per-subject outputs are subjects x features and are joined
        # column-wise (axis 1).
        outputs = (('pRho', 0), ('Rho', 0), ('Z', 1), ('yhat', 1),
                   ('ys2', 1), ('RMSE', 0), ('SMSE', 0), ('EXPV', 0),
                   ('MSLL', 0), ('NLL', 0), ('BIC', 0))
        for name, axis in outputs:
            # from 0d (scalar) to 1d-array if batch size = 1 and .txt file
            promote_scalar = (axis == 0 and batch_size == 1 and
                              binary is False)
            _combine_batch_outputs(processing_dir, name, outputsuffix,
                                   file_extentions, axis, promote_scalar)

        if func not in ('predict', 'extend', 'merge', 'tune'):
            if not os.path.isdir(processing_dir + 'Models') and batches and \
               os.path.exists(os.path.join(batches[0], 'Models')):
                os.mkdir(processing_dir + 'Models')

            meta_filenames = glob.glob(processing_dir + 'batch_*/Models/' +
                                       'meta_data.md')
            mY = []
            sY = []
            X_scalers = []
            Y_scalers = []
            if meta_filenames:
                meta_filenames = fileio.sort_nicely(meta_filenames)
                for meta_filename in meta_filenames:
                    # NOTE(security): unpickling executes arbitrary code;
                    # the batch folders must come from a trusted run.
                    with open(meta_filename, 'rb') as file:
                        meta_data = pickle.load(file)
                    mY.append(meta_data['mean_resp'])
                    sY.append(meta_data['std_resp'])
                    if meta_data['inscaler'] in ['standardize', 'minmax',
                                                 'robminmax']:
                        X_scalers.append(meta_data['scaler_cov'])
                    if meta_data['outscaler'] in ['standardize', 'minmax',
                                                  'robminmax']:
                        Y_scalers.append(meta_data['scaler_resp'])
                # The combined meta data is the dictionary of the last batch
                # with the per-batch statistics and scalers merged in.
                meta_data['mean_resp'] = np.squeeze(np.column_stack(mY))
                meta_data['std_resp'] = np.squeeze(np.column_stack(sY))
                meta_data['scaler_cov'] = X_scalers
                meta_data['scaler_resp'] = Y_scalers

                with open(os.path.join(processing_dir, 'Models',
                                       'meta_data.md'), 'wb') as file:
                    pickle.dump(meta_data, file, protocol=PICKLE_PROTOCOL)

            batch_dirs = glob.glob(processing_dir + 'batch_*/')
            if batch_dirs:
                batch_dirs = fileio.sort_nicely(batch_dirs)
                for b, batch_dir in enumerate(batch_dirs):
                    src_files = glob.glob(batch_dir + 'Models/NM*' +
                                          outputsuffix + '.pkl')
                    if src_files:
                        src_files = fileio.sort_nicely(src_files)
                        for f, full_file_name in enumerate(src_files):
                            if os.path.isfile(full_file_name):
                                # renumber the model so that its index is
                                # global across batches instead of local
                                file_name = full_file_name.split('/')[-1]
                                parts = file_name.split('_')
                                parts[-2] = str(b * batch_size + f)
                                shutil.copy(full_file_name, processing_dir +
                                            'Models/' + '_'.join(parts))
                    elif func == 'fit':
                        count = count + 1
                        batch1 = glob.glob(batch_dir + '/' + job_name + '*.sh')
                        print('Failed batch: ' +
                              (batch1[0] if batch1 else batch_dir))
                        batch_fail.append(batch1)

    # list batches that were not executed
    print('Number of batches that failed:' + str(count))
    batch_fail_df = pd.DataFrame(batch_fail)
    if file_extentions == '.txt':
        fileio.save_pd(batch_fail_df, processing_dir + 'failed_batches' +
                       file_extentions)
    else:
        fileio.save(batch_fail_df, processing_dir +
                    'failed_batches' +
                    file_extentions)

    return not batch_fail


def _write_failed_batch_placeholders(batch, numsubjects, batch_size,
                                     outputsuffix, file_extentions):
    '''Write dummy result files (pRho = 1, everything else 0) for a batch.'''

    def target(name):
        return batch + name + outputsuffix + file_extentions

    fileio.save(pd.Series(np.ones(batch_size)), target('pRho'))
    for name in ('Rho', 'RMSE', 'SMSE', 'EXPV', 'MSLL'):
        fileio.save(pd.Series(np.zeros(batch_size)), target(name))
    for name in ('yhat', 'ys2', 'Z'):
        fileio.save(pd.DataFrame(np.zeros([numsubjects, batch_size])),
                    target(name))
    for name in ('NLL', 'BIC'):
        fileio.save(pd.Series(np.zeros(batch_size)), target(name))

    # The folder belongs inside the batch directory that was just checked,
    # not in whatever the current working directory happens to be.
    if not os.path.isdir(batch + 'Models'):
        os.mkdir(batch + 'Models')


def _combine_batch_outputs(processing_dir, name, outputsuffix,
                           file_extentions, axis, promote_scalar):
    '''Concatenate one output type over all batches and save the result.'''
    filenames = glob.glob(processing_dir + 'batch_*/' + name +
                          outputsuffix + '*')
    if not filenames:
        return
    frames = []
    for filename in fileio.sort_nicely(filenames):
        data = fileio.load(filename)
        if promote_scalar:
            data = data[np.newaxis,]
        frames.append(pd.DataFrame(data))
    combined = pd.concat(frames, ignore_index=True, axis=axis)
    fileio.save(combined, processing_dir + name + outputsuffix +
                file_extentions)
871
+
872
+
873
def delete_nm(processing_dir,
              binary=False):
    '''Delete all batch-wise processing data and keep only the combined output.

    Removes every ``batch_*`` directory inside the processing directory and
    the ``failed_batches`` file; every other file is left untouched.

    Basic usage::

        delete_nm(processing_dir)

    :param processing_dir: Full path to the processing directory (with or
                           without a trailing path separator).
    :param binary: Results in pkl format.

    written by (primarily) T Wolfers, (adapted) SM Kia, (adapted) S Rutherford.
    '''

    file_extentions = '.pkl' if binary else '.txt'

    # os.path.join makes the function independent of a trailing separator;
    # the trailing '/' in the pattern restricts the match to directories.
    for batch_dir in glob.glob(os.path.join(processing_dir, 'batch_*/')):
        shutil.rmtree(batch_dir)

    failed_batches = os.path.join(processing_dir,
                                  'failed_batches' + file_extentions)
    if os.path.exists(failed_batches):
        os.remove(failed_batches)
895
+
896
+
897
# All routines below are environment dependent and require adaptation in novel
# environments -> copy those routines and adapt them in accordance with your
# environment.
900
+
901
def bashwrap_nm(processing_dir,
                python_path,
                normative_path,
                job_name,
                covfile_path,
                respfile_path,
                func='estimate',
                **kwargs):
    ''' This function wraps normative modelling into a bash script to run it
    on a torque cluster system.

    Basic usage::

        bashwrap_nm(processing_dir, python_path, normative_path, job_name, covfile_path, respfile_path)

    :param processing_dir: Full path to the processing dir. It is used as a
        plain string prefix for the script path, so it should end with a path
        separator. The current working directory is changed to it.
    :param python_path: Full path to the python distribution
    :param normative_path: Full path to the normative.py
    :param job_name: Name for the bash script that is the output of this function
    :param covfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the responsefile
    :param respfile_path: Full path to a .txt that contains all features (subjects x features)
    :param func: Value passed to normative.py through ``-f``
    :param cv_folds: Number of cross validations
    :param testcovfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the testresponse file
    :param testrespfile_path: Full path to a .txt file that contains all test features
    :param alg: which algorithm to use
    :param configparam: configuration parameters for this algorithm
    :param kwargs: Every remaining keyword argument is appended to the
        command line as ``key=value``.

    :raises ValueError: If func is 'estimate' and neither testcovfile_path
        nor cv_folds is given.

    :outputs: A bash.sh file containing the commands for normative modelling saved to the processing directory (written to disk).

    written by (primarily) T Wolfers, (adapted) S Rutherford.
    '''

    # pop (not get) so that only the unrecognised options stay in kwargs and
    # can be forwarded as key=value pairs further down
    cv_folds = kwargs.pop('cv_folds', None)
    testcovfile_path = kwargs.pop('testcovfile_path', None)
    testrespfile_path = kwargs.pop('testrespfile_path', None)
    alg = kwargs.pop('alg', None)
    configparam = kwargs.pop('configparam', None)

    # change to processing dir (also done inside the generated script)
    os.chdir(processing_dir)

    # the command is collected as a list of tokens and joined with single
    # spaces when it is written
    command = [python_path, normative_path, '-c', covfile_path]
    if testcovfile_path is not None:
        command += ['-t', testcovfile_path]
        # test responses are only passed along with test covariates
        if testrespfile_path is not None:
            command += ['-r', testrespfile_path]
    elif cv_folds is not None:
        command += ['-k', str(cv_folds)]
    elif func == 'estimate':
        raise ValueError("For the 'estimate' function either "
                         "testcovfile_path (with or without "
                         "testrespfile_path) or cv_folds must be specified.")
    command += ['-f', func]

    # add algorithm-specific parameters
    if alg is not None:
        command += ['-a', alg]
    if configparam is not None:
        command += ['-x', str(configparam)]

    # add responses file
    command.append(respfile_path)

    # add in optional arguments.
    for key, value in kwargs.items():
        command.append(key + '=' + str(value))

    script_lines = ['#!/bin/bash\n',
                    'export OMP_NUM_THREADS=1\n',
                    'cd ' + processing_dir + '\n',
                    ' '.join(command) + '\n']

    # writes bash file into processing dir
    script_path = processing_dir + job_name
    with open(script_path, 'w') as bash_file:
        bash_file.writelines(script_lines)

    # make the script readable, writable and executable for owner and group
    os.chmod(script_path, 0o770)
989
+
990
+
991
def qsub_nm(job_path,
            log_path,
            memory,
            duration,
            cores):
    '''This function submits a job.sh script to the torque cluster using the qsub command.

    Basic usage::


        qsub_nm(job_path, log_path, memory, duration, cores)

    :param job_path: Full path to the job.sh file. It is also used as the job name (``-N``).
    :param log_path: Path passed to both ``-o`` and ``-e``; if None neither option is added.
    :param memory: Memory requirements written as string for example 4gb or 500mb.
    :param duration: The approximate duration of the job, a string with HH:MM:SS for example 01:01:01.
    :param cores: Value of ``ppn`` in the resource request (string or number).

    :outputs: Submission of the job to the (torque) cluster.
    :return: What the qsub pipeline printed on stdout, with newlines removed.

    written by (primarily) T Wolfers, (adapted) SM Kia, (adapted) S Rutherford.
    '''

    # str() so that numeric values (e.g. cores=1) are accepted as well
    resources = ('nodes=1:ppn=' + str(cores) + ',mem=' + str(memory) +
                 ',walltime=' + str(duration))

    # created qsub command
    command = ('echo ' + job_path + ' | qsub -N ' + job_path +
               ' -l ' + resources)
    if log_path is not None:
        command += ' -o ' + log_path + ' -e ' + log_path
    qsub_call = [command]

    # submits job to cluster
    job_id = check_output(qsub_call, shell=True).decode(
        sys.stdout.encoding).replace("\n", "")

    return job_id
1027
+
1028
+
1029
def rerun_nm(processing_dir,
             log_path,
             memory,
             duration,
             cluster_spec,
             cores,
             binary=False,
             interactive=False):
    '''Resubmit, through qsub_nm, every job script listed in the failed_batches file.

    The list is read from ``failed_batches.pkl`` (binary) or
    ``failed_batches.txt`` in the processing directory; the first column of
    each row is taken as the path of the job script to submit.

    Basic usage::

        rerun_nm(processing_dir, log_path, memory, duration, cluster_spec, cores)

    :param processing_dir: Full path to the processing directory
    :param log_path: Log path handed to qsub_nm.
    :param memory: Memory requirements written as string for example 4gb or 500mb.
    :param duration: The approximate duration of the job, a string with HH:MM:SS for example 01:01:01.
    :param cluster_spec: Cluster type handed to check_jobs when interactive is set.
    :param cores: Number of cores handed to qsub_nm.
    :param binary: If True the failed batches are read from the .pkl file, otherwise from the .txt file.
    :param interactive: If True, check_jobs is called on the submitted jobs after submission.

    written by (primarily) T Wolfers, (adapted) SM Kia, (adapted) S Rutherford.
    '''

    submitted = []

    # the two formats are read by different loaders
    if binary:
        failed = fileio.load(processing_dir + 'failed_batches' + '.pkl')
    else:
        failed = fileio.load_pd(processing_dir + 'failed_batches' + '.txt')

    n_failed = failed.shape[0]
    for row in range(0, n_failed):
        # the .pkl data is indexed directly, the .txt data through .iloc
        script = failed[row, 0] if binary else failed.iloc[row, 0]
        print(script)
        submitted.append(qsub_nm(job_path=script,
                                 log_path=log_path,
                                 memory=memory,
                                 duration=duration,
                                 cores=cores))

    if interactive:
        check_jobs(submitted, cluster_spec, delay=60)
1082
+
1083
+
1084
# COPY the routines above here and adapt them to your cluster:
# bashwrap_nm; qsub_nm; rerun_nm
1086
+
1087
def sbatchwrap_nm(processing_dir,
                  python_path,
                  normative_path,
                  job_name,
                  covfile_path,
                  respfile_path,
                  memory,
                  duration,
                  log_path,
                  func='estimate',
                  **kwargs):
    '''This function wraps normative modelling into a bash script with
    ``#SBATCH`` directives to run it on a slurm cluster system.

    Basic usage::

        sbatchwrap_nm(processing_dir, python_path, normative_path, job_name, covfile_path, respfile_path, memory, duration, log_path)

    :param processing_dir: Full path to the processing dir. It is used as a
        plain string prefix for the script path, so it should end with a path
        separator. The current working directory is changed to it.
    :param python_path: Full path to the python distribution
    :param normative_path: Full path to the normative.py
    :param job_name: Name for the bash script that is the output of this function; also used as the slurm job name
    :param covfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the responsefile
    :param respfile_path: Full path to a .txt that contains all features (subjects x features)
    :param memory: Value written to ``--mem-per-cpu``
    :param duration: Value written to ``--time``
    :param log_path: Prefix for the ``-o``/``-e`` log files (``%x_%j.out`` /
        ``%x_%j.err`` is appended). If None, no log directives are written.
    :param func: Value passed to normative.py through ``-f``
    :param cv_folds: Number of cross validations
    :param testcovfile_path: Full path to a .txt file that contains all covariates (subjects x covariates) for the testresponse file
    :param testrespfile_path: Full path to a .txt file that contains all test features
    :param alg: which algorithm to use
    :param configparam: configuration parameters for this algorithm
    :param kwargs: Every remaining keyword argument is appended to the
        command line as ``key=value``.

    :raises ValueError: If func is 'estimate' and neither testcovfile_path
        nor cv_folds is given.

    :outputs: A bash.sh file containing the commands for normative modelling saved to the processing directory (written to disk).

    written by (primarily) T Wolfers, (adapted) S Rutherford
    '''

    # pop (not get) so that only the unrecognised options stay in kwargs and
    # can be forwarded as key=value pairs further down
    cv_folds = kwargs.pop('cv_folds', None)
    testcovfile_path = kwargs.pop('testcovfile_path', None)
    testrespfile_path = kwargs.pop('testrespfile_path', None)
    alg = kwargs.pop('alg', None)
    configparam = kwargs.pop('configparam', None)

    # change to processing dir (also done inside the generated script)
    os.chdir(processing_dir)

    header = ['#!/bin/bash\n',
              '#SBATCH --job-name=' + job_name + '\n',
              '#SBATCH --nodes=1\n',
              '#SBATCH --ntasks=1\n',
              '#SBATCH --time=' + str(duration) + '\n',
              '#SBATCH --mem-per-cpu=' + str(memory) + '\n']
    # without a log path, leave the log locations to the scheduler
    if log_path is not None:
        header += ['#SBATCH -o ' + log_path + '%x_%j.out' + '\n',
                   '#SBATCH -e ' + log_path + '%x_%j.err' + '\n']

    # creates call of function for normative modelling
    command = [python_path, normative_path, '-c', covfile_path]
    if testcovfile_path is not None:
        command += ['-t', testcovfile_path]
        # test responses are only passed along with test covariates
        if testrespfile_path is not None:
            command += ['-r', testrespfile_path]
    elif cv_folds is not None:
        command += ['-k', str(cv_folds)]
    elif func == 'estimate':
        raise ValueError("For the 'estimate' function either "
                         "testcovfile_path (with or without "
                         "testrespfile_path) or cv_folds must be specified.")
    command += ['-f', func]

    # add algorithm-specific parameters
    if alg is not None:
        command += ['-a', alg]
    if configparam is not None:
        command += ['-x', str(configparam)]

    # add responses file
    command.append(respfile_path)

    # add in optional arguments; str() so that non-string values are accepted
    for key, value in kwargs.items():
        command.append(key + '=' + str(value))

    # NOTE(review): 'set -o errexit' is written AFTER the job command, as
    # before, so it does not affect the commands above it. Moving it ahead of
    # the command would change the state slurm reports for failing jobs, so
    # the position is kept here - confirm what is intended.
    body = ['cd ' + processing_dir + '\n',
            ' '.join(command) + '\n',
            'set -o errexit\n']

    # writes bash file into processing dir
    script_path = processing_dir + job_name
    with open(script_path, 'w') as bash_file:
        bash_file.writelines(header + body)

    # make the script readable, writable and executable for owner and group
    os.chmod(script_path, 0o770)
1200
+
1201
+
1202
def sbatch_nm(job_path):
    '''Submit a job.sh script to the slurm cluster using the sbatch command.

    Basic usage::

        sbatch_nm(job_path)

    :param job_path: Full path to the job.sh file

    :outputs: Submission of the job to the slurm cluster.
    :return: What sbatch printed on stdout, with newlines removed.

    written by (primarily) T Wolfers, (adapted) S Rutherford.
    '''

    # run "sbatch <job_path>" through the shell and capture what it prints
    raw_reply = check_output(['sbatch ' + job_path], shell=True)
    reply = raw_reply.decode(sys.stdout.encoding)

    return reply.replace("\n", "")
1225
+
1226
+
1227
def sbatchrerun_nm(processing_dir,
                   memory,
                   duration,
                   new_memory=False,
                   new_duration=False,
                   binary=False,
                   interactive=False,
                   **kwargs):
    '''This function reruns all failed batches in processing_dir after collect_nm has identified the failed batches.

    The job scripts to resubmit are read from the first column of
    ``failed_batches.pkl`` (binary) or ``failed_batches.txt``. When a new
    duration and/or memory is given, the old value is replaced by the new one
    on the ``#SBATCH`` lines of each script before it is submitted with
    sbatch_nm.

    Basic usage::

        sbatchrerun_nm(processing_dir, memory, duration)

    :param processing_dir: Full path to the processing directory.
    :param memory: Memory requirements written as string, for example 4gb or 500mb.
    :param duration: The approximate duration of the job, a string with HH:MM:SS for example 01:01:01.
    :param new_memory: If you want to change the memory you have to indicate it here.
    :param new_duration: If you want to change the duration you have to indicate it here.
    :param binary: If True the failed batches are read from the .pkl file, otherwise from the .txt file.
    :param interactive: If True, check_jobs is called on the submitted jobs after submission.
    :param kwargs: Accepted but not used.

    :outputs: Re-runs failed batches.

    written by (primarily) T Wolfers, (adapted) S Rutherford.
    '''

    job_ids = []

    # recorded before submitting; handed to check_jobs in interactive mode
    start_time = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

    if binary:
        failed_batches = fileio.load(
            processing_dir + 'failed_batches' + '.pkl')
    else:
        failed_batches = fileio.load_pd(
            processing_dir + 'failed_batches' + '.txt')

    # (old, new) pairs to substitute, duration first as before. False and
    # None both mean "leave unchanged"; str() accepts non-string values.
    replacements = []
    if new_duration not in (False, None):
        replacements.append((str(duration), str(new_duration)))
    if new_memory not in (False, None):
        replacements.append((str(memory), str(new_memory)))

    for n in range(0, failed_batches.shape[0]):
        # the .pkl data is indexed directly, the .txt data through .iloc
        jobpath = failed_batches[n, 0] if binary else failed_batches.iloc[n, 0]
        print(jobpath)

        if replacements:
            with open(jobpath) as script:
                lines = script.readlines()
            for i, line in enumerate(lines):
                # only touch scheduler directives, so that the same text
                # inside a path on the 'cd' or command line is never altered
                if not line.startswith('#SBATCH'):
                    continue
                for old, new in replacements:
                    line = line.replace(old, new)
                lines[i] = line
            # single rewrite of the script, even when both values change
            with open(jobpath, 'w') as script:
                script.writelines(lines)

        job_id = sbatch_nm(jobpath)
        job_ids.append(job_id)

    if interactive:
        check_jobs(job_ids, cluster_spec='slurm',
                   start_time=start_time, delay=60)
1299
+
1300
+
1301
def retrieve_jobs(cluster_spec, start_time=None):
    """
    A utility function to retrieve task status from the outputs of qstat
    (torque) or sacct (slurm).

    :param cluster_spec: type of cluster, either 'torque' or 'slurm'.
    :param start_time: slurm only; passed to sacct as ``-S``. If None the
        option is left out.

    :return: for 'torque' a dictionary keyed by job id whose values hold
        'name', 'walltime' and 'status'; for 'slurm' the CompletedProcess of
        the sacct call, with its text output in ``stdout``.
    :raises ValueError: if cluster_spec is neither 'torque' nor 'slurm'.

    """

    if cluster_spec == 'torque':

        output = check_output('qstat', shell=True).decode(sys.stdout.encoding)
        jobs = dict()
        # the first two lines and the last element of the split are skipped
        for line in output.split('\n')[2:-1]:
            (Job_ID, Job_Name, User, Wall_Time, Status, Queue) = line.split()
            jobs[Job_ID] = {'name': Job_Name,
                            'walltime': Wall_Time,
                            'status': Status}
        return jobs

    if cluster_spec == 'slurm':

        end_time = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
        cmd = ['sacct', '-n', '-X', '--parsable2', '--noheader']
        # a None entry in the argument list would make run() fail
        if start_time is not None:
            cmd += ['-S', start_time]
        cmd += ['-E', end_time, '--format=JobName,State']
        return run(cmd, capture_output=True, text=True)

    # previously this fell through to an unbound local variable
    raise ValueError("cluster_spec must be either 'torque' or 'slurm', got "
                     + repr(cluster_spec))
1331
+
1332
+
1333
def check_job_status(jobs, cluster_spec, start_time=None):
    """
    A utility function to count the tasks with different status.

    For 'torque' each id in ``jobs`` is looked up in the result of
    retrieve_jobs; an id that is not listed is counted as completed. For
    'slurm' every line returned by retrieve_jobs is counted by its state, and
    ``jobs`` is only used for the total in the printed summary.

    :param jobs: List of job ids.
    :param cluster_spec: type of cluster, either 'torque' or 'slurm'.
    :param start_time: handed on to retrieve_jobs.
    :return: the tuple (q, r, c, u): the number of tasks that are
        queued/pending, running, completed, and unknown (torque) or failed
        (slurm).

    """
    running_jobs = retrieve_jobs(cluster_spec, start_time)

    r = 0
    c = 0
    q = 0
    u = 0

    if cluster_spec == 'torque':

        for job in jobs:
            entry = running_jobs.get(job)
            if entry is None:
                # not listed: probably meanwhile the job is finished.
                # (explicit lookup instead of a bare except, which also
                # swallowed KeyboardInterrupt)
                c += 1
                continue
            status = entry['status']
            if status == 'C':
                c += 1
            elif status == 'Q':
                q += 1
            elif status == 'R':
                r += 1
            else:
                u += 1

        print('Total Jobs:%d, Queued:%d, Running:%d, Completed:%d, Unknown:%d'
              % (len(jobs), q, r, c, u))

    elif cluster_spec == 'slurm':

        lines = running_jobs.stdout.strip().split('\n')

        for line in lines:
            if not line:
                continue
            # each line is split on '|' and the second field is the state
            parts = line.split('|')
            if len(parts) < 2:
                continue
            state = parts[1]
            if state == 'PENDING':
                q += 1
            elif state == 'RUNNING':
                r += 1
            elif state == 'COMPLETED':
                c += 1
            elif state == 'FAILED':
                u += 1
            # any other state is not counted

        print('Total Jobs:%d, Pending:%d, Running:%d, Completed:%d, Failed:%d'
              % (len(jobs), q, r, c, u))

    return q, r, c, u
1390
+
1391
+
1392
def check_jobs(jobs, cluster_spec, start_time=None, delay=60):
    """
    A utility function for checking the status of submitted jobs.

    Polls check_job_status until the number of completed jobs reaches the
    number of submitted jobs. On slurm the polling also stops once nothing is
    pending or running any more and at least one job is reported as failed.

    :param jobs: list of job ids.
    :param cluster_spec: type of cluster, either 'torque' or 'slurm'.
    :param start_time: handed on to check_job_status.
    :param delay: the delay (in sec) between two consecutive checks, defaults to 60.

    """

    n = len(jobs)

    while True:
        q, r, c, u = check_job_status(jobs, cluster_spec, start_time)
        # '>=' rather than '==': the slurm count is taken over every line
        # sacct returns and can therefore exceed n
        if c >= n:
            print('All jobs are completed!')
            break
        # on slurm 'u' counts FAILED jobs, which never become completed;
        # waiting for c == n would loop forever in that case. On torque 'u'
        # stands for unknown states, so this exit is limited to slurm.
        if (cluster_spec == 'slurm' and u > 0 and q == 0 and r == 0
                and c + u >= n):
            print('No jobs pending or running any more; %d job(s) failed.'
                  % u)
            break
        time.sleep(delay)
1410
+
1411
+
1412
def entrypoint(*args):
    """Entry point wrapper: hand all positional arguments on to main()."""
    return main(*args)
1414
+
1415
def main(*args):
    """Call execute_nm with the given positional arguments, unpacked."""
    execute_nm(*args)
1417
+
1418
# Script execution: forward the command-line arguments (without the script
# name) to main().
# NOTE(review): the argument list is passed as ONE positional value, so
# execute_nm receives a single list rather than the unpacked arguments -
# confirm against the signature of execute_nm.
if __name__ == "__main__":
    main(sys.argv[1:])
1420
+