coderunner 0.11.1 → 0.11.2
- data/VERSION +1 -1
- data/coderunner.gemspec +11 -1
- data/lib/coderunner.rb +1 -1
- data/lib/coderunner/system_modules/franklin.rb +42 -0
- data/lib/coderunner/system_modules/generic_linux.rb +81 -0
- data/lib/coderunner/system_modules/genericlinux_testsystem.rb +42 -0
- data/lib/coderunner/system_modules/hector.rb +13 -0
- data/lib/coderunner/system_modules/helios.rb +13 -0
- data/lib/coderunner/system_modules/juropa.rb +123 -0
- data/lib/coderunner/system_modules/macosx.rb +50 -0
- data/lib/coderunner/system_modules/moab.rb +133 -0
- data/lib/coderunner/system_modules/new_hydra.rb +33 -0
- data/lib/coderunner/system_modules/slurm.rb +124 -0
- metadata +12 -2
data/VERSION
CHANGED
@@ -1 +1 @@
-0.11.1
+0.11.2
data/coderunner.gemspec
CHANGED
@@ -5,7 +5,7 @@
 
 Gem::Specification.new do |s|
   s.name = "coderunner"
-  s.version = "0.11.1"
+  s.version = "0.11.2"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Edmund Highcock"]
@@ -43,6 +43,16 @@ Gem::Specification.new do |s|
     "lib/coderunner/long_regexen.rb",
     "lib/coderunner/merged_code_runner.rb",
     "lib/coderunner/run.rb",
+    "lib/coderunner/system_modules/franklin.rb",
+    "lib/coderunner/system_modules/generic_linux.rb",
+    "lib/coderunner/system_modules/genericlinux_testsystem.rb",
+    "lib/coderunner/system_modules/hector.rb",
+    "lib/coderunner/system_modules/helios.rb",
+    "lib/coderunner/system_modules/juropa.rb",
+    "lib/coderunner/system_modules/macosx.rb",
+    "lib/coderunner/system_modules/moab.rb",
+    "lib/coderunner/system_modules/new_hydra.rb",
+    "lib/coderunner/system_modules/slurm.rb",
     "test/helper.rb",
     "test/test_coderunner.rb"
   ]
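After installing 0.11.2 the ten new system modules should appear in the gem's recorded file list; one quick way to check, using the standard RubyGems API:

    # List the files RubyGems recorded for the installed gem.
    require 'rubygems'
    puts Gem::Specification.find_by_name('coderunner').files.grep(/system_modules/)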
data/lib/coderunner.rb
CHANGED
@@ -10,7 +10,7 @@ end
 class CodeRunner
 
   COMMAND_FOLDER = Dir.pwd
-  SCRIPT_FOLDER = File.dirname(File.expand_path(__FILE__)) #i.e. where this script is
+  SCRIPT_FOLDER = File.dirname(File.expand_path(__FILE__)) + '/coderunner' #i.e. where this script is
   if ENV['CODE_RUNNER_OPTIONS']
     GLOBAL_OPTIONS = eval(ENV['CODE_RUNNER_OPTIONS']) # global options are set by the environment but some can be changed.
   else
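The SCRIPT_FOLDER change is what lets the new system modules be found at run time: the constant now points at lib/coderunner rather than lib, so franklin.rb below can do require SCRIPT_FOLDER + '/system_modules/moab.rb'. A minimal sketch of the path resolution, with an assumed install location:

    # '/opt/gems/coderunner/lib/coderunner.rb' is a hypothetical install path.
    lib_file = '/opt/gems/coderunner/lib/coderunner.rb'
    script_folder = File.dirname(File.expand_path(lib_file)) + '/coderunner'
    puts script_folder + '/system_modules/moab.rb'
    # => /opt/gems/coderunner/lib/coderunner/system_modules/moab.rb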
data/lib/coderunner/system_modules/franklin.rb
ADDED
@@ -0,0 +1,42 @@
+class CodeRunner
+  require SCRIPT_FOLDER + '/system_modules/moab.rb'
+  module Franklin
+    include Moab
+
+    def batch_script
+      nodes, ppn = @nprocs.split(/x/)
+      eputs "Warning: Underuse of nodes (#{ppn} cores per node instead of 4)" if ppn.to_i < 4
+      raise "Please specify project" unless @project
+      raise "Error: cores per node cannot excede 4" if ppn.to_i > 4
+      # raise "Error: project (i.e. budget) not specified" unless @project
+      ppn ||= 4
+      if @wall_mins
+        ep @wall_mins
+        hours = (@wall_mins / 60).floor
+        mins = @wall_mins.to_i % 60
+        secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
+      end
+      eputs "Allotted wall time is " + sprintf("%02d:%02d:%02d", hours, mins, secs)
+      nprocstot = nodes.to_i * ppn.to_i
+      <<EOF
+#!/bin/bash --login
+#PBS -N #{executable_name}.#{job_identifier}
+#PBS -l mppwidth=#{nprocstot}
+#PBS -l mppnppn=#{ppn}
+#{@wall_mins ? "#PBS -l walltime=#{sprintf("%02d:%02d:%02d", hours, mins, secs)}" : ""}
+#{@project ? "#PBS -A #@project" : ""}
+#PBS -q #{@runner.debug ? "debug" : "regular"}
+
+### start of jobscript
+cd $PBS_O_WORKDIR
+echo "workdir: $PBS_O_WORKDIR"
+
+echo "Submitting #{nodes}x#{ppn} job on Hector for project #@project..."
+
+
+EOF
+
+    end
+
+  end
+end
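Franklin's batch_script relies on two conventions shared by the modules below: @nprocs is a "nodes x cores-per-node" string, and @wall_mins may be fractional. A worked example of both computations in isolation:

    # "8x4" means 8 nodes with 4 cores each (Franklin's maximum is 4).
    nodes, ppn = "8x4".split(/x/)
    nprocstot = nodes.to_i * ppn.to_i                   # => 32 MPI tasks

    # 90.5 minutes becomes an hh:mm:ss wall-time string.
    wall_mins = 90.5
    hours = (wall_mins / 60).floor                      # => 1
    mins  = wall_mins.to_i % 60                         # => 30
    secs  = ((wall_mins - wall_mins.to_i) * 60).to_i    # => 30
    puts sprintf("%02d:%02d:%02d", hours, mins, secs)   # => 01:30:30

Note that the eputs after the if block assumes @wall_mins is set; if it is nil, hours, mins and secs are nil and the sprintf raises a TypeError.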
data/lib/coderunner/system_modules/generic_linux.rb
ADDED
@@ -0,0 +1,81 @@
+class CodeRunner
+  module GenericLinux
+
+
+    # @@ruby_command = "ruby1.9"
+
+    def queue_status
+      if rcp.uses_mpi
+        return %x[ps -e -U #{Process.uid} | grep mpi] + %x[ps -e -U #{Process.uid} | grep -G '\\bsh\\b'] + %x[ps -e -U #{Process.uid} -o pid,user,cmd | grep coderunner].grep(/launch/)
+      else
+        # ep executable_name
+        return %x[ps -e -U #{Process.uid} | grep '#{executable_name}'] + %x[ps -e -U #{Process.uid} | grep -G '\\bsh\\b']
+      end
+    end
+
+    def run_command
+      if rcp.uses_mpi
+        return %[time mpirun -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      else
+        return %[#{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      end
+    end
+
+    def execute
+      log 'execute_submission'
+      # ppipe = PPipe.new(2, false)
+      # trap(0){}
+      # ppipe.fork do
+      #   trap(0, 'IGNORE')
+      #   trap(2, 'IGNORE')
+      #   pid = system(run_command + " & ") # fork{exec run_command}
+      #   ppipe.w_send(:pid, pid, tp: 0)
+      #   Thread.new{Process.wait(pid)} # Need to pick up the dead process when it finishes
+      # end
+      # pid = ppipe.w_recv(:pid)
+      # ppipe.die
+      # eputs "HERE"
+
+      # trap(0){}
+      # trap(2, 'IGNORE')
+      # pid = spawn("trap '' 2 && trap '' 0 && " + run_command + " & ")
+      if prefix = ENV['CODE_RUNNER_LAUNCHER']
+        launch_id = "#{Time.now.to_i}#{$$}"
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{launch_id}"
+        File.open(fname + '.start', 'w'){|file| file.puts "cd #{Dir.pwd};#{run_command}"}
+        sleep 1 until FileTest.exist? fname + '.pid'
+        pid = File.read(fname + '.pid').to_i
+        FileUtils.rm fname + '.pid'
+      else
+        pid = Kernel.spawn(run_command + " & ")
+      end
+
+      # require 'rbconfig'
+      # pid = spawn %[#{Config::CONFIG['bindir']}/#{Config::CONFIG['ruby_install_name']} -e 'puts fork{exec("#{run_command}")}' &]
+
+      # eputs "THERE"
+      # Thread.new{Process.wait(pid)}
+      # sleep 0.2
+      return pid
+    end
+
+    def cancel_job
+      children = `ps --ppid #@job_no`.scan(/^\s*(\d+)/).map{|match| match[0].to_i}
+      system "kill #{@job_no}"
+      children.each do |pid|
+        system "kill #{pid}"
+      end
+      # `kill #{@job_no}`
+    end
+
+    def error_file
+      return "#{executable_name}.#{job_identifier}.e"
+    end
+
+    def output_file
+      return "#{executable_name}.#{job_identifier}.o"
+    end
+
+
+  end
+end
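Several of these modules share the same launcher handshake: when CODE_RUNNER_LAUNCHER is set, execute writes an <id>.start file containing the command into ~/.coderunner_to_launch_<prefix>/ and waits for a matching <id>.pid file to appear. The daemon that services that directory is not part of this release; a minimal sketch of what such a watcher could look like (names and layout assumed from the protocol above):

    # Hypothetical watcher for the launcher protocol: run each *.start file
    # and report the child pid back via a matching *.pid file.
    require 'fileutils'

    prefix = ENV.fetch('CODE_RUNNER_LAUNCHER')
    dir = "#{ENV['HOME']}/.coderunner_to_launch_#{prefix}"
    loop do
      Dir.glob("#{dir}/*.start").each do |start|
        id  = File.basename(start, '.start')
        pid = Process.spawn(File.read(start))      # run the submitted command via the shell
        File.open("#{dir}/#{id}.pid", 'w'){|f| f.puts pid}
        FileUtils.rm start
      end
      sleep 1
    end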
data/lib/coderunner/system_modules/genericlinux_testsystem.rb
ADDED
@@ -0,0 +1,42 @@
+class CodeRunner
+  module GenericlinuxTestsystem
+
+    @@ruby_command = "ruby1.9"
+
+    def queue_status
+      # if @@no_run
+      #   return ""
+      # else
+      return %x[ps] #%x[top -b -n 1 -u #{Process.uid} | grep #{@@executable_name}]
+      # end
+      # top runs very slowly. If you have a system (for example your laptop) where you will never run simulations, only analyse them, replace this command with %x[ps] and code runner will run much faster.
+    end
+
+    def run_command
+      if @@uses_mpi
+        return %[mpirun -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string}]
+      else
+        return %[#{executable_location}/#{executable_name} #{parameter_string}]
+      end
+    end
+
+    def execute
+      log 'execute_submission'
+      fork{exec run_command}
+      sleep 0.2
+    end
+
+    def cancel_job
+      `kill #{@job_no}`
+    end
+
+    def error_file
+      return nil
+    end
+
+    def output_file
+      return nil
+    end
+
+  end
+end
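As the long comment in queue_status notes, ps is used instead of top purely for speed. A small illustration of checking for a running process this way ('gs2' is a stand-in for the executable name):

    # Filter the ps listing in Ruby rather than in the shell.
    running = %x[ps].lines.grep(/gs2/)
    puts running.empty? ? "no runs in progress" : running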
data/lib/coderunner/system_modules/juropa.rb
ADDED
@@ -0,0 +1,123 @@
+class CodeRunner
+
+  # System module for Juropa and HPC-FF
+
+  module Juropa
+
+    def queue_status
+      if prefix = ENV['CODE_RUNNER_LAUNCHER']
+        %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status.txt] +
+          %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status2.txt]
+      else
+        %x[qstat | grep $USER]
+      end
+    end
+
+    def run_command
+      # "msub #{batch_script_file}"
+      if ENV['CODE_RUNNER_LAUNCHER']
+        return %[mpiexec -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      else
+        "mpiexec -np $NSLOTS #{executable_location}/#{executable_name} #{parameter_string}"
+      end
+    end
+
+    def execute
+
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        launch_id = "#{Time.now.to_i}#{$$}"
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{launch_id}"
+        File.open(fname + '.start', 'w'){|file| file.puts "cd #{Dir.pwd};#{run_command}"}
+        sleep 1 until FileTest.exist? fname + '.pid'
+        pid = File.read(fname + '.pid').to_i
+        FileUtils.rm fname + '.pid'
+        return pid
+      else
+        File.open(batch_script_file, 'w'){|file| file.puts batch_script + run_command + "\n"}
+        jn = %x[msub #{batch_script_file}].scan(/(\d+)\s*\Z/).flatten
+        if jn[0]
+          return jn[0].to_i
+        else
+          return nil
+        end
+      end
+    end
+
+    def batch_script_file
+      "#{executable_name}_#{job_identifier}.sh"
+    end
+
+    def batch_script
+      nodes, ppn = @nprocs.split(/x/)
+      ppn ||= 8
+      if @wall_mins
+        ep @wall_mins
+        hours = (@wall_mins / 60).floor
+        mins = @wall_mins.to_i % 60
+        secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
+      end
+      eputs "Allotted wall time is " + sprintf("%02d:%02d:%02d", hours, mins, secs)
+      <<EOF
+#!/bin/bash -x
+#MSUB -l nodes=#{nodes}:ppn=#{ppn}
+#MSUB -N #{executable_name}.#{job_identifier}
+#{@wall_mins ? "#MSUB -l walltime=#{sprintf("%02d:%02d:%02d", hours, mins, secs)}" : ""}
+
+### start of jobscript
+cd $PBS_O_WORKDIR
+echo "workdir: $PBS_O_WORKDIR"
+NSLOTS=#{nodes.to_i * ppn.to_i}
+echo "running on $NSLOTS cpus ..."
+
+EOF
+
+      #MSUB -e #{Dir.pwd}/#{error_file}
+      # if keyword omitted : default is submitting directory
+      #MSUB -o #{Dir.pwd}/#{output_file}
+      # if keyword omitted : default is submitting directory
+    end
+
+    def cancel_job
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{$$}.stop"
+        File.open(fname, 'w'){|file| file.puts "\n"}
+      else
+        `canceljob #{@job_no}`
+      end
+    end
+
+    def error_file
+      #For backwards compatibility
+      return "#{executable_name}.sh.e" if kind_of? CodeRunner::Run and [:Completed, :Failed].include? @status
+      return "#{executable_name}.#{job_identifier}.e#@job_no"
+    end
+
+    def output_file
+      return "#{executable_name}.sh.o" if kind_of? CodeRunner::Run and [:Completed, :Failed].include? @status
+      return "#{executable_name}.#{job_identifier}.o#@job_no"
+    end
+
+    def get_run_status(job_no, current_status)
+      if ENV['CODE_RUNNER_LAUNCHER']
+        return :Unknown
+      end
+      line = current_status.split(/\n/).grep(Regexp.new(job_no.to_s))[0]
+      unless line
+        return :Unknown
+      else
+        if line =~ /\sQ\s/
+          return :Queueing
+        elsif line =~ /\sR\s/
+          return :Running
+        elsif line =~ /\sC\s/
+          return :Unknown
+        else
+          ep 'line', line
+          raise 'Could not get run status'
+        end
+      end
+    end
+
+
+  end
+end
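On the msub path, execute captures the scheduler's stdout and scans the trailing digits out as the job number. A worked example with assumed msub output:

    # msub typically prints the new job id on a line of its own (assumed here).
    msub_output = "\n1234567\n"
    jn = msub_output.scan(/(\d+)\s*\Z/).flatten
    p jn[0] ? jn[0].to_i : nil   # => 1234567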
data/lib/coderunner/system_modules/macosx.rb
ADDED
@@ -0,0 +1,50 @@
+class CodeRunner
+  module Macosx
+    # @@ruby_command = "ruby1.9"
+
+    def queue_status
+      %x[ps].grep(Regexp.new(Regexp.escape(executable_name))) #Can't put grep in the shell command because it will grep itself - OS X displays the entire command in ps!
+    end
+
+    def run_command
+      if rcp.uses_mpi
+        return %[mpirun -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      else
+        return %[#{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      end
+    end
+
+    def execute
+      log 'execute_submission'
+      if ENV['CODE_RUNNER_LAUNCHER']
+        launch_id = "#{Time.now.to_i}#{$$}"
+        fname = ENV['HOME'] + "/.coderunner_to_launch/#{launch_id}"
+        File.open(fname + '.start', 'w'){|file| file.puts "cd #{Dir.pwd};#{run_command}"}
+        sleep 1 until FileTest.exist? fname + '.pid'
+        pid = File.read(fname + '.pid').to_i
+        FileUtils.rm fname + '.pid'
+      else
+        pid = Kernel.spawn(run_command + " & ")
+      end
+
+      return nil # pid
+    end
+
+
+    def cancel_job
+      `kill #{@job_no}`
+    end
+
+    def error_file
+      return executable_name + ".sh.e"
+    end
+
+    def output_file
+      return executable_name + ".sh.o"
+    end
+
+  end
+end
+
+
+
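The queue_status comment explains why the filtering happens in Ruby: on OS X, ps shows the full command line, so a shell-side grep would match its own invocation. A small illustration of the same idea ('gs2' stands in for executable_name; note String#grep as used above is Ruby 1.8 era, so on 1.9+ the listing must be split into lines first):

    %x[ps].lines.grep(Regexp.new(Regexp.escape('gs2')))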
data/lib/coderunner/system_modules/moab.rb
ADDED
@@ -0,0 +1,133 @@
+class CodeRunner
+  module Moab
+
+    def self.configure_environment
+      eputs "Configuring Hector"
+      conf = <<EOF
+eval `modulecmd bash swap PrgEnv-pgi PrgEnv-gnu`
+eval `modulecmd bash load fftw/3.2.2`
+export XTPE_LINK_TYPE=dynamic
+export LD_LIBRARY_PATH=/opt/xt-libsci/10.4.1/gnu/lib/44:$LD_LIBRARY_PATH
+EOF
+      Kernel.change_environment_with_shell_script(conf)
+    end
+
+    def queue_status
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status.txt] +
+          %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status2.txt]
+      else
+        %x[qstat | grep $USER]
+      end
+    end
+
+    def run_command
+      # "qsub #{batch_script_file}"
+      if (ENV['CODE_RUNNER_LAUNCHER'].size > 0 rescue false)
+        return %[mpiexec -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      else
+        nodes, ppn = @nprocs.split(/x/)
+        nprocstot = nodes.to_i * ppn.to_i
+        "aprun -n #{nprocstot} -N #{ppn} #{executable_location}/#{executable_name} #{parameter_string}"
+      end
+    end
+
+    def execute
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        launch_id = "#{Time.now.to_i}#{$$}"
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{launch_id}"
+        File.open(fname + '.start', 'w'){|file| file.puts "cd #{Dir.pwd};#{run_command}"}
+        sleep 1 until FileTest.exist? fname + '.pid'
+        pid = File.read(fname + '.pid').to_i
+        FileUtils.rm fname + '.pid'
+        return pid
+      else
+        File.open(batch_script_file, 'w'){|file| file.puts batch_script + run_command + "\n"}
+        pid = %x[qsub #{batch_script_file}].to_i
+      end
+    end
+
+    def batch_script_file
+      "#{executable_name}_#{job_identifier}.sh"
+    end
+
+    def max_ppn
+      raise "Please define max_ppn for your system"
+    end
+
+    def batch_script
+
+      nodes, ppn = @nprocs.split(/x/)
+      (eputs "Warning: number of nodes is not recommended (8, 16, 32, 64, 128, 256, 512, 1024, 2048 or 4096 recommended)"; sleep 0.2) unless [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096].include? nodes.to_i
+      (eputs "Warning: number of wall mins is not recommended (20, 60, 180, 360, 720 recomended)"; sleep 0.2) unless [20, 60, 180, 360, 720].include? @wall_mins.to_i
+      eputs "Warning: Underuse of nodes (#{ppn} cores per node instead of #{max_ppn})" if ppn.to_i < max_ppn
+      raise "Error: cores per node cannot excede #{max_ppn}" if ppn.to_i > max_ppn
+      # raise "Error: project (i.e. budget) not specified" unless @project
+      ppn ||= max_ppn
+      if @wall_mins
+        ep @wall_mins
+        hours = (@wall_mins / 60).floor
+        mins = @wall_mins.to_i % 60
+        secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
+      end
+      eputs "Allotted wall time is " + sprintf("%02d:%02d:%02d", hours, mins, secs)
+      nprocstot = nodes.to_i * ppn.to_i
+      <<EOF
+#!/bin/bash --login
+#PBS -N #{executable_name}.#{job_identifier}
+#PBS -l mppwidth=#{nprocstot}
+#PBS -l mppnppn=#{ppn}
+#{@wall_mins ? "#PBS -l walltime=#{sprintf("%02d:%02d:%02d", hours, mins, secs)}" : ""}
+#{@project ? "#PBS -A #@project" : ""}
+
+### start of jobscript
+cd $PBS_O_WORKDIR
+echo "workdir: $PBS_O_WORKDIR"
+
+echo "Submitting #{nodes}x#{ppn} job on Hector for project #@project..."
+
+
+EOF
+
+    end
+
+    def cancel_job
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{$$}.stop"
+        File.open(fname, 'w'){|file| file.puts "\n"}
+      else
+        `qdel #{@job_no}`
+      end
+    end
+
+    def error_file
+      return "#{executable_name}.#{job_identifier}.e#@job_no"
+    end
+
+    def output_file
+      return "#{executable_name}.#{job_identifier}.o#@job_no"
+    end
+
+    def get_run_status(job_no, current_status)
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        return :Unknown
+      end
+      line = current_status.split(/\n/).grep(Regexp.new(job_no.to_s))[0]
+      unless line
+        return :Unknown
+      else
+        if line =~ /\sQ\s/
+          return :Queueing
+        elsif line =~ /\sR\s/
+          return :Running
+        elsif line =~ /\sC\s/
+          return :Unknown
+        else
+          ep 'line', line
+          raise 'Could not get run status'
+        end
+      end
+    end
+
+  end
+end
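Moab is written to be included by machine-specific modules (Franklin above does exactly this), and its max_ppn stub raises until a host module supplies the real value. A hypothetical host module for illustration (MyCluster and the value 32 are assumptions, not part of this release):

    class CodeRunner
      module MyCluster        # hypothetical machine
        include Moab
        def max_ppn
          32                  # cores per node on this machine (assumed)
        end
      end
    end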
data/lib/coderunner/system_modules/new_hydra.rb
ADDED
@@ -0,0 +1,33 @@
+class CodeRunner
+  module NewHydra
+
+    def queue_status
+      %x[qstat | grep $LOGNAME]
+    end
+
+    def run_command
+      if @runner.debug
+        return %[mpisubshort "40mins" #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} ]
+      else
+        return %[mpisubnoquotes "0.5-10 hrs" #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string}]
+      end
+    end
+
+    def execute
+      system run_command
+    end
+
+    def cancel_job
+      `qdel #{@job_no}`
+    end
+
+    def error_file
+      return "#{executable_name}.sh.e#{@job_no}"
+    end
+
+    def output_file
+      return "#{executable_name}.sh.o#{@job_no}"
+    end
+
+  end
+end
data/lib/coderunner/system_modules/slurm.rb
ADDED
@@ -0,0 +1,124 @@
+class CodeRunner
+  # A module to let CodeRunner run using the SLURM queue system,
+  # used on certain HPC systems.
+  module Slurm
+
+    def queue_status
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status.txt] +
+          %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status2.txt]
+      else
+        %x[squeue | grep #{ENV['USER'][0..7]}]
+      end
+    end
+
+    def run_command
+      # "qsub #{batch_script_file}"
+      if (ENV['CODE_RUNNER_LAUNCHER'].size > 0 rescue false)
+        return %[mpiexec -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      else
+        nodes, ppn = @nprocs.split(/x/)
+        nprocstot = nodes.to_i * ppn.to_i
+        "mpirun -np #{nprocstot} #{executable_location}/#{executable_name} #{parameter_string}"
+      end
+    end
+
+    def execute
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        launch_id = "#{Time.now.to_i}#{$$}"
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{launch_id}"
+        File.open(fname + '.start', 'w'){|file| file.puts "cd #{Dir.pwd};#{run_command}"}
+        sleep 1 until FileTest.exist? fname + '.pid'
+        pid = File.read(fname + '.pid').to_i
+        FileUtils.rm fname + '.pid'
+        return pid
+      else
+        File.open(batch_script_file, 'w'){|file| file.puts batch_script + run_command + "\n"}
+        pid = %x[sbatch #{batch_script_file}].to_i
+        return nil
+      end
+    end
+
+    def batch_script_file
+      "#{executable_name}.#{job_identifier}.sh"
+    end
+
+    def max_ppn
+      raise "Please define max_ppn for your system"
+    end
+
+    def batch_script
+
+      nodes, ppn = @nprocs.split(/x/)
+      eputs "Warning: Underuse of nodes (#{ppn} cores per node instead of #{max_ppn})" if ppn.to_i < max_ppn
+      raise "Error: cores per node cannot excede #{max_ppn}" if ppn.to_i > max_ppn
+      # raise "Error: project (i.e. budget) not specified" unless @project
+      ppn ||= max_ppn
+      raise "Please specify wall minutes" unless @wall_mins
+      if @wall_mins
+        ep @wall_mins
+        hours = (@wall_mins / 60).floor
+        mins = @wall_mins.to_i % 60
+        secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
+      end
+      eputs "Allotted wall time is " + sprintf("%02d:%02d:%02d", hours, mins, secs)
+      nprocstot = nodes.to_i * ppn.to_i
+      <<EOF
+#!/bin/bash
+#SBATCH -J #{executable_name}.#{job_identifier}    # jobname
+#SBATCH -N #{nodes.to_i}    # number of nodes
+#SBATCH -n #{nprocstot}    # number of tasks
+#SBATCH -o #{executable_name}.#{job_identifier}.o%j    # strout filename (%j is jobid)
+#SBATCH -e #{executable_name}.#{job_identifier}.e%j    # stderr filename (%j is jobid)
+#{@project ? "#SBATCH -A #@project    # project to charge" : ""}
+#{@wall_mins ? "#SBATCH -t #{sprintf("%02d:%02d:%02d", hours, mins, secs)}    # walltime" : ""}
+
+#{code_run_environment}
+echo "Submitting #{nodes}x#{ppn} job on #{CodeRunner::SYS} for project #@project..."
+
+
+
+EOF
+
+    end
+
+    def cancel_job
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{$$}.stop"
+        File.open(fname, 'w'){|file| file.puts "\n"}
+      else
+        `scancel #{@job_no}`
+      end
+    end
+
+    def error_file
+      return "#{executable_name}.#{job_identifier}.e#@job_no"
+    end
+
+    def output_file
+      return "#{executable_name}.#{job_identifier}.o#@job_no"
+    end
+
+    def get_run_status(job_no, current_status)
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        return :Unknown
+      end
+      line = current_status.split(/\n/).grep(Regexp.new(job_no.to_s))[0]
+      unless line
+        return :Unknown
+      else
+        if line =~ /\sPD\s/
+          return :Queueing
+        elsif line =~ /\sR\s/
+          return :Running
+        elsif line =~ /\sC\s/
+          return :Unknown
+        else
+          ep 'line', line
+          raise 'Could not get run status'
+        end
+      end
+    end
+
+  end
+end
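get_run_status greps the job number out of the squeue listing and maps SLURM's one-letter state column (PD pending, R running, C completing) to a CodeRunner symbol. A worked example with an assumed squeue line format:

    # Assumed squeue output; only the state column matters to the parser.
    current_status = "123456 debug gs2.v1 edmund PD 0:00 8\n" +
                     "123457 regular gs2.v2 edmund R 1:02:11 8\n"
    line = current_status.split(/\n/).grep(Regexp.new("123456"))[0]
    status = case line
             when /\sPD\s/ then :Queueing
             when /\sR\s/  then :Running
             else :Unknown
             end
    p status # => :Queueing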
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: coderunner
 version: !ruby/object:Gem::Version
-  version: 0.11.1
+  version: 0.11.2
   prerelease:
 platform: ruby
 authors:
@@ -178,6 +178,16 @@ files:
 - lib/coderunner/long_regexen.rb
 - lib/coderunner/merged_code_runner.rb
 - lib/coderunner/run.rb
+- lib/coderunner/system_modules/franklin.rb
+- lib/coderunner/system_modules/generic_linux.rb
+- lib/coderunner/system_modules/genericlinux_testsystem.rb
+- lib/coderunner/system_modules/hector.rb
+- lib/coderunner/system_modules/helios.rb
+- lib/coderunner/system_modules/juropa.rb
+- lib/coderunner/system_modules/macosx.rb
+- lib/coderunner/system_modules/moab.rb
+- lib/coderunner/system_modules/new_hydra.rb
+- lib/coderunner/system_modules/slurm.rb
 - test/helper.rb
 - test/test_coderunner.rb
 homepage: http://coderunner.sourceforge.net
@@ -195,7 +205,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -
+      hash: -3340194866062964858
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements: