coderunner 0.11.1 → 0.11.2
This diff shows the content of publicly available package versions as released to their public registries. It is provided for informational purposes only.
- data/VERSION +1 -1
- data/coderunner.gemspec +11 -1
- data/lib/coderunner.rb +1 -1
- data/lib/coderunner/system_modules/franklin.rb +42 -0
- data/lib/coderunner/system_modules/generic_linux.rb +81 -0
- data/lib/coderunner/system_modules/genericlinux_testsystem.rb +42 -0
- data/lib/coderunner/system_modules/hector.rb +13 -0
- data/lib/coderunner/system_modules/helios.rb +13 -0
- data/lib/coderunner/system_modules/juropa.rb +123 -0
- data/lib/coderunner/system_modules/macosx.rb +50 -0
- data/lib/coderunner/system_modules/moab.rb +133 -0
- data/lib/coderunner/system_modules/new_hydra.rb +33 -0
- data/lib/coderunner/system_modules/slurm.rb +124 -0
- metadata +12 -2
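
All ten added files follow one pattern: each is a plug-in "system module" that teaches CodeRunner how to drive a particular machine or queue system. A minimal sketch of the common interface, inferred from the modules in this diff (the method names are real; the bodies are illustrative placeholders, not code from the gem):

    # Hypothetical skeleton of a CodeRunner system module. Method names are
    # taken from the added files below; the bodies are placeholder examples.
    class CodeRunner
      module ExampleSystem
        # Text listing the user's jobs (qstat/squeue/ps output, system-dependent)
        def queue_status
          %x[ps]
        end

        # Shell command that actually runs the code
        def run_command
          "#{executable_location}/#{executable_name} #{parameter_string}"
        end

        # Spawn or submit run_command; return a pid or queue job number (or nil)
        def execute
          Kernel.spawn(run_command)
        end

        # Kill or dequeue the job recorded in @job_no
        def cancel_job
          system "kill #{@job_no}"
        end

        # Where stderr/stdout of the run end up
        def error_file;  "#{executable_name}.#{job_identifier}.e"; end
        def output_file; "#{executable_name}.#{job_identifier}.o"; end
      end
    end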
data/VERSION
CHANGED
@@ -1 +1 @@
-0.11.1
+0.11.2
data/coderunner.gemspec
CHANGED
@@ -5,7 +5,7 @@
 
 Gem::Specification.new do |s|
   s.name = "coderunner"
-  s.version = "0.11.1"
+  s.version = "0.11.2"
 
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Edmund Highcock"]
@@ -43,6 +43,16 @@ Gem::Specification.new do |s|
     "lib/coderunner/long_regexen.rb",
     "lib/coderunner/merged_code_runner.rb",
     "lib/coderunner/run.rb",
+    "lib/coderunner/system_modules/franklin.rb",
+    "lib/coderunner/system_modules/generic_linux.rb",
+    "lib/coderunner/system_modules/genericlinux_testsystem.rb",
+    "lib/coderunner/system_modules/hector.rb",
+    "lib/coderunner/system_modules/helios.rb",
+    "lib/coderunner/system_modules/juropa.rb",
+    "lib/coderunner/system_modules/macosx.rb",
+    "lib/coderunner/system_modules/moab.rb",
+    "lib/coderunner/system_modules/new_hydra.rb",
+    "lib/coderunner/system_modules/slurm.rb",
     "test/helper.rb",
     "test/test_coderunner.rb"
   ]
data/lib/coderunner.rb
CHANGED
@@ -10,7 +10,7 @@ end
 class CodeRunner
 
   COMMAND_FOLDER = Dir.pwd
-  SCRIPT_FOLDER = File.dirname(File.expand_path(__FILE__)) #i.e. where this script is
+  SCRIPT_FOLDER = File.dirname(File.expand_path(__FILE__)) + '/coderunner' #i.e. where this script is
   if ENV['CODE_RUNNER_OPTIONS']
     GLOBAL_OPTIONS = eval(ENV['CODE_RUNNER_OPTIONS']) # global options are set by the environment but some can be changed.
   else
data/lib/coderunner/system_modules/franklin.rb
ADDED
@@ -0,0 +1,42 @@
+class CodeRunner
+  require SCRIPT_FOLDER + '/system_modules/moab.rb'
+  module Franklin
+    include Moab
+
+    def batch_script
+      nodes, ppn = @nprocs.split(/x/)
+      eputs "Warning: Underuse of nodes (#{ppn} cores per node instead of 4)" if ppn.to_i < 4
+      raise "Please specify project" unless @project
+      raise "Error: cores per node cannot exceed 4" if ppn.to_i > 4
+      # raise "Error: project (i.e. budget) not specified" unless @project
+      ppn ||= 4
+      if @wall_mins
+        ep @wall_mins
+        hours = (@wall_mins / 60).floor
+        mins = @wall_mins.to_i % 60
+        secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
+      end
+      eputs "Allotted wall time is " + sprintf("%02d:%02d:%02d", hours, mins, secs)
+      nprocstot = nodes.to_i * ppn.to_i
+      <<EOF
+#!/bin/bash --login
+#PBS -N #{executable_name}.#{job_identifier}
+#PBS -l mppwidth=#{nprocstot}
+#PBS -l mppnppn=#{ppn}
+#{@wall_mins ? "#PBS -l walltime=#{sprintf("%02d:%02d:%02d", hours, mins, secs)}" : ""}
+#{@project ? "#PBS -A #@project" : ""}
+#PBS -q #{@runner.debug ? "debug" : "regular"}
+
+### start of jobscript
+cd $PBS_O_WORKDIR
+echo "workdir: $PBS_O_WORKDIR"
+
+echo "Submitting #{nodes}x#{ppn} job on Hector for project #@project..."
+
+
+EOF
+
+    end
+
+  end
+end
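
franklin.rb's wall-time arithmetic (repeated in the Juropa, Moab and Slurm modules below) converts a possibly fractional @wall_mins into an HH:MM:SS string. A standalone worked example of the same three lines:

    # Wall-time conversion as in batch_script above: 90.5 minutes -> "01:30:30".
    wall_mins = 90.5
    hours = (wall_mins / 60).floor                    # whole hours            => 1
    mins  = wall_mins.to_i % 60                       # leftover whole minutes => 30
    secs  = ((wall_mins - wall_mins.to_i) * 60).to_i  # fractional part        => 30
    puts sprintf("%02d:%02d:%02d", hours, mins, secs) # prints "01:30:30"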
data/lib/coderunner/system_modules/generic_linux.rb
ADDED
@@ -0,0 +1,81 @@
+class CodeRunner
+  module GenericLinux
+
+
+    # @@ruby_command = "ruby1.9"
+
+    def queue_status
+      if rcp.uses_mpi
+        return %x[ps -e -U #{Process.uid} | grep mpi] + %x[ps -e -U #{Process.uid} | grep -G '\\bsh\\b'] + %x[ps -e -U #{Process.uid} -o pid,user,cmd | grep coderunner].grep(/launch/)
+      else
+        # ep executable_name
+        return %x[ps -e -U #{Process.uid} | grep '#{executable_name}'] + %x[ps -e -U #{Process.uid} | grep -G '\\bsh\\b']
+      end
+    end
+
+    def run_command
+      if rcp.uses_mpi
+        return %[time mpirun -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      else
+        return %[#{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      end
+    end
+
+    def execute
+      log 'execute_submission'
+      # ppipe = PPipe.new(2, false)
+      # trap(0){}
+      # ppipe.fork do
+      #   trap(0, 'IGNORE')
+      #   trap(2, 'IGNORE')
+      #   pid = system(run_command + " & ") # fork{exec run_command}
+      #   ppipe.w_send(:pid, pid, tp: 0)
+      #   Thread.new{Process.wait(pid)} # Need to pick up the dead process when it finishes
+      # end
+      # pid = ppipe.w_recv(:pid)
+      # ppipe.die
+      # eputs "HERE"
+
+      # trap(0){}
+      # trap(2, 'IGNORE')
+      # pid = spawn("trap '' 2 && trap '' 0 && " + run_command + " & ")
+      if prefix = ENV['CODE_RUNNER_LAUNCHER']
+        launch_id = "#{Time.now.to_i}#{$$}"
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{launch_id}"
+        File.open(fname + '.start', 'w'){|file| file.puts "cd #{Dir.pwd};#{run_command}"}
+        sleep 1 until FileTest.exist? fname + '.pid'
+        pid = File.read(fname + '.pid').to_i
+        FileUtils.rm fname + '.pid'
+      else
+        pid = Kernel.spawn(run_command + " & ")
+      end
+
+      # require 'rbconfig'
+      # pid = spawn %[#{Config::CONFIG['bindir']}/#{Config::CONFIG['ruby_install_name']} -e 'puts fork{exec("#{run_command}")}' &]
+
+      # eputs "THERE"
+      # Thread.new{Process.wait(pid)}
+      # sleep 0.2
+      return pid
+    end
+
+    def cancel_job
+      children = `ps --ppid #@job_no`.scan(/^\s*(\d+)/).map{|match| match[0].to_i}
+      system "kill #{@job_no}"
+      children.each do |pid|
+        system "kill #{pid}"
+      end
+      # `kill #{@job_no}`
+    end
+
+    def error_file
+      return "#{executable_name}.#{job_identifier}.e"
+    end
+
+    def output_file
+      return "#{executable_name}.#{job_identifier}.o"
+    end
+
+
+  end
+end
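
When CODE_RUNNER_LAUNCHER is set, execute above does not spawn the job itself: it writes an <id>.start file containing the shell command into ~/.coderunner_to_launch_<prefix>/ and polls until something else creates a matching <id>.pid file. The process on the other side of that handshake is not part of this gem; a hypothetical companion loop that would satisfy the protocol could look like this (everything beyond the .start/.pid file convention is invented for illustration):

    # Hypothetical launcher daemon: NOT shipped with coderunner. It watches the
    # directory used by execute above and completes the .start/.pid handshake.
    require 'fileutils'

    dir = File.join(ENV['HOME'], ".coderunner_to_launch_#{ARGV[0]}")
    loop do
      Dir.glob(File.join(dir, '*.start')).each do |start_file|
        command = File.read(start_file)            # "cd <dir>;<run_command>"
        pid = Process.spawn(command)               # run it through the shell
        Process.detach(pid)                        # reap the child on exit
        File.write(start_file.sub(/\.start\z/, '.pid'), pid.to_s)
        FileUtils.rm(start_file)
      end
      sleep 1
    end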
data/lib/coderunner/system_modules/genericlinux_testsystem.rb
ADDED
@@ -0,0 +1,42 @@
+class CodeRunner
+  module GenericlinuxTestsystem
+
+    @@ruby_command = "ruby1.9"
+
+    def queue_status
+      # if @@no_run
+      #   return ""
+      # else
+      return %x[ps] #%x[top -b -n 1 -u #{Process.uid} | grep #{@@executable_name}]
+      # end
+      # top runs very slowly. If you have a system (for example your laptop) where you will never run simulations, only analyse them, replace this command with %x[ps] and code runner will run much faster.
+    end
+
+    def run_command
+      if @@uses_mpi
+        return %[mpirun -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string}]
+      else
+        return %[#{executable_location}/#{executable_name} #{parameter_string}]
+      end
+    end
+
+    def execute
+      log 'execute_submission'
+      fork{exec run_command}
+      sleep 0.2
+    end
+
+    def cancel_job
+      `kill #{@job_no}`
+    end
+
+    def error_file
+      return nil
+    end
+
+    def output_file
+      return nil
+    end
+
+  end
+end
data/lib/coderunner/system_modules/juropa.rb
ADDED
@@ -0,0 +1,123 @@
+class CodeRunner
+
+  # System module for Juropa and HPC-FF
+
+  module Juropa
+
+    def queue_status
+      if prefix = ENV['CODE_RUNNER_LAUNCHER']
+        %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status.txt] +
+          %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status2.txt]
+      else
+        %x[qstat | grep $USER]
+      end
+    end
+
+    def run_command
+      # "msub #{batch_script_file}"
+      if ENV['CODE_RUNNER_LAUNCHER']
+        return %[mpiexec -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      else
+        "mpiexec -np $NSLOTS #{executable_location}/#{executable_name} #{parameter_string}"
+      end
+    end
+
+    def execute
+
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        launch_id = "#{Time.now.to_i}#{$$}"
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{launch_id}"
+        File.open(fname + '.start', 'w'){|file| file.puts "cd #{Dir.pwd};#{run_command}"}
+        sleep 1 until FileTest.exist? fname + '.pid'
+        pid = File.read(fname + '.pid').to_i
+        FileUtils.rm fname + '.pid'
+        return pid
+      else
+        File.open(batch_script_file, 'w'){|file| file.puts batch_script + run_command + "\n"}
+        jn = %x[msub #{batch_script_file}].scan(/(\d+)\s*\Z/).flatten
+        if jn[0]
+          return jn[0].to_i
+        else
+          return nil
+        end
+      end
+    end
+
+    def batch_script_file
+      "#{executable_name}_#{job_identifier}.sh"
+    end
+
+    def batch_script
+      nodes, ppn = @nprocs.split(/x/)
+      ppn ||= 8
+      if @wall_mins
+        ep @wall_mins
+        hours = (@wall_mins / 60).floor
+        mins = @wall_mins.to_i % 60
+        secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
+      end
+      eputs "Allotted wall time is " + sprintf("%02d:%02d:%02d", hours, mins, secs)
+      <<EOF
+#!/bin/bash -x
+#MSUB -l nodes=#{nodes}:ppn=#{ppn}
+#MSUB -N #{executable_name}.#{job_identifier}
+#{@wall_mins ? "#MSUB -l walltime=#{sprintf("%02d:%02d:%02d", hours, mins, secs)}" : ""}
+
+### start of jobscript
+cd $PBS_O_WORKDIR
+echo "workdir: $PBS_O_WORKDIR"
+NSLOTS=#{nodes.to_i * ppn.to_i}
+echo "running on $NSLOTS cpus ..."
+
+EOF
+
+#MSUB -e #{Dir.pwd}/#{error_file}
+# if keyword omitted : default is submitting directory
+#MSUB -o #{Dir.pwd}/#{output_file}
+# if keyword omitted : default is submitting directory
+    end
+
+    def cancel_job
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{$$}.stop"
+        File.open(fname, 'w'){|file| file.puts "\n"}
+      else
+        `canceljob #{@job_no}`
+      end
+    end
+
+    def error_file
+      #For backwards compatibility
+      return "#{executable_name}.sh.e" if kind_of? CodeRunner::Run and [:Completed, :Failed].include? @status
+      return "#{executable_name}.#{job_identifier}.e#@job_no"
+    end
+
+    def output_file
+      return "#{executable_name}.sh.o" if kind_of? CodeRunner::Run and [:Completed, :Failed].include? @status
+      return "#{executable_name}.#{job_identifier}.o#@job_no"
+    end
+
+    def get_run_status(job_no, current_status)
+      if ENV['CODE_RUNNER_LAUNCHER']
+        return :Unknown
+      end
+      line = current_status.split(/\n/).grep(Regexp.new(job_no.to_s))[0]
+      unless line
+        return :Unknown
+      else
+        if line =~ /\sQ\s/
+          return :Queueing
+        elsif line =~ /\sR\s/
+          return :Running
+        elsif line =~ /\sC\s/
+          return :Unknown
+        else
+          ep 'line', line
+          raise 'Could not get run status'
+        end
+      end
+    end
+
+
+  end
+end
data/lib/coderunner/system_modules/macosx.rb
ADDED
@@ -0,0 +1,50 @@
+class CodeRunner
+  module Macosx
+    # @@ruby_command = "ruby1.9"
+
+    def queue_status
+      %x[ps].grep(Regexp.new(Regexp.escape(executable_name))) #Can't put grep in the shell command because it will grep itself - OS X displays the entire command in ps!
+    end
+
+    def run_command
+      if rcp.uses_mpi
+        return %[mpirun -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      else
+        return %[#{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      end
+    end
+
+    def execute
+      log 'execute_submission'
+      if ENV['CODE_RUNNER_LAUNCHER']
+        launch_id = "#{Time.now.to_i}#{$$}"
+        fname = ENV['HOME'] + "/.coderunner_to_launch/#{launch_id}"
+        File.open(fname + '.start', 'w'){|file| file.puts "cd #{Dir.pwd};#{run_command}"}
+        sleep 1 until FileTest.exist? fname + '.pid'
+        pid = File.read(fname + '.pid').to_i
+        FileUtils.rm fname + '.pid'
+      else
+        pid = Kernel.spawn(run_command + " & ")
+      end
+
+      return nil # pid
+    end
+
+
+    def cancel_job
+      `kill #{@job_no}`
+    end
+
+    def error_file
+      return executable_name + ".sh.e"
+    end
+
+    def output_file
+      return executable_name + ".sh.o"
+    end
+
+  end
+end
+
+
+
data/lib/coderunner/system_modules/moab.rb
ADDED
@@ -0,0 +1,133 @@
+class CodeRunner
+  module Moab
+
+    def self.configure_environment
+      eputs "Configuring Hector"
+      conf = <<EOF
+eval `modulecmd bash swap PrgEnv-pgi PrgEnv-gnu`
+eval `modulecmd bash load fftw/3.2.2`
+export XTPE_LINK_TYPE=dynamic
+export LD_LIBRARY_PATH=/opt/xt-libsci/10.4.1/gnu/lib/44:$LD_LIBRARY_PATH
+EOF
+      Kernel.change_environment_with_shell_script(conf)
+    end
+
+    def queue_status
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status.txt] +
+          %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status2.txt]
+      else
+        %x[qstat | grep $USER]
+      end
+    end
+
+    def run_command
+      # "qsub #{batch_script_file}"
+      if (ENV['CODE_RUNNER_LAUNCHER'].size > 0 rescue false)
+        return %[mpiexec -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      else
+        nodes, ppn = @nprocs.split(/x/)
+        nprocstot = nodes.to_i * ppn.to_i
+        "aprun -n #{nprocstot} -N #{ppn} #{executable_location}/#{executable_name} #{parameter_string}"
+      end
+    end
+
+    def execute
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        launch_id = "#{Time.now.to_i}#{$$}"
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{launch_id}"
+        File.open(fname + '.start', 'w'){|file| file.puts "cd #{Dir.pwd};#{run_command}"}
+        sleep 1 until FileTest.exist? fname + '.pid'
+        pid = File.read(fname + '.pid').to_i
+        FileUtils.rm fname + '.pid'
+        return pid
+      else
+        File.open(batch_script_file, 'w'){|file| file.puts batch_script + run_command + "\n"}
+        pid = %x[qsub #{batch_script_file}].to_i
+      end
+    end
+
+    def batch_script_file
+      "#{executable_name}_#{job_identifier}.sh"
+    end
+
+    def max_ppn
+      raise "Please define max_ppn for your system"
+    end
+
+    def batch_script
+
+      nodes, ppn = @nprocs.split(/x/)
+      (eputs "Warning: number of nodes is not recommended (8, 16, 32, 64, 128, 256, 512, 1024, 2048 or 4096 recommended)"; sleep 0.2) unless [8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096].include? nodes.to_i
+      (eputs "Warning: number of wall mins is not recommended (20, 60, 180, 360, 720 recommended)"; sleep 0.2) unless [20, 60, 180, 360, 720].include? @wall_mins.to_i
+      eputs "Warning: Underuse of nodes (#{ppn} cores per node instead of #{max_ppn})" if ppn.to_i < max_ppn
+      raise "Error: cores per node cannot exceed #{max_ppn}" if ppn.to_i > max_ppn
+      # raise "Error: project (i.e. budget) not specified" unless @project
+      ppn ||= max_ppn
+      if @wall_mins
+        ep @wall_mins
+        hours = (@wall_mins / 60).floor
+        mins = @wall_mins.to_i % 60
+        secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
+      end
+      eputs "Allotted wall time is " + sprintf("%02d:%02d:%02d", hours, mins, secs)
+      nprocstot = nodes.to_i * ppn.to_i
+      <<EOF
+#!/bin/bash --login
+#PBS -N #{executable_name}.#{job_identifier}
+#PBS -l mppwidth=#{nprocstot}
+#PBS -l mppnppn=#{ppn}
+#{@wall_mins ? "#PBS -l walltime=#{sprintf("%02d:%02d:%02d", hours, mins, secs)}" : ""}
+#{@project ? "#PBS -A #@project" : ""}
+
+### start of jobscript
+cd $PBS_O_WORKDIR
+echo "workdir: $PBS_O_WORKDIR"
+
+echo "Submitting #{nodes}x#{ppn} job on Hector for project #@project..."
+
+
+EOF
+
+    end
+
+    def cancel_job
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{$$}.stop"
+        File.open(fname, 'w'){|file| file.puts "\n"}
+      else
+        `qdel #{@job_no}`
+      end
+    end
+
+    def error_file
+      return "#{executable_name}.#{job_identifier}.e#@job_no"
+    end
+
+    def output_file
+      return "#{executable_name}.#{job_identifier}.o#@job_no"
+    end
+
+    def get_run_status(job_no, current_status)
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        return :Unknown
+      end
+      line = current_status.split(/\n/).grep(Regexp.new(job_no.to_s))[0]
+      unless line
+        return :Unknown
+      else
+        if line =~ /\sQ\s/
+          return :Queueing
+        elsif line =~ /\sR\s/
+          return :Running
+        elsif line =~ /\sC\s/
+          return :Unknown
+        else
+          ep 'line', line
+          raise 'Could not get run status'
+        end
+      end
+    end
+
+  end
+end
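
get_run_status above classifies a run by grepping the cached qstat output for the job number and reading the one-letter state column: Q becomes :Queueing, R becomes :Running, and C (completed, so no longer the queue's concern) falls back to :Unknown. With a fabricated qstat line for illustration:

    # The qstat line here is an invented sample; the matching mirrors get_run_status.
    line = "123456.sdb   gs2.v7.1   someuser   01:02:03   R   batch"
    status = case line
             when /\sQ\s/ then :Queueing
             when /\sR\s/ then :Running
             when /\sC\s/ then :Unknown   # finished jobs are handled elsewhere
             end
    # status => :Running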
data/lib/coderunner/system_modules/new_hydra.rb
ADDED
@@ -0,0 +1,33 @@
+class CodeRunner
+  module NewHydra
+
+    def queue_status
+      %x[qstat | grep $LOGNAME]
+    end
+
+    def run_command
+      if @runner.debug
+        return %[mpisubshort "40mins" #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} ]
+      else
+        return %[mpisubnoquotes "0.5-10 hrs" #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string}]
+      end
+    end
+
+    def execute
+      system run_command
+    end
+
+    def cancel_job
+      `qdel #{@job_no}`
+    end
+
+    def error_file
+      return "#{executable_name}.sh.e#{@job_no}"
+    end
+
+    def output_file
+      return "#{executable_name}.sh.o#{@job_no}"
+    end
+
+  end
+end
data/lib/coderunner/system_modules/slurm.rb
ADDED
@@ -0,0 +1,124 @@
+class CodeRunner
+  # A module to let CodeRunner run using the SLURM queue system,
+  # used on certain HPC systems.
+  module Slurm
+
+    def queue_status
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status.txt] +
+          %x[cat #{ENV['HOME']}/.coderunner_to_launch_#{prefix}/queue_status2.txt]
+      else
+        %x[squeue | grep #{ENV['USER'][0..7]}]
+      end
+    end
+
+    def run_command
+      # "qsub #{batch_script_file}"
+      if (ENV['CODE_RUNNER_LAUNCHER'].size > 0 rescue false)
+        return %[mpiexec -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file} 2> #{error_file}]
+      else
+        nodes, ppn = @nprocs.split(/x/)
+        nprocstot = nodes.to_i * ppn.to_i
+        "mpirun -np #{nprocstot} #{executable_location}/#{executable_name} #{parameter_string}"
+      end
+    end
+
+    def execute
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        launch_id = "#{Time.now.to_i}#{$$}"
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{launch_id}"
+        File.open(fname + '.start', 'w'){|file| file.puts "cd #{Dir.pwd};#{run_command}"}
+        sleep 1 until FileTest.exist? fname + '.pid'
+        pid = File.read(fname + '.pid').to_i
+        FileUtils.rm fname + '.pid'
+        return pid
+      else
+        File.open(batch_script_file, 'w'){|file| file.puts batch_script + run_command + "\n"}
+        pid = %x[sbatch #{batch_script_file}].to_i
+        return nil
+      end
+    end
+
+    def batch_script_file
+      "#{executable_name}.#{job_identifier}.sh"
+    end
+
+    def max_ppn
+      raise "Please define max_ppn for your system"
+    end
+
+    def batch_script
+
+      nodes, ppn = @nprocs.split(/x/)
+      eputs "Warning: Underuse of nodes (#{ppn} cores per node instead of #{max_ppn})" if ppn.to_i < max_ppn
+      raise "Error: cores per node cannot exceed #{max_ppn}" if ppn.to_i > max_ppn
+      # raise "Error: project (i.e. budget) not specified" unless @project
+      ppn ||= max_ppn
+      raise "Please specify wall minutes" unless @wall_mins
+      if @wall_mins
+        ep @wall_mins
+        hours = (@wall_mins / 60).floor
+        mins = @wall_mins.to_i % 60
+        secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
+      end
+      eputs "Allotted wall time is " + sprintf("%02d:%02d:%02d", hours, mins, secs)
+      nprocstot = nodes.to_i * ppn.to_i
+      <<EOF
+#!/bin/bash
+#SBATCH -J #{executable_name}.#{job_identifier} # jobname
+#SBATCH -N #{nodes.to_i} # number of nodes
+#SBATCH -n #{nprocstot} # number of tasks
+#SBATCH -o #{executable_name}.#{job_identifier}.o%j # stdout filename (%j is jobid)
+#SBATCH -e #{executable_name}.#{job_identifier}.e%j # stderr filename (%j is jobid)
+#{@project ? "#SBATCH -A #@project # project to charge" : ""}
+#{@wall_mins ? "#SBATCH -t #{sprintf("%02d:%02d:%02d", hours, mins, secs)} # walltime" : ""}
+
+#{code_run_environment}
+echo "Submitting #{nodes}x#{ppn} job on #{CodeRunner::SYS} for project #@project..."
+
+
+
+EOF
+
+    end
+
+    def cancel_job
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        fname = ENV['HOME'] + "/.coderunner_to_launch_#{prefix}/#{$$}.stop"
+        File.open(fname, 'w'){|file| file.puts "\n"}
+      else
+        `scancel #{@job_no}`
+      end
+    end
+
+    def error_file
+      return "#{executable_name}.#{job_identifier}.e#@job_no"
+    end
+
+    def output_file
+      return "#{executable_name}.#{job_identifier}.o#@job_no"
+    end
+
+    def get_run_status(job_no, current_status)
+      if ((prefix = ENV['CODE_RUNNER_LAUNCHER']).size > 0 rescue false)
+        return :Unknown
+      end
+      line = current_status.split(/\n/).grep(Regexp.new(job_no.to_s))[0]
+      unless line
+        return :Unknown
+      else
+        if line =~ /\sPD\s/
+          return :Queueing
+        elsif line =~ /\sR\s/
+          return :Running
+        elsif line =~ /\sC\s/
+          return :Unknown
+        else
+          ep 'line', line
+          raise 'Could not get run status'
+        end
+      end
+    end
+
+  end
+end
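
A convention running through all of the batch-scripted modules: @nprocs is a string of the form "nodes x cores-per-node". batch_script splits it on "x", warns or raises if the per-node count is off, and multiplies the parts for the total task count; when no "x" is present, ppn comes back nil and ppn ||= max_ppn fills it in. A small example of the parsing (the max_ppn value is hypothetical):

    # "NxM" processor-count convention used by the batch_script methods above.
    nodes, ppn = "8x16".split(/x/)     # => ["8", "16"]
    nprocstot  = nodes.to_i * ppn.to_i # => 128 total MPI tasks

    nodes, ppn = "4".split(/x/)        # => ["4"]; ppn is nil
    ppn ||= 16                         # module substitutes max_ppn (16 assumed here)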
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: coderunner
 version: !ruby/object:Gem::Version
-  version: 0.11.1
+  version: 0.11.2
 prerelease:
 platform: ruby
 authors:
@@ -178,6 +178,16 @@ files:
 - lib/coderunner/long_regexen.rb
 - lib/coderunner/merged_code_runner.rb
 - lib/coderunner/run.rb
+- lib/coderunner/system_modules/franklin.rb
+- lib/coderunner/system_modules/generic_linux.rb
+- lib/coderunner/system_modules/genericlinux_testsystem.rb
+- lib/coderunner/system_modules/hector.rb
+- lib/coderunner/system_modules/helios.rb
+- lib/coderunner/system_modules/juropa.rb
+- lib/coderunner/system_modules/macosx.rb
+- lib/coderunner/system_modules/moab.rb
+- lib/coderunner/system_modules/new_hydra.rb
+- lib/coderunner/system_modules/slurm.rb
 - test/helper.rb
 - test/test_coderunner.rb
 homepage: http://coderunner.sourceforge.net
@@ -195,7 +205,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -
+      hash: -3340194866062964858
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
 requirements: