ood_core 0.11.4 → 0.15.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +30 -0
- data/CHANGELOG.md +55 -1
- data/README.md +7 -6
- data/lib/ood_core/job/adapters/ccq.rb +274 -0
- data/lib/ood_core/job/adapters/helper.rb +20 -1
- data/lib/ood_core/job/adapters/kubernetes.rb +193 -0
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +354 -0
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +294 -0
- data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +9 -0
- data/lib/ood_core/job/adapters/kubernetes/resources.rb +58 -0
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +158 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +10 -1
- data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh +18 -15
- data/lib/ood_core/job/adapters/lsf.rb +1 -0
- data/lib/ood_core/job/adapters/lsf/batch.rb +5 -3
- data/lib/ood_core/job/adapters/lsf/helper.rb +22 -22
- data/lib/ood_core/job/adapters/pbspro.rb +54 -34
- data/lib/ood_core/job/adapters/sge/batch.rb +6 -5
- data/lib/ood_core/job/adapters/sge/helper.rb +19 -19
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +35 -4
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +25 -2
- data/lib/ood_core/job/adapters/slurm.rb +79 -38
- data/lib/ood_core/job/adapters/torque.rb +30 -23
- data/lib/ood_core/job/adapters/torque/batch.rb +29 -12
- data/lib/ood_core/job/script.rb +9 -1
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -1
- metadata +33 -6
- data/.travis.yml +0 -9
@@ -15,7 +15,7 @@ end
|
|
15
15
|
class OodCore::Job::Adapters::Sge::Batch
|
16
16
|
using OodCore::Refinements::HashExtensions
|
17
17
|
|
18
|
-
attr_reader :bin, :bin_overrides, :conf, :cluster, :helper
|
18
|
+
attr_reader :bin, :bin_overrides, :conf, :cluster, :helper, :submit_host, :strict_host_checking
|
19
19
|
|
20
20
|
require "ood_core/job/adapters/sge/qstat_xml_j_r_listener"
|
21
21
|
require "ood_core/job/adapters/sge/qstat_xml_r_listener"
|
@@ -36,6 +36,8 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
36
36
|
@bin = Pathname.new(config.fetch(:bin, nil).to_s)
|
37
37
|
@sge_root = Pathname.new(config[:sge_root] || ENV['SGE_ROOT'] || "/var/lib/gridengine")
|
38
38
|
@bin_overrides = config.fetch(:bin_overrides, {})
|
39
|
+
@submit_host = config.fetch(:submit_host, "")
|
40
|
+
@strict_host_checking = config.fetch(:strict_host_checking, true)
|
39
41
|
|
40
42
|
# FIXME: hack as this affects env of the process!
|
41
43
|
ENV['SGE_ROOT'] = @sge_root.to_s
|
@@ -62,7 +64,7 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
62
64
|
def get_all(owner: nil)
|
63
65
|
listener = QstatXmlRListener.new
|
64
66
|
argv = ['qstat', '-r', '-xml']
|
65
|
-
argv
|
67
|
+
argv.concat ['-u', owner] unless owner.nil?
|
66
68
|
REXML::Parsers::StreamParser.new(call(*argv), listener).parse
|
67
69
|
|
68
70
|
listener.parsed_jobs.map{
|
@@ -166,11 +168,10 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
166
168
|
# Call a forked SGE command for a given batch server
|
167
169
|
def call(cmd, *args, env: {}, stdin: "", chdir: nil)
|
168
170
|
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
|
169
|
-
args = args.map(&:to_s)
|
170
|
-
|
171
171
|
env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
|
172
|
+
cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
|
172
173
|
chdir ||= "."
|
173
|
-
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
|
174
|
+
o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s, chdir: chdir.to_s)
|
174
175
|
s.success? ? o : raise(Error, e)
|
175
176
|
end
|
176
177
|
|
@@ -17,38 +17,38 @@ class OodCore::Job::Adapters::Sge::Helper
|
|
17
17
|
raise_error_on_unsupported_args(script, after: after, afterok: afterok, afternotok: afternotok, afterany: afterany)
|
18
18
|
|
19
19
|
args = []
|
20
|
-
args
|
21
|
-
args
|
22
|
-
script.job_environment.each_pair {|k, v| args
|
23
|
-
args
|
20
|
+
args.concat ['-h'] if script.submit_as_hold
|
21
|
+
args.concat ['-r', 'yes'] if script.rerunnable
|
22
|
+
script.job_environment.each_pair {|k, v| args.concat ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
|
23
|
+
args.concat ["-V"] if script.copy_environment?
|
24
24
|
|
25
25
|
if script.workdir
|
26
|
-
args
|
26
|
+
args.concat ['-wd', script.workdir]
|
27
27
|
elsif ! script_contains_wd_directive?(script.content)
|
28
|
-
args
|
28
|
+
args.concat ['-cwd']
|
29
29
|
end
|
30
30
|
|
31
31
|
on_event_email = []
|
32
32
|
on_event_email << 'b' if script.email_on_started # beginning
|
33
33
|
on_event_email << 'ea' if script.email_on_terminated # end, aborted
|
34
34
|
|
35
|
-
args
|
35
|
+
args.concat ['-M', script.email.first, '-m', on_event_email.join] if script.email && ! on_event_email.empty?
|
36
36
|
|
37
37
|
afterok = Array(afterok).map(&:to_s)
|
38
|
-
args
|
38
|
+
args.concat ['-hold_jid_ad', afterok.join(',')] unless afterok.empty?
|
39
39
|
|
40
40
|
# ignoring email_on_started
|
41
|
-
args
|
42
|
-
args
|
43
|
-
args
|
44
|
-
args
|
45
|
-
args
|
46
|
-
args
|
47
|
-
args
|
48
|
-
args
|
49
|
-
args
|
50
|
-
args
|
51
|
-
args
|
41
|
+
args.concat ['-N', script.job_name] unless script.job_name.nil?
|
42
|
+
args.concat ['-e', script.error_path] unless script.error_path.nil?
|
43
|
+
args.concat ['-o', script.output_path] unless script.output_path.nil?
|
44
|
+
args.concat ['-ar', script.reservation_id] unless script.reservation_id.nil?
|
45
|
+
args.concat ['-q', script.queue_name] unless script.queue_name.nil?
|
46
|
+
args.concat ['-p', script.priority] unless script.priority.nil?
|
47
|
+
args.concat ['-a', script.start_time.strftime('%C%y%m%d%H%M.%S')] unless script.start_time.nil?
|
48
|
+
args.concat ['-l', "h_rt=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
|
49
|
+
args.concat ['-P', script.accounting_id] unless script.accounting_id.nil?
|
50
|
+
args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
|
51
|
+
args.concat Array.wrap(script.native) if script.native
|
52
52
|
|
53
53
|
args
|
54
54
|
end
|
@@ -28,10 +28,13 @@ class QstatXmlJRListener
|
|
28
28
|
:tasks => [],
|
29
29
|
:status => :queued,
|
30
30
|
:procs => 1,
|
31
|
-
:native => {
|
31
|
+
:native => {
|
32
|
+
:ST_name => ''
|
33
|
+
}
|
32
34
|
}
|
33
35
|
@current_text = nil
|
34
36
|
@current_request = nil
|
37
|
+
@processing_JB_stdout_path_list = false
|
35
38
|
|
36
39
|
@processing_job_array_spec = false
|
37
40
|
@adding_slots = false
|
@@ -42,6 +45,7 @@ class QstatXmlJRListener
|
|
42
45
|
step: 1, # Step can have a default of 1
|
43
46
|
}
|
44
47
|
@running_tasks = []
|
48
|
+
@native_tags = ['JB_job_number', 'JB_job_name', 'JB_version', 'JB_project', 'JB_exec_file', 'JB_script_file', 'JB_script_size', 'JB_submission_time', 'JB_execution_time', 'JB_deadline', 'JB_owner', 'JB_uid', 'JB_group', 'JB_gid', 'JB_account', 'JB_cwd', 'JB_notify', 'JB_type', 'JB_reserve', 'JB_priority', 'JB_jobshare', 'JB_verify', 'JB_checkpoint_attr', 'JB_checkpoint_interval', 'JB_restart']
|
45
49
|
end
|
46
50
|
|
47
51
|
def tag_start(name, attrs)
|
@@ -50,10 +54,17 @@ class QstatXmlJRListener
|
|
50
54
|
toggle_processing_array_spec
|
51
55
|
when 'JB_pe_range'
|
52
56
|
toggle_adding_slots
|
57
|
+
when 'JB_stdout_path_list'
|
58
|
+
@processing_JB_stdout_path_list = true
|
53
59
|
end
|
54
60
|
end
|
55
61
|
|
56
62
|
def tag_end(name)
|
63
|
+
#Add to native hash if in native_tags
|
64
|
+
if (@native_tags.include?(name))
|
65
|
+
@parsed_job[:native][:"#{name}"] = @current_text
|
66
|
+
end
|
67
|
+
|
57
68
|
case name
|
58
69
|
when 'JB_ja_tasks'
|
59
70
|
end_JB_ja_tasks
|
@@ -92,6 +103,10 @@ class QstatXmlJRListener
|
|
92
103
|
toggle_processing_array_spec
|
93
104
|
when 'JB_pe_range'
|
94
105
|
toggle_adding_slots
|
106
|
+
when 'PN_path'
|
107
|
+
end_PN_path
|
108
|
+
when 'ST_name'
|
109
|
+
end_ST_name
|
95
110
|
end
|
96
111
|
end
|
97
112
|
|
@@ -118,7 +133,7 @@ class QstatXmlJRListener
|
|
118
133
|
end
|
119
134
|
|
120
135
|
def end_JB_submission_time
|
121
|
-
@parsed_job[:submission_time] = @current_text.to_i
|
136
|
+
@parsed_job[:submission_time] = ms_to_seconds(@current_text.to_i)
|
122
137
|
end
|
123
138
|
|
124
139
|
def end_JB_ja_tasks
|
@@ -127,7 +142,7 @@ class QstatXmlJRListener
|
|
127
142
|
|
128
143
|
def end_JAT_start_time
|
129
144
|
@parsed_job[:status] = :running
|
130
|
-
@parsed_job[:dispatch_time] = @current_text.to_i
|
145
|
+
@parsed_job[:dispatch_time] = ms_to_seconds(@current_text.to_i)
|
131
146
|
@parsed_job[:wallclock_time] = Time.now.to_i - @parsed_job[:dispatch_time]
|
132
147
|
end
|
133
148
|
|
@@ -153,6 +168,15 @@ class QstatXmlJRListener
|
|
153
168
|
@running_tasks << @current_text
|
154
169
|
end
|
155
170
|
|
171
|
+
def end_PN_path
|
172
|
+
@parsed_job[:native][:PN_path] = @current_text if @processing_JB_stdout_path_list
|
173
|
+
@processing_JB_stdout_path_list = false
|
174
|
+
end
|
175
|
+
|
176
|
+
def end_ST_name
|
177
|
+
@parsed_job[:native][:ST_name] = @parsed_job[:native][:ST_name] + @current_text + ' '
|
178
|
+
end
|
179
|
+
|
156
180
|
def set_job_array_piece(key)
|
157
181
|
@job_array_spec[key] = @current_text if @processing_job_array_spec
|
158
182
|
end
|
@@ -201,5 +225,12 @@ class QstatXmlJRListener
|
|
201
225
|
def set_slots
|
202
226
|
@parsed_job[:procs] = @current_text.to_i
|
203
227
|
end
|
204
|
-
end
|
205
228
|
|
229
|
+
private
|
230
|
+
|
231
|
+
# Some Grid Engines (like UGE) use milliseconds where others use
|
232
|
+
# seconds past the epoch.
|
233
|
+
def ms_to_seconds(raw)
|
234
|
+
raw.digits.length >= 13 ? raw / 1000 : raw
|
235
|
+
end
|
236
|
+
end
|
@@ -24,21 +24,32 @@ class QstatXmlRListener
|
|
24
24
|
@parsed_jobs = []
|
25
25
|
@current_job = {
|
26
26
|
:tasks => [],
|
27
|
-
:native => {
|
27
|
+
:native => {
|
28
|
+
:ST_name => ''
|
29
|
+
}
|
28
30
|
}
|
29
31
|
@current_text = nil
|
32
|
+
@processing_JB_stdout_path_list = false
|
30
33
|
|
31
34
|
@current_request = nil
|
35
|
+
@native_tags = ['JB_job_number', 'JB_job_name', 'JB_version', 'JB_project', 'JB_exec_file', 'JB_script_file', 'JB_script_size', 'JB_submission_time', 'JB_execution_time', 'JB_deadline', 'JB_owner', 'JB_uid', 'JB_group', 'JB_gid', 'JB_account', 'JB_cwd', 'JB_notify', 'JB_type', 'JB_reserve', 'JB_priority', 'JB_jobshare', 'JB_verify', 'JB_checkpoint_attr', 'JB_checkpoint_interval', 'JB_restart']
|
32
36
|
end
|
33
37
|
|
34
38
|
def tag_start(name, attributes)
|
35
39
|
case name
|
36
40
|
when 'hard_request'
|
37
41
|
start_hard_request(attributes)
|
42
|
+
when "JB_stdout_path_list"
|
43
|
+
@processing_JB_stdout_path_list = true
|
38
44
|
end
|
39
45
|
end
|
40
46
|
|
41
47
|
def tag_end(name)
|
48
|
+
#Add text if in native_tags
|
49
|
+
if (@native_tags.include?(name))
|
50
|
+
@current_job[:native][:"#{name}"] = @current_text
|
51
|
+
end
|
52
|
+
|
42
53
|
case name
|
43
54
|
when 'job_list'
|
44
55
|
end_job_list
|
@@ -64,6 +75,10 @@ class QstatXmlRListener
|
|
64
75
|
end_hard_request
|
65
76
|
when 'tasks'
|
66
77
|
add_child_tasks
|
78
|
+
when 'PN_path'
|
79
|
+
end_PN_path
|
80
|
+
when 'ST_name'
|
81
|
+
end_ST_name
|
67
82
|
end
|
68
83
|
end
|
69
84
|
|
@@ -130,6 +145,15 @@ class QstatXmlRListener
|
|
130
145
|
end
|
131
146
|
end
|
132
147
|
|
148
|
+
def end_PN_path
|
149
|
+
@current_job[:native][:PN_path] = @current_text if @processing_JB_stdout_path_list
|
150
|
+
@processing_JB_stdout_path_list = false
|
151
|
+
end
|
152
|
+
|
153
|
+
def end_ST_name
|
154
|
+
@current_job[:native][:ST_name] = @current_job[:native][:ST_name] + @current_text + ' '
|
155
|
+
end
|
156
|
+
|
133
157
|
# Store a completed job and reset current_job for the next pass
|
134
158
|
def end_job_list
|
135
159
|
@parsed_jobs << @current_job
|
@@ -145,4 +169,3 @@ class QstatXmlRListener
|
|
145
169
|
}
|
146
170
|
end
|
147
171
|
end
|
148
|
-
|
@@ -14,13 +14,17 @@ module OodCore
|
|
14
14
|
# @option config [Object] :conf (nil) Path to the slurm conf
|
15
15
|
# @option config [Object] :bin (nil) Path to slurm client binaries
|
16
16
|
# @option config [#to_h] :bin_overrides ({}) Optional overrides to Slurm client executables
|
17
|
+
# @option config [Object] :submit_host ("") Submit job on login node via ssh
|
18
|
+
# @option config [Object] :strict_host_checking (true) Whether to use strict host checking when ssh to submit_host
|
17
19
|
def self.build_slurm(config)
|
18
20
|
c = config.to_h.symbolize_keys
|
19
|
-
cluster
|
20
|
-
conf
|
21
|
-
bin
|
22
|
-
bin_overrides
|
23
|
-
|
21
|
+
cluster = c.fetch(:cluster, nil)
|
22
|
+
conf = c.fetch(:conf, nil)
|
23
|
+
bin = c.fetch(:bin, nil)
|
24
|
+
bin_overrides = c.fetch(:bin_overrides, {})
|
25
|
+
submit_host = c.fetch(:submit_host, "")
|
26
|
+
strict_host_checking = c.fetch(:strict_host_checking, true)
|
27
|
+
slurm = Adapters::Slurm::Batch.new(cluster: cluster, conf: conf, bin: bin, bin_overrides: bin_overrides, submit_host: submit_host, strict_host_checking: strict_host_checking)
|
24
28
|
Adapters::Slurm.new(slurm: slurm)
|
25
29
|
end
|
26
30
|
end
|
@@ -62,18 +66,36 @@ module OodCore
|
|
62
66
|
# @return Hash<String, String>
|
63
67
|
attr_reader :bin_overrides
|
64
68
|
|
69
|
+
# The login node where the job is submitted via ssh
|
70
|
+
# @example owens.osc.edu
|
71
|
+
# @return [String] The login node
|
72
|
+
attr_reader :submit_host
|
73
|
+
|
74
|
+
# Whether to use strict host checking when connecting to submit_host via ssh
|
75
|
+
# @example false
|
76
|
+
# @return [Bool]; true if empty
|
77
|
+
attr_reader :strict_host_checking
|
78
|
+
|
65
79
|
# The root exception class that all Slurm-specific exceptions inherit
|
66
80
|
# from
|
67
81
|
class Error < StandardError; end
|
68
82
|
|
83
|
+
# An error indicating the slurm command timed out
|
84
|
+
class SlurmTimeoutError < Error; end
|
85
|
+
|
69
86
|
# @param cluster [#to_s, nil] the cluster name
|
70
87
|
# @param conf [#to_s, nil] path to the slurm conf
|
71
88
|
# @param bin [#to_s] path to slurm installation binaries
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
@
|
89
|
+
# @param bin_overrides [#to_h] a hash of bin overrides to be used in job
|
90
|
+
# @param submit_host [#to_s] Submits the job on a login node via ssh
|
91
|
+
# @param strict_host_checking [Bool] Whether to use strict host checking when ssh to submit_host
|
92
|
+
def initialize(cluster: nil, bin: nil, conf: nil, bin_overrides: {}, submit_host: "", strict_host_checking: true)
|
93
|
+
@cluster = cluster && cluster.to_s
|
94
|
+
@conf = conf && Pathname.new(conf.to_s)
|
95
|
+
@bin = Pathname.new(bin.to_s)
|
96
|
+
@bin_overrides = bin_overrides
|
97
|
+
@submit_host = submit_host.to_s
|
98
|
+
@strict_host_checking = strict_host_checking
|
77
99
|
end
|
78
100
|
|
79
101
|
# Get a list of hashes detailing each of the jobs on the batch server
|
@@ -128,6 +150,9 @@ module OodCore
|
|
128
150
|
end
|
129
151
|
jobs
|
130
152
|
end
|
153
|
+
rescue SlurmTimeoutError
|
154
|
+
# TODO: could use a log entry here
|
155
|
+
return [{ id: id, state: 'undetermined' }]
|
131
156
|
end
|
132
157
|
|
133
158
|
def squeue_fields(attrs)
|
@@ -148,9 +173,9 @@ module OodCore
|
|
148
173
|
#TODO: write some barebones test for this? like 2 options and id or no id
|
149
174
|
def squeue_args(id: "", owner: nil, options: [])
|
150
175
|
args = ["--all", "--states=all", "--noconvert"]
|
151
|
-
args
|
152
|
-
args
|
153
|
-
args
|
176
|
+
args.concat ["-o", "#{RECORD_SEPARATOR}#{options.join(UNIT_SEPARATOR)}"]
|
177
|
+
args.concat ["-u", owner.to_s] unless owner.to_s.empty?
|
178
|
+
args.concat ["-j", id.to_s] unless id.to_s.empty?
|
154
179
|
args
|
155
180
|
end
|
156
181
|
|
@@ -275,12 +300,27 @@ module OodCore
|
|
275
300
|
# Call a forked Slurm command for a given cluster
|
276
301
|
def call(cmd, *args, env: {}, stdin: "")
|
277
302
|
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
|
303
|
+
|
278
304
|
args = args.map(&:to_s)
|
279
|
-
args
|
305
|
+
args.concat ["-M", cluster] if cluster
|
306
|
+
|
280
307
|
env = env.to_h
|
281
308
|
env["SLURM_CONF"] = conf.to_s if conf
|
282
|
-
|
283
|
-
|
309
|
+
|
310
|
+
cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
|
311
|
+
o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
|
312
|
+
s.success? ? interpret_and_raise(o, e) : raise(Error, e)
|
313
|
+
end
|
314
|
+
|
315
|
+
# Helper function to raise an error based on the contents of stderr.
|
316
|
+
# Slurm exits 0 even when the command fails, so we need to interpret stderr
|
317
|
+
# to see if the command was actually successful.
|
318
|
+
def interpret_and_raise(stdout, stderr)
|
319
|
+
return stdout if stderr.empty?
|
320
|
+
|
321
|
+
raise SlurmTimeoutError, stderr if /^slurm_load_jobs error: Socket timed out/.match(stderr)
|
322
|
+
|
323
|
+
stdout
|
284
324
|
end
|
285
325
|
|
286
326
|
def squeue_attrs_for_info_attrs(attrs)
|
@@ -358,30 +398,31 @@ module OodCore
|
|
358
398
|
# Set sbatch options
|
359
399
|
args = []
|
360
400
|
# ignore args, don't know how to do this for slurm
|
361
|
-
args
|
362
|
-
args
|
363
|
-
args
|
364
|
-
args
|
401
|
+
args.concat ["-H"] if script.submit_as_hold
|
402
|
+
args.concat (script.rerunnable ? ["--requeue"] : ["--no-requeue"]) unless script.rerunnable.nil?
|
403
|
+
args.concat ["-D", script.workdir.to_s] unless script.workdir.nil?
|
404
|
+
args.concat ["--mail-user", script.email.join(",")] unless script.email.nil?
|
365
405
|
if script.email_on_started && script.email_on_terminated
|
366
|
-
args
|
406
|
+
args.concat ["--mail-type", "ALL"]
|
367
407
|
elsif script.email_on_started
|
368
|
-
args
|
408
|
+
args.concat ["--mail-type", "BEGIN"]
|
369
409
|
elsif script.email_on_terminated
|
370
|
-
args
|
410
|
+
args.concat ["--mail-type", "END"]
|
371
411
|
elsif script.email_on_started == false && script.email_on_terminated == false
|
372
|
-
args
|
412
|
+
args.concat ["--mail-type", "NONE"]
|
373
413
|
end
|
374
|
-
args
|
375
|
-
args
|
376
|
-
args
|
377
|
-
args
|
378
|
-
args
|
379
|
-
args
|
380
|
-
args
|
381
|
-
args
|
382
|
-
args
|
383
|
-
args
|
384
|
-
args
|
414
|
+
args.concat ["-J", script.job_name] unless script.job_name.nil?
|
415
|
+
args.concat ["-i", script.input_path] unless script.input_path.nil?
|
416
|
+
args.concat ["-o", script.output_path] unless script.output_path.nil?
|
417
|
+
args.concat ["-e", script.error_path] unless script.error_path.nil?
|
418
|
+
args.concat ["--reservation", script.reservation_id] unless script.reservation_id.nil?
|
419
|
+
args.concat ["-p", script.queue_name] unless script.queue_name.nil?
|
420
|
+
args.concat ["--priority", script.priority] unless script.priority.nil?
|
421
|
+
args.concat ["--begin", script.start_time.localtime.strftime("%C%y-%m-%dT%H:%M:%S")] unless script.start_time.nil?
|
422
|
+
args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
|
423
|
+
args.concat ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
|
424
|
+
args.concat ['-a', script.job_array_request] unless script.job_array_request.nil?
|
425
|
+
args.concat ['--qos', script.qos] unless script.qos.nil?
|
385
426
|
# ignore nodes, don't know how to do this for slurm
|
386
427
|
|
387
428
|
# Set dependencies
|
@@ -390,14 +431,14 @@ module OodCore
|
|
390
431
|
depend << "afterok:#{afterok.join(":")}" unless afterok.empty?
|
391
432
|
depend << "afternotok:#{afternotok.join(":")}" unless afternotok.empty?
|
392
433
|
depend << "afterany:#{afterany.join(":")}" unless afterany.empty?
|
393
|
-
args
|
434
|
+
args.concat ["-d", depend.join(",")] unless depend.empty?
|
394
435
|
|
395
436
|
# Set environment variables
|
396
437
|
env = script.job_environment || {}
|
397
|
-
args
|
438
|
+
args.concat ["--export", export_arg(env, script.copy_environment?)]
|
398
439
|
|
399
440
|
# Set native options
|
400
|
-
args
|
441
|
+
args.concat script.native if script.native
|
401
442
|
|
402
443
|
# Set content
|
403
444
|
content = if script.shell_path.nil?
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "ood_core/refinements/hash_extensions"
|
2
2
|
require "ood_core/job/adapters/helper"
|
3
|
+
require 'shellwords'
|
3
4
|
|
4
5
|
module OodCore
|
5
6
|
module Job
|
@@ -9,16 +10,18 @@ module OodCore
|
|
9
10
|
# Build the Torque adapter from a configuration
|
10
11
|
# @param config [#to_h] the configuration for job adapter
|
11
12
|
# @option config [#to_s] :host The batch server host
|
13
|
+
# @option config [#to_s] :submit_host The login node to submit the job via ssh
|
12
14
|
# @option config [#to_s] :lib ('') Path to torque client libraries
|
13
15
|
# @option config [#to_s] :bin ('') Path to torque client binaries
|
14
16
|
# @option config [#to_h] :custom_bin ({}) Optional overrides to Torque client executables
|
15
17
|
def self.build_torque(config)
|
16
18
|
c = config.to_h.symbolize_keys
|
17
19
|
host = c.fetch(:host) { raise ArgumentError, "No host specified. Missing argument: host" }.to_s
|
20
|
+
submit_host = c.fetch(:submit_host, "").to_s
|
18
21
|
lib = c.fetch(:lib, "").to_s
|
19
22
|
bin = c.fetch(:bin, "").to_s
|
20
23
|
custom_bin = c.fetch(:custom_bin, {})
|
21
|
-
pbs = Adapters::Torque::Batch.new(host: host, lib: lib, bin: bin, custom_bin: custom_bin)
|
24
|
+
pbs = Adapters::Torque::Batch.new(host: host, submit_host: submit_host, lib: lib, bin: bin, custom_bin: custom_bin)
|
22
25
|
Adapters::Torque.new(pbs: pbs)
|
23
26
|
end
|
24
27
|
end
|
@@ -85,7 +88,7 @@ module OodCore
|
|
85
88
|
depend << "afterany:#{afterany.join(':')}" unless afterany.empty?
|
86
89
|
|
87
90
|
# Set mailing options
|
88
|
-
mail_points
|
91
|
+
mail_points = ""
|
89
92
|
mail_points += "b" if script.email_on_started
|
90
93
|
mail_points += "e" if script.email_on_terminated
|
91
94
|
|
@@ -129,40 +132,44 @@ module OodCore
|
|
129
132
|
envvars.merge! script.native.fetch(:envvars, {})
|
130
133
|
end
|
131
134
|
|
135
|
+
# Destructively change envvars to shellescape values
|
136
|
+
envvars.transform_values! { |v| Shellwords.escape(v) }
|
137
|
+
|
132
138
|
# Submit job
|
133
139
|
@pbs.submit_string(script.content, queue: script.queue_name, headers: headers, resources: resources, envvars: envvars)
|
134
140
|
else
|
135
141
|
# Set qsub arguments
|
136
142
|
args = []
|
137
|
-
args
|
138
|
-
args
|
139
|
-
args
|
140
|
-
args
|
141
|
-
args
|
142
|
-
args
|
143
|
-
args
|
143
|
+
args.concat ["-F", script.args.join(" ")] unless script.args.nil?
|
144
|
+
args.concat ["-h"] if script.submit_as_hold
|
145
|
+
args.concat ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
|
146
|
+
args.concat ["-M", script.email.join(",")] unless script.email.nil?
|
147
|
+
args.concat ["-m", mail_points] unless mail_points.empty?
|
148
|
+
args.concat ["-N", script.job_name] unless script.job_name.nil?
|
149
|
+
args.concat ["-S", script.shell_path] unless script.shell_path.nil?
|
144
150
|
# ignore input_path (not defined in Torque)
|
145
|
-
args
|
146
|
-
args
|
147
|
-
args
|
148
|
-
args
|
149
|
-
args
|
150
|
-
args
|
151
|
-
args
|
152
|
-
args
|
153
|
-
args
|
154
|
-
args
|
151
|
+
args.concat ["-o", script.output_path] unless script.output_path.nil?
|
152
|
+
args.concat ["-e", script.error_path] unless script.error_path.nil?
|
153
|
+
args.concat ["-W", "x=advres:#{script.reservation_id}"] unless script.reservation_id.nil?
|
154
|
+
args.concat ["-q", script.queue_name] unless script.queue_name.nil?
|
155
|
+
args.concat ["-p", script.priority] unless script.priority.nil?
|
156
|
+
args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
|
157
|
+
args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
|
158
|
+
args.concat ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
|
159
|
+
args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
|
160
|
+
args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
|
161
|
+
args.concat ['-l', "qos=#{script.qos}"] unless script.qos.nil?
|
155
162
|
# Set environment variables
|
156
163
|
env = script.job_environment.to_h
|
157
|
-
args
|
158
|
-
args
|
164
|
+
args.concat ["-v", env.keys.join(",")] unless env.empty?
|
165
|
+
args.concat ["-V"] if script.copy_environment?
|
159
166
|
|
160
167
|
# If error_path is not specified we join stdout & stderr (as this
|
161
168
|
# mimics what the other resource managers do)
|
162
|
-
args
|
169
|
+
args.concat ["-j", "oe"] if script.error_path.nil?
|
163
170
|
|
164
171
|
# Set native options
|
165
|
-
args
|
172
|
+
args.concat script.native if script.native
|
166
173
|
|
167
174
|
# Submit job
|
168
175
|
@pbs.submit(script.content, args: args, env: env, chdir: script.workdir)
|