ood_core 0.11.4 → 0.15.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -15,7 +15,7 @@ end
15
15
  class OodCore::Job::Adapters::Sge::Batch
16
16
  using OodCore::Refinements::HashExtensions
17
17
 
18
- attr_reader :bin, :bin_overrides, :conf, :cluster, :helper
18
+ attr_reader :bin, :bin_overrides, :conf, :cluster, :helper, :submit_host, :strict_host_checking
19
19
 
20
20
  require "ood_core/job/adapters/sge/qstat_xml_j_r_listener"
21
21
  require "ood_core/job/adapters/sge/qstat_xml_r_listener"
@@ -36,6 +36,8 @@ class OodCore::Job::Adapters::Sge::Batch
36
36
  @bin = Pathname.new(config.fetch(:bin, nil).to_s)
37
37
  @sge_root = Pathname.new(config[:sge_root] || ENV['SGE_ROOT'] || "/var/lib/gridengine")
38
38
  @bin_overrides = config.fetch(:bin_overrides, {})
39
+ @submit_host = config.fetch(:submit_host, "")
40
+ @strict_host_checking = config.fetch(:strict_host_checking, true)
39
41
 
40
42
  # FIXME: hack as this affects env of the process!
41
43
  ENV['SGE_ROOT'] = @sge_root.to_s
@@ -62,7 +64,7 @@ class OodCore::Job::Adapters::Sge::Batch
62
64
  def get_all(owner: nil)
63
65
  listener = QstatXmlRListener.new
64
66
  argv = ['qstat', '-r', '-xml']
65
- argv += ['-u', owner] unless owner.nil?
67
+ argv.concat ['-u', owner] unless owner.nil?
66
68
  REXML::Parsers::StreamParser.new(call(*argv), listener).parse
67
69
 
68
70
  listener.parsed_jobs.map{
@@ -166,11 +168,10 @@ class OodCore::Job::Adapters::Sge::Batch
166
168
  # Call a forked SGE command for a given batch server
167
169
  def call(cmd, *args, env: {}, stdin: "", chdir: nil)
168
170
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
169
- args = args.map(&:to_s)
170
-
171
171
  env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
172
+ cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
172
173
  chdir ||= "."
173
- o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
174
+ o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s, chdir: chdir.to_s)
174
175
  s.success? ? o : raise(Error, e)
175
176
  end
176
177
 
@@ -17,38 +17,38 @@ class OodCore::Job::Adapters::Sge::Helper
17
17
  raise_error_on_unsupported_args(script, after: after, afterok: afterok, afternotok: afternotok, afterany: afterany)
18
18
 
19
19
  args = []
20
- args += ['-h'] if script.submit_as_hold
21
- args += ['-r', 'yes'] if script.rerunnable
22
- script.job_environment.each_pair {|k, v| args += ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
23
- args += ["-V"] if script.copy_environment?
20
+ args.concat ['-h'] if script.submit_as_hold
21
+ args.concat ['-r', 'yes'] if script.rerunnable
22
+ script.job_environment.each_pair {|k, v| args.concat ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
23
+ args.concat ["-V"] if script.copy_environment?
24
24
 
25
25
  if script.workdir
26
- args += ['-wd', script.workdir]
26
+ args.concat ['-wd', script.workdir]
27
27
  elsif ! script_contains_wd_directive?(script.content)
28
- args += ['-cwd']
28
+ args.concat ['-cwd']
29
29
  end
30
30
 
31
31
  on_event_email = []
32
32
  on_event_email << 'b' if script.email_on_started # beginning
33
33
  on_event_email << 'ea' if script.email_on_terminated # end, aborted
34
34
 
35
- args += ['-M', script.email.first, '-m', on_event_email.join] if script.email && ! on_event_email.empty?
35
+ args.concat ['-M', script.email.first, '-m', on_event_email.join] if script.email && ! on_event_email.empty?
36
36
 
37
37
  afterok = Array(afterok).map(&:to_s)
38
- args += ['-hold_jid_ad', afterok.join(',')] unless afterok.empty?
38
+ args.concat ['-hold_jid_ad', afterok.join(',')] unless afterok.empty?
39
39
 
40
40
  # ignoring email_on_started
41
- args += ['-N', script.job_name] unless script.job_name.nil?
42
- args += ['-e', script.error_path] unless script.error_path.nil?
43
- args += ['-o', script.output_path] unless script.output_path.nil?
44
- args += ['-ar', script.reservation_id] unless script.reservation_id.nil?
45
- args += ['-q', script.queue_name] unless script.queue_name.nil?
46
- args += ['-p', script.priority] unless script.priority.nil?
47
- args += ['-a', script.start_time.strftime('%C%y%m%d%H%M.%S')] unless script.start_time.nil?
48
- args += ['-l', "h_rt=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
49
- args += ['-P', script.accounting_id] unless script.accounting_id.nil?
50
- args += ['-t', script.job_array_request] unless script.job_array_request.nil?
51
- args += Array.wrap(script.native) if script.native
41
+ args.concat ['-N', script.job_name] unless script.job_name.nil?
42
+ args.concat ['-e', script.error_path] unless script.error_path.nil?
43
+ args.concat ['-o', script.output_path] unless script.output_path.nil?
44
+ args.concat ['-ar', script.reservation_id] unless script.reservation_id.nil?
45
+ args.concat ['-q', script.queue_name] unless script.queue_name.nil?
46
+ args.concat ['-p', script.priority] unless script.priority.nil?
47
+ args.concat ['-a', script.start_time.strftime('%C%y%m%d%H%M.%S')] unless script.start_time.nil?
48
+ args.concat ['-l', "h_rt=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
49
+ args.concat ['-P', script.accounting_id] unless script.accounting_id.nil?
50
+ args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
51
+ args.concat Array.wrap(script.native) if script.native
52
52
 
53
53
  args
54
54
  end
@@ -28,10 +28,13 @@ class QstatXmlJRListener
28
28
  :tasks => [],
29
29
  :status => :queued,
30
30
  :procs => 1,
31
- :native => {} # TODO: improve native attribute reporting
31
+ :native => {
32
+ :ST_name => ''
33
+ }
32
34
  }
33
35
  @current_text = nil
34
36
  @current_request = nil
37
+ @processing_JB_stdout_path_list = false
35
38
 
36
39
  @processing_job_array_spec = false
37
40
  @adding_slots = false
@@ -42,6 +45,7 @@ class QstatXmlJRListener
42
45
  step: 1, # Step can have a default of 1
43
46
  }
44
47
  @running_tasks = []
48
+ @native_tags = ['JB_job_number', 'JB_job_name', 'JB_version', 'JB_project', 'JB_exec_file', 'JB_script_file', 'JB_script_size', 'JB_submission_time', 'JB_execution_time', 'JB_deadline', 'JB_owner', 'JB_uid', 'JB_group', 'JB_gid', 'JB_account', 'JB_cwd', 'JB_notify', 'JB_type', 'JB_reserve', 'JB_priority', 'JB_jobshare', 'JB_verify', 'JB_checkpoint_attr', 'JB_checkpoint_interval', 'JB_restart']
45
49
  end
46
50
 
47
51
  def tag_start(name, attrs)
@@ -50,10 +54,17 @@ class QstatXmlJRListener
50
54
  toggle_processing_array_spec
51
55
  when 'JB_pe_range'
52
56
  toggle_adding_slots
57
+ when 'JB_stdout_path_list'
58
+ @processing_JB_stdout_path_list = true
53
59
  end
54
60
  end
55
61
 
56
62
  def tag_end(name)
63
+ #Add to native hash if in native_tags
64
+ if (@native_tags.include?(name))
65
+ @parsed_job[:native][:"#{name}"] = @current_text
66
+ end
67
+
57
68
  case name
58
69
  when 'JB_ja_tasks'
59
70
  end_JB_ja_tasks
@@ -92,6 +103,10 @@ class QstatXmlJRListener
92
103
  toggle_processing_array_spec
93
104
  when 'JB_pe_range'
94
105
  toggle_adding_slots
106
+ when 'PN_path'
107
+ end_PN_path
108
+ when 'ST_name'
109
+ end_ST_name
95
110
  end
96
111
  end
97
112
 
@@ -118,7 +133,7 @@ class QstatXmlJRListener
118
133
  end
119
134
 
120
135
  def end_JB_submission_time
121
- @parsed_job[:submission_time] = @current_text.to_i
136
+ @parsed_job[:submission_time] = ms_to_seconds(@current_text.to_i)
122
137
  end
123
138
 
124
139
  def end_JB_ja_tasks
@@ -127,7 +142,7 @@ class QstatXmlJRListener
127
142
 
128
143
  def end_JAT_start_time
129
144
  @parsed_job[:status] = :running
130
- @parsed_job[:dispatch_time] = @current_text.to_i
145
+ @parsed_job[:dispatch_time] = ms_to_seconds(@current_text.to_i)
131
146
  @parsed_job[:wallclock_time] = Time.now.to_i - @parsed_job[:dispatch_time]
132
147
  end
133
148
 
@@ -153,6 +168,15 @@ class QstatXmlJRListener
153
168
  @running_tasks << @current_text
154
169
  end
155
170
 
171
+ def end_PN_path
172
+ @parsed_job[:native][:PN_path] = @current_text if @processing_JB_stdout_path_list
173
+ @processing_JB_stdout_path_list = false
174
+ end
175
+
176
+ def end_ST_name
177
+ @parsed_job[:native][:ST_name] = @parsed_job[:native][:ST_name] + @current_text + ' '
178
+ end
179
+
156
180
  def set_job_array_piece(key)
157
181
  @job_array_spec[key] = @current_text if @processing_job_array_spec
158
182
  end
@@ -201,5 +225,12 @@ class QstatXmlJRListener
201
225
  def set_slots
202
226
  @parsed_job[:procs] = @current_text.to_i
203
227
  end
204
- end
205
228
 
229
+ private
230
+
231
+ # Some Grid Engines (like UGE) use milliseconds where others use
232
+ # seconds past the epoch.
233
+ def ms_to_seconds(raw)
234
+ raw.digits.length >= 13 ? raw / 1000 : raw
235
+ end
236
+ end
@@ -24,21 +24,32 @@ class QstatXmlRListener
24
24
  @parsed_jobs = []
25
25
  @current_job = {
26
26
  :tasks => [],
27
- :native => {} # TODO: improve native reporting
27
+ :native => {
28
+ :ST_name => ''
29
+ }
28
30
  }
29
31
  @current_text = nil
32
+ @processing_JB_stdout_path_list = false
30
33
 
31
34
  @current_request = nil
35
+ @native_tags = ['JB_job_number', 'JB_job_name', 'JB_version', 'JB_project', 'JB_exec_file', 'JB_script_file', 'JB_script_size', 'JB_submission_time', 'JB_execution_time', 'JB_deadline', 'JB_owner', 'JB_uid', 'JB_group', 'JB_gid', 'JB_account', 'JB_cwd', 'JB_notify', 'JB_type', 'JB_reserve', 'JB_priority', 'JB_jobshare', 'JB_verify', 'JB_checkpoint_attr', 'JB_checkpoint_interval', 'JB_restart']
32
36
  end
33
37
 
34
38
  def tag_start(name, attributes)
35
39
  case name
36
40
  when 'hard_request'
37
41
  start_hard_request(attributes)
42
+ when "JB_stdout_path_list"
43
+ @processing_JB_stdout_path_list = true
38
44
  end
39
45
  end
40
46
 
41
47
  def tag_end(name)
48
+ #Add text if in native_tags
49
+ if (@native_tags.include?(name))
50
+ @current_job[:native][:"#{name}"] = @current_text
51
+ end
52
+
42
53
  case name
43
54
  when 'job_list'
44
55
  end_job_list
@@ -64,6 +75,10 @@ class QstatXmlRListener
64
75
  end_hard_request
65
76
  when 'tasks'
66
77
  add_child_tasks
78
+ when 'PN_path'
79
+ end_PN_path
80
+ when 'ST_name'
81
+ end_ST_name
67
82
  end
68
83
  end
69
84
 
@@ -130,6 +145,15 @@ class QstatXmlRListener
130
145
  end
131
146
  end
132
147
 
148
+ def end_PN_path
149
+ @current_job[:native][:PN_path] = @current_text if @processing_JB_stdout_path_list
150
+ @processing_JB_stdout_path_list = false
151
+ end
152
+
153
+ def end_ST_name
154
+ @current_job[:native][:ST_name] = @current_job[:native][:ST_name] + @current_text + ' '
155
+ end
156
+
133
157
  # Store a completed job and reset current_job for the next pass
134
158
  def end_job_list
135
159
  @parsed_jobs << @current_job
@@ -145,4 +169,3 @@ class QstatXmlRListener
145
169
  }
146
170
  end
147
171
  end
148
-
@@ -14,13 +14,17 @@ module OodCore
14
14
  # @option config [Object] :conf (nil) Path to the slurm conf
15
15
  # @option config [Object] :bin (nil) Path to slurm client binaries
16
16
  # @option config [#to_h] :bin_overrides ({}) Optional overrides to Slurm client executables
17
+ # @option config [Object] :submit_host ("") Submit job on login node via ssh
18
+ # @option config [Object] :strict_host_checking (true) Whether to use strict host checking when ssh to submit_host
17
19
  def self.build_slurm(config)
18
20
  c = config.to_h.symbolize_keys
19
- cluster = c.fetch(:cluster, nil)
20
- conf = c.fetch(:conf, nil)
21
- bin = c.fetch(:bin, nil)
22
- bin_overrides = c.fetch(:bin_overrides, {})
23
- slurm = Adapters::Slurm::Batch.new(cluster: cluster, conf: conf, bin: bin, bin_overrides: bin_overrides)
21
+ cluster = c.fetch(:cluster, nil)
22
+ conf = c.fetch(:conf, nil)
23
+ bin = c.fetch(:bin, nil)
24
+ bin_overrides = c.fetch(:bin_overrides, {})
25
+ submit_host = c.fetch(:submit_host, "")
26
+ strict_host_checking = c.fetch(:strict_host_checking, true)
27
+ slurm = Adapters::Slurm::Batch.new(cluster: cluster, conf: conf, bin: bin, bin_overrides: bin_overrides, submit_host: submit_host, strict_host_checking: strict_host_checking)
24
28
  Adapters::Slurm.new(slurm: slurm)
25
29
  end
26
30
  end
@@ -62,18 +66,36 @@ module OodCore
62
66
  # @return Hash<String, String>
63
67
  attr_reader :bin_overrides
64
68
 
69
+ # The login node where the job is submitted via ssh
70
+ # @example owens.osc.edu
71
+ # @return [String] The login node
72
+ attr_reader :submit_host
73
+
74
+ # Whether to use strict host checking when ssh to submit_host
75
+ # @example false
76
+ # @return [Bool]; true if empty
77
+ attr_reader :strict_host_checking
78
+
65
79
  # The root exception class that all Slurm-specific exceptions inherit
66
80
  # from
67
81
  class Error < StandardError; end
68
82
 
83
+ # An error indicating the slurm command timed out
84
+ class SlurmTimeoutError < Error; end
85
+
69
86
  # @param cluster [#to_s, nil] the cluster name
70
87
  # @param conf [#to_s, nil] path to the slurm conf
71
88
  # @param bin [#to_s] path to slurm installation binaries
72
- def initialize(cluster: nil, bin: nil, conf: nil, bin_overrides: {})
73
- @cluster = cluster && cluster.to_s
74
- @conf = conf && Pathname.new(conf.to_s)
75
- @bin = Pathname.new(bin.to_s)
76
- @bin_overrides = bin_overrides
89
+ # @param bin_overrides [#to_h] a hash of bin overrides to be used in job
90
+ # @param submit_host [#to_s] Submits the job on a login node via ssh
91
+ # @param strict_host_checking [Bool] Whether to use strict host checking when ssh to submit_host
92
+ def initialize(cluster: nil, bin: nil, conf: nil, bin_overrides: {}, submit_host: "", strict_host_checking: true)
93
+ @cluster = cluster && cluster.to_s
94
+ @conf = conf && Pathname.new(conf.to_s)
95
+ @bin = Pathname.new(bin.to_s)
96
+ @bin_overrides = bin_overrides
97
+ @submit_host = submit_host.to_s
98
+ @strict_host_checking = strict_host_checking
77
99
  end
78
100
 
79
101
  # Get a list of hashes detailing each of the jobs on the batch server
@@ -128,6 +150,9 @@ module OodCore
128
150
  end
129
151
  jobs
130
152
  end
153
+ rescue SlurmTimeoutError
154
+ # TODO: could use a log entry here
155
+ return [{ id: id, state: 'undetermined' }]
131
156
  end
132
157
 
133
158
  def squeue_fields(attrs)
@@ -148,9 +173,9 @@ module OodCore
148
173
  #TODO: write some barebones test for this? like 2 options and id or no id
149
174
  def squeue_args(id: "", owner: nil, options: [])
150
175
  args = ["--all", "--states=all", "--noconvert"]
151
- args += ["-o", "#{RECORD_SEPARATOR}#{options.join(UNIT_SEPARATOR)}"]
152
- args += ["-u", owner.to_s] unless owner.to_s.empty?
153
- args += ["-j", id.to_s] unless id.to_s.empty?
176
+ args.concat ["-o", "#{RECORD_SEPARATOR}#{options.join(UNIT_SEPARATOR)}"]
177
+ args.concat ["-u", owner.to_s] unless owner.to_s.empty?
178
+ args.concat ["-j", id.to_s] unless id.to_s.empty?
154
179
  args
155
180
  end
156
181
 
@@ -275,12 +300,27 @@ module OodCore
275
300
  # Call a forked Slurm command for a given cluster
276
301
  def call(cmd, *args, env: {}, stdin: "")
277
302
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
303
+
278
304
  args = args.map(&:to_s)
279
- args += ["-M", cluster] if cluster
305
+ args.concat ["-M", cluster] if cluster
306
+
280
307
  env = env.to_h
281
308
  env["SLURM_CONF"] = conf.to_s if conf
282
- o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s)
283
- s.success? ? o : raise(Error, e)
309
+
310
+ cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
311
+ o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
312
+ s.success? ? interpret_and_raise(o, e) : raise(Error, e)
313
+ end
314
+
315
+ # Helper function to raise an error based on the contents of stderr.
316
+ # Slurm exits 0 even when the command fails, so we need to interpret stderr
317
+ # to see if the command was actually successful.
318
+ def interpret_and_raise(stdout, stderr)
319
+ return stdout if stderr.empty?
320
+
321
+ raise SlurmTimeoutError, stderr if /^slurm_load_jobs error: Socket timed out/.match(stderr)
322
+
323
+ stdout
284
324
  end
285
325
 
286
326
  def squeue_attrs_for_info_attrs(attrs)
@@ -358,30 +398,31 @@ module OodCore
358
398
  # Set sbatch options
359
399
  args = []
360
400
  # ignore args, don't know how to do this for slurm
361
- args += ["-H"] if script.submit_as_hold
362
- args += (script.rerunnable ? ["--requeue"] : ["--no-requeue"]) unless script.rerunnable.nil?
363
- args += ["-D", script.workdir.to_s] unless script.workdir.nil?
364
- args += ["--mail-user", script.email.join(",")] unless script.email.nil?
401
+ args.concat ["-H"] if script.submit_as_hold
402
+ args.concat (script.rerunnable ? ["--requeue"] : ["--no-requeue"]) unless script.rerunnable.nil?
403
+ args.concat ["-D", script.workdir.to_s] unless script.workdir.nil?
404
+ args.concat ["--mail-user", script.email.join(",")] unless script.email.nil?
365
405
  if script.email_on_started && script.email_on_terminated
366
- args += ["--mail-type", "ALL"]
406
+ args.concat ["--mail-type", "ALL"]
367
407
  elsif script.email_on_started
368
- args += ["--mail-type", "BEGIN"]
408
+ args.concat ["--mail-type", "BEGIN"]
369
409
  elsif script.email_on_terminated
370
- args += ["--mail-type", "END"]
410
+ args.concat ["--mail-type", "END"]
371
411
  elsif script.email_on_started == false && script.email_on_terminated == false
372
- args += ["--mail-type", "NONE"]
412
+ args.concat ["--mail-type", "NONE"]
373
413
  end
374
- args += ["-J", script.job_name] unless script.job_name.nil?
375
- args += ["-i", script.input_path] unless script.input_path.nil?
376
- args += ["-o", script.output_path] unless script.output_path.nil?
377
- args += ["-e", script.error_path] unless script.error_path.nil?
378
- args += ["--reservation", script.reservation_id] unless script.reservation_id.nil?
379
- args += ["-p", script.queue_name] unless script.queue_name.nil?
380
- args += ["--priority", script.priority] unless script.priority.nil?
381
- args += ["--begin", script.start_time.localtime.strftime("%C%y-%m-%dT%H:%M:%S")] unless script.start_time.nil?
382
- args += ["-A", script.accounting_id] unless script.accounting_id.nil?
383
- args += ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
384
- args += ['-a', script.job_array_request] unless script.job_array_request.nil?
414
+ args.concat ["-J", script.job_name] unless script.job_name.nil?
415
+ args.concat ["-i", script.input_path] unless script.input_path.nil?
416
+ args.concat ["-o", script.output_path] unless script.output_path.nil?
417
+ args.concat ["-e", script.error_path] unless script.error_path.nil?
418
+ args.concat ["--reservation", script.reservation_id] unless script.reservation_id.nil?
419
+ args.concat ["-p", script.queue_name] unless script.queue_name.nil?
420
+ args.concat ["--priority", script.priority] unless script.priority.nil?
421
+ args.concat ["--begin", script.start_time.localtime.strftime("%C%y-%m-%dT%H:%M:%S")] unless script.start_time.nil?
422
+ args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
423
+ args.concat ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
424
+ args.concat ['-a', script.job_array_request] unless script.job_array_request.nil?
425
+ args.concat ['--qos', script.qos] unless script.qos.nil?
385
426
  # ignore nodes, don't know how to do this for slurm
386
427
 
387
428
  # Set dependencies
@@ -390,14 +431,14 @@ module OodCore
390
431
  depend << "afterok:#{afterok.join(":")}" unless afterok.empty?
391
432
  depend << "afternotok:#{afternotok.join(":")}" unless afternotok.empty?
392
433
  depend << "afterany:#{afterany.join(":")}" unless afterany.empty?
393
- args += ["-d", depend.join(",")] unless depend.empty?
434
+ args.concat ["-d", depend.join(",")] unless depend.empty?
394
435
 
395
436
  # Set environment variables
396
437
  env = script.job_environment || {}
397
- args += ["--export", export_arg(env, script.copy_environment?)]
438
+ args.concat ["--export", export_arg(env, script.copy_environment?)]
398
439
 
399
440
  # Set native options
400
- args += script.native if script.native
441
+ args.concat script.native if script.native
401
442
 
402
443
  # Set content
403
444
  content = if script.shell_path.nil?
@@ -1,5 +1,6 @@
1
1
  require "ood_core/refinements/hash_extensions"
2
2
  require "ood_core/job/adapters/helper"
3
+ require 'shellwords'
3
4
 
4
5
  module OodCore
5
6
  module Job
@@ -9,16 +10,18 @@ module OodCore
9
10
  # Build the Torque adapter from a configuration
10
11
  # @param config [#to_h] the configuration for job adapter
11
12
  # @option config [#to_s] :host The batch server host
13
+ # @option config [#to_s] :submit_host The login node to submit the job via ssh
12
14
  # @option config [#to_s] :lib ('') Path to torque client libraries
13
15
  # @option config [#to_s] :bin ('') Path to torque client binaries
14
16
  # @option config [#to_h] :custom_bin ({}) Optional overrides to Torque client executables
15
17
  def self.build_torque(config)
16
18
  c = config.to_h.symbolize_keys
17
19
  host = c.fetch(:host) { raise ArgumentError, "No host specified. Missing argument: host" }.to_s
20
+ submit_host = c.fetch(:submit_host, "").to_s
18
21
  lib = c.fetch(:lib, "").to_s
19
22
  bin = c.fetch(:bin, "").to_s
20
23
  custom_bin = c.fetch(:custom_bin, {})
21
- pbs = Adapters::Torque::Batch.new(host: host, lib: lib, bin: bin, custom_bin: custom_bin)
24
+ pbs = Adapters::Torque::Batch.new(host: host, submit_host: submit_host, lib: lib, bin: bin, custom_bin: custom_bin)
22
25
  Adapters::Torque.new(pbs: pbs)
23
26
  end
24
27
  end
@@ -85,7 +88,7 @@ module OodCore
85
88
  depend << "afterany:#{afterany.join(':')}" unless afterany.empty?
86
89
 
87
90
  # Set mailing options
88
- mail_points = ""
91
+ mail_points = ""
89
92
  mail_points += "b" if script.email_on_started
90
93
  mail_points += "e" if script.email_on_terminated
91
94
 
@@ -129,40 +132,44 @@ module OodCore
129
132
  envvars.merge! script.native.fetch(:envvars, {})
130
133
  end
131
134
 
135
+ # Destructively change envvars to shellescape values
136
+ envvars.transform_values! { |v| Shellwords.escape(v) }
137
+
132
138
  # Submit job
133
139
  @pbs.submit_string(script.content, queue: script.queue_name, headers: headers, resources: resources, envvars: envvars)
134
140
  else
135
141
  # Set qsub arguments
136
142
  args = []
137
- args += ["-F", script.args.join(" ")] unless script.args.nil?
138
- args += ["-h"] if script.submit_as_hold
139
- args += ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
140
- args += ["-M", script.email.join(",")] unless script.email.nil?
141
- args += ["-m", mail_points] unless mail_points.empty?
142
- args += ["-N", script.job_name] unless script.job_name.nil?
143
- args += ["-S", script.shell_path] unless script.shell_path.nil?
143
+ args.concat ["-F", script.args.join(" ")] unless script.args.nil?
144
+ args.concat ["-h"] if script.submit_as_hold
145
+ args.concat ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
146
+ args.concat ["-M", script.email.join(",")] unless script.email.nil?
147
+ args.concat ["-m", mail_points] unless mail_points.empty?
148
+ args.concat ["-N", script.job_name] unless script.job_name.nil?
149
+ args.concat ["-S", script.shell_path] unless script.shell_path.nil?
144
150
  # ignore input_path (not defined in Torque)
145
- args += ["-o", script.output_path] unless script.output_path.nil?
146
- args += ["-e", script.error_path] unless script.error_path.nil?
147
- args += ["-W", "x=advres:#{script.reservation_id}"] unless script.reservation_id.nil?
148
- args += ["-q", script.queue_name] unless script.queue_name.nil?
149
- args += ["-p", script.priority] unless script.priority.nil?
150
- args += ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
151
- args += ["-A", script.accounting_id] unless script.accounting_id.nil?
152
- args += ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
153
- args += ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
154
- args += ['-t', script.job_array_request] unless script.job_array_request.nil?
151
+ args.concat ["-o", script.output_path] unless script.output_path.nil?
152
+ args.concat ["-e", script.error_path] unless script.error_path.nil?
153
+ args.concat ["-W", "x=advres:#{script.reservation_id}"] unless script.reservation_id.nil?
154
+ args.concat ["-q", script.queue_name] unless script.queue_name.nil?
155
+ args.concat ["-p", script.priority] unless script.priority.nil?
156
+ args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
157
+ args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
158
+ args.concat ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
159
+ args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
160
+ args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
161
+ args.concat ['-l', "qos=#{script.qos}"] unless script.qos.nil?
155
162
  # Set environment variables
156
163
  env = script.job_environment.to_h
157
- args += ["-v", env.keys.join(",")] unless env.empty?
158
- args += ["-V"] if script.copy_environment?
164
+ args.concat ["-v", env.keys.join(",")] unless env.empty?
165
+ args.concat ["-V"] if script.copy_environment?
159
166
 
160
167
  # If error_path is not specified we join stdout & stderr (as this
161
168
  # mimics what the other resource managers do)
162
- args += ["-j", "oe"] if script.error_path.nil?
169
+ args.concat ["-j", "oe"] if script.error_path.nil?
163
170
 
164
171
  # Set native options
165
- args += script.native if script.native
172
+ args.concat script.native if script.native
166
173
 
167
174
  # Submit job
168
175
  @pbs.submit(script.content, args: args, env: env, chdir: script.workdir)