ood_core 0.11.4 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f452bc936a1246bb7ac53d3aeafe36c7f54ad23a2d754d916b5701c40343288
4
- data.tar.gz: f30a99239692b568b30453a0c519cbf8adb977589c4dfa35132bd4a0f6019c17
3
+ metadata.gz: bb944d43beb0aced99e13efb2ef10bf33f9666c705c50ca5ae1727751de43073
4
+ data.tar.gz: 6e3cd66160be3bbd63124d6f2ddc794bce4ae64e385977828faba5dfd28ff838
5
5
  SHA512:
6
- metadata.gz: eb1d20267c147d723bfdafb1d169d6f7efc8323f66a749959672f5df23dc74d8503453f5a9214bf7bf35e5db74156ac3e48cdf18b19fa256d8a53fd331df5491
7
- data.tar.gz: 1672bfd5d571492d9c5d6e97e1b2f3eeeeb26bd5580bca6e12b5e6282f05be9300b5b58aecb7ec20ecf8b9513983e1df5d78968bc6a23bacfc38262ffb1e1110
6
+ metadata.gz: 176e331a856c1e6958c444426d5c1b41aa881e90a69dca507b07f5463eb81355689e8391e0bf27823fc42a9484789f623ffd566b9d6c414c9cf741a7cafd1def
7
+ data.tar.gz: 15481101ad3120d3e8457612f2b8a8be4f1e268b38538b18b710f27887836f7a47eac3bf2e89d8f73745ae96ae78e21cd5ba5afefb4161cf95f435d6f2fdf001
@@ -6,6 +6,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
6
6
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
7
7
 
8
8
  ## [Unreleased]
9
+ ## [0.12.0] - 2020-08-05
10
+ ### Added
11
+ - qos option to Slurm and Torque [#205](https://github.com/OSC/ood_core/pull/205)
12
+ - native hash returned in qstat for SGE adapter [#198](https://github.com/OSC/ood_core/pull/198)
13
+ - option for specifying `submit_host` to submit jobs via ssh on other host [#204](https://github.com/OSC/ood_core/pull/204)
14
+
15
+ ### Fixed
16
+ - SGE adapter now handles timestamps reported in milliseconds instead of seconds [#206](https://github.com/OSC/ood_core/issues/206)
17
+ - Torque's native "hash" for job submission now handles env vars values with spaces [#202](https://github.com/OSC/ood_core/pull/202)
18
+
9
19
  ## [0.11.4] - 2020-05-27
10
20
  ### Fixed
11
21
  - Environment exports in SLURM while implementing [#158](https://github.com/OSC/ood_core/issues/158)
@@ -233,7 +243,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
233
243
  ### Added
234
244
  - Initial release!
235
245
 
236
- [Unreleased]: https://github.com/OSC/ood_core/compare/v0.11.4...HEAD
246
+ [Unreleased]: https://github.com/OSC/ood_core/compare/v0.12.0...HEAD
247
+ [0.12.0]: https://github.com/OSC/ood_core/compare/v0.11.4...v0.12.0
237
248
  [0.11.4]: https://github.com/OSC/ood_core/compare/v0.11.3...v0.11.4
238
249
  [0.11.3]: https://github.com/OSC/ood_core/compare/v0.11.2...v0.11.3
239
250
  [0.11.2]: https://github.com/OSC/ood_core/compare/v0.11.1...v0.11.2
data/README.md CHANGED
@@ -4,12 +4,13 @@
4
4
  ![GitHub Release](https://img.shields.io/github/release/osc/ood_core.svg)
5
5
  ![GitHub License](https://img.shields.io/github/license/osc/ood_core.svg)
6
6
 
7
- Welcome to your new gem! In this directory, you'll find the files you need to
8
- be able to package up your Ruby library into a gem. Put your Ruby code in the
9
- file `lib/ood_core`. To experiment with that code, run `bin/console` for an
10
- interactive prompt.
7
+ - Website: http://openondemand.org/
8
+ - Website repo with JOSS publication: https://github.com/OSC/Open-OnDemand
9
+ - Documentation: https://osc.github.io/ood-documentation/master/
10
+ - Main code repo: https://github.com/OSC/ondemand
11
+ - Core library repo: https://github.com/OSC/ood_core
11
12
 
12
- TODO: Delete this and the text above, and describe your gem
13
+ OnDemand core library with adapters for each batch scheduler.
13
14
 
14
15
  ## Installation
15
16
 
@@ -12,7 +12,26 @@ module OodCore
12
12
  def self.bin_path(cmd, bin_default, bin_overrides)
13
13
  bin_overrides.fetch(cmd.to_s) { Pathname.new(bin_default.to_s).join(cmd.to_s).to_s }
14
14
  end
15
+
16
+ # Gets a command that submits command on another host via ssh
17
+ # @param submit_host [String] where to submit the command
18
+ # @param cmd [String] the desired command to execute on another host
19
+ # @param cmd_args [Array] arguments to the command specified above
20
+ # @param strict_host_checking [Bool] whether to use strict_host_checking
21
+ # @param env [Hash] env variables to be set w/ssh
22
+ #
23
+ # @return cmd [String] command wrapped in ssh if submit_host is present
24
+ # @return args [Array] command arguments including ssh_flags and original command
25
+ def self.ssh_wrap(submit_host, cmd, cmd_args, strict_host_checking = true, env = {})
26
+ return cmd, cmd_args if submit_host.to_s.empty?
27
+
28
+ check_host = strict_host_checking ? "yes" : "no"
29
+ args = ['-o', 'BatchMode=yes', '-o', 'UserKnownHostsFile=/dev/null', '-o', "StrictHostKeyChecking=#{check_host}", "#{submit_host}"]
30
+ env.each{|key, value| args.push("export #{key}=#{value};")}
31
+
32
+ return 'ssh', args + [cmd] + cmd_args
33
+ end
15
34
  end
16
35
  end
17
36
  end
18
- end
37
+ end
@@ -174,6 +174,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
174
174
  'session_name' => session_name,
175
175
  'singularity_bin' => singularity_bin,
176
176
  'singularity_image' => singularity_image(script.native),
177
+ 'ssh_hosts' => ssh_hosts,
177
178
  'tmux_bin' => tmux_bin,
178
179
  }.each{
179
180
  |key, value| bnd.local_variable_set(key, value)
@@ -1,5 +1,19 @@
1
1
  #!/bin/bash
2
- hostname
2
+ SSH_HOSTS=(<%= ssh_hosts.join(' ').to_s %>)
3
+ hostnames=`hostname -A`
4
+ for host in ${SSH_HOSTS[@]}
5
+ do
6
+ if [[ " ${hostnames[@]} " =~ " ${host} " ]]; then
7
+ hostname=$host
8
+ fi
9
+ done
10
+
11
+ if [ -z "$hostname" ]; then
12
+ printf >&2 "ERROR: Can't start job on [${hostnames[@]}] because it does not match any hostname configured \nin ssh_hosts [${SSH_HOSTS[@]}]. The output of 'hostname -A' must match an entry in ssh_hosts \nfrom the cluster configuration."
13
+ exit 1
14
+ fi
15
+
16
+ echo $hostname
3
17
 
4
18
  # Put the script into a temp file on localhost
5
19
  <% if debug %>
@@ -14,6 +14,7 @@ module OodCore
14
14
  # @option config [#to_s] :serverdir ('') Path to lsf client etc dir
15
15
  # @option config [#to_s] :cluster ('') name of cluster, if in multi-cluster mode
16
16
  # @option config [#to_h] :bin_overrides ({}) Optional overrides to LSF client executables
17
+ # @option config [#to_s] :submit_host ('') Host to submit commands to
17
18
  def self.build_lsf(config)
18
19
  batch = Adapters::Lsf::Batch.new(config.to_h.symbolize_keys)
19
20
  Adapters::Lsf.new(batch: batch)
@@ -2,21 +2,22 @@
2
2
  #
3
3
  # @api private
4
4
  class OodCore::Job::Adapters::Lsf::Batch
5
- attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides
5
+ attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides, :submit_host, :strict_host_checking
6
6
 
7
7
  # The root exception class that all LSF-specific exceptions inherit
8
8
  # from
9
9
  class Error < StandardError; end
10
10
 
11
11
  # @param bin [#to_s] path to LSF installation binaries
12
- def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, **_)
12
+ def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, submit_host: "", strict_host_checking: true, **_)
13
13
  @bindir = Pathname.new(bindir.to_s)
14
-
15
14
  @envdir = Pathname.new(envdir.to_s)
16
15
  @libdir = Pathname.new(libdir.to_s)
17
16
  @serverdir = Pathname.new(serverdir.to_s)
18
17
  @cluster = cluster.to_s
19
18
  @bin_overrides = bin_overrides
19
+ @submit_host = submit_host.to_s
20
+ @strict_host_checking = strict_host_checking
20
21
  end
21
22
 
22
23
  def default_env
@@ -143,6 +144,7 @@ class OodCore::Job::Adapters::Lsf::Batch
143
144
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
144
145
  args = cluster_args + args
145
146
  env = default_env.merge(env.to_h)
147
+ cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
146
148
  o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
147
149
  s.success? ? o : raise(Error, e)
148
150
  end
@@ -78,40 +78,40 @@ class OodCore::Job::Adapters::Lsf::Helper
78
78
  def batch_submit_args(script, after: [], afterok: [], afternotok: [], afterany: [])
79
79
  args = []
80
80
 
81
- args += ["-P", script.accounting_id] unless script.accounting_id.nil?
82
- args += ["-cwd", script.workdir.to_s] unless script.workdir.nil?
83
- args += ["-J", script.job_name] unless script.job_name.nil?
84
- args[-1] += "[#{script.job_array_request}]" unless script.job_array_request.nil?
85
-
86
- args += ["-q", script.queue_name] unless script.queue_name.nil?
87
- args += ["-U", script.reservation_id] unless script.reservation_id.nil?
88
- args += ["-sp", script.priority] unless script.priority.nil?
89
- args += ["-H"] if script.submit_as_hold
90
- args += (script.rerunnable ? ["-r"] : ["-rn"]) unless script.rerunnable.nil?
91
- args += ["-b", script.start_time.localtime.strftime("%Y:%m:%d:%H:%M")] unless script.start_time.nil?
92
- args += ["-W", (script.wall_time / 60).to_i] unless script.wall_time.nil?
93
- args += ["-L", script.shell_path.to_s] unless script.shell_path.nil?
81
+ args.concat ["-P", script.accounting_id] unless script.accounting_id.nil?
82
+ args.concat ["-cwd", script.workdir.to_s] unless script.workdir.nil?
83
+ args.concat ["-J", script.job_name] unless script.job_name.nil?
84
+ args[-1].concat "[#{script.job_array_request}]" unless script.job_array_request.nil?
85
+
86
+ args.concat ["-q", script.queue_name] unless script.queue_name.nil?
87
+ args.concat ["-U", script.reservation_id] unless script.reservation_id.nil?
88
+ args.concat ["-sp", script.priority] unless script.priority.nil?
89
+ args.concat ["-H"] if script.submit_as_hold
90
+ args.concat (script.rerunnable ? ["-r"] : ["-rn"]) unless script.rerunnable.nil?
91
+ args.concat ["-b", script.start_time.localtime.strftime("%Y:%m:%d:%H:%M")] unless script.start_time.nil?
92
+ args.concat ["-W", (script.wall_time / 60).to_i] unless script.wall_time.nil?
93
+ args.concat ["-L", script.shell_path.to_s] unless script.shell_path.nil?
94
94
 
95
95
  # environment
96
96
  env = script.job_environment || {}
97
97
  # To preserve pre-existing behavior we only act when true or false, when nil we do nothing
98
98
  if script.copy_environment?
99
- args += ["-env", (["all"] + env.keys).join(",")]
99
+ args.concat ["-env", (["all"] + env.keys).join(",")]
100
100
  elsif script.copy_environment? == false
101
- args += ["-env", (["none"] + env.keys).join(",")]
101
+ args.concat ["-env", (["none"] + env.keys).join(",")]
102
102
  end
103
103
 
104
104
  # input and output files
105
- args += ["-i", script.input_path] unless script.input_path.nil?
106
- args += ["-o", script.output_path] unless script.output_path.nil?
107
- args += ["-e", script.error_path] unless script.error_path.nil?
105
+ args.concat ["-i", script.input_path] unless script.input_path.nil?
106
+ args.concat ["-o", script.output_path] unless script.output_path.nil?
107
+ args.concat ["-e", script.error_path] unless script.error_path.nil?
108
108
 
109
109
  # email
110
- args += ["-B"] if script.email_on_started
111
- args += ["-N"] if script.email_on_terminated
112
- args += ["-u", script.email.join(",")] unless script.email.nil? || script.email.empty?
110
+ args.concat ["-B"] if script.email_on_started
111
+ args.concat ["-N"] if script.email_on_terminated
112
+ args.concat ["-u", script.email.join(",")] unless script.email.nil? || script.email.empty?
113
113
 
114
- args += script.native unless script.native.nil?
114
+ args.concat script.native unless script.native.nil?
115
115
 
116
116
  {args: args, env: env}
117
117
  end
@@ -10,17 +10,21 @@ module OodCore
10
10
  # Build the PBS Pro adapter from a configuration
11
11
  # @param config [#to_h] the configuration for job adapter
12
12
  # @option config [Object] :host (nil) The batch server host
13
+ # @option config [Object] :submit_host ("") The login node where the job is submitted
14
+ # @option config [Object] :strict_host_checking (true) Whether to use strict host checking when ssh to submit_host
13
15
  # @option config [Object] :exec (nil) Path to PBS Pro executables
14
16
  # @option config [Object] :qstat_factor (nil) Deciding factor on how to
15
17
  # call qstat for a user
16
18
  # @option config [#to_h] :bin_overrides ({}) Optional overrides to PBS Pro client executables
17
19
  def self.build_pbspro(config)
18
20
  c = config.to_h.compact.symbolize_keys
19
- host = c.fetch(:host, nil)
20
- pbs_exec = c.fetch(:exec, nil)
21
- qstat_factor = c.fetch(:qstat_factor, nil)
22
- bin_overrides = c.fetch(:bin_overrides, {})
23
- pbspro = Adapters::PBSPro::Batch.new(host: host, pbs_exec: pbs_exec, bin_overrides: bin_overrides)
21
+ host = c.fetch(:host, nil)
22
+ submit_host = c.fetch(:submit_host, "")
23
+ strict_host_checking = c.fetch(:strict_host_checking, true)
24
+ pbs_exec = c.fetch(:exec, nil)
25
+ qstat_factor = c.fetch(:qstat_factor, nil)
26
+ bin_overrides = c.fetch(:bin_overrides, {})
27
+ pbspro = Adapters::PBSPro::Batch.new(host: host, submit_host: submit_host, strict_host_checking: strict_host_checking, pbs_exec: pbs_exec, bin_overrides: bin_overrides)
24
28
  Adapters::PBSPro.new(pbspro: pbspro, qstat_factor: qstat_factor)
25
29
  end
26
30
  end
@@ -41,6 +45,18 @@ module OodCore
41
45
  # @return [String, nil] the batch server host
42
46
  attr_reader :host
43
47
 
48
+ # The login node to submit the job via ssh
49
+ # @example
50
+ # my_batch.submit_host #=> "my_batch.server.edu"
51
+ # @return [String, nil] the login node
52
+ attr_reader :submit_host
53
+
54
+ # Whether to use strict host checking when ssh to submit_host
55
+ # @example
56
+ # my_batch.strict_host_checking #=> "false"
57
+ # @return [Bool, true] the login node; true if not present
58
+ attr_reader :strict_host_checking
59
+
44
60
  # The path containing the PBS executables
45
61
  # @example
46
62
  # my_batch.pbs_exec.to_s #=> "/usr/local/pbspro/10.0.0"
@@ -58,11 +74,15 @@ module OodCore
58
74
  class Error < StandardError; end
59
75
 
60
76
  # @param host [#to_s, nil] the batch server host
77
+ # @param submit_host [#to_s, nil] the login node to ssh to
78
+ # @param strict_host_checking [bool, true] whether to use strict host checking when ssh to submit_host
61
79
  # @param exec [#to_s, nil] path to pbs executables
62
- def initialize(host: nil, pbs_exec: nil, bin_overrides: {})
63
- @host = host && host.to_s
64
- @pbs_exec = pbs_exec && Pathname.new(pbs_exec.to_s)
65
- @bin_overrides = bin_overrides
80
+ def initialize(host: nil, submit_host: "", strict_host_checking: true, pbs_exec: nil, bin_overrides: {})
81
+ @host = host && host.to_s
82
+ @submit_host = submit_host && submit_host.to_s
83
+ @strict_host_checking = strict_host_checking
84
+ @pbs_exec = pbs_exec && Pathname.new(pbs_exec.to_s)
85
+ @bin_overrides = bin_overrides
66
86
  end
67
87
 
68
88
  # Get a list of hashes detailing each of the jobs on the batch server
@@ -87,7 +107,7 @@ module OodCore
87
107
  # @return [Array<Hash>] list of details for jobs
88
108
  def get_jobs(id: "")
89
109
  args = ["-f", "-t"] # display all information
90
- args += [id.to_s] unless id.to_s.empty?
110
+ args.concat [id.to_s] unless id.to_s.empty?
91
111
  lines = call("qstat", *args).gsub("\n\t", "").split("\n").map(&:strip)
92
112
 
93
113
  jobs = []
@@ -159,12 +179,12 @@ module OodCore
159
179
  cmd = cmd.to_s
160
180
  bindir = (!!pbs_exec) ? pbs_exec.join("bin").to_s : ''
161
181
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
162
- args = args.map(&:to_s)
163
182
  env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
164
183
  env["PBS_DEFAULT"] = host.to_s if host
165
184
  env["PBS_EXEC"] = pbs_exec.to_s if pbs_exec
185
+ cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
166
186
  chdir ||= "."
167
- o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
187
+ o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s, chdir: chdir.to_s)
168
188
  s.success? ? o : raise(Error, e)
169
189
  end
170
190
  end
@@ -227,28 +247,28 @@ module OodCore
227
247
  # Set qsub options
228
248
  args = []
229
249
  # ignore args, can't use these if submitting from STDIN
230
- args += ["-h"] if script.submit_as_hold
231
- args += ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
232
- args += ["-M", script.email.join(",")] unless script.email.nil?
250
+ args.concat ["-h"] if script.submit_as_hold
251
+ args.concat ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
252
+ args.concat ["-M", script.email.join(",")] unless script.email.nil?
233
253
  if script.email_on_started && script.email_on_terminated
234
- args += ["-m", "be"]
254
+ args.concat ["-m", "be"]
235
255
  elsif script.email_on_started
236
- args += ["-m", "b"]
256
+ args.concat ["-m", "b"]
237
257
  elsif script.email_on_terminated
238
- args += ["-m", "e"]
258
+ args.concat ["-m", "e"]
239
259
  end
240
- args += ["-N", script.job_name] unless script.job_name.nil?
241
- args += ["-S", script.shell_path] unless script.shell_path.nil?
260
+ args.concat ["-N", script.job_name] unless script.job_name.nil?
261
+ args.concat ["-S", script.shell_path] unless script.shell_path.nil?
242
262
  # ignore input_path (not defined in PBS Pro)
243
- args += ["-o", script.output_path] unless script.output_path.nil?
244
- args += ["-e", script.error_path] unless script.error_path.nil?
263
+ args.concat ["-o", script.output_path] unless script.output_path.nil?
264
+ args.concat ["-e", script.error_path] unless script.error_path.nil?
245
265
  # Reservations are actually just queues in PBS Pro
246
- args += ["-q", script.reservation_id] if !script.reservation_id.nil? && script.queue_name.nil?
247
- args += ["-q", script.queue_name] unless script.queue_name.nil?
248
- args += ["-p", script.priority] unless script.priority.nil?
249
- args += ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
250
- args += ["-A", script.accounting_id] unless script.accounting_id.nil?
251
- args += ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
266
+ args.concat ["-q", script.reservation_id] if !script.reservation_id.nil? && script.queue_name.nil?
267
+ args.concat ["-q", script.queue_name] unless script.queue_name.nil?
268
+ args.concat ["-p", script.priority] unless script.priority.nil?
269
+ args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
270
+ args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
271
+ args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
252
272
 
253
273
  # Set dependencies
254
274
  depend = []
@@ -256,21 +276,21 @@ module OodCore
256
276
  depend << "afterok:#{afterok.join(":")}" unless afterok.empty?
257
277
  depend << "afternotok:#{afternotok.join(":")}" unless afternotok.empty?
258
278
  depend << "afterany:#{afterany.join(":")}" unless afterany.empty?
259
- args += ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
279
+ args.concat ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
260
280
 
261
281
  # Set environment variables
262
282
  envvars = script.job_environment.to_h
263
- args += ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
264
- args += ["-V"] if script.copy_environment?
283
+ args.concat ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
284
+ args.concat ["-V"] if script.copy_environment?
265
285
 
266
286
  # If error_path is not specified we join stdout & stderr (as this
267
287
  # mimics what the other resource managers do)
268
- args += ["-j", "oe"] if script.error_path.nil?
288
+ args.concat ["-j", "oe"] if script.error_path.nil?
269
289
 
270
- args += ["-J", script.job_array_request] unless script.job_array_request.nil?
290
+ args.concat ["-J", script.job_array_request] unless script.job_array_request.nil?
271
291
 
272
292
  # Set native options
273
- args += script.native if script.native
293
+ args.concat script.native if script.native
274
294
 
275
295
  # Submit job
276
296
  @pbspro.submit_string(script.content, args: args, chdir: script.workdir)
@@ -15,7 +15,7 @@ end
15
15
  class OodCore::Job::Adapters::Sge::Batch
16
16
  using OodCore::Refinements::HashExtensions
17
17
 
18
- attr_reader :bin, :bin_overrides, :conf, :cluster, :helper
18
+ attr_reader :bin, :bin_overrides, :conf, :cluster, :helper, :submit_host, :strict_host_checking
19
19
 
20
20
  require "ood_core/job/adapters/sge/qstat_xml_j_r_listener"
21
21
  require "ood_core/job/adapters/sge/qstat_xml_r_listener"
@@ -36,6 +36,8 @@ class OodCore::Job::Adapters::Sge::Batch
36
36
  @bin = Pathname.new(config.fetch(:bin, nil).to_s)
37
37
  @sge_root = Pathname.new(config[:sge_root] || ENV['SGE_ROOT'] || "/var/lib/gridengine")
38
38
  @bin_overrides = config.fetch(:bin_overrides, {})
39
+ @submit_host = config.fetch(:submit_host, "")
40
+ @strict_host_checking = config.fetch(:strict_host_checking, true)
39
41
 
40
42
  # FIXME: hack as this affects env of the process!
41
43
  ENV['SGE_ROOT'] = @sge_root.to_s
@@ -62,7 +64,7 @@ class OodCore::Job::Adapters::Sge::Batch
62
64
  def get_all(owner: nil)
63
65
  listener = QstatXmlRListener.new
64
66
  argv = ['qstat', '-r', '-xml']
65
- argv += ['-u', owner] unless owner.nil?
67
+ argv.concat ['-u', owner] unless owner.nil?
66
68
  REXML::Parsers::StreamParser.new(call(*argv), listener).parse
67
69
 
68
70
  listener.parsed_jobs.map{
@@ -166,11 +168,10 @@ class OodCore::Job::Adapters::Sge::Batch
166
168
  # Call a forked SGE command for a given batch server
167
169
  def call(cmd, *args, env: {}, stdin: "", chdir: nil)
168
170
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
169
- args = args.map(&:to_s)
170
-
171
171
  env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
172
+ cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
172
173
  chdir ||= "."
173
- o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
174
+ o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s, chdir: chdir.to_s)
174
175
  s.success? ? o : raise(Error, e)
175
176
  end
176
177
 
@@ -17,38 +17,38 @@ class OodCore::Job::Adapters::Sge::Helper
17
17
  raise_error_on_unsupported_args(script, after: after, afterok: afterok, afternotok: afternotok, afterany: afterany)
18
18
 
19
19
  args = []
20
- args += ['-h'] if script.submit_as_hold
21
- args += ['-r', 'yes'] if script.rerunnable
22
- script.job_environment.each_pair {|k, v| args += ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
23
- args += ["-V"] if script.copy_environment?
20
+ args.concat ['-h'] if script.submit_as_hold
21
+ args.concat ['-r', 'yes'] if script.rerunnable
22
+ script.job_environment.each_pair {|k, v| args.concat ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
23
+ args.concat ["-V"] if script.copy_environment?
24
24
 
25
25
  if script.workdir
26
- args += ['-wd', script.workdir]
26
+ args.concat ['-wd', script.workdir]
27
27
  elsif ! script_contains_wd_directive?(script.content)
28
- args += ['-cwd']
28
+ args.concat ['-cwd']
29
29
  end
30
30
 
31
31
  on_event_email = []
32
32
  on_event_email << 'b' if script.email_on_started # beginning
33
33
  on_event_email << 'ea' if script.email_on_terminated # end, aborted
34
34
 
35
- args += ['-M', script.email.first, '-m', on_event_email.join] if script.email && ! on_event_email.empty?
35
+ args.concat ['-M', script.email.first, '-m', on_event_email.join] if script.email && ! on_event_email.empty?
36
36
 
37
37
  afterok = Array(afterok).map(&:to_s)
38
- args += ['-hold_jid_ad', afterok.join(',')] unless afterok.empty?
38
+ args.concat ['-hold_jid_ad', afterok.join(',')] unless afterok.empty?
39
39
 
40
40
  # ignoring email_on_started
41
- args += ['-N', script.job_name] unless script.job_name.nil?
42
- args += ['-e', script.error_path] unless script.error_path.nil?
43
- args += ['-o', script.output_path] unless script.output_path.nil?
44
- args += ['-ar', script.reservation_id] unless script.reservation_id.nil?
45
- args += ['-q', script.queue_name] unless script.queue_name.nil?
46
- args += ['-p', script.priority] unless script.priority.nil?
47
- args += ['-a', script.start_time.strftime('%C%y%m%d%H%M.%S')] unless script.start_time.nil?
48
- args += ['-l', "h_rt=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
49
- args += ['-P', script.accounting_id] unless script.accounting_id.nil?
50
- args += ['-t', script.job_array_request] unless script.job_array_request.nil?
51
- args += Array.wrap(script.native) if script.native
41
+ args.concat ['-N', script.job_name] unless script.job_name.nil?
42
+ args.concat ['-e', script.error_path] unless script.error_path.nil?
43
+ args.concat ['-o', script.output_path] unless script.output_path.nil?
44
+ args.concat ['-ar', script.reservation_id] unless script.reservation_id.nil?
45
+ args.concat ['-q', script.queue_name] unless script.queue_name.nil?
46
+ args.concat ['-p', script.priority] unless script.priority.nil?
47
+ args.concat ['-a', script.start_time.strftime('%C%y%m%d%H%M.%S')] unless script.start_time.nil?
48
+ args.concat ['-l', "h_rt=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
49
+ args.concat ['-P', script.accounting_id] unless script.accounting_id.nil?
50
+ args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
51
+ args.concat Array.wrap(script.native) if script.native
52
52
 
53
53
  args
54
54
  end
@@ -28,10 +28,13 @@ class QstatXmlJRListener
28
28
  :tasks => [],
29
29
  :status => :queued,
30
30
  :procs => 1,
31
- :native => {} # TODO: improve native attribute reporting
31
+ :native => {
32
+ :ST_name => ''
33
+ }
32
34
  }
33
35
  @current_text = nil
34
36
  @current_request = nil
37
+ @processing_JB_stdout_path_list = false
35
38
 
36
39
  @processing_job_array_spec = false
37
40
  @adding_slots = false
@@ -42,6 +45,7 @@ class QstatXmlJRListener
42
45
  step: 1, # Step can have a default of 1
43
46
  }
44
47
  @running_tasks = []
48
+ @native_tags = ['JB_job_number', 'JB_job_name', 'JB_version', 'JB_project', 'JB_exec_file', 'JB_script_file', 'JB_script_size', 'JB_submission_time', 'JB_execution_time', 'JB_deadline', 'JB_owner', 'JB_uid', 'JB_group', 'JB_gid', 'JB_account', 'JB_cwd', 'JB_notify', 'JB_type', 'JB_reserve', 'JB_priority', 'JB_jobshare', 'JB_verify', 'JB_checkpoint_attr', 'JB_checkpoint_interval', 'JB_restart']
45
49
  end
46
50
 
47
51
  def tag_start(name, attrs)
@@ -50,10 +54,17 @@ class QstatXmlJRListener
50
54
  toggle_processing_array_spec
51
55
  when 'JB_pe_range'
52
56
  toggle_adding_slots
57
+ when 'JB_stdout_path_list'
58
+ @processing_JB_stdout_path_list = true
53
59
  end
54
60
  end
55
61
 
56
62
  def tag_end(name)
63
+ #Add to native hash if in native_tags
64
+ if (@native_tags.include?(name))
65
+ @parsed_job[:native][:"#{name}"] = @current_text
66
+ end
67
+
57
68
  case name
58
69
  when 'JB_ja_tasks'
59
70
  end_JB_ja_tasks
@@ -92,6 +103,10 @@ class QstatXmlJRListener
92
103
  toggle_processing_array_spec
93
104
  when 'JB_pe_range'
94
105
  toggle_adding_slots
106
+ when 'PN_path'
107
+ end_PN_path
108
+ when 'ST_name'
109
+ end_ST_name
95
110
  end
96
111
  end
97
112
 
@@ -118,7 +133,7 @@ class QstatXmlJRListener
118
133
  end
119
134
 
120
135
  def end_JB_submission_time
121
- @parsed_job[:submission_time] = @current_text.to_i
136
+ @parsed_job[:submission_time] = ms_to_seconds(@current_text.to_i)
122
137
  end
123
138
 
124
139
  def end_JB_ja_tasks
@@ -127,7 +142,7 @@ class QstatXmlJRListener
127
142
 
128
143
  def end_JAT_start_time
129
144
  @parsed_job[:status] = :running
130
- @parsed_job[:dispatch_time] = @current_text.to_i
145
+ @parsed_job[:dispatch_time] = ms_to_seconds(@current_text.to_i)
131
146
  @parsed_job[:wallclock_time] = Time.now.to_i - @parsed_job[:dispatch_time]
132
147
  end
133
148
 
@@ -153,6 +168,15 @@ class QstatXmlJRListener
153
168
  @running_tasks << @current_text
154
169
  end
155
170
 
171
+ def end_PN_path
172
+ @parsed_job[:native][:PN_path] = @current_text if @processing_JB_stdout_path_list
173
+ @processing_JB_stdout_path_list = false
174
+ end
175
+
176
+ def end_ST_name
177
+ @parsed_job[:native][:ST_name] = @parsed_job[:native][:ST_name] + @current_text + ' '
178
+ end
179
+
156
180
  def set_job_array_piece(key)
157
181
  @job_array_spec[key] = @current_text if @processing_job_array_spec
158
182
  end
@@ -201,5 +225,12 @@ class QstatXmlJRListener
201
225
  def set_slots
202
226
  @parsed_job[:procs] = @current_text.to_i
203
227
  end
204
- end
205
228
 
229
+ private
230
+
231
+ # Some Grid Engines (like UGE) use milliseconds where others use
232
+ # seconds past the epoch.
233
+ def ms_to_seconds(raw)
234
+ raw.digits.length >= 13 ? raw / 1000 : raw
235
+ end
236
+ end
@@ -24,21 +24,32 @@ class QstatXmlRListener
24
24
  @parsed_jobs = []
25
25
  @current_job = {
26
26
  :tasks => [],
27
- :native => {} # TODO: improve native reporting
27
+ :native => {
28
+ :ST_name => ''
29
+ }
28
30
  }
29
31
  @current_text = nil
32
+ @processing_JB_stdout_path_list = false
30
33
 
31
34
  @current_request = nil
35
+ @native_tags = ['JB_job_number', 'JB_job_name', 'JB_version', 'JB_project', 'JB_exec_file', 'JB_script_file', 'JB_script_size', 'JB_submission_time', 'JB_execution_time', 'JB_deadline', 'JB_owner', 'JB_uid', 'JB_group', 'JB_gid', 'JB_account', 'JB_cwd', 'JB_notify', 'JB_type', 'JB_reserve', 'JB_priority', 'JB_jobshare', 'JB_verify', 'JB_checkpoint_attr', 'JB_checkpoint_interval', 'JB_restart']
32
36
  end
33
37
 
34
38
  def tag_start(name, attributes)
35
39
  case name
36
40
  when 'hard_request'
37
41
  start_hard_request(attributes)
42
+ when "JB_stdout_path_list"
43
+ @processing_JB_stdout_path_list = true
38
44
  end
39
45
  end
40
46
 
41
47
  def tag_end(name)
48
+ #Add text if in native_tags
49
+ if (@native_tags.include?(name))
50
+ @current_job[:native][:"#{name}"] = @current_text
51
+ end
52
+
42
53
  case name
43
54
  when 'job_list'
44
55
  end_job_list
@@ -64,6 +75,10 @@ class QstatXmlRListener
64
75
  end_hard_request
65
76
  when 'tasks'
66
77
  add_child_tasks
78
+ when 'PN_path'
79
+ end_PN_path
80
+ when 'ST_name'
81
+ end_ST_name
67
82
  end
68
83
  end
69
84
 
@@ -130,6 +145,15 @@ class QstatXmlRListener
130
145
  end
131
146
  end
132
147
 
148
+ def end_PN_path
149
+ @current_job[:native][:PN_path] = @current_text if @processing_JB_stdout_path_list
150
+ @processing_JB_stdout_path_list = false
151
+ end
152
+
153
+ def end_ST_name
154
+ @current_job[:native][:ST_name] = @current_job[:native][:ST_name] + @current_text + ' '
155
+ end
156
+
133
157
  # Store a completed job and reset current_job for the next pass
134
158
  def end_job_list
135
159
  @parsed_jobs << @current_job
@@ -145,4 +169,3 @@ class QstatXmlRListener
145
169
  }
146
170
  end
147
171
  end
148
-
@@ -14,13 +14,17 @@ module OodCore
14
14
  # @option config [Object] :conf (nil) Path to the slurm conf
15
15
  # @option config [Object] :bin (nil) Path to slurm client binaries
16
16
  # @option config [#to_h] :bin_overrides ({}) Optional overrides to Slurm client executables
17
+ # @option config [Object] :submit_host ("") Submit job on login node via ssh
18
+ # @option config [Object] :strict_host_checking (true) Whether to use strict host checking when ssh to submit_host
17
19
  def self.build_slurm(config)
18
20
  c = config.to_h.symbolize_keys
19
- cluster = c.fetch(:cluster, nil)
20
- conf = c.fetch(:conf, nil)
21
- bin = c.fetch(:bin, nil)
22
- bin_overrides = c.fetch(:bin_overrides, {})
23
- slurm = Adapters::Slurm::Batch.new(cluster: cluster, conf: conf, bin: bin, bin_overrides: bin_overrides)
21
+ cluster = c.fetch(:cluster, nil)
22
+ conf = c.fetch(:conf, nil)
23
+ bin = c.fetch(:bin, nil)
24
+ bin_overrides = c.fetch(:bin_overrides, {})
25
+ submit_host = c.fetch(:submit_host, "")
26
+ strict_host_checking = c.fetch(:strict_host_checking, true)
27
+ slurm = Adapters::Slurm::Batch.new(cluster: cluster, conf: conf, bin: bin, bin_overrides: bin_overrides, submit_host: submit_host, strict_host_checking: strict_host_checking)
24
28
  Adapters::Slurm.new(slurm: slurm)
25
29
  end
26
30
  end
@@ -62,6 +66,16 @@ module OodCore
62
66
  # @return Hash<String, String>
63
67
  attr_reader :bin_overrides
64
68
 
69
+ # The login node where the job is submitted via ssh
70
+ # @example owens.osc.edu
71
+ # @return [String] The login node
72
+ attr_reader :submit_host
73
+
74
+ # Whether to use strict host checking when ssh to submit_host
75
+ # @example false
76
+ # @return [Bool]; true if empty
77
+ attr_reader :strict_host_checking
78
+
65
79
  # The root exception class that all Slurm-specific exceptions inherit
66
80
  # from
67
81
  class Error < StandardError; end
@@ -69,11 +83,16 @@ module OodCore
69
83
  # @param cluster [#to_s, nil] the cluster name
70
84
  # @param conf [#to_s, nil] path to the slurm conf
71
85
  # @param bin [#to_s] path to slurm installation binaries
72
- def initialize(cluster: nil, bin: nil, conf: nil, bin_overrides: {})
73
- @cluster = cluster && cluster.to_s
74
- @conf = conf && Pathname.new(conf.to_s)
75
- @bin = Pathname.new(bin.to_s)
76
- @bin_overrides = bin_overrides
86
+ # @param bin_overrides [#to_h] a hash of bin overrides to be used in job
87
+ # @param submit_host [#to_s] Submits the job on a login node via ssh
88
+ # @param strict_host_checking [Bool] Whether to use strict host checking when ssh to submit_host
89
+ def initialize(cluster: nil, bin: nil, conf: nil, bin_overrides: {}, submit_host: "", strict_host_checking: true)
90
+ @cluster = cluster && cluster.to_s
91
+ @conf = conf && Pathname.new(conf.to_s)
92
+ @bin = Pathname.new(bin.to_s)
93
+ @bin_overrides = bin_overrides
94
+ @submit_host = submit_host.to_s
95
+ @strict_host_checking = strict_host_checking
77
96
  end
78
97
 
79
98
  # Get a list of hashes detailing each of the jobs on the batch server
@@ -148,9 +167,9 @@ module OodCore
148
167
  #TODO: write some barebones test for this? like 2 options and id or no id
149
168
  def squeue_args(id: "", owner: nil, options: [])
150
169
  args = ["--all", "--states=all", "--noconvert"]
151
- args += ["-o", "#{RECORD_SEPARATOR}#{options.join(UNIT_SEPARATOR)}"]
152
- args += ["-u", owner.to_s] unless owner.to_s.empty?
153
- args += ["-j", id.to_s] unless id.to_s.empty?
170
+ args.concat ["-o", "#{RECORD_SEPARATOR}#{options.join(UNIT_SEPARATOR)}"]
171
+ args.concat ["-u", owner.to_s] unless owner.to_s.empty?
172
+ args.concat ["-j", id.to_s] unless id.to_s.empty?
154
173
  args
155
174
  end
156
175
 
@@ -275,11 +294,15 @@ module OodCore
275
294
  # Call a forked Slurm command for a given cluster
276
295
  def call(cmd, *args, env: {}, stdin: "")
277
296
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
297
+
278
298
  args = args.map(&:to_s)
279
- args += ["-M", cluster] if cluster
299
+ args.concat ["-M", cluster] if cluster
300
+
280
301
  env = env.to_h
281
302
  env["SLURM_CONF"] = conf.to_s if conf
282
- o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s)
303
+
304
+ cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
305
+ o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
283
306
  s.success? ? o : raise(Error, e)
284
307
  end
285
308
 
@@ -358,30 +381,31 @@ module OodCore
358
381
  # Set sbatch options
359
382
  args = []
360
383
  # ignore args, don't know how to do this for slurm
361
- args += ["-H"] if script.submit_as_hold
362
- args += (script.rerunnable ? ["--requeue"] : ["--no-requeue"]) unless script.rerunnable.nil?
363
- args += ["-D", script.workdir.to_s] unless script.workdir.nil?
364
- args += ["--mail-user", script.email.join(",")] unless script.email.nil?
384
+ args.concat ["-H"] if script.submit_as_hold
385
+ args.concat (script.rerunnable ? ["--requeue"] : ["--no-requeue"]) unless script.rerunnable.nil?
386
+ args.concat ["-D", script.workdir.to_s] unless script.workdir.nil?
387
+ args.concat ["--mail-user", script.email.join(",")] unless script.email.nil?
365
388
  if script.email_on_started && script.email_on_terminated
366
- args += ["--mail-type", "ALL"]
389
+ args.concat ["--mail-type", "ALL"]
367
390
  elsif script.email_on_started
368
- args += ["--mail-type", "BEGIN"]
391
+ args.concat ["--mail-type", "BEGIN"]
369
392
  elsif script.email_on_terminated
370
- args += ["--mail-type", "END"]
393
+ args.concat ["--mail-type", "END"]
371
394
  elsif script.email_on_started == false && script.email_on_terminated == false
372
- args += ["--mail-type", "NONE"]
395
+ args.concat ["--mail-type", "NONE"]
373
396
  end
374
- args += ["-J", script.job_name] unless script.job_name.nil?
375
- args += ["-i", script.input_path] unless script.input_path.nil?
376
- args += ["-o", script.output_path] unless script.output_path.nil?
377
- args += ["-e", script.error_path] unless script.error_path.nil?
378
- args += ["--reservation", script.reservation_id] unless script.reservation_id.nil?
379
- args += ["-p", script.queue_name] unless script.queue_name.nil?
380
- args += ["--priority", script.priority] unless script.priority.nil?
381
- args += ["--begin", script.start_time.localtime.strftime("%C%y-%m-%dT%H:%M:%S")] unless script.start_time.nil?
382
- args += ["-A", script.accounting_id] unless script.accounting_id.nil?
383
- args += ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
384
- args += ['-a', script.job_array_request] unless script.job_array_request.nil?
397
+ args.concat ["-J", script.job_name] unless script.job_name.nil?
398
+ args.concat ["-i", script.input_path] unless script.input_path.nil?
399
+ args.concat ["-o", script.output_path] unless script.output_path.nil?
400
+ args.concat ["-e", script.error_path] unless script.error_path.nil?
401
+ args.concat ["--reservation", script.reservation_id] unless script.reservation_id.nil?
402
+ args.concat ["-p", script.queue_name] unless script.queue_name.nil?
403
+ args.concat ["--priority", script.priority] unless script.priority.nil?
404
+ args.concat ["--begin", script.start_time.localtime.strftime("%C%y-%m-%dT%H:%M:%S")] unless script.start_time.nil?
405
+ args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
406
+ args.concat ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
407
+ args.concat ['-a', script.job_array_request] unless script.job_array_request.nil?
408
+ args.concat ['--qos', script.qos] unless script.qos.nil?
385
409
  # ignore nodes, don't know how to do this for slurm
386
410
 
387
411
  # Set dependencies
@@ -390,14 +414,14 @@ module OodCore
390
414
  depend << "afterok:#{afterok.join(":")}" unless afterok.empty?
391
415
  depend << "afternotok:#{afternotok.join(":")}" unless afternotok.empty?
392
416
  depend << "afterany:#{afterany.join(":")}" unless afterany.empty?
393
- args += ["-d", depend.join(",")] unless depend.empty?
417
+ args.concat ["-d", depend.join(",")] unless depend.empty?
394
418
 
395
419
  # Set environment variables
396
420
  env = script.job_environment || {}
397
- args += ["--export", export_arg(env, script.copy_environment?)]
421
+ args.concat ["--export", export_arg(env, script.copy_environment?)]
398
422
 
399
423
  # Set native options
400
- args += script.native if script.native
424
+ args.concat script.native if script.native
401
425
 
402
426
  # Set content
403
427
  content = if script.shell_path.nil?
@@ -1,5 +1,6 @@
1
1
  require "ood_core/refinements/hash_extensions"
2
2
  require "ood_core/job/adapters/helper"
3
+ require 'shellwords'
3
4
 
4
5
  module OodCore
5
6
  module Job
@@ -9,16 +10,18 @@ module OodCore
9
10
  # Build the Torque adapter from a configuration
10
11
  # @param config [#to_h] the configuration for job adapter
11
12
  # @option config [#to_s] :host The batch server host
13
+ # @option config [#to_s] :submit_host The login node to submit the job via ssh
12
14
  # @option config [#to_s] :lib ('') Path to torque client libraries
13
15
  # @option config [#to_s] :bin ('') Path to torque client binaries
14
16
  # @option config [#to_h] :custom_bin ({}) Optional overrides to Torque client executables
15
17
  def self.build_torque(config)
16
18
  c = config.to_h.symbolize_keys
17
19
  host = c.fetch(:host) { raise ArgumentError, "No host specified. Missing argument: host" }.to_s
20
+ submit_host = c.fetch(:submit_host, "").to_s
18
21
  lib = c.fetch(:lib, "").to_s
19
22
  bin = c.fetch(:bin, "").to_s
20
23
  custom_bin = c.fetch(:custom_bin, {})
21
- pbs = Adapters::Torque::Batch.new(host: host, lib: lib, bin: bin, custom_bin: custom_bin)
24
+ pbs = Adapters::Torque::Batch.new(host: host, submit_host: submit_host, lib: lib, bin: bin, custom_bin: custom_bin)
22
25
  Adapters::Torque.new(pbs: pbs)
23
26
  end
24
27
  end
@@ -85,7 +88,7 @@ module OodCore
85
88
  depend << "afterany:#{afterany.join(':')}" unless afterany.empty?
86
89
 
87
90
  # Set mailing options
88
- mail_points = ""
91
+ mail_points = ""
89
92
  mail_points += "b" if script.email_on_started
90
93
  mail_points += "e" if script.email_on_terminated
91
94
 
@@ -129,40 +132,44 @@ module OodCore
129
132
  envvars.merge! script.native.fetch(:envvars, {})
130
133
  end
131
134
 
135
+ # Destructively change envvars to shellescape values
136
+ envvars.transform_values! { |v| Shellwords.escape(v) }
137
+
132
138
  # Submit job
133
139
  @pbs.submit_string(script.content, queue: script.queue_name, headers: headers, resources: resources, envvars: envvars)
134
140
  else
135
141
  # Set qsub arguments
136
142
  args = []
137
- args += ["-F", script.args.join(" ")] unless script.args.nil?
138
- args += ["-h"] if script.submit_as_hold
139
- args += ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
140
- args += ["-M", script.email.join(",")] unless script.email.nil?
141
- args += ["-m", mail_points] unless mail_points.empty?
142
- args += ["-N", script.job_name] unless script.job_name.nil?
143
- args += ["-S", script.shell_path] unless script.shell_path.nil?
143
+ args.concat ["-F", script.args.join(" ")] unless script.args.nil?
144
+ args.concat ["-h"] if script.submit_as_hold
145
+ args.concat ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
146
+ args.concat ["-M", script.email.join(",")] unless script.email.nil?
147
+ args.concat ["-m", mail_points] unless mail_points.empty?
148
+ args.concat ["-N", script.job_name] unless script.job_name.nil?
149
+ args.concat ["-S", script.shell_path] unless script.shell_path.nil?
144
150
  # ignore input_path (not defined in Torque)
145
- args += ["-o", script.output_path] unless script.output_path.nil?
146
- args += ["-e", script.error_path] unless script.error_path.nil?
147
- args += ["-W", "x=advres:#{script.reservation_id}"] unless script.reservation_id.nil?
148
- args += ["-q", script.queue_name] unless script.queue_name.nil?
149
- args += ["-p", script.priority] unless script.priority.nil?
150
- args += ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
151
- args += ["-A", script.accounting_id] unless script.accounting_id.nil?
152
- args += ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
153
- args += ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
154
- args += ['-t', script.job_array_request] unless script.job_array_request.nil?
151
+ args.concat ["-o", script.output_path] unless script.output_path.nil?
152
+ args.concat ["-e", script.error_path] unless script.error_path.nil?
153
+ args.concat ["-W", "x=advres:#{script.reservation_id}"] unless script.reservation_id.nil?
154
+ args.concat ["-q", script.queue_name] unless script.queue_name.nil?
155
+ args.concat ["-p", script.priority] unless script.priority.nil?
156
+ args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
157
+ args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
158
+ args.concat ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
159
+ args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
160
+ args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
161
+ args.concat ['-l', "qos=#{script.qos}"] unless script.qos.nil?
155
162
  # Set environment variables
156
163
  env = script.job_environment.to_h
157
- args += ["-v", env.keys.join(",")] unless env.empty?
158
- args += ["-V"] if script.copy_environment?
164
+ args.concat ["-v", env.keys.join(",")] unless env.empty?
165
+ args.concat ["-V"] if script.copy_environment?
159
166
 
160
167
  # If error_path is not specified we join stdout & stderr (as this
161
168
  # mimics what the other resource managers do)
162
- args += ["-j", "oe"] if script.error_path.nil?
169
+ args.concat ["-j", "oe"] if script.error_path.nil?
163
170
 
164
171
  # Set native options
165
- args += script.native if script.native
172
+ args.concat script.native if script.native
166
173
 
167
174
  # Submit job
168
175
  @pbs.submit(script.content, args: args, env: env, chdir: script.workdir)
@@ -9,6 +9,18 @@ class OodCore::Job::Adapters::Torque
9
9
  # @return [String] the batch server host
10
10
  attr_reader :host
11
11
 
12
+ # The login node where job is submitted via ssh
13
+ # @example OSC's owens login node
14
+ # my_conn.submit_host #=> "owens.osc.edu"
15
+ # @return [String] the login node
16
+ attr_reader :submit_host
17
+
18
+ # Determines whether to use strict_host_checking for ssh
19
+ # @example
20
+ # my_conn.strict_host_checking.to_s #=> "true"
21
+ # @return [Bool]
22
+ attr_reader :strict_host_checking
23
+
12
24
  # The path to the Torque client installation libraries
13
25
  # @example For Torque 5.0.0
14
26
  # my_conn.lib.to_s #=> "/usr/local/Torque/5.0.0/lib"
@@ -32,19 +44,23 @@ class OodCore::Job::Adapters::Torque
32
44
  class Error < StandardError; end
33
45
 
34
46
  # @param host [#to_s] the batch server host
47
+ # @param submit_host [#to_s] the login node
48
+ # @param strict_host_checking [bool] use strict host checking when ssh to submit_host
35
49
  # @param lib [#to_s] path to FFI installation libraries
36
50
  # @param bin [#to_s] path to FFI installation binaries
37
- def initialize(host:, lib: "", bin: "", bin_overrides: {}, **_)
38
- @host = host.to_s
39
- @lib = Pathname.new(lib.to_s)
40
- @bin = Pathname.new(bin.to_s)
41
- @bin_overrides = bin_overrides
51
+ def initialize(host:, submit_host: "", strict_host_checking: true, lib: "", bin: "", bin_overrides: {}, **_)
52
+ @host = host.to_s
53
+ @submit_host = submit_host.to_s
54
+ @strict_host_checking = strict_host_checking
55
+ @lib = Pathname.new(lib.to_s)
56
+ @bin = Pathname.new(bin.to_s)
57
+ @bin_overrides = bin_overrides
42
58
  end
43
59
 
44
60
  # Convert object to hash
45
61
  # @return [Hash] the hash describing this object
46
62
  def to_h
47
- {host: host, lib: lib, bin: bin}
63
+ {host: host, submit_host: submit_host, strict_host_checking: strict_host_checking, lib: lib, bin: bin}
48
64
  end
49
65
 
50
66
  # The comparison operator
@@ -437,10 +453,10 @@ class OodCore::Job::Adapters::Torque
437
453
  # NB: The binary includes many useful filters and is preferred
438
454
  def qsub_submit(script, queue, headers, resources, envvars)
439
455
  params = []
440
- params += ["-q", "#{queue}"] unless queue.empty?
441
- params += headers.map {|k,v| qsub_arg(k,v)}.flatten
442
- params += resources.map{|k,v| ["-l", "#{k}=#{v}"]}.flatten
443
- params += ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
456
+ params.concat ["-q", "#{queue}"] unless queue.empty?
457
+ params.concat headers.map {|k,v| qsub_arg(k,v)}.flatten
458
+ params.concat resources.map{|k,v| ["-l", "#{k}=#{v}"]}.flatten
459
+ params.concat ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
444
460
  params << script
445
461
 
446
462
  env = {
@@ -448,6 +464,7 @@ class OodCore::Job::Adapters::Torque
448
464
  "LD_LIBRARY_PATH" => "#{lib}:#{ENV['LD_LIBRARY_PATH']}"
449
465
  }
450
466
  cmd = OodCore::Job::Adapters::Helper.bin_path('qsub', bin, bin_overrides)
467
+ cmd, params = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, params, strict_host_checking, env)
451
468
  o, e, s = Open3.capture3(env, cmd, *params)
452
469
  raise Error, e unless s.success?
453
470
  o.chomp
@@ -456,14 +473,14 @@ class OodCore::Job::Adapters::Torque
456
473
  # Call a forked PBS command for a given host
457
474
  def call(cmd, *args, env: {}, stdin: "", chdir: nil)
458
475
  cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
459
- args = args.map(&:to_s)
460
476
  env = env.to_h.each_with_object({}) {|(k,v), h| h[k.to_s] = v.to_s}.merge({
461
477
  "PBS_DEFAULT" => host,
462
478
  "LD_LIBRARY_PATH" => %{#{lib}:#{ENV["LD_LIBRARY_PATH"]}}
463
479
  })
480
+ cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
464
481
  stdin = stdin.to_s
465
482
  chdir ||= "."
466
- o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin, chdir: chdir.to_s)
483
+ o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin, chdir: chdir.to_s)
467
484
  s.success? ? o : raise(Error, e)
468
485
  end
469
486
  end
@@ -99,6 +99,10 @@ module OodCore
99
99
  # @return [String, nil] job array request
100
100
  attr_reader :job_array_request
101
101
 
102
+ # The qos selected for the job
103
+ # @return [String, nil] qos
104
+ attr_reader :qos
105
+
102
106
  # Object detailing any native specifications that are implementation specific
103
107
  # @note Should be avoided at all costs.
104
108
  # @return [Object, nil] native specifications
@@ -130,6 +134,8 @@ module OodCore
130
134
  # @param start_time [#to_i, nil] eligible start time
131
135
  # @param wall_time [#to_i, nil] max real time
132
136
  # @param accounting_id [#to_s, nil] accounting id
137
+ # @param job_array_request [#to_s, nil] job array request
138
+ # @param qos [#to_s, nil] qos
133
139
  # @param native [Object, nil] native specifications
134
140
  # @param copy_environment [Boolean, nil] copy the environment
135
141
  def initialize(content:, args: nil, submit_as_hold: nil, rerunnable: nil,
@@ -139,7 +145,7 @@ module OodCore
139
145
  output_path: nil, error_path: nil, reservation_id: nil,
140
146
  queue_name: nil, priority: nil, start_time: nil,
141
147
  wall_time: nil, accounting_id: nil, job_array_request: nil,
142
- native: nil, copy_environment: nil, **_)
148
+ qos: nil, native: nil, copy_environment: nil, **_)
143
149
  @content = content.to_s
144
150
 
145
151
  @submit_as_hold = submit_as_hold
@@ -163,6 +169,7 @@ module OodCore
163
169
  @wall_time = wall_time && wall_time.to_i
164
170
  @accounting_id = accounting_id && accounting_id.to_s
165
171
  @job_array_request = job_array_request && job_array_request.to_s
172
+ @qos = qos && qos.to_s
166
173
  @native = native
167
174
  @copy_environment = (copy_environment.nil?) ? nil : !! copy_environment
168
175
  end
@@ -192,6 +199,7 @@ module OodCore
192
199
  wall_time: wall_time,
193
200
  accounting_id: accounting_id,
194
201
  job_array_request: job_array_request,
202
+ qos: qos,
195
203
  native: native,
196
204
  copy_environment: copy_environment
197
205
  }
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.11.4"
3
+ VERSION = "0.12.0"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.4
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2020-05-27 00:00:00.000000000 Z
13
+ date: 2020-08-05 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support