ood_core 0.11.4 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -1
- data/README.md +6 -5
- data/lib/ood_core/job/adapters/helper.rb +20 -1
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +1 -0
- data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh +15 -1
- data/lib/ood_core/job/adapters/lsf.rb +1 -0
- data/lib/ood_core/job/adapters/lsf/batch.rb +5 -3
- data/lib/ood_core/job/adapters/lsf/helper.rb +22 -22
- data/lib/ood_core/job/adapters/pbspro.rb +54 -34
- data/lib/ood_core/job/adapters/sge/batch.rb +6 -5
- data/lib/ood_core/job/adapters/sge/helper.rb +19 -19
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +35 -4
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +25 -2
- data/lib/ood_core/job/adapters/slurm.rb +61 -37
- data/lib/ood_core/job/adapters/torque.rb +30 -23
- data/lib/ood_core/job/adapters/torque/batch.rb +29 -12
- data/lib/ood_core/job/script.rb +9 -1
- data/lib/ood_core/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bb944d43beb0aced99e13efb2ef10bf33f9666c705c50ca5ae1727751de43073
|
4
|
+
data.tar.gz: 6e3cd66160be3bbd63124d6f2ddc794bce4ae64e385977828faba5dfd28ff838
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 176e331a856c1e6958c444426d5c1b41aa881e90a69dca507b07f5463eb81355689e8391e0bf27823fc42a9484789f623ffd566b9d6c414c9cf741a7cafd1def
|
7
|
+
data.tar.gz: 15481101ad3120d3e8457612f2b8a8be4f1e268b38538b18b710f27887836f7a47eac3bf2e89d8f73745ae96ae78e21cd5ba5afefb4161cf95f435d6f2fdf001
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
|
+
## [0.12.0] - 2020-08-05
|
10
|
+
### Added
|
11
|
+
- qos option to Slurm and Torque [#205](https://github.com/OSC/ood_core/pull/205)
|
12
|
+
- native hash returned in qstat for SGE adapter [#198](https://github.com/OSC/ood_core/pull/198)
|
13
|
+
- option for specifying `submit_host` to submit jobs via ssh on other host [#204](https://github.com/OSC/ood_core/pull/204)
|
14
|
+
|
15
|
+
### Fixed
|
16
|
+
- SGE adapter now handles timestamps reported in milliseconds as well as those reported in seconds [#206](https://github.com/OSC/ood_core/issues/206)
|
17
|
+
- Torque's native "hash" for job submission now handles env vars values with spaces [#202](https://github.com/OSC/ood_core/pull/202)
|
18
|
+
|
9
19
|
## [0.11.4] - 2020-05-27
|
10
20
|
### Fixed
|
11
21
|
- Environment exports in SLURM while implementing [#158](https://github.com/OSC/ood_core/issues/158)
|
@@ -233,7 +243,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
233
243
|
### Added
|
234
244
|
- Initial release!
|
235
245
|
|
236
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
246
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.12.0...HEAD
|
247
|
+
[0.12.0]: https://github.com/OSC/ood_core/compare/v0.11.4...v0.12.0
|
237
248
|
[0.11.4]: https://github.com/OSC/ood_core/compare/v0.11.3...v0.11.4
|
238
249
|
[0.11.3]: https://github.com/OSC/ood_core/compare/v0.11.2...v0.11.3
|
239
250
|
[0.11.2]: https://github.com/OSC/ood_core/compare/v0.11.1...v0.11.2
|
data/README.md
CHANGED
@@ -4,12 +4,13 @@
|
|
4
4
|

|
5
5
|

|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
- Website: http://openondemand.org/
|
8
|
+
- Website repo with JOSS publication: https://github.com/OSC/Open-OnDemand
|
9
|
+
- Documentation: https://osc.github.io/ood-documentation/master/
|
10
|
+
- Main code repo: https://github.com/OSC/ondemand
|
11
|
+
- Core library repo: https://github.com/OSC/ood_core
|
11
12
|
|
12
|
-
|
13
|
+
OnDemand core library with adapters for each batch scheduler.
|
13
14
|
|
14
15
|
## Installation
|
15
16
|
|
@@ -12,7 +12,26 @@ module OodCore
|
|
12
12
|
def self.bin_path(cmd, bin_default, bin_overrides)
|
13
13
|
bin_overrides.fetch(cmd.to_s) { Pathname.new(bin_default.to_s).join(cmd.to_s).to_s }
|
14
14
|
end
|
15
|
+
|
16
|
+
# Gets a command that submits command on another host via ssh
|
17
|
+
# @param submit_host [String] where to submit the command
|
18
|
+
# @param cmd [String] the desired command to execute on another host
|
19
|
+
# @param cmd_args [Array] arguments to the command specified above
|
20
|
+
# @param strict_host_checking [Bool] whether to use strict_host_checking
|
21
|
+
# @param env [Hash] env variables to be set w/ssh
|
22
|
+
#
|
23
|
+
# @return cmd [String] command wrapped in ssh if submit_host is present
|
24
|
+
# @return args [Array] command arguments including ssh_flags and original command
|
25
|
+
def self.ssh_wrap(submit_host, cmd, cmd_args, strict_host_checking = true, env = {})
|
26
|
+
return cmd, cmd_args if submit_host.to_s.empty?
|
27
|
+
|
28
|
+
check_host = strict_host_checking ? "yes" : "no"
|
29
|
+
args = ['-o', 'BatchMode=yes', '-o', 'UserKnownHostsFile=/dev/null', '-o', "StrictHostKeyChecking=#{check_host}", "#{submit_host}"]
|
30
|
+
env.each{|key, value| args.push("export #{key}=#{value};")}
|
31
|
+
|
32
|
+
return 'ssh', args + [cmd] + cmd_args
|
33
|
+
end
|
15
34
|
end
|
16
35
|
end
|
17
36
|
end
|
18
|
-
end
|
37
|
+
end
|
@@ -174,6 +174,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
|
|
174
174
|
'session_name' => session_name,
|
175
175
|
'singularity_bin' => singularity_bin,
|
176
176
|
'singularity_image' => singularity_image(script.native),
|
177
|
+
'ssh_hosts' => ssh_hosts,
|
177
178
|
'tmux_bin' => tmux_bin,
|
178
179
|
}.each{
|
179
180
|
|key, value| bnd.local_variable_set(key, value)
|
@@ -1,5 +1,19 @@
|
|
1
1
|
#!/bin/bash
|
2
|
-
|
2
|
+
SSH_HOSTS=(<%= ssh_hosts.join(' ').to_s %>)
|
3
|
+
hostnames=`hostname -A`
|
4
|
+
for host in ${SSH_HOSTS[@]}
|
5
|
+
do
|
6
|
+
if [[ " ${hostnames[@]} " =~ " ${host} " ]]; then
|
7
|
+
hostname=$host
|
8
|
+
fi
|
9
|
+
done
|
10
|
+
|
11
|
+
if [ -z "$hostname" ]; then
|
12
|
+
printf >&2 "ERROR: Can't start job on [${hostnames[@]}] because it does not match any hostname configured \nin ssh_hosts [${SSH_HOSTS[@]}]. The output of 'hostname -A' must match an entry in ssh_hosts \nfrom the cluster configuration."
|
13
|
+
exit 1
|
14
|
+
fi
|
15
|
+
|
16
|
+
echo $hostname
|
3
17
|
|
4
18
|
# Put the script into a temp file on localhost
|
5
19
|
<% if debug %>
|
@@ -14,6 +14,7 @@ module OodCore
|
|
14
14
|
# @option config [#to_s] :serverdir ('') Path to lsf client etc dir
|
15
15
|
# @option config [#to_s] :cluster ('') name of cluster, if in multi-cluster mode
|
16
16
|
# @option config [#to_h] :bin_overrides ({}) Optional overrides to LSF client executables
|
17
|
+
# @option config [#to_s] :submit_host ('') Host to submit commands to
|
17
18
|
def self.build_lsf(config)
|
18
19
|
batch = Adapters::Lsf::Batch.new(config.to_h.symbolize_keys)
|
19
20
|
Adapters::Lsf.new(batch: batch)
|
@@ -2,21 +2,22 @@
|
|
2
2
|
#
|
3
3
|
# @api private
|
4
4
|
class OodCore::Job::Adapters::Lsf::Batch
|
5
|
-
attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides
|
5
|
+
attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides, :submit_host, :strict_host_checking
|
6
6
|
|
7
7
|
# The root exception class that all LSF-specific exceptions inherit
|
8
8
|
# from
|
9
9
|
class Error < StandardError; end
|
10
10
|
|
11
11
|
# @param bin [#to_s] path to LSF installation binaries
|
12
|
-
def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, **_)
|
12
|
+
def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, submit_host: "", strict_host_checking: true, **_)
|
13
13
|
@bindir = Pathname.new(bindir.to_s)
|
14
|
-
|
15
14
|
@envdir = Pathname.new(envdir.to_s)
|
16
15
|
@libdir = Pathname.new(libdir.to_s)
|
17
16
|
@serverdir = Pathname.new(serverdir.to_s)
|
18
17
|
@cluster = cluster.to_s
|
19
18
|
@bin_overrides = bin_overrides
|
19
|
+
@submit_host = submit_host.to_s
|
20
|
+
@strict_host_checking = strict_host_checking
|
20
21
|
end
|
21
22
|
|
22
23
|
def default_env
|
@@ -143,6 +144,7 @@ class OodCore::Job::Adapters::Lsf::Batch
|
|
143
144
|
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
|
144
145
|
args = cluster_args + args
|
145
146
|
env = default_env.merge(env.to_h)
|
147
|
+
cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
|
146
148
|
o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
|
147
149
|
s.success? ? o : raise(Error, e)
|
148
150
|
end
|
@@ -78,40 +78,40 @@ class OodCore::Job::Adapters::Lsf::Helper
|
|
78
78
|
def batch_submit_args(script, after: [], afterok: [], afternotok: [], afterany: [])
|
79
79
|
args = []
|
80
80
|
|
81
|
-
args
|
82
|
-
args
|
83
|
-
args
|
84
|
-
args[-1]
|
85
|
-
|
86
|
-
args
|
87
|
-
args
|
88
|
-
args
|
89
|
-
args
|
90
|
-
args
|
91
|
-
args
|
92
|
-
args
|
93
|
-
args
|
81
|
+
args.concat ["-P", script.accounting_id] unless script.accounting_id.nil?
|
82
|
+
args.concat ["-cwd", script.workdir.to_s] unless script.workdir.nil?
|
83
|
+
args.concat ["-J", script.job_name] unless script.job_name.nil?
|
84
|
+
args[-1].concat "[#{script.job_array_request}]" unless script.job_array_request.nil?
|
85
|
+
|
86
|
+
args.concat ["-q", script.queue_name] unless script.queue_name.nil?
|
87
|
+
args.concat ["-U", script.reservation_id] unless script.reservation_id.nil?
|
88
|
+
args.concat ["-sp", script.priority] unless script.priority.nil?
|
89
|
+
args.concat ["-H"] if script.submit_as_hold
|
90
|
+
args.concat (script.rerunnable ? ["-r"] : ["-rn"]) unless script.rerunnable.nil?
|
91
|
+
args.concat ["-b", script.start_time.localtime.strftime("%Y:%m:%d:%H:%M")] unless script.start_time.nil?
|
92
|
+
args.concat ["-W", (script.wall_time / 60).to_i] unless script.wall_time.nil?
|
93
|
+
args.concat ["-L", script.shell_path.to_s] unless script.shell_path.nil?
|
94
94
|
|
95
95
|
# environment
|
96
96
|
env = script.job_environment || {}
|
97
97
|
# To preserve pre-existing behavior we only act when true or false, when nil we do nothing
|
98
98
|
if script.copy_environment?
|
99
|
-
args
|
99
|
+
args.concat ["-env", (["all"] + env.keys).join(",")]
|
100
100
|
elsif script.copy_environment? == false
|
101
|
-
args
|
101
|
+
args.concat ["-env", (["none"] + env.keys).join(",")]
|
102
102
|
end
|
103
103
|
|
104
104
|
# input and output files
|
105
|
-
args
|
106
|
-
args
|
107
|
-
args
|
105
|
+
args.concat ["-i", script.input_path] unless script.input_path.nil?
|
106
|
+
args.concat ["-o", script.output_path] unless script.output_path.nil?
|
107
|
+
args.concat ["-e", script.error_path] unless script.error_path.nil?
|
108
108
|
|
109
109
|
# email
|
110
|
-
args
|
111
|
-
args
|
112
|
-
args
|
110
|
+
args.concat ["-B"] if script.email_on_started
|
111
|
+
args.concat ["-N"] if script.email_on_terminated
|
112
|
+
args.concat ["-u", script.email.join(",")] unless script.email.nil? || script.email.empty?
|
113
113
|
|
114
|
-
args
|
114
|
+
args.concat script.native unless script.native.nil?
|
115
115
|
|
116
116
|
{args: args, env: env}
|
117
117
|
end
|
@@ -10,17 +10,21 @@ module OodCore
|
|
10
10
|
# Build the PBS Pro adapter from a configuration
|
11
11
|
# @param config [#to_h] the configuration for job adapter
|
12
12
|
# @option config [Object] :host (nil) The batch server host
|
13
|
+
# @option config [Object] :submit_host ("") The login node where the job is submitted
|
14
|
+
# @option config [Object] :strict_host_checking (true) Whether to use strict host checking when ssh to submit_host
|
13
15
|
# @option config [Object] :exec (nil) Path to PBS Pro executables
|
14
16
|
# @option config [Object] :qstat_factor (nil) Deciding factor on how to
|
15
17
|
# call qstat for a user
|
16
18
|
# @option config [#to_h] :bin_overrides ({}) Optional overrides to PBS Pro client executables
|
17
19
|
def self.build_pbspro(config)
|
18
20
|
c = config.to_h.compact.symbolize_keys
|
19
|
-
host
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
21
|
+
host = c.fetch(:host, nil)
|
22
|
+
submit_host = c.fetch(:submit_host, "")
|
23
|
+
strict_host_checking = c.fetch(:strict_host_checking, true)
|
24
|
+
pbs_exec = c.fetch(:exec, nil)
|
25
|
+
qstat_factor = c.fetch(:qstat_factor, nil)
|
26
|
+
bin_overrides = c.fetch(:bin_overrides, {})
|
27
|
+
pbspro = Adapters::PBSPro::Batch.new(host: host, submit_host: submit_host, strict_host_checking: strict_host_checking, pbs_exec: pbs_exec, bin_overrides: bin_overrides)
|
24
28
|
Adapters::PBSPro.new(pbspro: pbspro, qstat_factor: qstat_factor)
|
25
29
|
end
|
26
30
|
end
|
@@ -41,6 +45,18 @@ module OodCore
|
|
41
45
|
# @return [String, nil] the batch server host
|
42
46
|
attr_reader :host
|
43
47
|
|
48
|
+
# The login node to submit the job via ssh
|
49
|
+
# @example
|
50
|
+
# my_batch.submit_host #=> "my_batch.server.edu"
|
51
|
+
# @return [String, nil] the login node
|
52
|
+
attr_reader :submit_host
|
53
|
+
|
54
|
+
# Whether to use strict host checking when ssh to submit_host
|
55
|
+
# @example
|
56
|
+
# my_batch.strict_host_checking #=> false
|
57
|
+
# @return [Bool] whether strict host checking is used when ssh-ing to submit_host; true if not configured
|
58
|
+
attr_reader :strict_host_checking
|
59
|
+
|
44
60
|
# The path containing the PBS executables
|
45
61
|
# @example
|
46
62
|
# my_batch.pbs_exec.to_s #=> "/usr/local/pbspro/10.0.0"
|
@@ -58,11 +74,15 @@ module OodCore
|
|
58
74
|
class Error < StandardError; end
|
59
75
|
|
60
76
|
# @param host [#to_s, nil] the batch server host
|
77
|
+
# @param submit_host [#to_s, nil] the login node to ssh to
|
78
|
+
# @param strict_host_checking [bool, true] whether to use strict host checking when ssh to submit_host
|
61
79
|
# @param exec [#to_s, nil] path to pbs executables
|
62
|
-
def initialize(host: nil, pbs_exec: nil, bin_overrides: {})
|
63
|
-
@host
|
64
|
-
@
|
65
|
-
@
|
80
|
+
def initialize(host: nil, submit_host: "", strict_host_checking: true, pbs_exec: nil, bin_overrides: {})
|
81
|
+
@host = host && host.to_s
|
82
|
+
@submit_host = submit_host && submit_host.to_s
|
83
|
+
@strict_host_checking = strict_host_checking
|
84
|
+
@pbs_exec = pbs_exec && Pathname.new(pbs_exec.to_s)
|
85
|
+
@bin_overrides = bin_overrides
|
66
86
|
end
|
67
87
|
|
68
88
|
# Get a list of hashes detailing each of the jobs on the batch server
|
@@ -87,7 +107,7 @@ module OodCore
|
|
87
107
|
# @return [Array<Hash>] list of details for jobs
|
88
108
|
def get_jobs(id: "")
|
89
109
|
args = ["-f", "-t"] # display all information
|
90
|
-
args
|
110
|
+
args.concat [id.to_s] unless id.to_s.empty?
|
91
111
|
lines = call("qstat", *args).gsub("\n\t", "").split("\n").map(&:strip)
|
92
112
|
|
93
113
|
jobs = []
|
@@ -159,12 +179,12 @@ module OodCore
|
|
159
179
|
cmd = cmd.to_s
|
160
180
|
bindir = (!!pbs_exec) ? pbs_exec.join("bin").to_s : ''
|
161
181
|
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
|
162
|
-
args = args.map(&:to_s)
|
163
182
|
env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
|
164
183
|
env["PBS_DEFAULT"] = host.to_s if host
|
165
184
|
env["PBS_EXEC"] = pbs_exec.to_s if pbs_exec
|
185
|
+
cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
|
166
186
|
chdir ||= "."
|
167
|
-
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
|
187
|
+
o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s, chdir: chdir.to_s)
|
168
188
|
s.success? ? o : raise(Error, e)
|
169
189
|
end
|
170
190
|
end
|
@@ -227,28 +247,28 @@ module OodCore
|
|
227
247
|
# Set qsub options
|
228
248
|
args = []
|
229
249
|
# ignore args, can't use these if submitting from STDIN
|
230
|
-
args
|
231
|
-
args
|
232
|
-
args
|
250
|
+
args.concat ["-h"] if script.submit_as_hold
|
251
|
+
args.concat ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
|
252
|
+
args.concat ["-M", script.email.join(",")] unless script.email.nil?
|
233
253
|
if script.email_on_started && script.email_on_terminated
|
234
|
-
args
|
254
|
+
args.concat ["-m", "be"]
|
235
255
|
elsif script.email_on_started
|
236
|
-
args
|
256
|
+
args.concat ["-m", "b"]
|
237
257
|
elsif script.email_on_terminated
|
238
|
-
args
|
258
|
+
args.concat ["-m", "e"]
|
239
259
|
end
|
240
|
-
args
|
241
|
-
args
|
260
|
+
args.concat ["-N", script.job_name] unless script.job_name.nil?
|
261
|
+
args.concat ["-S", script.shell_path] unless script.shell_path.nil?
|
242
262
|
# ignore input_path (not defined in PBS Pro)
|
243
|
-
args
|
244
|
-
args
|
263
|
+
args.concat ["-o", script.output_path] unless script.output_path.nil?
|
264
|
+
args.concat ["-e", script.error_path] unless script.error_path.nil?
|
245
265
|
# Reservations are actually just queues in PBS Pro
|
246
|
-
args
|
247
|
-
args
|
248
|
-
args
|
249
|
-
args
|
250
|
-
args
|
251
|
-
args
|
266
|
+
args.concat ["-q", script.reservation_id] if !script.reservation_id.nil? && script.queue_name.nil?
|
267
|
+
args.concat ["-q", script.queue_name] unless script.queue_name.nil?
|
268
|
+
args.concat ["-p", script.priority] unless script.priority.nil?
|
269
|
+
args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
|
270
|
+
args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
|
271
|
+
args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
|
252
272
|
|
253
273
|
# Set dependencies
|
254
274
|
depend = []
|
@@ -256,21 +276,21 @@ module OodCore
|
|
256
276
|
depend << "afterok:#{afterok.join(":")}" unless afterok.empty?
|
257
277
|
depend << "afternotok:#{afternotok.join(":")}" unless afternotok.empty?
|
258
278
|
depend << "afterany:#{afterany.join(":")}" unless afterany.empty?
|
259
|
-
args
|
279
|
+
args.concat ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
|
260
280
|
|
261
281
|
# Set environment variables
|
262
282
|
envvars = script.job_environment.to_h
|
263
|
-
args
|
264
|
-
args
|
283
|
+
args.concat ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
|
284
|
+
args.concat ["-V"] if script.copy_environment?
|
265
285
|
|
266
286
|
# If error_path is not specified we join stdout & stderr (as this
|
267
287
|
# mimics what the other resource managers do)
|
268
|
-
args
|
288
|
+
args.concat ["-j", "oe"] if script.error_path.nil?
|
269
289
|
|
270
|
-
args
|
290
|
+
args.concat ["-J", script.job_array_request] unless script.job_array_request.nil?
|
271
291
|
|
272
292
|
# Set native options
|
273
|
-
args
|
293
|
+
args.concat script.native if script.native
|
274
294
|
|
275
295
|
# Submit job
|
276
296
|
@pbspro.submit_string(script.content, args: args, chdir: script.workdir)
|
@@ -15,7 +15,7 @@ end
|
|
15
15
|
class OodCore::Job::Adapters::Sge::Batch
|
16
16
|
using OodCore::Refinements::HashExtensions
|
17
17
|
|
18
|
-
attr_reader :bin, :bin_overrides, :conf, :cluster, :helper
|
18
|
+
attr_reader :bin, :bin_overrides, :conf, :cluster, :helper, :submit_host, :strict_host_checking
|
19
19
|
|
20
20
|
require "ood_core/job/adapters/sge/qstat_xml_j_r_listener"
|
21
21
|
require "ood_core/job/adapters/sge/qstat_xml_r_listener"
|
@@ -36,6 +36,8 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
36
36
|
@bin = Pathname.new(config.fetch(:bin, nil).to_s)
|
37
37
|
@sge_root = Pathname.new(config[:sge_root] || ENV['SGE_ROOT'] || "/var/lib/gridengine")
|
38
38
|
@bin_overrides = config.fetch(:bin_overrides, {})
|
39
|
+
@submit_host = config.fetch(:submit_host, "")
|
40
|
+
@strict_host_checking = config.fetch(:strict_host_checking, true)
|
39
41
|
|
40
42
|
# FIXME: hack as this affects env of the process!
|
41
43
|
ENV['SGE_ROOT'] = @sge_root.to_s
|
@@ -62,7 +64,7 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
62
64
|
def get_all(owner: nil)
|
63
65
|
listener = QstatXmlRListener.new
|
64
66
|
argv = ['qstat', '-r', '-xml']
|
65
|
-
argv
|
67
|
+
argv.concat ['-u', owner] unless owner.nil?
|
66
68
|
REXML::Parsers::StreamParser.new(call(*argv), listener).parse
|
67
69
|
|
68
70
|
listener.parsed_jobs.map{
|
@@ -166,11 +168,10 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
166
168
|
# Call a forked SGE command for a given batch server
|
167
169
|
def call(cmd, *args, env: {}, stdin: "", chdir: nil)
|
168
170
|
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
|
169
|
-
args = args.map(&:to_s)
|
170
|
-
|
171
171
|
env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
|
172
|
+
cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
|
172
173
|
chdir ||= "."
|
173
|
-
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
|
174
|
+
o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s, chdir: chdir.to_s)
|
174
175
|
s.success? ? o : raise(Error, e)
|
175
176
|
end
|
176
177
|
|
@@ -17,38 +17,38 @@ class OodCore::Job::Adapters::Sge::Helper
|
|
17
17
|
raise_error_on_unsupported_args(script, after: after, afterok: afterok, afternotok: afternotok, afterany: afterany)
|
18
18
|
|
19
19
|
args = []
|
20
|
-
args
|
21
|
-
args
|
22
|
-
script.job_environment.each_pair {|k, v| args
|
23
|
-
args
|
20
|
+
args.concat ['-h'] if script.submit_as_hold
|
21
|
+
args.concat ['-r', 'yes'] if script.rerunnable
|
22
|
+
script.job_environment.each_pair {|k, v| args.concat ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
|
23
|
+
args.concat ["-V"] if script.copy_environment?
|
24
24
|
|
25
25
|
if script.workdir
|
26
|
-
args
|
26
|
+
args.concat ['-wd', script.workdir]
|
27
27
|
elsif ! script_contains_wd_directive?(script.content)
|
28
|
-
args
|
28
|
+
args.concat ['-cwd']
|
29
29
|
end
|
30
30
|
|
31
31
|
on_event_email = []
|
32
32
|
on_event_email << 'b' if script.email_on_started # beginning
|
33
33
|
on_event_email << 'ea' if script.email_on_terminated # end, aborted
|
34
34
|
|
35
|
-
args
|
35
|
+
args.concat ['-M', script.email.first, '-m', on_event_email.join] if script.email && ! on_event_email.empty?
|
36
36
|
|
37
37
|
afterok = Array(afterok).map(&:to_s)
|
38
|
-
args
|
38
|
+
args.concat ['-hold_jid_ad', afterok.join(',')] unless afterok.empty?
|
39
39
|
|
40
40
|
# ignoring email_on_started
|
41
|
-
args
|
42
|
-
args
|
43
|
-
args
|
44
|
-
args
|
45
|
-
args
|
46
|
-
args
|
47
|
-
args
|
48
|
-
args
|
49
|
-
args
|
50
|
-
args
|
51
|
-
args
|
41
|
+
args.concat ['-N', script.job_name] unless script.job_name.nil?
|
42
|
+
args.concat ['-e', script.error_path] unless script.error_path.nil?
|
43
|
+
args.concat ['-o', script.output_path] unless script.output_path.nil?
|
44
|
+
args.concat ['-ar', script.reservation_id] unless script.reservation_id.nil?
|
45
|
+
args.concat ['-q', script.queue_name] unless script.queue_name.nil?
|
46
|
+
args.concat ['-p', script.priority] unless script.priority.nil?
|
47
|
+
args.concat ['-a', script.start_time.strftime('%C%y%m%d%H%M.%S')] unless script.start_time.nil?
|
48
|
+
args.concat ['-l', "h_rt=" + seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
|
49
|
+
args.concat ['-P', script.accounting_id] unless script.accounting_id.nil?
|
50
|
+
args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
|
51
|
+
args.concat Array.wrap(script.native) if script.native
|
52
52
|
|
53
53
|
args
|
54
54
|
end
|
@@ -28,10 +28,13 @@ class QstatXmlJRListener
|
|
28
28
|
:tasks => [],
|
29
29
|
:status => :queued,
|
30
30
|
:procs => 1,
|
31
|
-
:native => {
|
31
|
+
:native => {
|
32
|
+
:ST_name => ''
|
33
|
+
}
|
32
34
|
}
|
33
35
|
@current_text = nil
|
34
36
|
@current_request = nil
|
37
|
+
@processing_JB_stdout_path_list = false
|
35
38
|
|
36
39
|
@processing_job_array_spec = false
|
37
40
|
@adding_slots = false
|
@@ -42,6 +45,7 @@ class QstatXmlJRListener
|
|
42
45
|
step: 1, # Step can have a default of 1
|
43
46
|
}
|
44
47
|
@running_tasks = []
|
48
|
+
@native_tags = ['JB_job_number', 'JB_job_name', 'JB_version', 'JB_project', 'JB_exec_file', 'JB_script_file', 'JB_script_size', 'JB_submission_time', 'JB_execution_time', 'JB_deadline', 'JB_owner', 'JB_uid', 'JB_group', 'JB_gid', 'JB_account', 'JB_cwd', 'JB_notify', 'JB_type', 'JB_reserve', 'JB_priority', 'JB_jobshare', 'JB_verify', 'JB_checkpoint_attr', 'JB_checkpoint_interval', 'JB_restart']
|
45
49
|
end
|
46
50
|
|
47
51
|
def tag_start(name, attrs)
|
@@ -50,10 +54,17 @@ class QstatXmlJRListener
|
|
50
54
|
toggle_processing_array_spec
|
51
55
|
when 'JB_pe_range'
|
52
56
|
toggle_adding_slots
|
57
|
+
when 'JB_stdout_path_list'
|
58
|
+
@processing_JB_stdout_path_list = true
|
53
59
|
end
|
54
60
|
end
|
55
61
|
|
56
62
|
def tag_end(name)
|
63
|
+
#Add to native hash if in native_tags
|
64
|
+
if (@native_tags.include?(name))
|
65
|
+
@parsed_job[:native][:"#{name}"] = @current_text
|
66
|
+
end
|
67
|
+
|
57
68
|
case name
|
58
69
|
when 'JB_ja_tasks'
|
59
70
|
end_JB_ja_tasks
|
@@ -92,6 +103,10 @@ class QstatXmlJRListener
|
|
92
103
|
toggle_processing_array_spec
|
93
104
|
when 'JB_pe_range'
|
94
105
|
toggle_adding_slots
|
106
|
+
when 'PN_path'
|
107
|
+
end_PN_path
|
108
|
+
when 'ST_name'
|
109
|
+
end_ST_name
|
95
110
|
end
|
96
111
|
end
|
97
112
|
|
@@ -118,7 +133,7 @@ class QstatXmlJRListener
|
|
118
133
|
end
|
119
134
|
|
120
135
|
def end_JB_submission_time
|
121
|
-
@parsed_job[:submission_time] = @current_text.to_i
|
136
|
+
@parsed_job[:submission_time] = ms_to_seconds(@current_text.to_i)
|
122
137
|
end
|
123
138
|
|
124
139
|
def end_JB_ja_tasks
|
@@ -127,7 +142,7 @@ class QstatXmlJRListener
|
|
127
142
|
|
128
143
|
def end_JAT_start_time
|
129
144
|
@parsed_job[:status] = :running
|
130
|
-
@parsed_job[:dispatch_time] = @current_text.to_i
|
145
|
+
@parsed_job[:dispatch_time] = ms_to_seconds(@current_text.to_i)
|
131
146
|
@parsed_job[:wallclock_time] = Time.now.to_i - @parsed_job[:dispatch_time]
|
132
147
|
end
|
133
148
|
|
@@ -153,6 +168,15 @@ class QstatXmlJRListener
|
|
153
168
|
@running_tasks << @current_text
|
154
169
|
end
|
155
170
|
|
171
|
+
def end_PN_path
|
172
|
+
@parsed_job[:native][:PN_path] = @current_text if @processing_JB_stdout_path_list
|
173
|
+
@processing_JB_stdout_path_list = false
|
174
|
+
end
|
175
|
+
|
176
|
+
def end_ST_name
|
177
|
+
@parsed_job[:native][:ST_name] = @parsed_job[:native][:ST_name] + @current_text + ' '
|
178
|
+
end
|
179
|
+
|
156
180
|
def set_job_array_piece(key)
|
157
181
|
@job_array_spec[key] = @current_text if @processing_job_array_spec
|
158
182
|
end
|
@@ -201,5 +225,12 @@ class QstatXmlJRListener
|
|
201
225
|
def set_slots
|
202
226
|
@parsed_job[:procs] = @current_text.to_i
|
203
227
|
end
|
204
|
-
end
|
205
228
|
|
229
|
+
private
|
230
|
+
|
231
|
+
# Some Grid Engines (like UGE) use milliseconds where others use
|
232
|
+
# seconds past the epoch.
|
233
|
+
def ms_to_seconds(raw)
|
234
|
+
raw.digits.length >= 13 ? raw / 1000 : raw
|
235
|
+
end
|
236
|
+
end
|
@@ -24,21 +24,32 @@ class QstatXmlRListener
|
|
24
24
|
@parsed_jobs = []
|
25
25
|
@current_job = {
|
26
26
|
:tasks => [],
|
27
|
-
:native => {
|
27
|
+
:native => {
|
28
|
+
:ST_name => ''
|
29
|
+
}
|
28
30
|
}
|
29
31
|
@current_text = nil
|
32
|
+
@processing_JB_stdout_path_list = false
|
30
33
|
|
31
34
|
@current_request = nil
|
35
|
+
@native_tags = ['JB_job_number', 'JB_job_name', 'JB_version', 'JB_project', 'JB_exec_file', 'JB_script_file', 'JB_script_size', 'JB_submission_time', 'JB_execution_time', 'JB_deadline', 'JB_owner', 'JB_uid', 'JB_group', 'JB_gid', 'JB_account', 'JB_cwd', 'JB_notify', 'JB_type', 'JB_reserve', 'JB_priority', 'JB_jobshare', 'JB_verify', 'JB_checkpoint_attr', 'JB_checkpoint_interval', 'JB_restart']
|
32
36
|
end
|
33
37
|
|
34
38
|
def tag_start(name, attributes)
|
35
39
|
case name
|
36
40
|
when 'hard_request'
|
37
41
|
start_hard_request(attributes)
|
42
|
+
when "JB_stdout_path_list"
|
43
|
+
@processing_JB_stdout_path_list = true
|
38
44
|
end
|
39
45
|
end
|
40
46
|
|
41
47
|
def tag_end(name)
|
48
|
+
#Add text if in native_tags
|
49
|
+
if (@native_tags.include?(name))
|
50
|
+
@current_job[:native][:"#{name}"] = @current_text
|
51
|
+
end
|
52
|
+
|
42
53
|
case name
|
43
54
|
when 'job_list'
|
44
55
|
end_job_list
|
@@ -64,6 +75,10 @@ class QstatXmlRListener
|
|
64
75
|
end_hard_request
|
65
76
|
when 'tasks'
|
66
77
|
add_child_tasks
|
78
|
+
when 'PN_path'
|
79
|
+
end_PN_path
|
80
|
+
when 'ST_name'
|
81
|
+
end_ST_name
|
67
82
|
end
|
68
83
|
end
|
69
84
|
|
@@ -130,6 +145,15 @@ class QstatXmlRListener
|
|
130
145
|
end
|
131
146
|
end
|
132
147
|
|
148
|
+
def end_PN_path
|
149
|
+
@current_job[:native][:PN_path] = @current_text if @processing_JB_stdout_path_list
|
150
|
+
@processing_JB_stdout_path_list = false
|
151
|
+
end
|
152
|
+
|
153
|
+
def end_ST_name
|
154
|
+
@current_job[:native][:ST_name] = @current_job[:native][:ST_name] + @current_text + ' '
|
155
|
+
end
|
156
|
+
|
133
157
|
# Store a completed job and reset current_job for the next pass
|
134
158
|
def end_job_list
|
135
159
|
@parsed_jobs << @current_job
|
@@ -145,4 +169,3 @@ class QstatXmlRListener
|
|
145
169
|
}
|
146
170
|
end
|
147
171
|
end
|
148
|
-
|
@@ -14,13 +14,17 @@ module OodCore
|
|
14
14
|
# @option config [Object] :conf (nil) Path to the slurm conf
|
15
15
|
# @option config [Object] :bin (nil) Path to slurm client binaries
|
16
16
|
# @option config [#to_h] :bin_overrides ({}) Optional overrides to Slurm client executables
|
17
|
+
# @option config [Object] :submit_host ("") Submit job on login node via ssh
|
18
|
+
# @option config [Object] :strict_host_checking (true) Whether to use strict host checking when ssh to submit_host
|
17
19
|
def self.build_slurm(config)
|
18
20
|
c = config.to_h.symbolize_keys
|
19
|
-
cluster
|
20
|
-
conf
|
21
|
-
bin
|
22
|
-
bin_overrides
|
23
|
-
|
21
|
+
cluster = c.fetch(:cluster, nil)
|
22
|
+
conf = c.fetch(:conf, nil)
|
23
|
+
bin = c.fetch(:bin, nil)
|
24
|
+
bin_overrides = c.fetch(:bin_overrides, {})
|
25
|
+
submit_host = c.fetch(:submit_host, "")
|
26
|
+
strict_host_checking = c.fetch(:strict_host_checking, true)
|
27
|
+
slurm = Adapters::Slurm::Batch.new(cluster: cluster, conf: conf, bin: bin, bin_overrides: bin_overrides, submit_host: submit_host, strict_host_checking: strict_host_checking)
|
24
28
|
Adapters::Slurm.new(slurm: slurm)
|
25
29
|
end
|
26
30
|
end
|
@@ -62,6 +66,16 @@ module OodCore
|
|
62
66
|
# @return Hash<String, String>
|
63
67
|
attr_reader :bin_overrides
|
64
68
|
|
69
|
+
# The login node where the job is submitted via ssh
|
70
|
+
# @example owens.osc.edu
|
71
|
+
# @return [String] The login node
|
72
|
+
attr_reader :submit_host
|
73
|
+
|
74
|
+
# Whether to use strict host checking when ssh to submit_host
|
75
|
+
# @example false
|
76
|
+
# @return [Bool]; true if empty
|
77
|
+
attr_reader :strict_host_checking
|
78
|
+
|
65
79
|
# The root exception class that all Slurm-specific exceptions inherit
|
66
80
|
# from
|
67
81
|
class Error < StandardError; end
|
@@ -69,11 +83,16 @@ module OodCore
|
|
69
83
|
# @param cluster [#to_s, nil] the cluster name
|
70
84
|
# @param conf [#to_s, nil] path to the slurm conf
|
71
85
|
# @param bin [#to_s] path to slurm installation binaries
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
@
|
86
|
+
# @param bin_overrides [#to_h] a hash of bin overrides to be used in job
|
87
|
+
# @param submit_host [#to_s] Submits the job on a login node via ssh
|
88
|
+
# @param strict_host_checking [Bool] Whether to use strict host checking when ssh to submit_host
|
89
|
+
def initialize(cluster: nil, bin: nil, conf: nil, bin_overrides: {}, submit_host: "", strict_host_checking: true)
|
90
|
+
@cluster = cluster && cluster.to_s
|
91
|
+
@conf = conf && Pathname.new(conf.to_s)
|
92
|
+
@bin = Pathname.new(bin.to_s)
|
93
|
+
@bin_overrides = bin_overrides
|
94
|
+
@submit_host = submit_host.to_s
|
95
|
+
@strict_host_checking = strict_host_checking
|
77
96
|
end
|
78
97
|
|
79
98
|
# Get a list of hashes detailing each of the jobs on the batch server
|
@@ -148,9 +167,9 @@ module OodCore
|
|
148
167
|
#TODO: write some barebones test for this? like 2 options and id or no id
|
149
168
|
def squeue_args(id: "", owner: nil, options: [])
|
150
169
|
args = ["--all", "--states=all", "--noconvert"]
|
151
|
-
args
|
152
|
-
args
|
153
|
-
args
|
170
|
+
args.concat ["-o", "#{RECORD_SEPARATOR}#{options.join(UNIT_SEPARATOR)}"]
|
171
|
+
args.concat ["-u", owner.to_s] unless owner.to_s.empty?
|
172
|
+
args.concat ["-j", id.to_s] unless id.to_s.empty?
|
154
173
|
args
|
155
174
|
end
|
156
175
|
|
@@ -275,11 +294,15 @@ module OodCore
|
|
275
294
|
# Call a forked Slurm command for a given cluster
|
276
295
|
def call(cmd, *args, env: {}, stdin: "")
|
277
296
|
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
|
297
|
+
|
278
298
|
args = args.map(&:to_s)
|
279
|
-
args
|
299
|
+
args.concat ["-M", cluster] if cluster
|
300
|
+
|
280
301
|
env = env.to_h
|
281
302
|
env["SLURM_CONF"] = conf.to_s if conf
|
282
|
-
|
303
|
+
|
304
|
+
cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
|
305
|
+
o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
|
283
306
|
s.success? ? o : raise(Error, e)
|
284
307
|
end
|
285
308
|
|
@@ -358,30 +381,31 @@ module OodCore
|
|
358
381
|
# Set sbatch options
|
359
382
|
args = []
|
360
383
|
# ignore args, don't know how to do this for slurm
|
361
|
-
args
|
362
|
-
args
|
363
|
-
args
|
364
|
-
args
|
384
|
+
args.concat ["-H"] if script.submit_as_hold
|
385
|
+
args.concat (script.rerunnable ? ["--requeue"] : ["--no-requeue"]) unless script.rerunnable.nil?
|
386
|
+
args.concat ["-D", script.workdir.to_s] unless script.workdir.nil?
|
387
|
+
args.concat ["--mail-user", script.email.join(",")] unless script.email.nil?
|
365
388
|
if script.email_on_started && script.email_on_terminated
|
366
|
-
args
|
389
|
+
args.concat ["--mail-type", "ALL"]
|
367
390
|
elsif script.email_on_started
|
368
|
-
args
|
391
|
+
args.concat ["--mail-type", "BEGIN"]
|
369
392
|
elsif script.email_on_terminated
|
370
|
-
args
|
393
|
+
args.concat ["--mail-type", "END"]
|
371
394
|
elsif script.email_on_started == false && script.email_on_terminated == false
|
372
|
-
args
|
395
|
+
args.concat ["--mail-type", "NONE"]
|
373
396
|
end
|
374
|
-
args
|
375
|
-
args
|
376
|
-
args
|
377
|
-
args
|
378
|
-
args
|
379
|
-
args
|
380
|
-
args
|
381
|
-
args
|
382
|
-
args
|
383
|
-
args
|
384
|
-
args
|
397
|
+
args.concat ["-J", script.job_name] unless script.job_name.nil?
|
398
|
+
args.concat ["-i", script.input_path] unless script.input_path.nil?
|
399
|
+
args.concat ["-o", script.output_path] unless script.output_path.nil?
|
400
|
+
args.concat ["-e", script.error_path] unless script.error_path.nil?
|
401
|
+
args.concat ["--reservation", script.reservation_id] unless script.reservation_id.nil?
|
402
|
+
args.concat ["-p", script.queue_name] unless script.queue_name.nil?
|
403
|
+
args.concat ["--priority", script.priority] unless script.priority.nil?
|
404
|
+
args.concat ["--begin", script.start_time.localtime.strftime("%C%y-%m-%dT%H:%M:%S")] unless script.start_time.nil?
|
405
|
+
args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
|
406
|
+
args.concat ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
|
407
|
+
args.concat ['-a', script.job_array_request] unless script.job_array_request.nil?
|
408
|
+
args.concat ['--qos', script.qos] unless script.qos.nil?
|
385
409
|
# ignore nodes, don't know how to do this for slurm
|
386
410
|
|
387
411
|
# Set dependencies
|
@@ -390,14 +414,14 @@ module OodCore
|
|
390
414
|
depend << "afterok:#{afterok.join(":")}" unless afterok.empty?
|
391
415
|
depend << "afternotok:#{afternotok.join(":")}" unless afternotok.empty?
|
392
416
|
depend << "afterany:#{afterany.join(":")}" unless afterany.empty?
|
393
|
-
args
|
417
|
+
args.concat ["-d", depend.join(",")] unless depend.empty?
|
394
418
|
|
395
419
|
# Set environment variables
|
396
420
|
env = script.job_environment || {}
|
397
|
-
args
|
421
|
+
args.concat ["--export", export_arg(env, script.copy_environment?)]
|
398
422
|
|
399
423
|
# Set native options
|
400
|
-
args
|
424
|
+
args.concat script.native if script.native
|
401
425
|
|
402
426
|
# Set content
|
403
427
|
content = if script.shell_path.nil?
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "ood_core/refinements/hash_extensions"
|
2
2
|
require "ood_core/job/adapters/helper"
|
3
|
+
require 'shellwords'
|
3
4
|
|
4
5
|
module OodCore
|
5
6
|
module Job
|
@@ -9,16 +10,18 @@ module OodCore
|
|
9
10
|
# Build the Torque adapter from a configuration
|
10
11
|
# @param config [#to_h] the configuration for job adapter
|
11
12
|
# @option config [#to_s] :host The batch server host
|
13
|
+
# @option config [#to_s] :submit_host The login node to submit the job via ssh
|
12
14
|
# @option config [#to_s] :lib ('') Path to torque client libraries
|
13
15
|
# @option config [#to_s] :bin ('') Path to torque client binaries
|
14
16
|
# @option config [#to_h] :custom_bin ({}) Optional overrides to Torque client executables
|
15
17
|
def self.build_torque(config)
|
16
18
|
c = config.to_h.symbolize_keys
|
17
19
|
host = c.fetch(:host) { raise ArgumentError, "No host specified. Missing argument: host" }.to_s
|
20
|
+
submit_host = c.fetch(:submit_host, "").to_s
|
18
21
|
lib = c.fetch(:lib, "").to_s
|
19
22
|
bin = c.fetch(:bin, "").to_s
|
20
23
|
custom_bin = c.fetch(:custom_bin, {})
|
21
|
-
pbs = Adapters::Torque::Batch.new(host: host, lib: lib, bin: bin, custom_bin: custom_bin)
|
24
|
+
pbs = Adapters::Torque::Batch.new(host: host, submit_host: submit_host, lib: lib, bin: bin, custom_bin: custom_bin)
|
22
25
|
Adapters::Torque.new(pbs: pbs)
|
23
26
|
end
|
24
27
|
end
|
@@ -85,7 +88,7 @@ module OodCore
|
|
85
88
|
depend << "afterany:#{afterany.join(':')}" unless afterany.empty?
|
86
89
|
|
87
90
|
# Set mailing options
|
88
|
-
mail_points
|
91
|
+
mail_points = ""
|
89
92
|
mail_points += "b" if script.email_on_started
|
90
93
|
mail_points += "e" if script.email_on_terminated
|
91
94
|
|
@@ -129,40 +132,44 @@ module OodCore
|
|
129
132
|
envvars.merge! script.native.fetch(:envvars, {})
|
130
133
|
end
|
131
134
|
|
135
|
+
# Destructively change envvars to shellescape values
|
136
|
+
envvars.transform_values! { |v| Shellwords.escape(v) }
|
137
|
+
|
132
138
|
# Submit job
|
133
139
|
@pbs.submit_string(script.content, queue: script.queue_name, headers: headers, resources: resources, envvars: envvars)
|
134
140
|
else
|
135
141
|
# Set qsub arguments
|
136
142
|
args = []
|
137
|
-
args
|
138
|
-
args
|
139
|
-
args
|
140
|
-
args
|
141
|
-
args
|
142
|
-
args
|
143
|
-
args
|
143
|
+
args.concat ["-F", script.args.join(" ")] unless script.args.nil?
|
144
|
+
args.concat ["-h"] if script.submit_as_hold
|
145
|
+
args.concat ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
|
146
|
+
args.concat ["-M", script.email.join(",")] unless script.email.nil?
|
147
|
+
args.concat ["-m", mail_points] unless mail_points.empty?
|
148
|
+
args.concat ["-N", script.job_name] unless script.job_name.nil?
|
149
|
+
args.concat ["-S", script.shell_path] unless script.shell_path.nil?
|
144
150
|
# ignore input_path (not defined in Torque)
|
145
|
-
args
|
146
|
-
args
|
147
|
-
args
|
148
|
-
args
|
149
|
-
args
|
150
|
-
args
|
151
|
-
args
|
152
|
-
args
|
153
|
-
args
|
154
|
-
args
|
151
|
+
args.concat ["-o", script.output_path] unless script.output_path.nil?
|
152
|
+
args.concat ["-e", script.error_path] unless script.error_path.nil?
|
153
|
+
args.concat ["-W", "x=advres:#{script.reservation_id}"] unless script.reservation_id.nil?
|
154
|
+
args.concat ["-q", script.queue_name] unless script.queue_name.nil?
|
155
|
+
args.concat ["-p", script.priority] unless script.priority.nil?
|
156
|
+
args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
|
157
|
+
args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
|
158
|
+
args.concat ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
|
159
|
+
args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
|
160
|
+
args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
|
161
|
+
args.concat ['-l', "qos=#{script.qos}"] unless script.qos.nil?
|
155
162
|
# Set environment variables
|
156
163
|
env = script.job_environment.to_h
|
157
|
-
args
|
158
|
-
args
|
164
|
+
args.concat ["-v", env.keys.join(",")] unless env.empty?
|
165
|
+
args.concat ["-V"] if script.copy_environment?
|
159
166
|
|
160
167
|
# If error_path is not specified we join stdout & stderr (as this
|
161
168
|
# mimics what the other resource managers do)
|
162
|
-
args
|
169
|
+
args.concat ["-j", "oe"] if script.error_path.nil?
|
163
170
|
|
164
171
|
# Set native options
|
165
|
-
args
|
172
|
+
args.concat script.native if script.native
|
166
173
|
|
167
174
|
# Submit job
|
168
175
|
@pbs.submit(script.content, args: args, env: env, chdir: script.workdir)
|
@@ -9,6 +9,18 @@ class OodCore::Job::Adapters::Torque
|
|
9
9
|
# @return [String] the batch server host
|
10
10
|
attr_reader :host
|
11
11
|
|
12
|
+
# The login node where job is submitted via ssh
|
13
|
+
# @example OSC's owens login node
|
14
|
+
# my_conn.submit_host #=> "owens.osc.edu"
|
15
|
+
# @return [String] the login node
|
16
|
+
attr_reader :submit_host
|
17
|
+
|
18
|
+
# Determines whether to use strict_host_checking for ssh
|
19
|
+
# @example
|
20
|
+
# my_conn.strict_host_checking.to_s #=> "true"
|
21
|
+
# @return [Bool]
|
22
|
+
attr_reader :strict_host_checking
|
23
|
+
|
12
24
|
# The path to the Torque client installation libraries
|
13
25
|
# @example For Torque 5.0.0
|
14
26
|
# my_conn.lib.to_s #=> "/usr/local/Torque/5.0.0/lib"
|
@@ -32,19 +44,23 @@ class OodCore::Job::Adapters::Torque
|
|
32
44
|
class Error < StandardError; end
|
33
45
|
|
34
46
|
# @param host [#to_s] the batch server host
|
47
|
+
# @param submit_host [#to_s] the login node
|
48
|
+
# @param strict_host_checking [bool] use strict host checking when ssh to submit_host
|
35
49
|
# @param lib [#to_s] path to FFI installation libraries
|
36
50
|
# @param bin [#to_s] path to FFI installation binaries
|
37
|
-
def initialize(host:, lib: "", bin: "", bin_overrides: {}, **_)
|
38
|
-
@host
|
39
|
-
@
|
40
|
-
@
|
41
|
-
@
|
51
|
+
def initialize(host:, submit_host: "", strict_host_checking: true, lib: "", bin: "", bin_overrides: {}, **_)
|
52
|
+
@host = host.to_s
|
53
|
+
@submit_host = submit_host.to_s
|
54
|
+
@strict_host_checking = strict_host_checking
|
55
|
+
@lib = Pathname.new(lib.to_s)
|
56
|
+
@bin = Pathname.new(bin.to_s)
|
57
|
+
@bin_overrides = bin_overrides
|
42
58
|
end
|
43
59
|
|
44
60
|
# Convert object to hash
|
45
61
|
# @return [Hash] the hash describing this object
|
46
62
|
def to_h
|
47
|
-
{host: host, lib: lib, bin: bin}
|
63
|
+
{host: host, submit_host: submit_host, strict_host_checking: strict_host_checking, lib: lib, bin: bin}
|
48
64
|
end
|
49
65
|
|
50
66
|
# The comparison operator
|
@@ -437,10 +453,10 @@ class OodCore::Job::Adapters::Torque
|
|
437
453
|
# NB: The binary includes many useful filters and is preferred
|
438
454
|
def qsub_submit(script, queue, headers, resources, envvars)
|
439
455
|
params = []
|
440
|
-
params
|
441
|
-
params
|
442
|
-
params
|
443
|
-
params
|
456
|
+
params.concat ["-q", "#{queue}"] unless queue.empty?
|
457
|
+
params.concat headers.map {|k,v| qsub_arg(k,v)}.flatten
|
458
|
+
params.concat resources.map{|k,v| ["-l", "#{k}=#{v}"]}.flatten
|
459
|
+
params.concat ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
|
444
460
|
params << script
|
445
461
|
|
446
462
|
env = {
|
@@ -448,6 +464,7 @@ class OodCore::Job::Adapters::Torque
|
|
448
464
|
"LD_LIBRARY_PATH" => "#{lib}:#{ENV['LD_LIBRARY_PATH']}"
|
449
465
|
}
|
450
466
|
cmd = OodCore::Job::Adapters::Helper.bin_path('qsub', bin, bin_overrides)
|
467
|
+
cmd, params = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, params, strict_host_checking, env)
|
451
468
|
o, e, s = Open3.capture3(env, cmd, *params)
|
452
469
|
raise Error, e unless s.success?
|
453
470
|
o.chomp
|
@@ -456,14 +473,14 @@ class OodCore::Job::Adapters::Torque
|
|
456
473
|
# Call a forked PBS command for a given host
|
457
474
|
def call(cmd, *args, env: {}, stdin: "", chdir: nil)
|
458
475
|
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
|
459
|
-
args = args.map(&:to_s)
|
460
476
|
env = env.to_h.each_with_object({}) {|(k,v), h| h[k.to_s] = v.to_s}.merge({
|
461
477
|
"PBS_DEFAULT" => host,
|
462
478
|
"LD_LIBRARY_PATH" => %{#{lib}:#{ENV["LD_LIBRARY_PATH"]}}
|
463
479
|
})
|
480
|
+
cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
|
464
481
|
stdin = stdin.to_s
|
465
482
|
chdir ||= "."
|
466
|
-
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin, chdir: chdir.to_s)
|
483
|
+
o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin, chdir: chdir.to_s)
|
467
484
|
s.success? ? o : raise(Error, e)
|
468
485
|
end
|
469
486
|
end
|
data/lib/ood_core/job/script.rb
CHANGED
@@ -99,6 +99,10 @@ module OodCore
|
|
99
99
|
# @return [String, nil] job array request
|
100
100
|
attr_reader :job_array_request
|
101
101
|
|
102
|
+
# The qos selected for the job
|
103
|
+
# @return [String, nil] qos
|
104
|
+
attr_reader :qos
|
105
|
+
|
102
106
|
# Object detailing any native specifications that are implementation specific
|
103
107
|
# @note Should be avoided at all costs.
|
104
108
|
# @return [Object, nil] native specifications
|
@@ -130,6 +134,8 @@ module OodCore
|
|
130
134
|
# @param start_time [#to_i, nil] eligible start time
|
131
135
|
# @param wall_time [#to_i, nil] max real time
|
132
136
|
# @param accounting_id [#to_s, nil] accounting id
|
137
|
+
# @param job_array_request [#to_s, nil] job array request
|
138
|
+
# @param qos [#to_s, nil] qos
|
133
139
|
# @param native [Object, nil] native specifications
|
134
140
|
# @param copy_environment [Boolean, nil] copy the environment
|
135
141
|
def initialize(content:, args: nil, submit_as_hold: nil, rerunnable: nil,
|
@@ -139,7 +145,7 @@ module OodCore
|
|
139
145
|
output_path: nil, error_path: nil, reservation_id: nil,
|
140
146
|
queue_name: nil, priority: nil, start_time: nil,
|
141
147
|
wall_time: nil, accounting_id: nil, job_array_request: nil,
|
142
|
-
native: nil, copy_environment: nil, **_)
|
148
|
+
qos: nil, native: nil, copy_environment: nil, **_)
|
143
149
|
@content = content.to_s
|
144
150
|
|
145
151
|
@submit_as_hold = submit_as_hold
|
@@ -163,6 +169,7 @@ module OodCore
|
|
163
169
|
@wall_time = wall_time && wall_time.to_i
|
164
170
|
@accounting_id = accounting_id && accounting_id.to_s
|
165
171
|
@job_array_request = job_array_request && job_array_request.to_s
|
172
|
+
@qos = qos && qos.to_s
|
166
173
|
@native = native
|
167
174
|
@copy_environment = (copy_environment.nil?) ? nil : !! copy_environment
|
168
175
|
end
|
@@ -192,6 +199,7 @@ module OodCore
|
|
192
199
|
wall_time: wall_time,
|
193
200
|
accounting_id: accounting_id,
|
194
201
|
job_array_request: job_array_request,
|
202
|
+
qos: qos,
|
195
203
|
native: native,
|
196
204
|
copy_environment: copy_environment
|
197
205
|
}
|
data/lib/ood_core/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2020-05
|
13
|
+
date: 2020-08-05 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|