ood_core 0.11.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -1
- data/README.md +6 -5
- data/lib/ood_core/job/adapters/ccq.rb +267 -0
- data/lib/ood_core/job/adapters/helper.rb +20 -1
- data/lib/ood_core/job/adapters/kubernetes.rb +193 -0
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +350 -0
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +298 -0
- data/lib/ood_core/job/adapters/kubernetes/resources.rb +56 -0
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +123 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +22 -9
- data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh +18 -15
- data/lib/ood_core/job/adapters/lsf.rb +1 -0
- data/lib/ood_core/job/adapters/lsf/batch.rb +5 -3
- data/lib/ood_core/job/adapters/lsf/helper.rb +22 -22
- data/lib/ood_core/job/adapters/pbspro.rb +54 -34
- data/lib/ood_core/job/adapters/sge/batch.rb +6 -5
- data/lib/ood_core/job/adapters/sge/helper.rb +19 -19
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +35 -4
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +25 -2
- data/lib/ood_core/job/adapters/slurm.rb +98 -41
- data/lib/ood_core/job/adapters/torque.rb +30 -23
- data/lib/ood_core/job/adapters/torque/batch.rb +29 -12
- data/lib/ood_core/job/script.rb +10 -1
- data/lib/ood_core/version.rb +1 -1
- metadata +9 -3
data/lib/ood_core/job/adapters/kubernetes/resources.rb

```diff
@@ -0,0 +1,56 @@
+module OodCore::Job::Adapters::Kubernetes::Resources
+
+  class ConfigMap
+    attr_accessor :name, :filename, :data
+
+    def initialize(name, filename, data)
+      @name = name
+      @filename = filename
+      @data = data
+    end
+  end
+
+  class Container
+    attr_accessor :name, :image, :command, :port, :env, :memory, :cpu, :working_dir,
+                  :restart_policy
+
+    def initialize(
+      name, image, command: [], port: nil, env: [], memory: "4Gi", cpu: "1",
+      working_dir: "", restart_policy: "Never"
+    )
+      raise ArgumentError, "containers need valid names and images" unless name && image
+
+      @name = name
+      @image = image
+      @command = command.nil? ? [] : command
+      @port = port&.to_i
+      @env = env.nil? ? [] : env
+      @memory = memory.nil? ? "4Gi" : memory
+      @cpu = cpu.nil? ? "1" : cpu
+      @working_dir = working_dir.nil? ? "" : working_dir
+      @restart_policy = restart_policy.nil? ? "Never" : restart_policy
+    end
+
+    def ==(other)
+      name == other.name &&
+        image == other.image &&
+        command == other.command &&
+        port == other.port &&
+        env == other.env &&
+        memory == other.memory &&
+        cpu == other.cpu &&
+        working_dir == other.working_dir &&
+        restart_policy == other.restart_policy
+    end
+
+  end
+
+  class PodSpec
+    attr_accessor :container, :init_containers
+    def initialize(container, init_containers: nil)
+      @container = container
+      @init_containers = init_containers
+    end
+  end
+
+end
```
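For orientation, a minimal usage sketch of the new resource classes. The image name, port, and env values below are illustrative, not taken from the diff:

```ruby
require 'ood_core'
require 'ood_core/job/adapters/kubernetes/resources'

resources = OodCore::Job::Adapters::Kubernetes::Resources

container = resources::Container.new(
  'rstudio',                          # name (required)
  'rocker/rstudio:4.1.0',             # image (required); value illustrative
  port: 8787,                         # coerced via port&.to_i
  env: [{ name: 'TZ', value: 'UTC' }] # consumed as env[:name]/env[:value] by the template
)

pod_spec = resources::PodSpec.new(container) # init_containers defaults to nil
```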
data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb

```diff
@@ -0,0 +1,123 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  namespace: <%= namespace %>
+  name: <%= id %>
+  labels:
+    job: <%= id %>
+    app.kubernetes.io/name: <%= container.name %>
+    app.kubernetes.io/managed-by: open-ondemand
+spec:
+  restartPolicy: <%= spec.container.restart_policy %>
+  securityContext:
+    runAsUser: <%= run_as_user %>
+    runAsGroup: <%= run_as_group %>
+    fsGroup: <%= fs_group %>
+  containers:
+  - name: "<%= spec.container.name %>"
+    image: <%= spec.container.image %>
+    imagePullPolicy: IfNotPresent
+    <% unless spec.container.working_dir.empty? %>
+    workingDir: "<%= spec.container.working_dir %>"
+    <% end %>
+    <% unless spec.container.env.empty? %>
+    env:
+    <% spec.container.env.each do |env| %>
+    - name: <%= env[:name] %>
+      value: "<%= env[:value] %>"
+    <% end %> <%# for each env %>
+    <% end %> <%# unless env is nil %>
+    <% unless spec.container.command.empty? %>
+    command:
+    <% spec.container.command.each do |cmd| %>
+    - "<%= cmd %>"
+    <% end %> <%# for each command %>
+    <% end %> <%# unless command is nil %>
+    <% unless spec.container.port.nil? %>
+    ports:
+    - containerPort: <%= spec.container.port %>
+    <% end %>
+    volumeMounts:
+    <% unless configmap.nil? %>
+    - name: configmap-volume
+      mountPath: <%= configmap_mount_path %>
+    <% end %>
+    <% all_mounts.each do |mount| %>
+    - name: <%= mount[:name] %>
+      mountPath: <%= mount[:destination_path] %>
+    <% end %> <%# for each mount %>
+    resources:
+      limits:
+        memory: "<%= spec.container.memory %>"
+        cpu: "<%= spec.container.cpu %>"
+      requests:
+        memory: "<%= spec.container.memory %>"
+        cpu: "<%= spec.container.cpu %>"
+  <% unless spec.init_containers.nil? %>
+  initContainers:
+  <% spec.init_containers.each do |ctr| %>
+  - name: "<%= ctr.name %>"
+    image: "<%= ctr.image %>"
+    command:
+    <% ctr.command.each do |cmd| %>
+    - "<%= cmd %>"
+    <% end %> <%# command loop %>
+    volumeMounts:
+    <% unless configmap.nil? %>
+    - name: configmap-volume
+      mountPath: <%= configmap_mount_path %>
+    <% end %>
+    <% all_mounts.each do |mount| %>
+    - name: <%= mount[:name] %>
+      mountPath: <%= mount[:destination_path] %>
+    <% end %> <%# for each mount %>
+  <% end %> <%# init container loop %>
+  <% end %> <%# if init containers %>
+  <% unless configmap.nil? || all_mounts.empty? %>
+  volumes:
+  <% end %> <%# configmap.nil? || all_mounts.empty? %>
+  <% unless configmap.nil? %>
+  - name: configmap-volume
+    configMap:
+      name: <%= configmap_name(id) %>
+  <% end %>
+  <% all_mounts.each do |mount| %>
+  <% if mount[:type] == 'nfs' %>
+  - name: <%= mount[:name] %>
+    nfs:
+      server: <%= mount[:host] %>
+      path: <%= mount[:path] %>
+  <% elsif mount[:type] == 'host' %>
+  - name: <%= mount[:name] %>
+    hostPath:
+      path: <%= mount[:path] %>
+      type: <%= mount[:host_type] %>
+  <% end %> <%# if mount is [host,nfs] %>
+  <% end %> <%# for each mount %>
+---
+<% unless spec.container.port.nil? %>
+apiVersion: v1
+kind: Service
+metadata:
+  name: <%= service_name(id) %>
+  namespace: <%= namespace %>
+spec:
+  selector:
+    job: <%= id %>
+  ports:
+  - protocol: TCP
+    port: 80
+    targetPort: <%= spec.container.port %>
+  type: NodePort
+<% end %> <%# end for service %>
+---
+<% unless configmap.nil? %>
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: <%= configmap_name(id) %>
+  namespace: <%= namespace %>
+data:
+  <%= configmap.filename %>: |
+    <% config_data_lines(configmap.data).each do |line| %><%= line %><% end %>
+<% end %> <%# end for configmap %>
```
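The rendered output is a multi-document YAML stream: always a Pod, plus a Service when `spec.container.port` is set and a ConfigMap when `configmap` is non-nil. A small, self-contained sketch of consuming such a stream (the inline manifest is a stand-in for real ERB output):

```ruby
require 'yaml'

rendered_manifest = <<~MANIFEST  # stand-in for the rendered pod.yml.erb output
  apiVersion: v1
  kind: Pod
  ---
  apiVersion: v1
  kind: Service
MANIFEST

# Empty documents (e.g. a skipped Service or ConfigMap) parse to nil; drop them.
docs = YAML.load_stream(rendered_manifest).compact
docs.map { |d| d['kind'] }  #=> ["Pod", "Service"]
```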
data/lib/ood_core/job/adapters/linux_host/launcher.rb

```diff
@@ -57,7 +57,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
   # @param hostname [#to_s] The hostname to submit the work to
   # @param script [OodCore::Job::Script] The script object defining the work
   def start_remote_session(script)
-    cmd = ssh_cmd(submit_host(script))
+    cmd = ssh_cmd(submit_host(script), ['/usr/bin/env', 'bash'])
 
     session_name = unique_session_name
     output = call(*cmd, stdin: wrapped_script(script, session_name))
```
```diff
@@ -67,13 +67,13 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
   end
 
   def stop_remote_session(session_name, hostname)
-    cmd = ssh_cmd(hostname)
+    cmd = ssh_cmd(hostname, ['/usr/bin/env', 'bash'])
 
     kill_cmd = <<~SCRIPT
     # Get the tmux pane PID for the target session
     pane_pid=$(tmux list-panes -aF '\#{session_name} \#{pane_pid}' | grep '#{session_name}' | cut -f 2 -d ' ')
     # Find the Singularity sinit PID child of the pane process
-    pane_sinit_pid=$(pstree -p "$pane_pid" | grep -o 'sinit([[:digit:]]*' | grep -o '[[:digit:]]*')
+    pane_sinit_pid=$(pstree -p -l "$pane_pid" | grep -o 'sinit([[:digit:]]*' | grep -o '[[:digit:]]*')
     # Kill sinit which stops both Singularity-based processes and the tmux session
     kill "$pane_sinit_pid"
     SCRIPT
```
```diff
@@ -116,19 +116,23 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
     s.success? ? o : raise(Error, e)
   end
 
-  # The
+  # The full command to ssh into the destination host and execute the command.
+  # SSH options include:
   # -t Force pseudo-terminal allocation (required to allow tmux to run)
   # -o BatchMode=yes (set mode to be non-interactive)
   # if ! strict_host_checking
   # -o UserKnownHostsFile=/dev/null (do not update the user's known hosts file)
   # -o StrictHostKeyChecking=no (do no check the user's known hosts file)
-  def ssh_cmd(destination_host)
+  #
+  # @param destination_host [#to_s] the destination host you wish to ssh into
+  # @param cmd [Array<#to_s>] the command to be executed on the destination host
+  def ssh_cmd(destination_host, cmd)
     if strict_host_checking
       [
         'ssh', '-t',
        '-o', 'BatchMode=yes',
         "#{username}@#{destination_host}"
-      ]
+      ].concat(cmd)
     else
       [
         'ssh', '-t',
```
```diff
@@ -136,7 +140,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
         '-o', 'UserKnownHostsFile=/dev/null',
         '-o', 'StrictHostKeyChecking=no',
         "#{username}@#{destination_host}"
-      ]
+      ].concat(cmd)
     end
   end
 
```
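Both branches now append an explicit remote command to the ssh argv instead of relying on the login shell. A standalone reproduction of the strict-host-checking branch with illustrative values:

```ruby
# Mirrors Launcher#ssh_cmd's first branch; username/host are illustrative.
username = 'ood'
destination_host = 'login01.cluster.edu'
cmd = ['/usr/bin/env', 'bash']

argv = [
  'ssh', '-t',
  '-o', 'BatchMode=yes',
  "#{username}@#{destination_host}"
].concat(cmd)
#=> ["ssh", "-t", "-o", "BatchMode=yes", "ood@login01.cluster.edu", "/usr/bin/env", "bash"]
```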
```diff
@@ -162,7 +166,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
       'email_on_terminated' => script_email_on_event(script, 'terminated'),
       'email_on_start' => script_email_on_event(script, 'started'),
       'environment' => export_env(script),
-      'error_path' => (script.error_path) ? script.error_path.to_s : '/dev/null',
+      'error_path' => error_path(script),
       'job_name' => script.job_name.to_s,
       'output_path' => (script.output_path) ? script.output_path.to_s : '/dev/null',
       'script_content' => content,
```
```diff
@@ -170,7 +174,9 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
       'session_name' => session_name,
       'singularity_bin' => singularity_bin,
       'singularity_image' => singularity_image(script.native),
+      'ssh_hosts' => ssh_hosts,
       'tmux_bin' => tmux_bin,
+      'workdir' => (script.workdir) ? script.workdir.to_s : '/tmp',
     }.each{
       |key, value| bnd.local_variable_set(key, value)
     }
```
```diff
@@ -245,7 +251,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
       ['#{session_name}', '#{session_created}', '#{pane_pid}'].join(UNIT_SEPARATOR)
     )
     keys = [:session_name, :session_created, :session_pid]
-    cmd = ssh_cmd(destination_host)
+    cmd = ssh_cmd(destination_host, ['tmux', 'list-panes', '-aF', format_str])
 
     call(*cmd).split(
       "\n"
```
```diff
@@ -267,4 +273,11 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
     return false if script.content.empty?
     script.content.split("\n").first.start_with?('#!/')
   end
+
+  def error_path(script)
+    return script.error_path.to_s if script.error_path
+    return script.output_path.to_s if script.output_path
+
+    '/dev/null'
+  end
 end
```
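The new `error_path` helper replaces the inline ternary: stderr goes to `error_path` when set, falls back to `output_path`, then to `/dev/null`. The chain, reproduced standalone (paths illustrative; the real method is private to the launcher):

```ruby
require 'ood_core'

script = OodCore::Job::Script.new(
  content: "#!/bin/bash\nhostname",
  output_path: '/home/user/out.log'  # illustrative
)

# Same fallback order as Launcher#error_path above.
path = script.error_path&.to_s || script.output_path&.to_s || '/dev/null'
#=> "/home/user/out.log"
```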
data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh

```diff
@@ -1,14 +1,24 @@
 #!/bin/bash
-
+SSH_HOSTS=(<%= ssh_hosts.join(' ').to_s %>)
+hostnames=`hostname -A`
+for host in ${SSH_HOSTS[@]}
+do
+  if [[ " ${hostnames[@]} " =~ " ${host} " ]]; then
+    hostname=$host
+  fi
+done
+
+if [ -z "$hostname" ]; then
+  printf >&2 "ERROR: Can't start job on [${hostnames[@]}] because it does not match any hostname configured \nin ssh_hosts [${SSH_HOSTS[@]}]. The output of 'hostname -A' must match an entry in ssh_hosts \nfrom the cluster configuration."
+  exit 1
+fi
+
+echo $hostname
 
 # Put the script into a temp file on localhost
-
-
-
-<% else %>
-singularity_tmp_file=$(mktemp)
-tmux_tmp_file=$(mktemp)
-<% end %>
+singularity_tmp_file=$(mktemp -p "<%= workdir %>" --suffix '_sing')
+tmux_tmp_file=$(mktemp -p "<%= workdir %>" --suffix "_tmux")
+
 
 # Create an executable to run in a tmux session
 # The escaped HEREDOC means that we need to substitute in $singularity_tmp_file ourselves
```
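The wrapper now verifies, before doing anything else, that the host it is executing on appears in the cluster's configured `ssh_hosts`; the array is interpolated from the `ssh_hosts` binding key added in launcher.rb above. What that first line renders to (host names illustrative):

```ruby
require 'erb'

ssh_hosts = ['node01.cluster.edu', 'node02.cluster.edu']  # hypothetical cluster config
ERB.new("SSH_HOSTS=(<%= ssh_hosts.join(' ').to_s %>)").result(binding)
#=> "SSH_HOSTS=(node01.cluster.edu node02.cluster.edu)"
```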
```diff
@@ -55,10 +65,3 @@ SINGULARITY_LAUNCHER
 chmod +x "$singularity_tmp_file"
 chmod +x "$tmux_tmp_file"
 <%= tmux_bin %> new-session -d -s "<%= session_name %>" "$tmux_tmp_file"
-
-# Remove the file
-<% if ! debug %>
-# Wait 1 second to ensure that tmux session has started before the file is removed
-sleep 1
-rm -f "$tmux_tmp_file"; rm -f "$singularity_tmp_file"
-<% end %>
```
data/lib/ood_core/job/adapters/lsf.rb

```diff
@@ -14,6 +14,7 @@ module OodCore
       # @option config [#to_s] :serverdir ('') Path to lsf client etc dir
       # @option config [#to_s] :cluster ('') name of cluster, if in multi-cluster mode
       # @option config [#to_h] :bin_overrides ({}) Optional overrides to LSF client executables
+      # @option config [#to_s] :submit_host ('') Host to submit commands to
       def self.build_lsf(config)
         batch = Adapters::Lsf::Batch.new(config.to_h.symbolize_keys)
         Adapters::Lsf.new(batch: batch)
```
data/lib/ood_core/job/adapters/lsf/batch.rb

```diff
@@ -2,21 +2,22 @@
 #
 # @api private
 class OodCore::Job::Adapters::Lsf::Batch
-  attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides
+  attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides, :submit_host, :strict_host_checking
 
   # The root exception class that all LSF-specific exceptions inherit
   # from
   class Error < StandardError; end
 
   # @param bin [#to_s] path to LSF installation binaries
-  def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, **_)
+  def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, submit_host: "", strict_host_checking: true, **_)
     @bindir = Pathname.new(bindir.to_s)
-
     @envdir = Pathname.new(envdir.to_s)
     @libdir = Pathname.new(libdir.to_s)
     @serverdir = Pathname.new(serverdir.to_s)
     @cluster = cluster.to_s
     @bin_overrides = bin_overrides
+    @submit_host = submit_host.to_s
+    @strict_host_checking = strict_host_checking
   end
 
   def default_env
```
```diff
@@ -143,6 +144,7 @@ class OodCore::Job::Adapters::Lsf::Batch
     cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
     args = cluster_args + args
     env = default_env.merge(env.to_h)
+    cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
     o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
     s.success? ? o : raise(Error, e)
   end
```
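`Helper.ssh_wrap` is the new entry point added in helper.rb (+20 −1 in the summary above); its implementation is not shown in this section. Inferred from the call sites here and in pbspro.rb — hedged, not authoritative — its contract appears to be: with an empty `submit_host` the command passes through unchanged, otherwise the pair is rewritten to run over ssh:

```ruby
require 'ood_core'

# Assumed pass-through behavior when no submit_host is configured; the env
# argument is omitted here since the pbspro call site omits it too.
cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap('', 'bsub', ['-J', 'job'], true)
# expected: cmd == 'bsub', args == ['-J', 'job']
# With submit_host 'login01', cmd would become 'ssh' and args would carry the
# ssh options, the host, and then the original command (shape illustrative).
```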
data/lib/ood_core/job/adapters/lsf/helper.rb

```diff
@@ -78,40 +78,40 @@ class OodCore::Job::Adapters::Lsf::Helper
   def batch_submit_args(script, after: [], afterok: [], afternotok: [], afterany: [])
     args = []
 
-    args
-    args
-    args
-    args[-1]
-
-    args
-    args
-    args
-    args
-    args
-    args
-    args
-    args
+    args.concat ["-P", script.accounting_id] unless script.accounting_id.nil?
+    args.concat ["-cwd", script.workdir.to_s] unless script.workdir.nil?
+    args.concat ["-J", script.job_name] unless script.job_name.nil?
+    args[-1].concat "[#{script.job_array_request}]" unless script.job_array_request.nil?
+
+    args.concat ["-q", script.queue_name] unless script.queue_name.nil?
+    args.concat ["-U", script.reservation_id] unless script.reservation_id.nil?
+    args.concat ["-sp", script.priority] unless script.priority.nil?
+    args.concat ["-H"] if script.submit_as_hold
+    args.concat (script.rerunnable ? ["-r"] : ["-rn"]) unless script.rerunnable.nil?
+    args.concat ["-b", script.start_time.localtime.strftime("%Y:%m:%d:%H:%M")] unless script.start_time.nil?
+    args.concat ["-W", (script.wall_time / 60).to_i] unless script.wall_time.nil?
+    args.concat ["-L", script.shell_path.to_s] unless script.shell_path.nil?
 
     # environment
     env = script.job_environment || {}
     # To preserve pre-existing behavior we only act when true or false, when nil we do nothing
     if script.copy_environment?
-      args
+      args.concat ["-env", (["all"] + env.keys).join(",")]
     elsif script.copy_environment? == false
-      args
+      args.concat ["-env", (["none"] + env.keys).join(",")]
     end
 
     # input and output files
-    args
-    args
-    args
+    args.concat ["-i", script.input_path] unless script.input_path.nil?
+    args.concat ["-o", script.output_path] unless script.output_path.nil?
+    args.concat ["-e", script.error_path] unless script.error_path.nil?
 
     # email
-    args
-    args
-    args
+    args.concat ["-B"] if script.email_on_started
+    args.concat ["-N"] if script.email_on_terminated
+    args.concat ["-u", script.email.join(",")] unless script.email.nil? || script.email.empty?
 
-    args
+    args.concat script.native unless script.native.nil?
 
     {args: args, env: env}
   end
```
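For a concrete sense of the rebuilt argument list, a hypothetical call with illustrative field values:

```ruby
require 'ood_core'

helper = OodCore::Job::Adapters::Lsf::Helper.new
script = OodCore::Job::Script.new(
  content: "#!/bin/bash\nhostname",
  job_name: 'test_job',
  wall_time: 3600  # seconds; rendered as minutes via (wall_time / 60).to_i
)

helper.batch_submit_args(script)
#=> { args: ["-J", "test_job", "-W", 60], env: {} }
```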
data/lib/ood_core/job/adapters/pbspro.rb

```diff
@@ -10,17 +10,21 @@ module OodCore
       # Build the PBS Pro adapter from a configuration
       # @param config [#to_h] the configuration for job adapter
       # @option config [Object] :host (nil) The batch server host
+      # @option config [Object] :submit_host ("") The login node where the job is submitted
+      # @option config [Object] :strict_host_checking (true) Whether to use strict host checking when ssh to submit_host
       # @option config [Object] :exec (nil) Path to PBS Pro executables
       # @option config [Object] :qstat_factor (nil) Deciding factor on how to
       #   call qstat for a user
       # @option config [#to_h] :bin_overrides ({}) Optional overrides to PBS Pro client executables
       def self.build_pbspro(config)
         c = config.to_h.compact.symbolize_keys
-        host
-
-
-
-
+        host = c.fetch(:host, nil)
+        submit_host = c.fetch(:submit_host, "")
+        strict_host_checking = c.fetch(:strict_host_checking, true)
+        pbs_exec = c.fetch(:exec, nil)
+        qstat_factor = c.fetch(:qstat_factor, nil)
+        bin_overrides = c.fetch(:bin_overrides, {})
+        pbspro = Adapters::PBSPro::Batch.new(host: host, submit_host: submit_host, strict_host_checking: strict_host_checking, pbs_exec: pbs_exec, bin_overrides: bin_overrides)
         Adapters::PBSPro.new(pbspro: pbspro, qstat_factor: qstat_factor)
       end
     end
```
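The configuration keys map one-to-one onto the `c.fetch` calls above. A hypothetical build (values illustrative):

```ruby
require 'ood_core'

adapter = OodCore::Job::Factory.build_pbspro(
  host: 'pbs.server.edu',             # :host  -> PBS_DEFAULT
  submit_host: 'login01.server.edu',  # ssh target for qsub/qstat calls
  strict_host_checking: false,        # relax known_hosts checks for that ssh
  exec: '/opt/pbs',                   # :exec  -> PBS_EXEC and the qsub bindir
  qstat_factor: 0.10,
  bin_overrides: {}
)
```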
```diff
@@ -41,6 +45,18 @@ module OodCore
       # @return [String, nil] the batch server host
       attr_reader :host
 
+      # The login node to submit the job via ssh
+      # @example
+      #   my_batch.submit_host #=> "my_batch.server.edu"
+      # @return [String, nil] the login node
+      attr_reader :submit_host
+
+      # Whether to use strict host checking when ssh to submit_host
+      # @example
+      #   my_batch.strict_host_checking #=> "false"
+      # @return [Bool, true] the login node; true if not present
+      attr_reader :strict_host_checking
+
       # The path containing the PBS executables
       # @example
       #   my_batch.pbs_exec.to_s #=> "/usr/local/pbspro/10.0.0
```
```diff
@@ -58,11 +74,15 @@ module OodCore
       class Error < StandardError; end
 
       # @param host [#to_s, nil] the batch server host
+      # @param submit_host [#to_s, nil] the login node to ssh to
+      # @param strict_host_checking [bool, true] wheter to use strict host checking when ssh to submit_host
       # @param exec [#to_s, nil] path to pbs executables
-      def initialize(host: nil, pbs_exec: nil, bin_overrides: {})
-        @host
-        @
-        @
+      def initialize(host: nil, submit_host: "", strict_host_checking: true, pbs_exec: nil, bin_overrides: {})
+        @host = host && host.to_s
+        @submit_host = submit_host && submit_host.to_s
+        @strict_host_checking = strict_host_checking
+        @pbs_exec = pbs_exec && Pathname.new(pbs_exec.to_s)
+        @bin_overrides = bin_overrides
       end
 
       # Get a list of hashes detailing each of the jobs on the batch server
```
```diff
@@ -87,7 +107,7 @@ module OodCore
       # @return [Array<Hash>] list of details for jobs
       def get_jobs(id: "")
         args = ["-f", "-t"] # display all information
-        args
+        args.concat [id.to_s] unless id.to_s.empty?
         lines = call("qstat", *args).gsub("\n\t", "").split("\n").map(&:strip)
 
         jobs = []
```
```diff
@@ -159,12 +179,12 @@ module OodCore
         cmd = cmd.to_s
         bindir = (!!pbs_exec) ? pbs_exec.join("bin").to_s : ''
         cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
-        args = args.map(&:to_s)
         env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
         env["PBS_DEFAULT"] = host.to_s if host
         env["PBS_EXEC"] = pbs_exec.to_s if pbs_exec
+        cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
         chdir ||= "."
-        o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
+        o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s, chdir: chdir.to_s)
         s.success? ? o : raise(Error, e)
       end
     end
```
```diff
@@ -227,28 +247,28 @@ module OodCore
         # Set qsub options
         args = []
         # ignore args, can't use these if submitting from STDIN
-        args
-        args
-        args
+        args.concat ["-h"] if script.submit_as_hold
+        args.concat ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
+        args.concat ["-M", script.email.join(",")] unless script.email.nil?
         if script.email_on_started && script.email_on_terminated
-          args
+          args.concat ["-m", "be"]
         elsif script.email_on_started
-          args
+          args.concat ["-m", "b"]
         elsif script.email_on_terminated
-          args
+          args.concat ["-m", "e"]
         end
-        args
-        args
+        args.concat ["-N", script.job_name] unless script.job_name.nil?
+        args.concat ["-S", script.shell_path] unless script.shell_path.nil?
         # ignore input_path (not defined in PBS Pro)
-        args
-        args
+        args.concat ["-o", script.output_path] unless script.output_path.nil?
+        args.concat ["-e", script.error_path] unless script.error_path.nil?
         # Reservations are actually just queues in PBS Pro
-        args
-        args
-        args
-        args
-        args
-        args
+        args.concat ["-q", script.reservation_id] if !script.reservation_id.nil? && script.queue_name.nil?
+        args.concat ["-q", script.queue_name] unless script.queue_name.nil?
+        args.concat ["-p", script.priority] unless script.priority.nil?
+        args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
+        args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
+        args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
 
         # Set dependencies
         depend = []
```
```diff
@@ -256,21 +276,21 @@ module OodCore
         depend << "afterok:#{afterok.join(":")}" unless afterok.empty?
         depend << "afternotok:#{afternotok.join(":")}" unless afternotok.empty?
         depend << "afterany:#{afterany.join(":")}" unless afterany.empty?
-        args
+        args.concat ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
 
         # Set environment variables
         envvars = script.job_environment.to_h
-        args
-        args
+        args.concat ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
+        args.concat ["-V"] if script.copy_environment?
 
         # If error_path is not specified we join stdout & stderr (as this
         # mimics what the other resource managers do)
-        args
+        args.concat ["-j", "oe"] if script.error_path.nil?
 
-        args
+        args.concat ["-J", script.job_array_request] unless script.job_array_request.nil?
 
         # Set native options
-        args
+        args.concat script.native if script.native
 
         # Submit job
         @pbspro.submit_string(script.content, args: args, chdir: script.workdir)
```
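End to end, the new options thread from the cluster configuration through `Helper.ssh_wrap` into every qsub/qstat invocation. A hypothetical submission exercising them, including the dependency handling above (host names and behavior notes are illustrative, not from the diff):

```ruby
require 'ood_core'

adapter = OodCore::Job::Factory.build_pbspro(
  host: 'pbs.server.edu',
  submit_host: 'login01.server.edu',  # qsub should now run over ssh on this node
  strict_host_checking: false
)

script = OodCore::Job::Script.new(content: "#!/bin/bash\nhostname", wall_time: 300)

id = adapter.submit(script)
adapter.submit(script, afterok: [id])  # dependent job: adds -W depend=afterok:<id>
```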