ood_core 0.11.3 → 0.15.0
This diff shows the changes between publicly available versions of the package as released to a supported registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +30 -0
- data/CHANGELOG.md +51 -1
- data/README.md +7 -6
- data/lib/ood_core/job/adapters/ccq.rb +274 -0
- data/lib/ood_core/job/adapters/helper.rb +20 -1
- data/lib/ood_core/job/adapters/kubernetes.rb +193 -0
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +354 -0
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +294 -0
- data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +9 -0
- data/lib/ood_core/job/adapters/kubernetes/resources.rb +56 -0
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +134 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +10 -1
- data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh +18 -15
- data/lib/ood_core/job/adapters/lsf.rb +1 -0
- data/lib/ood_core/job/adapters/lsf/batch.rb +5 -3
- data/lib/ood_core/job/adapters/lsf/helper.rb +22 -22
- data/lib/ood_core/job/adapters/pbspro.rb +54 -34
- data/lib/ood_core/job/adapters/sge/batch.rb +6 -5
- data/lib/ood_core/job/adapters/sge/helper.rb +19 -19
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +35 -4
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +25 -2
- data/lib/ood_core/job/adapters/slurm.rb +98 -41
- data/lib/ood_core/job/adapters/torque.rb +30 -23
- data/lib/ood_core/job/adapters/torque/batch.rb +29 -12
- data/lib/ood_core/job/script.rb +10 -1
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -1
- metadata +33 -7
- data/.travis.yml +0 -9
data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb

```diff
@@ -0,0 +1,9 @@
+# An object that describes a submitted kubernetes job with extended information
+class OodCore::Job::Adapters::Kubernetes::K8sJobInfo < OodCore::Job::Info
+  attr_reader :ood_connection_info
+
+  def initialize(ood_connection_info: {}, **options)
+    super(options)
+    @ood_connection_info = ood_connection_info
+  end
+end
```
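K8sJobInfo layers a single reader over `OodCore::Job::Info`; everything else passes through to `Info#initialize`. A minimal usage sketch (the id, status, and connection values are made up, and the explicit `require` assumes the adapter file is loadable outside the factory):

```ruby
require 'ood_core'
require 'ood_core/job/adapters/kubernetes' # assumed load path; the factory normally does this

info = OodCore::Job::Adapters::Kubernetes::K8sJobInfo.new(
  id: 'bash-1234',                                         # hypothetical pod id
  status: :running,
  ood_connection_info: { host: '10.20.0.40', port: 8080 }  # assumed shape of the hash
)

info.ood_connection_info #=> {:host=>"10.20.0.40", :port=>8080}
```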
data/lib/ood_core/job/adapters/kubernetes/resources.rb

```diff
@@ -0,0 +1,56 @@
+module OodCore::Job::Adapters::Kubernetes::Resources
+
+  class ConfigMap
+    attr_accessor :name, :filename, :data
+
+    def initialize(name, filename, data)
+      @name = name
+      @filename = filename
+      @data = data
+    end
+  end
+
+  class Container
+    attr_accessor :name, :image, :command, :port, :env, :memory, :cpu, :working_dir,
+                  :restart_policy
+
+    def initialize(
+      name, image, command: [], port: nil, env: [], memory: "4Gi", cpu: "1",
+      working_dir: "", restart_policy: "Never"
+    )
+      raise ArgumentError, "containers need valid names and images" unless name && image
+
+      @name = name
+      @image = image
+      @command = command.nil? ? [] : command
+      @port = port&.to_i
+      @env = env.nil? ? [] : env
+      @memory = memory.nil? ? "4Gi" : memory
+      @cpu = cpu.nil? ? "1" : cpu
+      @working_dir = working_dir.nil? ? "" : working_dir
+      @restart_policy = restart_policy.nil? ? "Never" : restart_policy
+    end
+
+    def ==(other)
+      name == other.name &&
+        image == other.image &&
+        command == other.command &&
+        port == other.port &&
+        env == other.env &&
+        memory == other.memory &&
+        cpu == other.cpu &&
+        working_dir == other.working_dir &&
+        restart_policy == other.restart_policy
+    end
+
+  end
+
+  class PodSpec
+    attr_accessor :container, :init_containers
+    def initialize(container, init_containers: nil)
+      @container = container
+      @init_containers = init_containers
+    end
+  end
+
+end
```
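These are plain value objects that the Kubernetes batch layer fills in from configuration; only `name` and `image` are mandatory, and nil options snap back to the defaults. A sketch (image, port, and env values are placeholders):

```ruby
require 'ood_core'
require 'ood_core/job/adapters/kubernetes' # assumed load path

# nil-safe defaults: command/env fall back to [], memory to "4Gi", cpu to "1".
main = OodCore::Job::Adapters::Kubernetes::Resources::Container.new(
  'rstudio', 'rocker/rstudio:4.0.0',       # hypothetical image
  port: 8787,
  env: [{ name: 'USER', value: 'annie' }]
)

init = OodCore::Job::Adapters::Kubernetes::Resources::Container.new(
  'init-files', 'busybox', command: %w[sh -c true]
)
spec = OodCore::Job::Adapters::Kubernetes::Resources::PodSpec.new(main, init_containers: [init])

spec.container.memory     #=> "4Gi" (default)
spec.init_containers.size #=> 1
```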
data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb

```diff
@@ -0,0 +1,134 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  namespace: <%= namespace %>
+  name: <%= id %>
+  labels:
+    job: <%= id %>
+    app.kubernetes.io/name: <%= container.name %>
+    app.kubernetes.io/managed-by: open-ondemand
+    <%- if !script.accounting_id.nil? && script.accounting_id != "" -%>
+    account: <%= script.accounting_id %>
+    <%- end -%>
+  annotations:
+    <%- unless script.wall_time.nil? -%>
+    pod.kubernetes.io/lifetime: <%= helper.seconds_to_duration(script.wall_time) %>
+    <%- end -%>
+spec:
+  restartPolicy: <%= spec.container.restart_policy %>
+  securityContext:
+    runAsUser: <%= run_as_user %>
+    runAsGroup: <%= run_as_group %>
+    fsGroup: <%= fs_group %>
+  containers:
+  - name: "<%= spec.container.name %>"
+    image: <%= spec.container.image %>
+    imagePullPolicy: IfNotPresent
+    <%- unless spec.container.working_dir.empty? -%>
+    workingDir: "<%= spec.container.working_dir %>"
+    <%- end -%>
+    <%- unless spec.container.env.empty? -%>
+    env:
+    <%- spec.container.env.each do |env| -%>
+    - name: <%= env[:name] %>
+      value: "<%= env[:value] %>"
+    <%- end # for each env -%>
+    <%- end # unless env is nil -%>
+    <%- unless spec.container.command.empty? -%>
+    command:
+    <%- spec.container.command.each do |cmd| -%>
+    - "<%= cmd %>"
+    <%- end # for each command -%>
+    <%- end # unless command is nil -%>
+    <%- unless spec.container.port.nil? -%>
+    ports:
+    - containerPort: <%= spec.container.port %>
+    <%- end -%>
+    volumeMounts:
+    <%- unless configmap.nil? -%>
+    - name: configmap-volume
+      mountPath: <%= configmap_mount_path %>
+    <%- end -%>
+    <%- all_mounts.each do |mount| -%>
+    - name: <%= mount[:name] %>
+      mountPath: <%= mount[:destination_path] %>
+    <%- end # for each mount -%>
+    resources:
+      limits:
+        memory: "<%= spec.container.memory %>"
+        cpu: "<%= spec.container.cpu %>"
+      requests:
+        memory: "<%= spec.container.memory %>"
+        cpu: "<%= spec.container.cpu %>"
+  <%- unless spec.init_containers.nil? -%>
+  initContainers:
+  <%- spec.init_containers.each do |ctr| -%>
+  - name: "<%= ctr.name %>"
+    image: "<%= ctr.image %>"
+    command:
+    <%- ctr.command.each do |cmd| -%>
+    - "<%= cmd %>"
+    <%- end # command loop -%>
+    volumeMounts:
+    <%- unless configmap.nil? -%>
+    - name: configmap-volume
+      mountPath: <%= configmap_mount_path %>
+    <%- end -%>
+    <%- all_mounts.each do |mount| -%>
+    - name: <%= mount[:name] %>
+      mountPath: <%= mount[:destination_path] %>
+    <%- end # for each mount -%>
+  <%- end # init container loop -%>
+  <%- end # if init containers -%>
+  <%- unless (configmap.to_s.empty? && all_mounts.empty?) -%>
+  volumes:
+  <%- unless configmap.to_s.empty? -%>
+  - name: configmap-volume
+    configMap:
+      name: <%= configmap_name(id) %>
+  <%- end -%>
+  <%- all_mounts.each do |mount| -%>
+  <%- if mount[:type] == 'nfs' -%>
+  - name: <%= mount[:name] %>
+    nfs:
+      server: <%= mount[:host] %>
+      path: <%= mount[:path] %>
+  <%- elsif mount[:type] == 'host' -%>
+  - name: <%= mount[:name] %>
+    hostPath:
+      path: <%= mount[:path] %>
+      type: <%= mount[:host_type] %>
+  <%- end # if mount is [host,nfs] -%>
+  <%- end # for each mount -%>
+  <%- end # (configmap.to_s.empty? || all_mounts.empty?) -%>
+---
+<%- unless spec.container.port.nil? -%>
+apiVersion: v1
+kind: Service
+metadata:
+  name: <%= service_name(id) %>
+  namespace: <%= namespace %>
+  labels:
+    job: <%= id %>
+spec:
+  selector:
+    job: <%= id %>
+  ports:
+  - protocol: TCP
+    port: 80
+    targetPort: <%= spec.container.port %>
+  type: NodePort
+<%- end # end for service -%>
+---
+<%- unless configmap.nil? -%>
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: <%= configmap_name(id) %>
+  namespace: <%= namespace %>
+  labels:
+    job: <%= id %>
+data:
+  <%= configmap.filename %>: |
+    <% config_data_lines(configmap.data).each do |line| %><%= line %><% end %>
+<%- end # end for configmap -%>
```
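The template is rendered by the adapter's batch layer, which supplies `namespace`, `id`, `spec`, the mount and configmap data, and helpers such as `configmap_name` and `service_name` through an ERB binding. A stripped-down sketch of that render step (the context class here is hypothetical; the real binding is much richer):

```ruby
require 'erb'

# Hypothetical stand-in for the render context the adapter builds.
class PodTemplateContext
  attr_reader :namespace, :id

  def initialize(namespace:, id:)
    @namespace = namespace
    @id = id
  end

  def render(template)
    # '-' trim mode honors the <%- ... -%> tags used throughout pod.yml.erb
    ERB.new(template, trim_mode: '-').result(binding)
  end
end

snippet = <<~TEMPLATE
  metadata:
    namespace: <%= namespace %>
    name: <%= id %>
    labels:
      job: <%= id %>
TEMPLATE

puts PodTemplateContext.new(namespace: 'user-annie', id: 'bash-1234').render(snippet)
```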
data/lib/ood_core/job/adapters/linux_host/launcher.rb

```diff
@@ -166,7 +166,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
       'email_on_terminated' => script_email_on_event(script, 'terminated'),
       'email_on_start' => script_email_on_event(script, 'started'),
       'environment' => export_env(script),
-      'error_path' => (script
+      'error_path' => error_path(script),
       'job_name' => script.job_name.to_s,
       'output_path' => (script.output_path) ? script.output_path.to_s : '/dev/null',
       'script_content' => content,
@@ -174,7 +174,9 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
       'session_name' => session_name,
       'singularity_bin' => singularity_bin,
       'singularity_image' => singularity_image(script.native),
+      'ssh_hosts' => ssh_hosts,
       'tmux_bin' => tmux_bin,
+      'workdir' => (script.workdir) ? script.workdir.to_s : '/tmp',
     }.each{
       |key, value| bnd.local_variable_set(key, value)
     }
@@ -271,4 +273,11 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
     return false if script.content.empty?
     script.content.split("\n").first.start_with?('#!/')
   end
+
+  def error_path(script)
+    return script.error_path.to_s if script.error_path
+    return script.output_path.to_s if script.output_path
+
+    '/dev/null'
+  end
 end
```
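The net effect: stderr now follows `error_path`, falls back to `output_path` (matching where stdout already goes), and only then drops to `/dev/null`. The same chain as a standalone sketch:

```ruby
# Stand-in for OodCore::Job::Script, just for illustration.
Script = Struct.new(:error_path, :output_path, keyword_init: true)

def error_path(script)
  return script.error_path.to_s if script.error_path
  return script.output_path.to_s if script.output_path

  '/dev/null'
end

error_path(Script.new(error_path: '/home/annie/job.err'))  #=> "/home/annie/job.err"
error_path(Script.new(output_path: '/home/annie/job.log')) #=> "/home/annie/job.log"
error_path(Script.new)                                     #=> "/dev/null"
```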
data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh

```diff
@@ -1,14 +1,24 @@
 #!/bin/bash
-
+SSH_HOSTS=(<%= ssh_hosts.join(' ').to_s %>)
+hostnames=`hostname -A`
+for host in ${SSH_HOSTS[@]}
+do
+  if [[ " ${hostnames[@]} " =~ " ${host} " ]]; then
+    hostname=$host
+  fi
+done
+
+if [ -z "$hostname" ]; then
+  printf >&2 "ERROR: Can't start job on [${hostnames[@]}] because it does not match any hostname configured \nin ssh_hosts [${SSH_HOSTS[@]}]. The output of 'hostname -A' must match an entry in ssh_hosts \nfrom the cluster configuration."
+  exit 1
+fi
+
+echo $hostname

 # Put the script into a temp file on localhost
-
-
-
-<% else %>
-singularity_tmp_file=$(mktemp)
-tmux_tmp_file=$(mktemp)
-<% end %>
+singularity_tmp_file=$(mktemp -p "<%= workdir %>" --suffix '_sing')
+tmux_tmp_file=$(mktemp -p "<%= workdir %>" --suffix "_tmux")
+

 # Create an executable to run in a tmux session
 # The escaped HEREDOC means that we need to substitute in $singularity_tmp_file ourselves
@@ -55,10 +65,3 @@ SINGULARITY_LAUNCHER
 chmod +x "$singularity_tmp_file"
 chmod +x "$tmux_tmp_file"
 <%= tmux_bin %> new-session -d -s "<%= session_name %>" "$tmux_tmp_file"
-
-# Remove the file
-<% if ! debug %>
-# Wait 1 second to ensure that tmux session has started before the file is removed
-sleep 1
-rm -f "$tmux_tmp_file"; rm -f "$singularity_tmp_file"
-<% end %>
```
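The wrapper now refuses to launch unless one of the names reported by `hostname -A` appears in the cluster's configured `ssh_hosts`, so the tmux session it creates can always be reached again over ssh. The containment check, restated in Ruby for clarity (the wrapper does this in bash; the host names are placeholders):

```ruby
# Pick the configured ssh_host that matches one of this machine's names,
# mirroring the bash loop over `hostname -A` above.
def resolve_session_host(ssh_hosts, hostnames)
  ssh_hosts.find { |host| hostnames.include?(host) } or
    raise "#{hostnames} does not match any entry in ssh_hosts #{ssh_hosts}"
end

resolve_session_host(
  ['owens-login01.hpc.example.edu'],                 # hypothetical cluster config
  ['owens-login01', 'owens-login01.hpc.example.edu'] # hypothetical `hostname -A`
)
#=> "owens-login01.hpc.example.edu"
```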
data/lib/ood_core/job/adapters/lsf.rb

```diff
@@ -14,6 +14,7 @@ module OodCore
       # @option config [#to_s] :serverdir ('') Path to lsf client etc dir
       # @option config [#to_s] :cluster ('') name of cluster, if in multi-cluster mode
       # @option config [#to_h] :bin_overrides ({}) Optional overrides to LSF client executables
+      # @option config [#to_s] :submit_host ('') Host to submit commands to
       def self.build_lsf(config)
         batch = Adapters::Lsf::Batch.new(config.to_h.symbolize_keys)
         Adapters::Lsf.new(batch: batch)
```
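In a cluster configuration this surfaces as one extra key. A hypothetical build (the host and path are placeholders):

```ruby
require 'ood_core'

# With submit_host set, every LSF client call is wrapped in ssh to that node.
adapter = OodCore::Job::Factory.build_lsf(
  bindir: '/usr/lsf/10.1/bin',           # placeholder path
  submit_host: 'lsf-login01.example.edu' # placeholder host
)
```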
data/lib/ood_core/job/adapters/lsf/batch.rb

```diff
@@ -2,21 +2,22 @@
 #
 # @api private
 class OodCore::Job::Adapters::Lsf::Batch
-  attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides
+  attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides, :submit_host, :strict_host_checking

   # The root exception class that all LSF-specific exceptions inherit
   # from
   class Error < StandardError; end

   # @param bin [#to_s] path to LSF installation binaries
-  def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, **_)
+  def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, submit_host: "", strict_host_checking: true, **_)
     @bindir = Pathname.new(bindir.to_s)
-
     @envdir = Pathname.new(envdir.to_s)
     @libdir = Pathname.new(libdir.to_s)
     @serverdir = Pathname.new(serverdir.to_s)
     @cluster = cluster.to_s
     @bin_overrides = bin_overrides
+    @submit_host = submit_host.to_s
+    @strict_host_checking = strict_host_checking
   end

   def default_env
@@ -143,6 +144,7 @@ class OodCore::Job::Adapters::Lsf::Batch
     cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
     args = cluster_args + args
     env = default_env.merge(env.to_h)
+    cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
     o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
     s.success? ? o : raise(Error, e)
   end
```
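`Helper.ssh_wrap` is the common seam in this release: with a blank `submit_host` it returns the command untouched, otherwise it rewrites the command as an ssh invocation to that host, relaxing host-key checking when `strict_host_checking` is false. Roughly (the exact ssh flags are the helper's concern, shown here only approximately):

```ruby
require 'ood_core'

helper = OodCore::Job::Adapters::Helper

# Blank submit_host: a no-op pass-through.
helper.ssh_wrap('', 'bjobs', ['-u', 'annie'])
#=> ["bjobs", ["-u", "annie"]]

# With a submit_host, the returned cmd becomes "ssh" and the original
# command is carried in the argument list (approximate shape).
cmd, args = helper.ssh_wrap('lsf-login01.example.edu', 'bjobs', ['-u', 'annie'], false)
cmd #=> "ssh"
```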
data/lib/ood_core/job/adapters/lsf/helper.rb

```diff
@@ -78,40 +78,40 @@ class OodCore::Job::Adapters::Lsf::Helper
   def batch_submit_args(script, after: [], afterok: [], afternotok: [], afterany: [])
     args = []

-    args
-    args
-    args
-    args[-1]
-
-    args
-    args
-    args
-    args
-    args
-    args
-    args
-    args
+    args.concat ["-P", script.accounting_id] unless script.accounting_id.nil?
+    args.concat ["-cwd", script.workdir.to_s] unless script.workdir.nil?
+    args.concat ["-J", script.job_name] unless script.job_name.nil?
+    args[-1].concat "[#{script.job_array_request}]" unless script.job_array_request.nil?
+
+    args.concat ["-q", script.queue_name] unless script.queue_name.nil?
+    args.concat ["-U", script.reservation_id] unless script.reservation_id.nil?
+    args.concat ["-sp", script.priority] unless script.priority.nil?
+    args.concat ["-H"] if script.submit_as_hold
+    args.concat (script.rerunnable ? ["-r"] : ["-rn"]) unless script.rerunnable.nil?
+    args.concat ["-b", script.start_time.localtime.strftime("%Y:%m:%d:%H:%M")] unless script.start_time.nil?
+    args.concat ["-W", (script.wall_time / 60).to_i] unless script.wall_time.nil?
+    args.concat ["-L", script.shell_path.to_s] unless script.shell_path.nil?

     # environment
     env = script.job_environment || {}
     # To preserve pre-existing behavior we only act when true or false, when nil we do nothing
     if script.copy_environment?
-      args
+      args.concat ["-env", (["all"] + env.keys).join(",")]
     elsif script.copy_environment? == false
-      args
+      args.concat ["-env", (["none"] + env.keys).join(",")]
     end

     # input and output files
-    args
-    args
-    args
+    args.concat ["-i", script.input_path] unless script.input_path.nil?
+    args.concat ["-o", script.output_path] unless script.output_path.nil?
+    args.concat ["-e", script.error_path] unless script.error_path.nil?

     # email
-    args
-    args
-    args
+    args.concat ["-B"] if script.email_on_started
+    args.concat ["-N"] if script.email_on_terminated
+    args.concat ["-u", script.email.join(",")] unless script.email.nil? || script.email.empty?

-    args
+    args.concat script.native unless script.native.nil?

     {args: args, env: env}
   end
```
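Restored to working form, the method is a straight mapping from `Script` fields to `bsub` flags. For example (hypothetical script values; the expected args are inferred from the lines above):

```ruby
require 'ood_core'
require 'ood_core/job/adapters/lsf' # assumed load path

script = OodCore::Job::Script.new(
  content: "#!/bin/bash\nhostname",
  job_name: 'analysis',
  queue_name: 'short',
  wall_time: 3600 # seconds; emitted as -W 60 (minutes)
)

helper = OodCore::Job::Adapters::Lsf::Helper.new
helper.batch_submit_args(script)[:args]
#=> ["-J", "analysis", "-q", "short", "-W", 60]
```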
data/lib/ood_core/job/adapters/pbspro.rb

```diff
@@ -10,17 +10,21 @@ module OodCore
       # Build the PBS Pro adapter from a configuration
       # @param config [#to_h] the configuration for job adapter
       # @option config [Object] :host (nil) The batch server host
+      # @option config [Object] :submit_host ("") The login node where the job is submitted
+      # @option config [Object] :strict_host_checking (true) Whether to use strict host checking when ssh to submit_host
      # @option config [Object] :exec (nil) Path to PBS Pro executables
       # @option config [Object] :qstat_factor (nil) Deciding factor on how to
       #   call qstat for a user
       # @option config [#to_h] :bin_overrides ({}) Optional overrides to PBS Pro client executables
       def self.build_pbspro(config)
         c = config.to_h.compact.symbolize_keys
-        host
-
-
-
+        host = c.fetch(:host, nil)
+        submit_host = c.fetch(:submit_host, "")
+        strict_host_checking = c.fetch(:strict_host_checking, true)
+        pbs_exec = c.fetch(:exec, nil)
+        qstat_factor = c.fetch(:qstat_factor, nil)
+        bin_overrides = c.fetch(:bin_overrides, {})
+        pbspro = Adapters::PBSPro::Batch.new(host: host, submit_host: submit_host, strict_host_checking: strict_host_checking, pbs_exec: pbs_exec, bin_overrides: bin_overrides)
         Adapters::PBSPro.new(pbspro: pbspro, qstat_factor: qstat_factor)
       end
     end
```
```diff
@@ -41,6 +45,18 @@ module OodCore
       # @return [String, nil] the batch server host
       attr_reader :host

+      # The login node to submit the job via ssh
+      # @example
+      #   my_batch.submit_host #=> "my_batch.server.edu"
+      # @return [String, nil] the login node
+      attr_reader :submit_host
+
+      # Whether to use strict host checking when ssh to submit_host
+      # @example
+      #   my_batch.strict_host_checking #=> "false"
+      # @return [Bool, true] the login node; true if not present
+      attr_reader :strict_host_checking
+
       # The path containing the PBS executables
       # @example
       #   my_batch.pbs_exec.to_s #=> "/usr/local/pbspro/10.0.0
```
```diff
@@ -58,11 +74,15 @@ module OodCore
       class Error < StandardError; end

       # @param host [#to_s, nil] the batch server host
+      # @param submit_host [#to_s, nil] the login node to ssh to
+      # @param strict_host_checking [bool, true] wheter to use strict host checking when ssh to submit_host
       # @param exec [#to_s, nil] path to pbs executables
-      def initialize(host: nil, pbs_exec: nil, bin_overrides: {})
-        @host
-        @
-        @
+      def initialize(host: nil, submit_host: "", strict_host_checking: true, pbs_exec: nil, bin_overrides: {})
+        @host = host && host.to_s
+        @submit_host = submit_host && submit_host.to_s
+        @strict_host_checking = strict_host_checking
+        @pbs_exec = pbs_exec && Pathname.new(pbs_exec.to_s)
+        @bin_overrides = bin_overrides
       end

       # Get a list of hashes detailing each of the jobs on the batch server
```
```diff
@@ -87,7 +107,7 @@ module OodCore
       # @return [Array<Hash>] list of details for jobs
       def get_jobs(id: "")
         args = ["-f", "-t"] # display all information
-        args
+        args.concat [id.to_s] unless id.to_s.empty?
         lines = call("qstat", *args).gsub("\n\t", "").split("\n").map(&:strip)

         jobs = []
```
```diff
@@ -159,12 +179,12 @@ module OodCore
         cmd = cmd.to_s
         bindir = (!!pbs_exec) ? pbs_exec.join("bin").to_s : ''
         cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
-        args = args.map(&:to_s)
         env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
         env["PBS_DEFAULT"] = host.to_s if host
         env["PBS_EXEC"] = pbs_exec.to_s if pbs_exec
+        cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
         chdir ||= "."
-        o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
+        o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s, chdir: chdir.to_s)
         s.success? ? o : raise(Error, e)
       end
     end
```
```diff
@@ -227,28 +247,28 @@ module OodCore
         # Set qsub options
         args = []
         # ignore args, can't use these if submitting from STDIN
-        args
-        args
-        args
+        args.concat ["-h"] if script.submit_as_hold
+        args.concat ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
+        args.concat ["-M", script.email.join(",")] unless script.email.nil?
         if script.email_on_started && script.email_on_terminated
-          args
+          args.concat ["-m", "be"]
         elsif script.email_on_started
-          args
+          args.concat ["-m", "b"]
         elsif script.email_on_terminated
-          args
+          args.concat ["-m", "e"]
         end
-        args
-        args
+        args.concat ["-N", script.job_name] unless script.job_name.nil?
+        args.concat ["-S", script.shell_path] unless script.shell_path.nil?
         # ignore input_path (not defined in PBS Pro)
-        args
-        args
+        args.concat ["-o", script.output_path] unless script.output_path.nil?
+        args.concat ["-e", script.error_path] unless script.error_path.nil?
         # Reservations are actually just queues in PBS Pro
-        args
-        args
-        args
-        args
-        args
-        args
+        args.concat ["-q", script.reservation_id] if !script.reservation_id.nil? && script.queue_name.nil?
+        args.concat ["-q", script.queue_name] unless script.queue_name.nil?
+        args.concat ["-p", script.priority] unless script.priority.nil?
+        args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
+        args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
+        args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?

         # Set dependencies
         depend = []
```
```diff
@@ -256,21 +276,21 @@ module OodCore
         depend << "afterok:#{afterok.join(":")}" unless afterok.empty?
         depend << "afternotok:#{afternotok.join(":")}" unless afternotok.empty?
         depend << "afterany:#{afterany.join(":")}" unless afterany.empty?
-        args
+        args.concat ["-W", "depend=#{depend.join(",")}"] unless depend.empty?

         # Set environment variables
         envvars = script.job_environment.to_h
-        args
-        args
+        args.concat ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
+        args.concat ["-V"] if script.copy_environment?

         # If error_path is not specified we join stdout & stderr (as this
         # mimics what the other resource managers do)
-        args
+        args.concat ["-j", "oe"] if script.error_path.nil?

-        args
+        args.concat ["-J", script.job_array_request] unless script.job_array_request.nil?

         # Set native options
-        args
+        args.concat script.native if script.native

         # Submit job
         @pbspro.submit_string(script.content, args: args, chdir: script.workdir)
```
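PBS Pro follows the same submit_host/strict_host_checking pattern as LSF: the keys are fetched in `build_pbspro`, held on `Batch`, and applied around every qsub/qstat call via `ssh_wrap`. A hypothetical configuration (hosts and paths are placeholders):

```ruby
require 'ood_core'

# strict_host_checking: false makes the generated ssh call skip
# known_hosts verification; both new keys are optional.
adapter = OodCore::Job::Factory.build_pbspro(
  host: 'pbs-batch.example.edu',          # placeholder
  submit_host: 'pbs-login01.example.edu', # placeholder
  strict_host_checking: false,
  exec: '/opt/pbs'                        # placeholder
)
```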