ood_core 0.11.4 → 0.15.1
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +30 -0
- data/CHANGELOG.md +55 -1
- data/README.md +7 -6
- data/lib/ood_core/job/adapters/ccq.rb +274 -0
- data/lib/ood_core/job/adapters/helper.rb +20 -1
- data/lib/ood_core/job/adapters/kubernetes.rb +193 -0
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +354 -0
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +294 -0
- data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +9 -0
- data/lib/ood_core/job/adapters/kubernetes/resources.rb +58 -0
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +158 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +10 -1
- data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh +18 -15
- data/lib/ood_core/job/adapters/lsf.rb +1 -0
- data/lib/ood_core/job/adapters/lsf/batch.rb +5 -3
- data/lib/ood_core/job/adapters/lsf/helper.rb +22 -22
- data/lib/ood_core/job/adapters/pbspro.rb +54 -34
- data/lib/ood_core/job/adapters/sge/batch.rb +6 -5
- data/lib/ood_core/job/adapters/sge/helper.rb +19 -19
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +35 -4
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +25 -2
- data/lib/ood_core/job/adapters/slurm.rb +79 -38
- data/lib/ood_core/job/adapters/torque.rb +30 -23
- data/lib/ood_core/job/adapters/torque/batch.rb +29 -12
- data/lib/ood_core/job/script.rb +9 -1
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -1
- metadata +33 -6
- data/.travis.yml +0 -9
data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb
@@ -0,0 +1,9 @@
+# An object that describes a submitted kubernetes job with extended information
+class OodCore::Job::Adapters::Kubernetes::K8sJobInfo < OodCore::Job::Info
+  attr_reader :ood_connection_info
+
+  def initialize(ood_connection_info: {}, **options)
+    super(options)
+    @ood_connection_info = ood_connection_info
+  end
+end
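A usage sketch (hypothetical values, not from the diff): `ood_connection_info` carries whatever host/port details the Kubernetes adapter chooses to expose alongside the standard `OodCore::Job::Info` fields.

```ruby
# Hypothetical example of constructing the extended info object by hand.
# :id and :status are ordinary OodCore::Job::Info fields; the shape of
# the connection hash is illustrative, the class does not constrain it.
info = OodCore::Job::Adapters::Kubernetes::K8sJobInfo.new(
  id: "ood-bc76a9d2",
  status: :running,
  ood_connection_info: { host: "10.20.0.40", port: 30689 }
)
info.ood_connection_info[:port] #=> 30689
```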
data/lib/ood_core/job/adapters/kubernetes/resources.rb
@@ -0,0 +1,58 @@
+module OodCore::Job::Adapters::Kubernetes::Resources
+
+  class ConfigMap
+    attr_accessor :name, :filename, :data
+
+    def initialize(name, filename, data)
+      @name = name
+      @filename = filename
+      @data = data
+    end
+  end
+
+  class Container
+    attr_accessor :name, :image, :command, :port, :env, :memory, :cpu, :working_dir,
+                  :restart_policy, :supplemental_groups
+
+    def initialize(
+      name, image, command: [], port: nil, env: [], memory: "4Gi", cpu: "1",
+      working_dir: "", restart_policy: "Never", supplemental_groups: []
+    )
+      raise ArgumentError, "containers need valid names and images" unless name && image
+
+      @name = name
+      @image = image
+      @command = command.nil? ? [] : command
+      @port = port&.to_i
+      @env = env.nil? ? [] : env
+      @memory = memory.nil? ? "4Gi" : memory
+      @cpu = cpu.nil? ? "1" : cpu
+      @working_dir = working_dir.nil? ? "" : working_dir
+      @restart_policy = restart_policy.nil? ? "Never" : restart_policy
+      @supplemental_groups = supplemental_groups.nil? ? [] : supplemental_groups
+    end
+
+    def ==(other)
+      name == other.name &&
+        image == other.image &&
+        command == other.command &&
+        port == other.port &&
+        env == other.env &&
+        memory == other.memory &&
+        cpu == other.cpu &&
+        working_dir == other.working_dir &&
+        restart_policy == other.restart_policy &&
+        supplemental_groups == other.supplemental_groups
+    end
+
+  end
+
+  class PodSpec
+    attr_accessor :container, :init_containers
+    def initialize(container, init_containers: nil)
+      @container = container
+      @init_containers = init_containers
+    end
+  end
+
+end
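A construction sketch (image and values hypothetical) showing the required positional arguments and the defaults these classes fall back to:

```ruby
# Illustrative only: the adapter normally builds these objects from the
# submitted Script's native attributes rather than by hand.
K8s = OodCore::Job::Adapters::Kubernetes

ctr = K8s::Resources::Container.new(
  "main", "docker.io/library/ruby:2.7",      # name and image are required
  command: ["ruby", "server.rb"], port: 8080 # everything else is defaulted
)
spec = K8s::Resources::PodSpec.new(ctr, init_containers: nil)

ctr.memory          #=> "4Gi" (the default)
spec.container.port #=> 8080
```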
data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb
@@ -0,0 +1,158 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  namespace: <%= namespace %>
+  name: <%= id %>
+  labels:
+    job: <%= id %>
+    app.kubernetes.io/name: <%= container.name %>
+    app.kubernetes.io/managed-by: open-ondemand
+    <%- if !script.accounting_id.nil? && script.accounting_id != "" -%>
+    account: <%= script.accounting_id %>
+    <%- end -%>
+  annotations:
+    <%- unless script.wall_time.nil? -%>
+    pod.kubernetes.io/lifetime: <%= helper.seconds_to_duration(script.wall_time) %>
+    <%- end -%>
+spec:
+  restartPolicy: <%= spec.container.restart_policy %>
+  securityContext:
+    runAsUser: <%= run_as_user %>
+    runAsGroup: <%= run_as_group %>
+    runAsNonRoot: true
+    <%- if spec.container.supplemental_groups.empty? -%>
+    supplementalGroups: []
+    <%- else -%>
+    supplementalGroups:
+    <%- spec.container.supplemental_groups.each do |supplemental_group| -%>
+      - "<%= supplemental_group %>"
+    <%- end -%>
+    <%- end -%>
+    fsGroup: <%= fs_group %>
+  hostNetwork: false
+  hostIPC: false
+  hostPID: false
+  containers:
+  - name: "<%= spec.container.name %>"
+    image: <%= spec.container.image %>
+    imagePullPolicy: IfNotPresent
+    <%- unless spec.container.working_dir.empty? -%>
+    workingDir: "<%= spec.container.working_dir %>"
+    <%- end -%>
+    <%- unless spec.container.env.empty? -%>
+    env:
+    <%- spec.container.env.each do |env| -%>
+    - name: <%= env[:name] %>
+      value: "<%= env[:value] %>"
+    <%- end # for each env -%>
+    <%- end # unless env is empty -%>
+    <%- unless spec.container.command.empty? -%>
+    command:
+    <%- spec.container.command.each do |cmd| -%>
+    - "<%= cmd %>"
+    <%- end # for each command -%>
+    <%- end # unless command is empty -%>
+    <%- unless spec.container.port.nil? -%>
+    ports:
+    - containerPort: <%= spec.container.port %>
+    <%- end -%>
+    volumeMounts:
+    <%- unless configmap.nil? -%>
+    - name: configmap-volume
+      mountPath: <%= configmap_mount_path %>
+    <%- end -%>
+    <%- all_mounts.each do |mount| -%>
+    - name: <%= mount[:name] %>
+      mountPath: <%= mount[:destination_path] %>
+    <%- end # for each mount -%>
+    resources:
+      limits:
+        memory: "<%= spec.container.memory %>"
+        cpu: "<%= spec.container.cpu %>"
+      requests:
+        memory: "<%= spec.container.memory %>"
+        cpu: "<%= spec.container.cpu %>"
+    securityContext:
+      allowPrivilegeEscalation: false
+      capabilities:
+        drop:
+        - all
+      privileged: false
+  <%- unless spec.init_containers.nil? -%>
+  initContainers:
+  <%- spec.init_containers.each do |ctr| -%>
+  - name: "<%= ctr.name %>"
+    image: "<%= ctr.image %>"
+    command:
+    <%- ctr.command.each do |cmd| -%>
+    - "<%= cmd %>"
+    <%- end # command loop -%>
+    volumeMounts:
+    <%- unless configmap.nil? -%>
+    - name: configmap-volume
+      mountPath: <%= configmap_mount_path %>
+    <%- end -%>
+    <%- all_mounts.each do |mount| -%>
+    - name: <%= mount[:name] %>
+      mountPath: <%= mount[:destination_path] %>
+    <%- end # for each mount -%>
+    securityContext:
+      allowPrivilegeEscalation: false
+      capabilities:
+        drop:
+        - all
+      privileged: false
+  <%- end # init container loop -%>
+  <%- end # if init containers -%>
+  <%- unless (configmap.to_s.empty? && all_mounts.empty?) -%>
+  volumes:
+  <%- unless configmap.to_s.empty? -%>
+  - name: configmap-volume
+    configMap:
+      name: <%= configmap_name(id) %>
+  <%- end -%>
+  <%- all_mounts.each do |mount| -%>
+  <%- if mount[:type] == 'nfs' -%>
+  - name: <%= mount[:name] %>
+    nfs:
+      server: <%= mount[:host] %>
+      path: <%= mount[:path] %>
+  <%- elsif mount[:type] == 'host' -%>
+  - name: <%= mount[:name] %>
+    hostPath:
+      path: <%= mount[:path] %>
+      type: <%= mount[:host_type] %>
+  <%- end # if mount is [host,nfs] -%>
+  <%- end # for each mount -%>
+  <%- end # unless (configmap.to_s.empty? && all_mounts.empty?) -%>
+---
+<%- unless spec.container.port.nil? -%>
+apiVersion: v1
+kind: Service
+metadata:
+  name: <%= service_name(id) %>
+  namespace: <%= namespace %>
+  labels:
+    job: <%= id %>
+spec:
+  selector:
+    job: <%= id %>
+  ports:
+  - protocol: TCP
+    port: 80
+    targetPort: <%= spec.container.port %>
+  type: NodePort
+<%- end # end for service -%>
+---
+<%- unless configmap.nil? -%>
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: <%= configmap_name(id) %>
+  namespace: <%= namespace %>
+  labels:
+    job: <%= id %>
+data:
+  <%= configmap.filename %>: |
+    <% config_data_lines(configmap.data).each do |line| %><%= line %><% end %>
+<%- end # end for configmap -%>
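The template leans on ERB's explicit trim mode (`<%-` / `-%>`), so the renderer must enable it or the control-flow tags leave blank lines in the YAML. A minimal, self-contained sketch; the two locals stand in for the much richer binding (`namespace`, `id`, `script`, `spec`, `configmap`, `all_mounts`, and the helper methods) that the adapter's batch.rb actually supplies:

```ruby
require 'erb'

# Minimal sketch of trim-mode ERB rendering, not the adapter's code path.
template = <<~TMPL
  metadata:
    namespace: <%= namespace %>
  <%- unless port.nil? -%>
  ports:
  - containerPort: <%= port %>
  <%- end -%>
TMPL

namespace = "user-ondemand" # stand-in values for illustration
port = 8080
puts ERB.new(template, trim_mode: "-").result(binding)
```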
data/lib/ood_core/job/adapters/linux_host/launcher.rb
@@ -166,7 +166,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
         'email_on_terminated' => script_email_on_event(script, 'terminated'),
         'email_on_start' => script_email_on_event(script, 'started'),
         'environment' => export_env(script),
-        'error_path' => (script.error_path) ? script.error_path.to_s : '/dev/null',
+        'error_path' => error_path(script),
         'job_name' => script.job_name.to_s,
         'output_path' => (script.output_path) ? script.output_path.to_s : '/dev/null',
         'script_content' => content,
@@ -174,7 +174,9 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
         'session_name' => session_name,
         'singularity_bin' => singularity_bin,
         'singularity_image' => singularity_image(script.native),
+        'ssh_hosts' => ssh_hosts,
         'tmux_bin' => tmux_bin,
+        'workdir' => (script.workdir) ? script.workdir.to_s : '/tmp',
       }.each{
         |key, value| bnd.local_variable_set(key, value)
       }
@@ -271,4 +273,11 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
     return false if script.content.empty?
     script.content.split("\n").first.start_with?('#!/')
   end
+
+  def error_path(script)
+    return script.error_path.to_s if script.error_path
+    return script.output_path.to_s if script.output_path
+
+    '/dev/null'
+  end
 end
data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh
@@ -1,14 +1,24 @@
 #!/bin/bash
-hostname
+SSH_HOSTS=(<%= ssh_hosts.join(' ').to_s %>)
+hostnames=`hostname -A`
+for host in ${SSH_HOSTS[@]}
+do
+  if [[ " ${hostnames[@]} " =~ " ${host} " ]]; then
+    hostname=$host
+  fi
+done
+
+if [ -z "$hostname" ]; then
+  printf >&2 "ERROR: Can't start job on [${hostnames[@]}] because it does not match any hostname configured \nin ssh_hosts [${SSH_HOSTS[@]}]. The output of 'hostname -A' must match an entry in ssh_hosts \nfrom the cluster configuration."
+  exit 1
+fi
+
+echo $hostname
 
 # Put the script into a temp file on localhost
-<% if debug %>
-singularity_tmp_file=$(mktemp -p "$HOME" --suffix '_sing')
-tmux_tmp_file=$(mktemp -p "$HOME" --suffix "_tmux")
-<% else %>
-singularity_tmp_file=$(mktemp)
-tmux_tmp_file=$(mktemp)
-<% end %>
+singularity_tmp_file=$(mktemp -p "<%= workdir %>" --suffix '_sing')
+tmux_tmp_file=$(mktemp -p "<%= workdir %>" --suffix "_tmux")
+
 
 # Create an executable to run in a tmux session
 # The escaped HEREDOC means that we need to substitute in $singularity_tmp_file ourselves
@@ -55,10 +65,3 @@ SINGULARITY_LAUNCHER
 chmod +x "$singularity_tmp_file"
 chmod +x "$tmux_tmp_file"
 <%= tmux_bin %> new-session -d -s "<%= session_name %>" "$tmux_tmp_file"
-
-# Remove the file
-<% if ! debug %>
-# Wait 1 second to ensure that tmux session has started before the file is removed
-sleep 1
-rm -f "$tmux_tmp_file"; rm -f "$singularity_tmp_file"
-<% end %>
data/lib/ood_core/job/adapters/lsf.rb
@@ -14,6 +14,7 @@ module OodCore
       # @option config [#to_s] :serverdir ('') Path to lsf client etc dir
       # @option config [#to_s] :cluster ('') name of cluster, if in multi-cluster mode
       # @option config [#to_h] :bin_overrides ({}) Optional overrides to LSF client executables
+      # @option config [#to_s] :submit_host ('') Host to submit commands to
      def self.build_lsf(config)
        batch = Adapters::Lsf::Batch.new(config.to_h.symbolize_keys)
        Adapters::Lsf.new(batch: batch)
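For example (hypothetical host name; the default `''` leaves commands running locally):

```ruby
# Hypothetical cluster configuration passed to the factory; only
# :submit_host is new in this release.
adapter = OodCore::Job::Factory.build_lsf(
  "bindir"      => "/usr/share/lsf/bin",
  "submit_host" => "login01.cluster.edu"
)
```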
data/lib/ood_core/job/adapters/lsf/batch.rb
@@ -2,21 +2,22 @@
 #
 # @api private
 class OodCore::Job::Adapters::Lsf::Batch
-  attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides
+  attr_reader :bindir, :libdir, :envdir, :serverdir, :cluster, :bin_overrides, :submit_host, :strict_host_checking
 
   # The root exception class that all LSF-specific exceptions inherit
   # from
   class Error < StandardError; end
 
   # @param bin [#to_s] path to LSF installation binaries
-  def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, **_)
+  def initialize(bindir: "", envdir: "", libdir: "", serverdir: "", cluster: "", bin_overrides: {}, submit_host: "", strict_host_checking: true, **_)
     @bindir = Pathname.new(bindir.to_s)
-
     @envdir = Pathname.new(envdir.to_s)
     @libdir = Pathname.new(libdir.to_s)
     @serverdir = Pathname.new(serverdir.to_s)
     @cluster = cluster.to_s
     @bin_overrides = bin_overrides
+    @submit_host = submit_host.to_s
+    @strict_host_checking = strict_host_checking
   end
 
   def default_env
@@ -143,6 +144,7 @@ class OodCore::Job::Adapters::Lsf::Batch
     cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
     args = cluster_args + args
     env = default_env.merge(env.to_h)
+    cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking, env)
     o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
     s.success? ? o : raise(Error, e)
   end
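`Helper.ssh_wrap` itself is the +20 -1 change to helper.rb in the file list and is not shown in this excerpt. From its call sites here and in the PBS Pro adapter below, it takes the submit host, the command and its args, a strict-host-checking flag, and an optional env hash, and returns a (possibly ssh-wrapped) `[cmd, args]` pair. A plausible sketch, not the gem's actual implementation:

```ruby
# Hypothetical sketch of Helper.ssh_wrap, inferred only from its call
# sites: a blank submit_host is a no-op, otherwise the command is
# rewritten to run over ssh on that host.
def self.ssh_wrap(submit_host, cmd, cmd_args, strict_host_checking = true, env = {})
  return cmd, cmd_args if submit_host.to_s.empty?

  check_host = strict_host_checking ? "yes" : "no"
  ssh_args = ["-o", "BatchMode=yes", "-o", "StrictHostKeyChecking=#{check_host}", submit_host.to_s]
  env_vars = env.map { |k, v| "#{k}=#{v}" } # forward env across the ssh hop
  ["ssh", ssh_args + env_vars + [cmd.to_s] + cmd_args]
end
```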
data/lib/ood_core/job/adapters/lsf/helper.rb
@@ -78,40 +78,40 @@ class OodCore::Job::Adapters::Lsf::Helper
   def batch_submit_args(script, after: [], afterok: [], afternotok: [], afterany: [])
     args = []
 
-    args += ["-P", script.accounting_id] unless script.accounting_id.nil?
-    args += ["-cwd", script.workdir.to_s] unless script.workdir.nil?
-    args += ["-J", script.job_name] unless script.job_name.nil?
-    args[-1] += "[#{script.job_array_request}]" unless script.job_array_request.nil?
-
-    args += ["-q", script.queue_name] unless script.queue_name.nil?
-    args += ["-U", script.reservation_id] unless script.reservation_id.nil?
-    args += ["-sp", script.priority] unless script.priority.nil?
-    args += ["-H"] if script.submit_as_hold
-    args += (script.rerunnable ? ["-r"] : ["-rn"]) unless script.rerunnable.nil?
-    args += ["-b", script.start_time.localtime.strftime("%Y:%m:%d:%H:%M")] unless script.start_time.nil?
-    args += ["-W", (script.wall_time / 60).to_i] unless script.wall_time.nil?
-    args += ["-L", script.shell_path.to_s] unless script.shell_path.nil?
+    args.concat ["-P", script.accounting_id] unless script.accounting_id.nil?
+    args.concat ["-cwd", script.workdir.to_s] unless script.workdir.nil?
+    args.concat ["-J", script.job_name] unless script.job_name.nil?
+    args[-1].concat "[#{script.job_array_request}]" unless script.job_array_request.nil?
+
+    args.concat ["-q", script.queue_name] unless script.queue_name.nil?
+    args.concat ["-U", script.reservation_id] unless script.reservation_id.nil?
+    args.concat ["-sp", script.priority] unless script.priority.nil?
+    args.concat ["-H"] if script.submit_as_hold
+    args.concat (script.rerunnable ? ["-r"] : ["-rn"]) unless script.rerunnable.nil?
+    args.concat ["-b", script.start_time.localtime.strftime("%Y:%m:%d:%H:%M")] unless script.start_time.nil?
+    args.concat ["-W", (script.wall_time / 60).to_i] unless script.wall_time.nil?
+    args.concat ["-L", script.shell_path.to_s] unless script.shell_path.nil?
 
     # environment
     env = script.job_environment || {}
     # To preserve pre-existing behavior we only act when true or false, when nil we do nothing
     if script.copy_environment?
-      args += ["-env", (["all"] + env.keys).join(",")]
+      args.concat ["-env", (["all"] + env.keys).join(",")]
     elsif script.copy_environment? == false
-      args += ["-env", (["none"] + env.keys).join(",")]
+      args.concat ["-env", (["none"] + env.keys).join(",")]
     end
 
     # input and output files
-    args += ["-i", script.input_path] unless script.input_path.nil?
-    args += ["-o", script.output_path] unless script.output_path.nil?
-    args += ["-e", script.error_path] unless script.error_path.nil?
+    args.concat ["-i", script.input_path] unless script.input_path.nil?
+    args.concat ["-o", script.output_path] unless script.output_path.nil?
+    args.concat ["-e", script.error_path] unless script.error_path.nil?
 
     # email
-    args += ["-B"] if script.email_on_started
-    args += ["-N"] if script.email_on_terminated
-    args += ["-u", script.email.join(",")] unless script.email.nil? || script.email.empty?
+    args.concat ["-B"] if script.email_on_started
+    args.concat ["-N"] if script.email_on_terminated
+    args.concat ["-u", script.email.join(",")] unless script.email.nil? || script.email.empty?
 
-    args += script.native unless script.native.nil?
+    args.concat script.native unless script.native.nil?
 
     {args: args, env: env}
   end
data/lib/ood_core/job/adapters/pbspro.rb
@@ -10,17 +10,21 @@ module OodCore
      # Build the PBS Pro adapter from a configuration
      # @param config [#to_h] the configuration for job adapter
      # @option config [Object] :host (nil) The batch server host
+     # @option config [Object] :submit_host ("") The login node where the job is submitted
+     # @option config [Object] :strict_host_checking (true) Whether to use strict host checking when ssh to submit_host
      # @option config [Object] :exec (nil) Path to PBS Pro executables
      # @option config [Object] :qstat_factor (nil) Deciding factor on how to
      #   call qstat for a user
      # @option config [#to_h] :bin_overrides ({}) Optional overrides to PBS Pro client executables
      def self.build_pbspro(config)
        c = config.to_h.compact.symbolize_keys
-        host          = c.fetch(:host, nil)
-        pbs_exec      = c.fetch(:exec, nil)
-        qstat_factor  = c.fetch(:qstat_factor, nil)
-        bin_overrides = c.fetch(:bin_overrides, {})
-        pbspro = Adapters::PBSPro::Batch.new(host: host, pbs_exec: pbs_exec, bin_overrides: bin_overrides)
+        host                 = c.fetch(:host, nil)
+        submit_host          = c.fetch(:submit_host, "")
+        strict_host_checking = c.fetch(:strict_host_checking, true)
+        pbs_exec             = c.fetch(:exec, nil)
+        qstat_factor         = c.fetch(:qstat_factor, nil)
+        bin_overrides        = c.fetch(:bin_overrides, {})
+        pbspro = Adapters::PBSPro::Batch.new(host: host, submit_host: submit_host, strict_host_checking: strict_host_checking, pbs_exec: pbs_exec, bin_overrides: bin_overrides)
        Adapters::PBSPro.new(pbspro: pbspro, qstat_factor: qstat_factor)
      end
    end
@@ -41,6 +45,18 @@ module OodCore
        # @return [String, nil] the batch server host
        attr_reader :host
 
+       # The login node to submit the job via ssh
+       # @example
+       #   my_batch.submit_host #=> "my_batch.server.edu"
+       # @return [String, nil] the login node
+       attr_reader :submit_host
+
+       # Whether to use strict host checking when ssh to submit_host
+       # @example
+       #   my_batch.strict_host_checking #=> "false"
+       # @return [Bool, true] whether strict host checking is used; true if not present
+       attr_reader :strict_host_checking
+
        # The path containing the PBS executables
        # @example
        #   my_batch.pbs_exec.to_s #=> "/usr/local/pbspro/10.0.0"
@@ -58,11 +74,15 @@ module OodCore
        class Error < StandardError; end
 
        # @param host [#to_s, nil] the batch server host
+       # @param submit_host [#to_s, nil] the login node to ssh to
+       # @param strict_host_checking [Bool, true] whether to use strict host checking when ssh to submit_host
        # @param exec [#to_s, nil] path to pbs executables
-       def initialize(host: nil, pbs_exec: nil, bin_overrides: {})
-         @host          = host && host.to_s
-         @pbs_exec      = pbs_exec && Pathname.new(pbs_exec.to_s)
-         @bin_overrides = bin_overrides
+       def initialize(host: nil, submit_host: "", strict_host_checking: true, pbs_exec: nil, bin_overrides: {})
+         @host = host && host.to_s
+         @submit_host = submit_host && submit_host.to_s
+         @strict_host_checking = strict_host_checking
+         @pbs_exec = pbs_exec && Pathname.new(pbs_exec.to_s)
+         @bin_overrides = bin_overrides
        end
 
        # Get a list of hashes detailing each of the jobs on the batch server
@@ -87,7 +107,7 @@ module OodCore
        # @return [Array<Hash>] list of details for jobs
        def get_jobs(id: "")
          args = ["-f", "-t"] # display all information
-         args += [id.to_s] unless id.to_s.empty?
+         args.concat [id.to_s] unless id.to_s.empty?
          lines = call("qstat", *args).gsub("\n\t", "").split("\n").map(&:strip)
 
          jobs = []
@@ -159,12 +179,12 @@ module OodCore
          cmd = cmd.to_s
          bindir = (!!pbs_exec) ? pbs_exec.join("bin").to_s : ''
          cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bindir, bin_overrides)
-         args = args.map(&:to_s)
          env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
          env["PBS_DEFAULT"] = host.to_s if host
          env["PBS_EXEC"] = pbs_exec.to_s if pbs_exec
+         cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
          chdir ||= "."
-         o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s, chdir: chdir.to_s)
+         o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s, chdir: chdir.to_s)
          s.success? ? o : raise(Error, e)
        end
      end
@@ -227,28 +247,28 @@ module OodCore
        # Set qsub options
        args = []
        # ignore args, can't use these if submitting from STDIN
-       args += ["-h"] if script.submit_as_hold
-       args += ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
-       args += ["-M", script.email.join(",")] unless script.email.nil?
+       args.concat ["-h"] if script.submit_as_hold
+       args.concat ["-r", script.rerunnable ? "y" : "n"] unless script.rerunnable.nil?
+       args.concat ["-M", script.email.join(",")] unless script.email.nil?
        if script.email_on_started && script.email_on_terminated
-         args += ["-m", "be"]
+         args.concat ["-m", "be"]
        elsif script.email_on_started
-         args += ["-m", "b"]
+         args.concat ["-m", "b"]
        elsif script.email_on_terminated
-         args += ["-m", "e"]
+         args.concat ["-m", "e"]
        end
-       args += ["-N", script.job_name] unless script.job_name.nil?
-       args += ["-S", script.shell_path] unless script.shell_path.nil?
+       args.concat ["-N", script.job_name] unless script.job_name.nil?
+       args.concat ["-S", script.shell_path] unless script.shell_path.nil?
        # ignore input_path (not defined in PBS Pro)
-       args += ["-o", script.output_path] unless script.output_path.nil?
-       args += ["-e", script.error_path] unless script.error_path.nil?
+       args.concat ["-o", script.output_path] unless script.output_path.nil?
+       args.concat ["-e", script.error_path] unless script.error_path.nil?
        # Reservations are actually just queues in PBS Pro
-       args += ["-q", script.reservation_id] if !script.reservation_id.nil? && script.queue_name.nil?
-       args += ["-q", script.queue_name] unless script.queue_name.nil?
-       args += ["-p", script.priority] unless script.priority.nil?
-       args += ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
-       args += ["-A", script.accounting_id] unless script.accounting_id.nil?
-       args += ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
+       args.concat ["-q", script.reservation_id] if !script.reservation_id.nil? && script.queue_name.nil?
+       args.concat ["-q", script.queue_name] unless script.queue_name.nil?
+       args.concat ["-p", script.priority] unless script.priority.nil?
+       args.concat ["-a", script.start_time.localtime.strftime("%C%y%m%d%H%M.%S")] unless script.start_time.nil?
+       args.concat ["-A", script.accounting_id] unless script.accounting_id.nil?
+       args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
 
        # Set dependencies
        depend = []
@@ -256,21 +276,21 @@ module OodCore
        depend << "afterok:#{afterok.join(":")}" unless afterok.empty?
        depend << "afternotok:#{afternotok.join(":")}" unless afternotok.empty?
        depend << "afterany:#{afterany.join(":")}" unless afterany.empty?
-       args += ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
+       args.concat ["-W", "depend=#{depend.join(",")}"] unless depend.empty?
 
        # Set environment variables
        envvars = script.job_environment.to_h
-       args += ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
-       args += ["-V"] if script.copy_environment?
+       args.concat ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
+       args.concat ["-V"] if script.copy_environment?
 
        # If error_path is not specified we join stdout & stderr (as this
        # mimics what the other resource managers do)
-       args += ["-j", "oe"] if script.error_path.nil?
+       args.concat ["-j", "oe"] if script.error_path.nil?
 
-       args += ["-J", script.job_array_request] unless script.job_array_request.nil?
+       args.concat ["-J", script.job_array_request] unless script.job_array_request.nil?
 
        # Set native options
-       args += script.native if script.native
+       args.concat script.native if script.native
 
        # Submit job
        @pbspro.submit_string(script.content, args: args, chdir: script.workdir)
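To make the assembly above concrete: a script with only a queue and a one-hour walltime, and no explicit error path, would be submitted roughly as follows (hypothetical values):

```ruby
# Hypothetical illustration of the qsub argument assembly above.
script = OodCore::Job::Script.new(
  content: "#!/bin/bash\nhostname",
  queue_name: "batch",
  wall_time: 3600
)
# args becomes ["-q", "batch", "-l", "walltime=01:00:00", "-j", "oe"] and
# the script content is piped to qsub on stdin, via ssh to submit_host
# first when one is configured.
```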