ood_core 0.18.1 → 0.19.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -1
- data/LICENSE.txt +1 -1
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +6 -2
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +1 -1
- data/lib/ood_core/job/adapters/systemd/launcher.rb +252 -0
- data/lib/ood_core/job/adapters/systemd/templates/email.erb.sh +9 -0
- data/lib/ood_core/job/adapters/systemd/templates/script_wrapper.erb.sh +56 -0
- data/lib/ood_core/job/adapters/systemd.rb +230 -0
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +1 -1
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f8d595b211ea1f26d22bac6d441ddc56a1dd4dc46a8439be2d60c4fdb1545b9
|
4
|
+
data.tar.gz: c04312cea53252aa4cac76a1dbccab6d40f974321630f5ac97931145bb404b0d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5e4d7da375953bf188f09f9124380e62a7599c43c2d758bb3fa14a7f33d397b7134cb1b18e1f9844347f2b9e375e0d864e73b6635e563cc2d127ab435eaff1c
|
7
|
+
data.tar.gz: a2f76e1121289d445f3666c6430420d805dede8c49ceb60a79de6b891b9903ded50f0cbca623f2354f63cf3de72d65cfef9b31ae08762b999888368778ca3a37
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [0.19.0] - 02-03-2022
|
11
|
+
|
12
|
+
### Added
|
13
|
+
|
14
|
+
- Systemd adapter in [743](https://github.com/OSC/ood_core/pull/743).
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
|
18
|
+
- The linux host adapter is a little more portable in [333](https://github.com/OSC/ood_core/pull/333).
|
19
|
+
- Improved pod security for the k8s adapter in [748](https://github.com/OSC/ood_core/pull/748).
|
20
|
+
|
10
21
|
## [0.18.1] - 10-18-2021
|
11
22
|
|
12
23
|
### Fixed
|
@@ -411,7 +422,8 @@ Functionally the same as [0.17.3] but with some CI updates.
|
|
411
422
|
### Added
|
412
423
|
- Initial release!
|
413
424
|
|
414
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
425
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.19.0...HEAD
|
426
|
+
[0.19.0]: https://github.com/OSC/ood_core/compare/v0.18.1...v0.19.0
|
415
427
|
[0.18.1]: https://github.com/OSC/ood_core/compare/v0.18.0...v0.18.1
|
416
428
|
[0.18.0]: https://github.com/OSC/ood_core/compare/v0.17.8...v0.18.0
|
417
429
|
[0.17.6]: https://github.com/OSC/ood_core/compare/v0.17.5...v0.17.6
|
data/LICENSE.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License (MIT)
|
2
2
|
|
3
|
-
Copyright (c) 2017-
|
3
|
+
Copyright (c) 2017-2022 Ohio Supercomputer Center
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
@@ -106,7 +106,9 @@ spec:
|
|
106
106
|
allowPrivilegeEscalation: false
|
107
107
|
capabilities:
|
108
108
|
drop:
|
109
|
-
-
|
109
|
+
- ALL
|
110
|
+
seccompProfile:
|
111
|
+
type: RuntimeDefault
|
110
112
|
privileged: false
|
111
113
|
<%- unless spec.init_containers.empty? -%>
|
112
114
|
initContainers:
|
@@ -152,7 +154,9 @@ spec:
|
|
152
154
|
allowPrivilegeEscalation: false
|
153
155
|
capabilities:
|
154
156
|
drop:
|
155
|
-
-
|
157
|
+
- ALL
|
158
|
+
seccompProfile:
|
159
|
+
type: RuntimeDefault
|
156
160
|
privileged: false
|
157
161
|
<%- end # init container loop -%>
|
158
162
|
<%- end # if init containers -%>
|
@@ -73,7 +73,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
|
|
73
73
|
# Get the tmux pane PID for the target session
|
74
74
|
pane_pid=$(tmux list-panes -aF '\#{session_name} \#{pane_pid}' | grep '#{session_name}' | cut -f 2 -d ' ')
|
75
75
|
# Find the Singularity sinit PID child of the pane process
|
76
|
-
pane_sinit_pid=$(pstree -p -l "$pane_pid" |
|
76
|
+
pane_sinit_pid=$(pstree -p -l "$pane_pid" | egrep -o 'sinit[(][[:digit:]]*|shim-init[(][[:digit:]]*' | grep -o '[[:digit:]]*')
|
77
77
|
# Kill sinit which stops both Singularity-based processes and the tmux session
|
78
78
|
kill "$pane_sinit_pid"
|
79
79
|
SCRIPT
|
@@ -0,0 +1,252 @@
|
|
1
|
+
require 'erb'
|
2
|
+
require 'etc'
|
3
|
+
require 'pathname'
|
4
|
+
require 'securerandom'
|
5
|
+
require 'shellwords'
|
6
|
+
require 'time'
|
7
|
+
|
8
|
+
# Object used for simplified communication with SSH hosts
|
9
|
+
#
|
10
|
+
# @api private
|
11
|
+
class OodCore::Job::Adapters::LinuxSystemd::Launcher
|
12
|
+
attr_reader :debug, :site_timeout, :session_name_label, :ssh_hosts,
|
13
|
+
:strict_host_checking, :username
|
14
|
+
# The root exception class that all LinuxSystemd adapter-specific exceptions inherit
|
15
|
+
# from
|
16
|
+
class Error < StandardError; end
|
17
|
+
|
18
|
+
# @param debug Whether the adapter should be used in debug mode
|
19
|
+
# @param site_timeout [#to_i] A period after which the job should be killed or nil
|
20
|
+
# @param ssh_hosts List of hosts to check when scanning for running jobs
|
21
|
+
# @param strict_host_checking Allow SSH to perform strict host checking
|
22
|
+
# @param submit_host The SSH-able host
|
23
|
+
def initialize(
|
24
|
+
debug: false,
|
25
|
+
site_timeout: nil,
|
26
|
+
ssh_hosts:,
|
27
|
+
strict_host_checking: false,
|
28
|
+
submit_host:,
|
29
|
+
**_
|
30
|
+
)
|
31
|
+
@debug = !! debug
|
32
|
+
@site_timeout = site_timeout.to_i
|
33
|
+
@session_name_label = 'ondemand'
|
34
|
+
@ssh_hosts = ssh_hosts
|
35
|
+
@strict_host_checking = strict_host_checking
|
36
|
+
@submit_host = submit_host
|
37
|
+
@username = Etc.getlogin
|
38
|
+
end
|
39
|
+
|
40
|
+
# @return [String] the job id, formatted as "session_name@hostname"
|
41
|
+
# @param script [OodCore::Job::Script] The script object defining the work
|
42
|
+
def start_remote_session(script)
|
43
|
+
cmd = ssh_cmd(submit_host(script), ['/usr/bin/env', 'bash'])
|
44
|
+
|
45
|
+
session_name = unique_session_name
|
46
|
+
output = call(*cmd, stdin: wrapped_script(script, session_name))
|
47
|
+
hostname = parse_hostname(output)
|
48
|
+
|
49
|
+
"#{session_name}@#{hostname}"
|
50
|
+
end
|
51
|
+
|
52
|
+
# Stop the transient systemd user service that backs a session.
#
# @param session_name [#to_s] unit name of the session, without the '.service' suffix
# @param hostname [#to_s] host the session is running on
# @raise [Error] if the remote command fails and interpret_and_raise does
#   not treat the failure as benign
def stop_remote_session(session_name, hostname)
  cmd = ssh_cmd(hostname, ['/usr/bin/env', 'bash'])

  # The unit name comes from a caller-supplied job id and is interpolated
  # into a script fed to a remote bash, so quote it. Names made of letters,
  # digits, '-' and '.' pass through unchanged.
  unit = Shellwords.escape("#{session_name}.service")

  kill_cmd = <<~SCRIPT
    # stop the session by name
    systemctl --user stop #{unit}
  SCRIPT

  call(*cmd, stdin: kill_cmd)
rescue Error => e
  interpret_and_raise(e)
end
|
64
|
+
|
65
|
+
def list_remote_sessions(host: nil)
|
66
|
+
host_list = (host) ? [host] : ssh_hosts
|
67
|
+
|
68
|
+
host_list.map {
|
69
|
+
|hostname| list_remote_systemd_session(hostname)
|
70
|
+
}.flatten.sort_by {
|
71
|
+
|hsh| hsh[:session_name]
|
72
|
+
}
|
73
|
+
end
|
74
|
+
|
75
|
+
def submit_host(script = nil)
|
76
|
+
if script && script.native && script.native['submit_host_override']
|
77
|
+
script.native['submit_host_override']
|
78
|
+
else
|
79
|
+
@submit_host
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
# Run a command in a subprocess and return its stdout; raises Error carrying its stderr if it exits unsuccessfully
|
86
|
+
def call(cmd, *args, env: {}, stdin: "")
|
87
|
+
args = args.map(&:to_s)
|
88
|
+
env = env.to_h
|
89
|
+
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s)
|
90
|
+
s.success? ? o : raise(Error, e)
|
91
|
+
end
|
92
|
+
|
93
|
+
# The full command to ssh into the destination host and execute the command.
|
94
|
+
# SSH options include:
|
95
|
+
# -t Force pseudo-terminal allocation (required to allow tmux to run)
|
96
|
+
# -o BatchMode=yes (set mode to be non-interactive)
|
97
|
+
# if ! strict_host_checking
|
98
|
+
# -o UserKnownHostsFile=/dev/null (do not update the user's known hosts file)
|
99
|
+
# -o StrictHostKeyChecking=no (do not check the user's known hosts file)
|
100
|
+
#
|
101
|
+
# @param destination_host [#to_s] the destination host you wish to ssh into
|
102
|
+
# @param cmd [Array<#to_s>] the command to be executed on the destination host
|
103
|
+
def ssh_cmd(destination_host, cmd)
|
104
|
+
if strict_host_checking
|
105
|
+
[
|
106
|
+
'ssh', '-t',
|
107
|
+
'-o', 'BatchMode=yes',
|
108
|
+
"#{username}@#{destination_host}"
|
109
|
+
].concat(cmd)
|
110
|
+
else
|
111
|
+
[
|
112
|
+
'ssh', '-t',
|
113
|
+
'-o', 'BatchMode=yes',
|
114
|
+
'-o', 'UserKnownHostsFile=/dev/null',
|
115
|
+
'-o', 'StrictHostKeyChecking=no',
|
116
|
+
"#{username}@#{destination_host}"
|
117
|
+
].concat(cmd)
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
def shell
|
122
|
+
ENV['SHELL'] || '/bin/bash'
|
123
|
+
end
|
124
|
+
|
125
|
+
# Wraps a user-provided script into a systemd-run transient service
|
126
|
+
def wrapped_script(script, session_name)
|
127
|
+
content = script.content
|
128
|
+
unless user_script_has_shebang?(script)
|
129
|
+
content = "#!#{shell}\n#{content}"
|
130
|
+
end
|
131
|
+
|
132
|
+
ERB.new(
|
133
|
+
File.read(Pathname.new(__dir__).join('templates/script_wrapper.erb.sh'))
|
134
|
+
).result(binding.tap {|bnd|
|
135
|
+
{
|
136
|
+
'arguments' => script_arguments(script),
|
137
|
+
'cd_to_workdir' => (script.workdir) ? "cd #{script.workdir}" : '',
|
138
|
+
'debug' => debug,
|
139
|
+
'email_on_terminated' => script_email_on_event(script, 'terminated'),
|
140
|
+
'email_on_start' => script_email_on_event(script, 'started'),
|
141
|
+
'environment' => export_env(script),
|
142
|
+
'error_path' => error_path(script),
|
143
|
+
'job_name' => script.job_name.to_s,
|
144
|
+
'output_path' => (script.output_path) ? script.output_path.to_s : '/dev/null',
|
145
|
+
'script_content' => content,
|
146
|
+
'script_timeout' => script_timeout(script),
|
147
|
+
'session_name' => session_name,
|
148
|
+
'ssh_hosts' => ssh_hosts,
|
149
|
+
'workdir' => (script.workdir) ? script.workdir.to_s : '/tmp',
|
150
|
+
}.each{
|
151
|
+
|key, value| bnd.local_variable_set(key, value)
|
152
|
+
}
|
153
|
+
})
|
154
|
+
end
|
155
|
+
|
156
|
+
# Generate the environment export block for this script
|
157
|
+
def export_env(script)
|
158
|
+
environment = script.job_environment
|
159
|
+
(environment ? environment : {}).map{
|
160
|
+
|key, value| "export #{key}=#{Shellwords.escape(value)}"
|
161
|
+
}.sort.join("\n")
|
162
|
+
end
|
163
|
+
|
164
|
+
def script_timeout(script)
|
165
|
+
wall_time = script.wall_time.to_i
|
166
|
+
if wall_time == 0
|
167
|
+
# this is the only way it can be 0
|
168
|
+
# so make it into infinity for systemd to never terminate
|
169
|
+
site_timeout == 0 ? 'infinity' : site_timeout
|
170
|
+
elsif site_timeout != 0
|
171
|
+
[wall_time, site_timeout].min
|
172
|
+
else
|
173
|
+
wall_time
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def script_arguments(script)
|
178
|
+
return '' unless script.args
|
179
|
+
|
180
|
+
Shellwords.join(script.args)
|
181
|
+
end
|
182
|
+
|
183
|
+
def script_email_on_event(script, event)
|
184
|
+
return false unless script.email && script.send("email_on_#{event}")
|
185
|
+
|
186
|
+
ERB.new(
|
187
|
+
File.read(Pathname.new(__dir__).join('templates/email.erb.sh'))
|
188
|
+
).result(binding.tap {|bnd|
|
189
|
+
{
|
190
|
+
'email_recipients' => script.email.map{|addr| Shellwords.escape(addr)}.join(', '),
|
191
|
+
'job_name' => (script.job_name) ? script.job_name : 'LinuxHost_Adapter_Job',
|
192
|
+
'job_status' => event
|
193
|
+
}.each{
|
194
|
+
|key, value| bnd.local_variable_set(key, value)
|
195
|
+
}
|
196
|
+
})
|
197
|
+
end
|
198
|
+
|
199
|
+
def unique_session_name
|
200
|
+
"#{session_name_label}-#{SecureRandom.alphanumeric(10)}"
|
201
|
+
end
|
202
|
+
|
203
|
+
# List all Systemd sessions on destination_host started by this adapter
|
204
|
+
def list_remote_systemd_session(destination_host)
|
205
|
+
cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}-*"])
|
206
|
+
|
207
|
+
# individual units are separated with an empty line
|
208
|
+
call(*cmd).split("\n\n").map do |oneunit|
|
209
|
+
Hash[oneunit.split("\n").map{ |line| line.split('=',2) }].tap do |session_hash|
|
210
|
+
session_hash[:session_name] = session_hash['Id'].delete_suffix('.service')
|
211
|
+
session_hash[:destination_host] = destination_host
|
212
|
+
session_hash[:id] = "#{session_hash[:session_name]}@#{destination_host}"
|
213
|
+
session_hash[:session_created] = Time.parse(session_hash['ExecMainStartTimestamp'])
|
214
|
+
session_hash[:job_name] = session_hash['Description']
|
215
|
+
end
|
216
|
+
end
|
217
|
+
rescue Error => e
|
218
|
+
interpret_and_raise(e)
|
219
|
+
[]
|
220
|
+
end
|
221
|
+
|
222
|
+
# Whether the user's script already begins with a shebang line.
#
# The start of the content is checked directly instead of splitting it into
# lines first: content made up solely of newlines splits into an empty
# array, and calling start_with? on its nil first element raised
# NoMethodError.
#
# @param script [#content] the script whose content is inspected
# @return [Boolean] true only when the content starts with '#!/'
def user_script_has_shebang?(script)
  script.content.to_s.start_with?('#!/')
end
|
226
|
+
|
227
|
+
def error_path(script)
|
228
|
+
return script.error_path.to_s if script.error_path
|
229
|
+
return script.output_path.to_s if script.output_path
|
230
|
+
|
231
|
+
'/dev/null'
|
232
|
+
end
|
233
|
+
|
234
|
+
# under some conditions tmux returns status code 1 but it's not an actual
|
235
|
+
# error. These are when the session is not found or there are no sessions
|
236
|
+
# at all.
|
237
|
+
def interpret_and_raise(error)
|
238
|
+
if error.message.include?('failed to connect to server') # no sessions in tmux 1.8
|
239
|
+
nil
|
240
|
+
elsif error.message.include?('no server running on') # no sessions in tmux 2.7+ message
|
241
|
+
nil
|
242
|
+
else
|
243
|
+
raise error
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
def parse_hostname(output)
|
248
|
+
output.split($/).map do |line|
|
249
|
+
line[/^HOSTNAME:(.*)$/, 1]
|
250
|
+
end.compact.last.to_s
|
251
|
+
end
|
252
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
SSH_HOSTS=(<%= ssh_hosts.join(' ').to_s %>)
|
3
|
+
hostnames=`hostname -A`
|
4
|
+
for host in ${SSH_HOSTS[@]}
|
5
|
+
do
|
6
|
+
if [[ " ${hostnames[@]} " =~ " ${host} " ]]; then
|
7
|
+
hostname=$host
|
8
|
+
fi
|
9
|
+
done
|
10
|
+
|
11
|
+
if [ -z "$hostname" ]; then
|
12
|
+
printf >&2 "ERROR: Can't start job on [${hostnames[@]}] because it does not match any hostname configured \nin ssh_hosts [${SSH_HOSTS[@]}]. The output of 'hostname -A' must match an entry in ssh_hosts \nfrom the cluster configuration."
|
13
|
+
exit 1
|
14
|
+
fi
|
15
|
+
|
16
|
+
echo ""
|
17
|
+
echo "HOSTNAME:$hostname"
|
18
|
+
|
19
|
+
# we need this user to be enabled for lingering or else the newly started
|
20
|
+
# service will end as soon as the ssh session that started it has exited
|
21
|
+
loginctl enable-linger
|
22
|
+
|
23
|
+
# Put the script into a temp file on localhost
|
24
|
+
systemd_service_file="<%= workdir %>/systemd_service.sh"
|
25
|
+
systemd_service_file_pre="<%= workdir %>/systemd_pre.sh"
|
26
|
+
systemd_service_file_post="<%= workdir %>/systemd_post.sh"
|
27
|
+
|
28
|
+
cat << 'SYSTEMD_EXEC_PRE' > "$systemd_service_file_pre"
|
29
|
+
#!/bin/bash
|
30
|
+
<%= cd_to_workdir %>
|
31
|
+
<% if email_on_start %>
|
32
|
+
<%= email_on_start %>
|
33
|
+
<% end %>
|
34
|
+
SYSTEMD_EXEC_PRE
|
35
|
+
|
36
|
+
cat << 'SYSTEMD_EXEC_POST' > "$systemd_service_file_post"
|
37
|
+
#!/bin/bash
|
38
|
+
<%= cd_to_workdir %>
|
39
|
+
<% if email_on_terminated %>
|
40
|
+
<%= email_on_terminated %>
|
41
|
+
<% end %>
|
42
|
+
SYSTEMD_EXEC_POST
|
43
|
+
|
44
|
+
# Create an executable for systemd service to run
|
45
|
+
# Escaped HEREDOC means that we do not have to worry about Shell.escape-ing script_content
|
46
|
+
cat << 'SYSTEMD_EXEC' > "$systemd_service_file"
|
47
|
+
<%= script_content %>
|
48
|
+
SYSTEMD_EXEC
|
49
|
+
|
50
|
+
# Run the script inside a transient systemd user service
|
51
|
+
chmod +x "$systemd_service_file_pre" "$systemd_service_file" "$systemd_service_file_post"
|
52
|
+
<%= cd_to_workdir %>
|
53
|
+
systemd-run --user -r --no-block --unit=<%= session_name %> -p RuntimeMaxSec=<%= script_timeout %> \
|
54
|
+
-p ExecStartPre="$systemd_service_file_pre" -p ExecStartPost="$systemd_service_file_post" \
|
55
|
+
-p StandardOutput="file:<%= output_path %>" -p StandardError="file:<%= error_path %>" \
|
56
|
+
-p Description="<%= job_name %>" "$systemd_service_file"
|
@@ -0,0 +1,230 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "ood_core/refinements/array_extensions"
|
3
|
+
require "ood_core/job/adapters/helper"
|
4
|
+
require "set"
|
5
|
+
|
6
|
+
module OodCore
|
7
|
+
module Job
|
8
|
+
class Factory
|
9
|
+
using Refinements::HashExtensions
|
10
|
+
|
11
|
+
# Build the LinuxSystemd adapter from a configuration
|
12
|
+
# @param config [#to_h] the configuration for job adapter
|
13
|
+
# @option config [Object] :debug (false) Use the adapter in a debug mode
|
14
|
+
# @option config [Object] :max_timeout (nil) The longest 'wall_clock' permissible
|
15
|
+
# @option config [Object] :ssh_hosts (nil) The list of permissible hosts, defaults to :submit_host
|
16
|
+
# @option config [Object] :strict_host_checking (true) Set to false to disable strict host checking and updating the known_hosts file
|
17
|
+
# @option config [Object] :submit_host The SSH target to connect to, may be the head of a round-robin
|
18
|
+
def self.build_systemd(config)
  c = config.to_h.symbolize_keys
  debug = c.fetch(:debug, false)
  max_timeout = c.fetch(:max_timeout, nil)
  # With no explicit list, the submit host is the only permitted host.
  ssh_hosts = c.fetch(:ssh_hosts, [c[:submit_host]])
  strict_host_checking = c.fetch(:strict_host_checking, true)
  submit_host = c[:submit_host]

  Adapters::LinuxSystemd.new(
    ssh_hosts: ssh_hosts,
    launcher: Adapters::LinuxSystemd::Launcher.new(
      debug: debug,
      # The launcher's keyword is site_timeout. Passing max_timeout: here
      # was swallowed by the launcher's **_ catch-all, so the configured
      # limit was never applied to submitted jobs.
      site_timeout: max_timeout,
      ssh_hosts: ssh_hosts,
      strict_host_checking: strict_host_checking,
      submit_host: submit_host,
    )
  )
end
|
37
|
+
end
|
38
|
+
|
39
|
+
module Adapters
|
40
|
+
# An adapter object that describes the communication with a remote host
|
41
|
+
# for job management.
|
42
|
+
class LinuxSystemd < Adapter
|
43
|
+
using Refinements::ArrayExtensions
|
44
|
+
|
45
|
+
require "ood_core/job/adapters/systemd/launcher"
|
46
|
+
|
47
|
+
def initialize(ssh_hosts:, launcher:)
|
48
|
+
@launcher = launcher
|
49
|
+
@ssh_hosts = Set.new(ssh_hosts)
|
50
|
+
end
|
51
|
+
|
52
|
+
# Submit a job with the attributes defined in the job template instance
|
53
|
+
# @param script [Script] script object that describes the script and
|
54
|
+
# attributes for the submitted job
|
55
|
+
# @param after [#to_s, Array<#to_s>] Dependency scheduling is not available; setting raises JobAdapterError
|
56
|
+
# @param afterok [#to_s, Array<#to_s>] Dependency scheduling is not available; setting raises JobAdapterError
|
57
|
+
# @param afternotok [#to_s, Array<#to_s>] Dependency scheduling is not available; setting raises JobAdapterError
|
58
|
+
# @param afterany [#to_s, Array<#to_s>] Dependency scheduling is not available; setting raises JobAdapterError
|
59
|
+
# @raise [JobAdapterError] if something goes wrong submitting a job
|
60
|
+
# @return [String] the job id returned after successfully submitting a
|
61
|
+
# job
|
62
|
+
# @see Adapter#submit
|
63
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
64
|
+
unless (after.empty? && afterok.empty? && afternotok.empty? && afterany.empty?)
|
65
|
+
raise JobAdapterError, 'Scheduling subsequent jobs is not available.'
|
66
|
+
end
|
67
|
+
|
68
|
+
@launcher.start_remote_session(script)
|
69
|
+
rescue Launcher::Error => e
|
70
|
+
raise JobAdapterError, e.message
|
71
|
+
end
|
72
|
+
|
73
|
+
# Retrieve info for all jobs from the resource manager
|
74
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
75
|
+
# @return [Array<Info>] information describing submitted jobs
|
76
|
+
# @see Adapter#info_all
|
77
|
+
def info_all(attrs: nil, host: nil)
|
78
|
+
host_permitted?(host) if host
|
79
|
+
|
80
|
+
@launcher.list_remote_sessions(host: host).map{
|
81
|
+
|ls_output| ls_to_info(ls_output)
|
82
|
+
}
|
83
|
+
rescue Launcher::Error => e
|
84
|
+
raise JobAdapterError, e.message
|
85
|
+
end
|
86
|
+
|
87
|
+
# Retrieve info for all jobs for a given owner or owners from the
|
88
|
+
# resource manager
|
89
|
+
# Note: owner and attrs are present only to complete the interface and are ignored
|
90
|
+
# Note: since this API is used in production no errors or warnings are thrown / issued
|
91
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
92
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
93
|
+
# @return [Array<Info>] information describing submitted jobs
|
94
|
+
def info_where_owner(_, attrs: nil)
|
95
|
+
info_all
|
96
|
+
end
|
97
|
+
|
98
|
+
# Iterate over each job Info object
|
99
|
+
# @param attrs [Array<symbol>] attrs is present only to complete the interface and is ignored
|
100
|
+
# @yield [Info] of each job to block
|
101
|
+
# @return [Enumerator] if no block given
|
102
|
+
def info_all_each(attrs: nil)
|
103
|
+
return to_enum(:info_all_each, attrs: attrs) unless block_given?
|
104
|
+
|
105
|
+
info_all(attrs: attrs).each do |job|
|
106
|
+
yield job
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Iterate over each job Info object
|
111
|
+
# @param owner [#to_s, Array<#to_s>] owner is present only to complete the interface and is ignored
|
112
|
+
# @param attrs [Array<symbol>] attrs is present only to complete the interface and is ignored
|
113
|
+
# @yield [Info] of each job to block
|
114
|
+
# @return [Enumerator] if no block given
|
115
|
+
def info_where_owner_each(owner, attrs: nil)
|
116
|
+
return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?
|
117
|
+
|
118
|
+
info_where_owner(owner, attrs: attrs).each do |job|
|
119
|
+
yield job
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# Whether the adapter supports job arrays
|
124
|
+
# @return [Boolean] - false
|
125
|
+
def supports_job_arrays?
|
126
|
+
false
|
127
|
+
end
|
128
|
+
|
129
|
+
# Retrieve job info from the SSH host
|
130
|
+
# @param id [#to_s] the id of the job
|
131
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
132
|
+
# @return [Info] information describing submitted job
|
133
|
+
# @see Adapter#info
|
134
|
+
def info(id)
|
135
|
+
_, host = parse_job_id(id)
|
136
|
+
job = info_all(host: host).select{|info| info.id == id}.first
|
137
|
+
(job) ? job : Info.new(id: id, status: :completed)
|
138
|
+
rescue Launcher::Error => e
|
139
|
+
raise JobAdapterError, e.message
|
140
|
+
end
|
141
|
+
|
142
|
+
# Retrieve job status from resource manager
|
143
|
+
# @note Optimized slightly over retrieving complete job information from server
|
144
|
+
# @abstract Subclass is expected to implement {#status}
|
145
|
+
# @raise [NotImplementedError] if subclass did not define {#status}
|
146
|
+
# @param id [#to_s] the id of the job
|
147
|
+
# @return [Status] status of job
|
148
|
+
def status(id)
|
149
|
+
_, host = parse_job_id(id)
|
150
|
+
job = info_all(host: host).select{|info| info.id == id}.first
|
151
|
+
|
152
|
+
Status.new(state: (job) ? :running : :completed)
|
153
|
+
rescue Launcher::Error => e
|
154
|
+
raise JobAdapterError, e.message
|
155
|
+
end
|
156
|
+
|
157
|
+
# Put the submitted job on hold
|
158
|
+
# @abstract Subclass is expected to implement {#hold}
|
159
|
+
# @raise [NotImplementedError] if subclass did not define {#hold}
|
160
|
+
# @param id [#to_s] the id of the job
|
161
|
+
# @return [void]
|
162
|
+
def hold(id)
|
163
|
+
# Consider sending SIGSTOP?
|
164
|
+
raise NotImplementedError, "subclass did not define #hold"
|
165
|
+
end
|
166
|
+
|
167
|
+
# Release the job that is on hold
|
168
|
+
# @abstract Subclass is expected to implement {#release}
|
169
|
+
# @raise [NotImplementedError] if subclass did not define {#release}
|
170
|
+
# @param id [#to_s] the id of the job
|
171
|
+
# @return [void]
|
172
|
+
def release(id)
|
173
|
+
# Consider sending SIGCONT
|
174
|
+
raise NotImplementedError, "subclass did not define #release"
|
175
|
+
end
|
176
|
+
|
177
|
+
# Delete the submitted job
|
178
|
+
# @abstract Subclass is expected to implement {#delete}
|
179
|
+
# @raise [NotImplementedError] if subclass did not define {#delete}
|
180
|
+
# @param id [#to_s] the id of the job
|
181
|
+
# @return [void]
|
182
|
+
def delete(id)
|
183
|
+
session_name, destination_host = parse_job_id(id)
|
184
|
+
@launcher.stop_remote_session(session_name, destination_host)
|
185
|
+
rescue Launcher::Error => e
|
186
|
+
raise JobAdapterError, e.message
|
187
|
+
end
|
188
|
+
|
189
|
+
def directive_prefix
|
190
|
+
nil
|
191
|
+
end
|
192
|
+
|
193
|
+
private
|
194
|
+
|
195
|
+
def host_permitted?(destination_host)
|
196
|
+
raise JobAdapterError, "Requested destination host (#{destination_host}) not permitted" unless @ssh_hosts.include?(destination_host)
|
197
|
+
end
|
198
|
+
|
199
|
+
def parse_job_id(id)
|
200
|
+
raise JobAdapterError, "#{id} is not a valid LinuxSystemd adapter id because it is missing the '@'." unless id.include?('@')
|
201
|
+
|
202
|
+
return id.split('@')
|
203
|
+
end
|
204
|
+
|
205
|
+
# Convert the returned Hash into an Info object
|
206
|
+
def ls_to_info(ls_output)
  started = ls_output[:session_created].to_i
  elapsed = Time.now.to_i - started
  # This adapter never assigns @submit_host itself (initialize stores only
  # the launcher and the permitted hosts), so reading it always gave nil.
  # The configured host is held by the launcher.
  submit_host = @launcher.submit_host

  Info.new(
    accounting_id: nil,
    allocated_nodes: [NodeInfo.new(name: ls_output[:destination_host], procs: 1)],
    cpu_time: elapsed,
    dispatch_time: started,
    id: ls_output[:id],
    job_name: ls_output[:job_name],
    job_owner: Etc.getlogin,
    native: ls_output,
    procs: 1,
    queue_name: "LinuxSystemd adapter for #{submit_host}",
    status: :running,
    # A point in time like dispatch_time, not a duration: the unit starts
    # as soon as it is submitted, so both share the start timestamp.
    submission_time: started,
    submit_host: submit_host,
    wallclock_time: elapsed
  )
end
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
230
|
+
end
|
data/lib/ood_core/version.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -29,5 +29,5 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.add_development_dependency "rspec", "~> 3.0"
|
30
30
|
spec.add_development_dependency "pry", "~> 0.10"
|
31
31
|
spec.add_development_dependency "timecop", "~> 0.8"
|
32
|
-
spec.add_development_dependency "climate_control", "~> 0.
|
32
|
+
spec.add_development_dependency "climate_control", "~> 1.0.0"
|
33
33
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2022-03-02 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -122,14 +122,14 @@ dependencies:
|
|
122
122
|
requirements:
|
123
123
|
- - "~>"
|
124
124
|
- !ruby/object:Gem::Version
|
125
|
-
version: 0.
|
125
|
+
version: 1.0.0
|
126
126
|
type: :development
|
127
127
|
prerelease: false
|
128
128
|
version_requirements: !ruby/object:Gem::Requirement
|
129
129
|
requirements:
|
130
130
|
- - "~>"
|
131
131
|
- !ruby/object:Gem::Version
|
132
|
-
version: 0.
|
132
|
+
version: 1.0.0
|
133
133
|
description: Open OnDemand core library that provides support for an HPC Center to
|
134
134
|
globally define HPC services that web applications can then take advantage of.
|
135
135
|
email:
|
@@ -186,6 +186,10 @@ files:
|
|
186
186
|
- lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb
|
187
187
|
- lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb
|
188
188
|
- lib/ood_core/job/adapters/slurm.rb
|
189
|
+
- lib/ood_core/job/adapters/systemd.rb
|
190
|
+
- lib/ood_core/job/adapters/systemd/launcher.rb
|
191
|
+
- lib/ood_core/job/adapters/systemd/templates/email.erb.sh
|
192
|
+
- lib/ood_core/job/adapters/systemd/templates/script_wrapper.erb.sh
|
189
193
|
- lib/ood_core/job/adapters/torque.rb
|
190
194
|
- lib/ood_core/job/adapters/torque/attributes.rb
|
191
195
|
- lib/ood_core/job/adapters/torque/batch.rb
|