ood_core 0.18.1 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -1
- data/LICENSE.txt +1 -1
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +6 -2
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +1 -1
- data/lib/ood_core/job/adapters/systemd/launcher.rb +252 -0
- data/lib/ood_core/job/adapters/systemd/templates/email.erb.sh +9 -0
- data/lib/ood_core/job/adapters/systemd/templates/script_wrapper.erb.sh +56 -0
- data/lib/ood_core/job/adapters/systemd.rb +230 -0
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +1 -1
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6f8d595b211ea1f26d22bac6d441ddc56a1dd4dc46a8439be2d60c4fdb1545b9
|
4
|
+
data.tar.gz: c04312cea53252aa4cac76a1dbccab6d40f974321630f5ac97931145bb404b0d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5e4d7da375953bf188f09f9124380e62a7599c43c2d758bb3fa14a7f33d397b7134cb1b18e1f9844347f2b9e375e0d864e73b6635e563cc2d127ab435eaff1c
|
7
|
+
data.tar.gz: a2f76e1121289d445f3666c6430420d805dede8c49ceb60a79de6b891b9903ded50f0cbca623f2354f63cf3de72d65cfef9b31ae08762b999888368778ca3a37
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [0.19.0] - 02-03-2022
|
11
|
+
|
12
|
+
### Added
|
13
|
+
|
14
|
+
- Systemd adapter in [743](https://github.com/OSC/ood_core/pull/743).
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
|
18
|
+
- The linux host adapter is a little more portable in [333](https://github.com/OSC/ood_core/pull/333).
|
19
|
+
- Improved pod security for the k8s adapter in [748](https://github.com/OSC/ood_core/pull/748).
|
20
|
+
|
10
21
|
## [0.18.1] - 10-18-2021
|
11
22
|
|
12
23
|
### Fixed
|
@@ -411,7 +422,8 @@ Functionally the same as [0.17.3] but with some CI updates.
|
|
411
422
|
### Added
|
412
423
|
- Initial release!
|
413
424
|
|
414
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
425
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.19.0...HEAD
|
426
|
+
[0.19.0]: https://github.com/OSC/ood_core/compare/v0.18.1...v0.19.0
|
415
427
|
[0.18.1]: https://github.com/OSC/ood_core/compare/v0.18.0...v0.18.1
|
416
428
|
[0.18.0]: https://github.com/OSC/ood_core/compare/v0.17.8...v0.18.0
|
417
429
|
[0.17.6]: https://github.com/OSC/ood_core/compare/v0.17.5...v0.17.6
|
data/LICENSE.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License (MIT)
|
2
2
|
|
3
|
-
Copyright (c) 2017-
|
3
|
+
Copyright (c) 2017-2022 Ohio Supercomputer Center
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
@@ -106,7 +106,9 @@ spec:
|
|
106
106
|
allowPrivilegeEscalation: false
|
107
107
|
capabilities:
|
108
108
|
drop:
|
109
|
-
-
|
109
|
+
- ALL
|
110
|
+
seccompProfile:
|
111
|
+
type: RuntimeDefault
|
110
112
|
privileged: false
|
111
113
|
<%- unless spec.init_containers.empty? -%>
|
112
114
|
initContainers:
|
@@ -152,7 +154,9 @@ spec:
|
|
152
154
|
allowPrivilegeEscalation: false
|
153
155
|
capabilities:
|
154
156
|
drop:
|
155
|
-
-
|
157
|
+
- ALL
|
158
|
+
seccompProfile:
|
159
|
+
type: RuntimeDefault
|
156
160
|
privileged: false
|
157
161
|
<%- end # init container loop -%>
|
158
162
|
<%- end # if init containers -%>
|
@@ -73,7 +73,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
|
|
73
73
|
# Get the tmux pane PID for the target session
|
74
74
|
pane_pid=$(tmux list-panes -aF '\#{session_name} \#{pane_pid}' | grep '#{session_name}' | cut -f 2 -d ' ')
|
75
75
|
# Find the Singularity sinit PID child of the pane process
|
76
|
-
pane_sinit_pid=$(pstree -p -l "$pane_pid" |
|
76
|
+
pane_sinit_pid=$(pstree -p -l "$pane_pid" | egrep -o 'sinit[(][[:digit:]]*|shim-init[(][[:digit:]]*' | grep -o '[[:digit:]]*')
|
77
77
|
# Kill sinit which stops both Singularity-based processes and the tmux session
|
78
78
|
kill "$pane_sinit_pid"
|
79
79
|
SCRIPT
|
@@ -0,0 +1,252 @@
|
|
1
|
+
require 'erb'
|
2
|
+
require 'etc'
|
3
|
+
require 'pathname'
|
4
|
+
require 'securerandom'
|
5
|
+
require 'shellwords'
|
6
|
+
require 'time'
|
7
|
+
|
8
|
+
# Object used for simplified communication with SSH hosts
|
9
|
+
#
|
10
|
+
# @api private
|
11
|
+
class OodCore::Job::Adapters::LinuxSystemd::Launcher
|
12
|
+
attr_reader :debug, :site_timeout, :session_name_label, :ssh_hosts,
|
13
|
+
:strict_host_checking, :username
|
14
|
+
# The root exception class that all LinuxSystemd adapter-specific exceptions inherit
|
15
|
+
# from
|
16
|
+
class Error < StandardError; end
|
17
|
+
|
18
|
+
# @param debug Whether the adapter should be used in debug mode
|
19
|
+
# @param site_timeout [#to_i] A period after which the job should be killed or nil
|
20
|
+
# @param ssh_hosts List of hosts to check when scanning for running jobs
|
21
|
+
# @param strict_host_checking Allow SSH to perform strict host checking
|
22
|
+
# @param submit_host The SSH-able host
|
23
|
+
def initialize(
|
24
|
+
debug: false,
|
25
|
+
site_timeout: nil,
|
26
|
+
ssh_hosts:,
|
27
|
+
strict_host_checking: false,
|
28
|
+
submit_host:,
|
29
|
+
**_
|
30
|
+
)
|
31
|
+
@debug = !! debug
|
32
|
+
@site_timeout = site_timeout.to_i
|
33
|
+
@session_name_label = 'ondemand'
|
34
|
+
@ssh_hosts = ssh_hosts
|
35
|
+
@strict_host_checking = strict_host_checking
|
36
|
+
@submit_host = submit_host
|
37
|
+
@username = Etc.getlogin
|
38
|
+
end
|
39
|
+
|
40
|
+
# @param hostname [#to_s] The hostname to submit the work to
|
41
|
+
# @param script [OodCore::Job::Script] The script object defining the work
|
42
|
+
def start_remote_session(script)
|
43
|
+
cmd = ssh_cmd(submit_host(script), ['/usr/bin/env', 'bash'])
|
44
|
+
|
45
|
+
session_name = unique_session_name
|
46
|
+
output = call(*cmd, stdin: wrapped_script(script, session_name))
|
47
|
+
hostname = parse_hostname(output)
|
48
|
+
|
49
|
+
"#{session_name}@#{hostname}"
|
50
|
+
end
|
51
|
+
|
52
|
+
def stop_remote_session(session_name, hostname)
|
53
|
+
cmd = ssh_cmd(hostname, ['/usr/bin/env', 'bash'])
|
54
|
+
|
55
|
+
kill_cmd = <<~SCRIPT
|
56
|
+
# stop the session by name
|
57
|
+
systemctl --user stop #{session_name}.service
|
58
|
+
SCRIPT
|
59
|
+
|
60
|
+
call(*cmd, stdin: kill_cmd)
|
61
|
+
rescue Error => e
|
62
|
+
interpret_and_raise(e)
|
63
|
+
end
|
64
|
+
|
65
|
+
def list_remote_sessions(host: nil)
|
66
|
+
host_list = (host) ? [host] : ssh_hosts
|
67
|
+
|
68
|
+
host_list.map {
|
69
|
+
|hostname| list_remote_systemd_session(hostname)
|
70
|
+
}.flatten.sort_by {
|
71
|
+
|hsh| hsh[:session_name]
|
72
|
+
}
|
73
|
+
end
|
74
|
+
|
75
|
+
def submit_host(script = nil)
|
76
|
+
if script && script.native && script.native['submit_host_override']
|
77
|
+
script.native['submit_host_override']
|
78
|
+
else
|
79
|
+
@submit_host
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
# Call a forked command, returning stdout or raising Error on failure
|
86
|
+
def call(cmd, *args, env: {}, stdin: "")
|
87
|
+
args = args.map(&:to_s)
|
88
|
+
env = env.to_h
|
89
|
+
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s)
|
90
|
+
s.success? ? o : raise(Error, e)
|
91
|
+
end
|
92
|
+
|
93
|
+
# The full command to ssh into the destination host and execute the command.
|
94
|
+
# SSH options include:
|
95
|
+
# -t Force pseudo-terminal allocation (required to allow tmux to run)
|
96
|
+
# -o BatchMode=yes (set mode to be non-interactive)
|
97
|
+
# if ! strict_host_checking
|
98
|
+
# -o UserKnownHostsFile=/dev/null (do not update the user's known hosts file)
|
99
|
+
# -o StrictHostKeyChecking=no (do not check the user's known hosts file)
|
100
|
+
#
|
101
|
+
# @param destination_host [#to_s] the destination host you wish to ssh into
|
102
|
+
# @param cmd [Array<#to_s>] the command to be executed on the destination host
|
103
|
+
def ssh_cmd(destination_host, cmd)
|
104
|
+
if strict_host_checking
|
105
|
+
[
|
106
|
+
'ssh', '-t',
|
107
|
+
'-o', 'BatchMode=yes',
|
108
|
+
"#{username}@#{destination_host}"
|
109
|
+
].concat(cmd)
|
110
|
+
else
|
111
|
+
[
|
112
|
+
'ssh', '-t',
|
113
|
+
'-o', 'BatchMode=yes',
|
114
|
+
'-o', 'UserKnownHostsFile=/dev/null',
|
115
|
+
'-o', 'StrictHostKeyChecking=no',
|
116
|
+
"#{username}@#{destination_host}"
|
117
|
+
].concat(cmd)
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
def shell
|
122
|
+
ENV['SHELL'] || '/bin/bash'
|
123
|
+
end
|
124
|
+
|
125
|
+
# Wraps a user-provided script into a systemd-run transient service
|
126
|
+
def wrapped_script(script, session_name)
|
127
|
+
content = script.content
|
128
|
+
unless user_script_has_shebang?(script)
|
129
|
+
content = "#!#{shell}\n#{content}"
|
130
|
+
end
|
131
|
+
|
132
|
+
ERB.new(
|
133
|
+
File.read(Pathname.new(__dir__).join('templates/script_wrapper.erb.sh'))
|
134
|
+
).result(binding.tap {|bnd|
|
135
|
+
{
|
136
|
+
'arguments' => script_arguments(script),
|
137
|
+
'cd_to_workdir' => (script.workdir) ? "cd #{script.workdir}" : '',
|
138
|
+
'debug' => debug,
|
139
|
+
'email_on_terminated' => script_email_on_event(script, 'terminated'),
|
140
|
+
'email_on_start' => script_email_on_event(script, 'started'),
|
141
|
+
'environment' => export_env(script),
|
142
|
+
'error_path' => error_path(script),
|
143
|
+
'job_name' => script.job_name.to_s,
|
144
|
+
'output_path' => (script.output_path) ? script.output_path.to_s : '/dev/null',
|
145
|
+
'script_content' => content,
|
146
|
+
'script_timeout' => script_timeout(script),
|
147
|
+
'session_name' => session_name,
|
148
|
+
'ssh_hosts' => ssh_hosts,
|
149
|
+
'workdir' => (script.workdir) ? script.workdir.to_s : '/tmp',
|
150
|
+
}.each{
|
151
|
+
|key, value| bnd.local_variable_set(key, value)
|
152
|
+
}
|
153
|
+
})
|
154
|
+
end
|
155
|
+
|
156
|
+
# Generate the environment export block for this script
|
157
|
+
def export_env(script)
|
158
|
+
environment = script.job_environment
|
159
|
+
(environment ? environment : {}).map{
|
160
|
+
|key, value| "export #{key}=#{Shellwords.escape(value)}"
|
161
|
+
}.sort.join("\n")
|
162
|
+
end
|
163
|
+
|
164
|
+
def script_timeout(script)
|
165
|
+
wall_time = script.wall_time.to_i
|
166
|
+
if wall_time == 0
|
167
|
+
# this is the only way it can be 0
|
168
|
+
# so make it into infinity for systemd to never terminate
|
169
|
+
site_timeout == 0 ? 'infinity' : site_timeout
|
170
|
+
elsif site_timeout != 0
|
171
|
+
[wall_time, site_timeout].min
|
172
|
+
else
|
173
|
+
wall_time
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def script_arguments(script)
|
178
|
+
return '' unless script.args
|
179
|
+
|
180
|
+
Shellwords.join(script.args)
|
181
|
+
end
|
182
|
+
|
183
|
+
def script_email_on_event(script, event)
|
184
|
+
return false unless script.email && script.send("email_on_#{event}")
|
185
|
+
|
186
|
+
ERB.new(
|
187
|
+
File.read(Pathname.new(__dir__).join('templates/email.erb.sh'))
|
188
|
+
).result(binding.tap {|bnd|
|
189
|
+
{
|
190
|
+
'email_recipients' => script.email.map{|addr| Shellwords.escape(addr)}.join(', '),
|
191
|
+
'job_name' => (script.job_name) ? script.job_name : 'LinuxHost_Adapter_Job',
|
192
|
+
'job_status' => event
|
193
|
+
}.each{
|
194
|
+
|key, value| bnd.local_variable_set(key, value)
|
195
|
+
}
|
196
|
+
})
|
197
|
+
end
|
198
|
+
|
199
|
+
def unique_session_name
|
200
|
+
"#{session_name_label}-#{SecureRandom.alphanumeric(10)}"
|
201
|
+
end
|
202
|
+
|
203
|
+
# List all Systemd sessions on destination_host started by this adapter
|
204
|
+
def list_remote_systemd_session(destination_host)
|
205
|
+
cmd = ssh_cmd(destination_host, ['systemctl', '--user', 'show', '-t', 'service', '--state=running', "#{session_name_label}-*"])
|
206
|
+
|
207
|
+
# individual units are separated with an empty line
|
208
|
+
call(*cmd).split("\n\n").map do |oneunit|
|
209
|
+
Hash[oneunit.split("\n").map{ |line| line.split('=',2) }].tap do |session_hash|
|
210
|
+
session_hash[:session_name] = session_hash['Id'].delete_suffix('.service')
|
211
|
+
session_hash[:destination_host] = destination_host
|
212
|
+
session_hash[:id] = "#{session_hash[:session_name]}@#{destination_host}"
|
213
|
+
session_hash[:session_created] = Time.parse(session_hash['ExecMainStartTimestamp'])
|
214
|
+
session_hash[:job_name] = session_hash['Description']
|
215
|
+
end
|
216
|
+
end
|
217
|
+
rescue Error => e
|
218
|
+
interpret_and_raise(e)
|
219
|
+
[]
|
220
|
+
end
|
221
|
+
|
222
|
+
def user_script_has_shebang?(script)
|
223
|
+
return false if script.content.empty?
|
224
|
+
script.content.split("\n").first.start_with?('#!/')
|
225
|
+
end
|
226
|
+
|
227
|
+
def error_path(script)
|
228
|
+
return script.error_path.to_s if script.error_path
|
229
|
+
return script.output_path.to_s if script.output_path
|
230
|
+
|
231
|
+
'/dev/null'
|
232
|
+
end
|
233
|
+
|
234
|
+
# under some conditions tmux returns status code 1 but it's not an actual
|
235
|
+
# error. These are when the session is not found or there are no sessions
|
236
|
+
# at all.
|
237
|
+
def interpret_and_raise(error)
|
238
|
+
if error.message.include?('failed to connect to server') # no sessions in tmux 1.8
|
239
|
+
nil
|
240
|
+
elsif error.message.include?('no server running on') # no sessions in tmux 2.7+ message
|
241
|
+
nil
|
242
|
+
else
|
243
|
+
raise error
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
def parse_hostname(output)
|
248
|
+
output.split($/).map do |line|
|
249
|
+
line[/^HOSTNAME:(.*)$/, 1]
|
250
|
+
end.compact.last.to_s
|
251
|
+
end
|
252
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
SSH_HOSTS=(<%= ssh_hosts.join(' ').to_s %>)
|
3
|
+
hostnames=`hostname -A`
|
4
|
+
for host in ${SSH_HOSTS[@]}
|
5
|
+
do
|
6
|
+
if [[ " ${hostnames[@]} " =~ " ${host} " ]]; then
|
7
|
+
hostname=$host
|
8
|
+
fi
|
9
|
+
done
|
10
|
+
|
11
|
+
if [ -z "$hostname" ]; then
|
12
|
+
printf >&2 "ERROR: Can't start job on [${hostnames[@]}] because it does not match any hostname configured \nin ssh_hosts [${SSH_HOSTS[@]}]. The output of 'hostname -A' must match an entry in ssh_hosts \nfrom the cluster configuration."
|
13
|
+
exit 1
|
14
|
+
fi
|
15
|
+
|
16
|
+
echo ""
|
17
|
+
echo "HOSTNAME:$hostname"
|
18
|
+
|
19
|
+
# we need this user to be enabled for lingering or else the newly started
|
20
|
+
# service will end as soon as the ssh session starting has exited
|
21
|
+
loginctl enable-linger
|
22
|
+
|
23
|
+
# Put the script into a temp file on localhost
|
24
|
+
systemd_service_file="<%= workdir %>/systemd_service.sh"
|
25
|
+
systemd_service_file_pre="<%= workdir %>/systemd_pre.sh"
|
26
|
+
systemd_service_file_post="<%= workdir %>/systemd_post.sh"
|
27
|
+
|
28
|
+
cat << 'SYSTEMD_EXEC_PRE' > "$systemd_service_file_pre"
|
29
|
+
#!/bin/bash
|
30
|
+
<%= cd_to_workdir %>
|
31
|
+
<% if email_on_start %>
|
32
|
+
<%= email_on_start %>
|
33
|
+
<% end %>
|
34
|
+
SYSTEMD_EXEC_PRE
|
35
|
+
|
36
|
+
cat << 'SYSTEMD_EXEC_POST' > "$systemd_service_file_post"
|
37
|
+
#!/bin/bash
|
38
|
+
<%= cd_to_workdir %>
|
39
|
+
<% if email_on_terminated %>
|
40
|
+
<%= email_on_terminated %>
|
41
|
+
<% end %>
|
42
|
+
SYSTEMD_EXEC_POST
|
43
|
+
|
44
|
+
# Create an executable for systemd service to run
|
45
|
+
# Escaped HEREDOC means that we do not have to worry about Shell.escape-ing script_content
|
46
|
+
cat << 'SYSTEMD_EXEC' > "$systemd_service_file"
|
47
|
+
<%= script_content %>
|
48
|
+
SYSTEMD_EXEC
|
49
|
+
|
50
|
+
# Run the script inside a transient systemd user service
|
51
|
+
chmod +x "$systemd_service_file_pre" "$systemd_service_file" "$systemd_service_file_post"
|
52
|
+
<%= cd_to_workdir %>
|
53
|
+
systemd-run --user -r --no-block --unit=<%= session_name %> -p RuntimeMaxSec=<%= script_timeout %> \
|
54
|
+
-p ExecStartPre="$systemd_service_file_pre" -p ExecStartPost="$systemd_service_file_post" \
|
55
|
+
-p StandardOutput="file:<%= output_path %>" -p StandardError="file:<%= error_path %>" \
|
56
|
+
-p Description="<%= job_name %>" "$systemd_service_file"
|
@@ -0,0 +1,230 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "ood_core/refinements/array_extensions"
|
3
|
+
require "ood_core/job/adapters/helper"
|
4
|
+
require "set"
|
5
|
+
|
6
|
+
module OodCore
|
7
|
+
module Job
|
8
|
+
class Factory
|
9
|
+
using Refinements::HashExtensions
|
10
|
+
|
11
|
+
# Build the LinuxSystemd adapter from a configuration
|
12
|
+
# @param config [#to_h] the configuration for job adapter
|
13
|
+
# @option config [Object] :debug (false) Use the adapter in a debug mode
|
14
|
+
# @option config [Object] :max_timeout (nil) The longest 'wall_clock' permissible
|
15
|
+
# @option config [Object] :ssh_hosts (nil) The list of permissible hosts, defaults to :submit_host
|
16
|
+
# @option config [Object] :strict_host_checking (true) Set to false to disable strict host checking and updating the known_hosts file
|
17
|
+
# @option config [Object] :submit_host The SSH target to connect to, may be the head of a round-robin
|
18
|
+
def self.build_systemd(config)
|
19
|
+
c = config.to_h.symbolize_keys
|
20
|
+
debug = c.fetch(:debug, false)
|
21
|
+
max_timeout = c.fetch(:max_timeout, nil)
|
22
|
+
ssh_hosts = c.fetch(:ssh_hosts, [c[:submit_host]])
|
23
|
+
strict_host_checking = c.fetch(:strict_host_checking, true)
|
24
|
+
submit_host = c[:submit_host]
|
25
|
+
|
26
|
+
Adapters::LinuxSystemd.new(
|
27
|
+
ssh_hosts: ssh_hosts,
|
28
|
+
launcher: Adapters::LinuxSystemd::Launcher.new(
|
29
|
+
debug: debug,
|
30
|
+
max_timeout: max_timeout,
|
31
|
+
ssh_hosts: ssh_hosts,
|
32
|
+
strict_host_checking: strict_host_checking,
|
33
|
+
submit_host: submit_host,
|
34
|
+
)
|
35
|
+
)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
module Adapters
|
40
|
+
# An adapter object that describes the communication with a remote host
|
41
|
+
# for job management.
|
42
|
+
class LinuxSystemd < Adapter
|
43
|
+
using Refinements::ArrayExtensions
|
44
|
+
|
45
|
+
require "ood_core/job/adapters/systemd/launcher"
|
46
|
+
|
47
|
+
def initialize(ssh_hosts:, launcher:)
|
48
|
+
@launcher = launcher
|
49
|
+
@ssh_hosts = Set.new(ssh_hosts)
|
50
|
+
end
|
51
|
+
|
52
|
+
# Submit a job with the attributes defined in the job template instance
|
53
|
+
# @param script [Script] script object that describes the script and
|
54
|
+
# attributes for the submitted job
|
55
|
+
# @param after [#to_s, Array<#to_s>] No scheduling is available; setting this raises JobAdapterError
|
56
|
+
# @param afterok [#to_s, Array<#to_s>] No scheduling is available; setting this raises JobAdapterError
|
57
|
+
# @param afternotok [#to_s, Array<#to_s>] No scheduling is available; setting this raises JobAdapterError
|
58
|
+
# @param afterany [#to_s, Array<#to_s>] No scheduling is available; setting this raises JobAdapterError
|
59
|
+
# @raise [JobAdapterError] if something goes wrong submitting a job
|
60
|
+
# @return [String] the job id returned after successfully submitting a
|
61
|
+
# job
|
62
|
+
# @see Adapter#submit
|
63
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
64
|
+
unless (after.empty? && afterok.empty? && afternotok.empty? && afterany.empty?)
|
65
|
+
raise JobAdapterError, 'Scheduling subsequent jobs is not available.'
|
66
|
+
end
|
67
|
+
|
68
|
+
@launcher.start_remote_session(script)
|
69
|
+
rescue Launcher::Error => e
|
70
|
+
raise JobAdapterError, e.message
|
71
|
+
end
|
72
|
+
|
73
|
+
# Retrieve info for all jobs from the resource manager
|
74
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
75
|
+
# @return [Array<Info>] information describing submitted jobs
|
76
|
+
# @see Adapter#info_all
|
77
|
+
def info_all(attrs: nil, host: nil)
|
78
|
+
host_permitted?(host) if host
|
79
|
+
|
80
|
+
@launcher.list_remote_sessions(host: host).map{
|
81
|
+
|ls_output| ls_to_info(ls_output)
|
82
|
+
}
|
83
|
+
rescue Launcher::Error => e
|
84
|
+
raise JobAdapterError, e.message
|
85
|
+
end
|
86
|
+
|
87
|
+
# Retrieve info for all jobs for a given owner or owners from the
|
88
|
+
# resource manager
|
89
|
+
# Note: owner and attrs are present only to complete the interface and are ignored
|
90
|
+
# Note: since this API is used in production no errors or warnings are thrown / issued
|
91
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
92
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
93
|
+
# @return [Array<Info>] information describing submitted jobs
|
94
|
+
def info_where_owner(_, attrs: nil)
|
95
|
+
info_all
|
96
|
+
end
|
97
|
+
|
98
|
+
# Iterate over each job Info object
|
99
|
+
# @param attrs [Array<symbol>] attrs is present only to complete the interface and is ignored
|
100
|
+
# @yield [Info] of each job to block
|
101
|
+
# @return [Enumerator] if no block given
|
102
|
+
def info_all_each(attrs: nil)
|
103
|
+
return to_enum(:info_all_each, attrs: attrs) unless block_given?
|
104
|
+
|
105
|
+
info_all(attrs: attrs).each do |job|
|
106
|
+
yield job
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Iterate over each job Info object
|
111
|
+
# @param owner [#to_s, Array<#to_s>] owner is present only to complete the interface and is ignored
|
112
|
+
# @param attrs [Array<symbol>] attrs is present only to complete the interface and is ignored
|
113
|
+
# @yield [Info] of each job to block
|
114
|
+
# @return [Enumerator] if no block given
|
115
|
+
def info_where_owner_each(owner, attrs: nil)
|
116
|
+
return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?
|
117
|
+
|
118
|
+
info_where_owner(owner, attrs: attrs).each do |job|
|
119
|
+
yield job
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# Whether the adapter supports job arrays
|
124
|
+
# @return [Boolean] - false
|
125
|
+
def supports_job_arrays?
|
126
|
+
false
|
127
|
+
end
|
128
|
+
|
129
|
+
# Retrieve job info from the SSH host
|
130
|
+
# @param id [#to_s] the id of the job
|
131
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
132
|
+
# @return [Info] information describing submitted job
|
133
|
+
# @see Adapter#info
|
134
|
+
def info(id)
|
135
|
+
_, host = parse_job_id(id)
|
136
|
+
job = info_all(host: host).select{|info| info.id == id}.first
|
137
|
+
(job) ? job : Info.new(id: id, status: :completed)
|
138
|
+
rescue Launcher::Error => e
|
139
|
+
raise JobAdapterError, e.message
|
140
|
+
end
|
141
|
+
|
142
|
+
# Retrieve job status from resource manager
|
143
|
+
# @note Optimized slightly over retrieving complete job information from server
|
144
|
+
# @abstract Subclass is expected to implement {#status}
|
145
|
+
# @raise [NotImplementedError] if subclass did not define {#status}
|
146
|
+
# @param id [#to_s] the id of the job
|
147
|
+
# @return [Status] status of job
|
148
|
+
def status(id)
|
149
|
+
_, host = parse_job_id(id)
|
150
|
+
job = info_all(host: host).select{|info| info.id == id}.first
|
151
|
+
|
152
|
+
Status.new(state: (job) ? :running : :completed)
|
153
|
+
rescue Launcher::Error => e
|
154
|
+
raise JobAdapterError, e.message
|
155
|
+
end
|
156
|
+
|
157
|
+
# Put the submitted job on hold
|
158
|
+
# @abstract Subclass is expected to implement {#hold}
|
159
|
+
# @raise [NotImplementedError] if subclass did not define {#hold}
|
160
|
+
# @param id [#to_s] the id of the job
|
161
|
+
# @return [void]
|
162
|
+
def hold(id)
|
163
|
+
# Consider sending SIGSTOP?
|
164
|
+
raise NotImplementedError, "subclass did not define #hold"
|
165
|
+
end
|
166
|
+
|
167
|
+
# Release the job that is on hold
|
168
|
+
# @abstract Subclass is expected to implement {#release}
|
169
|
+
# @raise [NotImplementedError] if subclass did not define {#release}
|
170
|
+
# @param id [#to_s] the id of the job
|
171
|
+
# @return [void]
|
172
|
+
def release(id)
|
173
|
+
# Consider sending SIGCONT
|
174
|
+
raise NotImplementedError, "subclass did not define #release"
|
175
|
+
end
|
176
|
+
|
177
|
+
# Delete the submitted job
|
178
|
+
# @abstract Subclass is expected to implement {#delete}
|
179
|
+
# @raise [NotImplementedError] if subclass did not define {#delete}
|
180
|
+
# @param id [#to_s] the id of the job
|
181
|
+
# @return [void]
|
182
|
+
def delete(id)
|
183
|
+
session_name, destination_host = parse_job_id(id)
|
184
|
+
@launcher.stop_remote_session(session_name, destination_host)
|
185
|
+
rescue Launcher::Error => e
|
186
|
+
raise JobAdapterError, e.message
|
187
|
+
end
|
188
|
+
|
189
|
+
def directive_prefix
|
190
|
+
nil
|
191
|
+
end
|
192
|
+
|
193
|
+
private
|
194
|
+
|
195
|
+
def host_permitted?(destination_host)
|
196
|
+
raise JobAdapterError, "Requested destination host (#{destination_host}) not permitted" unless @ssh_hosts.include?(destination_host)
|
197
|
+
end
|
198
|
+
|
199
|
+
def parse_job_id(id)
|
200
|
+
raise JobAdapterError, "#{id} is not a valid LinuxSystemd adapter id because it is missing the '@'." unless id.include?('@')
|
201
|
+
|
202
|
+
return id.split('@')
|
203
|
+
end
|
204
|
+
|
205
|
+
# Convert the returned Hash into an Info object
|
206
|
+
def ls_to_info(ls_output)
|
207
|
+
started = ls_output[:session_created].to_i
|
208
|
+
now = Time.now.to_i
|
209
|
+
ellapsed = now - started
|
210
|
+
Info.new(
|
211
|
+
accounting_id: nil,
|
212
|
+
allocated_nodes: [NodeInfo.new(name: ls_output[:destination_host], procs: 1)],
|
213
|
+
cpu_time: ellapsed,
|
214
|
+
dispatch_time: started,
|
215
|
+
id: ls_output[:id],
|
216
|
+
job_name: ls_output[:job_name],
|
217
|
+
job_owner: Etc.getlogin,
|
218
|
+
native: ls_output,
|
219
|
+
procs: 1,
|
220
|
+
queue_name: "LinuxSystemd adapter for #{@submit_host}",
|
221
|
+
status: :running,
|
222
|
+
submission_time: ellapsed,
|
223
|
+
submit_host: @submit_host,
|
224
|
+
wallclock_time: ellapsed
|
225
|
+
)
|
226
|
+
end
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
230
|
+
end
|
data/lib/ood_core/version.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -29,5 +29,5 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.add_development_dependency "rspec", "~> 3.0"
|
30
30
|
spec.add_development_dependency "pry", "~> 0.10"
|
31
31
|
spec.add_development_dependency "timecop", "~> 0.8"
|
32
|
-
spec.add_development_dependency "climate_control", "~> 0.
|
32
|
+
spec.add_development_dependency "climate_control", "~> 1.0.0"
|
33
33
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.19.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2022-03-02 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -122,14 +122,14 @@ dependencies:
|
|
122
122
|
requirements:
|
123
123
|
- - "~>"
|
124
124
|
- !ruby/object:Gem::Version
|
125
|
-
version: 0.
|
125
|
+
version: 1.0.0
|
126
126
|
type: :development
|
127
127
|
prerelease: false
|
128
128
|
version_requirements: !ruby/object:Gem::Requirement
|
129
129
|
requirements:
|
130
130
|
- - "~>"
|
131
131
|
- !ruby/object:Gem::Version
|
132
|
-
version: 0.
|
132
|
+
version: 1.0.0
|
133
133
|
description: Open OnDemand core library that provides support for an HPC Center to
|
134
134
|
globally define HPC services that web applications can then take advantage of.
|
135
135
|
email:
|
@@ -186,6 +186,10 @@ files:
|
|
186
186
|
- lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb
|
187
187
|
- lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb
|
188
188
|
- lib/ood_core/job/adapters/slurm.rb
|
189
|
+
- lib/ood_core/job/adapters/systemd.rb
|
190
|
+
- lib/ood_core/job/adapters/systemd/launcher.rb
|
191
|
+
- lib/ood_core/job/adapters/systemd/templates/email.erb.sh
|
192
|
+
- lib/ood_core/job/adapters/systemd/templates/script_wrapper.erb.sh
|
189
193
|
- lib/ood_core/job/adapters/torque.rb
|
190
194
|
- lib/ood_core/job/adapters/torque/attributes.rb
|
191
195
|
- lib/ood_core/job/adapters/torque/batch.rb
|