ood_core 0.9.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -2
- data/lib/ood_core/clusters.rb +6 -8
- data/lib/ood_core/job/adapters/linux_host.rb +241 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +262 -0
- data/lib/ood_core/job/adapters/linux_host/templates/email.erb.sh +9 -0
- data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh +64 -0
- data/lib/ood_core/version.rb +1 -1
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c668d456d8773bfa4a3af714f73a57d2f9396ee8
|
4
|
+
data.tar.gz: 14cdd12014850a9464505d7e34e5e4397826c6d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ebb2ea211dc272f03884faf0c2245e2f2b945851023ef5f5f26c1862514a0626a57c35c3be636d9287d1581d56dd18a3e6ff014e41837d63b7d05559605acd79
|
7
|
+
data.tar.gz: beb0f071c17c7632e9aae32ee243e3fd47480ff385d1d82cfe42e48652a6bbaaee3db7c224c7c2b4c8f91b11b11b8e5d9170b231f846e60ff9e6e1d2f03a0260
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
|
+
## [0.10.0] - 2019-11-05
|
10
|
+
### Added
|
11
|
+
- Added an adapter for submitting work on Linux hosted systems without using a scheduler
|
12
|
+
|
13
|
+
### Fixed
|
14
|
+
- Fixed bug where an unreadable cluster config would cause crashes
|
15
|
+
|
9
16
|
## [0.9.3] - 2019-05-08
|
10
17
|
### Fixed
|
11
18
|
- Fixed bug relating to cluster comparison
|
@@ -27,7 +34,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
27
34
|
- Grid Engine adapter now starts scripts in the current directory like all other adapters
|
28
35
|
- Fixed issue where Slurm comment field might break job info parsing
|
29
36
|
- Fixed possible crash when comparing two clusters if the id of one of the clusters is nil
|
30
|
-
- Fixed bug with the live system test that impacted non-
|
37
|
+
- Fixed bug with the live system test that impacted non-LSF systems
|
31
38
|
- Fixed bug with Slurm adapter when submit time is not available
|
32
39
|
|
33
40
|
## [0.8.0] - 2019-01-29
|
@@ -189,7 +196,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
189
196
|
### Added
|
190
197
|
- Initial release!
|
191
198
|
|
192
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
199
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.10.0...HEAD
|
200
|
+
[0.10.0]: https://github.com/OSC/ood_core/compare/v0.9.3...v0.10.0
|
193
201
|
[0.9.3]: https://github.com/OSC/ood_core/compare/v0.9.2...v0.9.3
|
194
202
|
[0.9.2]: https://github.com/OSC/ood_core/compare/v0.9.1...v0.9.2
|
195
203
|
[0.9.1]: https://github.com/OSC/ood_core/compare/v0.9.0...v0.9.1
|
data/lib/ood_core/clusters.rb
CHANGED
@@ -19,20 +19,18 @@ module OodCore
|
|
19
19
|
|
20
20
|
clusters = []
|
21
21
|
if config.file?
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
if config.readable?
|
23
|
+
CONFIG_VERSION.any? do |version|
|
24
|
+
YAML.safe_load(config.read).fetch(version, {}).each do |k, v|
|
25
|
+
clusters << Cluster.new(send("parse_#{version}", id: k, cluster: v))
|
26
|
+
end
|
25
27
|
end
|
26
|
-
!clusters.empty?
|
27
28
|
end
|
28
29
|
elsif config.directory?
|
29
|
-
Pathname.glob(config.join("*.yml")).each do |p|
|
30
|
+
Pathname.glob(config.join("*.yml")).select(&:file?).select(&:readable?).each do |p|
|
30
31
|
CONFIG_VERSION.any? do |version|
|
31
32
|
if cluster = YAML.safe_load(p.read).fetch(version, nil)
|
32
33
|
clusters << Cluster.new(send("parse_#{version}", id: p.basename(".yml").to_s, cluster: cluster))
|
33
|
-
true
|
34
|
-
else
|
35
|
-
false
|
36
34
|
end
|
37
35
|
end
|
38
36
|
end
|
@@ -0,0 +1,241 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "ood_core/refinements/array_extensions"
|
3
|
+
require "ood_core/job/adapters/helper"
|
4
|
+
require "set"
|
5
|
+
|
6
|
+
module OodCore
|
7
|
+
module Job
|
8
|
+
class Factory
|
9
|
+
using Refinements::HashExtensions
|
10
|
+
|
11
|
+
# Build the LinuxHost adapter from a configuration
|
12
|
+
# @param config [#to_h] the configuration for job adapter
|
13
|
+
# @option config [Object] :contain (false) Pass `--contain` flag to Singularity; allows overriding bind mounts in singularity.conf
|
14
|
+
# @option config [Object] :debug (false) Use the adapter in a debug mode
|
15
|
+
# @option config [Object] :max_timeout (nil) The longest 'wall_clock' permissible
|
16
|
+
# @option config [Object] :singularity_bin ('/usr/bin/singularity') The path to the Singularity executable
|
17
|
+
# @option config [Object] :singularity_bindpath ('/etc,/media,/mnt,/opt,/srv,/usr,/var,/users') A comma delimited list of paths to bind between the host and the guest
|
18
|
+
# @option config [Object] :singularity_image The path to the Singularity image to use
|
19
|
+
# @option config [Object] :ssh_hosts (nil) The list of permissible hosts, defaults to :submit_host
|
20
|
+
# @option config [Object] :strict_host_checking (true) Set to false to disable strict host checking and updating the known_hosts file
|
21
|
+
# @option config [Object] :submit_host The SSH target to connect to, may be the head of a round-robin
|
22
|
+
# @option config [Object] :tmux_bin ('/usr/bin/tmux') The path to the Tmux executable
|
23
|
+
def self.build_linux_host(config)
|
24
|
+
c = config.to_h.symbolize_keys
|
25
|
+
contain = c.fetch(:contain, false)
|
26
|
+
debug = c.fetch(:debug, false)
|
27
|
+
max_timeout = c.fetch(:max_timeout, nil)
|
28
|
+
singularity_bin = c.fetch(:singularity_bin, '/usr/bin/singularity')
|
29
|
+
singularity_bindpath = c.fetch(:singularity_bindpath, '/etc,/media,/mnt,/opt,/srv,/usr,/var,/users')
|
30
|
+
singularity_image = c[:singularity_image]
|
31
|
+
ssh_hosts = c.fetch(:ssh_hosts, [c[:submit_host]])
|
32
|
+
strict_host_checking = c.fetch(:strict_host_checking, true)
|
33
|
+
submit_host = c[:submit_host]
|
34
|
+
tmux_bin = c.fetch(:tmux_bin, '/usr/bin/tmux')
|
35
|
+
|
36
|
+
Adapters::LinuxHost.new(
|
37
|
+
ssh_hosts: ssh_hosts,
|
38
|
+
launcher: Adapters::LinuxHost::Launcher.new(
|
39
|
+
contain: contain,
|
40
|
+
debug: debug,
|
41
|
+
max_timeout: max_timeout,
|
42
|
+
singularity_bin: singularity_bin,
|
43
|
+
singularity_bindpath: singularity_bindpath, # '/etc,/media,/mnt,/opt,/srv,/usr,/var,/users',
|
44
|
+
singularity_image: singularity_image,
|
45
|
+
ssh_hosts: ssh_hosts,
|
46
|
+
strict_host_checking: strict_host_checking,
|
47
|
+
submit_host: submit_host,
|
48
|
+
tmux_bin: tmux_bin,
|
49
|
+
)
|
50
|
+
)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
module Adapters
|
55
|
+
# An adapter object that describes the communication with a remote host
|
56
|
+
# for job management.
|
57
|
+
class LinuxHost < Adapter
|
58
|
+
using Refinements::ArrayExtensions
|
59
|
+
|
60
|
+
require "ood_core/job/adapters/linux_host/launcher"
|
61
|
+
|
62
|
+
def initialize(ssh_hosts:, launcher:)
|
63
|
+
@launcher = launcher
|
64
|
+
@ssh_hosts = Set.new(ssh_hosts)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Submit a job with the attributes defined in the job template instance
|
68
|
+
# @param script [Script] script object that describes the script and
|
69
|
+
# attributes for the submitted job
|
70
|
+
# @param after [#to_s, Array<#to_s>] Job dependencies are not supported by this adapter; a non-empty value raises JobAdapterError
|
71
|
+
# @param afterok [#to_s, Array<#to_s>] Job dependencies are not supported by this adapter; a non-empty value raises JobAdapterError
|
72
|
+
# @param afternotok [#to_s, Array<#to_s>] Job dependencies are not supported by this adapter; a non-empty value raises JobAdapterError
|
73
|
+
# @param afterany [#to_s, Array<#to_s>] Job dependencies are not supported by this adapter; a non-empty value raises JobAdapterError
|
74
|
+
# @raise [JobAdapterError] if something goes wrong submitting a job
|
75
|
+
# @return [String] the job id returned after successfully submitting a
|
76
|
+
# job
|
77
|
+
# @see Adapter#submit
|
78
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
79
|
+
unless (after.empty? && afterok.empty? && afternotok.empty? && afterany.empty?)
|
80
|
+
raise JobAdapterError, 'Scheduling subsequent jobs is not available.'
|
81
|
+
end
|
82
|
+
|
83
|
+
@launcher.start_remote_session(script)
|
84
|
+
rescue Launcher::Error => e
|
85
|
+
raise JobAdapterError, e.message
|
86
|
+
end
|
87
|
+
|
88
|
+
# Retrieve info for all jobs from the resource manager
|
89
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
90
|
+
# @return [Array<Info>] information describing submitted jobs
|
91
|
+
# @see Adapter#info_all
|
92
|
+
def info_all(attrs: nil, host: nil)
|
93
|
+
host_permitted?(host) if host
|
94
|
+
|
95
|
+
@launcher.list_remote_sessions(host: host).map{
|
96
|
+
|ls_output| ls_to_info(ls_output)
|
97
|
+
}
|
98
|
+
rescue Launcher::Error => e
|
99
|
+
raise JobAdapterError, e.message
|
100
|
+
end
|
101
|
+
|
102
|
+
# Retrieve info for all jobs for a given owner or owners from the
|
103
|
+
# resource manager
|
104
|
+
# Note: owner and attrs are present only to complete the interface and are ignored
|
105
|
+
# Note: since this API is used in production no errors or warnings are thrown / issued
|
106
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
107
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
108
|
+
# @return [Array<Info>] information describing submitted jobs
|
109
|
+
def info_where_owner(owner: nil, attrs: nil)
|
110
|
+
info_all
|
111
|
+
end
|
112
|
+
|
113
|
+
# Iterate over each job Info object
|
114
|
+
# @param attrs [Array<symbol>] attrs is present only to complete the interface and is ignored
|
115
|
+
# @yield [Info] of each job to block
|
116
|
+
# @return [Enumerator] if no block given
|
117
|
+
def info_all_each(attrs: nil)
|
118
|
+
return to_enum(:info_all_each, attrs: attrs) unless block_given?
|
119
|
+
|
120
|
+
info_all(attrs: attrs).each do |job|
|
121
|
+
yield job
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
# Iterate over each job Info object
|
126
|
+
# @param owner [#to_s, Array<#to_s>] owner is present only to complete the interface and is ignored
|
127
|
+
# @param attrs [Array<symbol>] attrs is present only to complete the interface and is ignored
|
128
|
+
# @yield [Info] of each job to block
|
129
|
+
# @return [Enumerator] if no block given
|
130
|
+
def info_where_owner_each(owner, attrs: nil)
|
131
|
+
return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?
|
132
|
+
|
133
|
+
info_where_owner(owner, attrs: attrs).each do |job|
|
134
|
+
yield job
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
# Whether the adapter supports job arrays
|
139
|
+
# @return [Boolean] - false
|
140
|
+
def supports_job_arrays?
|
141
|
+
false
|
142
|
+
end
|
143
|
+
|
144
|
+
# Retrieve job info from the SSH host
|
145
|
+
# @param id [#to_s] the id of the job
|
146
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
147
|
+
# @return [Info] information describing submitted job
|
148
|
+
# @see Adapter#info
|
149
|
+
def info(id)
|
150
|
+
_, host = parse_job_id(id)
|
151
|
+
job = info_all(host: host).select{|info| info.id == id}.first
|
152
|
+
(job) ? job : Info.new(id: id, status: :completed)
|
153
|
+
rescue Launcher::Error => e
|
154
|
+
raise JobAdapterError, e.message
|
155
|
+
end
|
156
|
+
|
157
|
+
# Retrieve job status from resource manager
|
158
|
+
# @note Optimized slightly over retrieving complete job information from server
|
159
|
+
# @note A job is reported as running while its session is listed on the host, otherwise as completed
|
160
|
+
# @raise [JobAdapterError] if something goes wrong getting job status
|
161
|
+
# @param id [#to_s] the id of the job
|
162
|
+
# @return [Status] status of job
|
163
|
+
def status(id)
|
164
|
+
_, host = parse_job_id(id)
|
165
|
+
job = info_all(host: host).select{|info| info.id == id}.first
|
166
|
+
|
167
|
+
Status.new(state: (job) ? :running : :completed)
|
168
|
+
rescue Launcher::Error => e
|
169
|
+
raise JobAdapterError, e.message
|
170
|
+
end
|
171
|
+
|
172
|
+
# Put the submitted job on hold
|
173
|
+
# @abstract Subclass is expected to implement {#hold}
|
174
|
+
# @raise [NotImplementedError] if subclass did not define {#hold}
|
175
|
+
# @param id [#to_s] the id of the job
|
176
|
+
# @return [void]
|
177
|
+
def hold(id)
|
178
|
+
# Consider sending SIGSTOP?
|
179
|
+
raise NotImplementedError, "subclass did not define #hold"
|
180
|
+
end
|
181
|
+
|
182
|
+
# Release the job that is on hold
|
183
|
+
# @abstract Subclass is expected to implement {#release}
|
184
|
+
# @raise [NotImplementedError] if subclass did not define {#release}
|
185
|
+
# @param id [#to_s] the id of the job
|
186
|
+
# @return [void]
|
187
|
+
def release(id)
|
188
|
+
# Consider sending SIGCONT
|
189
|
+
raise NotImplementedError, "subclass did not define #release"
|
190
|
+
end
|
191
|
+
|
192
|
+
# Delete the submitted job
|
193
|
+
# @note Stops the remote session named in the id on the host named in the id (id format: session_name@host)
|
194
|
+
# @raise [JobAdapterError] if something goes wrong deleting the job
|
195
|
+
# @param id [#to_s] the id of the job
|
196
|
+
# @return [void]
|
197
|
+
def delete(id)
|
198
|
+
session_name, destination_host = parse_job_id(id)
|
199
|
+
@launcher.stop_remote_session(session_name, destination_host)
|
200
|
+
rescue Launcher::Error => e
|
201
|
+
raise JobAdapterError, e.message
|
202
|
+
end
|
203
|
+
|
204
|
+
private
|
205
|
+
|
206
|
+
def host_permitted?(destination_host)
|
207
|
+
raise JobAdapterError, "Requested destination host (#{destination_host}) not permitted" unless @ssh_hosts.include?(destination_host)
|
208
|
+
end
|
209
|
+
|
210
|
+
def parse_job_id(id)
|
211
|
+
raise JobAdapterError, "#{id} is not a valid LinuxHost adapter id because it is missing the '@'." unless id.include?('@')
|
212
|
+
|
213
|
+
return id.split('@')
|
214
|
+
end
|
215
|
+
|
216
|
+
# Convert the returned Hash into an Info object
|
217
|
+
def ls_to_info(ls_output)
|
218
|
+
started = ls_output[:session_created].to_i
|
219
|
+
now = Time.now.to_i
|
220
|
+
ellapsed = now - started
|
221
|
+
Info.new(
|
222
|
+
accounting_id: nil,
|
223
|
+
allocated_nodes: [NodeInfo.new(name: ls_output[:destination_host], procs: 1)],
|
224
|
+
cpu_time: ellapsed,
|
225
|
+
dispatch_time: started,
|
226
|
+
id: ls_output[:id],
|
227
|
+
job_name: nil, # TODO
|
228
|
+
job_owner: Etc.getlogin,
|
229
|
+
native: ls_output,
|
230
|
+
procs: 1,
|
231
|
+
queue_name: "LinuxHost adapter for #{@submit_host}",
|
232
|
+
status: :running,
|
233
|
+
submission_time: ellapsed,
|
234
|
+
submit_host: @submit_host,
|
235
|
+
wallclock_time: ellapsed
|
236
|
+
)
|
237
|
+
end
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
end
|
@@ -0,0 +1,262 @@
|
|
1
|
+
require 'erb'
|
2
|
+
require 'etc'
|
3
|
+
require 'pathname'
|
4
|
+
require 'securerandom'
|
5
|
+
require 'shellwords'
|
6
|
+
require 'time'
|
7
|
+
|
8
|
+
# Object used for simplified communication with SSH hosts
|
9
|
+
#
|
10
|
+
# @api private
|
11
|
+
class OodCore::Job::Adapters::LinuxHost::Launcher
|
12
|
+
attr_reader :contain, :debug, :site_timeout, :session_name_label, :singularity_bin,
|
13
|
+
:site_singularity_bindpath, :default_singularity_image, :ssh_hosts,
|
14
|
+
:strict_host_checking, :submit_host, :tmux_bin, :username
|
15
|
+
# The root exception class that all LinuxHost adapter-specific exceptions inherit
|
16
|
+
# from
|
17
|
+
class Error < StandardError; end
|
18
|
+
|
19
|
+
UNIT_SEPARATOR = "\x1F"
|
20
|
+
|
21
|
+
# @param debug Whether the adapter should be used in debug mode
|
22
|
+
# @param site_timeout [#to_i] A period after which the job should be killed or nil
|
23
|
+
# @param singularity_bin Path to the Singularity executable
|
24
|
+
# @param singularity_bindpath A comma delimited string of host paths to bindmount into the guest; sets SINGULARITY_BINDPATH environment variable
|
25
|
+
# @param singularity_image [#to_s] Path to the Singularity image
|
26
|
+
# @param ssh_hosts List of hosts to check when scanning for running jobs
|
27
|
+
# @param strict_host_checking Allow SSH to perform strict host checking
|
28
|
+
# @param submit_host The SSH-able host
|
29
|
+
# @param tmux_bin [#to_s] Path to the tmux executable
|
30
|
+
def initialize(
|
31
|
+
contain: false,
|
32
|
+
debug: false,
|
33
|
+
site_timeout: nil,
|
34
|
+
singularity_bin:,
|
35
|
+
singularity_bindpath: '/etc,/media,/mnt,/opt,/run,/srv,/usr,/var,/users',
|
36
|
+
singularity_image:,
|
37
|
+
ssh_hosts:,
|
38
|
+
strict_host_checking: false,
|
39
|
+
submit_host:,
|
40
|
+
tmux_bin:,
|
41
|
+
**_
|
42
|
+
)
|
43
|
+
@contain = !! contain
|
44
|
+
@debug = !! debug
|
45
|
+
@site_timeout = site_timeout.to_i
|
46
|
+
@session_name_label = 'launched-by-ondemand'
|
47
|
+
@singularity_bin = Pathname.new(singularity_bin)
|
48
|
+
@site_singularity_bindpath = singularity_bindpath.to_s
|
49
|
+
@default_singularity_image = Pathname.new(singularity_image)
|
50
|
+
@ssh_hosts = ssh_hosts
|
51
|
+
@strict_host_checking = strict_host_checking
|
52
|
+
@submit_host = submit_host
|
53
|
+
@tmux_bin = tmux_bin
|
54
|
+
@username = Etc.getlogin
|
55
|
+
end
|
56
|
+
|
57
|
+
# Sends the wrapped script over SSH to the configured submit_host and returns "<session_name>@<hostname>"
|
58
|
+
# @param script [OodCore::Job::Script] The script object defining the work
|
59
|
+
def start_remote_session(script)
|
60
|
+
cmd = ssh_cmd(submit_host)
|
61
|
+
|
62
|
+
session_name = unique_session_name
|
63
|
+
output = call(*cmd, stdin: wrapped_script(script, session_name))
|
64
|
+
hostname = output.strip
|
65
|
+
|
66
|
+
"#{session_name}@#{hostname}"
|
67
|
+
end
|
68
|
+
|
69
|
+
def stop_remote_session(session_name, hostname)
|
70
|
+
cmd = ssh_cmd(hostname)
|
71
|
+
|
72
|
+
kill_cmd = <<~SCRIPT
|
73
|
+
# Get the tmux pane PID for the target session
|
74
|
+
pane_pid=$(tmux list-panes -aF '\#{session_name} \#{pane_pid}' | grep '#{session_name}' | cut -f 2 -d ' ')
|
75
|
+
# Find the Singularity sinit PID child of the pane process
|
76
|
+
pane_sinit_pid=$(pstree -p "$pane_pid" | grep -o 'sinit([[:digit:]]*' | grep -o '[[:digit:]]*')
|
77
|
+
# Kill sinit which stops both Singularity-based processes and the tmux session
|
78
|
+
kill "$pane_sinit_pid"
|
79
|
+
SCRIPT
|
80
|
+
|
81
|
+
call(*cmd, stdin: kill_cmd)
|
82
|
+
rescue Error => e
|
83
|
+
raise e unless (
|
84
|
+
# The tmux server not running is not an error
|
85
|
+
e.message.include?('failed to connect to server') ||
|
86
|
+
# The session not being found is not an error
|
87
|
+
e.message.include?("session not found: #{session_name_label}")
|
88
|
+
)
|
89
|
+
end
|
90
|
+
|
91
|
+
def list_remote_sessions(host: nil)
|
92
|
+
host_list = (host) ? [host] : ssh_hosts
|
93
|
+
|
94
|
+
host_list.map {
|
95
|
+
|hostname| list_remote_tmux_session(hostname)
|
96
|
+
}.flatten.sort_by {
|
97
|
+
|hsh| hsh[:session_name]
|
98
|
+
}
|
99
|
+
end
|
100
|
+
|
101
|
+
private
|
102
|
+
|
103
|
+
# Run a command in a subprocess and return its stdout; raises Error with the command's stderr if it exits unsuccessfully
|
104
|
+
def call(cmd, *args, env: {}, stdin: "")
|
105
|
+
args = args.map(&:to_s)
|
106
|
+
env = env.to_h
|
107
|
+
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s)
|
108
|
+
s.success? ? o : raise(Error, e)
|
109
|
+
end
|
110
|
+
|
111
|
+
# The SSH invocation to send a command
|
112
|
+
# -t Force pseudo-terminal allocation (required to allow tmux to run)
|
113
|
+
# -o BatchMode=yes (set mode to be non-interactive)
|
114
|
+
# if ! strict_host_checking
|
115
|
+
# -o UserKnownHostsFile=/dev/null (do not update the user's known hosts file)
|
116
|
+
# -o StrictHostKeyChecking=no (do not check the user's known hosts file)
|
117
|
+
def ssh_cmd(destination_host)
|
118
|
+
if strict_host_checking
|
119
|
+
[
|
120
|
+
'ssh', '-t',
|
121
|
+
'-o', 'BatchMode=yes',
|
122
|
+
"#{username}@#{destination_host}"
|
123
|
+
]
|
124
|
+
else
|
125
|
+
[
|
126
|
+
'ssh', '-t',
|
127
|
+
'-o', 'BatchMode=yes',
|
128
|
+
'-o', 'UserKnownHostsFile=/dev/null',
|
129
|
+
'-o', 'StrictHostKeyChecking=no',
|
130
|
+
"#{username}@#{destination_host}"
|
131
|
+
]
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def shell
|
136
|
+
ENV['SHELL'] || '/bin/bash'
|
137
|
+
end
|
138
|
+
|
139
|
+
# Wraps a user-provided script into a Tmux invocation
|
140
|
+
def wrapped_script(script, session_name)
|
141
|
+
content = script.content
|
142
|
+
unless user_script_has_shebang?(script)
|
143
|
+
content = "#!#{shell}\n#{content}"
|
144
|
+
end
|
145
|
+
|
146
|
+
ERB.new(
|
147
|
+
File.read(Pathname.new(__dir__).join('templates/script_wrapper.erb.sh'))
|
148
|
+
).result(binding.tap {|bnd|
|
149
|
+
{
|
150
|
+
'arguments' => script_arguments(script),
|
151
|
+
'cd_to_workdir' => (script.workdir) ? "cd #{script.workdir}" : '',
|
152
|
+
'contain' => (contain) ? '--contain' : '',
|
153
|
+
'debug' => debug,
|
154
|
+
'email_on_terminated' => script_email_on_event(script, 'terminated'),
|
155
|
+
'email_on_start' => script_email_on_event(script, 'started'),
|
156
|
+
'environment' => export_env(script),
|
157
|
+
'error_path' => (script.error_path) ? script.error_path.to_s : '/dev/null',
|
158
|
+
'job_name' => script.job_name.to_s,
|
159
|
+
'output_path' => (script.output_path) ? script.output_path.to_s : '/dev/null',
|
160
|
+
'script_content' => content,
|
161
|
+
'script_timeout' => script_timeout(script),
|
162
|
+
'session_name' => session_name,
|
163
|
+
'singularity_bin' => singularity_bin,
|
164
|
+
'singularity_image' => singularity_image(script.native),
|
165
|
+
'tmux_bin' => tmux_bin,
|
166
|
+
}.each{
|
167
|
+
|key, value| bnd.local_variable_set(key, value)
|
168
|
+
}
|
169
|
+
})
|
170
|
+
end
|
171
|
+
|
172
|
+
# Generate the environment export block for this script
|
173
|
+
def export_env(script)
|
174
|
+
environment = script.job_environment
|
175
|
+
(environment ? environment : {}).tap{
|
176
|
+
|hsh|
|
177
|
+
hsh['SINGULARITY_BINDPATH'] = singularity_bindpath(script.native)
|
178
|
+
}.map{
|
179
|
+
|key, value| "export #{key}=#{Shellwords.escape(value)}"
|
180
|
+
}.sort.join("\n")
|
181
|
+
end
|
182
|
+
|
183
|
+
def singularity_image(native)
|
184
|
+
if native && native[:singularity_container]
|
185
|
+
return native[:singularity_container]
|
186
|
+
end
|
187
|
+
|
188
|
+
default_singularity_image
|
189
|
+
end
|
190
|
+
|
191
|
+
def singularity_bindpath(native)
|
192
|
+
return site_singularity_bindpath unless native && native[:singularity_bindpath]
|
193
|
+
|
194
|
+
native[:singularity_bindpath]
|
195
|
+
end
|
196
|
+
|
197
|
+
def script_timeout(script)
|
198
|
+
wall_time = script.wall_time.to_i
|
199
|
+
return site_timeout if wall_time == 0
|
200
|
+
return [wall_time, site_timeout].min unless site_timeout == 0
|
201
|
+
|
202
|
+
wall_time
|
203
|
+
end
|
204
|
+
|
205
|
+
def script_arguments(script)
|
206
|
+
return '' unless script.args
|
207
|
+
|
208
|
+
Shellwords.join(script.args)
|
209
|
+
end
|
210
|
+
|
211
|
+
def script_email_on_event(script, event)
|
212
|
+
return false unless script.email && script.send("email_on_#{event}")
|
213
|
+
|
214
|
+
ERB.new(
|
215
|
+
File.read(Pathname.new(__dir__).join('templates/email.erb.sh'))
|
216
|
+
).result(binding.tap {|bnd|
|
217
|
+
{
|
218
|
+
'email_recipients' => script.email.map{|addr| Shellwords.escape(addr)}.join(', '),
|
219
|
+
'job_name' => (script.job_name) ? script.job_name : 'LinuxHost_Adapter_Job',
|
220
|
+
'job_status' => event
|
221
|
+
}.each{
|
222
|
+
|key, value| bnd.local_variable_set(key, value)
|
223
|
+
}
|
224
|
+
})
|
225
|
+
end
|
226
|
+
|
227
|
+
def unique_session_name
|
228
|
+
"#{session_name_label}-#{SecureRandom.uuid}"
|
229
|
+
end
|
230
|
+
|
231
|
+
# List all Tmux sessions on destination_host started by this adapter
|
232
|
+
# Additional tmux ls options available: http://man7.org/linux/man-pages/man1/tmux.1.html#FORMATS
|
233
|
+
def list_remote_tmux_session(destination_host)
|
234
|
+
# Note that the tmux variable substitution looks like Ruby string sub,
|
235
|
+
# these must either be single quoted strings or Ruby-string escaped as well
|
236
|
+
format_str = Shellwords.escape(
|
237
|
+
['#{session_name}', '#{session_created}', '#{pane_pid}'].join(UNIT_SEPARATOR)
|
238
|
+
)
|
239
|
+
keys = [:session_name, :session_created, :session_pid]
|
240
|
+
cmd = ssh_cmd(destination_host) + ['tmux', 'list-panes', '-aF', format_str]
|
241
|
+
|
242
|
+
call(*cmd).split(
|
243
|
+
"\n"
|
244
|
+
).map do |line|
|
245
|
+
Hash[keys.zip(line.split(UNIT_SEPARATOR))].tap do |session_hash|
|
246
|
+
session_hash[:destination_host] = destination_host
|
247
|
+
session_hash[:id] = "#{session_hash[:session_name]}@#{destination_host}"
|
248
|
+
end
|
249
|
+
end.select{
|
250
|
+
|session_hash| session_hash[:session_name].start_with?(session_name_label)
|
251
|
+
}
|
252
|
+
rescue Error => e
|
253
|
+
# The tmux server not running is not an error
|
254
|
+
raise e unless e.message.include?('failed to connect to server')
|
255
|
+
[]
|
256
|
+
end
|
257
|
+
|
258
|
+
def user_script_has_shebang?(script)
|
259
|
+
return false if script.content.empty?
|
260
|
+
script.content.split("\n").first.start_with?('#!/')
|
261
|
+
end
|
262
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
hostname
|
3
|
+
|
4
|
+
# Put the script into a temp file on localhost
|
5
|
+
<% if debug %>
|
6
|
+
singularity_tmp_file=$(mktemp -p "$HOME" --suffix '_sing')
|
7
|
+
tmux_tmp_file=$(mktemp -p "$HOME" --suffix "_tmux")
|
8
|
+
<% else %>
|
9
|
+
singularity_tmp_file=$(mktemp)
|
10
|
+
tmux_tmp_file=$(mktemp)
|
11
|
+
<% end %>
|
12
|
+
|
13
|
+
# Create an executable to run in a tmux session
|
14
|
+
# The escaped HEREDOC means that we need to substitute in $singularity_tmp_file ourselves
|
15
|
+
cat << 'TMUX_LAUNCHER' | sed "s#\$singularity_tmp_file#${singularity_tmp_file}#" > "$tmux_tmp_file"
|
16
|
+
#!/bin/bash
|
17
|
+
<% if email_on_terminated %>
|
18
|
+
exit_script() {
|
19
|
+
<%# DO NOT INDENT email_on_terminated may have HEREDOCS %>
|
20
|
+
<%= email_on_terminated %>
|
21
|
+
trap - SIGINT SIGTERM # clear the trap
|
22
|
+
kill -- -$$ # Sends SIGTERM to child/sub processes
|
23
|
+
}
|
24
|
+
trap exit_script SIGINT SIGTERM
|
25
|
+
<% end %>
|
26
|
+
|
27
|
+
<%= cd_to_workdir %>
|
28
|
+
<%= environment %>
|
29
|
+
|
30
|
+
<%= email_on_start %>
|
31
|
+
|
32
|
+
# Redirect stdout and stderr to separate files for all commands run within the curly braces
|
33
|
+
# https://unix.stackexchange.com/a/6431/204548
|
34
|
+
# Swap stderr and stdout after stdout has been redirected
|
35
|
+
# https://unix.stackexchange.com/a/61932/204548
|
36
|
+
OUTPUT_PATH=<%= output_path %>
|
37
|
+
ERROR_PATH=<%= error_path %>
|
38
|
+
({
|
39
|
+
timeout <%= script_timeout %>s <%= singularity_bin %> exec <%= contain %> --pid <%= singularity_image %> /bin/bash --login $singularity_tmp_file <%= arguments %>
|
40
|
+
} | tee "$OUTPUT_PATH") 3>&1 1>&2 2>&3 | tee "$ERROR_PATH"
|
41
|
+
|
42
|
+
<%= email_on_terminated %>
|
43
|
+
|
44
|
+
# Exit the tmux session when we are complete
|
45
|
+
exit 0
|
46
|
+
TMUX_LAUNCHER
|
47
|
+
|
48
|
+
# Create an executable for Singularity to run
|
49
|
+
# Escaped HEREDOC means that we do not have to worry about Shell.escape-ing script_content
|
50
|
+
cat << 'SINGULARITY_LAUNCHER' > "$singularity_tmp_file"
|
51
|
+
<%= script_content %>
|
52
|
+
SINGULARITY_LAUNCHER
|
53
|
+
|
54
|
+
# Run the script inside a tmux session
|
55
|
+
chmod +x "$singularity_tmp_file"
|
56
|
+
chmod +x "$tmux_tmp_file"
|
57
|
+
<%= tmux_bin %> new-session -d -s "<%= session_name %>" "$tmux_tmp_file"
|
58
|
+
|
59
|
+
# Remove the file
|
60
|
+
<% if ! debug %>
|
61
|
+
# Wait 1 second to ensure that tmux session has started before the file is removed
|
62
|
+
sleep 1
|
63
|
+
rm -f "$tmux_tmp_file"; rm -f "$singularity_tmp_file"
|
64
|
+
<% end %>
|
data/lib/ood_core/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2019-05
|
13
|
+
date: 2019-11-05 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -150,6 +150,10 @@ files:
|
|
150
150
|
- lib/ood_core/job/adapter.rb
|
151
151
|
- lib/ood_core/job/adapters/drmaa.rb
|
152
152
|
- lib/ood_core/job/adapters/helper.rb
|
153
|
+
- lib/ood_core/job/adapters/linux_host.rb
|
154
|
+
- lib/ood_core/job/adapters/linux_host/launcher.rb
|
155
|
+
- lib/ood_core/job/adapters/linux_host/templates/email.erb.sh
|
156
|
+
- lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh
|
153
157
|
- lib/ood_core/job/adapters/lsf.rb
|
154
158
|
- lib/ood_core/job/adapters/lsf/batch.rb
|
155
159
|
- lib/ood_core/job/adapters/lsf/helper.rb
|
@@ -197,7 +201,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
197
201
|
version: '0'
|
198
202
|
requirements: []
|
199
203
|
rubyforge_project:
|
200
|
-
rubygems_version: 2.6.
|
204
|
+
rubygems_version: 2.6.11
|
201
205
|
signing_key:
|
202
206
|
specification_version: 4
|
203
207
|
summary: Open OnDemand core library
|