ood_core 0.9.3 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -2
- data/lib/ood_core/clusters.rb +6 -8
- data/lib/ood_core/job/adapters/linux_host.rb +241 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +262 -0
- data/lib/ood_core/job/adapters/linux_host/templates/email.erb.sh +9 -0
- data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh +64 -0
- data/lib/ood_core/version.rb +1 -1
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c668d456d8773bfa4a3af714f73a57d2f9396ee8
|
4
|
+
data.tar.gz: 14cdd12014850a9464505d7e34e5e4397826c6d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ebb2ea211dc272f03884faf0c2245e2f2b945851023ef5f5f26c1862514a0626a57c35c3be636d9287d1581d56dd18a3e6ff014e41837d63b7d05559605acd79
|
7
|
+
data.tar.gz: beb0f071c17c7632e9aae32ee243e3fd47480ff385d1d82cfe42e48652a6bbaaee3db7c224c7c2b4c8f91b11b11b8e5d9170b231f846e60ff9e6e1d2f03a0260
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
|
+
## [0.10.0] - 2019-11-05
|
10
|
+
### Added
|
11
|
+
- Added an adapter for submitting work on Linux hosted systems without using a scheduler
|
12
|
+
|
13
|
+
### Fixed
|
14
|
+
- Fixed bug where an unreadable cluster config would cause crashes
|
15
|
+
|
9
16
|
## [0.9.3] - 2019-05-08
|
10
17
|
### Fixed
|
11
18
|
- Fixed bug relating to cluster comparison
|
@@ -27,7 +34,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
27
34
|
- Grid Engine adapter now starts scripts in the current directory like all other adapters
|
28
35
|
- Fixed issue where Slurm comment field might break job info parsing
|
29
36
|
- Fixed possible crash when comparing two clusters if the id of one of the clusters is nil
|
30
|
-
- Fixed bug with the live system test that impacted non-
|
37
|
+
- Fixed bug with the live system test that impacted non-LSF systems
|
31
38
|
- Fixed bug with Slurm adapter when submit time is not available
|
32
39
|
|
33
40
|
## [0.8.0] - 2019-01-29
|
@@ -189,7 +196,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
189
196
|
### Added
|
190
197
|
- Initial release!
|
191
198
|
|
192
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
199
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.10.0...HEAD
|
200
|
+
[0.10.0]: https://github.com/OSC/ood_core/compare/v0.9.3...v0.10.0
|
193
201
|
[0.9.3]: https://github.com/OSC/ood_core/compare/v0.9.2...v0.9.3
|
194
202
|
[0.9.2]: https://github.com/OSC/ood_core/compare/v0.9.1...v0.9.2
|
195
203
|
[0.9.1]: https://github.com/OSC/ood_core/compare/v0.9.0...v0.9.1
|
data/lib/ood_core/clusters.rb
CHANGED
@@ -19,20 +19,18 @@ module OodCore
|
|
19
19
|
|
20
20
|
clusters = []
|
21
21
|
if config.file?
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
if config.readable?
|
23
|
+
CONFIG_VERSION.any? do |version|
|
24
|
+
YAML.safe_load(config.read).fetch(version, {}).each do |k, v|
|
25
|
+
clusters << Cluster.new(send("parse_#{version}", id: k, cluster: v))
|
26
|
+
end
|
25
27
|
end
|
26
|
-
!clusters.empty?
|
27
28
|
end
|
28
29
|
elsif config.directory?
|
29
|
-
Pathname.glob(config.join("*.yml")).each do |p|
|
30
|
+
Pathname.glob(config.join("*.yml")).select(&:file?).select(&:readable?).each do |p|
|
30
31
|
CONFIG_VERSION.any? do |version|
|
31
32
|
if cluster = YAML.safe_load(p.read).fetch(version, nil)
|
32
33
|
clusters << Cluster.new(send("parse_#{version}", id: p.basename(".yml").to_s, cluster: cluster))
|
33
|
-
true
|
34
|
-
else
|
35
|
-
false
|
36
34
|
end
|
37
35
|
end
|
38
36
|
end
|
@@ -0,0 +1,241 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "ood_core/refinements/array_extensions"
|
3
|
+
require "ood_core/job/adapters/helper"
|
4
|
+
require "set"
|
5
|
+
|
6
|
+
module OodCore
|
7
|
+
module Job
|
8
|
+
class Factory
|
9
|
+
using Refinements::HashExtensions
|
10
|
+
|
11
|
+
# Build the LinuxHost adapter from a configuration
|
12
|
+
# @param config [#to_h] the configuration for job adapter
|
13
|
+
# @option config [Object] :contain (false) Pass `--contain` flag to Singularity; allows overriding bind mounts in singularity.conf
|
14
|
+
# @option config [Object] :debug (false) Use the adapter in a debug mode
|
15
|
+
# @option config [Object] :max_timeout (nil) The longest 'wall_clock' permissible
|
16
|
+
# @option config [Object] :singularity_bin ('/usr/bin/singularity') The path to the Singularity executable
|
17
|
+
# @option config [Object] :singularity_bindpath ('/etc,/media,/mnt,/opt,/srv,/usr,/var,/users') A comma delimited list of paths to bind between the host and the guest
|
18
|
+
# @option config [Object] :singularity_image The path to the Singularity image to use
|
19
|
+
# @option config [Object] :ssh_hosts (nil) The list of permissible hosts, defaults to :submit_host
|
20
|
+
# @option config [Object] :strict_host_checking (true) Set to false to disable strict host checking and updating the known_hosts file
|
21
|
+
# @option config [Object] :submit_host The SSH target to connect to, may be the head of a round-robin
|
22
|
+
# @option config [Object] :tmux_bin ('/usr/bin/tmux') The path to the Tmux executable
|
23
|
+
def self.build_linux_host(config)
|
24
|
+
c = config.to_h.symbolize_keys
|
25
|
+
contain = c.fetch(:contain, false)
|
26
|
+
debug = c.fetch(:debug, false)
|
27
|
+
max_timeout = c.fetch(:max_timeout, nil)
|
28
|
+
singularity_bin = c.fetch(:singularity_bin, '/usr/bin/singularity')
|
29
|
+
singularity_bindpath = c.fetch(:singularity_bindpath, '/etc,/media,/mnt,/opt,/srv,/usr,/var,/users')
|
30
|
+
singularity_image = c[:singularity_image]
|
31
|
+
ssh_hosts = c.fetch(:ssh_hosts, [c[:submit_host]])
|
32
|
+
strict_host_checking = c.fetch(:strict_host_checking, true)
|
33
|
+
submit_host = c[:submit_host]
|
34
|
+
tmux_bin = c.fetch(:tmux_bin, '/usr/bin/tmux')
|
35
|
+
|
36
|
+
Adapters::LinuxHost.new(
|
37
|
+
ssh_hosts: ssh_hosts,
|
38
|
+
launcher: Adapters::LinuxHost::Launcher.new(
|
39
|
+
contain: contain,
|
40
|
+
debug: debug,
|
41
|
+
max_timeout: max_timeout,
|
42
|
+
singularity_bin: singularity_bin,
|
43
|
+
singularity_bindpath: singularity_bindpath, # '/etc,/media,/mnt,/opt,/srv,/usr,/var,/users',
|
44
|
+
singularity_image: singularity_image,
|
45
|
+
ssh_hosts: ssh_hosts,
|
46
|
+
strict_host_checking: strict_host_checking,
|
47
|
+
submit_host: submit_host,
|
48
|
+
tmux_bin: tmux_bin,
|
49
|
+
)
|
50
|
+
)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
module Adapters
|
55
|
+
# An adapter object that describes the communication with a remote host
|
56
|
+
# for job management.
|
57
|
+
class LinuxHost < Adapter
|
58
|
+
using Refinements::ArrayExtensions
|
59
|
+
|
60
|
+
require "ood_core/job/adapters/linux_host/launcher"
|
61
|
+
|
62
|
+
def initialize(ssh_hosts:, launcher:)
|
63
|
+
@launcher = launcher
|
64
|
+
@ssh_hosts = Set.new(ssh_hosts)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Submit a job with the attributes defined in the job template instance
|
68
|
+
# @param script [Script] script object that describes the script and
|
69
|
+
# attributes for the submitted job
|
70
|
+
# @param after [#to_s, Array<#to_s>] Not supported because no scheduling is available; setting raises JobAdapterError
|
71
|
+
# @param afterok [#to_s, Array<#to_s>] Not supported because no scheduling is available; setting raises JobAdapterError
|
72
|
+
# @param afternotok [#to_s, Array<#to_s>] Not supported because no scheduling is available; setting raises JobAdapterError
|
73
|
+
# @param afterany [#to_s, Array<#to_s>] Not supported because no scheduling is available; setting raises JobAdapterError
|
74
|
+
# @raise [JobAdapterError] if something goes wrong submitting a job
|
75
|
+
# @return [String] the job id returned after successfully submitting a
|
76
|
+
# job
|
77
|
+
# @see Adapter#submit
|
78
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
79
|
+
unless (after.empty? && afterok.empty? && afternotok.empty? && afterany.empty?)
|
80
|
+
raise JobAdapterError, 'Scheduling subsequent jobs is not available.'
|
81
|
+
end
|
82
|
+
|
83
|
+
@launcher.start_remote_session(script)
|
84
|
+
rescue Launcher::Error => e
|
85
|
+
raise JobAdapterError, e.message
|
86
|
+
end
|
87
|
+
|
88
|
+
# Retrieve info for all jobs from the resource manager
|
89
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
90
|
+
# @return [Array<Info>] information describing submitted jobs
|
91
|
+
# @see Adapter#info_all
|
92
|
+
def info_all(attrs: nil, host: nil)
|
93
|
+
host_permitted?(host) if host
|
94
|
+
|
95
|
+
@launcher.list_remote_sessions(host: host).map{
|
96
|
+
|ls_output| ls_to_info(ls_output)
|
97
|
+
}
|
98
|
+
rescue Launcher::Error => e
|
99
|
+
raise JobAdapterError, e.message
|
100
|
+
end
|
101
|
+
|
102
|
+
# Retrieve info for all jobs for a given owner or owners from the
|
103
|
+
# resource manager
|
104
|
+
# Note: owner and attrs are present only to complete the interface and are ignored
|
105
|
+
# Note: since this API is used in production no errors or warnings are thrown / issued
|
106
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
107
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
108
|
+
# @return [Array<Info>] information describing submitted jobs
|
109
|
+
def info_where_owner(owner: nil, attrs: nil)
|
110
|
+
info_all
|
111
|
+
end
|
112
|
+
|
113
|
+
# Iterate over each job Info object
|
114
|
+
# @param attrs [Array<symbol>] attrs is present only to complete the interface and is ignored
|
115
|
+
# @yield [Info] of each job to block
|
116
|
+
# @return [Enumerator] if no block given
|
117
|
+
def info_all_each(attrs: nil)
|
118
|
+
return to_enum(:info_all_each, attrs: attrs) unless block_given?
|
119
|
+
|
120
|
+
info_all(attrs: attrs).each do |job|
|
121
|
+
yield job
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
# Iterate over each job Info object
|
126
|
+
# @param owner [#to_s, Array<#to_s>] owner is present only to complete the interface and is ignored
|
127
|
+
# @param attrs [Array<symbol>] attrs is present only to complete the interface and is ignored
|
128
|
+
# @yield [Info] of each job to block
|
129
|
+
# @return [Enumerator] if no block given
|
130
|
+
def info_where_owner_each(owner, attrs: nil)
|
131
|
+
return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?
|
132
|
+
|
133
|
+
info_where_owner(owner, attrs: attrs).each do |job|
|
134
|
+
yield job
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
# Whether the adapter supports job arrays
|
139
|
+
# @return [Boolean] - false
|
140
|
+
def supports_job_arrays?
|
141
|
+
false
|
142
|
+
end
|
143
|
+
|
144
|
+
# Retrieve job info from the SSH host
|
145
|
+
# @param id [#to_s] the id of the job
|
146
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
147
|
+
# @return [Info] information describing submitted job
|
148
|
+
# @see Adapter#info
|
149
|
+
def info(id)
|
150
|
+
_, host = parse_job_id(id)
|
151
|
+
job = info_all(host: host).select{|info| info.id == id}.first
|
152
|
+
(job) ? job : Info.new(id: id, status: :completed)
|
153
|
+
rescue Launcher::Error => e
|
154
|
+
raise JobAdapterError, e.message
|
155
|
+
end
|
156
|
+
|
157
|
+
# Retrieve job status from resource manager
|
158
|
+
# @note Optimized slightly over retrieving complete job information from server
|
159
|
+
# @abstract Subclass is expected to implement {#status}
|
160
|
+
# @raise [NotImplementedError] if subclass did not define {#status}
|
161
|
+
# @param id [#to_s] the id of the job
|
162
|
+
# @return [Status] status of job
|
163
|
+
def status(id)
|
164
|
+
_, host = parse_job_id(id)
|
165
|
+
job = info_all(host: host).select{|info| info.id == id}.first
|
166
|
+
|
167
|
+
Status.new(state: (job) ? :running : :completed)
|
168
|
+
rescue Launcher::Error => e
|
169
|
+
raise JobAdapterError, e.message
|
170
|
+
end
|
171
|
+
|
172
|
+
# Put the submitted job on hold
|
173
|
+
# @abstract Subclass is expected to implement {#hold}
|
174
|
+
# @raise [NotImplementedError] if subclass did not define {#hold}
|
175
|
+
# @param id [#to_s] the id of the job
|
176
|
+
# @return [void]
|
177
|
+
def hold(id)
|
178
|
+
# Consider sending SIGSTOP?
|
179
|
+
raise NotImplementedError, "subclass did not define #hold"
|
180
|
+
end
|
181
|
+
|
182
|
+
# Release the job that is on hold
|
183
|
+
# @abstract Subclass is expected to implement {#release}
|
184
|
+
# @raise [NotImplementedError] if subclass did not define {#release}
|
185
|
+
# @param id [#to_s] the id of the job
|
186
|
+
# @return [void]
|
187
|
+
def release(id)
|
188
|
+
# Consider sending SIGCONT
|
189
|
+
raise NotImplementedError, "subclass did not define #release"
|
190
|
+
end
|
191
|
+
|
192
|
+
# Delete the submitted job
|
193
|
+
# @abstract Subclass is expected to implement {#delete}
|
194
|
+
# @raise [NotImplementedError] if subclass did not define {#delete}
|
195
|
+
# @param id [#to_s] the id of the job
|
196
|
+
# @return [void]
|
197
|
+
def delete(id)
|
198
|
+
session_name, destination_host = parse_job_id(id)
|
199
|
+
@launcher.stop_remote_session(session_name, destination_host)
|
200
|
+
rescue Launcher::Error => e
|
201
|
+
raise JobAdapterError, e.message
|
202
|
+
end
|
203
|
+
|
204
|
+
private
|
205
|
+
|
206
|
+
def host_permitted?(destination_host)
|
207
|
+
raise JobAdapterError, "Requested destination host (#{destination_host}) not permitted" unless @ssh_hosts.include?(destination_host)
|
208
|
+
end
|
209
|
+
|
210
|
+
def parse_job_id(id)
|
211
|
+
raise JobAdapterError, "#{id} is not a valid LinuxHost adapter id because it is missing the '@'." unless id.include?('@')
|
212
|
+
|
213
|
+
return id.split('@')
|
214
|
+
end
|
215
|
+
|
216
|
+
# Convert the returned Hash into an Info object
|
217
|
+
def ls_to_info(ls_output)
|
218
|
+
started = ls_output[:session_created].to_i
|
219
|
+
now = Time.now.to_i
|
220
|
+
ellapsed = now - started
|
221
|
+
Info.new(
|
222
|
+
accounting_id: nil,
|
223
|
+
allocated_nodes: [NodeInfo.new(name: ls_output[:destination_host], procs: 1)],
|
224
|
+
cpu_time: ellapsed,
|
225
|
+
dispatch_time: started,
|
226
|
+
id: ls_output[:id],
|
227
|
+
job_name: nil, # TODO
|
228
|
+
job_owner: Etc.getlogin,
|
229
|
+
native: ls_output,
|
230
|
+
procs: 1,
|
231
|
+
queue_name: "LinuxHost adapter for #{@submit_host}",
|
232
|
+
status: :running,
|
233
|
+
submission_time: ellapsed,
|
234
|
+
submit_host: @submit_host,
|
235
|
+
wallclock_time: ellapsed
|
236
|
+
)
|
237
|
+
end
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
end
|
@@ -0,0 +1,262 @@
|
|
1
|
+
require 'erb'
|
2
|
+
require 'etc'
|
3
|
+
require 'pathname'
|
4
|
+
require 'securerandom'
|
5
|
+
require 'shellwords'
|
6
|
+
require 'time'
|
7
|
+
|
8
|
+
# Object used for simplified communication with SSH hosts
|
9
|
+
#
|
10
|
+
# @api private
|
11
|
+
class OodCore::Job::Adapters::LinuxHost::Launcher
|
12
|
+
attr_reader :contain, :debug, :site_timeout, :session_name_label, :singularity_bin,
|
13
|
+
:site_singularity_bindpath, :default_singularity_image, :ssh_hosts,
|
14
|
+
:strict_host_checking, :submit_host, :tmux_bin, :username
|
15
|
+
# The root exception class that all LinuxHost adapter-specific exceptions inherit
|
16
|
+
# from
|
17
|
+
class Error < StandardError; end
|
18
|
+
|
19
|
+
UNIT_SEPARATOR = "\x1F"
|
20
|
+
|
21
|
+
# @param debug Whether the adapter should be used in debug mode
|
22
|
+
# @param site_timeout [#to_i] A period after which the job should be killed or nil
|
23
|
+
# @param singularity_bin Path to the Singularity executable
|
24
|
+
# @param singularity_bindpath A comma delimited string of host paths to bindmount into the guest; sets SINGULARITY_BINDPATH environment variable
|
25
|
+
# @param singularity_image [#to_s] Path to the Singularity image
|
26
|
+
# @param ssh_hosts List of hosts to check when scanning for running jobs
|
27
|
+
# @param strict_host_checking Allow SSH to perform strict host checking
|
28
|
+
# @param submit_host The SSH-able host
|
29
|
+
# @param tmux_bin [#to_s] Path to the tmux executable
|
30
|
+
def initialize(
|
31
|
+
contain: false,
|
32
|
+
debug: false,
|
33
|
+
site_timeout: nil,
|
34
|
+
singularity_bin:,
|
35
|
+
singularity_bindpath: '/etc,/media,/mnt,/opt,/run,/srv,/usr,/var,/users',
|
36
|
+
singularity_image:,
|
37
|
+
ssh_hosts:,
|
38
|
+
strict_host_checking: false,
|
39
|
+
submit_host:,
|
40
|
+
tmux_bin:,
|
41
|
+
**_
|
42
|
+
)
|
43
|
+
@contain = !! contain
|
44
|
+
@debug = !! debug
|
45
|
+
@site_timeout = site_timeout.to_i
|
46
|
+
@session_name_label = 'launched-by-ondemand'
|
47
|
+
@singularity_bin = Pathname.new(singularity_bin)
|
48
|
+
@site_singularity_bindpath = singularity_bindpath.to_s
|
49
|
+
@default_singularity_image = Pathname.new(singularity_image)
|
50
|
+
@ssh_hosts = ssh_hosts
|
51
|
+
@strict_host_checking = strict_host_checking
|
52
|
+
@submit_host = submit_host
|
53
|
+
@tmux_bin = tmux_bin
|
54
|
+
@username = Etc.getlogin
|
55
|
+
end
|
56
|
+
|
57
|
+
# @param hostname [#to_s] The hostname to submit the work to
|
58
|
+
# @param script [OodCore::Job::Script] The script object defining the work
|
59
|
+
def start_remote_session(script)
|
60
|
+
cmd = ssh_cmd(submit_host)
|
61
|
+
|
62
|
+
session_name = unique_session_name
|
63
|
+
output = call(*cmd, stdin: wrapped_script(script, session_name))
|
64
|
+
hostname = output.strip
|
65
|
+
|
66
|
+
"#{session_name}@#{hostname}"
|
67
|
+
end
|
68
|
+
|
69
|
+
def stop_remote_session(session_name, hostname)
|
70
|
+
cmd = ssh_cmd(hostname)
|
71
|
+
|
72
|
+
kill_cmd = <<~SCRIPT
|
73
|
+
# Get the tmux pane PID for the target session
|
74
|
+
pane_pid=$(tmux list-panes -aF '\#{session_name} \#{pane_pid}' | grep '#{session_name}' | cut -f 2 -d ' ')
|
75
|
+
# Find the Singularity sinit PID child of the pane process
|
76
|
+
pane_sinit_pid=$(pstree -p "$pane_pid" | grep -o 'sinit([[:digit:]]*' | grep -o '[[:digit:]]*')
|
77
|
+
# Kill sinit which stops both Singularity-based processes and the tmux session
|
78
|
+
kill "$pane_sinit_pid"
|
79
|
+
SCRIPT
|
80
|
+
|
81
|
+
call(*cmd, stdin: kill_cmd)
|
82
|
+
rescue Error => e
|
83
|
+
raise e unless (
|
84
|
+
# The tmux server not running is not an error
|
85
|
+
e.message.include?('failed to connect to server') ||
|
86
|
+
# The session not being found is not an error
|
87
|
+
e.message.include?("session not found: #{session_name_label}")
|
88
|
+
)
|
89
|
+
end
|
90
|
+
|
91
|
+
def list_remote_sessions(host: nil)
|
92
|
+
host_list = (host) ? [host] : ssh_hosts
|
93
|
+
|
94
|
+
host_list.map {
|
95
|
+
|hostname| list_remote_tmux_session(hostname)
|
96
|
+
}.flatten.sort_by {
|
97
|
+
|hsh| hsh[:session_name]
|
98
|
+
}
|
99
|
+
end
|
100
|
+
|
101
|
+
private
|
102
|
+
|
103
|
+
# Run a command in a forked process; returns its stdout, or raises Error with its stderr if it fails
|
104
|
+
def call(cmd, *args, env: {}, stdin: "")
|
105
|
+
args = args.map(&:to_s)
|
106
|
+
env = env.to_h
|
107
|
+
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s)
|
108
|
+
s.success? ? o : raise(Error, e)
|
109
|
+
end
|
110
|
+
|
111
|
+
# The SSH invocation to send a command
|
112
|
+
# -t Force pseudo-terminal allocation (required to allow tmux to run)
|
113
|
+
# -o BatchMode=yes (set mode to be non-interactive)
|
114
|
+
# if ! strict_host_checking
|
115
|
+
# -o UserKnownHostsFile=/dev/null (do not update the user's known hosts file)
|
116
|
+
# -o StrictHostKeyChecking=no (do not check the user's known hosts file)
|
117
|
+
def ssh_cmd(destination_host)
|
118
|
+
if strict_host_checking
|
119
|
+
[
|
120
|
+
'ssh', '-t',
|
121
|
+
'-o', 'BatchMode=yes',
|
122
|
+
"#{username}@#{destination_host}"
|
123
|
+
]
|
124
|
+
else
|
125
|
+
[
|
126
|
+
'ssh', '-t',
|
127
|
+
'-o', 'BatchMode=yes',
|
128
|
+
'-o', 'UserKnownHostsFile=/dev/null',
|
129
|
+
'-o', 'StrictHostKeyChecking=no',
|
130
|
+
"#{username}@#{destination_host}"
|
131
|
+
]
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def shell
|
136
|
+
ENV['SHELL'] || '/bin/bash'
|
137
|
+
end
|
138
|
+
|
139
|
+
# Wraps a user-provided script into a Tmux invocation
|
140
|
+
def wrapped_script(script, session_name)
|
141
|
+
content = script.content
|
142
|
+
unless user_script_has_shebang?(script)
|
143
|
+
content = "#!#{shell}\n#{content}"
|
144
|
+
end
|
145
|
+
|
146
|
+
ERB.new(
|
147
|
+
File.read(Pathname.new(__dir__).join('templates/script_wrapper.erb.sh'))
|
148
|
+
).result(binding.tap {|bnd|
|
149
|
+
{
|
150
|
+
'arguments' => script_arguments(script),
|
151
|
+
'cd_to_workdir' => (script.workdir) ? "cd #{script.workdir}" : '',
|
152
|
+
'contain' => (contain) ? '--contain' : '',
|
153
|
+
'debug' => debug,
|
154
|
+
'email_on_terminated' => script_email_on_event(script, 'terminated'),
|
155
|
+
'email_on_start' => script_email_on_event(script, 'started'),
|
156
|
+
'environment' => export_env(script),
|
157
|
+
'error_path' => (script.error_path) ? script.error_path.to_s : '/dev/null',
|
158
|
+
'job_name' => script.job_name.to_s,
|
159
|
+
'output_path' => (script.output_path) ? script.output_path.to_s : '/dev/null',
|
160
|
+
'script_content' => content,
|
161
|
+
'script_timeout' => script_timeout(script),
|
162
|
+
'session_name' => session_name,
|
163
|
+
'singularity_bin' => singularity_bin,
|
164
|
+
'singularity_image' => singularity_image(script.native),
|
165
|
+
'tmux_bin' => tmux_bin,
|
166
|
+
}.each{
|
167
|
+
|key, value| bnd.local_variable_set(key, value)
|
168
|
+
}
|
169
|
+
})
|
170
|
+
end
|
171
|
+
|
172
|
+
# Generate the environment export block for this script
|
173
|
+
def export_env(script)
|
174
|
+
environment = script.job_environment
|
175
|
+
(environment ? environment : {}).tap{
|
176
|
+
|hsh|
|
177
|
+
hsh['SINGULARITY_BINDPATH'] = singularity_bindpath(script.native)
|
178
|
+
}.map{
|
179
|
+
|key, value| "export #{key}=#{Shellwords.escape(value)}"
|
180
|
+
}.sort.join("\n")
|
181
|
+
end
|
182
|
+
|
183
|
+
def singularity_image(native)
|
184
|
+
if native && native[:singularity_container]
|
185
|
+
return native[:singularity_container]
|
186
|
+
end
|
187
|
+
|
188
|
+
default_singularity_image
|
189
|
+
end
|
190
|
+
|
191
|
+
def singularity_bindpath(native)
|
192
|
+
return site_singularity_bindpath unless native && native[:singularity_bindpath]
|
193
|
+
|
194
|
+
native[:singularity_bindpath]
|
195
|
+
end
|
196
|
+
|
197
|
+
def script_timeout(script)
|
198
|
+
wall_time = script.wall_time.to_i
|
199
|
+
return site_timeout if wall_time == 0
|
200
|
+
return [wall_time, site_timeout].min unless site_timeout == 0
|
201
|
+
|
202
|
+
wall_time
|
203
|
+
end
|
204
|
+
|
205
|
+
def script_arguments(script)
|
206
|
+
return '' unless script.args
|
207
|
+
|
208
|
+
Shellwords.join(script.args)
|
209
|
+
end
|
210
|
+
|
211
|
+
def script_email_on_event(script, event)
|
212
|
+
return false unless script.email && script.send("email_on_#{event}")
|
213
|
+
|
214
|
+
ERB.new(
|
215
|
+
File.read(Pathname.new(__dir__).join('templates/email.erb.sh'))
|
216
|
+
).result(binding.tap {|bnd|
|
217
|
+
{
|
218
|
+
'email_recipients' => script.email.map{|addr| Shellwords.escape(addr)}.join(', '),
|
219
|
+
'job_name' => (script.job_name) ? script.job_name : 'LinuxHost_Adapter_Job',
|
220
|
+
'job_status' => event
|
221
|
+
}.each{
|
222
|
+
|key, value| bnd.local_variable_set(key, value)
|
223
|
+
}
|
224
|
+
})
|
225
|
+
end
|
226
|
+
|
227
|
+
def unique_session_name
|
228
|
+
"#{session_name_label}-#{SecureRandom.uuid}"
|
229
|
+
end
|
230
|
+
|
231
|
+
# List all Tmux sessions on destination_host started by this adapter
|
232
|
+
# Additional tmux ls options available: http://man7.org/linux/man-pages/man1/tmux.1.html#FORMATS
|
233
|
+
def list_remote_tmux_session(destination_host)
|
234
|
+
# Note that the tmux variable substitution looks like Ruby string sub,
|
235
|
+
# these must either be single quoted strings or Ruby-string escaped as well
|
236
|
+
format_str = Shellwords.escape(
|
237
|
+
['#{session_name}', '#{session_created}', '#{pane_pid}'].join(UNIT_SEPARATOR)
|
238
|
+
)
|
239
|
+
keys = [:session_name, :session_created, :session_pid]
|
240
|
+
cmd = ssh_cmd(destination_host) + ['tmux', 'list-panes', '-aF', format_str]
|
241
|
+
|
242
|
+
call(*cmd).split(
|
243
|
+
"\n"
|
244
|
+
).map do |line|
|
245
|
+
Hash[keys.zip(line.split(UNIT_SEPARATOR))].tap do |session_hash|
|
246
|
+
session_hash[:destination_host] = destination_host
|
247
|
+
session_hash[:id] = "#{session_hash[:session_name]}@#{destination_host}"
|
248
|
+
end
|
249
|
+
end.select{
|
250
|
+
|session_hash| session_hash[:session_name].start_with?(session_name_label)
|
251
|
+
}
|
252
|
+
rescue Error => e
|
253
|
+
# The tmux server not running is not an error
|
254
|
+
raise e unless e.message.include?('failed to connect to server')
|
255
|
+
[]
|
256
|
+
end
|
257
|
+
|
258
|
+
def user_script_has_shebang?(script)
|
259
|
+
return false if script.content.empty?
|
260
|
+
script.content.split("\n").first.start_with?('#!/')
|
261
|
+
end
|
262
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
hostname
|
3
|
+
|
4
|
+
# Put the script into a temp file on localhost
|
5
|
+
<% if debug %>
|
6
|
+
singularity_tmp_file=$(mktemp -p "$HOME" --suffix '_sing')
|
7
|
+
tmux_tmp_file=$(mktemp -p "$HOME" --suffix "_tmux")
|
8
|
+
<% else %>
|
9
|
+
singularity_tmp_file=$(mktemp)
|
10
|
+
tmux_tmp_file=$(mktemp)
|
11
|
+
<% end %>
|
12
|
+
|
13
|
+
# Create an executable to run in a tmux session
|
14
|
+
# The escaped HEREDOC means that we need to substitute in $singularity_tmp_file ourselves
|
15
|
+
cat << 'TMUX_LAUNCHER' | sed "s#\$singularity_tmp_file#${singularity_tmp_file}#" > "$tmux_tmp_file"
|
16
|
+
#!/bin/bash
|
17
|
+
<% if email_on_terminated %>
|
18
|
+
exit_script() {
|
19
|
+
<%# DO NOT INDENT email_on_terminated may have HEREDOCS %>
|
20
|
+
<%= email_on_terminated %>
|
21
|
+
trap - SIGINT SIGTERM # clear the trap
|
22
|
+
kill -- -$$ # Sends SIGTERM to child/sub processes
|
23
|
+
}
|
24
|
+
trap exit_script SIGINT SIGTERM
|
25
|
+
<% end %>
|
26
|
+
|
27
|
+
<%= cd_to_workdir %>
|
28
|
+
<%= environment %>
|
29
|
+
|
30
|
+
<%= email_on_start %>
|
31
|
+
|
32
|
+
# Redirect stdout and stderr to separate files for all commands run within the curly braces
|
33
|
+
# https://unix.stackexchange.com/a/6431/204548
|
34
|
+
# Swap stderr and stdout after stdout has been redirected
|
35
|
+
# https://unix.stackexchange.com/a/61932/204548
|
36
|
+
OUTPUT_PATH=<%= output_path %>
|
37
|
+
ERROR_PATH=<%= error_path %>
|
38
|
+
({
|
39
|
+
timeout <%= script_timeout %>s <%= singularity_bin %> exec <%= contain %> --pid <%= singularity_image %> /bin/bash --login $singularity_tmp_file <%= arguments %>
|
40
|
+
} | tee "$OUTPUT_PATH") 3>&1 1>&2 2>&3 | tee "$ERROR_PATH"
|
41
|
+
|
42
|
+
<%= email_on_terminated %>
|
43
|
+
|
44
|
+
# Exit the tmux session when we are complete
|
45
|
+
exit 0
|
46
|
+
TMUX_LAUNCHER
|
47
|
+
|
48
|
+
# Create an executable for Singularity to run
|
49
|
+
# Escaped HEREDOC means that we do not have to worry about Shell.escape-ing script_content
|
50
|
+
cat << 'SINGULARITY_LAUNCHER' > "$singularity_tmp_file"
|
51
|
+
<%= script_content %>
|
52
|
+
SINGULARITY_LAUNCHER
|
53
|
+
|
54
|
+
# Run the script inside a tmux session
|
55
|
+
chmod +x "$singularity_tmp_file"
|
56
|
+
chmod +x "$tmux_tmp_file"
|
57
|
+
<%= tmux_bin %> new-session -d -s "<%= session_name %>" "$tmux_tmp_file"
|
58
|
+
|
59
|
+
# Remove the file
|
60
|
+
<% if ! debug %>
|
61
|
+
# Wait 1 second to ensure that tmux session has started before the file is removed
|
62
|
+
sleep 1
|
63
|
+
rm -f "$tmux_tmp_file"; rm -f "$singularity_tmp_file"
|
64
|
+
<% end %>
|
data/lib/ood_core/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2019-05
|
13
|
+
date: 2019-11-05 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -150,6 +150,10 @@ files:
|
|
150
150
|
- lib/ood_core/job/adapter.rb
|
151
151
|
- lib/ood_core/job/adapters/drmaa.rb
|
152
152
|
- lib/ood_core/job/adapters/helper.rb
|
153
|
+
- lib/ood_core/job/adapters/linux_host.rb
|
154
|
+
- lib/ood_core/job/adapters/linux_host/launcher.rb
|
155
|
+
- lib/ood_core/job/adapters/linux_host/templates/email.erb.sh
|
156
|
+
- lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh
|
153
157
|
- lib/ood_core/job/adapters/lsf.rb
|
154
158
|
- lib/ood_core/job/adapters/lsf/batch.rb
|
155
159
|
- lib/ood_core/job/adapters/lsf/helper.rb
|
@@ -197,7 +201,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
197
201
|
version: '0'
|
198
202
|
requirements: []
|
199
203
|
rubyforge_project:
|
200
|
-
rubygems_version: 2.6.
|
204
|
+
rubygems_version: 2.6.11
|
201
205
|
signing_key:
|
202
206
|
specification_version: 4
|
203
207
|
summary: Open OnDemand core library
|