ood_core 0.9.3 → 0.11.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,274 @@
1
+ require 'erb'
2
+ require 'etc'
3
+ require 'pathname'
4
+ require 'securerandom'
5
+ require 'shellwords'
6
+ require 'time'
7
+
8
+ # Object used for simplified communication with SSH hosts
9
+ #
10
+ # @api private
11
+ class OodCore::Job::Adapters::LinuxHost::Launcher
12
+ attr_reader :contain, :debug, :site_timeout, :session_name_label, :singularity_bin,
13
+ :site_singularity_bindpath, :default_singularity_image, :ssh_hosts,
14
+ :strict_host_checking, :tmux_bin, :username
15
+ # The root exception class that all LinuxHost adapter-specific exceptions inherit
16
+ # from
17
+ class Error < StandardError; end
18
+
19
+ UNIT_SEPARATOR = "\x1F"
20
+
21
+ # @param debug Whether the adapter should be used in debug mode
22
+ # @param site_timeout [#to_i] A period after which the job should be killed or nil
23
+ # @param singularity_bin Path to the Singularity executable
24
+ # @param singularity_bindpath A comma delimited string of host paths to bindmount into the guest; sets SINGULARITY_BINDPATH environment variable
25
+ # @param singularity_image [#to_s] Path to the Singularity image
26
+ # @param ssh_hosts List of hosts to check when scanning for running jobs
27
+ # @param strict_host_checking Allow SSH to perform strict host checking
28
+ # @param submit_host The SSH-able host
29
+ # @param tmux_bin [#to_s] Path to the tmux executable
30
+ def initialize(
31
+ contain: false,
32
+ debug: false,
33
+ site_timeout: nil,
34
+ singularity_bin:,
35
+ singularity_bindpath: '/etc,/media,/mnt,/opt,/run,/srv,/usr,/var,/users',
36
+ singularity_image:,
37
+ ssh_hosts:,
38
+ strict_host_checking: false,
39
+ submit_host:,
40
+ tmux_bin:,
41
+ **_
42
+ )
43
+ @contain = !! contain
44
+ @debug = !! debug
45
+ @site_timeout = site_timeout.to_i
46
+ @session_name_label = 'launched-by-ondemand'
47
+ @singularity_bin = Pathname.new(singularity_bin)
48
+ @site_singularity_bindpath = singularity_bindpath.to_s
49
+ @default_singularity_image = Pathname.new(singularity_image)
50
+ @ssh_hosts = ssh_hosts
51
+ @strict_host_checking = strict_host_checking
52
+ @submit_host = submit_host
53
+ @tmux_bin = tmux_bin
54
+ @username = Etc.getlogin
55
+ end
56
+
57
+ # @return [String] the session id in the form "<session_name>@<hostname>", where hostname is reported by the remote host
58
+ # @param script [OodCore::Job::Script] The script object defining the work
59
+ def start_remote_session(script)
60
+ cmd = ssh_cmd(submit_host(script), ['/usr/bin/env', 'bash'])
61
+
62
+ session_name = unique_session_name
63
+ output = call(*cmd, stdin: wrapped_script(script, session_name))
64
+ hostname = output.strip
65
+
66
+ "#{session_name}@#{hostname}"
67
+ end
68
+
69
+ def stop_remote_session(session_name, hostname)
70
+ cmd = ssh_cmd(hostname, ['/usr/bin/env', 'bash'])
71
+
72
+ kill_cmd = <<~SCRIPT
73
+ # Get the tmux pane PID for the target session
74
+ pane_pid=$(tmux list-panes -aF '\#{session_name} \#{pane_pid}' | grep '#{session_name}' | cut -f 2 -d ' ')
75
+ # Find the Singularity sinit PID child of the pane process
76
+ pane_sinit_pid=$(pstree -p -l "$pane_pid" | grep -o 'sinit([[:digit:]]*' | grep -o '[[:digit:]]*')
77
+ # Kill sinit which stops both Singularity-based processes and the tmux session
78
+ kill "$pane_sinit_pid"
79
+ SCRIPT
80
+
81
+ call(*cmd, stdin: kill_cmd)
82
+ rescue Error => e
83
+ raise e unless (
84
+ # The tmux server not running is not an error
85
+ e.message.include?('failed to connect to server') ||
86
+ # The session not being found is not an error
87
+ e.message.include?("session not found: #{session_name_label}")
88
+ )
89
+ end
90
+
91
+ def list_remote_sessions(host: nil)
92
+ host_list = (host) ? [host] : ssh_hosts
93
+
94
+ host_list.map {
95
+ |hostname| list_remote_tmux_session(hostname)
96
+ }.flatten.sort_by {
97
+ |hsh| hsh[:session_name]
98
+ }
99
+ end
100
+
101
+ def submit_host(script = nil)
102
+ if script && script.native && script.native['submit_host_override']
103
+ script.native['submit_host_override']
104
+ else
105
+ @submit_host
106
+ end
107
+ end
108
+
109
+ private
110
+
111
+ # Run a command in a subprocess; returns its stdout on success, raises Error with its stderr on failure
112
+ def call(cmd, *args, env: {}, stdin: "")
113
+ args = args.map(&:to_s)
114
+ env = env.to_h
115
+ o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s)
116
+ s.success? ? o : raise(Error, e)
117
+ end
118
+
119
+ # The full command to ssh into the destination host and execute the command.
120
+ # SSH options include:
121
+ # -t Force pseudo-terminal allocation (required to allow tmux to run)
122
+ # -o BatchMode=yes (set mode to be non-interactive)
123
+ # if ! strict_host_checking
124
+ # -o UserKnownHostsFile=/dev/null (do not update the user's known hosts file)
125
+ # -o StrictHostKeyChecking=no (do not check the user's known hosts file)
126
+ #
127
+ # @param destination_host [#to_s] the destination host you wish to ssh into
128
+ # @param cmd [Array<#to_s>] the command to be executed on the destination host
129
+ def ssh_cmd(destination_host, cmd)
130
+ if strict_host_checking
131
+ [
132
+ 'ssh', '-t',
133
+ '-o', 'BatchMode=yes',
134
+ "#{username}@#{destination_host}"
135
+ ].concat(cmd)
136
+ else
137
+ [
138
+ 'ssh', '-t',
139
+ '-o', 'BatchMode=yes',
140
+ '-o', 'UserKnownHostsFile=/dev/null',
141
+ '-o', 'StrictHostKeyChecking=no',
142
+ "#{username}@#{destination_host}"
143
+ ].concat(cmd)
144
+ end
145
+ end
146
+
147
+ def shell
148
+ ENV['SHELL'] || '/bin/bash'
149
+ end
150
+
151
+ # Wraps a user-provided script into a Tmux invocation
152
+ def wrapped_script(script, session_name)
153
+ content = script.content
154
+ unless user_script_has_shebang?(script)
155
+ content = "#!#{shell}\n#{content}"
156
+ end
157
+
158
+ ERB.new(
159
+ File.read(Pathname.new(__dir__).join('templates/script_wrapper.erb.sh'))
160
+ ).result(binding.tap {|bnd|
161
+ {
162
+ 'arguments' => script_arguments(script),
163
+ 'cd_to_workdir' => (script.workdir) ? "cd #{script.workdir}" : '',
164
+ 'contain' => (contain) ? '--contain' : '',
165
+ 'debug' => debug,
166
+ 'email_on_terminated' => script_email_on_event(script, 'terminated'),
167
+ 'email_on_start' => script_email_on_event(script, 'started'),
168
+ 'environment' => export_env(script),
169
+ 'error_path' => (script.error_path) ? script.error_path.to_s : '/dev/null',
170
+ 'job_name' => script.job_name.to_s,
171
+ 'output_path' => (script.output_path) ? script.output_path.to_s : '/dev/null',
172
+ 'script_content' => content,
173
+ 'script_timeout' => script_timeout(script),
174
+ 'session_name' => session_name,
175
+ 'singularity_bin' => singularity_bin,
176
+ 'singularity_image' => singularity_image(script.native),
177
+ 'tmux_bin' => tmux_bin,
178
+ }.each{
179
+ |key, value| bnd.local_variable_set(key, value)
180
+ }
181
+ })
182
+ end
183
+
184
+ # Generate the environment export block for this script
185
+ def export_env(script)
186
+ environment = script.job_environment
187
+ (environment ? environment : {}).tap{
188
+ |hsh|
189
+ hsh['SINGULARITY_BINDPATH'] = singularity_bindpath(script.native)
190
+ }.map{
191
+ |key, value| "export #{key}=#{Shellwords.escape(value)}"
192
+ }.sort.join("\n")
193
+ end
194
+
195
+ def singularity_image(native)
196
+ if native && native[:singularity_container]
197
+ return native[:singularity_container]
198
+ end
199
+
200
+ default_singularity_image
201
+ end
202
+
203
+ def singularity_bindpath(native)
204
+ return site_singularity_bindpath unless native && native[:singularity_bindpath]
205
+
206
+ native[:singularity_bindpath]
207
+ end
208
+
209
+ def script_timeout(script)
210
+ wall_time = script.wall_time.to_i
211
+ return site_timeout if wall_time == 0
212
+ return [wall_time, site_timeout].min unless site_timeout == 0
213
+
214
+ wall_time
215
+ end
216
+
217
+ def script_arguments(script)
218
+ return '' unless script.args
219
+
220
+ Shellwords.join(script.args)
221
+ end
222
+
223
+ def script_email_on_event(script, event)
224
+ return false unless script.email && script.send("email_on_#{event}")
225
+
226
+ ERB.new(
227
+ File.read(Pathname.new(__dir__).join('templates/email.erb.sh'))
228
+ ).result(binding.tap {|bnd|
229
+ {
230
+ 'email_recipients' => script.email.map{|addr| Shellwords.escape(addr)}.join(', '),
231
+ 'job_name' => (script.job_name) ? script.job_name : 'LinuxHost_Adapter_Job',
232
+ 'job_status' => event
233
+ }.each{
234
+ |key, value| bnd.local_variable_set(key, value)
235
+ }
236
+ })
237
+ end
238
+
239
+ def unique_session_name
240
+ "#{session_name_label}-#{SecureRandom.uuid}"
241
+ end
242
+
243
+ # List all Tmux sessions on destination_host started by this adapter
244
+ # Additional tmux ls options available: http://man7.org/linux/man-pages/man1/tmux.1.html#FORMATS
245
+ def list_remote_tmux_session(destination_host)
246
+ # Note that the tmux variable substitution looks like Ruby string sub,
247
+ # these must either be single quoted strings or Ruby-string escaped as well
248
+ format_str = Shellwords.escape(
249
+ ['#{session_name}', '#{session_created}', '#{pane_pid}'].join(UNIT_SEPARATOR)
250
+ )
251
+ keys = [:session_name, :session_created, :session_pid]
252
+ cmd = ssh_cmd(destination_host, ['tmux', 'list-panes', '-aF', format_str])
253
+
254
+ call(*cmd).split(
255
+ "\n"
256
+ ).map do |line|
257
+ Hash[keys.zip(line.split(UNIT_SEPARATOR))].tap do |session_hash|
258
+ session_hash[:destination_host] = destination_host
259
+ session_hash[:id] = "#{session_hash[:session_name]}@#{destination_host}"
260
+ end
261
+ end.select{
262
+ |session_hash| session_hash[:session_name].start_with?(session_name_label)
263
+ }
264
+ rescue Error => e
265
+ # The tmux server not running is not an error
266
+ raise e unless e.message.include?('failed to connect to server')
267
+ []
268
+ end
269
+
270
+ def user_script_has_shebang?(script)
271
+ return false if script.content.empty?
272
+ script.content.split("\n").first.start_with?('#!/')
273
+ end
274
+ end
@@ -0,0 +1,9 @@
1
+ if command -v mail; then
2
+ cat << EMAIL_CONTENT | mail -s "Job <%= job_name %> has <%= job_status %>" <%= email_recipients %>
3
+ Greetings,
4
+
5
+ Your job <%= job_name %> has <%= job_status %>.
6
+
7
+ - The OnDemand Linux Host Adapter
8
+ EMAIL_CONTENT
9
+ fi
@@ -0,0 +1,64 @@
1
+ #!/bin/bash
2
+ hostname
3
+
4
+ # Put the script into a temp file on localhost
5
+ <% if debug %>
6
+ singularity_tmp_file=$(mktemp -p "$HOME" --suffix '_sing')
7
+ tmux_tmp_file=$(mktemp -p "$HOME" --suffix "_tmux")
8
+ <% else %>
9
+ singularity_tmp_file=$(mktemp)
10
+ tmux_tmp_file=$(mktemp)
11
+ <% end %>
12
+
13
+ # Create an executable to run in a tmux session
14
+ # The escaped HEREDOC means that we need to substitute in $singularity_tmp_file ourselves
15
+ cat << 'TMUX_LAUNCHER' | sed "s#\$singularity_tmp_file#${singularity_tmp_file}#" > "$tmux_tmp_file"
16
+ #!/bin/bash
17
+ <% if email_on_terminated %>
18
+ exit_script() {
19
+ <%# DO NOT INDENT email_on_terminated may have HEREDOCS %>
20
+ <%= email_on_terminated %>
21
+ trap - SIGINT SIGTERM # clear the trap
22
+ kill -- -$$ # Sends SIGTERM to child/sub processes
23
+ }
24
+ trap exit_script SIGINT SIGTERM
25
+ <% end %>
26
+
27
+ <%= cd_to_workdir %>
28
+ <%= environment %>
29
+
30
+ <%= email_on_start %>
31
+
32
+ # Redirect stdout and stderr to separate files for all commands run within the curly braces
33
+ # https://unix.stackexchange.com/a/6431/204548
34
+ # Swap stderr and stdout after stdout has been redirected
35
+ # https://unix.stackexchange.com/a/61932/204548
36
+ OUTPUT_PATH=<%= output_path %>
37
+ ERROR_PATH=<%= error_path %>
38
+ ({
39
+ timeout <%= script_timeout %>s <%= singularity_bin %> exec <%= contain %> --pid <%= singularity_image %> /bin/bash --login $singularity_tmp_file <%= arguments %>
40
+ } | tee "$OUTPUT_PATH") 3>&1 1>&2 2>&3 | tee "$ERROR_PATH"
41
+
42
+ <%= email_on_terminated %>
43
+
44
+ # Exit the tmux session when we are complete
45
+ exit 0
46
+ TMUX_LAUNCHER
47
+
48
+ # Create an executable for Singularity to run
49
+ # Escaped HEREDOC means that we do not have to worry about shell-escaping (Shellwords.escape) script_content
50
+ cat << 'SINGULARITY_LAUNCHER' > "$singularity_tmp_file"
51
+ <%= script_content %>
52
+ SINGULARITY_LAUNCHER
53
+
54
+ # Run the script inside a tmux session
55
+ chmod +x "$singularity_tmp_file"
56
+ chmod +x "$tmux_tmp_file"
57
+ <%= tmux_bin %> new-session -d -s "<%= session_name %>" "$tmux_tmp_file"
58
+
59
+ # Remove the file
60
+ <% if ! debug %>
61
+ # Wait 1 second to ensure that tmux session has started before the file is removed
62
+ sleep 1
63
+ rm -f "$tmux_tmp_file"; rm -f "$singularity_tmp_file"
64
+ <% end %>
@@ -167,6 +167,10 @@ module OodCore
167
167
  raise JobAdapterError, e.message
168
168
  end
169
169
 
170
+ def directive_prefix
171
+ '#BSUB'
172
+ end
173
+
170
174
  private
171
175
  # Determine state from LSF state code
172
176
  def get_state(st)
@@ -92,6 +92,15 @@ class OodCore::Job::Adapters::Lsf::Helper
92
92
  args += ["-W", (script.wall_time / 60).to_i] unless script.wall_time.nil?
93
93
  args += ["-L", script.shell_path.to_s] unless script.shell_path.nil?
94
94
 
95
+ # environment
96
+ env = script.job_environment || {}
97
+ # To preserve pre-existing behavior we only act when true or false, when nil we do nothing
98
+ if script.copy_environment?
99
+ args += ["-env", (["all"] + env.keys).join(",")]
100
+ elsif script.copy_environment? == false
101
+ args += ["-env", (["none"] + env.keys).join(",")]
102
+ end
103
+
95
104
  # input and output files
96
105
  args += ["-i", script.input_path] unless script.input_path.nil?
97
106
  args += ["-o", script.output_path] unless script.output_path.nil?
@@ -104,9 +113,6 @@ class OodCore::Job::Adapters::Lsf::Helper
104
113
 
105
114
  args += script.native unless script.native.nil?
106
115
 
107
- # environment
108
- env = script.job_environment || {}
109
-
110
116
  {args: args, env: env}
111
117
  end
112
118
  end
@@ -261,6 +261,7 @@ module OodCore
261
261
  # Set environment variables
262
262
  envvars = script.job_environment.to_h
263
263
  args += ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
264
+ args += ["-V"] if script.copy_environment?
264
265
 
265
266
  # If error_path is not specified we join stdout & stderr (as this
266
267
  # mimics what the other resource managers do)
@@ -397,6 +398,10 @@ module OodCore
397
398
  raise JobAdapterError, e.message unless /Unknown Job Id/ =~ e.message || /Job has finished/ =~ e.message
398
399
  end
399
400
 
401
+ def directive_prefix
402
+ '#PBS'
403
+ end
404
+
400
405
  private
401
406
  # Convert duration to seconds
402
407
  def duration_in_seconds(time)
@@ -157,6 +157,10 @@ module OodCore
157
157
  rescue Batch::Error => e
158
158
  raise JobAdapterError, e.message
159
159
  end
160
+
161
+ def directive_prefix
162
+ '#$'
163
+ end
160
164
  end
161
165
  end
162
166
  end
@@ -33,8 +33,7 @@ class OodCore::Job::Adapters::Sge::Batch
33
33
  # @see Factory.build_sge
34
34
  def initialize(config)
35
35
  @cluster = config.fetch(:cluster, nil)
36
- @conf = Pathname.new(config.fetch(:conf, nil))
37
- @bin = Pathname.new(config.fetch(:bin, nil))
36
+ @bin = Pathname.new(config.fetch(:bin, nil).to_s)
38
37
  @sge_root = Pathname.new(config[:sge_root] || ENV['SGE_ROOT'] || "/var/lib/gridengine")
39
38
  @bin_overrides = config.fetch(:bin_overrides, {})
40
39
 
@@ -20,6 +20,7 @@ class OodCore::Job::Adapters::Sge::Helper
20
20
  args += ['-h'] if script.submit_as_hold
21
21
  args += ['-r', 'yes'] if script.rerunnable
22
22
  script.job_environment.each_pair {|k, v| args += ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
23
+ args += ["-V"] if script.copy_environment?
23
24
 
24
25
  if script.workdir
25
26
  args += ['-wd', script.workdir]
@@ -27,13 +27,15 @@ class QstatXmlJRListener
27
27
  @parsed_job = {
28
28
  :tasks => [],
29
29
  :status => :queued,
30
- :procs => 1, # un-knowable from SGE qstat output
30
+ :procs => 1,
31
31
  :native => {} # TODO: improve native attribute reporting
32
32
  }
33
33
  @current_text = nil
34
34
  @current_request = nil
35
35
 
36
36
  @processing_job_array_spec = false
37
+ @adding_slots = false
38
+
37
39
  @job_array_spec = {
38
40
  start: nil,
39
41
  stop: nil,
@@ -46,6 +48,8 @@ class QstatXmlJRListener
46
48
  case name
47
49
  when 'task_id_range'
48
50
  toggle_processing_array_spec
51
+ when 'JB_pe_range'
52
+ toggle_adding_slots
49
53
  end
50
54
  end
51
55
 
@@ -78,13 +82,16 @@ class QstatXmlJRListener
78
82
  when 'djob_info'
79
83
  finalize_parsed_job
80
84
  when 'RN_min'
81
- set_job_array_piece(:start)
85
+ set_job_array_piece(:start) if @processing_job_array_spec
86
+ set_slots if @adding_slots
82
87
  when 'RN_max'
83
- set_job_array_piece(:stop)
88
+ set_job_array_piece(:stop) if @processing_job_array_spec
84
89
  when 'RN_step'
85
- set_job_array_piece(:step)
90
+ set_job_array_piece(:step) if @processing_job_array_spec
86
91
  when 'task_id_range'
87
92
  toggle_processing_array_spec
93
+ when 'JB_pe_range'
94
+ toggle_adding_slots
88
95
  end
89
96
  end
90
97
 
@@ -186,5 +193,13 @@ class QstatXmlJRListener
186
193
  def toggle_processing_array_spec
187
194
  @processing_job_array_spec = ! @processing_job_array_spec
188
195
  end
196
+
197
+ def toggle_adding_slots
198
+ @adding_slots = ! @adding_slots
199
+ end
200
+
201
+ def set_slots
202
+ @parsed_job[:procs] = @current_text.to_i
203
+ end
189
204
  end
190
205