ood_core 0.13.0 → 0.16.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,7 +16,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
16
16
  # from
17
17
  class Error < StandardError; end
18
18
 
19
- UNIT_SEPARATOR = "\x1F"
19
+ UNIT_SEPARATOR = ","
20
20
 
21
21
  # @param debug Whether the adapter should be used in debug mode
22
22
  # @param site_timeout [#to_i] A period after which the job should be killed or nil
@@ -80,12 +80,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
80
80
 
81
81
  call(*cmd, stdin: kill_cmd)
82
82
  rescue Error => e
83
- raise e unless (
84
- # The tmux server not running is not an error
85
- e.message.include?('failed to connect to server') ||
86
- # The session not being found is not an error
87
- e.message.include?("session not found: #{session_name_label}")
88
- )
83
+ interpret_and_raise(e)
89
84
  end
90
85
 
91
86
  def list_remote_sessions(host: nil)
@@ -166,7 +161,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
166
161
  'email_on_terminated' => script_email_on_event(script, 'terminated'),
167
162
  'email_on_start' => script_email_on_event(script, 'started'),
168
163
  'environment' => export_env(script),
169
- 'error_path' => (script.error_path) ? script.error_path.to_s : '/dev/null',
164
+ 'error_path' => error_path(script),
170
165
  'job_name' => script.job_name.to_s,
171
166
  'output_path' => (script.output_path) ? script.output_path.to_s : '/dev/null',
172
167
  'script_content' => content,
@@ -176,6 +171,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
176
171
  'singularity_image' => singularity_image(script.native),
177
172
  'ssh_hosts' => ssh_hosts,
178
173
  'tmux_bin' => tmux_bin,
174
+ 'workdir' => (script.workdir) ? script.workdir.to_s : '/tmp',
179
175
  }.each{
180
176
  |key, value| bnd.local_variable_set(key, value)
181
177
  }
@@ -263,8 +259,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
263
259
  |session_hash| session_hash[:session_name].start_with?(session_name_label)
264
260
  }
265
261
  rescue Error => e
266
- # The tmux server not running is not an error
267
- raise e unless e.message.include?('failed to connect to server')
262
+ interpret_and_raise(e)
268
263
  []
269
264
  end
270
265
 
@@ -272,4 +267,24 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
272
267
  return false if script.content.empty?
273
268
  script.content.split("\n").first.start_with?('#!/')
274
269
  end
270
+
271
+ def error_path(script)
272
+ return script.error_path.to_s if script.error_path
273
+ return script.output_path.to_s if script.output_path
274
+
275
+ '/dev/null'
276
+ end
277
+
278
+ # under some conditions tmux returns status code 1 but it's not an actual
279
+ # error. These are when the session is not found or there are no sessions
280
+ # at all.
281
+ def interpret_and_raise(error)
282
+ if error.message.include?('failed to connect to server') # no sessions in tmux 1.8
283
+ nil
284
+ elsif error.message.include?('no server running on') # no sessions in tmux 2.7+ message
285
+ nil
286
+ else
287
+ raise error
288
+ end
289
+ end
275
290
  end
@@ -16,13 +16,9 @@ fi
16
16
  echo $hostname
17
17
 
18
18
  # Put the script into a temp file on localhost
19
- <% if debug %>
20
- singularity_tmp_file=$(mktemp -p "$HOME" --suffix '_sing')
21
- tmux_tmp_file=$(mktemp -p "$HOME" --suffix "_tmux")
22
- <% else %>
23
- singularity_tmp_file=$(mktemp)
24
- tmux_tmp_file=$(mktemp)
25
- <% end %>
19
+ singularity_tmp_file=$(mktemp -p "<%= workdir %>" --suffix '_sing')
20
+ tmux_tmp_file=$(mktemp -p "<%= workdir %>" --suffix "_tmux")
21
+
26
22
 
27
23
  # Create an executable to run in a tmux session
28
24
  # The escaped HEREDOC means that we need to substitute in $singularity_tmp_file ourselves
@@ -69,10 +65,3 @@ SINGULARITY_LAUNCHER
69
65
  chmod +x "$singularity_tmp_file"
70
66
  chmod +x "$tmux_tmp_file"
71
67
  <%= tmux_bin %> new-session -d -s "<%= session_name %>" "$tmux_tmp_file"
72
-
73
- # Remove the file
74
- <% if ! debug %>
75
- # Wait 1 second to ensure that tmux session has started before the file is removed
76
- sleep 1
77
- rm -f "$tmux_tmp_file"; rm -f "$singularity_tmp_file"
78
- <% end %>
@@ -80,6 +80,9 @@ module OodCore
80
80
  # from
81
81
  class Error < StandardError; end
82
82
 
83
+ # An error indicating the slurm command timed out
84
+ class SlurmTimeoutError < Error; end
85
+
83
86
  # @param cluster [#to_s, nil] the cluster name
84
87
  # @param conf [#to_s, nil] path to the slurm conf
85
88
  # @param bin [#to_s] path to slurm installation binaries
@@ -147,6 +150,9 @@ module OodCore
147
150
  end
148
151
  jobs
149
152
  end
153
+ rescue SlurmTimeoutError
154
+ # TODO: could use a log entry here
155
+ return [{ id: id, state: 'undetermined' }]
150
156
  end
151
157
 
152
158
  def squeue_fields(attrs)
@@ -303,7 +309,18 @@ module OodCore
303
309
 
304
310
  cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
305
311
  o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
306
- s.success? ? o : raise(Error, e)
312
+ s.success? ? interpret_and_raise(o, e) : raise(Error, e)
313
+ end
314
+
315
+ # Helper function to raise an error based on the contents of stderr.
316
+ # Slurm exits 0 even when the command fails, so we need to interpret stderr
317
+ # to see if the command was actually successful.
318
+ def interpret_and_raise(stdout, stderr)
319
+ return stdout if stderr.empty?
320
+
321
+ raise SlurmTimeoutError, stderr if /^slurm_load_jobs error: Socket timed out/.match(stderr)
322
+
323
+ stdout
307
324
  end
308
325
 
309
326
  def squeue_attrs_for_info_attrs(attrs)
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.13.0"
3
+ VERSION = "0.16.1"
4
4
  end
data/ood_core.gemspec CHANGED
@@ -24,7 +24,8 @@ Gem::Specification.new do |spec|
24
24
 
25
25
  spec.add_runtime_dependency "ood_support", "~> 0.0.2"
26
26
  spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
27
- spec.add_development_dependency "bundler", "~> 1.7"
27
+ spec.add_development_dependency "bundler", "~> 2.1"
28
+ spec.add_runtime_dependency "activesupport", ">= 5.2", "< 6.0"
28
29
  spec.add_development_dependency "rake", "~> 13.0.1"
29
30
  spec.add_development_dependency "rspec", "~> 3.0"
30
31
  spec.add_development_dependency "pry", "~> 0.10"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.16.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2020-08-10 00:00:00.000000000 Z
13
+ date: 2021-04-23 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -52,14 +52,34 @@ dependencies:
52
52
  requirements:
53
53
  - - "~>"
54
54
  - !ruby/object:Gem::Version
55
- version: '1.7'
55
+ version: '2.1'
56
56
  type: :development
57
57
  prerelease: false
58
58
  version_requirements: !ruby/object:Gem::Requirement
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '1.7'
62
+ version: '2.1'
63
+ - !ruby/object:Gem::Dependency
64
+ name: activesupport
65
+ requirement: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '5.2'
70
+ - - "<"
71
+ - !ruby/object:Gem::Version
72
+ version: '6.0'
73
+ type: :runtime
74
+ prerelease: false
75
+ version_requirements: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '5.2'
80
+ - - "<"
81
+ - !ruby/object:Gem::Version
82
+ version: '6.0'
63
83
  - !ruby/object:Gem::Dependency
64
84
  name: rake
65
85
  requirement: !ruby/object:Gem::Requirement
@@ -140,9 +160,9 @@ executables: []
140
160
  extensions: []
141
161
  extra_rdoc_files: []
142
162
  files:
163
+ - ".github/workflows/test.yml"
143
164
  - ".gitignore"
144
165
  - ".rspec"
145
- - ".travis.yml"
146
166
  - CHANGELOG.md
147
167
  - Gemfile
148
168
  - LICENSE.txt
@@ -166,6 +186,12 @@ files:
166
186
  - lib/ood_core/job/adapters/ccq.rb
167
187
  - lib/ood_core/job/adapters/drmaa.rb
168
188
  - lib/ood_core/job/adapters/helper.rb
189
+ - lib/ood_core/job/adapters/kubernetes.rb
190
+ - lib/ood_core/job/adapters/kubernetes/batch.rb
191
+ - lib/ood_core/job/adapters/kubernetes/helper.rb
192
+ - lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb
193
+ - lib/ood_core/job/adapters/kubernetes/resources.rb
194
+ - lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb
169
195
  - lib/ood_core/job/adapters/linux_host.rb
170
196
  - lib/ood_core/job/adapters/linux_host/launcher.rb
171
197
  - lib/ood_core/job/adapters/linux_host/templates/email.erb.sh
@@ -216,7 +242,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
216
242
  - !ruby/object:Gem::Version
217
243
  version: '0'
218
244
  requirements: []
219
- rubygems_version: 3.0.3
245
+ rubygems_version: 3.1.2
220
246
  signing_key:
221
247
  specification_version: 4
222
248
  summary: Open OnDemand core library
data/.travis.yml DELETED
@@ -1,9 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- rvm:
4
- - 2.5.5
5
- before_install: gem install bundler -v '~> 1.17'
6
- notifications:
7
- email:
8
- on_success: never
9
- on_failure: always