ood_core 0.13.0 → 0.16.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -16,7 +16,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
16
16
  # from
17
17
  class Error < StandardError; end
18
18
 
19
- UNIT_SEPARATOR = "\x1F"
19
+ UNIT_SEPARATOR = ","
20
20
 
21
21
  # @param debug Whether the adapter should be used in debug mode
22
22
  # @param site_timeout [#to_i] A period after which the job should be killed or nil
@@ -80,12 +80,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
80
80
 
81
81
  call(*cmd, stdin: kill_cmd)
82
82
  rescue Error => e
83
- raise e unless (
84
- # The tmux server not running is not an error
85
- e.message.include?('failed to connect to server') ||
86
- # The session not being found is not an error
87
- e.message.include?("session not found: #{session_name_label}")
88
- )
83
+ interpret_and_raise(e)
89
84
  end
90
85
 
91
86
  def list_remote_sessions(host: nil)
@@ -166,7 +161,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
166
161
  'email_on_terminated' => script_email_on_event(script, 'terminated'),
167
162
  'email_on_start' => script_email_on_event(script, 'started'),
168
163
  'environment' => export_env(script),
169
- 'error_path' => (script.error_path) ? script.error_path.to_s : '/dev/null',
164
+ 'error_path' => error_path(script),
170
165
  'job_name' => script.job_name.to_s,
171
166
  'output_path' => (script.output_path) ? script.output_path.to_s : '/dev/null',
172
167
  'script_content' => content,
@@ -176,6 +171,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
176
171
  'singularity_image' => singularity_image(script.native),
177
172
  'ssh_hosts' => ssh_hosts,
178
173
  'tmux_bin' => tmux_bin,
174
+ 'workdir' => (script.workdir) ? script.workdir.to_s : '/tmp',
179
175
  }.each{
180
176
  |key, value| bnd.local_variable_set(key, value)
181
177
  }
@@ -263,8 +259,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
263
259
  |session_hash| session_hash[:session_name].start_with?(session_name_label)
264
260
  }
265
261
  rescue Error => e
266
- # The tmux server not running is not an error
267
- raise e unless e.message.include?('failed to connect to server')
262
+ interpret_and_raise(e)
268
263
  []
269
264
  end
270
265
 
@@ -272,4 +267,24 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
272
267
  return false if script.content.empty?
273
268
  script.content.split("\n").first.start_with?('#!/')
274
269
  end
270
+
271
+ def error_path(script)
272
+ return script.error_path.to_s if script.error_path
273
+ return script.output_path.to_s if script.output_path
274
+
275
+ '/dev/null'
276
+ end
277
+
278
+ # Under some conditions tmux returns status code 1 even though it is not an
279
+ # actual error. The cases tolerated here are the ones where the tmux server
280
+ # is not running, i.e. there are no sessions at all.
281
+ def interpret_and_raise(error)
282
+ if error.message.include?('failed to connect to server') # no sessions in tmux 1.8
283
+ nil
284
+ elsif error.message.include?('no server running on') # no sessions in tmux 2.7+ message
285
+ nil
286
+ else
287
+ raise error
288
+ end
289
+ end
275
290
  end
@@ -16,13 +16,9 @@ fi
16
16
  echo $hostname
17
17
 
18
18
  # Put the script into a temp file on localhost
19
- <% if debug %>
20
- singularity_tmp_file=$(mktemp -p "$HOME" --suffix '_sing')
21
- tmux_tmp_file=$(mktemp -p "$HOME" --suffix "_tmux")
22
- <% else %>
23
- singularity_tmp_file=$(mktemp)
24
- tmux_tmp_file=$(mktemp)
25
- <% end %>
19
+ singularity_tmp_file=$(mktemp -p "<%= workdir %>" --suffix '_sing')
20
+ tmux_tmp_file=$(mktemp -p "<%= workdir %>" --suffix "_tmux")
21
+
26
22
 
27
23
  # Create an executable to run in a tmux session
28
24
  # The escaped HEREDOC means that we need to substitute in $singularity_tmp_file ourselves
@@ -69,10 +65,3 @@ SINGULARITY_LAUNCHER
69
65
  chmod +x "$singularity_tmp_file"
70
66
  chmod +x "$tmux_tmp_file"
71
67
  <%= tmux_bin %> new-session -d -s "<%= session_name %>" "$tmux_tmp_file"
72
-
73
- # Remove the file
74
- <% if ! debug %>
75
- # Wait 1 second to ensure that tmux session has started before the file is removed
76
- sleep 1
77
- rm -f "$tmux_tmp_file"; rm -f "$singularity_tmp_file"
78
- <% end %>
@@ -80,6 +80,9 @@ module OodCore
80
80
  # from
81
81
  class Error < StandardError; end
82
82
 
83
+ # An error indicating the slurm command timed out
84
+ class SlurmTimeoutError < Error; end
85
+
83
86
  # @param cluster [#to_s, nil] the cluster name
84
87
  # @param conf [#to_s, nil] path to the slurm conf
85
88
  # @param bin [#to_s] path to slurm installation binaries
@@ -147,6 +150,9 @@ module OodCore
147
150
  end
148
151
  jobs
149
152
  end
153
+ rescue SlurmTimeoutError
154
+ # TODO: could use a log entry here
155
+ return [{ id: id, state: 'undetermined' }]
150
156
  end
151
157
 
152
158
  def squeue_fields(attrs)
@@ -303,7 +309,18 @@ module OodCore
303
309
 
304
310
  cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
305
311
  o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
306
- s.success? ? o : raise(Error, e)
312
+ s.success? ? interpret_and_raise(o, e) : raise(Error, e)
313
+ end
314
+
315
+ # Helper function to raise an error based on the contents of stderr.
316
+ # Slurm exits 0 even when the command fails, so we need to interpret stderr
317
+ # to see if the command was actually successful.
318
+ def interpret_and_raise(stdout, stderr)
319
+ return stdout if stderr.empty?
320
+
321
+ raise SlurmTimeoutError, stderr if /^slurm_load_jobs error: Socket timed out/.match(stderr)
322
+
323
+ stdout
307
324
  end
308
325
 
309
326
  def squeue_attrs_for_info_attrs(attrs)
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.13.0"
3
+ VERSION = "0.16.1"
4
4
  end
data/ood_core.gemspec CHANGED
@@ -24,7 +24,8 @@ Gem::Specification.new do |spec|
24
24
 
25
25
  spec.add_runtime_dependency "ood_support", "~> 0.0.2"
26
26
  spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
27
- spec.add_development_dependency "bundler", "~> 1.7"
27
+ spec.add_development_dependency "bundler", "~> 2.1"
28
+ spec.add_runtime_dependency "activesupport", ">= 5.2", "< 6.0"
28
29
  spec.add_development_dependency "rake", "~> 13.0.1"
29
30
  spec.add_development_dependency "rspec", "~> 3.0"
30
31
  spec.add_development_dependency "pry", "~> 0.10"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.16.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2020-08-10 00:00:00.000000000 Z
13
+ date: 2021-04-23 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -52,14 +52,34 @@ dependencies:
52
52
  requirements:
53
53
  - - "~>"
54
54
  - !ruby/object:Gem::Version
55
- version: '1.7'
55
+ version: '2.1'
56
56
  type: :development
57
57
  prerelease: false
58
58
  version_requirements: !ruby/object:Gem::Requirement
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '1.7'
62
+ version: '2.1'
63
+ - !ruby/object:Gem::Dependency
64
+ name: activesupport
65
+ requirement: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '5.2'
70
+ - - "<"
71
+ - !ruby/object:Gem::Version
72
+ version: '6.0'
73
+ type: :runtime
74
+ prerelease: false
75
+ version_requirements: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '5.2'
80
+ - - "<"
81
+ - !ruby/object:Gem::Version
82
+ version: '6.0'
63
83
  - !ruby/object:Gem::Dependency
64
84
  name: rake
65
85
  requirement: !ruby/object:Gem::Requirement
@@ -140,9 +160,9 @@ executables: []
140
160
  extensions: []
141
161
  extra_rdoc_files: []
142
162
  files:
163
+ - ".github/workflows/test.yml"
143
164
  - ".gitignore"
144
165
  - ".rspec"
145
- - ".travis.yml"
146
166
  - CHANGELOG.md
147
167
  - Gemfile
148
168
  - LICENSE.txt
@@ -166,6 +186,12 @@ files:
166
186
  - lib/ood_core/job/adapters/ccq.rb
167
187
  - lib/ood_core/job/adapters/drmaa.rb
168
188
  - lib/ood_core/job/adapters/helper.rb
189
+ - lib/ood_core/job/adapters/kubernetes.rb
190
+ - lib/ood_core/job/adapters/kubernetes/batch.rb
191
+ - lib/ood_core/job/adapters/kubernetes/helper.rb
192
+ - lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb
193
+ - lib/ood_core/job/adapters/kubernetes/resources.rb
194
+ - lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb
169
195
  - lib/ood_core/job/adapters/linux_host.rb
170
196
  - lib/ood_core/job/adapters/linux_host/launcher.rb
171
197
  - lib/ood_core/job/adapters/linux_host/templates/email.erb.sh
@@ -216,7 +242,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
216
242
  - !ruby/object:Gem::Version
217
243
  version: '0'
218
244
  requirements: []
219
- rubygems_version: 3.0.3
245
+ rubygems_version: 3.1.2
220
246
  signing_key:
221
247
  specification_version: 4
222
248
  summary: Open OnDemand core library
data/.travis.yml DELETED
@@ -1,9 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- rvm:
4
- - 2.5.5
5
- before_install: gem install bundler -v '~> 1.17'
6
- notifications:
7
- email:
8
- on_success: never
9
- on_failure: always