ood_core 0.14.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +30 -0
- data/CHANGELOG.md +87 -1
- data/README.md +1 -1
- data/lib/ood_core/cluster.rb +20 -5
- data/lib/ood_core/job/adapters/ccq.rb +19 -12
- data/lib/ood_core/job/adapters/kubernetes.rb +1 -1
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +82 -55
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +79 -69
- data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +9 -0
- data/lib/ood_core/job/adapters/kubernetes/resources.rb +38 -10
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +140 -46
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +16 -9
- data/lib/ood_core/job/adapters/slurm.rb +1 -0
- data/lib/ood_core/job/adapters/torque.rb +2 -0
- data/lib/ood_core/job/script.rb +8 -1
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -1
- metadata +27 -6
- data/.travis.yml +0 -9
@@ -16,7 +16,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
|
|
16
16
|
# from
|
17
17
|
class Error < StandardError; end
|
18
18
|
|
19
|
-
UNIT_SEPARATOR = "\x1F"
|
19
|
+
UNIT_SEPARATOR = ","
|
20
20
|
|
21
21
|
# @param debug Whether the adapter should be used in debug mode
|
22
22
|
# @param site_timeout [#to_i] A period after which the job should be killed or nil
|
@@ -80,12 +80,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
|
|
80
80
|
|
81
81
|
call(*cmd, stdin: kill_cmd)
|
82
82
|
rescue Error => e
|
83
|
-
|
84
|
-
# The tmux server not running is not an error
|
85
|
-
e.message.include?('failed to connect to server') ||
|
86
|
-
# The session not being found is not an error
|
87
|
-
e.message.include?("session not found: #{session_name_label}")
|
88
|
-
)
|
83
|
+
interpret_and_raise(e)
|
89
84
|
end
|
90
85
|
|
91
86
|
def list_remote_sessions(host: nil)
|
@@ -264,8 +259,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
|
|
264
259
|
|session_hash| session_hash[:session_name].start_with?(session_name_label)
|
265
260
|
}
|
266
261
|
rescue Error => e
|
267
|
-
|
268
|
-
raise e unless e.message.include?('failed to connect to server')
|
262
|
+
interpret_and_raise(e)
|
269
263
|
[]
|
270
264
|
end
|
271
265
|
|
@@ -280,4 +274,17 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
|
|
280
274
|
|
281
275
|
'/dev/null'
|
282
276
|
end
|
277
|
+
|
278
|
+
# under some conditions tmux returns status code 1 but it's not an actual
|
279
|
+
# error. These are when the session is not found or there are no sessions
|
280
|
+
# at all.
|
281
|
+
def interpret_and_raise(error)
|
282
|
+
if error.message.include?('failed to connect to server') # no sessions in tmux 1.8
|
283
|
+
nil
|
284
|
+
elsif error.message.include?('no server running on') # no sessions in tmux 2.7+ message
|
285
|
+
nil
|
286
|
+
else
|
287
|
+
raise error
|
288
|
+
end
|
289
|
+
end
|
283
290
|
end
|
data/lib/ood_core/job/adapters/slurm.rb
CHANGED
@@ -423,6 +423,7 @@ module OodCore
|
|
423
423
|
args.concat ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
|
424
424
|
args.concat ['-a', script.job_array_request] unless script.job_array_request.nil?
|
425
425
|
args.concat ['--qos', script.qos] unless script.qos.nil?
|
426
|
+
args.concat ['--gpus-per-node', script.gpus_per_node] unless script.gpus_per_node.nil?
|
426
427
|
# ignore nodes, don't know how to do this for slurm
|
427
428
|
|
428
429
|
# Set dependencies
|
data/lib/ood_core/job/adapters/torque.rb
CHANGED
@@ -159,6 +159,8 @@ module OodCore
|
|
159
159
|
args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
|
160
160
|
args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
|
161
161
|
args.concat ['-l', "qos=#{script.qos}"] unless script.qos.nil?
|
162
|
+
args.concat ['-l', "gpus=#{script.gpus_per_node}"] unless script.gpus_per_node.nil?
|
163
|
+
|
162
164
|
# Set environment variables
|
163
165
|
env = script.job_environment.to_h
|
164
166
|
args.concat ["-v", env.keys.join(",")] unless env.empty?
|
data/lib/ood_core/job/script.rb
CHANGED
@@ -103,6 +103,10 @@ module OodCore
|
|
103
103
|
# @return [String, nil] qos
|
104
104
|
attr_reader :qos
|
105
105
|
|
106
|
+
# The GPUs per node for the job
|
107
|
+
# @return [Integer, nil] gpus per node
|
108
|
+
attr_reader :gpus_per_node
|
109
|
+
|
106
110
|
# Object detailing any native specifications that are implementation specific
|
107
111
|
# @note Should not be used at all costs.
|
108
112
|
# @return [Object, nil] native specifications
|
@@ -136,6 +140,7 @@ module OodCore
|
|
136
140
|
# @param accounting_id [#to_s, nil] accounting id
|
137
141
|
# @param job_array_request [#to_s, nil] job array request
|
138
142
|
# @param qos [#to_s, nil] qos
|
143
|
+
# @param gpus_per_node [#to_i, nil] gpus per node
|
139
144
|
# @param native [Object, nil] native specifications
|
140
145
|
# @param copy_environment [Boolean, nil] copy the environment
|
141
146
|
def initialize(content:, args: nil, submit_as_hold: nil, rerunnable: nil,
|
@@ -145,7 +150,7 @@ module OodCore
|
|
145
150
|
output_path: nil, error_path: nil, reservation_id: nil,
|
146
151
|
queue_name: nil, priority: nil, start_time: nil,
|
147
152
|
wall_time: nil, accounting_id: nil, job_array_request: nil,
|
148
|
-
qos: nil, native: nil, copy_environment: nil, **_)
|
153
|
+
qos: nil, gpus_per_node: nil, native: nil, copy_environment: nil, **_)
|
149
154
|
@content = content.to_s
|
150
155
|
|
151
156
|
@submit_as_hold = submit_as_hold
|
@@ -170,6 +175,7 @@ module OodCore
|
|
170
175
|
@accounting_id = accounting_id && accounting_id.to_s
|
171
176
|
@job_array_request = job_array_request && job_array_request.to_s
|
172
177
|
@qos = qos && qos.to_s
|
178
|
+
@gpus_per_node = gpus_per_node && gpus_per_node.to_i
|
173
179
|
@native = native
|
174
180
|
@copy_environment = (copy_environment.nil?) ? nil : !! copy_environment
|
175
181
|
end
|
@@ -200,6 +206,7 @@ module OodCore
|
|
200
206
|
accounting_id: accounting_id,
|
201
207
|
job_array_request: job_array_request,
|
202
208
|
qos: qos,
|
209
|
+
gpus_per_node: gpus_per_node,
|
203
210
|
native: native,
|
204
211
|
copy_environment: copy_environment
|
205
212
|
}
|
data/lib/ood_core/version.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -24,7 +24,8 @@ Gem::Specification.new do |spec|
|
|
24
24
|
|
25
25
|
spec.add_runtime_dependency "ood_support", "~> 0.0.2"
|
26
26
|
spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
|
27
|
-
spec.add_development_dependency "bundler", "~> 1.7"
|
27
|
+
spec.add_development_dependency "bundler", "~> 2.1"
|
28
|
+
spec.add_runtime_dependency "activesupport", ">= 5.2", "< 6.0"
|
28
29
|
spec.add_development_dependency "rake", "~> 13.0.1"
|
29
30
|
spec.add_development_dependency "rspec", "~> 3.0"
|
30
31
|
spec.add_development_dependency "pry", "~> 0.10"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.14.0
|
4
|
+
version: 0.17.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2021-05-26 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -52,14 +52,34 @@ dependencies:
|
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version: '1.7'
|
55
|
+
version: '2.1'
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
58
|
version_requirements: !ruby/object:Gem::Requirement
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '1.7'
|
62
|
+
version: '2.1'
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: activesupport
|
65
|
+
requirement: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '5.2'
|
70
|
+
- - "<"
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '6.0'
|
73
|
+
type: :runtime
|
74
|
+
prerelease: false
|
75
|
+
version_requirements: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '5.2'
|
80
|
+
- - "<"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '6.0'
|
63
83
|
- !ruby/object:Gem::Dependency
|
64
84
|
name: rake
|
65
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -140,9 +160,9 @@ executables: []
|
|
140
160
|
extensions: []
|
141
161
|
extra_rdoc_files: []
|
142
162
|
files:
|
163
|
+
- ".github/workflows/test.yml"
|
143
164
|
- ".gitignore"
|
144
165
|
- ".rspec"
|
145
|
-
- ".travis.yml"
|
146
166
|
- CHANGELOG.md
|
147
167
|
- Gemfile
|
148
168
|
- LICENSE.txt
|
@@ -169,6 +189,7 @@ files:
|
|
169
189
|
- lib/ood_core/job/adapters/kubernetes.rb
|
170
190
|
- lib/ood_core/job/adapters/kubernetes/batch.rb
|
171
191
|
- lib/ood_core/job/adapters/kubernetes/helper.rb
|
192
|
+
- lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb
|
172
193
|
- lib/ood_core/job/adapters/kubernetes/resources.rb
|
173
194
|
- lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb
|
174
195
|
- lib/ood_core/job/adapters/linux_host.rb
|
@@ -221,7 +242,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
221
242
|
- !ruby/object:Gem::Version
|
222
243
|
version: '0'
|
223
244
|
requirements: []
|
224
|
-
rubygems_version: 3.
|
245
|
+
rubygems_version: 3.1.2
|
225
246
|
signing_key:
|
226
247
|
specification_version: 4
|
227
248
|
summary: Open OnDemand core library
|