ood_core 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -1
- data/lib/ood_core/batch_connect/templates/vnc.rb +6 -1
- data/lib/ood_core/job/adapters/fujitsu_tcs.rb +403 -0
- data/lib/ood_core/job/adapters/slurm.rb +4 -4
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -2
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d1d489149a451b24284191ba966ef7c5d85f859c939b050d50b6501fd49a4cb
|
4
|
+
data.tar.gz: 3d438089095a42b66f4edee0d3a6afe683e1d87ebb865d908120b977733c6169
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4106f8af4babd7ae5cf59e133d42e5d1ecda3c1436727740f91f8b3e8a21112254ec8d35f724e2614cc18b65043944ada99adf6dfdeb0959f618d5c90e8178c0
|
7
|
+
data.tar.gz: a3caaaf21cc6ee4bd68fea96817f58b61592c93993813f69333e8acdce95df8b0d8b49b169000a50ddcb7bcd6031d151272882e5dc3cd02e09a0595ef1d31116
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [0.21.0] - 08-01-2022
|
11
|
+
|
12
|
+
### Added
|
13
|
+
|
14
|
+
- Added the `fujitsu_tcs` adapter in [766](https://github.com/OSC/ood_core/pull/766).
|
15
|
+
|
16
|
+
## [0.20.2] - 07-28-2022
|
17
|
+
|
18
|
+
- Fixed an issue with Slurm's `cluster_info` in [762](https://github.com/OSC/ood_core/pull/762).
|
19
|
+
- Relaxed Ruby requirement down to 2.5 in [771](https://github.com/OSC/ood_core/pull/771).
|
20
|
+
|
21
|
+
## [0.20.1] - 07-21-2022
|
22
|
+
|
23
|
+
- Fixed turbovnc compatibility issue with the -nohttpd flag in [767](https://github.com/OSC/ood_core/pull/767).
|
24
|
+
|
10
25
|
## [0.20.0] - 06-03-2022
|
11
26
|
|
12
27
|
- Adapters can now respond to `cluster_info` in [752](https://github.com/OSC/ood_core/pull/752). This returns information about the cluster like how many nodes are available and so on. Only Slurm support in this release.
|
@@ -428,7 +443,11 @@ Functionally the same as [0.17.3] but with some CI updates.
|
|
428
443
|
### Added
|
429
444
|
- Initial release!
|
430
445
|
|
431
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
446
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.21.0...HEAD
|
447
|
+
[0.21.0]: https://github.com/OSC/ood_core/compare/v0.20.2...v0.21.0
|
448
|
+
[0.20.2]: https://github.com/OSC/ood_core/compare/v0.20.1...v0.20.2
|
449
|
+
[0.20.1]: https://github.com/OSC/ood_core/compare/v0.20.0...v0.20.1
|
450
|
+
[0.20.0]: https://github.com/OSC/ood_core/compare/v0.19.0...v0.20.0
|
432
451
|
[0.19.0]: https://github.com/OSC/ood_core/compare/v0.18.1...v0.19.0
|
433
452
|
[0.18.1]: https://github.com/OSC/ood_core/compare/v0.18.0...v0.18.1
|
434
453
|
[0.18.0]: https://github.com/OSC/ood_core/compare/v0.17.8...v0.18.0
|
@@ -86,8 +86,13 @@ module OodCore
|
|
86
86
|
# Clean up any old VNC sessions that weren't cleaned before
|
87
87
|
#{vnc_clean}
|
88
88
|
|
89
|
+
# for turbovnc 3.0 compatability.
|
90
|
+
if timeout 2 vncserver --help 2>&1 | grep 'nohttpd' >/dev/null 2>&1; then
|
91
|
+
HTTPD_OPT='-nohttpd'
|
92
|
+
fi
|
93
|
+
|
89
94
|
# Attempt to start VNC server
|
90
|
-
VNC_OUT=$(vncserver -log "#{vnc_log}" -rfbauth "#{vnc_passwd}"
|
95
|
+
VNC_OUT=$(vncserver -log "#{vnc_log}" -rfbauth "#{vnc_passwd}" $HTTPD_OPT -noxstartup #{vnc_args} 2>&1)
|
91
96
|
VNC_PID=$(pgrep -s 0 Xvnc) # the script above will daemonize the Xvnc process
|
92
97
|
echo "${VNC_OUT}"
|
93
98
|
|
@@ -0,0 +1,403 @@
|
|
1
|
+
require "time"
|
2
|
+
require "ood_core/refinements/hash_extensions"
|
3
|
+
require "ood_core/refinements/array_extensions"
|
4
|
+
require "ood_core/job/adapters/helper"
|
5
|
+
|
6
|
+
module OodCore
|
7
|
+
module Job
|
8
|
+
class Factory
|
9
|
+
using Refinements::HashExtensions
|
10
|
+
|
11
|
+
# Build the Fujitsu TCS (Technical Computing Suite) adapter from a configuration
|
12
|
+
# @param config [#to_h] the configuration for job adapter
|
13
|
+
# @option config [Object] :bin (nil) Path to Fujitsu TCS resource manager binaries
|
14
|
+
# @option config [#to_h] :bin_overrides ({}) Optional overrides to Fujitsu TCS resource manager executables
|
15
|
+
def self.build_fujitsu_tcs(config)
|
16
|
+
c = config.to_h.symbolize_keys
|
17
|
+
bin = c.fetch(:bin, nil)
|
18
|
+
bin_overrides = c.fetch(:bin_overrides, {})
|
19
|
+
fujitsu_tcs = Adapters::Fujitsu_TCS::Batch.new(bin: bin, bin_overrides: bin_overrides)
|
20
|
+
Adapters::Fujitsu_TCS.new(fujitsu_tcs: fujitsu_tcs)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
module Adapters
|
25
|
+
# An adapter object that describes the communication with a Fujitsu TCS
|
26
|
+
# resource manager for job management.
|
27
|
+
class Fujitsu_TCS < Adapter
|
28
|
+
using Refinements::HashExtensions
|
29
|
+
using Refinements::ArrayExtensions
|
30
|
+
|
31
|
+
# Object used for simplified communication with a Fujitsu TCS batch server
|
32
|
+
# @api private
|
33
|
+
class Batch
|
34
|
+
# The path to the Fujitsu TCS binaries
|
35
|
+
# @example
|
36
|
+
# my_batch.bin.to_s #=> "/usr/local/fujitsu_tcs/10.0.0/bin"
|
37
|
+
# @return [Pathname] path to Fujitsu TCS binaries
|
38
|
+
attr_reader :bin
|
39
|
+
|
40
|
+
# Optional overrides for Fujitsu TCS executables
|
41
|
+
# @example
|
42
|
+
# {'pjsub' => '/usr/local/bin/pjsub'}
|
43
|
+
# @return Hash<String, String>
|
44
|
+
attr_reader :bin_overrides
|
45
|
+
|
46
|
+
# The root exception class that all Fujitsu TCS specific exceptions inherit
|
47
|
+
# from
|
48
|
+
class Error < StandardError; end
|
49
|
+
|
50
|
+
# An error indicating the Fujitsu TCS command timed out
|
51
|
+
class Fujitsu_TCS_TimeoutError < Error; end
|
52
|
+
|
53
|
+
# @param bin [#to_s] path to Fujitsu TCS installation binaries
|
54
|
+
# @param bin_overrides [#to_h] a hash of bin overrides to be used in job
|
55
|
+
def initialize(bin: nil, bin_overrides: {})
|
56
|
+
@bin = Pathname.new(bin.to_s)
|
57
|
+
@bin_overrides = bin_overrides
|
58
|
+
end
|
59
|
+
|
60
|
+
# Get a list of hashes detailing each of the jobs on the batch server
|
61
|
+
# @example Status info for all jobs
|
62
|
+
# my_batch.get_jobs
|
63
|
+
# #=>
|
64
|
+
# #[
|
65
|
+
# # {
|
66
|
+
# # :JOB_ID => "123",
|
67
|
+
# # :JOB_NAME => "my_job",
|
68
|
+
# # ...
|
69
|
+
# # },
|
70
|
+
# # {
|
71
|
+
# # :JOB_ID => "125",
|
72
|
+
# # :JOB_NAME => "my_other_job",
|
73
|
+
# # ...
|
74
|
+
# # },
|
75
|
+
# # ...
|
76
|
+
# #]
|
77
|
+
# @param id [#to_s] the id of the job
|
78
|
+
# @param owner [String] the owner(s) of the job
|
79
|
+
# @raise [Error] if `pjstat` command exited unsuccessfully
|
80
|
+
# @return [Array<Hash>] list of details for jobs
|
81
|
+
def get_jobs(id: "", owner: nil)
|
82
|
+
args = ["-s", "--data", "--choose=jid,jnam,rscg,st,std,stde,adt,sdt,nnumr,usr,elpl,elp"]
|
83
|
+
args.concat ["--filter jid=" + id.to_s] unless id.to_s.empty?
|
84
|
+
args.concat ["--filter usr=" + owner.to_s] unless owner.to_s.empty?
|
85
|
+
|
86
|
+
StringIO.open(call("pjstat", *args)) do |output|
|
87
|
+
output.gets() # Skip header
|
88
|
+
jobs = []
|
89
|
+
output.each_line do |line|
|
90
|
+
l = line.split(",")
|
91
|
+
jobs << {:JOB_ID => l[1], :JOB_NAME => l[2], :RSC_GRP => l[3].split(" ")[0],
|
92
|
+
:ST => l[4], :STD => l[5], :STDE => l[6],
|
93
|
+
:ACCEPT => l[7], :START_DATE => l[8], :NODES => l[9].split(":")[0],
|
94
|
+
:USER => l[10], :ELAPSE_LIM => l[11], :ELAPSE_TIM => l[12].split(" ")[0] }
|
95
|
+
end
|
96
|
+
jobs
|
97
|
+
end
|
98
|
+
rescue Fujitsu_TCS_TimeoutError
|
99
|
+
return [{ JOB_ID: id, ST: 'undetermined' }]
|
100
|
+
end
|
101
|
+
|
102
|
+
# Put a specified job on hold
|
103
|
+
# @example Put job "1234" on hold
|
104
|
+
# my_batch.hold_job("1234")
|
105
|
+
# @param id [#to_s] the id of the job
|
106
|
+
# @raise [Error] if `pjhold` command exited unsuccessfully
|
107
|
+
# @return [void]
|
108
|
+
def hold_job(id)
|
109
|
+
call("pjhold", id.to_s)
|
110
|
+
end
|
111
|
+
|
112
|
+
# Release a specified job that is on hold
|
113
|
+
# @example Release job "1234" from on hold
|
114
|
+
# my_batch.release_job("1234")
|
115
|
+
# @param id [#to_s] the id of the job
|
116
|
+
# @raise [Error] if `pjrls` command exited unsuccessfully
|
117
|
+
# @return [void]
|
118
|
+
def release_job(id)
|
119
|
+
call("pjrls", id.to_s)
|
120
|
+
end
|
121
|
+
|
122
|
+
# Delete a specified job from batch server
|
123
|
+
# @example Delete job "1234"
|
124
|
+
# my_batch.delete_job("1234")
|
125
|
+
# @param id [#to_s] the id of the job
|
126
|
+
# @raise [Error] if `pjdel` command exited unsuccessfully
|
127
|
+
# @return [void]
|
128
|
+
def delete_job(id)
|
129
|
+
call("pjdel", id.to_s)
|
130
|
+
end
|
131
|
+
|
132
|
+
# Submit a script expanded as a string to the batch server
|
133
|
+
# @param str [#to_s] script as a string
|
134
|
+
# @param args [Array<#to_s>] arguments passed to `pjsub` command
|
135
|
+
# @raise [Error] if `pjsub` command exited unsuccessfully
|
136
|
+
# @return [String] the id of the job that was created
|
137
|
+
def submit_string(str, args: [])
|
138
|
+
args = args.map(&:to_s)
|
139
|
+
call("pjsub", *args, stdin: str.to_s).split(" ")[5]
|
140
|
+
end
|
141
|
+
|
142
|
+
private
|
143
|
+
# Call a forked Fujitsu TCS command
|
144
|
+
def call(cmd, *args, stdin: "")
|
145
|
+
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
|
146
|
+
args = args.map(&:to_s)
|
147
|
+
o, e, s = Open3.capture3(cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
|
148
|
+
s.success? ? o : raise(Error, e)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
# Mapping of state codes for Fujitsu TCS resource manager
|
153
|
+
STATE_MAP = {
|
154
|
+
'ACC' => :queued, # Accepted job submission
|
155
|
+
'RJT' => :completed, # Rejected job submission
|
156
|
+
'QUE' => :queued, # Waiting for job execution
|
157
|
+
'RNA' => :queued, # Acquiring resources required for job execution
|
158
|
+
'RNP' => :running, # Executing prologue
|
159
|
+
'RUN' => :running, # Executing job
|
160
|
+
'RNE' => :running, # Executing epilogue
|
161
|
+
'RNO' => :running, # Waiting for completion of job termination processing
|
162
|
+
'SPP' => :suspended, # Suspend in progress
|
163
|
+
'SPD' => :suspended, # Suspended
|
164
|
+
'RSM' => :running, # Resume in progress
|
165
|
+
'EXT' => :completed, # Exited job end execution
|
166
|
+
'CCL' => :completed, # Exited job execution by interruption
|
167
|
+
'HLD' => :suspended, # In fixed state due to users
|
168
|
+
'ERR' => :completed, # In fixed state due to an error
|
169
|
+
}
|
170
|
+
|
171
|
+
# @api private
|
172
|
+
# @param opts [#to_h] the options defining this adapter
|
173
|
+
# @option opts [Batch] :fujitsu_tcs The Fujitsu TCS batch object
|
174
|
+
# @see Factory.build_fujitsu_tcs
|
175
|
+
def initialize(opts = {})
|
176
|
+
o = opts.to_h.symbolize_keys
|
177
|
+
|
178
|
+
@fujitsu_tcs = o.fetch(:fujitsu_tcs) { raise ArgumentError, "No Fujitsu TCS object specified. Missing argument: fujitsu_tcs" }
|
179
|
+
end
|
180
|
+
|
181
|
+
# Submit a job with the attributes defined in the job template instance
|
182
|
+
# @param script [Script] script object that describes the script and
|
183
|
+
# attributes for the submitted job
|
184
|
+
# @param after [#to_s, Array<#to_s>] this job may be scheduled for
|
185
|
+
# execution at any point after dependent jobs have started execution
|
186
|
+
# @param afterok [#to_s, Array<#to_s>] this job may be scheduled for
|
187
|
+
# execution only after dependent jobs have terminated with no errors
|
188
|
+
# @param afternotok [#to_s, Array<#to_s>] this job may be scheduled for
|
189
|
+
# execution only after dependent jobs have terminated with errors
|
190
|
+
# @param afterany [#to_s, Array<#to_s>] this job may be scheduled for
|
191
|
+
# execution after dependent jobs have terminated
|
192
|
+
# @raise [JobAdapterError] if something goes wrong submitting a job
|
193
|
+
# @return [String] the job id returned after successfully submitting a
|
194
|
+
# job
|
195
|
+
# @see Adapter#submit
|
196
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
197
|
+
#after = Array(after).map(&:to_s)
|
198
|
+
#afterok = Array(afterok).map(&:to_s)
|
199
|
+
#afternotok = Array(afternotok).map(&:to_s)
|
200
|
+
#afterany = Array(afterany).map(&:to_s)
|
201
|
+
if !after.empty? || !afterok.empty? || !afternotok.empty? || !afterany.empty?
|
202
|
+
raise JobAdapterError, "Dependency between jobs has not implemented yet."
|
203
|
+
end
|
204
|
+
|
205
|
+
# Set pjsub options
|
206
|
+
args = []
|
207
|
+
args.concat (script.rerunnable ? ["--restart"] : ["--norestart"]) unless script.rerunnable.nil?
|
208
|
+
args.concat ["--mail-list", script.email.join(",")] unless script.email.nil?
|
209
|
+
if script.email_on_started && script.email_on_terminated
|
210
|
+
args.concat ["-m", "b,e"]
|
211
|
+
elsif script.email_on_started
|
212
|
+
args.concat ["-m", "b"]
|
213
|
+
elsif script.email_on_terminated
|
214
|
+
args.concat ["-m", "e"]
|
215
|
+
end
|
216
|
+
|
217
|
+
args.concat ["-N", script.job_name] unless script.job_name.nil?
|
218
|
+
args.concat ["-o", script.output_path] unless script.output_path.nil?
|
219
|
+
if script.error_path.nil?
|
220
|
+
args.concat ["-j"]
|
221
|
+
else
|
222
|
+
args.concat ["-e", script.error_path]
|
223
|
+
end
|
224
|
+
args.concat ["--rscgrp", script.queue_name] unless script.queue_name.nil?
|
225
|
+
args.concat ["-p", script.priority] unless script.priority.nil?
|
226
|
+
args.concat ["--at", script.start_time.localtime.strftime("%C%y-%m-%dT%H:%M:%S")] unless script.start_time.nil?
|
227
|
+
args.concat ["-L \"elapse=" + seconds_to_duration(script.wall_time) + "\""] unless script.wall_time.nil?
|
228
|
+
args.concat ["--bulk", "--sparam", script.job_array_request] unless script.job_array_request.nil?
|
229
|
+
|
230
|
+
# Set environment variables
|
231
|
+
envvars = script.job_environment.to_h
|
232
|
+
args.concat ["-x", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
|
233
|
+
args.concat ["-X"] if script.copy_environment?
|
234
|
+
|
235
|
+
# Set native options
|
236
|
+
args.concat script.native if script.native
|
237
|
+
|
238
|
+
# Set content
|
239
|
+
content = if script.shell_path.nil?
|
240
|
+
script.content
|
241
|
+
else
|
242
|
+
"#!#{script.shell_path}\n#{script.content}"
|
243
|
+
end
|
244
|
+
|
245
|
+
# Submit job
|
246
|
+
@fujitsu_tcs.submit_string(content, args: args)
|
247
|
+
rescue Batch::Error => e
|
248
|
+
raise JobAdapterError, e.message
|
249
|
+
end
|
250
|
+
|
251
|
+
# Retrieve info for all jobs from the resource manager
|
252
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
253
|
+
# @return [Array<Info>] information describing submitted jobs
|
254
|
+
# @see Adapter#info_all
|
255
|
+
def info_all(attrs: nil)
|
256
|
+
@fujitsu_tcs.get_jobs().map do |v|
|
257
|
+
parse_job_info(v)
|
258
|
+
end
|
259
|
+
rescue Batch::Error => e
|
260
|
+
raise JobAdapterError, e.message
|
261
|
+
end
|
262
|
+
|
263
|
+
# Retrieve job info from the resource manager
|
264
|
+
# @param id [#to_s] the id of the job
|
265
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
266
|
+
# @return [Info] information describing submitted job
|
267
|
+
# @see Adapter#info
|
268
|
+
def info(id)
|
269
|
+
id = id.to_s
|
270
|
+
info_ary = @fujitsu_tcs.get_jobs(id: id).map do |v|
|
271
|
+
parse_job_info(v)
|
272
|
+
end
|
273
|
+
|
274
|
+
# If no job was found we assume that it has completed
|
275
|
+
info_ary.empty? ? Info.new(id: id, status: :completed) : info_ary.first # @fujitsu_tcs.get_jobs() must return only one element.
|
276
|
+
rescue Batch::Error => e
|
277
|
+
# set completed status if can't find job id
|
278
|
+
if /\[ERR\.\] PJM .+ Job .+ does not exist/ =~ e.message
|
279
|
+
Info.new(
|
280
|
+
id: id,
|
281
|
+
status: :completed
|
282
|
+
)
|
283
|
+
else
|
284
|
+
raise JobAdapterError, e.message
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
# Retrieve info for all jobs for a given owner or owners from the
|
289
|
+
# resource manager
|
290
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
291
|
+
# @raise [JobAdapterError] if something goes wrong getting job info
|
292
|
+
# @return [Array<Info>] information describing submitted jobs
|
293
|
+
def info_where_owner(owner, attrs: nil)
|
294
|
+
owner = Array.wrap(owner).map(&:to_s).join('+')
|
295
|
+
@fujitsu_tcs.get_jobs(owner: owner).map do |v|
|
296
|
+
parse_job_info(v)
|
297
|
+
end
|
298
|
+
rescue Batch::Error => e
|
299
|
+
raise JobAdapterError, e.message
|
300
|
+
end
|
301
|
+
|
302
|
+
# Retrieve job status from resource manager
|
303
|
+
# @param id [#to_s] the id of the job
|
304
|
+
# @raise [JobAdapterError] if something goes wrong getting job status
|
305
|
+
# @return [Status] status of job
|
306
|
+
# @see Adapter#status
|
307
|
+
def status(id)
|
308
|
+
id = id.to_s
|
309
|
+
jobs = @fujitsu_tcs.get_jobs(id: id)
|
310
|
+
|
311
|
+
if job = jobs.detect { |j| j[:JOB_ID] == id }
|
312
|
+
Status.new(state: get_state(job[:ST]))
|
313
|
+
else
|
314
|
+
# set completed status if can't find job id
|
315
|
+
Status.new(state: :completed)
|
316
|
+
end
|
317
|
+
rescue Batch::Error => e
|
318
|
+
# set completed status if can't find job id
|
319
|
+
if /\[ERR\.\] PJM .+ Job .+ does not exist/ =~ e.message
|
320
|
+
Status.new(state: :completed)
|
321
|
+
else
|
322
|
+
raise JobAdapterError, e.message
|
323
|
+
end
|
324
|
+
end
|
325
|
+
|
326
|
+
# Put the submitted job on hold
|
327
|
+
# @param id [#to_s] the id of the job
|
328
|
+
# @raise [JobAdapterError] if something goes wrong holding a job
|
329
|
+
# @return [void]
|
330
|
+
# @see Adapter#hold
|
331
|
+
def hold(id)
|
332
|
+
@fujitsu_tcs.hold_job(id.to_s)
|
333
|
+
rescue Batch::Error => e
|
334
|
+
# assume successful job hold if can't find job id
|
335
|
+
raise JobAdapterError, e.message unless /\[ERR\.\] PJM .+ Job .+ does not exist/ =~ e.message
|
336
|
+
end
|
337
|
+
|
338
|
+
# Release the job that is on hold
|
339
|
+
# @param id [#to_s] the id of the job
|
340
|
+
# @raise [JobAdapterError] if something goes wrong releasing a job
|
341
|
+
# @return [void]
|
342
|
+
# @see Adapter#release
|
343
|
+
def release(id)
|
344
|
+
@fujitsu_tcs.release_job(id.to_s)
|
345
|
+
rescue Batch::Error => e
|
346
|
+
# assume successful job release if can't find job id
|
347
|
+
raise JobAdapterError, e.message unless /\[ERR\.\] PJM .+ Job .+ does not exist/ =~ e.message
|
348
|
+
end
|
349
|
+
|
350
|
+
# Delete the submitted job
|
351
|
+
# @param id [#to_s] the id of the job
|
352
|
+
# @raise [JobAdapterError] if something goes wrong deleting a job
|
353
|
+
# @return [void]
|
354
|
+
# @see Adapter#delete
|
355
|
+
def delete(id)
|
356
|
+
@fujitsu_tcs.delete_job(id.to_s)
|
357
|
+
rescue Batch::Error => e
|
358
|
+
# assume successful job deletion if can't find job id
|
359
|
+
raise JobAdapterError, e.message unless /\[ERR\.\] PJM .+ Job .+ does not exist/ =~ e.message
|
360
|
+
end
|
361
|
+
|
362
|
+
def directive_prefix
|
363
|
+
'#PJM'
|
364
|
+
end
|
365
|
+
|
366
|
+
private
|
367
|
+
# Convert duration to seconds
|
368
|
+
def duration_in_seconds(time)
|
369
|
+
return 0 if time.nil?
|
370
|
+
time, days = time.split("-").reverse
|
371
|
+
days.to_i * 24 * 3600 +
|
372
|
+
time.split(':').map { |v| v.to_i }.inject(0) { |total, v| total * 60 + v }
|
373
|
+
end
|
374
|
+
|
375
|
+
# Convert seconds to duration
|
376
|
+
def seconds_to_duration(time)
|
377
|
+
"%02d:%02d:%02d" % [time/3600, time/60%60, time%60]
|
378
|
+
end
|
379
|
+
|
380
|
+
# Determine state from Fujitsu TCS state code
|
381
|
+
def get_state(st)
|
382
|
+
STATE_MAP.fetch(st, :undetermined)
|
383
|
+
end
|
384
|
+
|
385
|
+
# Parse hash describing Fujitsu TCS job status
|
386
|
+
def parse_job_info(v)
|
387
|
+
Info.new(
|
388
|
+
id: v[:JOB_ID],
|
389
|
+
job_name: v[:JOB_NAME],
|
390
|
+
status: get_state(v[:ST]),
|
391
|
+
job_owner: v[:USER],
|
392
|
+
dispatch_time: v[:START_DATE],
|
393
|
+
wallclock_time: duration_in_seconds(v[:ELAPSE_TIM]),
|
394
|
+
wallclock_limit: duration_in_seconds(v[:ELAPSE_LIM]),
|
395
|
+
submission_time: v[:ACCEPT],
|
396
|
+
queue_name: v[:RSC_GRP],
|
397
|
+
native: v
|
398
|
+
)
|
399
|
+
end
|
400
|
+
end
|
401
|
+
end
|
402
|
+
end
|
403
|
+
end
|
@@ -39,7 +39,7 @@ module OodCore
|
|
39
39
|
# Get integer representing the number of gpus used by a node or job,
|
40
40
|
# calculated from gres string
|
41
41
|
# @return [Integer] the number of gpus in gres
|
42
|
-
def gpus_from_gres(gres)
|
42
|
+
def self.gpus_from_gres(gres)
|
43
43
|
gres.to_s.scan(/gpu:[^,]*(\d+)/).flatten.map(&:to_i).sum
|
44
44
|
end
|
45
45
|
|
@@ -116,8 +116,8 @@ module OodCore
|
|
116
116
|
total_nodes: node_cpu_info[2].to_i,
|
117
117
|
active_processors: node_cpu_info[3].to_i,
|
118
118
|
total_processors: node_cpu_info[6].to_i,
|
119
|
-
active_gpus: gres_lines.sum { |line| gpus_from_gres(line[2]) },
|
120
|
-
total_gpus: gres_lines.sum { |line| gpus_from_gres(line[1]) }
|
119
|
+
active_gpus: gres_lines.sum { |line| Slurm.gpus_from_gres(line[2]) },
|
120
|
+
total_gpus: gres_lines.sum { |line| Slurm.gpus_from_gres(line[1]) }
|
121
121
|
)
|
122
122
|
end
|
123
123
|
|
@@ -673,7 +673,7 @@ module OodCore
|
|
673
673
|
submission_time: v[:submit_time] ? Time.parse(v[:submit_time]) : nil,
|
674
674
|
dispatch_time: (v[:start_time].nil? || v[:start_time] == "N/A") ? nil : Time.parse(v[:start_time]),
|
675
675
|
native: v,
|
676
|
-
gpus: gpus_from_gres(v[:gres])
|
676
|
+
gpus: self.class.gpus_from_gres(v[:gres])
|
677
677
|
)
|
678
678
|
end
|
679
679
|
|
data/lib/ood_core/version.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.bindir = "exe"
|
21
21
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
22
|
spec.require_paths = ["lib"]
|
23
|
-
spec.required_ruby_version = ">= 2.
|
23
|
+
spec.required_ruby_version = ">= 2.5.0"
|
24
24
|
|
25
25
|
spec.add_runtime_dependency "ood_support", "~> 0.0.2"
|
26
26
|
spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
|
@@ -30,5 +30,5 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.add_development_dependency "rspec", "~> 3.0"
|
31
31
|
spec.add_development_dependency "pry", "~> 0.10"
|
32
32
|
spec.add_development_dependency "timecop", "~> 0.8"
|
33
|
-
spec.add_development_dependency "climate_control", "~> 1.
|
33
|
+
spec.add_development_dependency "climate_control", "~> 1.1.1"
|
34
34
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.21.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2022-
|
13
|
+
date: 2022-08-01 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -136,14 +136,14 @@ dependencies:
|
|
136
136
|
requirements:
|
137
137
|
- - "~>"
|
138
138
|
- !ruby/object:Gem::Version
|
139
|
-
version: 1.
|
139
|
+
version: 1.1.1
|
140
140
|
type: :development
|
141
141
|
prerelease: false
|
142
142
|
version_requirements: !ruby/object:Gem::Requirement
|
143
143
|
requirements:
|
144
144
|
- - "~>"
|
145
145
|
- !ruby/object:Gem::Version
|
146
|
-
version: 1.
|
146
|
+
version: 1.1.1
|
147
147
|
description: Open OnDemand core library that provides support for an HPC Center to
|
148
148
|
globally define HPC services that web applications can then take advantage of.
|
149
149
|
email:
|
@@ -179,6 +179,7 @@ files:
|
|
179
179
|
- lib/ood_core/job/adapter.rb
|
180
180
|
- lib/ood_core/job/adapters/ccq.rb
|
181
181
|
- lib/ood_core/job/adapters/drmaa.rb
|
182
|
+
- lib/ood_core/job/adapters/fujitsu_tcs.rb
|
182
183
|
- lib/ood_core/job/adapters/helper.rb
|
183
184
|
- lib/ood_core/job/adapters/kubernetes.rb
|
184
185
|
- lib/ood_core/job/adapters/kubernetes/batch.rb
|
@@ -234,7 +235,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
234
235
|
requirements:
|
235
236
|
- - ">="
|
236
237
|
- !ruby/object:Gem::Version
|
237
|
-
version: 2.
|
238
|
+
version: 2.5.0
|
238
239
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
239
240
|
requirements:
|
240
241
|
- - ">="
|