ood_core 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/lib/ood_core/job/adapters/ccq.rb +267 -0
- data/lib/ood_core/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3296708d7bc47f3379a9e4a6c845d3f25c5ccefb599f4b92406d9dffdaef220b
|
4
|
+
data.tar.gz: b6af9e90b67bc9a7a52203808d849d8800336b30b09bdb8ed204526d01bc92e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 623ac6e6f8081d68a3e925d1150c9f20a0f613ccfb6837519d1b95d04533a72caa403c54327aad85dcea9c0694cc23941f40307d942623c095f53fed7fc32026
|
7
|
+
data.tar.gz: 0d785a9ade36b2f6f62f9ae55672091346aa4fb76bf358e6c00d4bc007623b8d1798813474665fc7b4d850d89e041fae5c2fefc9719fbe9f53a161a76127eaad
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
|
+
## [0.13.0] - 2020-08-10
|
10
|
+
### Added
|
11
|
+
- CloudyCluster CCQ Adapter
|
12
|
+
|
9
13
|
## [0.12.0] - 2020-08-05
|
10
14
|
### Added
|
11
15
|
- qos option to Slurm and Torque [#205](https://github.com/OSC/ood_core/pull/205)
|
@@ -243,7 +247,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
243
247
|
### Added
|
244
248
|
- Initial release!
|
245
249
|
|
246
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.12.0...HEAD
|
250
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.13.0...HEAD
|
251
|
+
[0.13.0]: https://github.com/OSC/ood_core/compare/v0.12.0...v0.13.0
|
247
252
|
[0.12.0]: https://github.com/OSC/ood_core/compare/v0.11.4...v0.12.0
|
248
253
|
[0.11.4]: https://github.com/OSC/ood_core/compare/v0.11.3...v0.11.4
|
249
254
|
[0.11.3]: https://github.com/OSC/ood_core/compare/v0.11.2...v0.11.3
|
@@ -0,0 +1,267 @@
|
|
1
|
+
require "open3"
require "tempfile"

require "ood_core/job/adapters/helper"
|
3
|
+
|
4
|
+
module OodCore
|
5
|
+
module Job
|
6
|
+
class Factory
  using Refinements::HashExtensions

  # Build the CloudyCluster CCQ adapter from a configuration.
  #
  # The configuration is converted to a hash with symbol keys and handed to
  # Adapters::CCQ.new unchanged.
  #
  # @param config [#to_h] the configuration for the job adapter
  # @option config [Object] :image (nil) The default VM image to use
  # @option config [Object] :cloud ('gcp') The cloud provider being used [gcp,aws]
  # @option config [Object] :scheduler (nil) The name of the scheduler to use
  # @option config [#to_s] :bin ('/opt/CloudyCluster/srv/CCQ') Path to CC client binaries
  # @option config [#to_h] :bin_overrides ({}) Optional overrides to CC client executables
  # @option config [#to_s] :jobid_regex ("job id is: (?<job_id>\\d+) you") Pattern with a
  #   `job_id` named group used to pull the job id out of the ccqsub output
  # @return [Adapters::CCQ] the configured adapter
  def self.build_ccq(config)
    Adapters::CCQ.new(config.to_h.symbolize_keys)
  end
end
|
21
|
+
|
22
|
+
module Adapters
|
23
|
+
|
24
|
+
# Error type used to signal that a command line client stopped to prompt for
# interactive input instead of completing.
class PromptError < StandardError
end
|
25
|
+
|
26
|
+
# Job adapter that talks to CloudyCluster's CCQ through its command line
# clients: `ccqsub` to submit, `ccqstat` to query and `ccqdel` to delete.
class CCQ < Adapter
  using Refinements::ArrayExtensions

  # Mapping of the state names reported by ccqstat onto status symbols.
  # Any name missing from this table is reported as :undetermined.
  STATE_MAP = {
    'Error'        => :suspended, # not running, but infrastructure still possibly exists
    'CreatingCG'   => :queued,    # creating control group
    'Pending'      => :queued,    # in queue
    'Submitted'    => :queued,    #
    'Provisioning' => :queued,    # node is being provisioned
    'Running'      => :running,   #
    'Completed'    => :completed, #
  }.freeze

  attr_reader :image, :cloud, :scheduler, :bin, :bin_overrides, :jobid_regex

  # @param config [Hash{Symbol=>Object}] adapter configuration
  # @option config [Object] :image (nil) VM image passed to ccqsub
  # @option config [Object] :cloud ('gcp') cloud provider, selects the image flag
  # @option config [Object] :scheduler (nil) scheduler name passed with `-s`
  # @option config [#to_s] :bin ('/opt/CloudyCluster/srv/CCQ') directory of the CC clients
  # @option config [#to_h] :bin_overrides ({}) per-command executable overrides
  # @option config [#to_s] :jobid_regex pattern with a `job_id` named group that
  #   is matched against the ccqsub output
  def initialize(config)
    @image = config.fetch(:image, nil)
    @cloud = config.fetch(:cloud, gcp_provider)
    @scheduler = config.fetch(:scheduler, nil)
    @bin = config.fetch(:bin, '/opt/CloudyCluster/srv/CCQ')
    @bin_overrides = config.fetch(:bin_overrides, {})
    @jobid_regex = config.fetch(:jobid_regex, "job id is: (?<job_id>\\d+) you")
  end

  # Submit a job with the attributes defined in the job template instance
  # @param script [Script] script object that describes the script and
  #   attributes for the submitted job
  # @param after [#to_s, Array<#to_s>] not used
  # @param afterok [#to_s, Array<#to_s>] not used
  # @param afternotok [#to_s, Array<#to_s>] not used
  # @param afterany [#to_s, Array<#to_s>] not used
  # @raise [JobAdapterError] if ccqsub fails or no job id can be found in
  #   its output
  # @raise [PromptError] if ccqsub stopped to prompt for credentials
  # @return [String] the job id returned after successfully submitting a
  #   job
  # @see Adapter#submit
  def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
    script_file = make_script_file(script.content)
    args = []

    # cluster configuration args
    args.concat ["-s", scheduler] unless scheduler.nil?
    args.concat [image_arg, image] unless image.nil?

    args.concat ["-o", script.output_path.to_s] unless script.output_path.nil?
    args.concat ["-e", script.error_path.to_s] unless script.error_path.nil?
    args.concat ["-tl", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
    args.concat ["-js", script_file.path.to_s]

    args.concat script.native if script.native

    output = call("ccqsub", args: args)
    parse_job_id_from_ccqsub(output)
  ensure
    # script_file is still nil when creating the temp file itself failed;
    # guard so the original error is not masked by a NoMethodError.
    script_file&.close
  end

  # Retrieve info for all jobs from the resource manager
  # @param attrs [Object] not used by this adapter
  # @return [Array<Info>] information describing submitted jobs
  def info_all(attrs: nil)
    args = []
    args.concat ["-s", scheduler] unless scheduler.nil?

    stat_output = call("ccqstat", args: args)
    info_from_ccqstat(stat_output)
  end

  # Retrieve job info from the resource manager
  # @param id [#to_s] the id of the job
  # @return [Info] information describing submitted job
  def info(id)
    args = []
    args.concat ["-s", scheduler] unless scheduler.nil?
    args.concat ["-ji", id]

    stat_output = call("ccqstat", args: args)

    # WARNING: code path differs here than info_all because the output
    # from ccqstat -ji $JOBID is 'key: value' detail for one job rather
    # than the short columnar listing that plain ccqstat prints.
    info_from_ccqstat_extended(stat_output)
  end

  # Retrieve job status from resource manager
  # @param id [#to_s] the id of the job
  # @return [Status] status of job
  # @see Adapter#status
  def status(id)
    info(id).status
  end

  # This adapter does not implement hold and will always raise
  # an exception.
  # @param id [#to_s] the id of the job
  # @raise [NotImplementedError] always
  # @return [void]
  def hold(_)
    raise NotImplementedError, "subclass did not define #hold"
  end

  # This adapter does not implement release and will always raise
  # an exception.
  # @param id [#to_s] the id of the job
  # @raise [NotImplementedError] always
  # @return [void]
  def release(_)
    raise NotImplementedError, "subclass did not define #release"
  end

  # Delete the submitted job
  # @param id [#to_s] the id of the job
  # @return [void]
  def delete(id)
    call("ccqdel", args: [id])
  end

  # @return [String] the directive prefix string for this adapter
  def directive_prefix
    '#CC'
  end

  private

  # @return [String] the value of `cloud` that selects Google Cloud
  def gcp_provider
    'gcp'
  end

  # @return [String] the value of `cloud` that selects AWS
  def aws_provider
    'aws'
  end

  # Command line flag that introduces the VM image for the configured cloud.
  # Anything other than the GCP provider falls through to the AWS flag.
  # @return [String]
  def image_arg
    cloud == gcp_provider ? '-gcpgi' : '-awsami'
  end

  # Run one of the CC client executables and return its standard output.
  # @param cmd [String] client name, resolved through Helper.bin_path
  # @param args [Array<#to_s>] command line arguments
  # @param env [#to_h] environment passed to the child process
  # @param stdin [#to_s] data written to the child's standard input
  # @raise [PromptError, JobAdapterError] when the command exits unsuccessfully
  # @return [String] captured standard output
  def call(cmd, args: [], env: {}, stdin: "")
    cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
    args = args.map(&:to_s)
    env = env.to_h
    stdout, stderr, status = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s)
    status.success? ? stdout : interpret_and_raise(stderr, cmd)
  end

  # helper function to interpret an error the command had given and
  # raise a different error.
  # @param error [#to_s] standard error text of the failed command
  # @param command [String] the command that failed
  # @raise [PromptError] when the text ends with the EOF-on-prompt marker
  # @raise [JobAdapterError] for every other failure, carrying the text
  def interpret_and_raise(error, command)
    error = error.to_s

    # a special case with CCQ that prompts the user for username & password
    # so let's be helpful and tell the user what to do.
    if error.end_with?("EOFError: EOF when reading a line\n")
      raise(
        PromptError,
        "The #{command} command was prompted. You need to generate the certificate " +
        "manually in a shell by running 'ccqstat'\nand entering your username/password"
      )
    end

    raise JobAdapterError, error
  end

  # Convert seconds to an HH:MM:SS duration string
  # @param seconds [Integer]
  # @return [String]
  def seconds_to_duration(seconds)
    format("%02d:%02d:%02d", seconds / 3600, seconds / 60 % 60, seconds % 60)
  end

  # helper to make a script file. We can't pipe it into ccq so we have to
  # write a file.
  # @param content [#to_s] the script text
  # @return [Tempfile] the flushed, still open temp file
  def make_script_file(content)
    file = Tempfile.new(tmp_file_name)
    file.write(content.to_s)
    file.flush
    file
  end

  # @return [String] basename prefix for the temporary script file
  def tmp_file_name
    'ccq_ood_script_'
  end

  # Pull the job id out of the text printed by ccqsub.
  # @param output [String] standard output of ccqsub
  # @raise [JobAdapterError] when no `job_id` capture can be found
  # @return [String] the job id
  def parse_job_id_from_ccqsub(output)
    match_data = /#{jobid_regex}/.match(output)
    # match_data could be nil, OR re-configured jobid_regex could be looking for a different named group
    job_id = match_data&.named_captures&.fetch('job_id', nil)
    raise JobAdapterError, "Could not extract job id out of ccqsub output '#{output}'" if job_id.nil?

    job_id
  end

  # parse an Ood::Job::Info object from extended ccqstat output
  # @param data [#to_s] output of `ccqstat -ji <id>`
  # @return [Info]
  def info_from_ccqstat_extended(data)
    raw = extended_data_to_hash(data)

    Info.new(
      native: raw,
      status: get_state(raw['status']),
      id: raw['name'],
      job_name: raw['jobName'],
      job_owner: raw['userName'],
      submit_host: raw['submitHostInstanceId'],
      dispatch_time: raw['startTime'].to_i,      # a missing key becomes 0
      submission_time: raw['dateSubmitted'].to_i, # a missing key becomes 0
      queue_name: raw['criteriaPriority']
    )
  end

  # extended data is just lines of 'key: value' value, so parse
  # it and stick it all in a hash.
  # @param data [#to_s]
  # @return [Hash{String=>String}] only the first whitespace-free token
  #   after each 'key: ' is kept as the value
  def extended_data_to_hash(data)
    data.to_s.scan(/(\w+): (\S+)/).to_h
  end

  # Parse the columnar listing printed by plain ccqstat.
  # The header row (first word "Id") and any row that does not split into
  # exactly five words are skipped.
  # @param data [#to_s]
  # @return [Array<Info>]
  def info_from_ccqstat(data)
    infos = []

    data.to_s.each_line do |line|
      words = line.split
      next if words.first == "Id" # just skip the header

      infos << Info.new(line_to_hash(words)) if words.size == 5
    end

    infos
  end

  # Turn one five-word ccqstat row into Info attributes.
  # @param words [Array<String>]
  # @return [Hash, nil] nil unless exactly five words are given
  def line_to_hash(words)
    return unless words.size == 5

    {
      id: words[0],
      job_name: words[1],
      job_owner: words[2],
      # NOTE(review): words[3] is not used - presumably the scheduler
      # column; confirm against real ccqstat output.
      status: get_state(words[4])
    }
  end

  # @param state [String] state name as printed by ccqstat
  # @return [Symbol] mapped status, :undetermined for unknown names
  def get_state(state)
    STATE_MAP.fetch(state, :undetermined)
  end
end
|
265
|
+
end
|
266
|
+
end
|
267
|
+
end
|
data/lib/ood_core/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.0
|
4
|
+
version: 0.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2020-08-
|
13
|
+
date: 2020-08-10 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -163,6 +163,7 @@ files:
|
|
163
163
|
- lib/ood_core/errors.rb
|
164
164
|
- lib/ood_core/invalid_cluster.rb
|
165
165
|
- lib/ood_core/job/adapter.rb
|
166
|
+
- lib/ood_core/job/adapters/ccq.rb
|
166
167
|
- lib/ood_core/job/adapters/drmaa.rb
|
167
168
|
- lib/ood_core/job/adapters/helper.rb
|
168
169
|
- lib/ood_core/job/adapters/linux_host.rb
|