ood_core 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/lib/ood_core/job/adapters/ccq.rb +267 -0
- data/lib/ood_core/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3296708d7bc47f3379a9e4a6c845d3f25c5ccefb599f4b92406d9dffdaef220b
|
4
|
+
data.tar.gz: b6af9e90b67bc9a7a52203808d849d8800336b30b09bdb8ed204526d01bc92e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 623ac6e6f8081d68a3e925d1150c9f20a0f613ccfb6837519d1b95d04533a72caa403c54327aad85dcea9c0694cc23941f40307d942623c095f53fed7fc32026
|
7
|
+
data.tar.gz: 0d785a9ade36b2f6f62f9ae55672091346aa4fb76bf358e6c00d4bc007623b8d1798813474665fc7b4d850d89e041fae5c2fefc9719fbe9f53a161a76127eaad
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
|
+
## [0.13.0] - 2020-08-10
|
10
|
+
### Added
|
11
|
+
- CloudyCluster CCQ Adapter
|
12
|
+
|
9
13
|
## [0.12.0] - 2020-08-05
|
10
14
|
### Added
|
11
15
|
- qos option to Slurm and Torque [#205](https://github.com/OSC/ood_core/pull/205)
|
@@ -243,7 +247,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
243
247
|
### Added
|
244
248
|
- Initial release!
|
245
249
|
|
246
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.12.0...HEAD
|
250
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.13.0...HEAD
|
251
|
+
[0.13.0]: https://github.com/OSC/ood_core/compare/v0.12.0...v0.13.0
|
247
252
|
[0.12.0]: https://github.com/OSC/ood_core/compare/v0.11.4...v0.12.0
|
248
253
|
[0.11.4]: https://github.com/OSC/ood_core/compare/v0.11.3...v0.11.4
|
249
254
|
[0.11.3]: https://github.com/OSC/ood_core/compare/v0.11.2...v0.11.3
|
@@ -0,0 +1,267 @@
|
|
1
|
+
require "ood_core/job/adapters/helper"
|
2
|
+
require "tempfile"
|
3
|
+
|
4
|
+
module OodCore
|
5
|
+
module Job
|
6
|
+
class Factory
  using Refinements::HashExtensions

  # Build the CloudyCluster CCQ adapter from a configuration
  # @param config [#to_h] the configuration for job adapter
  # @option config [Object] :image (nil) The default VM image to use
  # @option config [Object] :cloud ("gcp") The cloud provider being used [gcp,aws]
  # @option config [Object] :scheduler (nil) The name of the scheduler to use
  # @option config [Object] :bin ("/opt/CloudyCluster/srv/CCQ") Path to CC client binaries
  # @option config [#to_h] :bin_overrides ({}) Optional overrides to CC client executables
  # @option config [Object] :jobid_regex ("job id is: (?<job_id>\d+) you") Pattern used to
  #   pull the job id out of the ccqsub output; it must define a `job_id` named group
  # @return [Adapters::CCQ] adapter built from the symbol-keyed configuration
  def self.build_ccq(config)
    Adapters::CCQ.new(config.to_h.symbolize_keys)
  end
end
|
21
|
+
|
22
|
+
module Adapters
|
23
|
+
|
24
|
+
# Raised when a CCQ client command stopped to prompt for input instead of
# running non-interactively.
class PromptError < StandardError
end
|
25
|
+
|
26
|
+
class CCQ < Adapter
|
27
|
+
using Refinements::ArrayExtensions
|
28
|
+
|
29
|
+
attr_reader :image, :cloud, :scheduler, :bin, :bin_overrides, :jobid_regex
|
30
|
+
|
31
|
+
# Store the adapter settings, falling back to a default for every missing key.
# @param config [Hash] symbol-keyed adapter options
def initialize(config)
  # which cluster/cloud the jobs are sent to
  @image     = config.fetch(:image, nil)
  @cloud     = config.fetch(:cloud, gcp_provider)
  @scheduler = config.fetch(:scheduler, nil)

  # where the CCQ client executables live
  @bin           = config.fetch(:bin, '/opt/CloudyCluster/srv/CCQ')
  @bin_overrides = config.fetch(:bin_overrides, {})

  # how to find the job id in the ccqsub output
  @jobid_regex = config.fetch(:jobid_regex, "job id is: (?<job_id>\\d+) you")
end
|
39
|
+
|
40
|
+
# Submit a job with the attributes defined in the job template instance
# @param script [Script] script object that describes the script and
#   attributes for the submitted job
# @param after [#to_s, Array<#to_s>] not used
# @param afterok [#to_s, Array<#to_s>] not used
# @param afternotok [#to_s, Array<#to_s>] not used
# @param afterany [#to_s, Array<#to_s>] not used
# @return [String] the job id parsed out of the ccqsub output
# @see Adapter#submit
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
  # the script content is handed to ccqsub through a file on disk
  script_file = make_script_file(script.content)
  args = []

  # cluster configuration args
  args.concat ["-s", scheduler] unless scheduler.nil?
  args.concat [image_arg, image] unless image.nil?

  # per-job args
  args.concat ["-o", script.output_path.to_s] unless script.output_path.nil?
  args.concat ["-e", script.error_path.to_s] unless script.error_path.nil?
  args.concat ["-tl", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
  args.concat ["-js", script_file.path.to_s]

  # caller supplied args go last
  args.concat script.native if script.native

  output = call("ccqsub", args: args)
  parse_job_id_from_ccqsub(output)
ensure
  # script_file is still nil when make_script_file itself raised; guard the
  # cleanup so it does not replace that error with a NoMethodError on nil
  script_file&.close
end
|
70
|
+
|
71
|
+
# List the jobs reported by a plain `ccqstat` call
# @param attrs [Object] not used by this adapter
# @return [Array<Info>] information describing submitted jobs
def info_all(attrs: nil)
  scheduler_args = scheduler.nil? ? [] : ["-s", scheduler]
  listing = call("ccqstat", args: scheduler_args)

  info_from_ccqstat(listing)
end
|
80
|
+
|
81
|
+
# Look up a single job with `ccqstat -ji <id>`
# @param id [#to_s] the id of the job
# @return [Info] information describing submitted job
def info(id)
  job_args = ["-ji", id]
  job_args = ["-s", scheduler] + job_args unless scheduler.nil?

  # NOTE: `ccqstat -ji` prints key/value details for one job rather than the
  # tabular listing info_all receives, so it goes through its own parser.
  details = call("ccqstat", args: job_args)
  info_from_ccqstat_extended(details)
end
|
96
|
+
|
97
|
+
# Report the status of one job, taken from its full info record
# @param id [#to_s] the id of the job
# @return [Status] status of job
# @see Adapter#status
def status(id)
  job_info = info(id)
  job_info.status
end
|
104
|
+
|
105
|
+
# Holding a job is not supported by this adapter; calling this always raises.
# @param _ [#to_s] the id of the job (ignored)
# @raise [NotImplementedError] always. NotImplementedError descends from
#   ScriptError, not StandardError, so a bare `rescue` does not catch it.
# @return [void]
def hold(_)
  raise NotImplementedError, "the CCQ adapter does not support #hold"
end
|
113
|
+
|
114
|
+
# Releasing a job is not supported by this adapter; calling this always raises.
# @param _ [#to_s] the id of the job (ignored)
# @raise [NotImplementedError] always. NotImplementedError descends from
#   ScriptError, not StandardError, so a bare `rescue` does not catch it.
# @return [void]
def release(_)
  raise NotImplementedError, "the CCQ adapter does not support #release"
end
|
122
|
+
|
123
|
+
# Remove a submitted job by running `ccqdel <id>`
# @param id [#to_s] the id of the job
# @return [void]
def delete(id)
  job_args = [id]
  call("ccqdel", args: job_args)
end
|
129
|
+
|
130
|
+
# @return [String] the marker that introduces CCQ directives in a job script
def directive_prefix
  "#CC"
end
|
133
|
+
|
134
|
+
private
|
135
|
+
|
136
|
+
# Translates the state names reported by ccqstat into OodCore status symbols
STATE_MAP = {
  # not running, but infrastructure still possibly exists
  'Error' => :suspended,
  # creating control group
  'CreatingCG' => :queued,
  # in queue
  'Pending' => :queued,
  'Submitted' => :queued,
  # node is being provisioned
  'Provisioning' => :queued,
  'Running' => :running,
  'Completed' => :completed
}.freeze
|
147
|
+
|
148
|
+
# @return [String] the `cloud` setting value that selects Google Cloud
def gcp_provider
  "gcp"
end
|
151
|
+
|
152
|
+
# @return [String] the `cloud` setting value that selects AWS
def aws_provider
  "aws"
end
|
155
|
+
|
156
|
+
# Pick the ccqsub flag used to pass the VM image.
# @return [String] '-gcpgi' when the configured cloud is the GCP provider,
#   '-awsami' for any other value
def image_arg
  cloud == gcp_provider ? '-gcpgi' : '-awsami'
end
|
163
|
+
|
164
|
+
# Run one CCQ client command and hand back what it printed.
# @param cmd [String] client command name, resolved against `bin` and
#   `bin_overrides` by Helper.bin_path
# @param args [Array<#to_s>] command line arguments
# @param env [#to_h] environment passed to the child process
# @param stdin [#to_s] data written to the command's standard input
# @return [String] standard output when the command exits successfully;
#   otherwise stderr is handed to interpret_and_raise
def call(cmd, args: [], env: {}, stdin: "")
  executable = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
  string_args = args.map(&:to_s)
  environment = env.to_h

  stdout, stderr, exit_status = Open3.capture3(environment, executable, *string_args, stdin_data: stdin.to_s)
  return stdout if exit_status.success?

  interpret_and_raise(stderr, executable)
end
|
171
|
+
|
172
|
+
# Turn the stderr of a failed CCQ command into an adapter exception.
# @param error [String] what the command wrote to standard error
# @param command [String] the command that failed, used in the message
# @raise [PromptError] when stderr ends with the EOFError text that is taken
#   here to mean the command prompted for a username & password
# @raise [JobAdapterError] for every other failure, carrying stderr as the message
def interpret_and_raise(error, command)
  # a special case with CCQ that prompts the user for username & password
  # so let's be helpful and tell the user what to do.
  if error.end_with?("EOFError: EOF when reading a line\n")
    raise(
      PromptError,
      "The #{command} command was prompted. You need to generate the certificate " +
      "manually in a shell by running 'ccqstat'\nand entering your username/password"
    )
  end

  # the stderr text itself is the message; there is no exception object here
  raise JobAdapterError, error
end
|
187
|
+
|
188
|
+
# Render a number of seconds as an "HH:MM:SS" string
# @param seconds [Numeric] length of time in seconds
# @return [String] zero padded hours, minutes and seconds; hours are not
#   capped at two digits
def seconds_to_duration(seconds)
  hours   = seconds / 3600
  minutes = seconds / 60 % 60
  secs    = seconds % 60

  format("%02d:%02d:%02d", hours, minutes, secs)
end
|
192
|
+
|
193
|
+
# Write the script content to a temp file, because the content cannot be
# piped into ccq.
# @param content [#to_s] the script body
# @return [Tempfile] the still-open, flushed file; the caller closes it
def make_script_file(content)
  Tempfile.new(tmp_file_name).tap do |script|
    script.write(content.to_s)
    script.flush
  end
end
|
201
|
+
|
202
|
+
# @return [String] basename prefix given to Tempfile for the job script
def tmp_file_name
  "ccq_ood_script_"
end
|
205
|
+
|
206
|
+
# Extract the job id from the text ccqsub printed.
# @param output [String] standard output of ccqsub
# @return [String] the text captured by the `job_id` named group of jobid_regex
# @raise [JobAdapterError] when the pattern does not match or captures no `job_id`
def parse_job_id_from_ccqsub(output)
  match_data = /#{jobid_regex}/.match(output)
  # match_data could be nil, OR re-configured jobid_regex could be looking for a different named group
  job_id = match_data&.named_captures&.fetch('job_id', nil)
  # raise, not throw: throw without a matching catch surfaces as UncaughtThrowError
  raise JobAdapterError, "Could not extract job id out of ccqsub output '#{output}'" if job_id.nil?

  job_id
end
|
213
|
+
|
214
|
+
# Build an Info object from the key/value output of `ccqstat -ji <id>`
# @param data [#to_s] raw ccqstat output
# @return [Info] job info; the full parsed key/value hash is kept in :native
def info_from_ccqstat_extended(data)
  raw = extended_data_to_hash(data)

  # Pass the attributes as keywords instead of one positional Hash, so this
  # also works if Info#initialize declares keyword parameters (on Ruby 3 a
  # positional Hash is no longer converted to keywords).
  Info.new(
    native: raw,
    status: get_state(raw['status']),
    id: raw['name'],
    job_name: raw['jobName'],
    job_owner: raw['userName'],
    submit_host: raw['submitHostInstanceId'],
    # nil.to_i is 0, so a timestamp missing from the output is reported as 0
    dispatch_time: raw['startTime'].to_i,
    submission_time: raw['dateSubmitted'].to_i,
    queue_name: raw['criteriaPriority']
  )
end
|
229
|
+
|
230
|
+
# Collect every 'key: value' pair found in the text into a Hash.
# A value is a single run of non-whitespace characters, and a repeated key
# keeps its last value.
# @param data [#to_s] raw ccqstat output
# @return [Hash{String => String}]
def extended_data_to_hash(data)
  pairs = data.to_s.scan(/(\w+): (\S+)/)
  pairs.to_h
end
|
235
|
+
|
236
|
+
# Build Info objects from the tabular output of a plain `ccqstat` call.
# Only lines with exactly five whitespace separated words become jobs; the
# header line (first word "Id") and anything else is skipped.
# @param data [#to_s] raw ccqstat output
# @return [Array<Info>] one entry per job line, in output order
def info_from_ccqstat(data)
  data.to_s.each_line.each_with_object([]) do |line, infos|
    words = line.split(/\s/).reject(&:empty?)
    next if words.first == "Id" # just skip the header
    next unless words.size == 5

    # Splat into keywords instead of passing one positional Hash, so this also
    # works if Info#initialize declares keyword parameters (on Ruby 3 a
    # positional Hash is no longer converted to keywords).
    infos << Info.new(**line_to_hash(words))
  end
end
|
248
|
+
|
249
|
+
# Map the words of one ccqstat listing line onto Info attributes.
# @param words [Array<String>] the whitespace separated fields of the line
# @return [Hash, nil] :id, :job_name, :job_owner and :status taken from the
#   1st, 2nd, 3rd and 5th word (the 4th is not used); nil unless there are
#   exactly five words
def line_to_hash(words)
  return unless words.size == 5

  job_id, job_name, job_owner, _unused, state = words

  {
    id: job_id,
    job_name: job_name,
    job_owner: job_owner,
    status: get_state(state)
  }
end
|
260
|
+
|
261
|
+
# Look a ccqstat state name up in STATE_MAP.
# @param state [String, nil] state name as printed by ccqstat
# @return [Symbol] the mapped status, or :undetermined for an unknown name
def get_state(state)
  STATE_MAP.fetch(state) { :undetermined }
end
|
264
|
+
end
|
265
|
+
end
|
266
|
+
end
|
267
|
+
end
|
data/lib/ood_core/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.0
|
4
|
+
version: 0.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2020-08-
|
13
|
+
date: 2020-08-10 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -163,6 +163,7 @@ files:
|
|
163
163
|
- lib/ood_core/errors.rb
|
164
164
|
- lib/ood_core/invalid_cluster.rb
|
165
165
|
- lib/ood_core/job/adapter.rb
|
166
|
+
- lib/ood_core/job/adapters/ccq.rb
|
166
167
|
- lib/ood_core/job/adapters/drmaa.rb
|
167
168
|
- lib/ood_core/job/adapters/helper.rb
|
168
169
|
- lib/ood_core/job/adapters/linux_host.rb
|