ood_core 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bb944d43beb0aced99e13efb2ef10bf33f9666c705c50ca5ae1727751de43073
4
- data.tar.gz: 6e3cd66160be3bbd63124d6f2ddc794bce4ae64e385977828faba5dfd28ff838
3
+ metadata.gz: 3296708d7bc47f3379a9e4a6c845d3f25c5ccefb599f4b92406d9dffdaef220b
4
+ data.tar.gz: b6af9e90b67bc9a7a52203808d849d8800336b30b09bdb8ed204526d01bc92e9
5
5
  SHA512:
6
- metadata.gz: 176e331a856c1e6958c444426d5c1b41aa881e90a69dca507b07f5463eb81355689e8391e0bf27823fc42a9484789f623ffd566b9d6c414c9cf741a7cafd1def
7
- data.tar.gz: 15481101ad3120d3e8457612f2b8a8be4f1e268b38538b18b710f27887836f7a47eac3bf2e89d8f73745ae96ae78e21cd5ba5afefb4161cf95f435d6f2fdf001
6
+ metadata.gz: 623ac6e6f8081d68a3e925d1150c9f20a0f613ccfb6837519d1b95d04533a72caa403c54327aad85dcea9c0694cc23941f40307d942623c095f53fed7fc32026
7
+ data.tar.gz: 0d785a9ade36b2f6f62f9ae55672091346aa4fb76bf358e6c00d4bc007623b8d1798813474665fc7b4d850d89e041fae5c2fefc9719fbe9f53a161a76127eaad
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
6
6
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
7
7
 
8
8
  ## [Unreleased]
9
+ ## [0.13.0] - 2020-08-10
10
+ ### Added
11
+ - CloudyCluster CCQ Adapter
12
+
9
13
  ## [0.12.0] - 2020-08-05
10
14
  ### Added
11
15
  - qos option to Slurm and Torque [#205](https://github.com/OSC/ood_core/pull/205)
@@ -243,7 +247,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
243
247
  ### Added
244
248
  - Initial release!
245
249
 
246
- [Unreleased]: https://github.com/OSC/ood_core/compare/v0.12.0...HEAD
250
+ [Unreleased]: https://github.com/OSC/ood_core/compare/v0.13.0...HEAD
251
+ [0.13.0]: https://github.com/OSC/ood_core/compare/v0.12.0...v0.13.0
247
252
  [0.12.0]: https://github.com/OSC/ood_core/compare/v0.11.4...v0.12.0
248
253
  [0.11.4]: https://github.com/OSC/ood_core/compare/v0.11.3...v0.11.4
249
254
  [0.11.3]: https://github.com/OSC/ood_core/compare/v0.11.2...v0.11.3
@@ -0,0 +1,267 @@
1
+ require "ood_core/job/adapters/helper"
2
+ require "tempfile"
3
+
4
module OodCore
  module Job
    class Factory
      using Refinements::HashExtensions

      # Build the CloudyCluster CCQ adapter from a configuration
      # @param config [#to_h] the configuration for job adapter
      # @option config [Object] :image (nil) The default VM image to use
      # @option config [Object] :cloud ("gcp") The cloud provider being used [gcp,aws]
      # @option config [Object] :scheduler (nil) The name of the scheduler to use
      # @option config [Object] :bin ("/opt/CloudyCluster/srv/CCQ") Path to CC client binaries
      # @option config [#to_h] :bin_overrides ({}) Optional overrides to CC client executables
      # @option config [#to_s] :jobid_regex ("job id is: (?<job_id>\\d+) you")
      #   Regular expression source used to pull the job id out of the
      #   ccqsub output; it must define a named group called `job_id`
      def self.build_ccq(config)
        Adapters::CCQ.new(config.to_h.symbolize_keys)
      end
    end

    module Adapters

      # Raised when a CCQ command fails because it tried to read interactive
      # input (see CCQ#interpret_and_raise for the detection rule).
      class PromptError < StandardError; end

      # Job adapter that drives the CloudyCluster CCQ command line clients
      # (ccqsub, ccqstat and ccqdel).
      class CCQ < Adapter
        using Refinements::ArrayExtensions

        attr_reader :image, :cloud, :scheduler, :bin, :bin_overrides, :jobid_regex

        # @param config [Hash{Symbol=>Object}] adapter options; every key is
        #   optional and falls back to the default given below
        # @see Factory.build_ccq
        def initialize(config)
          @image = config.fetch(:image, nil)
          @cloud = config.fetch(:cloud, gcp_provider)
          @scheduler = config.fetch(:scheduler, nil)
          @bin = config.fetch(:bin, '/opt/CloudyCluster/srv/CCQ')
          @bin_overrides = config.fetch(:bin_overrides, {})
          @jobid_regex = config.fetch(:jobid_regex, "job id is: (?<job_id>\\d+) you")
        end

        # Submit a job with the attributes defined in the job template instance
        # @param script [Script] script object that describes the script and
        #   attributes for the submitted job
        # @param after [#to_s, Array<#to_s>] not used
        # @param afterok [#to_s, Array<#to_s>] not used
        # @param afternotok [#to_s, Array<#to_s>] not used
        # @param afterany [#to_s, Array<#to_s>] not used
        # @raise [PromptError] if ccqsub stopped to prompt for input
        # @raise [JobAdapterError] if ccqsub failed or no job id could be
        #   extracted from its output
        # @return [String] the job id returned after successfully submitting a
        #   job
        # @see Adapter#submit
        def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
          script_file = make_script_file(script.content)
          args = []

          # cluster configuration args
          args.concat ["-s", scheduler] unless scheduler.nil?
          args.concat [image_arg, image] unless image.nil?

          # per-job args
          args.concat ["-o", script.output_path.to_s] unless script.output_path.nil?
          args.concat ["-e", script.error_path.to_s] unless script.error_path.nil?
          args.concat ["-tl", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
          args.concat ["-js", script_file.path.to_s]

          args.concat script.native if script.native

          output = call("ccqsub", args: args)
          parse_job_id_from_ccqsub(output)
        ensure
          # script_file is still nil when make_script_file itself raised; the
          # safe navigation keeps that original error from being replaced by
          # a NoMethodError raised here.
          script_file&.close
        end

        # Retrieve info for all jobs from the resource manager
        # @param attrs [Object] not used by this adapter
        # @raise [PromptError] if ccqstat stopped to prompt for input
        # @raise [JobAdapterError] if ccqstat failed
        # @return [Array<Info>] information describing submitted jobs
        def info_all(attrs: nil)
          args = []
          args.concat ["-s", scheduler] unless scheduler.nil?

          stat_output = call("ccqstat", args: args)
          info_from_ccqstat(stat_output)
        end

        # Retrieve job info from the resource manager
        # @param id [#to_s] the id of the job
        # @raise [PromptError] if ccqstat stopped to prompt for input
        # @raise [JobAdapterError] if ccqstat failed
        # @return [Info] information describing submitted job
        def info(id)
          args = []
          args.concat ["-s", scheduler] unless scheduler.nil?
          args.concat ["-ji", id]

          stat_output = call("ccqstat", args: args)

          # WARNING: code path differs here than info_all because the output
          # from ccqstat -ji $JOBID is 'key: value' detail lines rather than
          # the short columnar listing that plain ccqstat gives.
          info_from_ccqstat_extended(stat_output)
        end

        # Retrieve job status from resource manager
        # @param id [#to_s] the id of the job
        # @return [Status] status of job
        # @see Adapter#status
        def status(id)
          info(id).status
        end

        # This adapter does not implement hold and will always raise
        # an exception.
        # @param id [#to_s] the id of the job
        # @raise [NotImplementedError] always
        # @return [void]
        def hold(_)
          raise NotImplementedError, "subclass did not define #hold"
        end

        # This adapter does not implement release and will always raise
        # an exception.
        # @param id [#to_s] the id of the job
        # @raise [NotImplementedError] always
        # @return [void]
        def release(_)
          raise NotImplementedError, "subclass did not define #release"
        end

        # Delete the submitted job
        # @param id [#to_s] the id of the job
        # @raise [PromptError] if ccqdel stopped to prompt for input
        # @raise [JobAdapterError] if ccqdel failed
        # @return [void]
        def delete(id)
          call("ccqdel", args: [id])
        end

        # @return [String] the prefix that marks a CCQ directive line in a
        #   job script
        def directive_prefix
          '#CC'
        end

        private

        # Mapping of CCQ state names to OodCore status symbols.
        # NOTE: `private` above has no effect on constants, so this is still
        # reachable as CCQ::STATE_MAP.
        STATE_MAP =
          {
            'Error'        => :suspended, # not running, but infrastructure still possibly exists
            'CreatingCG'   => :queued,    # creating control group
            'Pending'      => :queued,    # in queue
            'Submitted'    => :queued,    #
            'Provisioning' => :queued,    # node is being provisioned
            'Running'      => :running,   #
            'Completed'    => :completed, #
          }.freeze

        # Value of the :cloud option that selects Google Cloud (the default).
        def gcp_provider
          'gcp'
        end

        # Value of the :cloud option that selects AWS.
        def aws_provider
          'aws'
        end

        # Command line flag used to pass the VM image to ccqsub. Any cloud
        # value other than the GCP one gets the AWS flag.
        def image_arg
          if cloud == gcp_provider
            '-gcpgi'
          else
            '-awsami'
          end
        end

        # Run a CCQ client command and return its standard output.
        # @param cmd [String] command name, resolved to an executable by
        #   Helper.bin_path using `bin` and `bin_overrides`
        # @param args [Array<#to_s>] command line arguments
        # @param env [#to_h] environment variables for the child process
        # @param stdin [#to_s] data written to the command's standard input
        # @raise [PromptError, JobAdapterError] when the command exits
        #   unsuccessfully (see #interpret_and_raise)
        # @return [String] the command's standard output
        # NOTE(review): Open3 is not required by this file; presumably it is
        # loaded elsewhere in the gem - confirm.
        def call(cmd, args: [], env: {}, stdin: "")
          cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
          args = args.map(&:to_s)
          env = env.to_h
          o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s)
          s.success? ? o : interpret_and_raise(e, cmd)
        end

        # helper function to interpret an error the command had given and
        # raise a different error.
        # @param error [String] standard error output of the failed command
        # @param command [String] the command that was run
        # @raise [PromptError] if the error output ends with an EOFError line
        # @raise [JobAdapterError] for every other failure, carrying the
        #   command's error output as the message
        def interpret_and_raise(error, command)
          # a special case with CCQ that prompts the user for username & password
          # so let's be helpful and tell the user what to do.
          if error.end_with?("EOFError: EOF when reading a line\n")
            raise(
              PromptError,
              "The #{command} command was prompted. You need to generate the certificate " +
              "manually in a shell by running 'ccqstat'\nand entering your username/password"
            )
          else
            # `error` already is the stderr text; there is no exception
            # object in scope here to ask for a message.
            raise JobAdapterError, error
          end
        end

        # Convert seconds to an "HH:MM:SS" duration string
        def seconds_to_duration(seconds)
          format("%02d:%02d:%02d", seconds / 3600, seconds / 60 % 60, seconds % 60)
        end

        # helper to make a script file. We can't pipe it into ccq so we have to
        # write a file.
        # @param content [#to_s] the script body
        # @return [Tempfile] the still open temp file, flushed so its content
        #   can be read through its path
        def make_script_file(content)
          file = Tempfile.new(tmp_file_name)
          file.write(content.to_s)
          file.flush
          file
        end

        # Basename prefix used for the temporary script files.
        def tmp_file_name
          'ccq_ood_script_'
        end

        # Extract the job id from the output of ccqsub using `jobid_regex`.
        # @param output [String] standard output of ccqsub
        # @raise [JobAdapterError] if the regex does not match or does not
        #   capture a `job_id` group
        # @return [String] the job id
        def parse_job_id_from_ccqsub(output)
          match_data = /#{jobid_regex}/.match(output)
          # match_data could be nil, OR re-configured jobid_regex could be looking for a different named group
          job_id = match_data.named_captures['job_id'] if match_data
          raise JobAdapterError, "Could not extract job id out of ccqsub output '#{output}'" if job_id.nil?

          job_id
        end

        # parse an Ood::Job::Info object from extended ccqstat output
        # NOTE(review): a missing startTime/dateSubmitted becomes 0 through
        # nil.to_i - confirm that is the intended fallback.
        def info_from_ccqstat_extended(data)
          raw = extended_data_to_hash(data)

          Info.new(
            native: raw,
            status: get_state(raw['status']),
            id: raw['name'],
            job_name: raw['jobName'],
            job_owner: raw['userName'],
            submit_host: raw['submitHostInstanceId'],
            dispatch_time: raw['startTime'].to_i,
            submission_time: raw['dateSubmitted'].to_i,
            queue_name: raw['criteriaPriority']
          )
        end

        # extended data is just lines of 'key: value' value, so parse
        # it and stick it all in a hash. Only the first whitespace-free token
        # after the colon is kept as the value.
        def extended_data_to_hash(data)
          data.to_s.scan(/(\w+): (\S+)/).to_h
        end

        # Parse the columnar ccqstat listing into Info objects. The header
        # line (first word "Id") and any line that does not split into exactly
        # five words are ignored.
        def info_from_ccqstat(data)
          rows = data.to_s.each_line.map { |line| line.split(/\s/).reject(&:empty?) }

          rows
            .select { |words| words.size == 5 && words[0] != "Id" }
            .map { |words| Info.new(line_to_hash(words)) }
        end

        # Build the Info attributes for one five-word listing line. The fourth
        # word (index 3) is not used.
        # @return [Hash, nil] nil unless exactly five words are given
        def line_to_hash(words)
          return unless words.size == 5

          {
            id: words[0],
            job_name: words[1],
            job_owner: words[2],
            status: get_state(words[4])
          }
        end

        # Translate a CCQ state name into an OodCore status symbol, falling
        # back to :undetermined for names missing from STATE_MAP.
        def get_state(state)
          STATE_MAP.fetch(state, :undetermined)
        end
      end
    end
  end
end
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.12.0"
3
+ VERSION = "0.13.0"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2020-08-05 00:00:00.000000000 Z
13
+ date: 2020-08-10 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -163,6 +163,7 @@ files:
163
163
  - lib/ood_core/errors.rb
164
164
  - lib/ood_core/invalid_cluster.rb
165
165
  - lib/ood_core/job/adapter.rb
166
+ - lib/ood_core/job/adapters/ccq.rb
166
167
  - lib/ood_core/job/adapters/drmaa.rb
167
168
  - lib/ood_core/job/adapters/helper.rb
168
169
  - lib/ood_core/job/adapters/linux_host.rb