ood_core 0.11.4 → 0.15.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +30 -0
- data/CHANGELOG.md +55 -1
- data/README.md +7 -6
- data/lib/ood_core/job/adapters/ccq.rb +274 -0
- data/lib/ood_core/job/adapters/helper.rb +20 -1
- data/lib/ood_core/job/adapters/kubernetes.rb +193 -0
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +354 -0
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +294 -0
- data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +9 -0
- data/lib/ood_core/job/adapters/kubernetes/resources.rb +58 -0
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +158 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +10 -1
- data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh +18 -15
- data/lib/ood_core/job/adapters/lsf.rb +1 -0
- data/lib/ood_core/job/adapters/lsf/batch.rb +5 -3
- data/lib/ood_core/job/adapters/lsf/helper.rb +22 -22
- data/lib/ood_core/job/adapters/pbspro.rb +54 -34
- data/lib/ood_core/job/adapters/sge/batch.rb +6 -5
- data/lib/ood_core/job/adapters/sge/helper.rb +19 -19
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +35 -4
- data/lib/ood_core/job/adapters/sge/qstat_xml_r_listener.rb +25 -2
- data/lib/ood_core/job/adapters/slurm.rb +79 -38
- data/lib/ood_core/job/adapters/torque.rb +30 -23
- data/lib/ood_core/job/adapters/torque/batch.rb +29 -12
- data/lib/ood_core/job/script.rb +9 -1
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -1
- metadata +33 -6
- data/.travis.yml +0 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fde32140cb148c6ea939a1d2308446e9144aad5c853fc8c41ea839beadedf03b
|
4
|
+
data.tar.gz: 5925bb0f8576864a3e37696d1c5b32a258edac5ebf78d07a6d509f4ec77c2339
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c9e1d0bd9e423af5289a445c12438875ae5d74b25295c8917209f19ec69b8e84bdb74b6f4ae2da3450a344398e8d96da4ae464498a38c9146d87fff1d1bbb2dd
|
7
|
+
data.tar.gz: b8daebdca0ed93b8d2ebb9089657efbb2b5a88e0b78b76607090b7c5befb96fbaa4a51e820b0236dc596d76c81b8110d1d8b53090f38abbfe01e00d411c96cd5
|
@@ -0,0 +1,30 @@
|
|
1
|
+
name: Unit Tests
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches:
|
6
|
+
- master
|
7
|
+
pull_request:
|
8
|
+
branches:
|
9
|
+
- master
|
10
|
+
|
11
|
+
jobs:
|
12
|
+
tests:
|
13
|
+
runs-on: ubuntu-latest
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- name: checkout
|
17
|
+
uses: actions/checkout@v2
|
18
|
+
|
19
|
+
- name: Setup Ruby using Bundler
|
20
|
+
uses: ruby/setup-ruby@v1
|
21
|
+
with:
|
22
|
+
ruby-version: "2.7.1"
|
23
|
+
bundler-cache: true
|
24
|
+
bundler: "2.1.4"
|
25
|
+
|
26
|
+
- name: install gems
|
27
|
+
run: bundle install
|
28
|
+
|
29
|
+
- name: test
|
30
|
+
run: bundle exec rake spec
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,55 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
|
+
## [0.15.1] - 2021-02-25
|
10
|
+
### Fixed
|
11
|
+
- kubernetes adapter uses the full module for helpers in [245](https://github.com/OSC/ood_core/pull/245).
|
12
|
+
|
13
|
+
### Changed
|
14
|
+
- kubernetes pods spawn with runAsNonRoot set to true in [247](https://github.com/OSC/ood_core/pull/247).
|
15
|
+
- kubernetes pods can spawn with supplemental groups along with some other security defaults in
|
16
|
+
[246](https://github.com/OSC/ood_core/pull/246).
|
17
|
+
|
18
|
+
## [0.15.0] - 2021-01-26
|
19
|
+
### Fixed
|
20
|
+
- ccq adapter now accepts job names with spaces in [210](https://github.com/OSC/ood_core/pull/210)
|
21
|
+
- k8s correctly handles having no mount volumes in [239](https://github.com/OSC/ood_core/pull/239)
|
22
|
+
|
23
|
+
### Added
|
24
|
+
- k8s adapter now applies account metadata to resources in [216](https://github.com/OSC/ood_core/pull/216) and
|
25
|
+
[231](https://github.com/OSC/ood_core/pull/231)
|
26
|
+
- k8s adapter can now prefix namespaces in [218](https://github.com/OSC/ood_core/pull/218)
|
27
|
+
- k8s adapter now applies time limits to pods in [224](https://github.com/OSC/ood_core/pull/224)
|
28
|
+
|
29
|
+
### Changed
|
30
|
+
- testing automation is now done in github actions in [221](https://github.com/OSC/ood_core/pull/221)
|
31
|
+
- update bundler to 2.1.4 and ruby to 2.7 in [235](https://github.com/OSC/ood_core/pull/235)
|
32
|
+
- k8s adapter more appropriately labels unschedulable pods as queued in [230](https://github.com/OSC/ood_core/pull/230)
|
33
|
+
- k8s adapter now uses the script#ood_connection_info API instead of script#native in
|
34
|
+
[222](https://github.com/OSC/ood_core/pull/222)
|
35
|
+
|
36
|
+
## [0.14.0] - 2020-10-01
|
37
|
+
### Added
|
38
|
+
- Kubernetes adapter in PR [156](https://github.com/OSC/ood_core/pull/156)
|
39
|
+
|
40
|
+
### Fixed
|
41
|
+
- Catch Slurm times. [209](https://github.com/OSC/ood_core/pull/209)
|
42
|
+
- LHA race condition in deleting tmp files. [212](https://github.com/OSC/ood_core/pull/212)
|
43
|
+
|
44
|
+
## [0.13.0] - 2020-08-10
|
45
|
+
### Added
|
46
|
+
- CloudyCluster CCQ Adapter
|
47
|
+
|
48
|
+
## [0.12.0] - 2020-08-05
|
49
|
+
### Added
|
50
|
+
- qos option to Slurm and Torque [#205](https://github.com/OSC/ood_core/pull/205)
|
51
|
+
- native hash returned in qstat for SGE adapter [#198](https://github.com/OSC/ood_core/pull/198)
|
52
|
+
- option for specifying `submit_host` to submit jobs via ssh on other host [#204](https://github.com/OSC/ood_core/pull/204)
|
53
|
+
|
54
|
+
### Fixed
|
55
|
+
- SGE handle milliseconds instead of seconds when milliseconds used [#206](https://github.com/OSC/ood_core/issues/206)
|
56
|
+
- Torque's native "hash" for job submission now handles env vars values with spaces [#202](https://github.com/OSC/ood_core/pull/202)
|
57
|
+
|
9
58
|
## [0.11.4] - 2020-05-27
|
10
59
|
### Fixed
|
11
60
|
- Environment exports in SLURM while implementing [#158](https://github.com/OSC/ood_core/issues/158)
|
@@ -233,7 +282,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
233
282
|
### Added
|
234
283
|
- Initial release!
|
235
284
|
|
236
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
285
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.15.1...HEAD
|
286
|
+
[0.15.1]: https://github.com/OSC/ood_core/compare/v0.15.0...v0.15.1
|
287
|
+
[0.15.0]: https://github.com/OSC/ood_core/compare/v0.14.0...v0.15.0
|
288
|
+
[0.14.0]: https://github.com/OSC/ood_core/compare/v0.13.0...v0.14.0
|
289
|
+
[0.13.0]: https://github.com/OSC/ood_core/compare/v0.12.0...v0.13.0
|
290
|
+
[0.12.0]: https://github.com/OSC/ood_core/compare/v0.11.4...v0.12.0
|
237
291
|
[0.11.4]: https://github.com/OSC/ood_core/compare/v0.11.3...v0.11.4
|
238
292
|
[0.11.3]: https://github.com/OSC/ood_core/compare/v0.11.2...v0.11.3
|
239
293
|
[0.11.2]: https://github.com/OSC/ood_core/compare/v0.11.1...v0.11.2
|
data/README.md
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
# OodCore
|
2
2
|
|
3
|
-
[![Build Status](https://
|
3
|
+
[![Build Status](https://github.com/osc/ood_core/workflows/Unit%20Tests/badge.svg)](https://github.com/OSC/ood_core/actions?query=workflow%3A%22Unit+Tests%22)
|
4
4
|
![GitHub Release](https://img.shields.io/github/release/osc/ood_core.svg)
|
5
5
|
![GitHub License](https://img.shields.io/github/license/osc/ood_core.svg)
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
7
|
+
- Website: http://openondemand.org/
|
8
|
+
- Website repo with JOSS publication: https://github.com/OSC/Open-OnDemand
|
9
|
+
- Documentation: https://osc.github.io/ood-documentation/latest/
|
10
|
+
- Main code repo: https://github.com/OSC/ondemand
|
11
|
+
- Core library repo: https://github.com/OSC/ood_core
|
11
12
|
|
12
|
-
|
13
|
+
OnDemand core library with adapters for each batch scheduler.
|
13
14
|
|
14
15
|
## Installation
|
15
16
|
|
@@ -0,0 +1,274 @@
|
|
1
|
+
require "ood_core/job/adapters/helper"
|
2
|
+
require "tempfile"
|
3
|
+
|
4
|
+
module OodCore
|
5
|
+
module Job
|
6
|
+
class Factory
|
7
|
+
using Refinements::HashExtensions
|
8
|
+
|
9
|
+
# Build the Cloudy Cluster adapter from a configuration
|
10
|
+
# @param config [#to_h] the configuration for job adapter
|
11
|
+
# @option config [Object] :image (nil) The default VM image to use
|
12
|
+
# @option config [Object] :cloud (gcp) The cloud provider being used [gcp,aws]
|
13
|
+
# @option config [Object] :scheduler (nil) The name of the scheduler to use
|
14
|
+
# @option config [Object] :sge_root (nil) Path to SGE root (NOTE: not read by CCQ#initialize)
|
15
|
+
# @option config [#to_h] :bin (nil) Path to CC client binaries
|
16
|
+
# @option config [#to_h] :bin_overrides ({}) Optional overrides to CC client executables
|
17
|
+
def self.build_ccq(config)
|
18
|
+
Adapters::CCQ.new(config.to_h.symbolize_keys)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
module Adapters
|
23
|
+
|
24
|
+
class PromptError < StandardError; end
|
25
|
+
|
26
|
+
class CCQ < Adapter
|
27
|
+
using Refinements::ArrayExtensions
|
28
|
+
|
29
|
+
attr_reader :image, :cloud, :scheduler, :bin, :bin_overrides, :jobid_regex
|
30
|
+
|
31
|
+
def initialize(config)
|
32
|
+
@image = config.fetch(:image, nil)
|
33
|
+
@cloud = config.fetch(:cloud, gcp_provider)
|
34
|
+
@scheduler = config.fetch(:scheduler, nil)
|
35
|
+
@bin = config.fetch(:bin, '/opt/CloudyCluster/srv/CCQ')
|
36
|
+
@bin_overrides = config.fetch(:bin_overrides, {})
|
37
|
+
@jobid_regex = config.fetch(:jobid_regex, "job id is: (?<job_id>\\d+) you")
|
38
|
+
end
|
39
|
+
|
40
|
+
# Submit a job with the attributes defined in the job template instance
|
41
|
+
# @param script [Script] script object that describes the script and
|
42
|
+
# attributes for the submitted job
|
43
|
+
# @param after [#to_s, Array<#to_s>] not used
|
44
|
+
# @param afterok [#to_s, Array<#to_s>] not used
|
45
|
+
# @param afternotok [#to_s, Array<#to_s>] not used
|
46
|
+
# @param afterany [#to_s, Array<#to_s>] not used
|
47
|
+
# @return [String] the job id returned after successfully submitting a
|
48
|
+
# job
|
49
|
+
# @see Adapter#submit
|
50
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
  # ccqsub cannot read the script from stdin, so it is written to a temp file
  script_file = make_script_file(script.content)
  args = []

  # cluster configuration args
  args.concat ["-s", scheduler] unless scheduler.nil?
  args.concat [image_arg, image] unless image.nil?

  # per-job options taken from the script object
  args.concat ["-o", script.output_path.to_s] unless script.output_path.nil?
  args.concat ["-e", script.error_path.to_s] unless script.error_path.nil?
  args.concat ["-tl", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
  args.concat ["-js", script_file.path.to_s]

  # adapter-specific arguments are appended untouched
  args.concat script.native if script.native

  output = call("ccqsub", args: args)
  parse_job_id_from_ccqsub(output)
ensure
  # script_file is still nil when make_script_file itself raised; guard the
  # cleanup so it does not replace the original error with a NoMethodError
  script_file&.close
end
|
70
|
+
|
71
|
+
# Retrieve info for all jobs from the resource manager
|
72
|
+
# @return [Array<Info>] information describing submitted jobs
|
73
|
+
def info_all(attrs: nil)
  # only name the scheduler when one was configured
  scheduler_args = scheduler.nil? ? [] : ["-s", scheduler]

  # plain ccqstat output is tabular; hand it to the table parser
  info_from_ccqstat(call("ccqstat", args: scheduler_args))
end
|
80
|
+
|
81
|
+
# Retrieve job info from the resource manager
|
82
|
+
# @param id [#to_s] the id of the job
|
83
|
+
# @return [Info] information describing submitted job
|
84
|
+
def info(id)
  stat_args = scheduler.nil? ? [] : ["-s", scheduler]
  stat_args.push("-ji", id)

  # WARNING: `ccqstat -ji $JOBID` prints far more than the 4 columns of a
  # plain `ccqstat`, so it goes through a different parser than info_all.
  info_from_ccqstat_extended(call("ccqstat", args: stat_args))
end
|
96
|
+
|
97
|
+
# Retrieve job status from resource manager
|
98
|
+
# @param id [#to_s] the id of the job
|
99
|
+
# @return [Status] status of job
|
100
|
+
# @see Adapter#status
|
101
|
+
def status(id)
|
102
|
+
info(id).status
|
103
|
+
end
|
104
|
+
|
105
|
+
# This adapter does not implement hold and will always raise
|
106
|
+
# an exception.
|
107
|
+
# @param id [#to_s] the id of the job
|
108
|
+
# @raise [NotImplementedError] always
|
109
|
+
# @return [void]
|
110
|
+
def hold(_)
|
111
|
+
raise NotImplementedError, "subclass did not define #hold"
|
112
|
+
end
|
113
|
+
|
114
|
+
# This adapter does not implement release and will always raise
|
115
|
+
# an exception.
|
116
|
+
# @param id [#to_s] the id of the job
|
117
|
+
# @raise [NotImplementedError] always
|
118
|
+
# @return [void]
|
119
|
+
def release(_)
|
120
|
+
raise NotImplementedError, "subclass did not define #release"
|
121
|
+
end
|
122
|
+
|
123
|
+
# Delete the submitted job
|
124
|
+
# @param id [#to_s] the id of the job
|
125
|
+
# @return [void]
|
126
|
+
def delete(id)
|
127
|
+
call("ccqdel", args: [id])
|
128
|
+
end
|
129
|
+
|
130
|
+
def directive_prefix
|
131
|
+
'#CC'
|
132
|
+
end
|
133
|
+
|
134
|
+
private
|
135
|
+
|
136
|
+
# Mapping of state codes
|
137
|
+
STATE_MAP =
|
138
|
+
{
|
139
|
+
'Error' => :suspended, # not running, but infrastructure still possibly exists
|
140
|
+
'CreatingCG' => :queued, # creating control group
|
141
|
+
'Pending' => :queued, # in queue
|
142
|
+
'Submitted' => :queued, #
|
143
|
+
'Provisioning' => :queued, # node is being provisioned
|
144
|
+
'Running' => :running, #
|
145
|
+
'Completed' => :completed, #
|
146
|
+
}.freeze
|
147
|
+
|
148
|
+
def gcp_provider
|
149
|
+
'gcp'
|
150
|
+
end
|
151
|
+
|
152
|
+
def aws_provider
|
153
|
+
'aws'
|
154
|
+
end
|
155
|
+
|
156
|
+
# Name of the ccqsub flag that carries the VM image: the GCP flag when the
# configured cloud equals gcp_provider, the AWS AMI flag for anything else.
def image_arg
  return '-gcpgi' if cloud == gcp_provider

  '-awsami'
end
|
163
|
+
|
164
|
+
def call(cmd, args: [], env: {}, stdin: "")
|
165
|
+
cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
|
166
|
+
args = args.map(&:to_s)
|
167
|
+
env = env.to_h
|
168
|
+
o, e, s = Open3.capture3(env, cmd, *args, stdin_data: stdin.to_s)
|
169
|
+
s.success? ? o : interpret_and_raise(e, cmd)
|
170
|
+
end
|
171
|
+
|
172
|
+
# helper function to interpret an error the command had given and
|
173
|
+
# raise a different error.
|
174
|
+
# @param error [String] stderr captured from the failed command
# @param command [String] the command that was run (interpolated into the message)
# @raise [PromptError] when stderr shows the command stopped to prompt for input
# @raise [JobAdapterError] for any other failure, with stderr as the message
def interpret_and_raise(error, command)
  # a special case with CCQ that prompts the user for username & password
  # so let's be helpful and tell the user what to do.
  if error.end_with?("EOFError: EOF when reading a line\n")
    raise(
      PromptError,
      "The #{command} command was prompted. You need to generate the certificate " +
      "manually in a shell by running 'ccqstat'\nand entering your username/password"
    )
  else
    # `error` is already the stderr text; there is no exception object here
    raise(JobAdapterError, error)
  end
end
|
187
|
+
|
188
|
+
# Convert seconds to duration
|
189
|
+
def seconds_to_duration(seconds)
  # hours are deliberately not wrapped at 24, so long limits stay in hours
  hours   = seconds / 3600
  minutes = seconds / 60 % 60
  secs    = seconds % 60

  format("%02d:%02d:%02d", hours, minutes, secs)
end
|
192
|
+
|
193
|
+
# helper to make a script file. We can't pipe it into ccq so we have to
|
194
|
+
# write a file.
|
195
|
+
def make_script_file(content)
|
196
|
+
file = Tempfile.new(tmp_file_name)
|
197
|
+
file.write(content.to_s)
|
198
|
+
file.flush
|
199
|
+
file
|
200
|
+
end
|
201
|
+
|
202
|
+
def tmp_file_name
|
203
|
+
'ccq_ood_script_'
|
204
|
+
end
|
205
|
+
|
206
|
+
def ccqstat_regex
|
207
|
+
/^(?<id>\S+)\s+(?<name>.+)\s+(?<username>\S+)\s+(?<scheduler>\S+)\s+(?<status>\S+)\s*$/
|
208
|
+
end
|
209
|
+
|
210
|
+
# Pull the job id out of the text printed by ccqsub using the configured
# jobid_regex, which is expected to define a named group `job_id`.
# @param output [String] stdout of the ccqsub command
# @return [String] the captured job id
# @raise [JobAdapterError] if the regex does not match or has no `job_id` group
def parse_job_id_from_ccqsub(output)
  match_data = /#{jobid_regex}/.match(output)
  # match_data could be nil, OR re-configured jobid_regex could be looking for a different named group
  job_id = match_data&.named_captures&.fetch('job_id', nil)
  # raise (not throw) so callers rescuing JobAdapterError actually see it
  raise JobAdapterError, "Could not extract job id out of ccqsub output '#{output}'" if job_id.nil?

  job_id
end
|
217
|
+
|
218
|
+
# parse an Ood::Job::Info object from extended ccqstat output
|
219
|
+
def info_from_ccqstat_extended(data)
|
220
|
+
raw = extended_data_to_hash(data)
|
221
|
+
data_hash = { native: raw }
|
222
|
+
data_hash[:status] = get_state(raw['status'])
|
223
|
+
data_hash[:id] = raw['name']
|
224
|
+
data_hash[:job_name] = raw['jobName']
|
225
|
+
data_hash[:job_owner] = raw['userName']
|
226
|
+
data_hash[:submit_host] = raw['submitHostInstanceId']
|
227
|
+
data_hash[:dispatch_time] = raw['startTime'].to_i
|
228
|
+
data_hash[:submission_time] = raw['dateSubmitted'].to_i
|
229
|
+
data_hash[:queue_name] = raw['criteriaPriority']
|
230
|
+
|
231
|
+
Info.new(data_hash)
|
232
|
+
end
|
233
|
+
|
234
|
+
# extended data is just lines of 'key: value', so parse
|
235
|
+
# it and stick it all in a hash.
|
236
|
+
def extended_data_to_hash(data)
  # each `word: token` occurrence becomes one entry; a later duplicate key wins
  key_value_pairs = data.to_s.scan(/(\w+): (\S+)/)
  key_value_pairs.to_h
end
|
239
|
+
|
240
|
+
# Build Info objects from plain (tabular) ccqstat output. The first line is
# dropped, and any remaining line that does not yield a valid match is ignored.
def info_from_ccqstat(data)
  rows = data.to_s.lines.drop(1)

  rows
    .map { |row| ccqstat_regex.match(row) }
    .select { |row_match| valid_ccqstat_match?(row_match) }
    .map { |row_match| Info.new(ccqstat_match_to_hash(row_match)) }
end
|
250
|
+
|
251
|
+
# Turn one matched ccqstat row into the attribute hash passed to Info.new.
def ccqstat_match_to_hash(match)
  captures = match.named_captures

  {
    id: captures.fetch('id', nil),
    job_owner: captures.fetch('username', nil),
    status: get_state(captures.fetch('status', nil)),
    # The regex leaves trailing whitespace on the name. We cannot tell whether
    # it truly belongs to the job name, so assume it does not and rstrip it.
    job_name: captures.fetch('name', nil).to_s.rstrip
  }
end
|
263
|
+
|
264
|
+
# A row is usable only when the regex matched and captured an `id`.
def valid_ccqstat_match?(match)
  return false if match.nil?

  !match.named_captures.fetch('id', nil).nil?
end
|
267
|
+
|
268
|
+
def get_state(state)
|
269
|
+
STATE_MAP.fetch(state, :undetermined)
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|
273
|
+
end
|
274
|
+
end
|
@@ -12,7 +12,26 @@ module OodCore
|
|
12
12
|
def self.bin_path(cmd, bin_default, bin_overrides)
|
13
13
|
bin_overrides.fetch(cmd.to_s) { Pathname.new(bin_default.to_s).join(cmd.to_s).to_s }
|
14
14
|
end
|
15
|
+
|
16
|
+
# Gets a command that submits command on another host via ssh
|
17
|
+
# @param submit_host [String] where to submit the command
|
18
|
+
# @param cmd [String] the desired command to execute on another host
|
19
|
+
# @param cmd_args [Array] arguments to the command specified above
|
20
|
+
# @param strict_host_checking [Bool] whether to use strict_host_checking
|
21
|
+
# @param env [Hash] env variables to be set w/ssh
|
22
|
+
#
|
23
|
+
# @return cmd [String] command wrapped in ssh if submit_host is present
|
24
|
+
# @return args [Array] command arguments including ssh_flags and original command
|
25
|
+
def self.ssh_wrap(submit_host, cmd, cmd_args, strict_host_checking = true, env = {})
  # no submit host configured: run the command locally, unchanged
  return cmd, cmd_args if submit_host.to_s.empty?

  host_key_policy = strict_host_checking ? "yes" : "no"
  ssh_args = [
    '-o', 'BatchMode=yes',
    '-o', 'UserKnownHostsFile=/dev/null',
    '-o', "StrictHostKeyChecking=#{host_key_policy}",
    submit_host.to_s
  ]

  # env vars are exported in the remote command line, ahead of the command itself
  env.each do |name, value|
    ssh_args << "export #{name}=#{value};"
  end

  ['ssh', ssh_args + [cmd] + cmd_args]
end
|
15
34
|
end
|
16
35
|
end
|
17
36
|
end
|
18
|
-
end
|
37
|
+
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "ood_core/refinements/array_extensions"
|
3
|
+
|
4
|
+
module OodCore
|
5
|
+
module Job
|
6
|
+
class Factory
|
7
|
+
using Refinements::HashExtensions
|
8
|
+
|
9
|
+
def self.build_kubernetes(config)
|
10
|
+
batch = Adapters::Kubernetes::Batch.new(config.to_h.symbolize_keys)
|
11
|
+
Adapters::Kubernetes.new(batch)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
module Adapters
|
16
|
+
class Kubernetes < Adapter
|
17
|
+
|
18
|
+
using Refinements::ArrayExtensions
|
19
|
+
using Refinements::HashExtensions
|
20
|
+
|
21
|
+
require "ood_core/job/adapters/kubernetes/batch"
|
22
|
+
|
23
|
+
attr_reader :batch
|
24
|
+
|
25
|
+
def initialize(batch)
|
26
|
+
@batch = batch
|
27
|
+
end
|
28
|
+
|
29
|
+
# Submit a job with the attributes defined in the job template instance
|
30
|
+
# @abstract Subclass is expected to implement {#submit}
|
31
|
+
# @raise [NotImplementedError] if subclass did not define {#submit}
|
32
|
+
# @example Submit job template to cluster
|
33
|
+
# solver_id = job_adapter.submit(solver_script)
|
34
|
+
# #=> "1234.server"
|
35
|
+
# @example Submit job that depends on previous job
|
36
|
+
# post_id = job_adapter.submit(
|
37
|
+
# post_script,
|
38
|
+
# afterok: solver_id
|
39
|
+
# )
|
40
|
+
# #=> "1235.server"
|
41
|
+
# @param script [Script] script object that describes the
|
42
|
+
# script and attributes for the submitted job
|
43
|
+
# @param after [#to_s, Array<#to_s>] this job may be scheduled for execution
|
44
|
+
# at any point after dependent jobs have started execution
|
45
|
+
# @param afterok [#to_s, Array<#to_s>] this job may be scheduled for
|
46
|
+
# execution only after dependent jobs have terminated with no errors
|
47
|
+
# @param afternotok [#to_s, Array<#to_s>] this job may be scheduled for
|
48
|
+
# execution only after dependent jobs have terminated with errors
|
49
|
+
# @param afterany [#to_s, Array<#to_s>] this job may be scheduled for
|
50
|
+
# execution after dependent jobs have terminated
|
51
|
+
# @return [String] the job id returned after successfully submitting a job
|
52
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
53
|
+
raise ArgumentError, 'Must specify the script' if script.nil?
|
54
|
+
|
55
|
+
batch.submit(script)
|
56
|
+
rescue Batch::Error => e
|
57
|
+
raise JobAdapterError, e.message
|
58
|
+
end
|
59
|
+
|
60
|
+
|
61
|
+
# Retrieve info for all jobs from the resource manager
|
62
|
+
# @abstract Subclass is expected to implement {#info_all}
|
63
|
+
# @raise [NotImplementedError] if subclass did not define {#info_all}
|
64
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
65
|
+
# This array specifies only attrs you want, in addition to id and status.
|
66
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
67
|
+
# to have a value for any attr besides the ones specified and id and status.
|
68
|
+
#
|
69
|
+
# For certain adapters this may speed up the response since
|
70
|
+
# adapters can get by without populating the entire Info object
|
71
|
+
# @return [Array<Info>] information describing submitted jobs
|
72
|
+
def info_all(attrs: nil)
|
73
|
+
batch.info_all(attrs: attrs)
|
74
|
+
rescue Batch::Error => e
|
75
|
+
raise JobAdapterError, e.message
|
76
|
+
end
|
77
|
+
|
78
|
+
# Retrieve info for all jobs for a given owner or owners from the
|
79
|
+
# resource manager
|
80
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
81
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
82
|
+
# This array specifies only attrs you want, in addition to id and status.
|
83
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
84
|
+
# to have a value for any attr besides the ones specified and id and status.
|
85
|
+
#
|
86
|
+
# For certain adapters this may speed up the response since
|
87
|
+
# adapters can get by without populating the entire Info object
|
88
|
+
# @return [Array<Info>] information describing submitted jobs
|
89
|
+
def info_where_owner(owner, attrs: nil)
|
90
|
+
owner = Array.wrap(owner).map(&:to_s)
|
91
|
+
|
92
|
+
# must at least have job_owner to filter by job_owner
|
93
|
+
attrs = Array.wrap(attrs) | [:job_owner] unless attrs.nil?
|
94
|
+
|
95
|
+
info_all(attrs: attrs).select { |info| owner.include? info.job_owner }
|
96
|
+
end
|
97
|
+
|
98
|
+
# Iterate over each job Info object
|
99
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
100
|
+
# This array specifies only attrs you want, in addition to id and status.
|
101
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
102
|
+
# to have a value for any attr besides the ones specified and id and status.
|
103
|
+
#
|
104
|
+
# For certain adapters this may speed up the response since
|
105
|
+
# adapters can get by without populating the entire Info object
|
106
|
+
# @yield [Info] of each job to block
|
107
|
+
# @return [Enumerator] if no block given
|
108
|
+
def info_all_each(attrs: nil)
  if block_given?
    # hand every Info from info_all to the caller's block, one at a time
    info_all(attrs: attrs).each { |job_info| yield job_info }
  else
    to_enum(:info_all_each, attrs: attrs)
  end
end
|
115
|
+
|
116
|
+
# Iterate over each job Info object
|
117
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
118
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
119
|
+
# This array specifies only attrs you want, in addition to id and status.
|
120
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
121
|
+
# to have a value for any attr besides the ones specified and id and status.
|
122
|
+
#
|
123
|
+
# For certain adapters this may speed up the response since
|
124
|
+
# adapters can get by without populating the entire Info object
|
125
|
+
# @yield [Info] of each job to block
|
126
|
+
# @return [Enumerator] if no block given
|
127
|
+
def info_where_owner_each(owner, attrs: nil)
  if block_given?
    # hand every Info from info_where_owner to the caller's block, one at a time
    info_where_owner(owner, attrs: attrs).each { |job_info| yield job_info }
  else
    to_enum(:info_where_owner_each, owner, attrs: attrs)
  end
end
|
134
|
+
|
135
|
+
# Whether the adapter supports job arrays
|
136
|
+
# @return [Boolean] - assumes true; but can be overridden by adapters that
|
137
|
+
# explicitly do not
|
138
|
+
def supports_job_arrays?
|
139
|
+
false
|
140
|
+
end
|
141
|
+
|
142
|
+
# Retrieve job info from the resource manager
|
143
|
+
# @abstract Subclass is expected to implement {#info}
|
144
|
+
# @raise [NotImplementedError] if subclass did not define {#info}
|
145
|
+
# @param id [#to_s] the id of the job
|
146
|
+
# @return [Info] information describing submitted job
|
147
|
+
def info(id)
|
148
|
+
batch.info(id.to_s)
|
149
|
+
rescue Batch::Error => e
|
150
|
+
raise JobAdapterError, e.message
|
151
|
+
end
|
152
|
+
|
153
|
+
# Retrieve job status from resource manager
|
154
|
+
# @note Optimized slightly over retrieving complete job information from server
|
155
|
+
# @abstract Subclass is expected to implement {#status}
|
156
|
+
# @raise [NotImplementedError] if subclass did not define {#status}
|
157
|
+
# @param id [#to_s] the id of the job
|
158
|
+
# @return [Status] status of job
|
159
|
+
def status(id)
|
160
|
+
info(id).status
|
161
|
+
end
|
162
|
+
|
163
|
+
# Put the submitted job on hold
|
164
|
+
# @abstract Subclass is expected to implement {#hold}
|
165
|
+
# @raise [NotImplementedError] if subclass did not define {#hold}
|
166
|
+
# @param id [#to_s] the id of the job
|
167
|
+
# @return [void]
|
168
|
+
def hold(id)
|
169
|
+
raise NotImplementedError, 'subclass did not define #hold'
|
170
|
+
end
|
171
|
+
|
172
|
+
# Release the job that is on hold
|
173
|
+
# @abstract Subclass is expected to implement {#release}
|
174
|
+
# @raise [NotImplementedError] if subclass did not define {#release}
|
175
|
+
# @param id [#to_s] the id of the job
|
176
|
+
# @return [void]
|
177
|
+
def release(id)
|
178
|
+
raise NotImplementedError, 'subclass did not define #release'
|
179
|
+
end
|
180
|
+
|
181
|
+
# Delete the submitted job.
|
182
|
+
#
|
183
|
+
# @param id [#to_s] the id of the job
|
184
|
+
# @return [void]
|
185
|
+
def delete(id)
|
186
|
+
batch.delete(id.to_s)
|
187
|
+
rescue Batch::Error => e
|
188
|
+
raise JobAdapterError, e.message
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|