ood_core 0.13.0 → 0.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +30 -0
- data/CHANGELOG.md +64 -1
- data/README.md +2 -2
- data/lib/ood_core/cluster.rb +11 -5
- data/lib/ood_core/job/adapters/ccq.rb +19 -12
- data/lib/ood_core/job/adapters/kubernetes.rb +193 -0
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +372 -0
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +299 -0
- data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +9 -0
- data/lib/ood_core/job/adapters/kubernetes/resources.rb +82 -0
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +188 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +25 -10
- data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh +3 -14
- data/lib/ood_core/job/adapters/slurm.rb +18 -1
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -1
- metadata +32 -6
- data/.travis.yml +0 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 19665b6db28d01da39093dc90d4a5023ca12264f07b932aebc8ec8c443bafa25
|
|
4
|
+
data.tar.gz: d9c8c6d8f30851ea9138c8325aafd750823534a51f36601a20366265ac4feec2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1ed1eaa873366ad5e825ed29c7401dd3bca4a424ab7a689a19479f297ec20d7e019cd53609006b0919a365dd0002eb0c1e9c0cabcc9f69579cf7ae81b33b3ae7
|
|
7
|
+
data.tar.gz: 90a4cfa3ee8b1f76ef7e1f28df6d8e64725d1eaff005b4bd4ff7fc8f88e5bfda8a15e706636c18e7b5ac74451071eaea4e6814945ea25e95f6c7ed2de8fd2fec
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: Unit Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- master
|
|
7
|
+
pull_request:
|
|
8
|
+
branches:
|
|
9
|
+
- master
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
tests:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- name: checkout
|
|
17
|
+
uses: actions/checkout@v2
|
|
18
|
+
|
|
19
|
+
- name: Setup Ruby using Bundler
|
|
20
|
+
uses: ruby/setup-ruby@v1
|
|
21
|
+
with:
|
|
22
|
+
ruby-version: "2.7.1"
|
|
23
|
+
bundler-cache: true
|
|
24
|
+
bundler: "2.1.4"
|
|
25
|
+
|
|
26
|
+
- name: install gems
|
|
27
|
+
run: bundle install
|
|
28
|
+
|
|
29
|
+
- name: test
|
|
30
|
+
run: bundle exec rake spec
|
data/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,64 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
|
+
## [0.16.1] - 2021-04-23
|
|
10
|
+
### Fixed
|
|
11
|
+
- memoized some allow? variables to have better support around ACLs in
|
|
12
|
+
[267](https://github.com/OSC/ood_core/pull/267)
|
|
13
|
+
|
|
14
|
+
## [0.16.0] - 2021-04-20
|
|
15
|
+
### Fixed
|
|
16
|
+
- tmux 2.7+ bug in the linux host adapter in [258](https://github.com/OSC/ood_core/pull/258)
|
|
17
|
+
and [259](https://github.com/OSC/ood_core/pull/259).
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- Changed how k8s configmaps are defined in [251](https://github.com/OSC/ood_core/pull/251).
|
|
22
|
+
The data structure now expects a key called files which is an array of objects that hold
|
|
23
|
+
filename, data, mount_path, sub_path and init_mount_path.
|
|
24
|
+
[255](https://github.com/OSC/ood_core/pull/255) also relates to this interface change.
|
|
25
|
+
|
|
26
|
+
### Added
|
|
27
|
+
|
|
28
|
+
- The k8s adapter can now specify environment variables and creates defaults
|
|
29
|
+
in [252](https://github.com/OSC/ood_core/pull/252).
|
|
30
|
+
- The k8s adapter can now specify image pull secrets in [253](https://github.com/OSC/ood_core/pull/253).
|
|
31
|
+
|
|
32
|
+
## [0.15.1] - 2021-02-25
|
|
33
|
+
### Fixed
|
|
34
|
+
- kubernetes adapter uses the full module for helpers in [245](https://github.com/OSC/ood_core/pull/245).
|
|
35
|
+
|
|
36
|
+
### Changed
|
|
37
|
+
- kubernetes pods spawn with runAsNonRoot set to true in [247](https://github.com/OSC/ood_core/pull/247).
|
|
38
|
+
- kubernetes pods can spawn with supplemental groups along with some other security defaults in
|
|
39
|
+
[246](https://github.com/OSC/ood_core/pull/246).
|
|
40
|
+
|
|
41
|
+
## [0.15.0] - 2021-01-26
|
|
42
|
+
### Fixed
|
|
43
|
+
- ccq adapter now accepts job names with spaces in [210](https://github.com/OSC/ood_core/pull/210)
|
|
44
|
+
- k8s correctly handles having no mount volumes in [239](https://github.com/OSC/ood_core/pull/239)
|
|
45
|
+
|
|
46
|
+
### Added
|
|
47
|
+
- k8s adapter now applies account metadata to resources in [216](https://github.com/OSC/ood_core/pull/216) and
|
|
48
|
+
[231](https://github.com/OSC/ood_core/pull/231)
|
|
49
|
+
- k8s adapter can now prefix namespaces in [218](https://github.com/OSC/ood_core/pull/218)
|
|
50
|
+
- k8s adapter now applies time limits to pods in [224](https://github.com/OSC/ood_core/pull/224)
|
|
51
|
+
|
|
52
|
+
### Changed
|
|
53
|
+
- testing automation is now done in github actions in [221](https://github.com/OSC/ood_core/pull/221)
|
|
54
|
+
- update bundler to 2.1.4 and ruby to 2.7 in [235](https://github.com/OSC/ood_core/pull/235)
|
|
55
|
+
- k8s adapter more appropriately labels unschedulable pods as queued in [230](https://github.com/OSC/ood_core/pull/230)
|
|
56
|
+
- k8s adapter now uses the script#ood_connection_info API instead of script#native in
|
|
57
|
+
[222](https://github.com/OSC/ood_core/pull/222)
|
|
58
|
+
|
|
59
|
+
## [0.14.0] - 2020-10-01
|
|
60
|
+
### Added
|
|
61
|
+
- Kubernetes adapter in PR [156](https://github.com/OSC/ood_core/pull/156)
|
|
62
|
+
|
|
63
|
+
### Fixed
|
|
64
|
+
- Catch Slurm times. [209](https://github.com/OSC/ood_core/pull/209)
|
|
65
|
+
- LHA race condition in deleting tmp files. [212](https://github.com/OSC/ood_core/pull/212)
|
|
66
|
+
|
|
9
67
|
## [0.13.0] - 2020-08-10
|
|
10
68
|
### Added
|
|
11
69
|
- CloudyCluster CCQ Adapter
|
|
@@ -247,7 +305,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
|
247
305
|
### Added
|
|
248
306
|
- Initial release!
|
|
249
307
|
|
|
250
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.13.0...HEAD
|
|
308
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.16.1...HEAD
|
|
309
|
+
[0.16.1]: https://github.com/OSC/ood_core/compare/v0.16.0...v0.16.1
|
|
310
|
+
[0.16.0]: https://github.com/OSC/ood_core/compare/v0.15.1...v0.16.0
|
|
311
|
+
[0.15.1]: https://github.com/OSC/ood_core/compare/v0.15.0...v0.15.1
|
|
312
|
+
[0.15.0]: https://github.com/OSC/ood_core/compare/v0.14.0...v0.15.0
|
|
313
|
+
[0.14.0]: https://github.com/OSC/ood_core/compare/v0.13.0...v0.14.0
|
|
251
314
|
[0.13.0]: https://github.com/OSC/ood_core/compare/v0.12.0...v0.13.0
|
|
252
315
|
[0.12.0]: https://github.com/OSC/ood_core/compare/v0.11.4...v0.12.0
|
|
253
316
|
[0.11.4]: https://github.com/OSC/ood_core/compare/v0.11.3...v0.11.4
|
data/README.md
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# OodCore
|
|
2
2
|
|
|
3
|
-
[](https://github.com/OSC/ood_core/actions?query=workflow%3A%22Unit+Tests%22)
|
|
4
4
|

|
|
5
5
|

|
|
6
6
|
|
|
7
7
|
- Website: http://openondemand.org/
|
|
8
8
|
- Website repo with JOSS publication: https://github.com/OSC/Open-OnDemand
|
|
9
|
-
- Documentation: https://osc.github.io/ood-documentation/
|
|
9
|
+
- Documentation: https://osc.github.io/ood-documentation/latest/
|
|
10
10
|
- Main code repo: https://github.com/OSC/ondemand
|
|
11
11
|
- Core library repo: https://github.com/OSC/ood_core
|
|
12
12
|
|
data/lib/ood_core/cluster.rb
CHANGED
|
@@ -78,7 +78,9 @@ module OodCore
|
|
|
78
78
|
# Whether the login feature is allowed
|
|
79
79
|
# @return [Boolean] is login allowed
|
|
80
80
|
def login_allow?
|
|
81
|
-
|
|
81
|
+
return @login_allow if defined?(@login_allow)
|
|
82
|
+
|
|
83
|
+
@login_allow = (allow? && !login_config.empty?)
|
|
82
84
|
end
|
|
83
85
|
|
|
84
86
|
# Build a job adapter from the job configuration
|
|
@@ -90,9 +92,11 @@ module OodCore
|
|
|
90
92
|
# Whether the job feature is allowed based on the ACLs
|
|
91
93
|
# @return [Boolean] is the job feature allowed
|
|
92
94
|
def job_allow?
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
95
|
+
return @job_allow if defined?(@job_allow)
|
|
96
|
+
|
|
97
|
+
@job_allow = (allow? && ! job_config.empty? && build_acls(
|
|
98
|
+
job_config.fetch(:acls, []).map(&:to_h)
|
|
99
|
+
).all?(&:allow?))
|
|
96
100
|
end
|
|
97
101
|
|
|
98
102
|
# The batch connect template configuration used for this cluster
|
|
@@ -138,7 +142,9 @@ module OodCore
|
|
|
138
142
|
# Whether this cluster is allowed to be used
|
|
139
143
|
# @return [Boolean] whether cluster is allowed
|
|
140
144
|
def allow?
|
|
141
|
-
|
|
145
|
+
return @allow if defined?(@allow)
|
|
146
|
+
|
|
147
|
+
@allow = acls.all?(&:allow?)
|
|
142
148
|
end
|
|
143
149
|
|
|
144
150
|
# The comparison operator
|
|
@@ -203,6 +203,10 @@ module OodCore
|
|
|
203
203
|
'ccq_ood_script_'
|
|
204
204
|
end
|
|
205
205
|
|
|
206
|
+
def ccqstat_regex
|
|
207
|
+
/^(?<id>\S+)\s+(?<name>.+)\s+(?<username>\S+)\s+(?<scheduler>\S+)\s+(?<status>\S+)\s*$/
|
|
208
|
+
end
|
|
209
|
+
|
|
206
210
|
def parse_job_id_from_ccqsub(output)
|
|
207
211
|
match_data = /#{jobid_regex}/.match(output)
|
|
208
212
|
# match_data could be nil, OR re-configured jobid_regex could be looking for a different named group
|
|
@@ -236,28 +240,31 @@ module OodCore
|
|
|
236
240
|
def info_from_ccqstat(data)
|
|
237
241
|
infos = []
|
|
238
242
|
|
|
239
|
-
data.to_s.
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
infos << Info.new(line_to_hash(words)) if words.size == 5
|
|
243
|
+
data.to_s.lines.drop(1).each do |line|
|
|
244
|
+
match_data = ccqstat_regex.match(line)
|
|
245
|
+
infos << Info.new(ccqstat_match_to_hash(match_data)) if valid_ccqstat_match?(match_data)
|
|
244
246
|
end
|
|
245
247
|
|
|
246
248
|
infos
|
|
247
249
|
end
|
|
248
250
|
|
|
249
|
-
def
|
|
250
|
-
return unless words.size == 5
|
|
251
|
-
|
|
251
|
+
def ccqstat_match_to_hash(match)
|
|
252
252
|
data_hash = {}
|
|
253
|
-
data_hash[:id] =
|
|
254
|
-
data_hash[:
|
|
255
|
-
data_hash[:
|
|
256
|
-
|
|
253
|
+
data_hash[:id] = match.named_captures.fetch('id', nil)
|
|
254
|
+
data_hash[:job_owner] = match.named_captures.fetch('username', nil)
|
|
255
|
+
data_hash[:status] = get_state(match.named_captures.fetch('status', nil))
|
|
256
|
+
|
|
257
|
+
# The regex leaves trailing empty spaces. There's no way to tell if they're _actually_
|
|
258
|
+
# a part of the job name or not, so we assume they're not and add the rstrip.
|
|
259
|
+
data_hash[:job_name] = match.named_captures.fetch('name', nil).to_s.rstrip
|
|
257
260
|
|
|
258
261
|
data_hash
|
|
259
262
|
end
|
|
260
263
|
|
|
264
|
+
def valid_ccqstat_match?(match)
|
|
265
|
+
!match.nil? && !match.named_captures.fetch('id', nil).nil?
|
|
266
|
+
end
|
|
267
|
+
|
|
261
268
|
def get_state(state)
|
|
262
269
|
STATE_MAP.fetch(state, :undetermined)
|
|
263
270
|
end
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
|
2
|
+
require "ood_core/refinements/array_extensions"
|
|
3
|
+
|
|
4
|
+
module OodCore
|
|
5
|
+
module Job
|
|
6
|
+
class Factory
|
|
7
|
+
using Refinements::HashExtensions
|
|
8
|
+
|
|
9
|
+
def self.build_kubernetes(config)
|
|
10
|
+
batch = Adapters::Kubernetes::Batch.new(config.to_h.symbolize_keys)
|
|
11
|
+
Adapters::Kubernetes.new(batch)
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
module Adapters
|
|
16
|
+
class Kubernetes < Adapter
|
|
17
|
+
|
|
18
|
+
using Refinements::ArrayExtensions
|
|
19
|
+
using Refinements::HashExtensions
|
|
20
|
+
|
|
21
|
+
require "ood_core/job/adapters/kubernetes/batch"
|
|
22
|
+
|
|
23
|
+
attr_reader :batch
|
|
24
|
+
|
|
25
|
+
def initialize(batch)
|
|
26
|
+
@batch = batch
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Submit a job with the attributes defined in the job template instance
|
|
30
|
+
# @abstract Subclass is expected to implement {#submit}
|
|
31
|
+
# @raise [NotImplementedError] if subclass did not define {#submit}
|
|
32
|
+
# @example Submit job template to cluster
|
|
33
|
+
# solver_id = job_adapter.submit(solver_script)
|
|
34
|
+
# #=> "1234.server"
|
|
35
|
+
# @example Submit job that depends on previous job
|
|
36
|
+
# post_id = job_adapter.submit(
|
|
37
|
+
# post_script,
|
|
38
|
+
# afterok: solver_id
|
|
39
|
+
# )
|
|
40
|
+
# #=> "1235.server"
|
|
41
|
+
# @param script [Script] script object that describes the
|
|
42
|
+
# script and attributes for the submitted job
|
|
43
|
+
# @param after [#to_s, Array<#to_s>] this job may be scheduled for execution
|
|
44
|
+
# at any point after dependent jobs have started execution
|
|
45
|
+
# @param afterok [#to_s, Array<#to_s>] this job may be scheduled for
|
|
46
|
+
# execution only after dependent jobs have terminated with no errors
|
|
47
|
+
# @param afternotok [#to_s, Array<#to_s>] this job may be scheduled for
|
|
48
|
+
# execution only after dependent jobs have terminated with errors
|
|
49
|
+
# @param afterany [#to_s, Array<#to_s>] this job may be scheduled for
|
|
50
|
+
# execution after dependent jobs have terminated
|
|
51
|
+
# @return [String] the job id returned after successfully submitting a job
|
|
52
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
|
53
|
+
raise ArgumentError, 'Must specify the script' if script.nil?
|
|
54
|
+
|
|
55
|
+
batch.submit(script)
|
|
56
|
+
rescue Batch::Error => e
|
|
57
|
+
raise JobAdapterError, e.message
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Retrieve info for all jobs from the resource manager
|
|
62
|
+
# @abstract Subclass is expected to implement {#info_all}
|
|
63
|
+
# @raise [NotImplementedError] if subclass did not define {#info_all}
|
|
64
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
|
65
|
+
# This array specifies only attrs you want, in addition to id and status.
|
|
66
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
|
67
|
+
# to have a value for any attr besides the ones specified and id and status.
|
|
68
|
+
#
|
|
69
|
+
# For certain adapters this may speed up the response since
|
|
70
|
+
# adapters can get by without populating the entire Info object
|
|
71
|
+
# @return [Array<Info>] information describing submitted jobs
|
|
72
|
+
def info_all(attrs: nil)
|
|
73
|
+
batch.info_all(attrs: attrs)
|
|
74
|
+
rescue Batch::Error => e
|
|
75
|
+
raise JobAdapterError, e.message
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Retrieve info for all jobs for a given owner or owners from the
|
|
79
|
+
# resource manager
|
|
80
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
|
81
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
|
82
|
+
# This array specifies only attrs you want, in addition to id and status.
|
|
83
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
|
84
|
+
# to have a value for any attr besides the ones specified and id and status.
|
|
85
|
+
#
|
|
86
|
+
# For certain adapters this may speed up the response since
|
|
87
|
+
# adapters can get by without populating the entire Info object
|
|
88
|
+
# @return [Array<Info>] information describing submitted jobs
|
|
89
|
+
def info_where_owner(owner, attrs: nil)
|
|
90
|
+
owner = Array.wrap(owner).map(&:to_s)
|
|
91
|
+
|
|
92
|
+
# must at least have job_owner to filter by job_owner
|
|
93
|
+
attrs = Array.wrap(attrs) | [:job_owner] unless attrs.nil?
|
|
94
|
+
|
|
95
|
+
info_all(attrs: attrs).select { |info| owner.include? info.job_owner }
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Iterate over each job Info object
|
|
99
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
|
100
|
+
# This array specifies only attrs you want, in addition to id and status.
|
|
101
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
|
102
|
+
# to have a value for any attr besides the ones specified and id and status.
|
|
103
|
+
#
|
|
104
|
+
# For certain adapters this may speed up the response since
|
|
105
|
+
# adapters can get by without populating the entire Info object
|
|
106
|
+
# @yield [Info] of each job to block
|
|
107
|
+
# @return [Enumerator] if no block given
|
|
108
|
+
def info_all_each(attrs: nil)
|
|
109
|
+
return to_enum(:info_all_each, attrs: attrs) unless block_given?
|
|
110
|
+
|
|
111
|
+
info_all(attrs: attrs).each do |job|
|
|
112
|
+
yield job
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# Iterate over each job Info object
|
|
117
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
|
118
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
|
119
|
+
# This array specifies only attrs you want, in addition to id and status.
|
|
120
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
|
121
|
+
# to have a value for any attr besides the ones specified and id and status.
|
|
122
|
+
#
|
|
123
|
+
# For certain adapters this may speed up the response since
|
|
124
|
+
# adapters can get by without populating the entire Info object
|
|
125
|
+
# @yield [Info] of each job to block
|
|
126
|
+
# @return [Enumerator] if no block given
|
|
127
|
+
def info_where_owner_each(owner, attrs: nil)
|
|
128
|
+
return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?
|
|
129
|
+
|
|
130
|
+
info_where_owner(owner, attrs: attrs).each do |job|
|
|
131
|
+
yield job
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Whether the adapter supports job arrays
|
|
136
|
+
# @return [Boolean] - always false; Kubernetes is one of the adapters that
|
|
137
|
+
# explicitly do not
|
|
138
|
+
def supports_job_arrays?
|
|
139
|
+
false
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
# Retrieve job info from the resource manager
|
|
143
|
+
# @abstract Subclass is expected to implement {#info}
|
|
144
|
+
# @raise [NotImplementedError] if subclass did not define {#info}
|
|
145
|
+
# @param id [#to_s] the id of the job
|
|
146
|
+
# @return [Info] information describing submitted job
|
|
147
|
+
def info(id)
|
|
148
|
+
batch.info(id.to_s)
|
|
149
|
+
rescue Batch::Error => e
|
|
150
|
+
raise JobAdapterError, e.message
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
# Retrieve job status from resource manager
|
|
154
|
+
# @note Optimized slightly over retrieving complete job information from server
|
|
155
|
+
# @abstract Subclass is expected to implement {#status}
|
|
156
|
+
# @raise [NotImplementedError] if subclass did not define {#status}
|
|
157
|
+
# @param id [#to_s] the id of the job
|
|
158
|
+
# @return [Status] status of job
|
|
159
|
+
def status(id)
|
|
160
|
+
info(id).status
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
# Put the submitted job on hold
|
|
164
|
+
# @abstract Subclass is expected to implement {#hold}
|
|
165
|
+
# @raise [NotImplementedError] if subclass did not define {#hold}
|
|
166
|
+
# @param id [#to_s] the id of the job
|
|
167
|
+
# @return [void]
|
|
168
|
+
def hold(id)
|
|
169
|
+
raise NotImplementedError, 'subclass did not define #hold'
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
# Release the job that is on hold
|
|
173
|
+
# @abstract Subclass is expected to implement {#release}
|
|
174
|
+
# @raise [NotImplementedError] if subclass did not define {#release}
|
|
175
|
+
# @param id [#to_s] the id of the job
|
|
176
|
+
# @return [void]
|
|
177
|
+
def release(id)
|
|
178
|
+
raise NotImplementedError, 'subclass did not define #release'
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
# Delete the submitted job.
|
|
182
|
+
#
|
|
183
|
+
# @param id [#to_s] the id of the job
|
|
184
|
+
# @return [void]
|
|
185
|
+
def delete(id)
|
|
186
|
+
batch.delete(id.to_s)
|
|
187
|
+
rescue Batch::Error => e
|
|
188
|
+
raise JobAdapterError, e.message
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
|
2
|
+
require "json"
|
|
3
|
+
|
|
4
|
+
class OodCore::Job::Adapters::Kubernetes::Batch
|
|
5
|
+
|
|
6
|
+
require_relative "helper"
|
|
7
|
+
require_relative "k8s_job_info"
|
|
8
|
+
|
|
9
|
+
using OodCore::Refinements::HashExtensions
|
|
10
|
+
|
|
11
|
+
class Error < StandardError; end
|
|
12
|
+
class NotFoundError < StandardError; end
|
|
13
|
+
|
|
14
|
+
attr_reader :config_file, :bin, :cluster, :mounts
|
|
15
|
+
attr_reader :all_namespaces, :using_context, :helper
|
|
16
|
+
attr_reader :username_prefix, :namespace_prefix
|
|
17
|
+
|
|
18
|
+
def initialize(options = {})
|
|
19
|
+
options = options.to_h.symbolize_keys
|
|
20
|
+
|
|
21
|
+
@config_file = options.fetch(:config_file, default_config_file)
|
|
22
|
+
@bin = options.fetch(:bin, '/usr/bin/kubectl')
|
|
23
|
+
@cluster = options.fetch(:cluster, 'open-ondemand')
|
|
24
|
+
@mounts = options.fetch(:mounts, []).map { |m| m.to_h.symbolize_keys }
|
|
25
|
+
@all_namespaces = options.fetch(:all_namespaces, false)
|
|
26
|
+
@username_prefix = options.fetch(:username_prefix, nil)
|
|
27
|
+
@namespace_prefix = options.fetch(:namespace_prefix, '')
|
|
28
|
+
|
|
29
|
+
@using_context = false
|
|
30
|
+
@helper = OodCore::Job::Adapters::Kubernetes::Helper.new
|
|
31
|
+
|
|
32
|
+
begin
|
|
33
|
+
make_kubectl_config(options)
|
|
34
|
+
rescue
|
|
35
|
+
# FIXME could use a log here
|
|
36
|
+
# means you couldn't 'kubectl set config'
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def resource_file(resource_type = 'pod')
|
|
41
|
+
File.dirname(__FILE__) + "/templates/#{resource_type}.yml.erb"
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
|
45
|
+
raise ArgumentError, 'Must specify the script' if script.nil?
|
|
46
|
+
|
|
47
|
+
resource_yml, id = generate_id_yml(script)
|
|
48
|
+
call("#{formatted_ns_cmd} create -f -", stdin: resource_yml)
|
|
49
|
+
|
|
50
|
+
id
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def generate_id(name)
|
|
54
|
+
# 2_821_109_907_456 = 36**8
|
|
55
|
+
name.downcase.tr(' ', '-') + '-' + rand(2_821_109_907_456).to_s(36)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def info_all(attrs: nil)
|
|
59
|
+
cmd = if all_namespaces
|
|
60
|
+
"#{base_cmd} get pods -o json --all-namespaces"
|
|
61
|
+
else
|
|
62
|
+
"#{namespaced_cmd} get pods -o json"
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
output = call(cmd)
|
|
66
|
+
all_pods_to_info(output)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def info_where_owner(owner, attrs: nil)
|
|
70
|
+
owner = Array.wrap(owner).map(&:to_s)
|
|
71
|
+
|
|
72
|
+
# must at least have job_owner to filter by job_owner
|
|
73
|
+
attrs = Array.wrap(attrs) | [:job_owner] unless attrs.nil?
|
|
74
|
+
|
|
75
|
+
info_all(attrs: attrs).select { |info| owner.include? info.job_owner }
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def info_all_each(attrs: nil)
|
|
79
|
+
return to_enum(:info_all_each, attrs: attrs) unless block_given?
|
|
80
|
+
|
|
81
|
+
info_all(attrs: attrs).each do |job|
|
|
82
|
+
yield job
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def info_where_owner_each(owner, attrs: nil)
|
|
87
|
+
return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?
|
|
88
|
+
|
|
89
|
+
info_where_owner(owner, attrs: attrs).each do |job|
|
|
90
|
+
yield job
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def info(id)
|
|
95
|
+
pod_json = safe_call('get', 'pod', id)
|
|
96
|
+
return OodCore::Job::Info.new({ id: id, status: 'completed' }) if pod_json.empty?
|
|
97
|
+
|
|
98
|
+
service_json = safe_call('get', 'service', service_name(id))
|
|
99
|
+
secret_json = safe_call('get', 'secret', secret_name(id))
|
|
100
|
+
|
|
101
|
+
helper.info_from_json(pod_json: pod_json, service_json: service_json, secret_json: secret_json)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def status(id)
|
|
105
|
+
info(id).status
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def delete(id)
|
|
109
|
+
safe_call("delete", "pod", id)
|
|
110
|
+
safe_call("delete", "service", service_name(id))
|
|
111
|
+
safe_call("delete", "secret", secret_name(id))
|
|
112
|
+
safe_call("delete", "configmap", configmap_name(id))
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
private
|
|
116
|
+
|
|
117
|
+
def safe_call(verb, resource, id)
|
|
118
|
+
begin
|
|
119
|
+
case verb.to_s
|
|
120
|
+
when "get"
|
|
121
|
+
call_json_output('get', resource, id)
|
|
122
|
+
when "delete"
|
|
123
|
+
call("#{namespaced_cmd} delete #{resource} #{id}")
|
|
124
|
+
end
|
|
125
|
+
rescue NotFoundError
|
|
126
|
+
{}
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# helper to help format multi-line yaml data from the submit.yml into
|
|
131
|
+
# multi-line yaml in the pod.yml.erb
|
|
132
|
+
def config_data_lines(data)
|
|
133
|
+
output = []
|
|
134
|
+
first = true
|
|
135
|
+
|
|
136
|
+
data.to_s.each_line do |line|
|
|
137
|
+
output.append(first ? line : line.prepend(" "))
|
|
138
|
+
first = false
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
output
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def username
|
|
145
|
+
@username ||= Etc.getlogin
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
def k8s_username
|
|
149
|
+
username_prefix.nil? ? username : "#{username_prefix}-#{username}"
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def user
|
|
153
|
+
@user ||= Etc.getpwnam(username)
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
def home_dir
|
|
157
|
+
user.dir
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def run_as_user
|
|
161
|
+
user.uid
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
def run_as_group
|
|
165
|
+
user.gid
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def fs_group
|
|
169
|
+
run_as_group
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
def group
|
|
173
|
+
Etc.getgrgid(run_as_group).name
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
def default_env
|
|
177
|
+
{
|
|
178
|
+
USER: username,
|
|
179
|
+
UID: run_as_user,
|
|
180
|
+
HOME: home_dir,
|
|
181
|
+
GROUP: group,
|
|
182
|
+
GID: run_as_group,
|
|
183
|
+
}
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
# helper to template resource yml you're going to submit and
|
|
187
|
+
# create an id.
|
|
188
|
+
def generate_id_yml(script)
|
|
189
|
+
native_data = script.native
|
|
190
|
+
container = helper.container_from_native(native_data[:container], default_env)
|
|
191
|
+
id = generate_id(container.name)
|
|
192
|
+
configmap = helper.configmap_from_native(native_data, id)
|
|
193
|
+
init_containers = helper.init_ctrs_from_native(native_data[:init_containers], container.env)
|
|
194
|
+
spec = OodCore::Job::Adapters::Kubernetes::Resources::PodSpec.new(container, init_containers: init_containers)
|
|
195
|
+
all_mounts = native_data[:mounts].nil? ? mounts : mounts + native_data[:mounts]
|
|
196
|
+
|
|
197
|
+
template = ERB.new(File.read(resource_file), nil, '-')
|
|
198
|
+
|
|
199
|
+
[template.result(binding), id]
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
# helper to call kubectl and get json data back.
|
|
203
|
+
# verb, resource and id are the kubernetes parlance terms.
|
|
204
|
+
# example: 'kubectl get pod my-pod-id' is verb=get, resource=pod
|
|
205
|
+
# and id=my-pod-id
|
|
206
|
+
def call_json_output(verb, resource, id, stdin: nil)
|
|
207
|
+
cmd = "#{formatted_ns_cmd} #{verb} #{resource} #{id}"
|
|
208
|
+
data = call(cmd, stdin: stdin)
|
|
209
|
+
data = data.empty? ? '{}' : data
|
|
210
|
+
json_data = JSON.parse(data, symbolize_names: true)
|
|
211
|
+
|
|
212
|
+
json_data
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def service_name(id)
|
|
216
|
+
helper.service_name(id)
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
def secret_name(id)
|
|
220
|
+
helper.secret_name(id)
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
def configmap_name(id)
|
|
224
|
+
helper.configmap_name(id)
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
def namespace
|
|
228
|
+
"#{namespace_prefix}#{username}"
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
def context
|
|
232
|
+
cluster
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
def default_config_file
|
|
236
|
+
(ENV['KUBECONFIG'] || "#{Dir.home}/.kube/config")
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
def default_auth
|
|
240
|
+
{
|
|
241
|
+
type: 'managaged'
|
|
242
|
+
}.symbolize_keys
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
def default_server
|
|
246
|
+
{
|
|
247
|
+
endpoint: 'https://localhost:8080',
|
|
248
|
+
cert_authority_file: nil
|
|
249
|
+
}.symbolize_keys
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
def formatted_ns_cmd
|
|
253
|
+
"#{namespaced_cmd} -o json"
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
def namespaced_cmd
|
|
257
|
+
"#{base_cmd} --namespace=#{namespace}"
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def base_cmd
|
|
261
|
+
base = "#{bin} --kubeconfig=#{config_file}"
|
|
262
|
+
base << " --context=#{context}" if using_context
|
|
263
|
+
base
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
def all_pods_to_info(data)
|
|
267
|
+
json_data = JSON.parse(data, symbolize_names: true)
|
|
268
|
+
pods = json_data.dig(:items)
|
|
269
|
+
|
|
270
|
+
info_array = []
|
|
271
|
+
pods.each do |pod|
|
|
272
|
+
info = pod_info_from_json(pod)
|
|
273
|
+
info_array.push(info) unless info.nil?
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
info_array
|
|
277
|
+
rescue JSON::ParserError
|
|
278
|
+
# 'no resources in <namespace>' throws parse error
|
|
279
|
+
[]
|
|
280
|
+
end
|
|
281
|
+
|
|
282
|
+
def pod_info_from_json(pod)
|
|
283
|
+
hash = helper.pod_info_from_json(pod)
|
|
284
|
+
K8sJobInfo.new(hash)
|
|
285
|
+
rescue Helper::K8sDataError
|
|
286
|
+
# FIXME: silently eating error, could probably use a logger
|
|
287
|
+
nil
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
def make_kubectl_config(config)
|
|
291
|
+
set_cluster(config.fetch(:server, default_server).to_h.symbolize_keys)
|
|
292
|
+
configure_auth(config.fetch(:auth, default_auth).to_h.symbolize_keys)
|
|
293
|
+
end
|
|
294
|
+
|
|
295
|
+
def configure_auth(auth)
|
|
296
|
+
type = auth.fetch(:type)
|
|
297
|
+
return if managed?(type)
|
|
298
|
+
|
|
299
|
+
case type
|
|
300
|
+
when 'gke'
|
|
301
|
+
set_gke_config(auth)
|
|
302
|
+
when 'oidc'
|
|
303
|
+
set_context
|
|
304
|
+
end
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
def use_context
|
|
308
|
+
@using_context = true
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
def managed?(type)
|
|
312
|
+
if type.nil?
|
|
313
|
+
true # maybe should be false?
|
|
314
|
+
else
|
|
315
|
+
type.to_s == 'managed'
|
|
316
|
+
end
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
def set_gke_config(auth)
|
|
320
|
+
cred_file = auth.fetch(:svc_acct_file)
|
|
321
|
+
|
|
322
|
+
cmd = "gcloud auth activate-service-account --key-file=#{cred_file}"
|
|
323
|
+
call(cmd)
|
|
324
|
+
|
|
325
|
+
set_gke_credentials(auth)
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
def set_gke_credentials(auth)
|
|
329
|
+
|
|
330
|
+
zone = auth.fetch(:zone, nil)
|
|
331
|
+
region = auth.fetch(:region, nil)
|
|
332
|
+
|
|
333
|
+
locale = ''
|
|
334
|
+
locale = "--zone=#{zone}" unless zone.nil?
|
|
335
|
+
locale = "--region=#{region}" unless region.nil?
|
|
336
|
+
|
|
337
|
+
# gke cluster name can probably differ from what ood calls the cluster
|
|
338
|
+
cmd = "gcloud container clusters get-credentials #{locale} #{cluster}"
|
|
339
|
+
env = { 'KUBECONFIG' => config_file }
|
|
340
|
+
call(cmd, env)
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
def set_context
|
|
344
|
+
cmd = "#{base_cmd} config set-context #{cluster}"
|
|
345
|
+
cmd << " --cluster=#{cluster} --namespace=#{namespace}"
|
|
346
|
+
cmd << " --user=#{k8s_username}"
|
|
347
|
+
|
|
348
|
+
call(cmd)
|
|
349
|
+
use_context
|
|
350
|
+
end
|
|
351
|
+
|
|
352
|
+
def set_cluster(config)
|
|
353
|
+
server = config.fetch(:endpoint)
|
|
354
|
+
cert = config.fetch(:cert_authority_file, nil)
|
|
355
|
+
|
|
356
|
+
cmd = "#{base_cmd} config set-cluster #{cluster}"
|
|
357
|
+
cmd << " --server=#{server}"
|
|
358
|
+
cmd << " --certificate-authority=#{cert}" unless cert.nil?
|
|
359
|
+
|
|
360
|
+
call(cmd)
|
|
361
|
+
end
|
|
362
|
+
|
|
363
|
+
def call(cmd = '', env: {}, stdin: nil)
|
|
364
|
+
o, e, s = Open3.capture3(env, cmd, stdin_data: stdin.to_s)
|
|
365
|
+
s.success? ? o : interpret_and_raise(e)
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
def interpret_and_raise(stderr)
|
|
369
|
+
raise NotFoundError, stderr if /^Error from server \(NotFound\):/.match(stderr)
|
|
370
|
+
raise(Error, stderr)
|
|
371
|
+
end
|
|
372
|
+
end
|