ood_core 0.13.0 → 0.16.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +30 -0
- data/CHANGELOG.md +64 -1
- data/README.md +2 -2
- data/lib/ood_core/cluster.rb +11 -5
- data/lib/ood_core/job/adapters/ccq.rb +19 -12
- data/lib/ood_core/job/adapters/kubernetes.rb +193 -0
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +372 -0
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +299 -0
- data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +9 -0
- data/lib/ood_core/job/adapters/kubernetes/resources.rb +82 -0
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +188 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +25 -10
- data/lib/ood_core/job/adapters/linux_host/templates/script_wrapper.erb.sh +3 -14
- data/lib/ood_core/job/adapters/slurm.rb +18 -1
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -1
- metadata +32 -6
- data/.travis.yml +0 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 19665b6db28d01da39093dc90d4a5023ca12264f07b932aebc8ec8c443bafa25
|
4
|
+
data.tar.gz: d9c8c6d8f30851ea9138c8325aafd750823534a51f36601a20366265ac4feec2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ed1eaa873366ad5e825ed29c7401dd3bca4a424ab7a689a19479f297ec20d7e019cd53609006b0919a365dd0002eb0c1e9c0cabcc9f69579cf7ae81b33b3ae7
|
7
|
+
data.tar.gz: 90a4cfa3ee8b1f76ef7e1f28df6d8e64725d1eaff005b4bd4ff7fc8f88e5bfda8a15e706636c18e7b5ac74451071eaea4e6814945ea25e95f6c7ed2de8fd2fec
|
@@ -0,0 +1,30 @@
|
|
1
|
+
name: Unit Tests
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches:
|
6
|
+
- master
|
7
|
+
pull_request:
|
8
|
+
branches:
|
9
|
+
- master
|
10
|
+
|
11
|
+
jobs:
|
12
|
+
tests:
|
13
|
+
runs-on: ubuntu-latest
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- name: checkout
|
17
|
+
uses: actions/checkout@v2
|
18
|
+
|
19
|
+
- name: Setup Ruby using Bundler
|
20
|
+
uses: ruby/setup-ruby@v1
|
21
|
+
with:
|
22
|
+
ruby-version: "2.7.1"
|
23
|
+
bundler-cache: true
|
24
|
+
bundler: "2.1.4"
|
25
|
+
|
26
|
+
- name: install gems
|
27
|
+
run: bundle install
|
28
|
+
|
29
|
+
- name: test
|
30
|
+
run: bundle exec rake spec
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,64 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
|
+
## [0.16.1] - 2021-04-23
|
10
|
+
### Fixed
|
11
|
+
- memoized some allow? variables to have better support around ACLs in
|
12
|
+
[267](https://github.com/OSC/ood_core/pull/267)
|
13
|
+
|
14
|
+
## [0.16.0] - 2021-04-20
|
15
|
+
### Fixed
|
16
|
+
- tmux 2.7+ bug in the linux host adapter in [258](https://github.com/OSC/ood_core/pull/258)
|
17
|
+
and [259](https://github.com/OSC/ood_core/pull/259).
|
18
|
+
|
19
|
+
### Changed
|
20
|
+
|
21
|
+
- Changed how k8s configmaps are defined in [251](https://github.com/OSC/ood_core/pull/251).
|
22
|
+
The data structure now expects a key called files which is an array of objects that hold
|
23
|
+
filename, data, mount_path, sub_path and init_mount_path.
|
24
|
+
[255](https://github.com/OSC/ood_core/pull/255) also relates to this interface change.
|
25
|
+
|
26
|
+
### Added
|
27
|
+
|
28
|
+
- The k8s adapter can now specify environment variables and creates defaults
|
29
|
+
in [252](https://github.com/OSC/ood_core/pull/252).
|
30
|
+
- The k8s adapter can now specify image pull secrets in [253](https://github.com/OSC/ood_core/pull/253).
|
31
|
+
|
32
|
+
## [0.15.1] - 2021-02-25
|
33
|
+
### Fixed
|
34
|
+
- kubernetes adapter uses the full module for helpers in [245](https://github.com/OSC/ood_core/pull/245).
|
35
|
+
|
36
|
+
### Changed
|
37
|
+
- kubernetes pods spawn with runAsNonRoot set to true in [247](https://github.com/OSC/ood_core/pull/247).
|
38
|
+
- kubernetes pods can spawn with supplemental groups along with some other security defaults in
|
39
|
+
[246](https://github.com/OSC/ood_core/pull/246).
|
40
|
+
|
41
|
+
## [0.15.0] - 2021-01-26
|
42
|
+
### Fixed
|
43
|
+
- ccq adapter now accepts job names with spaces in [210](https://github.com/OSC/ood_core/pull/210)
|
44
|
+
- k8s correctly handles having no mount volumes in [239](https://github.com/OSC/ood_core/pull/239)
|
45
|
+
|
46
|
+
### Added
|
47
|
+
- k8s adapter now applies account metadata to resources in [216](https://github.com/OSC/ood_core/pull/216) and
|
48
|
+
[231](https://github.com/OSC/ood_core/pull/231)
|
49
|
+
- k8s adapter can now prefix namespaces in [218](https://github.com/OSC/ood_core/pull/218)
|
50
|
+
- k8s adapter now applies time limits to pods in [224](https://github.com/OSC/ood_core/pull/224)
|
51
|
+
|
52
|
+
### Changed
|
53
|
+
- testing automation is now done in github actions in [221](https://github.com/OSC/ood_core/pull/221)
|
54
|
+
- update bundler to 2.1.4 and ruby to 2.7 in [235](https://github.com/OSC/ood_core/pull/235)
|
55
|
+
- k8s adapter more appropriately labels unschedulable pods as queued in [230](https://github.com/OSC/ood_core/pull/230)
|
56
|
+
- k8s adapter now uses the script#ood_connection_info API instead of script#native in
|
57
|
+
[222](https://github.com/OSC/ood_core/pull/222)
|
58
|
+
|
59
|
+
## [0.14.0] - 2020-10-01
|
60
|
+
### Added
|
61
|
+
- Kubernetes adapter in PR [156](https://github.com/OSC/ood_core/pull/156)
|
62
|
+
|
63
|
+
### Fixed
|
64
|
+
- Catch Slurm times. [209](https://github.com/OSC/ood_core/pull/209)
|
65
|
+
- LHA race condition in deleting tmp files. [212](https://github.com/OSC/ood_core/pull/212)
|
66
|
+
|
9
67
|
## [0.13.0] - 2020-08-10
|
10
68
|
### Added
|
11
69
|
- CloudyCluster CCQ Adapter
|
@@ -247,7 +305,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
247
305
|
### Added
|
248
306
|
- Initial release!
|
249
307
|
|
250
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
308
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.16.1...HEAD
|
309
|
+
[0.16.1]: https://github.com/OSC/ood_core/compare/v0.16.0...v0.16.1
|
310
|
+
[0.16.0]: https://github.com/OSC/ood_core/compare/v0.15.1...v0.16.0
|
311
|
+
[0.15.1]: https://github.com/OSC/ood_core/compare/v0.15.0...v0.15.1
|
312
|
+
[0.15.0]: https://github.com/OSC/ood_core/compare/v0.14.0...v0.15.0
|
313
|
+
[0.14.0]: https://github.com/OSC/ood_core/compare/v0.13.0...v0.14.0
|
251
314
|
[0.13.0]: https://github.com/OSC/ood_core/compare/v0.12.0...v0.13.0
|
252
315
|
[0.12.0]: https://github.com/OSC/ood_core/compare/v0.11.4...v0.12.0
|
253
316
|
[0.11.4]: https://github.com/OSC/ood_core/compare/v0.11.3...v0.11.4
|
data/README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
# OodCore
|
2
2
|
|
3
|
-
[![Build Status](https://
|
3
|
+
[![Build Status](https://github.com/osc/ood_core/workflows/Unit%20Tests/badge.svg)](https://github.com/OSC/ood_core/actions?query=workflow%3A%22Unit+Tests%22)
|
4
4
|
![GitHub Release](https://img.shields.io/github/release/osc/ood_core.svg)
|
5
5
|
![GitHub License](https://img.shields.io/github/license/osc/ood_core.svg)
|
6
6
|
|
7
7
|
- Website: http://openondemand.org/
|
8
8
|
- Website repo with JOSS publication: https://github.com/OSC/Open-OnDemand
|
9
|
-
- Documentation: https://osc.github.io/ood-documentation/
|
9
|
+
- Documentation: https://osc.github.io/ood-documentation/latest/
|
10
10
|
- Main code repo: https://github.com/OSC/ondemand
|
11
11
|
- Core library repo: https://github.com/OSC/ood_core
|
12
12
|
|
data/lib/ood_core/cluster.rb
CHANGED
@@ -78,7 +78,9 @@ module OodCore
|
|
78
78
|
# Whether the login feature is allowed
|
79
79
|
# @return [Boolean] is login allowed
|
80
80
|
def login_allow?
|
81
|
-
|
81
|
+
return @login_allow if defined?(@login_allow)
|
82
|
+
|
83
|
+
@login_allow = (allow? && !login_config.empty?)
|
82
84
|
end
|
83
85
|
|
84
86
|
# Build a job adapter from the job configuration
|
@@ -90,9 +92,11 @@ module OodCore
|
|
90
92
|
# Whether the job feature is allowed based on the ACLs
|
91
93
|
# @return [Boolean] is the job feature allowed
|
92
94
|
def job_allow?
|
93
|
-
|
94
|
-
|
95
|
-
|
95
|
+
return @job_allow if defined?(@job_allow)
|
96
|
+
|
97
|
+
@job_allow = (allow? && ! job_config.empty? && build_acls(
|
98
|
+
job_config.fetch(:acls, []).map(&:to_h)
|
99
|
+
).all?(&:allow?))
|
96
100
|
end
|
97
101
|
|
98
102
|
# The batch connect template configuration used for this cluster
|
@@ -138,7 +142,9 @@ module OodCore
|
|
138
142
|
# Whether this cluster is allowed to be used
|
139
143
|
# @return [Boolean] whether cluster is allowed
|
140
144
|
def allow?
|
141
|
-
|
145
|
+
return @allow if defined?(@allow)
|
146
|
+
|
147
|
+
@allow = acls.all?(&:allow?)
|
142
148
|
end
|
143
149
|
|
144
150
|
# The comparison operator
|
@@ -203,6 +203,10 @@ module OodCore
|
|
203
203
|
'ccq_ood_script_'
|
204
204
|
end
|
205
205
|
|
206
|
+
def ccqstat_regex
|
207
|
+
/^(?<id>\S+)\s+(?<name>.+)\s+(?<username>\S+)\s+(?<scheduler>\S+)\s+(?<status>\S+)\s*$/
|
208
|
+
end
|
209
|
+
|
206
210
|
def parse_job_id_from_ccqsub(output)
|
207
211
|
match_data = /#{jobid_regex}/.match(output)
|
208
212
|
# match_data could be nil, OR re-configured jobid_regex could be looking for a different named group
|
@@ -236,28 +240,31 @@ module OodCore
|
|
236
240
|
def info_from_ccqstat(data)
|
237
241
|
infos = []
|
238
242
|
|
239
|
-
data.to_s.
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
infos << Info.new(line_to_hash(words)) if words.size == 5
|
243
|
+
data.to_s.lines.drop(1).each do |line|
|
244
|
+
match_data = ccqstat_regex.match(line)
|
245
|
+
infos << Info.new(ccqstat_match_to_hash(match_data)) if valid_ccqstat_match?(match_data)
|
244
246
|
end
|
245
247
|
|
246
248
|
infos
|
247
249
|
end
|
248
250
|
|
249
|
-
def
|
250
|
-
return unless words.size == 5
|
251
|
-
|
251
|
+
def ccqstat_match_to_hash(match)
|
252
252
|
data_hash = {}
|
253
|
-
data_hash[:id] =
|
254
|
-
data_hash[:
|
255
|
-
data_hash[:
|
256
|
-
|
253
|
+
data_hash[:id] = match.named_captures.fetch('id', nil)
|
254
|
+
data_hash[:job_owner] = match.named_captures.fetch('username', nil)
|
255
|
+
data_hash[:status] = get_state(match.named_captures.fetch('status', nil))
|
256
|
+
|
257
|
+
# The regex leaves trailing empty spaces. There's no way to tell if they're _actually_
|
258
|
+
# a part of the job name or not, so we assume they're not and add the rstrip.
|
259
|
+
data_hash[:job_name] = match.named_captures.fetch('name', nil).to_s.rstrip
|
257
260
|
|
258
261
|
data_hash
|
259
262
|
end
|
260
263
|
|
264
|
+
def valid_ccqstat_match?(match)
|
265
|
+
!match.nil? && !match.named_captures.fetch('id', nil).nil?
|
266
|
+
end
|
267
|
+
|
261
268
|
def get_state(state)
|
262
269
|
STATE_MAP.fetch(state, :undetermined)
|
263
270
|
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "ood_core/refinements/array_extensions"
|
3
|
+
|
4
|
+
module OodCore
|
5
|
+
module Job
|
6
|
+
class Factory
|
7
|
+
using Refinements::HashExtensions
|
8
|
+
|
9
|
+
def self.build_kubernetes(config)
|
10
|
+
batch = Adapters::Kubernetes::Batch.new(config.to_h.symbolize_keys)
|
11
|
+
Adapters::Kubernetes.new(batch)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
module Adapters
|
16
|
+
class Kubernetes < Adapter
|
17
|
+
|
18
|
+
using Refinements::ArrayExtensions
|
19
|
+
using Refinements::HashExtensions
|
20
|
+
|
21
|
+
require "ood_core/job/adapters/kubernetes/batch"
|
22
|
+
|
23
|
+
attr_reader :batch
|
24
|
+
|
25
|
+
def initialize(batch)
|
26
|
+
@batch = batch
|
27
|
+
end
|
28
|
+
|
29
|
+
# Submit a job with the attributes defined in the job template instance
|
30
|
+
# @abstract Subclass is expected to implement {#submit}
|
31
|
+
# @raise [NotImplementedError] if subclass did not define {#submit}
|
32
|
+
# @example Submit job template to cluster
|
33
|
+
# solver_id = job_adapter.submit(solver_script)
|
34
|
+
# #=> "1234.server"
|
35
|
+
# @example Submit job that depends on previous job
|
36
|
+
# post_id = job_adapter.submit(
|
37
|
+
# post_script,
|
38
|
+
# afterok: solver_id
|
39
|
+
# )
|
40
|
+
# #=> "1235.server"
|
41
|
+
# @param script [Script] script object that describes the
|
42
|
+
# script and attributes for the submitted job
|
43
|
+
# @param after [#to_s, Array<#to_s>] this job may be scheduled for execution
|
44
|
+
# at any point after dependent jobs have started execution
|
45
|
+
# @param afterok [#to_s, Array<#to_s>] this job may be scheduled for
|
46
|
+
# execution only after dependent jobs have terminated with no errors
|
47
|
+
# @param afternotok [#to_s, Array<#to_s>] this job may be scheduled for
|
48
|
+
# execution only after dependent jobs have terminated with errors
|
49
|
+
# @param afterany [#to_s, Array<#to_s>] this job may be scheduled for
|
50
|
+
# execution after dependent jobs have terminated
|
51
|
+
# @return [String] the job id returned after successfully submitting a job
|
52
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
53
|
+
raise ArgumentError, 'Must specify the script' if script.nil?
|
54
|
+
|
55
|
+
batch.submit(script)
|
56
|
+
rescue Batch::Error => e
|
57
|
+
raise JobAdapterError, e.message
|
58
|
+
end
|
59
|
+
|
60
|
+
|
61
|
+
# Retrieve info for all jobs from the resource manager
|
62
|
+
# @abstract Subclass is expected to implement {#info_all}
|
63
|
+
# @raise [NotImplementedError] if subclass did not define {#info_all}
|
64
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
65
|
+
# This array specifies only attrs you want, in addition to id and status.
|
66
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
67
|
+
# to have a value for any attr besides the ones specified and id and status.
|
68
|
+
#
|
69
|
+
# For certain adapters this may speed up the response since
|
70
|
+
# adapters can get by without populating the entire Info object
|
71
|
+
# @return [Array<Info>] information describing submitted jobs
|
72
|
+
def info_all(attrs: nil)
|
73
|
+
batch.info_all(attrs: attrs)
|
74
|
+
rescue Batch::Error => e
|
75
|
+
raise JobAdapterError, e.message
|
76
|
+
end
|
77
|
+
|
78
|
+
# Retrieve info for all jobs for a given owner or owners from the
|
79
|
+
# resource manager
|
80
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
81
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
82
|
+
# This array specifies only attrs you want, in addition to id and status.
|
83
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
84
|
+
# to have a value for any attr besides the ones specified and id and status.
|
85
|
+
#
|
86
|
+
# For certain adapters this may speed up the response since
|
87
|
+
# adapters can get by without populating the entire Info object
|
88
|
+
# @return [Array<Info>] information describing submitted jobs
|
89
|
+
def info_where_owner(owner, attrs: nil)
|
90
|
+
owner = Array.wrap(owner).map(&:to_s)
|
91
|
+
|
92
|
+
# must at least have job_owner to filter by job_owner
|
93
|
+
attrs = Array.wrap(attrs) | [:job_owner] unless attrs.nil?
|
94
|
+
|
95
|
+
info_all(attrs: attrs).select { |info| owner.include? info.job_owner }
|
96
|
+
end
|
97
|
+
|
98
|
+
# Iterate over each job Info object
|
99
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
100
|
+
# This array specifies only attrs you want, in addition to id and status.
|
101
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
102
|
+
# to have a value for any attr besides the ones specified and id and status.
|
103
|
+
#
|
104
|
+
# For certain adapters this may speed up the response since
|
105
|
+
# adapters can get by without populating the entire Info object
|
106
|
+
# @yield [Info] of each job to block
|
107
|
+
# @return [Enumerator] if no block given
|
108
|
+
def info_all_each(attrs: nil)
|
109
|
+
return to_enum(:info_all_each, attrs: attrs) unless block_given?
|
110
|
+
|
111
|
+
info_all(attrs: attrs).each do |job|
|
112
|
+
yield job
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
# Iterate over each job Info object
|
117
|
+
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
118
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
119
|
+
# This array specifies only attrs you want, in addition to id and status.
|
120
|
+
# If an array, the Info object that is returned to you is not guaranteed
|
121
|
+
# to have a value for any attr besides the ones specified and id and status.
|
122
|
+
#
|
123
|
+
# For certain adapters this may speed up the response since
|
124
|
+
# adapters can get by without populating the entire Info object
|
125
|
+
# @yield [Info] of each job to block
|
126
|
+
# @return [Enumerator] if no block given
|
127
|
+
def info_where_owner_each(owner, attrs: nil)
|
128
|
+
return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?
|
129
|
+
|
130
|
+
info_where_owner(owner, attrs: attrs).each do |job|
|
131
|
+
yield job
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
# Whether the adapter supports job arrays
|
136
|
+
# @return [Boolean] - assumes true; but can be overridden by adapters that
|
137
|
+
# explicitly do not
|
138
|
+
def supports_job_arrays?
|
139
|
+
false
|
140
|
+
end
|
141
|
+
|
142
|
+
# Retrieve job info from the resource manager
|
143
|
+
# @abstract Subclass is expected to implement {#info}
|
144
|
+
# @raise [NotImplementedError] if subclass did not define {#info}
|
145
|
+
# @param id [#to_s] the id of the job
|
146
|
+
# @return [Info] information describing submitted job
|
147
|
+
def info(id)
|
148
|
+
batch.info(id.to_s)
|
149
|
+
rescue Batch::Error => e
|
150
|
+
raise JobAdapterError, e.message
|
151
|
+
end
|
152
|
+
|
153
|
+
# Retrieve job status from resource manager
|
154
|
+
# @note Optimized slightly over retrieving complete job information from server
|
155
|
+
# @abstract Subclass is expected to implement {#status}
|
156
|
+
# @raise [NotImplementedError] if subclass did not define {#status}
|
157
|
+
# @param id [#to_s] the id of the job
|
158
|
+
# @return [Status] status of job
|
159
|
+
def status(id)
|
160
|
+
info(id).status
|
161
|
+
end
|
162
|
+
|
163
|
+
# Put the submitted job on hold
|
164
|
+
# @abstract Subclass is expected to implement {#hold}
|
165
|
+
# @raise [NotImplementedError] if subclass did not define {#hold}
|
166
|
+
# @param id [#to_s] the id of the job
|
167
|
+
# @return [void]
|
168
|
+
def hold(id)
|
169
|
+
raise NotImplementedError, 'subclass did not define #hold'
|
170
|
+
end
|
171
|
+
|
172
|
+
# Release the job that is on hold
|
173
|
+
# @abstract Subclass is expected to implement {#release}
|
174
|
+
# @raise [NotImplementedError] if subclass did not define {#release}
|
175
|
+
# @param id [#to_s] the id of the job
|
176
|
+
# @return [void]
|
177
|
+
def release(id)
|
178
|
+
raise NotImplementedError, 'subclass did not define #release'
|
179
|
+
end
|
180
|
+
|
181
|
+
# Delete the submitted job.
|
182
|
+
#
|
183
|
+
# @param id [#to_s] the id of the job
|
184
|
+
# @return [void]
|
185
|
+
def delete(id)
|
186
|
+
batch.delete(id.to_s)
|
187
|
+
rescue Batch::Error => e
|
188
|
+
raise JobAdapterError, e.message
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
@@ -0,0 +1,372 @@
|
|
1
|
+
require "ood_core/refinements/hash_extensions"
|
2
|
+
require "json"
|
3
|
+
|
4
|
+
class OodCore::Job::Adapters::Kubernetes::Batch
|
5
|
+
|
6
|
+
require_relative "helper"
|
7
|
+
require_relative "k8s_job_info"
|
8
|
+
|
9
|
+
using OodCore::Refinements::HashExtensions
|
10
|
+
|
11
|
+
class Error < StandardError; end
|
12
|
+
class NotFoundError < StandardError; end
|
13
|
+
|
14
|
+
attr_reader :config_file, :bin, :cluster, :mounts
|
15
|
+
attr_reader :all_namespaces, :using_context, :helper
|
16
|
+
attr_reader :username_prefix, :namespace_prefix
|
17
|
+
|
18
|
+
def initialize(options = {})
|
19
|
+
options = options.to_h.symbolize_keys
|
20
|
+
|
21
|
+
@config_file = options.fetch(:config_file, default_config_file)
|
22
|
+
@bin = options.fetch(:bin, '/usr/bin/kubectl')
|
23
|
+
@cluster = options.fetch(:cluster, 'open-ondemand')
|
24
|
+
@mounts = options.fetch(:mounts, []).map { |m| m.to_h.symbolize_keys }
|
25
|
+
@all_namespaces = options.fetch(:all_namespaces, false)
|
26
|
+
@username_prefix = options.fetch(:username_prefix, nil)
|
27
|
+
@namespace_prefix = options.fetch(:namespace_prefix, '')
|
28
|
+
|
29
|
+
@using_context = false
|
30
|
+
@helper = OodCore::Job::Adapters::Kubernetes::Helper.new
|
31
|
+
|
32
|
+
begin
|
33
|
+
make_kubectl_config(options)
|
34
|
+
rescue
|
35
|
+
# FIXME could use a log here
|
36
|
+
# means you couldn't 'kubectl set config'
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def resource_file(resource_type = 'pod')
|
41
|
+
File.dirname(__FILE__) + "/templates/#{resource_type}.yml.erb"
|
42
|
+
end
|
43
|
+
|
44
|
+
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
45
|
+
raise ArgumentError, 'Must specify the script' if script.nil?
|
46
|
+
|
47
|
+
resource_yml, id = generate_id_yml(script)
|
48
|
+
call("#{formatted_ns_cmd} create -f -", stdin: resource_yml)
|
49
|
+
|
50
|
+
id
|
51
|
+
end
|
52
|
+
|
53
|
+
def generate_id(name)
|
54
|
+
# 2_821_109_907_456 = 36**8
|
55
|
+
name.downcase.tr(' ', '-') + '-' + rand(2_821_109_907_456).to_s(36)
|
56
|
+
end
|
57
|
+
|
58
|
+
def info_all(attrs: nil)
|
59
|
+
cmd = if all_namespaces
|
60
|
+
"#{base_cmd} get pods -o json --all-namespaces"
|
61
|
+
else
|
62
|
+
"#{namespaced_cmd} get pods -o json"
|
63
|
+
end
|
64
|
+
|
65
|
+
output = call(cmd)
|
66
|
+
all_pods_to_info(output)
|
67
|
+
end
|
68
|
+
|
69
|
+
def info_where_owner(owner, attrs: nil)
|
70
|
+
owner = Array.wrap(owner).map(&:to_s)
|
71
|
+
|
72
|
+
# must at least have job_owner to filter by job_owner
|
73
|
+
attrs = Array.wrap(attrs) | [:job_owner] unless attrs.nil?
|
74
|
+
|
75
|
+
info_all(attrs: attrs).select { |info| owner.include? info.job_owner }
|
76
|
+
end
|
77
|
+
|
78
|
+
def info_all_each(attrs: nil)
|
79
|
+
return to_enum(:info_all_each, attrs: attrs) unless block_given?
|
80
|
+
|
81
|
+
info_all(attrs: attrs).each do |job|
|
82
|
+
yield job
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def info_where_owner_each(owner, attrs: nil)
|
87
|
+
return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?
|
88
|
+
|
89
|
+
info_where_owner(owner, attrs: attrs).each do |job|
|
90
|
+
yield job
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
def info(id)
|
95
|
+
pod_json = safe_call('get', 'pod', id)
|
96
|
+
return OodCore::Job::Info.new({ id: id, status: 'completed' }) if pod_json.empty?
|
97
|
+
|
98
|
+
service_json = safe_call('get', 'service', service_name(id))
|
99
|
+
secret_json = safe_call('get', 'secret', secret_name(id))
|
100
|
+
|
101
|
+
helper.info_from_json(pod_json: pod_json, service_json: service_json, secret_json: secret_json)
|
102
|
+
end
|
103
|
+
|
104
|
+
def status(id)
|
105
|
+
info(id).status
|
106
|
+
end
|
107
|
+
|
108
|
+
def delete(id)
|
109
|
+
safe_call("delete", "pod", id)
|
110
|
+
safe_call("delete", "service", service_name(id))
|
111
|
+
safe_call("delete", "secret", secret_name(id))
|
112
|
+
safe_call("delete", "configmap", configmap_name(id))
|
113
|
+
end
|
114
|
+
|
115
|
+
private
|
116
|
+
|
117
|
+
def safe_call(verb, resource, id)
|
118
|
+
begin
|
119
|
+
case verb.to_s
|
120
|
+
when "get"
|
121
|
+
call_json_output('get', resource, id)
|
122
|
+
when "delete"
|
123
|
+
call("#{namespaced_cmd} delete #{resource} #{id}")
|
124
|
+
end
|
125
|
+
rescue NotFoundError
|
126
|
+
{}
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
# helper to help format multi-line yaml data from the submit.yml into
|
131
|
+
# multi-line yaml in the pod.yml.erb
|
132
|
+
def config_data_lines(data)
|
133
|
+
output = []
|
134
|
+
first = true
|
135
|
+
|
136
|
+
data.to_s.each_line do |line|
|
137
|
+
output.append(first ? line : line.prepend(" "))
|
138
|
+
first = false
|
139
|
+
end
|
140
|
+
|
141
|
+
output
|
142
|
+
end
|
143
|
+
|
144
|
+
def username
|
145
|
+
@username ||= Etc.getlogin
|
146
|
+
end
|
147
|
+
|
148
|
+
def k8s_username
|
149
|
+
username_prefix.nil? ? username : "#{username_prefix}-#{username}"
|
150
|
+
end
|
151
|
+
|
152
|
+
def user
|
153
|
+
@user ||= Etc.getpwnam(username)
|
154
|
+
end
|
155
|
+
|
156
|
+
def home_dir
|
157
|
+
user.dir
|
158
|
+
end
|
159
|
+
|
160
|
+
def run_as_user
|
161
|
+
user.uid
|
162
|
+
end
|
163
|
+
|
164
|
+
def run_as_group
|
165
|
+
user.gid
|
166
|
+
end
|
167
|
+
|
168
|
+
def fs_group
|
169
|
+
run_as_group
|
170
|
+
end
|
171
|
+
|
172
|
+
def group
|
173
|
+
Etc.getgrgid(run_as_group).name
|
174
|
+
end
|
175
|
+
|
176
|
+
def default_env
|
177
|
+
{
|
178
|
+
USER: username,
|
179
|
+
UID: run_as_user,
|
180
|
+
HOME: home_dir,
|
181
|
+
GROUP: group,
|
182
|
+
GID: run_as_group,
|
183
|
+
}
|
184
|
+
end
|
185
|
+
|
186
|
+
# helper to template resource yml you're going to submit and
|
187
|
+
# create an id.
|
188
|
+
def generate_id_yml(script)
|
189
|
+
native_data = script.native
|
190
|
+
container = helper.container_from_native(native_data[:container], default_env)
|
191
|
+
id = generate_id(container.name)
|
192
|
+
configmap = helper.configmap_from_native(native_data, id)
|
193
|
+
init_containers = helper.init_ctrs_from_native(native_data[:init_containers], container.env)
|
194
|
+
spec = OodCore::Job::Adapters::Kubernetes::Resources::PodSpec.new(container, init_containers: init_containers)
|
195
|
+
all_mounts = native_data[:mounts].nil? ? mounts : mounts + native_data[:mounts]
|
196
|
+
|
197
|
+
template = ERB.new(File.read(resource_file), nil, '-')
|
198
|
+
|
199
|
+
[template.result(binding), id]
|
200
|
+
end
|
201
|
+
|
202
|
+
# helper to call kubectl and get json data back.
|
203
|
+
# verb, resource and id are the kubernetes parlance terms.
|
204
|
+
# example: 'kubectl get pod my-pod-id' is verb=get, resource=pod
|
205
|
+
# and id=my-pod-id
|
206
|
+
def call_json_output(verb, resource, id, stdin: nil)
|
207
|
+
cmd = "#{formatted_ns_cmd} #{verb} #{resource} #{id}"
|
208
|
+
data = call(cmd, stdin: stdin)
|
209
|
+
data = data.empty? ? '{}' : data
|
210
|
+
json_data = JSON.parse(data, symbolize_names: true)
|
211
|
+
|
212
|
+
json_data
|
213
|
+
end
|
214
|
+
|
215
|
+
def service_name(id)
|
216
|
+
helper.service_name(id)
|
217
|
+
end
|
218
|
+
|
219
|
+
def secret_name(id)
|
220
|
+
helper.secret_name(id)
|
221
|
+
end
|
222
|
+
|
223
|
+
def configmap_name(id)
|
224
|
+
helper.configmap_name(id)
|
225
|
+
end
|
226
|
+
|
227
|
+
def namespace
|
228
|
+
"#{namespace_prefix}#{username}"
|
229
|
+
end
|
230
|
+
|
231
|
+
def context
|
232
|
+
cluster
|
233
|
+
end
|
234
|
+
|
235
|
+
def default_config_file
|
236
|
+
(ENV['KUBECONFIG'] || "#{Dir.home}/.kube/config")
|
237
|
+
end
|
238
|
+
|
239
|
+
def default_auth
|
240
|
+
{
|
241
|
+
type: 'managaged'
|
242
|
+
}.symbolize_keys
|
243
|
+
end
|
244
|
+
|
245
|
+
def default_server
|
246
|
+
{
|
247
|
+
endpoint: 'https://localhost:8080',
|
248
|
+
cert_authority_file: nil
|
249
|
+
}.symbolize_keys
|
250
|
+
end
|
251
|
+
|
252
|
+
def formatted_ns_cmd
|
253
|
+
"#{namespaced_cmd} -o json"
|
254
|
+
end
|
255
|
+
|
256
|
+
def namespaced_cmd
|
257
|
+
"#{base_cmd} --namespace=#{namespace}"
|
258
|
+
end
|
259
|
+
|
260
|
+
def base_cmd
|
261
|
+
base = "#{bin} --kubeconfig=#{config_file}"
|
262
|
+
base << " --context=#{context}" if using_context
|
263
|
+
base
|
264
|
+
end
|
265
|
+
|
266
|
+
def all_pods_to_info(data)
|
267
|
+
json_data = JSON.parse(data, symbolize_names: true)
|
268
|
+
pods = json_data.dig(:items)
|
269
|
+
|
270
|
+
info_array = []
|
271
|
+
pods.each do |pod|
|
272
|
+
info = pod_info_from_json(pod)
|
273
|
+
info_array.push(info) unless info.nil?
|
274
|
+
end
|
275
|
+
|
276
|
+
info_array
|
277
|
+
rescue JSON::ParserError
|
278
|
+
# 'no resources in <namespace>' throws parse error
|
279
|
+
[]
|
280
|
+
end
|
281
|
+
|
282
|
+
def pod_info_from_json(pod)
|
283
|
+
hash = helper.pod_info_from_json(pod)
|
284
|
+
K8sJobInfo.new(hash)
|
285
|
+
rescue Helper::K8sDataError
|
286
|
+
# FIXME: silently eating error, could probably use a logger
|
287
|
+
nil
|
288
|
+
end
|
289
|
+
|
290
|
+
def make_kubectl_config(config)
|
291
|
+
set_cluster(config.fetch(:server, default_server).to_h.symbolize_keys)
|
292
|
+
configure_auth(config.fetch(:auth, default_auth).to_h.symbolize_keys)
|
293
|
+
end
|
294
|
+
|
295
|
+
def configure_auth(auth)
|
296
|
+
type = auth.fetch(:type)
|
297
|
+
return if managed?(type)
|
298
|
+
|
299
|
+
case type
|
300
|
+
when 'gke'
|
301
|
+
set_gke_config(auth)
|
302
|
+
when 'oidc'
|
303
|
+
set_context
|
304
|
+
end
|
305
|
+
end
|
306
|
+
|
307
|
+
def use_context
|
308
|
+
@using_context = true
|
309
|
+
end
|
310
|
+
|
311
|
+
def managed?(type)
|
312
|
+
if type.nil?
|
313
|
+
true # maybe should be false?
|
314
|
+
else
|
315
|
+
type.to_s == 'managed'
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
def set_gke_config(auth)
|
320
|
+
cred_file = auth.fetch(:svc_acct_file)
|
321
|
+
|
322
|
+
cmd = "gcloud auth activate-service-account --key-file=#{cred_file}"
|
323
|
+
call(cmd)
|
324
|
+
|
325
|
+
set_gke_credentials(auth)
|
326
|
+
end
|
327
|
+
|
328
|
+
def set_gke_credentials(auth)
|
329
|
+
|
330
|
+
zone = auth.fetch(:zone, nil)
|
331
|
+
region = auth.fetch(:region, nil)
|
332
|
+
|
333
|
+
locale = ''
|
334
|
+
locale = "--zone=#{zone}" unless zone.nil?
|
335
|
+
locale = "--region=#{region}" unless region.nil?
|
336
|
+
|
337
|
+
# gke cluster name can probably differ from what ood calls the cluster
|
338
|
+
cmd = "gcloud container clusters get-credentials #{locale} #{cluster}"
|
339
|
+
env = { 'KUBECONFIG' => config_file }
|
340
|
+
call(cmd, env)
|
341
|
+
end
|
342
|
+
|
343
|
+
def set_context
|
344
|
+
cmd = "#{base_cmd} config set-context #{cluster}"
|
345
|
+
cmd << " --cluster=#{cluster} --namespace=#{namespace}"
|
346
|
+
cmd << " --user=#{k8s_username}"
|
347
|
+
|
348
|
+
call(cmd)
|
349
|
+
use_context
|
350
|
+
end
|
351
|
+
|
352
|
+
def set_cluster(config)
|
353
|
+
server = config.fetch(:endpoint)
|
354
|
+
cert = config.fetch(:cert_authority_file, nil)
|
355
|
+
|
356
|
+
cmd = "#{base_cmd} config set-cluster #{cluster}"
|
357
|
+
cmd << " --server=#{server}"
|
358
|
+
cmd << " --certificate-authority=#{cert}" unless cert.nil?
|
359
|
+
|
360
|
+
call(cmd)
|
361
|
+
end
|
362
|
+
|
363
|
+
def call(cmd = '', env: {}, stdin: nil)
|
364
|
+
o, e, s = Open3.capture3(env, cmd, stdin_data: stdin.to_s)
|
365
|
+
s.success? ? o : interpret_and_raise(e)
|
366
|
+
end
|
367
|
+
|
368
|
+
def interpret_and_raise(stderr)
|
369
|
+
raise NotFoundError, stderr if /^Error from server \(NotFound\):/.match(stderr)
|
370
|
+
raise(Error, stderr)
|
371
|
+
end
|
372
|
+
end
|