ood_core 0.14.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +30 -0
- data/CHANGELOG.md +87 -1
- data/README.md +1 -1
- data/lib/ood_core/cluster.rb +20 -5
- data/lib/ood_core/job/adapters/ccq.rb +19 -12
- data/lib/ood_core/job/adapters/kubernetes.rb +1 -1
- data/lib/ood_core/job/adapters/kubernetes/batch.rb +82 -55
- data/lib/ood_core/job/adapters/kubernetes/helper.rb +79 -69
- data/lib/ood_core/job/adapters/kubernetes/k8s_job_info.rb +9 -0
- data/lib/ood_core/job/adapters/kubernetes/resources.rb +38 -10
- data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb +140 -46
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +16 -9
- data/lib/ood_core/job/adapters/slurm.rb +1 -0
- data/lib/ood_core/job/adapters/torque.rb +2 -0
- data/lib/ood_core/job/script.rb +8 -1
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -1
- metadata +27 -6
- data/.travis.yml +0 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 266e24cd2e11c2c712b6128bdc3f82bf9781ae9885f8f0eb21439cb80c889c90
|
|
4
|
+
data.tar.gz: 01e682b6313468371076cdfd4ff2df2f3c06c661af9b4d7d7a65b7dcf3e2d836
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 22721c9d368ec44533d93914f977576ee77786ad0926976fa24067c7f353104edd32baffac34723b730e7f711c5b7581cf4f72f6232d050c719edcfc1b3cb14f
|
|
7
|
+
data.tar.gz: 5d99a3c782aad5e420333653e51073cdb6535b7689053452da7ee7ef234292769e70d4010d751aacd4576daf531700136c2aa0ef7a2efc31edcb29e4c01c8be4
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
name: Unit Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- master
|
|
7
|
+
pull_request:
|
|
8
|
+
branches:
|
|
9
|
+
- master
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
tests:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- name: checkout
|
|
17
|
+
uses: actions/checkout@v2
|
|
18
|
+
|
|
19
|
+
- name: Setup Ruby using Bundler
|
|
20
|
+
uses: ruby/setup-ruby@v1
|
|
21
|
+
with:
|
|
22
|
+
ruby-version: "2.7.1"
|
|
23
|
+
bundler-cache: true
|
|
24
|
+
bundler: "2.1.4"
|
|
25
|
+
|
|
26
|
+
- name: install gems
|
|
27
|
+
run: bundle install
|
|
28
|
+
|
|
29
|
+
- name: test
|
|
30
|
+
run: bundle exec rake spec
|
data/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,87 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.17.0] - 2021-05-26
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- All Kubernetes resources now have the same labels in [280](https://github.com/OSC/ood_core/pull/280).
|
|
15
|
+
- Kubernetes does not crash when no configmap is defined in [282](https://github.com/OSC/ood_core/pull/282).
|
|
16
|
+
- Kubernetes will not specify init containers if there are none in
|
|
17
|
+
[284](https://github.com/OSC/ood_core/pull/284).
|
|
18
|
+
|
|
19
|
+
### Added
|
|
20
|
+
|
|
21
|
+
- Kubernetes, Slurm and Torque now support the script option `gpus_per_node` in
|
|
22
|
+
[266](https://github.com/OSC/ood_core/pull/266).
|
|
23
|
+
- Kubernetes will now save the pod.yml into the staged root in
|
|
24
|
+
[277](https://github.com/OSC/ood_core/pull/277).
|
|
25
|
+
- Kubernetes now allows for node selector in [264](https://github.com/OSC/ood_core/pull/264).
|
|
26
|
+
- Kubernetes pods now have access to the environment variable POD_NAMESPACE in
|
|
27
|
+
[275](https://github.com/OSC/ood_core/pull/275).
|
|
28
|
+
- Kubernetes pods can now specify the image pull policy in [272](https://github.com/OSC/ood_core/pull/272).
|
|
29
|
+
- Cluster config's batch_connect now supports `ssh_allow` to disable sshing to compute
|
|
30
|
+
nodes per cluster in [286](https://github.com/OSC/ood_core/pull/286).
|
|
31
|
+
- Kubernetes will now add the templated script content to a configmap in
|
|
32
|
+
[273](https://github.com/OSC/ood_core/pull/273).
|
|
33
|
+
|
|
34
|
+
### Changed
|
|
35
|
+
|
|
36
|
+
- Kubernetes username prefix no longer appends a - in [271](https://github.com/OSC/ood_core/pull/271).
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
## [0.16.1] - 2021-04-23
|
|
41
|
+
### Fixed
|
|
42
|
+
- memoized some allow? variables to have better support around ACLs in
|
|
43
|
+
[267](https://github.com/OSC/ood_core/pull/267)
|
|
44
|
+
|
|
45
|
+
## [0.16.0] - 2021-04-20
|
|
46
|
+
### Fixed
|
|
47
|
+
- tmux 2.7+ bug in the linux host adapter in [258](https://github.com/OSC/ood_core/pull/258)
|
|
48
|
+
and [259](https://github.com/OSC/ood_core/pull/259).
|
|
49
|
+
|
|
50
|
+
### Changed
|
|
51
|
+
|
|
52
|
+
- Changed how k8s configmaps are defined in [251](https://github.com/OSC/ood_core/pull/251).
|
|
53
|
+
The data structure now expects a key called files which is an array of objects that hold
|
|
54
|
+
filename, data, mount_path, sub_path and init_mount_path.
|
|
55
|
+
[255](https://github.com/OSC/ood_core/pull/255) also relates to this interface change.
|
|
56
|
+
|
|
57
|
+
### Added
|
|
58
|
+
|
|
59
|
+
- The k8s adapter can now specify environment variables and creates defaults
|
|
60
|
+
in [252](https://github.com/OSC/ood_core/pull/252).
|
|
61
|
+
- The k8s adapter can now specify image pull secrets in [253](https://github.com/OSC/ood_core/pull/253).
|
|
62
|
+
|
|
63
|
+
## [0.15.1] - 2021-02-25
|
|
64
|
+
### Fixed
|
|
65
|
+
- kubernetes adapter uses the full module for helpers in [245](https://github.com/OSC/ood_core/pull/245).
|
|
66
|
+
|
|
67
|
+
### Changed
|
|
68
|
+
- kubernetes pods spawn with runAsNonRoot set to true in [247](https://github.com/OSC/ood_core/pull/247).
|
|
69
|
+
- kubernetes pods can spawn with supplemental groups along with some other security defaults in
|
|
70
|
+
[246](https://github.com/OSC/ood_core/pull/246).
|
|
71
|
+
|
|
72
|
+
## [0.15.0] - 2021-01-26
|
|
73
|
+
### Fixed
|
|
74
|
+
- ccq adapter now accepts job names with spaces in [210](https://github.com/OSC/ood_core/pull/209)
|
|
75
|
+
- k8s correctly handles having no mount volumes in [239](https://github.com/OSC/ood_core/pull/239)
|
|
76
|
+
|
|
77
|
+
### Added
|
|
78
|
+
- k8s adapter now applies account metadata to resources in [216](https://github.com/OSC/ood_core/pull/216) and
|
|
79
|
+
[231](https://github.com/OSC/ood_core/pull/231)
|
|
80
|
+
- k8s adapter can now prefix namespaces in [218](https://github.com/OSC/ood_core/pull/218)
|
|
81
|
+
- k8s adapter now applies time limits to pods in [224](https://github.com/OSC/ood_core/pull/224)
|
|
82
|
+
|
|
83
|
+
### Changed
|
|
84
|
+
- testing automation is now done in github actions in [221](https://github.com/OSC/ood_core/pull/221)
|
|
85
|
+
- update bundler to 2.1.4 and ruby to 2.7 in [235](https://github.com/OSC/ood_core/pull/235)
|
|
86
|
+
- k8s adapter more appropriately labels unschedulable pods as queued in [230](https://github.com/OSC/ood_core/pull/230)
|
|
87
|
+
- k8s adapter now uses the script#ood_connection_info API instead of script#native in
|
|
88
|
+
[222](https://github.com/OSC/ood_core/pull/222)
|
|
89
|
+
|
|
9
90
|
## [0.14.0] - 2020-10-01
|
|
10
91
|
### Added
|
|
11
92
|
- Kubernetes adapter in PR [156](https://github.com/OSC/ood_core/pull/156)
|
|
@@ -255,7 +336,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
|
255
336
|
### Added
|
|
256
337
|
- Initial release!
|
|
257
338
|
|
|
258
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
|
339
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.17.0...HEAD
|
|
340
|
+
[0.17.0]: https://github.com/OSC/ood_core/compare/v0.16.1...v0.17.0
|
|
341
|
+
[0.16.1]: https://github.com/OSC/ood_core/compare/v0.16.0...v0.16.1
|
|
342
|
+
[0.16.0]: https://github.com/OSC/ood_core/compare/v0.15.1...v0.16.0
|
|
343
|
+
[0.15.1]: https://github.com/OSC/ood_core/compare/v0.15.0...v0.15.1
|
|
344
|
+
[0.15.0]: https://github.com/OSC/ood_core/compare/v0.14.0...v0.15.0
|
|
259
345
|
[0.14.0]: https://github.com/OSC/ood_core/compare/v0.13.0...v0.14.0
|
|
260
346
|
[0.13.0]: https://github.com/OSC/ood_core/compare/v0.12.0...v0.13.0
|
|
261
347
|
[0.12.0]: https://github.com/OSC/ood_core/compare/v0.11.4...v0.12.0
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# OodCore
|
|
2
2
|
|
|
3
|
-
[](https://github.com/OSC/ood_core/actions?query=workflow%3A%22Unit+Tests%22)
|
|
4
4
|

|
|
5
5
|

|
|
6
6
|
|
data/lib/ood_core/cluster.rb
CHANGED
|
@@ -78,7 +78,9 @@ module OodCore
|
|
|
78
78
|
# Whether the login feature is allowed
|
|
79
79
|
# @return [Boolean] is login allowed
|
|
80
80
|
def login_allow?
|
|
81
|
-
|
|
81
|
+
return @login_allow if defined?(@login_allow)
|
|
82
|
+
|
|
83
|
+
@login_allow = (allow? && !login_config.empty?)
|
|
82
84
|
end
|
|
83
85
|
|
|
84
86
|
# Build a job adapter from the job configuration
|
|
@@ -90,9 +92,11 @@ module OodCore
|
|
|
90
92
|
# Whether the job feature is allowed based on the ACLs
|
|
91
93
|
# @return [Boolean] is the job feature allowed
|
|
92
94
|
def job_allow?
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
95
|
+
return @job_allow if defined?(@job_allow)
|
|
96
|
+
|
|
97
|
+
@job_allow = (allow? && ! job_config.empty? && build_acls(
|
|
98
|
+
job_config.fetch(:acls, []).map(&:to_h)
|
|
99
|
+
).all?(&:allow?))
|
|
96
100
|
end
|
|
97
101
|
|
|
98
102
|
# The batch connect template configuration used for this cluster
|
|
@@ -138,7 +142,18 @@ module OodCore
|
|
|
138
142
|
# Whether this cluster is allowed to be used
|
|
139
143
|
# @return [Boolean] whether cluster is allowed
|
|
140
144
|
def allow?
|
|
141
|
-
|
|
145
|
+
return @allow if defined?(@allow)
|
|
146
|
+
|
|
147
|
+
@allow = acls.all?(&:allow?)
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Whether this cluster supports SSH to batch connect nodes
|
|
151
|
+
# @return [Boolean] whether cluster supports SSH to batch connect node
|
|
152
|
+
def batch_connect_ssh_allow?
|
|
153
|
+
return @batch_connect_ssh_allow if defined?(@batch_connect_ssh_allow)
|
|
154
|
+
return @batch_connect_ssh_allow = true if batch_connect_config.nil?
|
|
155
|
+
|
|
156
|
+
@batch_connect_ssh_allow = batch_connect_config.fetch(:ssh_allow, true)
|
|
142
157
|
end
|
|
143
158
|
|
|
144
159
|
# The comparison operator
|
|
@@ -203,6 +203,10 @@ module OodCore
|
|
|
203
203
|
'ccq_ood_script_'
|
|
204
204
|
end
|
|
205
205
|
|
|
206
|
+
def ccqstat_regex
|
|
207
|
+
/^(?<id>\S+)\s+(?<name>.+)\s+(?<username>\S+)\s+(?<scheduler>\S+)\s+(?<status>\S+)\s*$/
|
|
208
|
+
end
|
|
209
|
+
|
|
206
210
|
def parse_job_id_from_ccqsub(output)
|
|
207
211
|
match_data = /#{jobid_regex}/.match(output)
|
|
208
212
|
# match_data could be nil, OR re-configured jobid_regex could be looking for a different named group
|
|
@@ -236,28 +240,31 @@ module OodCore
|
|
|
236
240
|
def info_from_ccqstat(data)
|
|
237
241
|
infos = []
|
|
238
242
|
|
|
239
|
-
data.to_s.
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
infos << Info.new(line_to_hash(words)) if words.size == 5
|
|
243
|
+
data.to_s.lines.drop(1).each do |line|
|
|
244
|
+
match_data = ccqstat_regex.match(line)
|
|
245
|
+
infos << Info.new(ccqstat_match_to_hash(match_data)) if valid_ccqstat_match?(match_data)
|
|
244
246
|
end
|
|
245
247
|
|
|
246
248
|
infos
|
|
247
249
|
end
|
|
248
250
|
|
|
249
|
-
def
|
|
250
|
-
return unless words.size == 5
|
|
251
|
-
|
|
251
|
+
def ccqstat_match_to_hash(match)
|
|
252
252
|
data_hash = {}
|
|
253
|
-
data_hash[:id] =
|
|
254
|
-
data_hash[:
|
|
255
|
-
data_hash[:
|
|
256
|
-
|
|
253
|
+
data_hash[:id] = match.named_captures.fetch('id', nil)
|
|
254
|
+
data_hash[:job_owner] = match.named_captures.fetch('username', nil)
|
|
255
|
+
data_hash[:status] = get_state(match.named_captures.fetch('status', nil))
|
|
256
|
+
|
|
257
|
+
# The regex leaves trailing empty spaces. There's no way to tell if they're _actually_
|
|
258
|
+
# a part of the job name or not, so we assume they're not and add the rstrip.
|
|
259
|
+
data_hash[:job_name] = match.named_captures.fetch('name', nil).to_s.rstrip
|
|
257
260
|
|
|
258
261
|
data_hash
|
|
259
262
|
end
|
|
260
263
|
|
|
264
|
+
def valid_ccqstat_match?(match)
|
|
265
|
+
!match.nil? && !match.named_captures.fetch('id', nil).nil?
|
|
266
|
+
end
|
|
267
|
+
|
|
261
268
|
def get_state(state)
|
|
262
269
|
STATE_MAP.fetch(state, :undetermined)
|
|
263
270
|
end
|
|
@@ -7,7 +7,7 @@ module OodCore
|
|
|
7
7
|
using Refinements::HashExtensions
|
|
8
8
|
|
|
9
9
|
def self.build_kubernetes(config)
|
|
10
|
-
batch = Adapters::Kubernetes::Batch.new(config.to_h.symbolize_keys
|
|
10
|
+
batch = Adapters::Kubernetes::Batch.new(config.to_h.symbolize_keys)
|
|
11
11
|
Adapters::Kubernetes.new(batch)
|
|
12
12
|
end
|
|
13
13
|
end
|
|
@@ -3,31 +3,31 @@ require "json"
|
|
|
3
3
|
|
|
4
4
|
class OodCore::Job::Adapters::Kubernetes::Batch
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
Helper = OodCore::Job::Adapters::Kubernetes::Helper
|
|
9
|
-
Resources = OodCore::Job::Adapters::Kubernetes::Resources
|
|
6
|
+
require_relative "helper"
|
|
7
|
+
require_relative "k8s_job_info"
|
|
10
8
|
|
|
11
9
|
using OodCore::Refinements::HashExtensions
|
|
12
10
|
|
|
13
11
|
class Error < StandardError; end
|
|
12
|
+
class NotFoundError < StandardError; end
|
|
14
13
|
|
|
15
|
-
attr_reader :config_file, :bin, :
|
|
14
|
+
attr_reader :config_file, :bin, :cluster, :mounts
|
|
16
15
|
attr_reader :all_namespaces, :using_context, :helper
|
|
17
|
-
attr_reader :username_prefix
|
|
16
|
+
attr_reader :username_prefix, :namespace_prefix
|
|
18
17
|
|
|
19
|
-
def initialize(options = {}
|
|
18
|
+
def initialize(options = {})
|
|
20
19
|
options = options.to_h.symbolize_keys
|
|
21
20
|
|
|
22
21
|
@config_file = options.fetch(:config_file, default_config_file)
|
|
23
22
|
@bin = options.fetch(:bin, '/usr/bin/kubectl')
|
|
24
|
-
@
|
|
23
|
+
@cluster = options.fetch(:cluster, 'open-ondemand')
|
|
25
24
|
@mounts = options.fetch(:mounts, []).map { |m| m.to_h.symbolize_keys }
|
|
26
25
|
@all_namespaces = options.fetch(:all_namespaces, false)
|
|
27
|
-
@username_prefix = options.fetch(:username_prefix,
|
|
26
|
+
@username_prefix = options.fetch(:username_prefix, '')
|
|
27
|
+
@namespace_prefix = options.fetch(:namespace_prefix, '')
|
|
28
28
|
|
|
29
29
|
@using_context = false
|
|
30
|
-
@helper =
|
|
30
|
+
@helper = OodCore::Job::Adapters::Kubernetes::Helper.new
|
|
31
31
|
|
|
32
32
|
begin
|
|
33
33
|
make_kubectl_config(options)
|
|
@@ -44,7 +44,10 @@ class OodCore::Job::Adapters::Kubernetes::Batch
|
|
|
44
44
|
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
|
|
45
45
|
raise ArgumentError, 'Must specify the script' if script.nil?
|
|
46
46
|
|
|
47
|
-
resource_yml, id = generate_id_yml(script
|
|
47
|
+
resource_yml, id = generate_id_yml(script)
|
|
48
|
+
if !script.workdir.nil? && Dir.exist?(script.workdir)
|
|
49
|
+
File.open(File.join(script.workdir, 'pod.yml'), 'w') { |f| f.write resource_yml }
|
|
50
|
+
end
|
|
48
51
|
call("#{formatted_ns_cmd} create -f -", stdin: resource_yml)
|
|
49
52
|
|
|
50
53
|
id
|
|
@@ -92,16 +95,11 @@ class OodCore::Job::Adapters::Kubernetes::Batch
|
|
|
92
95
|
end
|
|
93
96
|
|
|
94
97
|
def info(id)
|
|
95
|
-
pod_json =
|
|
98
|
+
pod_json = safe_call('get', 'pod', id)
|
|
99
|
+
return OodCore::Job::Info.new({ id: id, status: 'completed' }) if pod_json.empty?
|
|
96
100
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
secret_json = call_json_output('get', 'secret', secret_name(id))
|
|
100
|
-
rescue
|
|
101
|
-
# it's ok if these don't exist
|
|
102
|
-
service_json ||= nil
|
|
103
|
-
secret_json ||= nil
|
|
104
|
-
end
|
|
101
|
+
service_json = safe_call('get', 'service', service_name(id))
|
|
102
|
+
secret_json = safe_call('get', 'secret', secret_name(id))
|
|
105
103
|
|
|
106
104
|
helper.info_from_json(pod_json: pod_json, service_json: service_json, secret_json: secret_json)
|
|
107
105
|
end
|
|
@@ -111,24 +109,27 @@ class OodCore::Job::Adapters::Kubernetes::Batch
|
|
|
111
109
|
end
|
|
112
110
|
|
|
113
111
|
def delete(id)
|
|
114
|
-
|
|
112
|
+
safe_call("delete", "pod", id)
|
|
113
|
+
safe_call("delete", "service", service_name(id))
|
|
114
|
+
safe_call("delete", "secret", secret_name(id))
|
|
115
|
+
safe_call("delete", "configmap", configmap_name(id))
|
|
116
|
+
end
|
|
115
117
|
|
|
118
|
+
private
|
|
119
|
+
|
|
120
|
+
def safe_call(verb, resource, id)
|
|
116
121
|
begin
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
122
|
+
case verb.to_s
|
|
123
|
+
when "get"
|
|
124
|
+
call_json_output('get', resource, id)
|
|
125
|
+
when "delete"
|
|
126
|
+
call("#{namespaced_cmd} delete #{resource} #{id}")
|
|
127
|
+
end
|
|
128
|
+
rescue NotFoundError
|
|
129
|
+
{}
|
|
123
130
|
end
|
|
124
131
|
end
|
|
125
132
|
|
|
126
|
-
def configmap_mount_path
|
|
127
|
-
'/ood'
|
|
128
|
-
end
|
|
129
|
-
|
|
130
|
-
private
|
|
131
|
-
|
|
132
133
|
# helper to help format multi-line yaml data from the submit.yml into
|
|
133
134
|
# multi-line yaml in the pod.yml.erb
|
|
134
135
|
def config_data_lines(data)
|
|
@@ -148,32 +149,57 @@ class OodCore::Job::Adapters::Kubernetes::Batch
|
|
|
148
149
|
end
|
|
149
150
|
|
|
150
151
|
def k8s_username
|
|
151
|
-
|
|
152
|
+
"#{username_prefix}#{username}"
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
def user
|
|
156
|
+
@user ||= Etc.getpwnam(username)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def home_dir
|
|
160
|
+
user.dir
|
|
152
161
|
end
|
|
153
162
|
|
|
154
163
|
def run_as_user
|
|
155
|
-
|
|
164
|
+
user.uid
|
|
156
165
|
end
|
|
157
166
|
|
|
158
167
|
def run_as_group
|
|
159
|
-
|
|
168
|
+
user.gid
|
|
160
169
|
end
|
|
161
170
|
|
|
162
171
|
def fs_group
|
|
163
172
|
run_as_group
|
|
164
173
|
end
|
|
165
174
|
|
|
175
|
+
def group
|
|
176
|
+
Etc.getgrgid(run_as_group).name
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
def default_env
|
|
180
|
+
{
|
|
181
|
+
USER: username,
|
|
182
|
+
UID: run_as_user,
|
|
183
|
+
HOME: home_dir,
|
|
184
|
+
GROUP: group,
|
|
185
|
+
GID: run_as_group,
|
|
186
|
+
}
|
|
187
|
+
end
|
|
188
|
+
|
|
166
189
|
# helper to template resource yml you're going to submit and
|
|
167
190
|
# create an id.
|
|
168
|
-
def generate_id_yml(
|
|
169
|
-
|
|
191
|
+
def generate_id_yml(script)
|
|
192
|
+
native_data = script.native
|
|
193
|
+
container = helper.container_from_native(native_data[:container], default_env)
|
|
170
194
|
id = generate_id(container.name)
|
|
171
|
-
configmap = helper.configmap_from_native(native_data, id)
|
|
172
|
-
init_containers = helper.init_ctrs_from_native(native_data[:init_containers])
|
|
173
|
-
spec = Resources::PodSpec.new(container, init_containers: init_containers)
|
|
195
|
+
configmap = helper.configmap_from_native(native_data, id, script.content)
|
|
196
|
+
init_containers = helper.init_ctrs_from_native(native_data[:init_containers], container.env)
|
|
197
|
+
spec = OodCore::Job::Adapters::Kubernetes::Resources::PodSpec.new(container, init_containers: init_containers)
|
|
174
198
|
all_mounts = native_data[:mounts].nil? ? mounts : mounts + native_data[:mounts]
|
|
199
|
+
node_selector = native_data[:node_selector].nil? ? {} : native_data[:node_selector]
|
|
200
|
+
gpu_type = native_data[:gpu_type].nil? ? "nvidia.com/gpu" : native_data[:gpu_type]
|
|
175
201
|
|
|
176
|
-
template = ERB.new(File.read(resource_file))
|
|
202
|
+
template = ERB.new(File.read(resource_file), nil, '-')
|
|
177
203
|
|
|
178
204
|
[template.result(binding), id]
|
|
179
205
|
end
|
|
@@ -204,15 +230,11 @@ class OodCore::Job::Adapters::Kubernetes::Batch
|
|
|
204
230
|
end
|
|
205
231
|
|
|
206
232
|
def namespace
|
|
207
|
-
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
def default_namespace
|
|
211
|
-
username
|
|
233
|
+
"#{namespace_prefix}#{username}"
|
|
212
234
|
end
|
|
213
235
|
|
|
214
236
|
def context
|
|
215
|
-
|
|
237
|
+
cluster
|
|
216
238
|
end
|
|
217
239
|
|
|
218
240
|
def default_config_file
|
|
@@ -264,7 +286,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
|
|
|
264
286
|
|
|
265
287
|
def pod_info_from_json(pod)
|
|
266
288
|
hash = helper.pod_info_from_json(pod)
|
|
267
|
-
|
|
289
|
+
K8sJobInfo.new(hash)
|
|
268
290
|
rescue Helper::K8sDataError
|
|
269
291
|
# FIXME: silently eating error, could probably use a logger
|
|
270
292
|
nil
|
|
@@ -318,14 +340,14 @@ class OodCore::Job::Adapters::Kubernetes::Batch
|
|
|
318
340
|
locale = "--region=#{region}" unless region.nil?
|
|
319
341
|
|
|
320
342
|
# gke cluster name can probably differ from what ood calls the cluster
|
|
321
|
-
cmd = "gcloud container clusters get-credentials #{locale} #{
|
|
343
|
+
cmd = "gcloud container clusters get-credentials #{locale} #{cluster}"
|
|
322
344
|
env = { 'KUBECONFIG' => config_file }
|
|
323
345
|
call(cmd, env)
|
|
324
346
|
end
|
|
325
347
|
|
|
326
348
|
def set_context
|
|
327
|
-
cmd = "#{base_cmd} config set-context #{
|
|
328
|
-
cmd << " --cluster=#{
|
|
349
|
+
cmd = "#{base_cmd} config set-context #{cluster}"
|
|
350
|
+
cmd << " --cluster=#{cluster} --namespace=#{namespace}"
|
|
329
351
|
cmd << " --user=#{k8s_username}"
|
|
330
352
|
|
|
331
353
|
call(cmd)
|
|
@@ -336,7 +358,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
|
|
|
336
358
|
server = config.fetch(:endpoint)
|
|
337
359
|
cert = config.fetch(:cert_authority_file, nil)
|
|
338
360
|
|
|
339
|
-
cmd = "#{base_cmd} config set-cluster #{
|
|
361
|
+
cmd = "#{base_cmd} config set-cluster #{cluster}"
|
|
340
362
|
cmd << " --server=#{server}"
|
|
341
363
|
cmd << " --certificate-authority=#{cert}" unless cert.nil?
|
|
342
364
|
|
|
@@ -344,7 +366,12 @@ class OodCore::Job::Adapters::Kubernetes::Batch
|
|
|
344
366
|
end
|
|
345
367
|
|
|
346
368
|
def call(cmd = '', env: {}, stdin: nil)
|
|
347
|
-
o,
|
|
348
|
-
s.success? ? o :
|
|
369
|
+
o, e, s = Open3.capture3(env, cmd, stdin_data: stdin.to_s)
|
|
370
|
+
s.success? ? o : interpret_and_raise(e)
|
|
371
|
+
end
|
|
372
|
+
|
|
373
|
+
def interpret_and_raise(stderr)
|
|
374
|
+
raise NotFoundError, stderr if /^Error from server \(NotFound\):/.match(stderr)
|
|
375
|
+
raise(Error, stderr)
|
|
349
376
|
end
|
|
350
377
|
end
|