ood_core 0.11.2 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,350 @@
1
+ require "ood_core/refinements/hash_extensions"
2
+ require "json"
3
+
4
+ class OodCore::Job::Adapters::Kubernetes::Batch
5
+
6
+ require "ood_core/job/adapters/kubernetes/helper"
7
+
8
+ Helper = OodCore::Job::Adapters::Kubernetes::Helper
9
+ Resources = OodCore::Job::Adapters::Kubernetes::Resources
10
+
11
+ using OodCore::Refinements::HashExtensions
12
+
13
+ class Error < StandardError; end
14
+
15
+ attr_reader :config_file, :bin, :cluster_name, :mounts
16
+ attr_reader :all_namespaces, :using_context, :helper
17
+ attr_reader :username_prefix
18
+
19
# Build a kubectl-backed batch object from adapter options.
#
# @param options [#to_h] adapter configuration; recognised keys are
#   :config_file, :bin, :cluster_name, :mounts, :all_namespaces,
#   :username_prefix, plus :server and :auth which are consumed by
#   make_kubectl_config
# @param helper [Helper] object used to translate kubectl JSON
def initialize(options = {}, helper = Helper.new)
  opts = options.to_h.symbolize_keys

  @helper          = helper
  @using_context   = false
  @config_file     = opts.fetch(:config_file, default_config_file)
  @bin             = opts.fetch(:bin, '/usr/bin/kubectl')
  @cluster_name    = opts.fetch(:cluster_name, 'open-ondemand')
  @mounts          = opts.fetch(:mounts, []).map { |mount| mount.to_h.symbolize_keys }
  @all_namespaces  = opts.fetch(:all_namespaces, false)
  @username_prefix = opts.fetch(:username_prefix, nil)

  begin
    make_kubectl_config(opts)
  rescue StandardError
    # FIXME: could use a log here.
    # Reaching this point means the kubectl configuration commands failed;
    # construction deliberately continues on a best-effort basis.
  end
end
39
+
40
# Path of the ERB template for a resource type, located in the
# "templates" directory next to this file.
#
# @param resource_type [#to_s] template base name (defaults to 'pod')
# @return [String]
def resource_file(resource_type = 'pod')
  here = File.dirname(__FILE__)
  "#{here}/templates/#{resource_type}.yml.erb"
end
43
+
44
# Render the resource yml for a script and create it with kubectl.
#
# The dependency keyword arguments are accepted but not used here.
#
# @param script [#native] the script whose native data describes the pod
# @raise [ArgumentError] if script is nil
# @return [String] the generated id
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
  raise ArgumentError, 'Must specify the script' if script.nil?

  yml, pod_id = generate_id_yml(script.native)
  create_cmd = "#{formatted_ns_cmd} create -f -"
  call(create_cmd, stdin: yml)

  pod_id
end
52
+
53
# Derive an id from a name: lower-cased, spaces turned into dashes, and
# a random base-36 suffix appended.
#
# @param name [String]
# @return [String]
def generate_id(name)
  # 2_821_109_907_456 = 36**8
  suffix = rand(2_821_109_907_456).to_s(36)
  slug = name.downcase.tr(' ', '-')

  slug + '-' + suffix
end
57
+
58
# List pods as info objects, either across all namespaces or only in
# this user's namespace depending on the all_namespaces setting.
#
# @param attrs [Array<Symbol>, nil] accepted but not used here
# @return [Array] the info objects built by all_pods_to_info
def info_all(attrs: nil)
  list_cmd =
    all_namespaces ? "#{base_cmd} get pods -o json --all-namespaces" : "#{namespaced_cmd} get pods -o json"

  all_pods_to_info(call(list_cmd))
end
68
+
69
# List the jobs that belong to one or more owners.
#
# Uses Kernel#Array rather than Array.wrap: Array.wrap is not core Ruby
# and this file only activates the HashExtensions refinement, so it would
# only resolve when ActiveSupport happens to be loaded.
#
# @param owner [#to_s, Array<#to_s>] owner name(s) to keep
# @param attrs [Array<Symbol>, nil] requested attributes (forwarded to info_all)
# @return [Array] the info objects whose job_owner is in owner
def info_where_owner(owner, attrs: nil)
  owners = Array(owner).map(&:to_s)

  # must at least have job_owner to filter by job_owner
  attrs = Array(attrs) | [:job_owner] unless attrs.nil?

  info_all(attrs: attrs).select { |info| owners.include?(info.job_owner) }
end
77
+
78
# Yield every job from info_all, or return an enumerator when no block
# is given.
#
# @param attrs [Array<Symbol>, nil] forwarded to info_all
# @yield [job] each info object
def info_all_each(attrs: nil)
  if block_given?
    info_all(attrs: attrs).each { |job| yield job }
  else
    to_enum(:info_all_each, attrs: attrs)
  end
end
85
+
86
# Yield every job from info_where_owner, or return an enumerator when no
# block is given.
#
# @param owner [#to_s, Array<#to_s>] forwarded to info_where_owner
# @param attrs [Array<Symbol>, nil] forwarded to info_where_owner
# @yield [job] each info object
def info_where_owner_each(owner, attrs: nil)
  if block_given?
    info_where_owner(owner, attrs: attrs).each { |job| yield job }
  else
    to_enum(:info_where_owner_each, owner, attrs: attrs)
  end
end
93
+
94
# Build the info object for a single pod, enriched with its service and
# secret when they exist.
#
# The service and secret are looked up independently: previously both
# lookups shared one rescue, so a missing service silently skipped the
# secret lookup as well.
#
# @param id [String] the pod id
# @return [Object] whatever helper.info_from_json returns
# @raise [StandardError] anything raised while fetching the pod itself
def info(id)
  pod_json = call_json_output('get', 'pod', id)

  # it's ok if the service doesn't exist
  service_json = begin
    call_json_output('get', 'service', service_name(id))
  rescue StandardError
    nil
  end

  # it's ok if the secret doesn't exist
  secret_json = begin
    call_json_output('get', 'secret', secret_name(id))
  rescue StandardError
    nil
  end

  helper.info_from_json(pod_json: pod_json, service_json: service_json, secret_json: secret_json)
end
108
+
109
# Status of a single pod, taken from its info object.
#
# @param id [String] the pod id
# @return [Object] the status attribute of info(id)
def status(id)
  job_info = info(id)
  job_info.status
end
112
+
113
# Delete a pod and, best effort, its service, secret and configmap.
#
# Each auxiliary resource is deleted independently: previously the three
# deletes shared one rescue, so a failing service delete left the secret
# and the configmap behind.
#
# @param id [String] the pod id
# @raise [StandardError] if deleting the pod itself fails
# @return [nil]
def delete(id)
  call("#{namespaced_cmd} delete pod #{id}")

  %w[service secret configmap].each do |resource|
    begin
      resource_name = send("#{resource}_name", id)
      call("#{namespaced_cmd} delete #{resource} #{resource_name}")
    rescue StandardError
      # FIXME: retries? delete if exists?
      # just eat the results of deleting the auxiliary resources
      next
    end
  end

  nil
end
125
+
126
# Directory path returned for configmap mounts.
#
# @return [String]
def configmap_mount_path
  '/ood'
end
129
+
130
+ private
131
+
132
# Helper that formats multi-line yaml data from the submit.yml into
# multi-line yaml for the pod.yml.erb: the first line is returned as is
# and every following line gets an indent prefix.
#
# @param data [#to_s]
# @return [Array<String>] the lines, line endings included
def config_data_lines(data)
  data.to_s.each_line.with_index.map do |text, index|
    index.zero? ? text : text.prepend(" ")
  end
end
145
+
146
# Login name reported by Etc.getlogin, memoized once a truthy value has
# been obtained.
#
# @return [String, nil]
def username
  return @username if @username

  @username = Etc.getlogin
end
149
+
150
# The username, prefixed with "<username_prefix>-" when a prefix is
# configured.
#
# @return [String]
def k8s_username
  if username_prefix.nil?
    username
  else
    "#{username_prefix}-#{username}"
  end
end
153
+
154
# Numeric uid of the current username, from the passwd database.
#
# @return [Integer]
def run_as_user
  passwd_entry = Etc.getpwnam(username)
  passwd_entry.uid
end
157
+
158
# Numeric primary gid of the current username, from the passwd database.
#
# @return [Integer]
def run_as_group
  passwd_entry = Etc.getpwnam(username)
  passwd_entry.gid
end
161
+
162
# Filesystem group id; the same value as run_as_group.
#
# @return [Integer]
def fs_group
  run_as_group
end
165
+
166
# Helper that templates the resource yml you're going to submit and
# creates an id for it.
#
# NOTE: the template is rendered against this method's binding, so the
# local variable names below may be referenced by the template and are
# kept unchanged.
#
# @param native_data [Hash] native job data (:container, :init_containers,
#   :mounts and, optionally, :configmap)
# @return [Array(String, String)] the rendered yml and the generated id
def generate_id_yml(native_data)
  container = helper.container_from_native(native_data[:container])
  id = generate_id(container.name)
  configmap = helper.configmap_from_native(native_data, id)
  init_containers = helper.init_ctrs_from_native(native_data[:init_containers])
  spec = Resources::PodSpec.new(container, init_containers: init_containers)
  all_mounts =
    if native_data[:mounts].nil?
      mounts
    else
      mounts + native_data[:mounts]
    end

  template = ERB.new(File.read(resource_file))
  rendered = template.result(binding)

  [rendered, id]
end
180
+
181
# Helper that calls kubectl and returns parsed json data.
# verb, resource and id are the kubernetes parlance terms.
# example: 'kubectl get pod my-pod-id' is verb=get, resource=pod
# and id=my-pod-id
#
# @param stdin [#to_s, nil] data forwarded to the command's stdin
# @return [Hash] parsed output with symbolized keys; empty output is
#   treated as '{}'
def call_json_output(verb, resource, id, stdin: nil)
  raw = call("#{formatted_ns_cmd} #{verb} #{resource} #{id}", stdin: stdin)
  raw = '{}' if raw.empty?

  JSON.parse(raw, symbolize_names: true)
end
193
+
194
# Name of the service belonging to a pod id (delegates to the helper).
#
# @param id [String]
# @return [String]
def service_name(id)
  helper.service_name(id)
end
197
+
198
# Name of the secret belonging to a pod id (delegates to the helper).
#
# @param id [String]
# @return [String]
def secret_name(id)
  helper.secret_name(id)
end
201
+
202
# Name of the configmap belonging to a pod id (delegates to the helper).
#
# @param id [String]
# @return [String]
def configmap_name(id)
  helper.configmap_name(id)
end
205
+
206
# Namespace used for kubectl commands; currently always the default one.
#
# @return [String]
def namespace
  default_namespace
end
209
+
210
# The default namespace is the username.
#
# @return [String]
def default_namespace
  username
end
213
+
214
# The kubectl context name; the same value as the cluster name.
#
# @return [String]
def context
  cluster_name
end
217
+
218
# Kubeconfig path: $KUBECONFIG when set, otherwise ~/.kube/config.
#
# @return [String]
def default_config_file
  ENV.fetch('KUBECONFIG') { "#{Dir.home}/.kube/config" }
end
221
+
222
# Auth settings used when the configuration has no :auth entry.
#
# The type was previously misspelled 'managaged', which managed? never
# recognised. The keys are already symbols, so no key conversion is
# needed.
#
# @return [Hash{Symbol => String}]
def default_auth
  {
    type: 'managed'
  }
end
227
+
228
# Server settings used when the configuration has no :server entry.
# The literal keys are already symbols.
#
# @return [Hash{Symbol => String, nil}]
def default_server
  { endpoint: 'https://localhost:8080', cert_authority_file: nil }
end
234
+
235
# The namespaced kubectl command with json output requested.
#
# @return [String]
def formatted_ns_cmd
  [namespaced_cmd, '-o json'].join(' ')
end
238
+
239
# The base kubectl command scoped to the namespace.
#
# @return [String]
def namespaced_cmd
  [base_cmd, "--namespace=#{namespace}"].join(' ')
end
242
+
243
# The kubectl binary plus its kubeconfig flag, and the context flag once
# a context is in use.
#
# @return [String]
def base_cmd
  parts = ["#{bin} --kubeconfig=#{config_file}"]
  parts << "--context=#{context}" if using_context
  parts.join(' ')
end
248
+
249
# Convert the json output of 'kubectl get pods' into info objects.
#
# Pods that pod_info_from_json cannot convert (nil result) are dropped.
# A payload without an :items entry yields an empty list instead of
# raising NoMethodError.
#
# @param data [String] raw json text
# @return [Array] the info objects; empty when data is not valid json
def all_pods_to_info(data)
  json_data = JSON.parse(data, symbolize_names: true)
  pods = Array(json_data[:items])

  pods.map { |pod| pod_info_from_json(pod) }.compact
rescue JSON::ParserError
  # 'no resources in <namespace>' throws parse error
  []
end
264
+
265
# Build an info object from one pod's json.
#
# @param pod [Hash] a single pod entry
# @return [OodCore::Job::Info, nil] nil when the helper reports a
#   K8sDataError
def pod_info_from_json(pod)
  OodCore::Job::Info.new(helper.pod_info_from_json(pod))
rescue Helper::K8sDataError
  # FIXME: silently eating error, could probably use a logger
  nil
end
272
+
273
# Configure kubectl: first the cluster, then the authentication.
#
# @param config [Hash] options with optional :server and :auth entries;
#   default_server / default_auth are used when they are absent
def make_kubectl_config(config)
  server_settings = config.fetch(:server, default_server).to_h.symbolize_keys
  set_cluster(server_settings)

  auth_settings = config.fetch(:auth, default_auth).to_h.symbolize_keys
  configure_auth(auth_settings)
end
277
+
278
# Apply the authentication settings. Managed auth needs nothing, 'gke'
# runs the gcloud setup and 'oidc' sets a kubectl context; any other
# type is ignored.
#
# @param auth [Hash] must contain :type
# @raise [KeyError] if :type is missing
def configure_auth(auth)
  type = auth.fetch(:type)
  return if managed?(type)

  if type == 'gke'
    set_gke_config(auth)
  elsif type == 'oidc'
    set_context
  end
end
289
+
290
# Record that later kubectl commands should pass a --context flag.
#
# @return [true]
def use_context
  @using_context = true
end
293
+
294
# Whether the auth type is 'managed'. A nil type counts as managed
# (maybe it should not?).
#
# @param type [#to_s, nil]
# @return [Boolean]
def managed?(type)
  type.nil? || type.to_s == 'managed'
end
301
+
302
# Activate the gcloud service account, then fetch the cluster
# credentials.
#
# @param auth [Hash] must contain :svc_acct_file
# @raise [KeyError] if :svc_acct_file is missing
def set_gke_config(auth)
  key_file = auth.fetch(:svc_acct_file)
  call("gcloud auth activate-service-account --key-file=#{key_file}")

  set_gke_credentials(auth)
end
310
+
311
# Fetch GKE cluster credentials into the configured kubeconfig file.
#
# The environment is passed to #call through its env: keyword. It used
# to be passed positionally, which does not match #call's signature and
# raised ArgumentError.
#
# @param auth [Hash] may contain :zone and/or :region; :region wins when
#   both are given
def set_gke_credentials(auth)
  zone = auth.fetch(:zone, nil)
  region = auth.fetch(:region, nil)

  locale = nil
  locale = "--zone=#{zone}" unless zone.nil?
  locale = "--region=#{region}" unless region.nil?

  # gke cluster name can probably differ from what ood calls the cluster
  cmd = ['gcloud container clusters get-credentials', locale, cluster_name].compact.join(' ')
  env = { 'KUBECONFIG' => config_file }
  call(cmd, env: env)
end
325
+
326
# Create or update the kubectl context named after the cluster, then
# start using it.
def set_context
  context_cmd = [
    "#{base_cmd} config set-context #{cluster_name}",
    "--cluster=#{cluster_name} --namespace=#{namespace}",
    "--user=#{k8s_username}"
  ].join(' ')

  call(context_cmd)
  use_context
end
334
+
335
# Register the cluster endpoint (and certificate authority, when given)
# in the kubeconfig.
#
# @param config [Hash] must contain :endpoint; may contain
#   :cert_authority_file
# @raise [KeyError] if :endpoint is missing
def set_cluster(config)
  endpoint = config.fetch(:endpoint)
  ca_file = config.fetch(:cert_authority_file, nil)

  pieces = ["#{base_cmd} config set-cluster #{cluster_name}", "--server=#{endpoint}"]
  pieces << "--certificate-authority=#{ca_file}" unless ca_file.nil?

  call(pieces.join(' '))
end
345
+
346
# Run a command and return its stdout.
#
# @param cmd [String] the command line
# @param env [Hash] extra environment variables for the child process
# @param stdin [#to_s, nil] data written to the command's stdin
# @return [String] captured stdout
# @raise [Error] with the captured stderr when the command fails
def call(cmd = '', env: {}, stdin: nil)
  stdout, stderr, exit_status = Open3.capture3(env, cmd, stdin_data: stdin.to_s)
  raise(Error, stderr) unless exit_status.success?

  stdout
end
350
+ end
@@ -0,0 +1,298 @@
1
+ class OodCore::Job::Adapters::Kubernetes::Helper
2
+
3
+ require 'ood_core/job/adapters/kubernetes/resources'
4
+ require 'resolv'
5
+ require 'base64'
6
+
7
+ class K8sDataError < StandardError; end
8
+
9
+ Resources = OodCore::Job::Adapters::Kubernetes::Resources
10
+
11
# Extract info from json data. The data is expected to be from the kubectl
# command and conform to kubernetes' datatype structures.
#
# Returns { native: {host: localhost, port:80, password: sshhh }} in the info
# object field in lieu of writing a connection.yml
#
# @param pod_json [#to_h]
#   the pod data returned from 'kubectl get pod abc-123'
# @param service_json [#to_h]
#   the service data returned from 'kubectl get service abc-123-service'
# @param secret_json [#to_h]
#   the secret data returned from 'kubectl get secret abc-123-secret'
# @return [OodCore::Job::Info]
# @raise [K8sDataError] when the json cannot be read
def info_from_json(pod_json: nil, service_json: nil, secret_json: nil)
  info_hash = pod_info_from_json(pod_json)
  extras = [
    service_info_from_json(service_json),
    secret_info_from_json(secret_json)
  ]

  # can't just use deep_merge bc we don't depend *directly* on rails
  extras.each do |extra|
    info_hash[:native] = info_hash[:native].merge(extra[:native])
  end

  OodCore::Job::Info.new(info_hash)
rescue NoMethodError
  raise K8sDataError, "unable to read data correctly from json"
end
36
+
37
# Turn a container hash into a Kubernetes::Resources::Container
#
# @param container [#to_h]
#   the input container hash
# @return [OodCore::Job::Adapters::Kubernetes::Resources::Container]
def container_from_native(container)
  name = container[:name]
  image = container[:image]
  settings = {
    command: parse_command(container[:command]),
    port: container[:port],
    env: container.fetch(:env, []),
    memory: container[:memory],
    cpu: container[:cpu],
    working_dir: container[:working_dir],
    restart_policy: container[:restart_policy]
  }

  Resources::Container.new(name, image, **settings)
end
55
+
56
# Parse a command string given from a user and return an array.
# If given an array, the input is simply returned back.
#
# @param cmd [#to_s]
#   the command to parse
# @return [Array<#to_s>]
#   the command parsed into an array of arguments
def parse_command(cmd)
  return cmd if cmd.is_a?(Array)

  Shellwords.split(cmd.to_s)
end
70
+
71
# Turn a configmap hash into a Kubernetes::Resources::ConfigMap
# that can be used in templates. Needs an id so that the resulting
# configmap has a known name.
#
# @param native [#to_h]
#   the native data that may hold a :configmap entry
# @param id [#to_s]
#   the id to use for giving the configmap a name
# @return [OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap, nil]
#   nil when there is no :configmap entry
def configmap_from_native(native, id)
  raw = native.fetch(:configmap, nil)
  return nil if raw.nil?

  Resources::ConfigMap.new(configmap_name(id), raw[:filename], raw[:data])
end
90
+
91
# Parse initialization containers from native data.
#
# @param ctrs [Array<#to_h>, nil]
#   the raw init containers, each one a container hash
# @return [Array<OodCore::Job::Adapters::Kubernetes::Resources::Container>]
#   the array of init containers (empty when ctrs is nil)
def init_ctrs_from_native(ctrs)
  parsed = []
  ctrs&.each { |raw| parsed << container_from_native(raw) }
  parsed
end
108
+
109
# Name of the service that belongs to a pod id.
#
# @param id [String]
# @return [String]
def service_name(id)
  suffix = '-service'
  id + suffix
end
112
+
113
# Name of the secret that belongs to a pod id.
#
# @param id [String]
# @return [String]
def secret_name(id)
  suffix = '-secret'
  id + suffix
end
116
+
117
# Name of the configmap that belongs to a pod id.
#
# @param id [String]
# @return [String]
def configmap_name(id)
  suffix = '-configmap'
  id + suffix
end
120
+
121
# Extract pod info from json data. The data is expected to be from the kubectl
# command and conform to kubernetes' datatype structures.
#
# @param json_data [#to_h]
#   the pod data returned from 'kubectl get pod abc-123'
# @return [#to_h]
#   the hash of info expected from adapters
# @raise [K8sDataError] when the json does not have the expected shape
def pod_info_from_json(json_data)
  pod_id = json_data.dig(:metadata, :name).to_s
  pod_name = name_from_metadata(json_data.dig(:metadata))
  pod_status = pod_status_from_json(json_data)
  owner = json_data.dig(:metadata, :namespace).to_s
  submitted = submission_time(json_data)
  dispatched = dispatch_time(json_data)
  wallclock = wallclock_time(json_data)
  native = { host: get_host(json_data.dig(:status, :hostIP)) }
  procs = procs_from_json(json_data)

  {
    id: pod_id,
    job_name: pod_name,
    status: pod_status,
    job_owner: owner,
    submission_time: submitted,
    dispatch_time: dispatched,
    wallclock_time: wallclock,
    native: native,
    procs: procs
  }
rescue NoMethodError
  # gotta raise an error because Info.new will throw an error if id is undefined
  raise K8sDataError, "unable to read data correctly from json"
end
146
+
147
+ private
148
+
149
# Resolve an ip to a host name, falling back to the ip itself when the
# lookup fails.
#
# @param ip [String, nil]
# @return [String, nil]
def get_host(ip)
  begin
    Resolv.getname(ip)
  rescue Resolv::ResolvError
    ip
  end
end
154
+
155
# Pick a job name from pod metadata: the 'app.kubernetes.io/name' label,
# then the 'k8s-app' label, then the pod name (pod-id but better than
# nil?).
#
# @param metadata [Hash]
# @return [Object, nil] the first non-nil candidate
def name_from_metadata(metadata)
  lookups = [
    [:labels, :'app.kubernetes.io/name'],
    [:labels, :'k8s-app'],
    [:name]
  ]

  lookups.each do |path|
    found = metadata.dig(*path)
    return found unless found.nil?
  end

  nil
end
161
+
162
# Extract the node port of the first port of a service.
#
# @param json_data [Hash, nil] service json
# @return [Hash] { native: { port: ... } }, or the empty native hash when
#   the data cannot be read
def service_info_from_json(json_data)
  # all we need is the port - .spec.ports[0].nodePort
  first_port = json_data.dig(:spec, :ports)[0]

  { native: { port: first_port.dig(:nodePort) } }
rescue StandardError
  empty_native
end
174
+
175
# Extract and base64-decode the password of a secret.
#
# @param json_data [Hash, nil] secret json
# @return [Hash] { native: { password: ... } }, or the empty native hash
#   when the data cannot be read
def secret_info_from_json(json_data)
  encoded = json_data.dig(:data, :password)

  { native: { password: Base64.decode64(encoded) } }
rescue StandardError
  empty_native
end
186
+
187
# Fallback result for service/secret data that could not be read.
#
# @return [Hash] a fresh { native: {} }
def empty_native
  { native: {} }
end
192
+
193
# Epoch seconds at which the first container started, for completed or
# running pods.
#
# @param json_data [Hash] pod json
# @return [Integer, nil] nil for other states or when no start timestamp
#   is present
def dispatch_time(json_data)
  status = pod_status_from_json(json_data)
  return nil if status == 'undetermined'

  state_data = json_data.dig(:status, :containerStatuses)[0].dig(:state)

  # the status object stays on the left-hand side of == on purpose
  started_at =
    if status == 'completed'
      state_data.dig(:terminated, :startedAt)
    elsif status == 'running'
      state_data.dig(:running, :startedAt)
    end

  return nil if started_at.nil?

  DateTime.parse(started_at).to_time.to_i
end
208
+
209
# Seconds elapsed between the dispatch time and the end time of a pod.
#
# @param json_data [Hash] pod json
# @return [Integer, nil] nil when the status is undetermined or either
#   end of the interval is unknown
def wallclock_time(json_data)
  status = pod_status_from_json(json_data)
  return nil if status == 'undetermined'

  state_data = json_data.dig(:status, :containerStatuses)[0].dig(:state)
  started = dispatch_time(json_data)
  return nil if started.nil?

  finished = end_time(status, state_data)
  finished - started unless finished.nil?
end
221
+
222
# Epoch seconds at which a pod ended: the container's finishedAt for a
# completed pod, the current time for a running one.
#
# @param status [Object] compared with == against 'completed'/'running'
# @param state_data [Hash] the container state
# @return [Integer, nil] nil for any other status
def end_time(status, state_data)
  if status == 'completed'
    finished_at = state_data.dig(:terminated, :finishedAt)
    DateTime.parse(finished_at).to_time.to_i
  elsif status == 'running'
    DateTime.now.to_time.to_i
  end
end
234
+
235
# Epoch seconds at which the pod was submitted.
#
# @param json_data [Hash] pod json
# @return [Integer]
def submission_time(json_data)
  pod_status = json_data.dig(:status)
  timestamp = pod_status.dig(:startTime)

  if timestamp.nil?
    # the pod is in some pending state limbo, so the best guess is the
    # first condition's transition time
    timestamp = pod_status.dig(:conditions)[0].dig(:lastTransitionTime)
  end

  DateTime.parse(timestamp).to_time.to_i
end
251
+
252
# Map the state of a pod's first container to a job status.
#
# A waiting state wins over a terminated state, which wins over a
# running state; a pod without container statuses is 'undetermined'.
#
# @param json_data [Hash] pod json
# @return [OodCore::Job::Status]
def pod_status_from_json(json_data)
  pod_status = json_data.dig(:status)
  container_statuses = pod_status.dig(:containerStatuses)

  # if there are no statuses, it means you're pending, probably unschedulable
  return OodCore::Job::Status.new(state: 'undetermined') if container_statuses.nil?

  # only support 1 container/pod
  json_state = container_statuses[0].dig(:state)

  state =
    if !json_state.dig(:waiting).nil?
      'queued'
    elsif !json_state.dig(:terminated).nil?
      terminated_state(json_state)
    elsif !json_state.dig(:running).nil?
      'running'
    else
      'undetermined'
    end

  OodCore::Job::Status.new(state: state)
end
270
+
271
# State name for a terminated container: 'suspended' when it terminated
# with reason 'Error', 'completed' otherwise.
#
# @param status [Hash] the container state
# @return [String]
def terminated_state(status)
  status.dig(:terminated, :reason) == 'Error' ? 'suspended' : 'completed'
end
279
+
280
# Number of processors requested by the first container's cpu limit.
#
# A millicore limit ("1500m") is rounded up to whole cores with a
# minimum of 1. The previous (m + 1000) / 1000 over-counted exact
# multiples (1000m became 2, 2000m became 3). Any other value is
# returned unchanged.
#
# @param json_data [Hash] pod json
# @return [String, Object, nil] whole cores as a string for millicore
#   limits; otherwise the raw limit (nil when there is none)
def procs_from_json(json_data)
  containers = json_data.dig(:spec, :containers)
  resources = containers[0].dig(:resources)
  cpu = resources.dig(:limits, :cpu)

  # ok to return the raw value bc nil.to_i == 0 and we'd rather return
  # nil (undefined) than 0 which is confusing.
  millicores = cpu.to_s[/\A(\d+)m\z/, 1]
  return cpu if millicores.nil?

  # ceiling division, and at least 1 bc 200m (or 0m) would otherwise be 0
  cores = (millicores.to_i + 999) / 1000
  [cores, 1].max.to_s
end
298
+ end