ood_core 0.12.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,372 @@
1
+ require "ood_core/refinements/hash_extensions"
2
+ require "json"
3
+
4
+ class OodCore::Job::Adapters::Kubernetes::Batch
5
+
6
+ require_relative "helper"
7
+ require_relative "k8s_job_info"
8
+
9
+ using OodCore::Refinements::HashExtensions
10
+
11
+ class Error < StandardError; end
12
+ class NotFoundError < StandardError; end
13
+
14
+ attr_reader :config_file, :bin, :cluster, :mounts
15
+ attr_reader :all_namespaces, :using_context, :helper
16
+ attr_reader :username_prefix, :namespace_prefix
17
+
18
# Build the batch object from a configuration hash and make a best-effort
# attempt to write the matching kubectl configuration.
#
# @param options [#to_h] adapter options. Keys read here: :config_file, :bin,
#   :cluster, :mounts, :all_namespaces, :username_prefix, :namespace_prefix.
#   The whole (symbolized) hash is also handed to make_kubectl_config.
def initialize(options = {})
  opts = options.to_h.symbolize_keys

  @config_file      = opts.fetch(:config_file, default_config_file)
  @bin              = opts.fetch(:bin, '/usr/bin/kubectl')
  @cluster          = opts.fetch(:cluster, 'open-ondemand')
  @mounts           = opts.fetch(:mounts, []).map { |mount| mount.to_h.symbolize_keys }
  @all_namespaces   = opts.fetch(:all_namespaces, false)
  @username_prefix  = opts.fetch(:username_prefix, nil)
  @namespace_prefix = opts.fetch(:namespace_prefix, '')

  @using_context = false
  @helper = OodCore::Job::Adapters::Kubernetes::Helper.new

  begin
    make_kubectl_config(opts)
  rescue StandardError
    # FIXME: could use a log here
    # Any failure while configuring kubectl (e.g. 'kubectl config set-cluster')
    # is deliberately ignored so the object can still be constructed.
  end
end
39
+
40
# Path of the ERB template for a resource type, inside the `templates`
# directory that sits next to this source file.
#
# @param resource_type [#to_s] base name of the template
# @return [String]
def resource_file(resource_type = 'pod')
  template_name = "#{resource_type}.yml.erb"
  "#{File.dirname(__FILE__)}/templates/#{template_name}"
end
43
+
44
# Render the resource yml for a script and pipe it to `kubectl create -f -`.
#
# The dependency keywords (after, afterok, afternotok, afterany) are accepted
# but not used by this method.
#
# @param script [#native] the script to submit
# @return [String] the generated id
# @raise [ArgumentError] when script is nil
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
  raise ArgumentError, 'Must specify the script' if script.nil?

  yml, job_id = generate_id_yml(script)
  create_cmd = "#{formatted_ns_cmd} create -f -"
  call(create_cmd, stdin: yml)

  job_id
end
52
+
53
# Derive an id from a name: lower-cased, spaces turned into dashes, followed
# by a dash and a random base-36 suffix.
#
# @param name [String]
# @return [String]
def generate_id(name)
  slug = name.downcase.tr(' ', '-')
  # 2_821_109_907_456 = 36**8, so the suffix has at most 8 base-36 digits
  suffix = rand(2_821_109_907_456).to_s(36)
  slug + '-' + suffix
end
57
+
58
# List pods with kubectl and convert them via all_pods_to_info.
#
# Queries every namespace when all_namespaces is set, otherwise only the
# user's namespace. The attrs keyword is accepted but not used here.
#
# @return [Array] whatever all_pods_to_info builds from the kubectl output
def info_all(attrs: nil)
  listing_cmd =
    if all_namespaces
      "#{base_cmd} get pods -o json --all-namespaces"
    else
      "#{namespaced_cmd} get pods -o json"
    end

  all_pods_to_info(call(listing_cmd))
end
68
+
69
# Select the jobs from info_all whose job_owner is one of the given owners.
#
# @param owner [#to_s, Array<#to_s>] one owner or a list of owners
# @param attrs [Array, nil] passed on to info_all; :job_owner is added when
#   a list is given because the filter needs it
# @return [Array]
def info_where_owner(owner, attrs: nil)
  owners = Array.wrap(owner).map(&:to_s)

  # must at least have job_owner to filter by job_owner
  attrs = Array.wrap(attrs) | [:job_owner] unless attrs.nil?

  info_all(attrs: attrs).select { |job| owners.include?(job.job_owner) }
end
77
+
78
# Yield every job returned by info_all, or return an enumerator over this
# method when no block is given.
def info_all_each(attrs: nil)
  return to_enum(:info_all_each, attrs: attrs) unless block_given?

  info_all(attrs: attrs).each { |job| yield job }
end
85
+
86
# Yield every job returned by info_where_owner, or return an enumerator over
# this method when no block is given.
def info_where_owner_each(owner, attrs: nil)
  return to_enum(:info_where_owner_each, owner, attrs: attrs) unless block_given?

  info_where_owner(owner, attrs: attrs).each { |job| yield job }
end
93
+
94
# Look up a single job by id.
#
# Fetches the pod and, if it exists, its service and secret, and lets the
# helper combine them. A pod that cannot be found is reported as completed.
#
# The namespace prefix is forwarded to the helper so that the job owner is
# derived from the namespace with the prefix removed; without it the owner
# would keep the prefix.
#
# @param id [#to_s] the pod id
# @return [OodCore::Job::Info] a completed placeholder when the pod is gone,
#   otherwise whatever helper.info_from_json returns
def info(id)
  pod_json = safe_call('get', 'pod', id)
  return OodCore::Job::Info.new({ id: id, status: 'completed' }) if pod_json.empty?

  helper.info_from_json(
    pod_json: pod_json,
    service_json: safe_call('get', 'service', service_name(id)),
    secret_json: safe_call('get', 'secret', secret_name(id)),
    ns_prefix: namespace_prefix
  )
end
103
+
104
# Status of a single job, taken from info(id).
def status(id)
  job = info(id)
  job.status
end
107
+
108
# Delete a job's pod together with the service, secret and configmap named
# after it. Each deletion goes through safe_call, so a resource that does not
# exist is ignored.
def delete(id)
  safe_call('delete', 'pod', id)
  safe_call('delete', 'service', service_name(id))
  safe_call('delete', 'secret', secret_name(id))
  safe_call('delete', 'configmap', configmap_name(id))
end
114
+
115
+ private
116
+
117
# Run a 'get' or 'delete' for a resource, treating "not found" as an empty
# result instead of an error.
#
# @param verb [#to_s] 'get' or 'delete'; any other verb does nothing
# @param resource [#to_s] the kubernetes resource type
# @param id [#to_s] the resource name
# @return [Hash, String, nil] parsed json for 'get', kubectl output for
#   'delete', {} when the resource was not found, nil for other verbs
def safe_call(verb, resource, id)
  case verb.to_s
  when 'get'    then call_json_output('get', resource, id)
  when 'delete' then call("#{namespaced_cmd} delete #{resource} #{id}")
  end
rescue NotFoundError
  {}
end
129
+
130
+ # helper to help format multi-line yaml data from the submit.yml into
131
+ # multi-line yaml in the pod.yml.erb
132
def config_data_lines(data)
  # The first line is kept untouched; every following line gets the
  # indentation prefix prepended.
  data.to_s.each_line.with_index.map do |line, index|
    index.zero? ? line : line.prepend(" ")
  end
end
143
+
144
# Login name as reported by Etc.getlogin, remembered after the first
# successful lookup.
def username
  return @username if @username

  @username = Etc.getlogin
end
147
+
148
# User name used for the kubectl context: the plain username, or
# "<username_prefix>-<username>" when a prefix is configured.
def k8s_username
  return username if username_prefix.nil?

  "#{username_prefix}-#{username}"
end
151
+
152
# Password-database entry for username, remembered after the first lookup.
def user
  return @user if @user

  @user = Etc.getpwnam(username)
end
155
+
156
# Home directory of the user's password-database entry.
def home_dir
  account = user
  account.dir
end
159
+
160
# Numeric uid of the user's password-database entry.
def run_as_user
  account = user
  account.uid
end
163
+
164
# Numeric primary gid of the user's password-database entry.
def run_as_group
  account = user
  account.gid
end
167
+
168
# Group id used for fs_group; currently the same value as run_as_group.
def fs_group
  gid = run_as_group
  gid
end
171
+
172
# Name of the group whose gid is run_as_group.
def group
  group_entry = Etc.getgrgid(run_as_group)
  group_entry.name
end
175
+
176
# Environment entries describing the current user (name, uid, home, group
# name and gid), passed as the default env when containers are built.
#
# @return [Hash{Symbol => Object}]
def default_env
  env = {}
  env[:USER]  = username
  env[:UID]   = run_as_user
  env[:HOME]  = home_dir
  env[:GROUP] = group
  env[:GID]   = run_as_group
  env
end
185
+
186
+ # helper to template resource yml you're going to submit and
187
+ # create an id.
188
def generate_id_yml(script)
  native_data = script.native
  container = helper.container_from_native(native_data[:container], default_env)
  id = generate_id(container.name)

  # NOTE: configmap, init_containers, spec and all_mounts are not used again
  # in this method; they are exposed to the template through `binding`
  # (presumably referenced by the .yml.erb), so their names must not change.
  configmap = helper.configmap_from_native(native_data, id)
  init_containers = helper.init_ctrs_from_native(native_data[:init_containers], container.env)
  spec = OodCore::Job::Adapters::Kubernetes::Resources::PodSpec.new(container, init_containers: init_containers)
  all_mounts =
    if native_data[:mounts].nil?
      mounts
    else
      mounts + native_data[:mounts]
    end

  # '-' is passed as ERB's trim mode
  template = ERB.new(File.read(resource_file), nil, '-')

  [template.result(binding), id]
end
201
+
202
+ # helper to call kubectl and get json data back.
203
+ # verb, resource and id are the kubernetes parlance terms.
204
+ # example: 'kubectl get pod my-pod-id' is verb=get, resource=pod
205
+ # and id=my-pod-id
206
def call_json_output(verb, resource, id, stdin: nil)
  raw = call("#{formatted_ns_cmd} #{verb} #{resource} #{id}", stdin: stdin)
  # empty kubectl output is treated as an empty json object
  raw = '{}' if raw.empty?

  JSON.parse(raw, symbolize_names: true)
end
214
+
215
# Name of the service belonging to a job id, as defined by the helper.
def service_name(id)
  naming = helper
  naming.service_name(id)
end
218
+
219
# Name of the secret belonging to a job id, as defined by the helper.
def secret_name(id)
  naming = helper
  naming.secret_name(id)
end
222
+
223
# Name of the configmap belonging to a job id, as defined by the helper.
def configmap_name(id)
  naming = helper
  naming.configmap_name(id)
end
226
+
227
# Kubernetes namespace for the current user: the configured namespace prefix
# immediately followed by the username.
def namespace
  namespace_prefix.to_s + username.to_s
end
230
+
231
# kubectl context name; this is simply the configured cluster name.
def context
  cluster
end
234
+
235
# Default kubeconfig location: $KUBECONFIG when it is set, otherwise
# ~/.kube/config.
def default_config_file
  ENV.fetch('KUBECONFIG') { "#{Dir.home}/.kube/config" }
end
238
+
239
# Auth settings used when the configuration has no :auth entry.
#
# The type is 'managed', the value managed? checks for, so the default is
# recognised as "nothing to configure". The keys are already symbols, so no
# key conversion is needed.
#
# @return [Hash{Symbol => String}]
def default_auth
  { type: 'managed' }
end
244
+
245
# Server settings used when the configuration has no :server entry.
#
# The literal already uses symbol keys, so no key conversion is needed.
#
# @return [Hash{Symbol => String, nil}] :endpoint and :cert_authority_file
def default_server
  {
    endpoint: 'https://localhost:8080',
    cert_authority_file: nil
  }
end
251
+
252
# Namespaced kubectl command prefix that asks for json output.
def formatted_ns_cmd
  [namespaced_cmd, '-o json'].join(' ')
end
255
+
256
# kubectl command prefix restricted to the user's namespace.
def namespaced_cmd
  format('%s --namespace=%s', base_cmd, namespace)
end
259
+
260
# kubectl command prefix: the binary plus --kubeconfig, and --context once a
# context has been set up (using_context).
def base_cmd
  parts = ["#{bin} --kubeconfig=#{config_file}"]
  parts << "--context=#{context}" if using_context
  parts.join(' ')
end
265
+
266
# Convert the json printed by 'kubectl get pods -o json' into job infos.
#
# Pods that pod_info_from_json cannot convert (it returns nil for them) are
# dropped. A missing or null `items` entry is treated as "no pods".
#
# @param data [String] raw kubectl output
# @return [Array] one info per convertible pod; empty when the output is not
#   json at all
def all_pods_to_info(data)
  pods = JSON.parse(data, symbolize_names: true).dig(:items)

  Array(pods).map { |pod| pod_info_from_json(pod) }.compact
rescue JSON::ParserError
  # 'no resources in <namespace>' throws parse error
  []
end
281
+
282
# Convert one pod's json into a K8sJobInfo, or nil when the helper cannot
# read the data.
#
# The constants are fully qualified because this class is opened with the
# compact `class A::B::C` form, which does not put the enclosing namespaces
# into the constant lookup path. The namespace prefix is forwarded so the
# helper can strip it when deriving the job owner.
#
# @param pod [#to_h] a single pod from kubectl's json output
# @return [OodCore::Job::Adapters::Kubernetes::K8sJobInfo, nil]
def pod_info_from_json(pod)
  hash = helper.pod_info_from_json(pod, ns_prefix: namespace_prefix)
  OodCore::Job::Adapters::Kubernetes::K8sJobInfo.new(hash)
rescue OodCore::Job::Adapters::Kubernetes::Helper::K8sDataError
  # FIXME: silently eating error, could probably use a logger
  nil
end
289
+
290
# Write the kubectl configuration: first the cluster entry, then whatever the
# auth type requires. Missing :server / :auth entries fall back to
# default_server / default_auth.
#
# @param config [Hash] symbol-keyed adapter options
def make_kubectl_config(config)
  server_opts = config.fetch(:server, default_server).to_h.symbolize_keys
  set_cluster(server_opts)

  auth_opts = config.fetch(:auth, default_auth).to_h.symbolize_keys
  configure_auth(auth_opts)
end
294
+
295
# Apply the auth-specific part of the kubectl configuration.
#
# Nothing is done for managed auth. The type is compared as a string, the
# same way managed? normalises it, so symbol and string values behave alike.
# Unknown types are ignored.
#
# @param auth [Hash] symbol-keyed auth options; :type is required
# @raise [KeyError] when auth has no :type key
def configure_auth(auth)
  type = auth.fetch(:type)
  return if managed?(type)

  case type.to_s
  when 'gke'  then set_gke_config(auth)
  when 'oidc' then set_context
  end
end
306
+
307
# Mark that subsequent kubectl commands should pass --context (see base_cmd).
def use_context
  @using_context = true
end
310
+
311
# Whether the auth type means kubectl is configured outside of this class.
#
# A nil type counts as managed (maybe should be false?).
#
# @param type [#to_s, nil]
# @return [Boolean]
def managed?(type)
  type.nil? || type.to_s == 'managed'
end
318
+
319
# Activate the GKE service account named in the auth options, then fetch the
# cluster credentials.
#
# @param auth [Hash] must contain :svc_acct_file
# @raise [KeyError] when :svc_acct_file is missing
def set_gke_config(auth)
  key_file = auth.fetch(:svc_acct_file)
  call("gcloud auth activate-service-account --key-file=#{key_file}")

  set_gke_credentials(auth)
end
327
+
328
# Fetch GKE cluster credentials into the configured kubeconfig file.
#
# The environment is handed to call through its `env:` keyword; call does not
# accept it positionally.
#
# @param auth [Hash] may contain :zone and/or :region; the region wins when
#   both are given, and no location flag is passed when neither is
def set_gke_credentials(auth)
  zone = auth.fetch(:zone, nil)
  region = auth.fetch(:region, nil)

  locale =
    if !region.nil?
      "--region=#{region}"
    elsif !zone.nil?
      "--zone=#{zone}"
    else
      ''
    end

  # the gke cluster name can probably differ from what ood calls the cluster
  cmd = "gcloud container clusters get-credentials #{locale} #{cluster}"
  call(cmd, env: { 'KUBECONFIG' => config_file })
end
342
+
343
# Create/update a kubectl context named after the cluster (binding cluster,
# namespace and user) and switch this object to using it.
def set_context
  cmd = [
    "#{base_cmd} config set-context #{cluster}",
    " --cluster=#{cluster} --namespace=#{namespace}",
    " --user=#{k8s_username}"
  ].join

  call(cmd)
  use_context
end
351
+
352
# Register the cluster in the kubeconfig via 'kubectl config set-cluster'.
#
# @param config [Hash] must contain :endpoint; :cert_authority_file is
#   optional and only passed when not nil
# @raise [KeyError] when :endpoint is missing
def set_cluster(config)
  endpoint = config.fetch(:endpoint)
  ca_file = config.fetch(:cert_authority_file, nil)

  pieces = ["#{base_cmd} config set-cluster #{cluster}", " --server=#{endpoint}"]
  pieces << " --certificate-authority=#{ca_file}" unless ca_file.nil?

  call(pieces.join)
end
362
+
363
# Run a command, feeding it stdin, and return its stdout.
#
# @param cmd [String] the command line
# @param env [Hash] extra environment variables for the child process
# @param stdin [#to_s] data written to the command's stdin
# @return [String] stdout when the command succeeds; on failure stderr is
#   handed to interpret_and_raise
def call(cmd = '', env: {}, stdin: nil)
  stdout, stderr, status = Open3.capture3(env, cmd, stdin_data: stdin.to_s)
  return stdout if status.success?

  interpret_and_raise(stderr)
end
367
+
368
# Turn kubectl's stderr into an exception.
#
# @param stderr [String] the error output of a failed command
# @raise [NotFoundError] when a line starts with "Error from server (NotFound):"
# @raise [Error] for every other failure
def interpret_and_raise(stderr)
  not_found = /^Error from server \(NotFound\):/.match(stderr)
  raise NotFoundError, stderr if not_found

  raise Error, stderr
end
372
+ end
@@ -0,0 +1,299 @@
1
+ class OodCore::Job::Adapters::Kubernetes::Helper
2
+
3
+ require_relative 'resources'
4
+ require_relative 'k8s_job_info'
5
+ require 'resolv'
6
+ require 'base64'
7
+ require 'active_support/core_ext/hash'
8
+
9
+ class K8sDataError < StandardError; end
10
+
11
+ # Extract info from json data. The data is expected to be from the kubectl
12
+ # command and conform to kubernetes' datatype structures.
13
+ #
14
+ # Returns a K8sJobInfo in lieu of writing a connection.yml
15
+ #
16
+ # @param pod_json [#to_h]
17
+ # the pod data returned from 'kubectl get pod abc-123'
18
+ # @param service_json [#to_h]
19
+ # the service data returned from 'kubectl get service abc-123-service'
20
+ # @param secret_json [#to_h]
21
+ # the secret data returned from 'kubectl get secret abc-123-secret'
22
+ # @param ns_prefix [#to_s]
23
+ # the namespace prefix so that namespaces can be converted back to usernames
24
+ # @return [OodCore::Job::Adapters::Kubernetes::K8sJobInfo]
25
def info_from_json(pod_json: nil, service_json: nil, secret_json: nil, ns_prefix: nil)
  merged = pod_info_from_json(pod_json, ns_prefix: ns_prefix)
  extras = [service_info_from_json(service_json), secret_info_from_json(secret_json)]

  # service first, then secret: both contribute to :ood_connection_info
  extras.each { |extra| merged.deep_merge!(extra) }

  OodCore::Job::Adapters::Kubernetes::K8sJobInfo.new(merged)
rescue NoMethodError
  raise K8sDataError, "unable to read data correctly from json"
end
36
+
37
+ # Turn a container hash into a Kubernetes::Resources::Container
38
+ #
39
+ # @param container [#to_h]
40
+ # the input container hash
41
+ # @param default_env [#to_h]
42
+ # Default env to merge with defined env
43
+ # @return [OodCore::Job::Adapters::Kubernetes::Resources::Container]
44
def container_from_native(container, default_env)
  # entries defined on the container override the defaults
  custom_env = container.fetch(:env, {}).to_h.symbolize_keys
  options = {
    command: parse_command(container[:command]),
    port: container[:port],
    env: default_env.merge(custom_env),
    memory: container[:memory],
    cpu: container[:cpu],
    working_dir: container[:working_dir],
    restart_policy: container[:restart_policy],
    image_pull_secret: container[:image_pull_secret]
  }

  OodCore::Job::Adapters::Kubernetes::Resources::Container.new(
    container[:name],
    container[:image],
    **options
  )
end
59
+
60
+ # Parse a command string given from a user and return an array.
61
+ # If given an array, the input is simply returned back.
62
+ #
63
+ # @param cmd [#to_s]
64
+ # the command to parse
65
+ # @return [Array<#to_s>]
66
+ # the command parsed into an array of arguments
67
def parse_command(cmd)
  # arrays are passed through untouched; anything else (including nil) is
  # stringified and split with shell quoting rules
  return cmd if cmd.is_a?(Array)

  Shellwords.split(cmd.to_s)
end
74
+
75
+ # Turn a configmap hash into a Kubernetes::Resources::ConfigMap
76
+ # that can be used in templates. Needs an id so that the resulting
77
+ # configmap has a known name.
78
+ #
79
+ # @param native [#to_h]
80
+ # the input configmap hash
81
+ # @param id [#to_s]
82
+ # the id to use for giving the configmap a name
83
+ # @return [OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap]
84
def configmap_from_native(native, id)
  raw = native.fetch(:configmap, nil)
  # no configmap requested
  return nil if raw.nil?

  name = configmap_name(id)
  files = raw[:files] || []
  OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap.new(name, files)
end
93
+
94
+ # parse initialization containers from native data
95
+ #
96
+ # @param ctrs [Array<#to_h>, nil]
97
+ # the raw init containers to parse, expected to be an array of
98
+ # container hashes (nil yields an empty result).
99
+ # @param default_env [#to_h]
100
+ # Default env to merge with defined env
101
+ # @return [Array<OodCore::Job::Adapters::Kubernetes::Resources::Container>]
102
+ # the array of init containers
103
def init_ctrs_from_native(ctrs, default_env)
  # nothing configured means no init containers
  return [] if ctrs.nil?

  ctrs.map { |raw_ctr| container_from_native(raw_ctr, default_env) }
end
113
+
114
# Name of the service that belongs to a pod id.
#
# @param id [String] the pod id
# @return [String] the id followed by '-service'
def service_name(id)
  suffix = '-service'
  id + suffix
end
117
+
118
# Name of the secret that belongs to a pod id.
#
# @param id [String] the pod id
# @return [String] the id followed by '-secret'
def secret_name(id)
  suffix = '-secret'
  id + suffix
end
121
+
122
# Name of the configmap that belongs to a pod id.
#
# @param id [String] the pod id
# @return [String] the id followed by '-configmap'
def configmap_name(id)
  suffix = '-configmap'
  id + suffix
end
125
+
126
# Format a number of seconds as "HHhMMmSSs" (each part zero-padded to at
# least two digits).
#
# @param s [Numeric] seconds
# @return [String]
def seconds_to_duration(s)
  hours = s / 3600
  minutes = s / 60 % 60
  seconds = s % 60
  format("%02dh%02dm%02ds", hours, minutes, seconds)
end
129
+
130
+ # Extract pod info from json data. The data is expected to be from the kubectl
131
+ # command and conform to kubernetes' datatype structures.
132
+ #
133
+ # @param json_data [#to_h]
134
+ # the pod data returned from 'kubectl get pod abc-123'
135
+ # @param ns_prefix [#to_s]
136
+ # the namespace prefix so that namespaces can be converted back to usernames
137
+ # @return [#to_h]
138
+ # the hash of info expected from adapters
139
def pod_info_from_json(json_data, ns_prefix: nil)
  info = {}
  info[:id] = json_data.dig(:metadata, :name).to_s
  info[:job_name] = name_from_metadata(json_data.dig(:metadata))
  info[:status] = pod_status_from_json(json_data)
  info[:job_owner] = job_owner_from_json(json_data, ns_prefix)
  info[:submission_time] = submission_time(json_data)
  info[:dispatch_time] = dispatch_time(json_data)
  info[:wallclock_time] = wallclock_time(json_data)
  info[:ood_connection_info] = { host: get_host(json_data.dig(:status, :hostIP)) }
  info[:procs] = procs_from_json(json_data)
  info
rescue NoMethodError
  # gotta raise an error because Info.new will throw an error if id is undefined
  raise K8sDataError, "unable to read data correctly from json"
end
155
+
156
+ private
157
+
158
# Reverse-resolve an address to a host name, falling back to the address
# itself when it cannot be resolved.
def get_host(ip)
  begin
    Resolv.getname(ip)
  rescue Resolv::ResolvError
    ip
  end
end
163
+
164
# Pick a display name from pod metadata: the 'app.kubernetes.io/name' label,
# else the 'k8s-app' label, else the pod's own name (the pod id, but better
# than nil).
#
# @param metadata [Hash] the pod's :metadata section
# @return [Object, nil] the first of those values that is not nil
def name_from_metadata(metadata)
  candidates = [
    [:labels, :'app.kubernetes.io/name'],
    [:labels, :'k8s-app'],
    [:name]
  ]

  candidates.each do |path|
    value = metadata.dig(*path)
    return value unless value.nil?
  end

  nil
end
170
+
171
# Extract the connection port from a service's json.
#
# @param json_data [Hash] output of 'kubectl get service', symbol keys
# @return [Hash] { ood_connection_info: { port: ... } }, or {} when the data
#   cannot be read
def service_info_from_json(json_data)
  # all we need is the port - .spec.ports[0].nodePort
  first_port = json_data.dig(:spec, :ports)[0]
  { ood_connection_info: { port: first_port.dig(:nodePort) } }
rescue StandardError
  {}
end
178
+
179
# Extract and base64-decode the password from a secret's json.
#
# @param json_data [Hash] output of 'kubectl get secret', symbol keys
# @return [Hash] { ood_connection_info: { password: ... } }, or {} when the
#   data cannot be read
def secret_info_from_json(json_data)
  encoded = json_data.dig(:data, :password)
  { ood_connection_info: { password: Base64.decode64(encoded) } }
rescue StandardError
  {}
end
185
+
186
# Epoch seconds at which the first container started, or nil when there are
# no container statuses or the pod is neither running nor completed.
def dispatch_time(json_data)
  status = pod_status_from_json(json_data)
  statuses = json_data.dig(:status, :containerStatuses)
  return nil if statuses.nil?

  state = statuses[0].dig(:state)
  # status is compared with ==, not case/when, so the comparison is decided
  # by the status object itself
  started_at =
    if status == 'completed'
      state.dig(:terminated, :startedAt)
    elsif status == 'running'
      state.dig(:running, :startedAt)
    end
  return nil if started_at.nil?

  DateTime.parse(started_at).to_time.to_i
end
202
+
203
# Seconds between dispatch_time and end_time, or nil when either is unknown
# or the pod has no container statuses.
def wallclock_time(json_data)
  status = pod_status_from_json(json_data)
  statuses = json_data.dig(:status, :containerStatuses)
  return nil if statuses.nil?

  state = statuses[0].dig(:state)
  started = dispatch_time(json_data)
  return nil if started.nil?

  finished = end_time(status, state)
  return nil if finished.nil?

  finished - started
end
216
+
217
# Epoch seconds at which the job ended: the terminated container's finish
# time for a completed job, "now" for a running job, nil otherwise.
def end_time(status, state_data)
  if status == 'completed'
    finished_at = state_data.dig(:terminated, :finishedAt)
    return DateTime.parse(finished_at).to_time.to_i
  end

  return DateTime.now.to_time.to_i if status == 'running'

  nil
end
229
+
230
# Epoch seconds at which the pod was submitted: its startTime, or, when it
# has none, the transition time of its first condition.
def submission_time(json_data)
  pod_status = json_data.dig(:status)
  timestamp = pod_status.dig(:startTime)

  # no startTime means the pod is in some pending state limbo; the best guess
  # is then the first condition's transition time
  timestamp = pod_status.dig(:conditions)[0].dig(:lastTransitionTime) if timestamp.nil?

  DateTime.parse(timestamp).to_time.to_i
end
246
+
247
# Map the pod's phase onto a job status state. Phases without an explicit
# mapping are reported as undetermined.
#
# @param json_data [Hash] pod json with symbol keys
# @return [OodCore::Job::Status]
def pod_status_from_json(json_data)
  phase = json_data.dig(:status, :phase)
  phase_states = {
    "Running" => "running",
    "Pending" => "queued",
    "Failed" => "suspended",
    "Succeeded" => "completed",
    "Unknown" => "undetermined"
  }

  OodCore::Job::Status.new(state: phase_states.fetch(phase, "undetermined"))
end
266
+
267
# State for a terminated container: 'suspended' when it terminated with
# reason 'Error', 'completed' otherwise.
def terminated_state(status)
  failed = status.dig(:terminated, :reason) == 'Error'
  failed ? 'suspended' : 'completed'
end
275
+
276
# Number of processors requested by the pod's first container, taken from its
# cpu limit.
#
# A millicore limit such as "1500m" is rounded up to whole cores, with a
# minimum of 1 (so "200m" and "1000m" both give "1", "1500m" gives "2"). Any
# other value is returned unchanged.
#
# @param json_data [Hash] pod json with symbol keys
# @return [String, Object, nil] the core count as a string for millicore
#   limits, otherwise the raw limit (nil when no limit is set)
def procs_from_json(json_data)
  containers = json_data.dig(:spec, :containers)
  resources = containers[0].dig(:resources)
  cpu = resources.dig(:limits, :cpu)

  # ok to return the raw value bc nil.to_i == 0 and we'd rather return
  # nil (undefined) than 0 which is confusing.
  millicores = /\A(\d+)m\z/.match(cpu.to_s)
  return cpu if millicores.nil?

  # round up to whole cores; have to return at least 1 bc 200m could be 0
  cores = (millicores[1].to_i + 999) / 1000
  [cores, 1].max.to_s
end
294
+
295
# Derive the job owner from the pod's namespace by removing the namespace
# prefix from its start.
#
# @param json_data [Hash] pod json with symbol keys
# @param ns_prefix [#to_s] the prefix to strip; nil strips nothing
# @return [String]
def job_owner_from_json(json_data = {}, ns_prefix = nil)
  prefix = ns_prefix.to_s
  json_data.dig(:metadata, :namespace).to_s.delete_prefix(prefix)
end
299
+ end