ood_core 0.11.2 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,350 @@
1
+ require "ood_core/refinements/hash_extensions"
2
+ require "json"
3
+
4
+ class OodCore::Job::Adapters::Kubernetes::Batch
5
+
6
+ require "ood_core/job/adapters/kubernetes/helper"
7
+
8
+ Helper = OodCore::Job::Adapters::Kubernetes::Helper
9
+ Resources = OodCore::Job::Adapters::Kubernetes::Resources
10
+
11
+ using OodCore::Refinements::HashExtensions
12
+
13
# Raised by #call when an external command exits unsuccessfully.
class Error < StandardError
end
14
+
15
+ attr_reader :config_file, :bin, :cluster_name, :mounts
16
+ attr_reader :all_namespaces, :using_context, :helper
17
+ attr_reader :username_prefix
18
+
19
# Build a batch client from adapter options, then try to write the kubectl
# cluster/auth configuration. Failures while configuring kubectl are
# swallowed so the object can still be constructed.
#
# @param options [#to_h] adapter options; keys are symbolized before use
# @param helper [Helper] translates native data and kubectl json
def initialize(options = {}, helper = Helper.new)
  opts = options.to_h.symbolize_keys

  @helper = helper
  @using_context = false

  @config_file = opts.fetch(:config_file, default_config_file)
  @bin = opts.fetch(:bin, '/usr/bin/kubectl')
  @cluster_name = opts.fetch(:cluster_name, 'open-ondemand')
  @mounts = opts.fetch(:mounts, []).map { |mount| mount.to_h.symbolize_keys }
  @all_namespaces = opts.fetch(:all_namespaces, false)
  @username_prefix = opts.fetch(:username_prefix, nil)

  begin
    make_kubectl_config(opts)
  rescue StandardError
    # FIXME could use a log here
    # means you couldn't 'kubectl set config'
  end
end
39
+
40
# Path of the ERB template for a resource type, located in the templates
# directory next to this file.
#
# @param resource_type [#to_s] template base name (defaults to 'pod')
# @return [String] path to "<resource_type>.yml.erb"
def resource_file(resource_type = 'pod')
  "#{File.dirname(__FILE__)}/templates/#{resource_type}.yml.erb"
end
43
+
44
# Render the pod yml for a script and create it with kubectl.
# The dependency keywords are accepted but not used here.
#
# @param script [#native] the job script; its native data drives the template
# @return [String] the generated id of the created pod
# @raise [ArgumentError] if script is nil
def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
  raise ArgumentError, 'Must specify the script' if script.nil?

  rendered_yml, pod_id = generate_id_yml(script.native)
  create_cmd = "#{formatted_ns_cmd} create -f -"
  call(create_cmd, stdin: rendered_yml)

  pod_id
end
52
+
53
# Build a pod id from a name: lowercase it, turn spaces into dashes and
# append a random base-36 suffix drawn from 0...36**8.
#
# @param name [String] the base name
# @return [String] "<slug>-<random suffix>"
def generate_id(name)
  slug = name.downcase.tr(' ', '-')
  suffix = rand(36**8).to_s(36)

  "#{slug}-#{suffix}"
end
57
+
58
# List pods as job info objects, across every namespace when
# all_namespaces is set and otherwise only in this user's namespace.
# The attrs keyword is accepted but not used here.
#
# @return [Array] one info object per readable pod
def info_all(attrs: nil)
  list_cmd = all_namespaces ? "#{base_cmd} get pods -o json --all-namespaces" : "#{namespaced_cmd} get pods -o json"

  all_pods_to_info(call(list_cmd))
end
68
+
69
# List the jobs owned by one or more users.
#
# Uses Kernel#Array rather than Array.wrap: Array.wrap is not part of core
# Ruby, so it is only available when something else has loaded it.
#
# @param owner [#to_s, Array<#to_s>, nil] owner name(s) to keep
# @param attrs [Array<Symbol>, Symbol, nil] requested attributes; when given,
#   :job_owner is always added because the filter needs it
# @return [Array] info objects whose job_owner is one of the given owners
def info_where_owner(owner, attrs: nil)
  owners = Array(owner).map(&:to_s)

  # must at least have job_owner to filter by job_owner
  attrs = Array(attrs) | [:job_owner] unless attrs.nil?

  info_all(attrs: attrs).select { |job| owners.include?(job.job_owner) }
end
77
+
78
# Yield every job from #info_all, or return an enumerator when no block
# is given.
#
# @param attrs [Array<Symbol>, nil] forwarded to #info_all
# @yield [job] each job info object
def info_all_each(attrs: nil)
  if block_given?
    info_all(attrs: attrs).each { |job| yield job }
  else
    to_enum(:info_all_each, attrs: attrs)
  end
end
85
+
86
# Yield every job from #info_where_owner, or return an enumerator when no
# block is given.
#
# @param owner [#to_s, Array<#to_s>] owner name(s) to keep
# @param attrs [Array<Symbol>, nil] forwarded to #info_where_owner
# @yield [job] each matching job info object
def info_where_owner_each(owner, attrs: nil)
  if block_given?
    info_where_owner(owner, attrs: attrs).each { |job| yield job }
  else
    to_enum(:info_where_owner_each, owner, attrs: attrs)
  end
end
93
+
94
# Collect the info for one job from its pod plus its optional service and
# secret.
#
# The service and secret are looked up independently, so a missing service
# no longer prevents an existing secret from being read (and vice versa).
# A failure to read the pod itself still propagates.
#
# @param id [#to_s] the pod id
# @return [Object] whatever helper.info_from_json builds from the json
def info(id)
  pod_json = call_json_output('get', 'pod', id)

  service_json = begin
    call_json_output('get', 'service', service_name(id))
  rescue StandardError
    nil # it's ok if the service doesn't exist
  end

  secret_json = begin
    call_json_output('get', 'secret', secret_name(id))
  rescue StandardError
    nil # it's ok if the secret doesn't exist
  end

  helper.info_from_json(pod_json: pod_json, service_json: service_json, secret_json: secret_json)
end
108
+
109
# Status of a single job, taken from its full info.
#
# @param id [#to_s] the pod id
# @return [Object] the status attribute of the object returned by #info
def status(id)
  job_info = info(id)
  job_info.status
end
112
+
113
# Delete a job's pod and, best effort, its companion resources.
#
# Each companion (service, secret, configmap) is deleted independently so a
# failure on one -- for example a service that was never created -- does not
# leave the others behind. A failure to delete the pod still propagates.
#
# @param id [#to_s] the pod id
# @return [nil]
def delete(id)
  call("#{namespaced_cmd} delete pod #{id}")

  companions = [
    ['service', service_name(id)],
    ['secret', secret_name(id)],
    ['configmap', configmap_name(id)]
  ]

  companions.each do |resource, name|
    begin
      call("#{namespaced_cmd} delete #{resource} #{name}")
    rescue StandardError
      # FIXME: retries? delete if exists?
      # just eat the results of deleting companion resources
    end
  end

  nil
end
125
+
126
# Path returned for mounting a configmap.
#
# @return [String] always '/ood'
def configmap_mount_path
  '/ood'
end
129
+
130
+ private
131
+
132
+ # helper to help format multi-line yaml data from the submit.yml into
133
+ # multi-line yaml in the pod.yml.erb
134
def config_data_lines(data)
  # The first line is emitted untouched; every following line gets the
  # indent prefix prepended.
  data.to_s.each_line.each_with_index.map do |line, index|
    index.zero? ? line : line.prepend(" ")
  end
end
145
+
146
# Login name of the current user as reported by Etc.getlogin, memoized in
# @username.
#
# @return [String, nil]
def username
  return @username if @username

  @username = Etc.getlogin
end
149
+
150
# The username prefixed with "<username_prefix>-" when a prefix is
# configured, otherwise the plain username.
#
# @return [String]
def k8s_username
  if username_prefix.nil?
    username
  else
    "#{username_prefix}-#{username}"
  end
end
153
+
154
# Numeric uid of #username from the passwd database.
#
# @return [Integer]
def run_as_user
  passwd_entry = Etc.getpwnam(username)
  passwd_entry.uid
end
157
+
158
# Numeric primary gid of #username from the passwd database.
#
# @return [Integer]
def run_as_group
  passwd_entry = Etc.getpwnam(username)
  passwd_entry.gid
end
161
+
162
# The fs group id; currently the same value as #run_as_group.
#
# @return [Integer]
def fs_group
  group_id = run_as_group
  group_id
end
165
+
166
+ # helper to template resource yml you're going to submit and
167
+ # create an id.
168
def generate_id_yml(native_data)
  # NOTE: the template is evaluated with this method's binding, so every
  # local below is visible to the template. Their names are kept as-is
  # because templates may reference them even though this method does not.
  container = helper.container_from_native(native_data[:container])
  id = generate_id(container.name)
  configmap = helper.configmap_from_native(native_data, id)
  init_containers = helper.init_ctrs_from_native(native_data[:init_containers])
  spec = Resources::PodSpec.new(container, init_containers: init_containers)

  # configured mounts come first, followed by any per-job mounts
  all_mounts =
    if native_data[:mounts].nil?
      mounts
    else
      mounts + native_data[:mounts]
    end

  rendered = ERB.new(File.read(resource_file)).result(binding)

  [rendered, id]
end
180
+
181
+ # helper to call kubectl and get json data back.
182
+ # verb, resource and id are the kubernetes parlance terms.
183
+ # example: 'kubectl get pod my-pod-id' is verb=get, resource=pod
184
+ # and id=my-pod-id
185
def call_json_output(verb, resource, id, stdin: nil)
  raw = call("#{formatted_ns_cmd} #{verb} #{resource} #{id}", stdin: stdin)

  # empty output is parsed as an empty json object
  raw = '{}' if raw.empty?

  JSON.parse(raw, symbolize_names: true)
end
193
+
194
# Name of the service that belongs to a pod id, as defined by the helper.
#
# @param id [String] the pod id
# @return [String]
def service_name(id)
  helper.service_name(id)
end
197
+
198
# Name of the secret that belongs to a pod id, as defined by the helper.
#
# @param id [String] the pod id
# @return [String]
def secret_name(id)
  helper.secret_name(id)
end
201
+
202
# Name of the configmap that belongs to a pod id, as defined by the helper.
#
# @param id [String] the pod id
# @return [String]
def configmap_name(id)
  helper.configmap_name(id)
end
205
+
206
# Namespace used for namespaced kubectl commands; currently always
# #default_namespace.
#
# @return [String]
def namespace
  default_namespace
end
209
+
210
# The default namespace, which is the current #username.
#
# @return [String]
def default_namespace
  username
end
213
+
214
# kubectl context name; the same value as #cluster_name.
#
# @return [String]
def context
  cluster_name
end
217
+
218
# Default kubeconfig location: $KUBECONFIG when set, otherwise
# ~/.kube/config under the current user's home directory.
#
# @return [String]
def default_config_file
  ENV.fetch('KUBECONFIG') { "#{Dir.home}/.kube/config" }
end
221
+
222
# Auth settings used when the configuration has no :auth entry.
#
# The type is 'managed' (the value #managed? compares against), meaning the
# kubeconfig is maintained outside of this adapter. It was previously
# misspelled, so the default never registered as managed.
#
# @return [Hash{Symbol => String}]
def default_auth
  {
    type: 'managed'
  }.symbolize_keys
end
227
+
228
# Server settings used when the configuration has no :server entry:
# a localhost endpoint and no certificate authority file.
#
# @return [Hash{Symbol => String, nil}]
def default_server
  defaults = { endpoint: 'https://localhost:8080', cert_authority_file: nil }
  defaults.symbolize_keys
end
234
+
235
# The namespaced kubectl command with json output requested.
#
# @return [String]
def formatted_ns_cmd
  [namespaced_cmd, '-o json'].join(' ')
end
238
+
239
# The base kubectl command scoped to #namespace.
#
# @return [String]
def namespaced_cmd
  [base_cmd, "--namespace=#{namespace}"].join(' ')
end
242
+
243
# The kubectl invocation shared by every command: binary, kubeconfig and,
# once a context has been set, the --context flag.
#
# @return [String]
def base_cmd
  parts = ["#{bin} --kubeconfig=#{config_file}"]
  parts << "--context=#{context}" if using_context
  parts.join(' ')
end
248
+
249
# Turn the json from 'kubectl get pods' into info objects, dropping pods
# whose data could not be read.
#
# @param data [String] raw json text with an items array
# @return [Array] info objects; empty when the text is not valid json
def all_pods_to_info(data)
  parsed = JSON.parse(data, symbolize_names: true)

  parsed.dig(:items).map { |pod| pod_info_from_json(pod) }.compact
rescue JSON::ParserError
  # 'no resources in <namespace>' throws parse error
  []
end
264
+
265
# Build an info object for one pod's json, or nil when the helper reports
# that the data could not be read.
#
# @param pod [Hash] one pod entry from kubectl json
# @return [OodCore::Job::Info, nil]
def pod_info_from_json(pod)
  OodCore::Job::Info.new(helper.pod_info_from_json(pod))
rescue Helper::K8sDataError
  # FIXME: silently eating error, could probably use a logger
  nil
end
272
+
273
# Write the cluster entry and then the auth settings into the kubeconfig,
# falling back to #default_server / #default_auth for missing entries.
#
# @param config [Hash] symbol-keyed options with optional :server and :auth
def make_kubectl_config(config)
  server_opts = config.fetch(:server, default_server).to_h.symbolize_keys
  set_cluster(server_opts)

  auth_opts = config.fetch(:auth, default_auth).to_h.symbolize_keys
  configure_auth(auth_opts)
end
277
+
278
# Apply auth settings by type: nothing for managed configs, gcloud setup
# for 'gke', and a kubectl context for 'oidc'. Other types are ignored.
#
# @param auth [Hash] symbol-keyed auth options; :type is required
# @raise [KeyError] if :type is missing
def configure_auth(auth)
  type = auth.fetch(:type)
  return if managed?(type)

  if type == 'gke'
    set_gke_config(auth)
  elsif type == 'oidc'
    set_context
  end
end
289
+
290
# Mark that later kubectl commands should pass --context (see #base_cmd).
#
# @return [true]
def use_context
  @using_context = true
end
293
+
294
# Whether an auth type means the kubeconfig is managed elsewhere.
# A nil type counts as managed (maybe should be false?).
#
# @param type [#to_s, nil] the configured auth type
# @return [Boolean]
def managed?(type)
  type.nil? || type.to_s == 'managed'
end
301
+
302
# Activate the gcloud service account from auth[:svc_acct_file], then
# fetch the cluster credentials.
#
# @param auth [Hash] symbol-keyed auth options; :svc_acct_file is required
# @raise [KeyError] if :svc_acct_file is missing
def set_gke_config(auth)
  key_file = auth.fetch(:svc_acct_file)
  call("gcloud auth activate-service-account --key-file=#{key_file}")

  set_gke_credentials(auth)
end
310
+
311
# Fetch GKE cluster credentials with gcloud, writing them to this adapter's
# kubeconfig via the KUBECONFIG environment variable.
#
# The environment is passed to #call with the env: keyword; #call only
# takes env as a keyword, so passing the hash positionally fails.
#
# @param auth [Hash] symbol-keyed auth options with optional :zone and
#   :region; when both are present the region is used
def set_gke_credentials(auth)
  zone = auth.fetch(:zone, nil)
  region = auth.fetch(:region, nil)

  locale = ''
  locale = "--zone=#{zone}" unless zone.nil?
  locale = "--region=#{region}" unless region.nil?

  # the gke cluster name can probably differ from what ood calls the cluster
  cmd = "gcloud container clusters get-credentials #{locale} #{cluster_name}"
  env = { 'KUBECONFIG' => config_file }
  call(cmd, env: env)
end
325
+
326
# Create/update a kubectl context named after the cluster, bound to this
# user's namespace and k8s username, then switch to using it.
#
# @return [true] the result of #use_context
def set_context
  parts = [
    "#{base_cmd} config set-context #{cluster_name}",
    "--cluster=#{cluster_name} --namespace=#{namespace}",
    "--user=#{k8s_username}"
  ]

  call(parts.join(' '))
  use_context
end
334
+
335
# Write the cluster entry (server endpoint and optional certificate
# authority) into the kubeconfig.
#
# @param config [Hash] symbol-keyed server options; :endpoint is required,
#   :cert_authority_file is optional
# @raise [KeyError] if :endpoint is missing
def set_cluster(config)
  endpoint = config.fetch(:endpoint)
  ca_file = config.fetch(:cert_authority_file, nil)

  parts = ["#{base_cmd} config set-cluster #{cluster_name}", "--server=#{endpoint}"]
  parts << "--certificate-authority=#{ca_file}" unless ca_file.nil?

  call(parts.join(' '))
end
345
+
346
# Run a shell command and return its stdout.
#
# @param cmd [String] the command line to run
# @param env [Hash{String => String}] extra environment variables
# @param stdin [#to_s, nil] data written to the command's stdin
# @return [String] the command's stdout
# @raise [Error] with the command's stderr when it exits unsuccessfully
def call(cmd = '', env: {}, stdin: nil)
  stdout, stderr, status = Open3.capture3(env, cmd, stdin_data: stdin.to_s)
  raise(Error, stderr) unless status.success?

  stdout
end
350
+ end
@@ -0,0 +1,298 @@
1
+ class OodCore::Job::Adapters::Kubernetes::Helper
2
+
3
+ require 'ood_core/job/adapters/kubernetes/resources'
4
+ require 'resolv'
5
+ require 'base64'
6
+
7
# Raised when kubectl json cannot be read into the expected info fields.
class K8sDataError < StandardError
end
8
+
9
+ Resources = OodCore::Job::Adapters::Kubernetes::Resources
10
+
11
+ # Extract info from json data. The data is expected to be from the kubectl
12
+ # command and conform to kubernetes' datatype structures.
13
+ #
14
+ # Returns { native: {host: localhost, port:80, password: sshhh }} in the info
15
+ # object field in lieu of writing a connection.yml
16
+ #
17
+ # @param pod_json [#to_h]
18
+ # the pod data returned from 'kubectl get pod abc-123'
19
+ # @param service_json [#to_h]
20
+ # the service data returned from 'kubectl get service abc-123-service'
21
+ # @param secret_json [#to_h]
22
+ # the secret data returned from 'kubectl get secret abc-123-secret'
23
+ # @return [OodCore::Job::Info]
24
def info_from_json(pod_json: nil, service_json: nil, secret_json: nil)
  info_hash = pod_info_from_json(pod_json)
  extras = [service_info_from_json(service_json), secret_info_from_json(secret_json)]

  # can't just use deep_merge bc we don't depend *directly* on rails
  extras.each do |extra|
    info_hash[:native] = info_hash[:native].merge(extra[:native])
  end

  OodCore::Job::Info.new(info_hash)
rescue NoMethodError
  raise K8sDataError, "unable to read data correctly from json"
end
36
+
37
+ # Turn a container hash into a Kubernetes::Resources::Container
38
+ #
39
+ # @param container [#to_h]
40
+ # the input container hash
41
+ # @return [OodCore::Job::Adapters::Kubernetes::Resources::Container]
42
def container_from_native(container)
  # name and image are positional; everything else is passed by keyword
  settings = {
    command: parse_command(container[:command]),
    port: container[:port],
    env: container.fetch(:env, []),
    memory: container[:memory],
    cpu: container[:cpu],
    working_dir: container[:working_dir],
    restart_policy: container[:restart_policy]
  }

  Resources::Container.new(container[:name], container[:image], **settings)
end
55
+
56
+ # Parse a command string given from a user and return an array.
57
+ # If given an array, the input is simply returned back.
58
+ #
59
+ # @param cmd [#to_s]
60
+ # the command to parse
61
+ # @return [Array<#to_s>]
62
+ # the command parsed into an array of arguments
63
def parse_command(cmd)
  # arrays are already split, so hand them back untouched
  return cmd if cmd.is_a?(Array)

  Shellwords.split(cmd.to_s)
end
70
+
71
+ # Turn a configmap hash into a Kubernetes::Resources::ConfigMap
72
+ # that can be used in templates. Needs an id so that the resulting
73
+ # configmap has a known name.
74
+ #
75
+ # @param native [#to_h]
76
+ # the input configmap hash
77
+ # @param id [#to_s]
78
+ # the id to use for giving the configmap a name
79
+ # @return [OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap]
80
def configmap_from_native(native, id)
  raw = native.fetch(:configmap, nil)
  return nil if raw.nil?

  # the name is derived from the id so it can be found again later
  Resources::ConfigMap.new(configmap_name(id), raw[:filename], raw[:data])
end
90
+
91
+ # parse initialization containers from native data
92
+ #
93
+ # @param native_data [#to_h]
94
+ # the native data to parse. Expected key init_ctrs and for that
95
+ # key to be an array of hashes.
96
+ # @return [Array<OodCore::Job::Adapters::Kubernetes::Resources::Container>]
97
+ # the array of init containers
98
def init_ctrs_from_native(ctrs)
  # no init containers configured
  return [] if ctrs.nil?

  ctrs.map { |raw_ctr| container_from_native(raw_ctr) }
end
108
+
109
# Name of the service that accompanies a pod id.
#
# @param id [String] the pod id
# @return [String] the id with '-service' appended
def service_name(id)
  suffix = '-service'
  id + suffix
end
112
+
113
# Name of the secret that accompanies a pod id.
#
# @param id [String] the pod id
# @return [String] the id with '-secret' appended
def secret_name(id)
  suffix = '-secret'
  id + suffix
end
116
+
117
# Name of the configmap that accompanies a pod id.
#
# @param id [String] the pod id
# @return [String] the id with '-configmap' appended
def configmap_name(id)
  suffix = '-configmap'
  id + suffix
end
120
+
121
+ # Extract pod info from json data. The data is expected to be from the kubectl
122
+ # command and conform to kubernetes' datatype structures.
123
+ #
124
+ # @param json_data [#to_h]
125
+ # the pod data returned from 'kubectl get pod abc-123'
126
+ # @return [#to_h]
127
+ # the hash of info expected from adapters
128
def pod_info_from_json(json_data)
  info = {}

  info[:id] = json_data.dig(:metadata, :name).to_s
  info[:job_name] = name_from_metadata(json_data.dig(:metadata))
  info[:status] = pod_status_from_json(json_data)
  # the pod's namespace is reported as the job owner
  info[:job_owner] = json_data.dig(:metadata, :namespace).to_s
  info[:submission_time] = submission_time(json_data)
  info[:dispatch_time] = dispatch_time(json_data)
  info[:wallclock_time] = wallclock_time(json_data)
  info[:native] = { host: get_host(json_data.dig(:status, :hostIP)) }
  info[:procs] = procs_from_json(json_data)

  info
rescue NoMethodError
  # gotta raise an error because Info.new will throw an error if id is undefined
  raise K8sDataError, "unable to read data correctly from json"
end
146
+
147
+ private
148
+
149
# Resolve an ip address to a hostname, falling back to the ip itself when
# it cannot be resolved.
#
# @param ip [String, nil] the address to look up
# @return [String, nil] the resolved name, or ip unchanged
def get_host(ip)
  hostname = Resolv.getname(ip)
  hostname
rescue Resolv::ResolvError
  ip
end
154
+
155
# Pick a job name from pod metadata: the 'app.kubernetes.io/name' label,
# then the 'k8s-app' label, then the pod name (pod-id but better than nil?).
#
# @param metadata [Hash] the pod's symbol-keyed metadata
# @return [Object, nil] the first non-nil candidate, or nil
def name_from_metadata(metadata)
  candidates = [
    [:labels, :'app.kubernetes.io/name'],
    [:labels, :'k8s-app'],
    [:name]
  ]

  candidates.each do |path|
    value = metadata.dig(*path)
    return value unless value.nil?
  end

  nil
end
161
+
162
# Extract the service's node port into a { native: { port: } } hash.
# Any error while reading the data yields the empty native hash instead.
#
# @param json_data [Hash, nil] symbol-keyed service json
# @return [Hash]
def service_info_from_json(json_data)
  # all we need is the port - .spec.ports[0].nodePort
  node_port = json_data.dig(:spec, :ports)[0].dig(:nodePort)

  { native: { port: node_port } }
rescue StandardError
  empty_native
end
174
+
175
# Extract the base64-decoded password into a { native: { password: } }
# hash. Any error while reading the data yields the empty native hash.
#
# @param json_data [Hash, nil] symbol-keyed secret json
# @return [Hash]
def secret_info_from_json(json_data)
  encoded = json_data.dig(:data, :password)
  decoded = Base64.decode64(encoded)

  { native: { password: decoded } }
rescue StandardError
  empty_native
end
186
+
187
# Fallback info fragment carrying no native data.
#
# @return [Hash] a new { native: {} } hash on every call
def empty_native
  { native: {} }
end
192
+
193
# Epoch seconds at which the pod's first container started, read from the
# terminated state for completed pods and the running state for running
# pods. Other statuses have no dispatch time.
#
# @param json_data [Hash] symbol-keyed pod json
# @return [Integer, nil]
def dispatch_time(json_data)
  status = pod_status_from_json(json_data)
  return nil if status == 'undetermined'

  state = json_data.dig(:status, :containerStatuses)[0].dig(:state)

  started_at =
    if status == 'completed'
      state.dig(:terminated, :startedAt)
    elsif status == 'running'
      state.dig(:running, :startedAt)
    end
  return nil if started_at.nil?

  DateTime.parse(started_at).to_time.to_i
end
208
+
209
# Seconds between the pod's dispatch time and its end time (now, for
# running pods). nil when either time is unknown.
#
# @param json_data [Hash] symbol-keyed pod json
# @return [Integer, nil]
def wallclock_time(json_data)
  status = pod_status_from_json(json_data)
  return nil if status == 'undetermined'

  state = json_data.dig(:status, :containerStatuses)[0].dig(:state)
  started = dispatch_time(json_data)
  return nil if started.nil?

  finished = end_time(status, state)
  return nil if finished.nil?

  finished - started
end
221
+
222
# Epoch seconds at which the container ended: its finishedAt for completed
# pods, the current time for running pods, nil otherwise.
#
# @param status [Object] pod status, compared with == against state names
# @param state_data [Hash] the container's symbol-keyed state json
# @return [Integer, nil]
def end_time(status, state_data)
  if status == 'completed'
    return DateTime.parse(state_data.dig(:terminated, :finishedAt)).to_time.to_i
  end
  return DateTime.now.to_time.to_i if status == 'running'

  nil
end
234
+
235
# Epoch seconds at which the pod was submitted: its startTime, or -- when
# the pod is in some pending state limbo and has none -- the first
# condition's lastTransitionTime as a best guess.
#
# @param json_data [Hash] symbol-keyed pod json
# @return [Integer]
def submission_time(json_data)
  pod_status = json_data.dig(:status)

  timestamp = pod_status.dig(:startTime)
  timestamp = pod_status.dig(:conditions)[0].dig(:lastTransitionTime) if timestamp.nil?

  DateTime.parse(timestamp).to_time.to_i
end
251
+
252
# Derive the job status from the pod's first container state.
# When several state keys are present, waiting takes precedence over
# terminated, which takes precedence over running.
#
# @param json_data [Hash] symbol-keyed pod json
# @return [OodCore::Job::Status]
def pod_status_from_json(json_data)
  container_statuses = json_data.dig(:status).dig(:containerStatuses)

  # if there are no statuses, it means you're pending, probably unschedulable
  return OodCore::Job::Status.new(state: 'undetermined') if container_statuses.nil?

  # only support 1 container/pod
  json_state = container_statuses[0].dig(:state)

  state =
    if !json_state.dig(:waiting).nil?
      'queued'
    elsif !json_state.dig(:terminated).nil?
      terminated_state(json_state)
    elsif !json_state.dig(:running).nil?
      'running'
    else
      'undetermined'
    end

  OodCore::Job::Status.new(state: state)
end
270
+
271
# Map a terminated container state to a job state name: a termination
# reason of 'Error' is reported as 'suspended', anything else 'completed'.
#
# @param status [Hash] the container's symbol-keyed state json
# @return [String]
def terminated_state(status)
  status.dig(:terminated, :reason) == 'Error' ? 'suspended' : 'completed'
end
279
+
280
# Number of processors requested by the pod's first container, taken from
# its cpu limit.
#
# Millicore values ("200m") are rounded UP to whole cpus with a minimum of
# one, so "200m" => "1", "1000m" => "1" and "1500m" => "2". The earlier
# (m + 1000) / 1000 formula over-counted exact multiples ("1000m" => "2").
# Any other value is returned as-is.
#
# @param json_data [Hash] symbol-keyed pod json
# @return [String, nil] the cpu count, or nil when no cpu limit is set
def procs_from_json(json_data)
  containers = json_data.dig(:spec, :containers)
  resources = containers[0].dig(:resources)

  cpu = resources.dig(:limits, :cpu)
  millicores_rex = /(\d+)m/

  # ok to return string bc nil.to_i == 0 and we'd rather return
  # nil (undefined) than 0 which is confusing.
  return cpu unless millicores_rex.match?(cpu)

  millicores = millicores_rex.match(cpu)[1].to_i

  # ceiling division; have to return at least 1 bc 0m would be 0
  [(millicores + 999) / 1000, 1].max.to_s
end
298
+ end