floe 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,329 @@
1
+ # frozen_string_literal: true
2
+
3
module Floe
  class ContainerRunner
    # Runner that executes docker:// resources as Kubernetes pods via kubeclient.
    #
    # Connection settings come either from an explicit server + token pair
    # (options, in-cluster service account files, KUBERNETES_SERVICE_* env vars)
    # or from a kubeconfig file.
    class Kubernetes < Floe::Runner
      include Floe::ContainerRunner::DockerMixin

      TOKEN_FILE      = "/run/secrets/kubernetes.io/serviceaccount/token"
      CA_CERT_FILE    = "/run/secrets/kubernetes.io/serviceaccount/ca.crt"
      # Pod phases for which #running? may still report true
      RUNNING_PHASES  = %w[Pending Running].freeze
      # Container state reasons that mark the pod as failed
      FAILURE_REASONS = %w[CrashLoopBackOff ImagePullBackOff ErrImagePull].freeze

      # @param options [Hash] string-keyed runner options. Keys read here:
      #   kubeconfig, kubeconfig_context, token, token_file, server, ca_file,
      #   verify_ssl, namespace, pull-policy, task_service_account
      # @raise [ArgumentError] when there is no server, no token and no kubeconfig file
      def initialize(options = {})
        # Dependencies are loaded lazily, only when this runner is instantiated
        require "active_support/core_ext/hash/keys"
        require "awesome_spawn"
        require "securerandom"
        require "base64"
        require "json" # create_secret! serializes the secrets with #to_json
        require "kubeclient"
        require "yaml"

        # The KUBECONFIG environment variable wins over the "kubeconfig" option
        @kubeconfig_file    = ENV.fetch("KUBECONFIG", nil) || options.fetch("kubeconfig", File.join(Dir.home, ".kube", "config"))
        @kubeconfig_context = options["kubeconfig_context"]

        # Token precedence: explicit option, then token_file option, then the service account file
        @token   = options["token"]
        @token ||= File.read(options["token_file"]) if options.key?("token_file")
        @token ||= File.read(TOKEN_FILE) if File.exist?(TOKEN_FILE)

        @server   = options["server"]
        @server ||= URI::HTTPS.build(:host => ENV.fetch("KUBERNETES_SERVICE_HOST"), :port => ENV.fetch("KUBERNETES_SERVICE_PORT", 6443)) if ENV.key?("KUBERNETES_SERVICE_HOST")

        @ca_file   = options["ca_file"]
        @ca_file ||= CA_CERT_FILE if File.exist?(CA_CERT_FILE)

        # Only the literal string "false" disables peer verification
        @verify_ssl = options["verify_ssl"] == "false" ? OpenSSL::SSL::VERIFY_NONE : OpenSSL::SSL::VERIFY_PEER

        if server.nil? && token.nil? && !File.exist?(kubeconfig_file)
          raise ArgumentError, "Missing connections options, provide a kubeconfig file or pass server and token via --docker-runner-options"
        end

        @namespace = options.fetch("namespace", "default")

        @pull_policy          = options["pull-policy"]
        @task_service_account = options["task_service_account"]

        super
      end

      # Create a pod (and, when secrets are given, a Secret) for the resource.
      #
      # @param resource [String] must start with "docker://"
      # @param env [Hash] environment variables for the container
      # @param secrets [Hash] stored in a Secret and mounted into the pod when non-empty
      # @return [Hash] the runner_context, or an "Error"/"Cause" hash when pod creation fails
      # @raise [ArgumentError] when the resource is not a docker:// resource
      def run_async!(resource, env = {}, secrets = {}, _context = {})
        raise ArgumentError, "Invalid resource" unless resource&.start_with?("docker://")

        image  = resource.sub("docker://", "")
        name   = container_name(image)
        secret = create_secret!(secrets) if secrets && !secrets.empty?

        runner_context = {"container_ref" => name, "container_state" => {"phase" => "Pending"}, "secrets_ref" => secret}

        begin
          create_pod!(name, image, env, secret)
          runner_context
        rescue Kubeclient::HttpError => err
          # Remove whatever was created before reporting the failure
          cleanup(runner_context)
          {"Error" => "States.TaskFailed", "Cause" => err.to_s}
        end
      end

      # Refresh runner_context["container_state"] from the pod's current status.
      # Does nothing when the context already holds an "Error".
      def status!(runner_context)
        return if runner_context.key?("Error")

        runner_context["container_state"] = pod_info(runner_context["container_ref"]).to_h.deep_stringify_keys["status"]
      end

      # @return [Boolean] true while the pod is Pending/Running and no container
      #   reports one of FAILURE_REASONS
      def running?(runner_context)
        return false unless pod_running?(runner_context)
        # If a pod is Pending and the containers are waiting with a failure
        # reason such as ImagePullBackOff or CrashLoopBackOff then the pod
        # will never be run.
        return false if container_failed?(runner_context)

        true
      end

      # @return [Boolean] true when the recorded pod phase is "Succeeded"
      def success?(runner_context)
        runner_context.dig("container_state", "phase") == "Succeeded"
      end

      # @return [Hash, String] the "Error"/"Cause" pair for a failed launch or
      #   failed container, otherwise the pod log (also stored under "output")
      def output(runner_context)
        if runner_context.key?("Error")
          runner_context.slice("Error", "Cause")
        elsif container_failed?(runner_context)
          failed_state = failed_container_states(runner_context).first
          {"Error" => failed_state["reason"], "Cause" => failed_state["message"]}
        else
          runner_context["output"] = kubeclient.get_pod_log(runner_context["container_ref"], namespace).body
        end
      end

      # Delete the pod and the secret referenced by the context; deletion errors are ignored.
      def cleanup(runner_context)
        pod, secret = runner_context.values_at("container_ref", "secrets_ref")

        delete_pod(pod) if pod
        delete_secret(secret) if secret
      end

      # Watch pods in the namespace and report notices whose event type is in +events+.
      #
      # With a block, yields [event, runner_context] for every matching notice.
      # Without a block, returns [[event, runner_context]] for the first matching notice.
      # Watching stops when an ERROR notice is received.
      #
      # @param timeout [Numeric, nil] when positive, the watch is finished after this many seconds
      # @param events [Array<Symbol>] event types to report (:create, :update, :delete)
      def wait(timeout: nil, events: %i[create update delete])
        retry_connection = true

        begin
          watcher = kubeclient.watch_pods(:namespace => namespace)

          # Obtaining a watcher succeeded, so a later 401 may be retried again
          retry_connection = true

          if timeout.to_i > 0
            timeout_thread = Thread.new do
              sleep(timeout)
              watcher.finish
            end
          end

          watcher.each do |notice|
            break if error_notice?(notice)

            event = kube_notice_type_to_event(notice.type)
            next unless events.include?(event)

            runner_context = parse_notice(notice)
            next if runner_context.nil?

            if block_given?
              yield [event, runner_context]
            else
              timeout_thread&.kill # If we break out before the timeout, kill the timeout thread
              return [[event, runner_context]]
            end
          end
        rescue Kubeclient::HttpError => err
          raise unless err.error_code == 401 && retry_connection

          # Drop the memoized client so the next attempt builds a fresh one
          @kubeclient      = nil
          retry_connection = false
          retry
        ensure
          begin
            # Close the watch stream. This previously referenced an undefined
            # local (`watch`), so the resulting NameError was swallowed by the
            # rescue below and the watcher was never finished.
            watcher&.finish
          rescue
            nil
          end

          timeout_thread&.join(0)
        end
      end

      private

      attr_reader :ca_file, :kubeconfig_file, :kubeconfig_context, :namespace, :server, :token, :verify_ssl

      def pod_info(pod_name)
        kubeclient.get_pod(pod_name, namespace)
      end

      def pod_running?(context)
        RUNNING_PHASES.include?(context.dig("container_state", "phase"))
      end

      # @return [Array<Hash>] the current state of every container whose
      #   reason is one of FAILURE_REASONS
      def failed_container_states(context)
        container_statuses = context.dig("container_state", "containerStatuses") || []
        container_statuses.filter_map { |status| status["state"]&.values&.first }
                          .select { |state| FAILURE_REASONS.include?(state["reason"]) }
      end

      def container_failed?(context)
        failed_container_states(context).any?
      end

      # Build the pod manifest.
      #
      # @param name [String] pod name; the container name is this minus its last 9 characters
      # @param image [String] container image
      # @param env [Hash] environment variables, values are stringified
      # @param secret [String, nil] name of a Secret to mount under /run/secrets/<secret>
      # @return [Hash] manifest passed to kubeclient's create_pod
      def pod_spec(name, image, env, secret = nil)
        container = {
          :name  => name[0...-9], # remove the random suffix and its leading hyphen
          :image => image,
          :env   => env.map { |k, v| {:name => k, :value => v.to_s} }
        }
        # imagePullPolicy is a field of the container, not of the pod spec
        container[:imagePullPolicy] = @pull_policy if @pull_policy

        spec = {
          :kind       => "Pod",
          :apiVersion => "v1",
          :metadata   => {
            :name      => name,
            :namespace => namespace
          },
          :spec       => {
            :containers    => [container],
            :restartPolicy => "Never"
          }
        }

        spec[:spec][:serviceAccountName] = @task_service_account if @task_service_account

        if secret
          spec[:spec][:volumes] = [
            {
              :name   => "secret-volume",
              :secret => {:secretName => secret}
            }
          ]

          # Tell the task where the mounted credentials file lives
          container[:env] << {
            :name  => "_CREDENTIALS",
            :value => "/run/secrets/#{secret}/secret"
          }

          container[:volumeMounts] = [
            {
              :name      => "secret-volume",
              :mountPath => "/run/secrets/#{secret}",
              :readOnly  => true
            }
          ]
        end

        spec
      end

      def create_pod!(name, image, env, secret = nil)
        kubeclient.create_pod(pod_spec(name, image, env, secret))
      end

      def delete_pod!(name)
        kubeclient.delete_pod(name, namespace)
      end

      # Best-effort variant of delete_pod!: any error is ignored.
      def delete_pod(name)
        delete_pod!(name)
      rescue
        nil
      end

      # Create an Opaque Secret holding the secrets as JSON under the "secret" key.
      #
      # @param secrets [Hash]
      # @return [String] the generated secret name
      def create_secret!(secrets)
        secret_name = SecureRandom.uuid

        secret_config = {
          :kind       => "Secret",
          :apiVersion => "v1",
          :metadata   => {
            :name      => secret_name,
            :namespace => namespace
          },
          :data       => {
            # Secret data must use the standard base64 alphabet; the URL-safe
            # alphabet ('-' and '_') is not valid here.
            :secret => Base64.strict_encode64(secrets.to_json)
          },
          :type       => "Opaque"
        }

        kubeclient.create_secret(secret_config)

        secret_name
      end

      def delete_secret!(secret_name)
        kubeclient.delete_secret(secret_name, namespace)
      end

      # Best-effort variant of delete_secret!: any error is ignored.
      def delete_secret(name)
        delete_secret!(name)
      rescue
        nil
      end

      # Map a watch notice type to the event symbols accepted by #wait.
      def kube_notice_type_to_event(type)
        case type
        when "ADDED"
          :create
        when "MODIFIED"
          :update
        when "DELETED"
          :delete
        else
          :unknown
        end
      end

      # @return [Boolean] true (after logging a warning) when the notice is an ERROR notice
      def error_notice?(notice)
        return false unless notice.type == "ERROR"

        message = notice.object&.message
        code    = notice.object&.code
        reason  = notice.object&.reason

        logger.warn("Received [#{code} #{reason}], [#{message}]")

        true
      end

      # @return [Hash, nil] a runner_context built from the notice's pod, nil when the notice has no object
      def parse_notice(notice)
        return if notice.object.nil?

        pod             = notice.object
        container_ref   = pod.metadata.name
        container_state = pod.to_h[:status].deep_stringify_keys

        {"container_ref" => container_ref, "container_state" => container_state}
      end

      # Memoized API client. Uses server + token when both are present,
      # otherwise the selected kubeconfig context.
      #
      # @raise [ArgumentError] when neither source yields connection settings
      def kubeclient
        return @kubeclient unless @kubeclient.nil?

        if server && token
          api_endpoint = server
          auth_options = {:bearer_token => token}
          ssl_options  = {:verify_ssl => verify_ssl}
          ssl_options[:ca_file] = ca_file if ca_file
        else
          context = kubeconfig&.context(kubeconfig_context)
          raise ArgumentError, "Missing connections options, provide a kubeconfig file or pass server and token via --docker-runner-options" if context.nil?

          api_endpoint = context.api_endpoint
          auth_options = context.auth_options
          ssl_options  = context.ssl_options
        end

        @kubeclient = Kubeclient::Client.new(api_endpoint, "v1", :ssl_options => ssl_options, :auth_options => auth_options).tap(&:discover)
      end

      # @return [Kubeclient::Config, nil] nil when no kubeconfig file is configured or present
      def kubeconfig
        return if kubeconfig_file.nil? || !File.exist?(kubeconfig_file)

        Kubeclient::Config.read(kubeconfig_file)
      end
    end
  end
end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
module Floe
  class ContainerRunner
    # Docker runner variant that drives the podman command line instead.
    class Podman < Docker
      DOCKER_COMMAND = "podman"

      # @param options [Hash] string-keyed runner options. Keys read here:
      #   identity, log-level, network, noout, pull-policy, root, runroot,
      #   runtime, runtime-flag, storage-driver, storage-opt, syslog, tmpdir,
      #   transient-store, volumepath
      def initialize(options = {})
        require "awesome_spawn"
        require "securerandom"

        super

        @identity        = options["identity"]
        @log_level       = options["log-level"]
        @network         = options["network"]
        @noout           = options["noout"].to_s == "true" if options.key?("noout")
        @pull_policy     = options["pull-policy"]
        @root            = options["root"]
        @runroot         = options["runroot"]
        @runtime         = options["runtime"]
        @runtime_flag    = options["runtime-flag"]
        @storage_driver  = options["storage-driver"]
        @storage_opt     = options["storage-opt"]
        @syslog          = options["syslog"].to_s == "true" if options.key?("syslog")
        @tmpdir          = options["tmpdir"]
        # Parsed like noout/syslog: option values may arrive as strings, and
        # the string "false" must not enable the flag.
        @transient_store = options["transient-store"].to_s == "true" if options.key?("transient-store")
        @volumepath      = options["volumepath"]
      end

      private

      # Build the argument list for `podman run`.
      #
      # @param image [String] container image
      # @param env [Hash] environment variables passed with -e
      # @param secret [String, nil] podman secret name to attach
      # @return [Array] parameters for the command builder
      def run_container_params(image, env, secret)
        params  = ["run"]
        params << :detach
        params += env.map { |k, v| [:e, "#{k}=#{v}"] }
        params << [:e, "_CREDENTIALS=/run/secrets/#{secret}"] if secret
        params << [:pull, @pull_policy] if @pull_policy
        params << [:net, "host"] if @network == "host"
        params << [:secret, secret] if secret
        params << [:name, container_name(image)]
        params << image
      end

      # Create a podman secret from the JSON-serialized secrets (fed on stdin).
      #
      # @return [String] the generated secret name
      def create_secret(secrets)
        secret_guid = SecureRandom.uuid
        podman!("secret", "create", secret_guid, "-", :in_data => secrets.to_json)
        secret_guid
      end

      # Remove the podman secret; any error is ignored.
      def delete_secret(secret_guid)
        podman!("secret", "rm", secret_guid)
      rescue
        nil
      end

      # Convert one JSON event line into [event, runner_context].
      #
      # @param notice [String] JSON document with "ID", "Status" and "ContainerExitCode"
      # @return [Array] [event, runner_context], or [] when the line is not valid JSON
      def parse_notice(notice)
        id, status, exit_code = JSON.parse(notice).values_at("ID", "Status", "ContainerExitCode")

        event   = podman_event_status_to_event(status)
        running = event != :delete

        runner_context = {"container_ref" => id, "container_state" => {"Running" => running, "ExitCode" => exit_code.to_i}}

        [event, runner_context]
      rescue JSON::ParserError
        []
      end

      # Map a podman event status to :create, :update, :delete or :unknown.
      def podman_event_status_to_event(status)
        case status
        when "create"
          :create
        when "init", "start"
          :update
        when "died", "cleanup", "remove"
          :delete
        else
          :unknown
        end
      end

      alias podman! docker!

      # Global podman options derived from the runner options given to #initialize.
      #
      # @return [Array] parameters placed before the podman sub-command
      #   (NOTE(review): placement is decided by the caller, which is not visible here)
      def global_docker_options
        options = []
        options << [:identity, @identity] if @identity
        options << [:"log-level", @log_level] if @log_level
        options << :noout if @noout
        options << [:root, @root] if @root
        options << [:runroot, @runroot] if @runroot
        options << [:runtime, @runtime] if @runtime
        options << [:"runtime-flag", @runtime_flag] if @runtime_flag
        options << [:"storage-driver", @storage_driver] if @storage_driver
        options << [:"storage-opt", @storage_opt] if @storage_opt
        options << :syslog if @syslog
        options << [:tmpdir, @tmpdir] if @tmpdir
        # Boolean switch: emitted as a bare flag, like :noout and :syslog,
        # rather than as a flag followed by a separate "true" argument.
        options << :"transient-store" if @transient_store
        options << [:volumepath, @volumepath] if @volumepath
        options
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "container_runner/docker_mixin"
4
+ require_relative "container_runner/docker"
5
+ require_relative "container_runner/kubernetes"
6
+ require_relative "container_runner/podman"
7
+
8
module Floe
  # Chooses and holds the runner used for docker:// resources.
  class ContainerRunner
    class << self
      # Declare the container runner command line options on the given parser.
      #
      # @param optimist [#banner, #opt] option parser receiving the declarations
      def cli_options(optimist)
        optimist.banner("")
        optimist.banner("Container runner options:")

        optimist.opt :container_runner, "Type of runner for docker container images (docker, podman, or kubernetes)", :type => :string, :short => 'r'
        optimist.opt :container_runner_options, "Options to pass to the container runner", :type => :strings, :short => 'o'

        optimist.opt :docker, "Use docker to run container images (short for --container-runner=docker)", :type => :boolean
        optimist.opt :podman, "Use podman to run container images (short for --container-runner=podman)", :type => :boolean
        optimist.opt :kubernetes, "Use kubernetes to run container images (short for --container-runner=kubernetes)", :type => :boolean
      end

      # Apply parsed command line options: expand the --docker/--podman/--kubernetes
      # shortcuts, parse the KEY=VALUE runner options and install the runner.
      # Invalid input is reported through Optimist.die.
      #
      # @param opts [Hash] parsed options; opts[:container_runner] may be filled in
      def resolve_cli_options!(opts)
        # shortcut support (an explicit --container-runner always wins)
        opts[:container_runner] ||= "docker" if opts[:docker]
        opts[:container_runner] ||= "podman" if opts[:podman]
        opts[:container_runner] ||= "kubernetes" if opts[:kubernetes]

        runner_options =
          begin
            parse_runner_options(opts[:container_runner_options])
          rescue ArgumentError => e
            Optimist.die(:container_runner_options, e.message)
          end

        begin
          set_runner(opts[:container_runner], runner_options)
        rescue ArgumentError => e
          Optimist.die(:container_runner, e.message)
        end
      end

      # @return [Floe::Runner] the configured runner, defaulting to a Docker runner
      def runner
        @runner || set_runner(nil)
      end

      # Install the container runner.
      #
      # @param name_or_instance [String, nil, Floe::Runner] "docker" (or nil),
      #   "podman", "kubernetes", or a ready-made runner instance
      # @param options [Hash] options passed to the runner constructor
      # @return [Floe::Runner] the installed runner
      # @raise [ArgumentError] when the name is not recognized
      def set_runner(name_or_instance, options = {})
        @runner =
          case name_or_instance
          when "docker", nil
            Floe::ContainerRunner::Docker.new(options)
          when "podman"
            Floe::ContainerRunner::Podman.new(options)
          when "kubernetes"
            Floe::ContainerRunner::Kubernetes.new(options)
          when Floe::Runner
            name_or_instance
          else
            raise ArgumentError, "container runner must be one of: docker, podman, kubernetes"
          end
      end

      # Turn ["key=value", ...] into {"key" => "value"}. Only the first "="
      # splits, so values may themselves contain "=".
      #
      # @param pairs [Array<String>, nil]
      # @return [Hash{String => String}]
      # @raise [ArgumentError] when an entry has no "="
      private def parse_runner_options(pairs)
        Array(pairs).to_h do |pair|
          key, value = pair.split("=", 2)
          raise ArgumentError, "invalid option [#{pair}], expected KEY=VALUE" if value.nil?

          [key, value]
        end
      end
    end
  end
end
60
+
61
# Register a proc for the "docker" scheme so the container runner is looked up
# only when the scheme is resolved.
Floe::Runner.register_scheme("docker", lambda { Floe::ContainerRunner.runner })
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
module Floe
  # Base class for task runners, plus a class-level registry that maps a
  # resource scheme (the part before "://") to a runner or a proc returning one.
  class Runner
    include Logging

    OUTPUT_MARKER = "__FLOE_OUTPUT__\n"

    # scheme => runner, or a proc that is called on first lookup
    @runners = {}

    # Associate a scheme with a runner or with a proc that builds one.
    #
    # @param scheme [String] e.g. "docker"
    # @param klass_or_proc [Object, Proc]
    def self.register_scheme(scheme, klass_or_proc)
      @runners[scheme] = klass_or_proc
    end

    # Look up the entry for a scheme. A registered proc is called and its
    # result stored in place of the proc.
    def self.resolve_scheme(scheme)
      registered = @runners[scheme]
      return registered unless registered.is_a?(Proc)

      @runners[scheme] = registered.call
    end
    private_class_method :resolve_scheme

    # Find the runner responsible for a resource.
    #
    # @param resource [String] e.g. "docker://image"
    # @return [Object] the entry registered for the resource's scheme
    # @raise [ArgumentError] when resource is nil or its scheme is not registered
    def self.for_resource(resource)
      raise ArgumentError, "resource cannot be nil" if resource.nil?

      scheme = resource.split("://").first
      found  = resolve_scheme(scheme)
      raise ArgumentError, "Invalid resource scheme [#{scheme}]" unless found

      found
    end

    def initialize(_options = {}); end

    # Run a command asynchronously and create a runner_context.
    #
    # @return [Hash] runner_context
    def run_async!(_resource, _env = {}, _secrets = {}, _context = {})
      raise NotImplementedError, "Must be implemented in a subclass"
    end

    # Update the runner_context.
    #
    # @param [Hash] runner_context (the value returned from run_async!)
    # @return [void]
    def status!(_runner_context)
      raise NotImplementedError, "Must be implemented in a subclass"
    end

    # Check the runner_context to determine if the task is still running or completed.
    #
    # @param [Hash] runner_context (the value returned from run_async!)
    # @return [Boolean] true if the task is still running,
    #   false if it has completed
    def running?(_runner_context)
      raise NotImplementedError, "Must be implemented in a subclass"
    end

    # For a task that is no longer running, check if it was successful.
    #
    # @param [Hash] runner_context (the value returned from run_async!)
    # @return [Boolean] true if the task completed successfully,
    #   false if the task had an error
    def success?(_runner_context)
      raise NotImplementedError, "Must be implemented in a subclass"
    end

    # For a successful task, return the output.
    #
    # @param [Hash] runner_context (the value returned from run_async!)
    # @return [String, Hash] output from task
    def output(_runner_context)
      raise NotImplementedError, "Must be implemented in a subclass"
    end

    # Clean up runner context resources.
    # Called after a task is completed and the runner_context is no longer needed.
    #
    # @param [Hash] runner_context (the value returned from run_async!)
    # @return [void]
    def cleanup(_runner_context)
      raise NotImplementedError, "Must be implemented in a subclass"
    end

    # Subclasses implement this; the base implementation always raises.
    #
    # @param timeout [Object, nil] unused here
    # @param events [Array<Symbol>] unused here
    def wait(timeout: nil, events: %i[create update delete])
      raise NotImplementedError, "Must be implemented in a subclass"
    end
  end
end
data/lib/floe/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Floe
4
- VERSION = "0.10.0"
4
+ VERSION = "0.11.0"
5
5
  end
@@ -5,8 +5,10 @@ module Floe
5
5
  class Context
6
6
  # @param context [Json|Hash] (default, create another with input and execution params)
7
7
  # @param input [Hash] (default: {})
8
- def initialize(context = nil, input: {})
8
+ def initialize(context = nil, input: nil)
9
9
  context = JSON.parse(context) if context.kind_of?(String)
10
+
11
+ input ||= {}
10
12
  input = JSON.parse(input) if input.kind_of?(String)
11
13
 
12
14
  @context = context || {}
@@ -18,7 +18,7 @@ module Floe
18
18
  @next = payload["Next"]
19
19
  @end = !!payload["End"]
20
20
  @resource = payload["Resource"]
21
- @runner = Floe::Workflow::Runner.for_resource(@resource)
21
+ @runner = Floe::Runner.for_resource(@resource)
22
22
  @timeout_seconds = payload["TimeoutSeconds"]
23
23
  @retry = payload["Retry"].to_a.map { |retrier| Retrier.new(retrier) }
24
24
  @catch = payload["Catch"].to_a.map { |catcher| Catcher.new(catcher) }
@@ -36,7 +36,7 @@ module Floe
36
36
  super
37
37
 
38
38
  input = process_input(input)
39
- runner_context = runner.run_async!(resource, input, credentials&.value({}, workflow.credentials))
39
+ runner_context = runner.run_async!(resource, input, credentials&.value({}, workflow.credentials), context)
40
40
 
41
41
  context.state["RunnerContext"] = runner_context
42
42
  end
data/lib/floe.rb CHANGED
@@ -5,6 +5,8 @@ require_relative "floe/version"
5
5
  require_relative "floe/null_logger"
6
6
  require_relative "floe/logging"
7
7
 
8
+ require_relative "floe/runner"
9
+
8
10
  require_relative "floe/workflow"
9
11
  require_relative "floe/workflow/catcher"
10
12
  require_relative "floe/workflow/choice_rule"
@@ -17,11 +19,6 @@ require_relative "floe/workflow/path"
17
19
  require_relative "floe/workflow/payload_template"
18
20
  require_relative "floe/workflow/reference_path"
19
21
  require_relative "floe/workflow/retrier"
20
- require_relative "floe/workflow/runner"
21
- require_relative "floe/workflow/runner/docker_mixin"
22
- require_relative "floe/workflow/runner/docker"
23
- require_relative "floe/workflow/runner/kubernetes"
24
- require_relative "floe/workflow/runner/podman"
25
22
  require_relative "floe/workflow/state"
26
23
  require_relative "floe/workflow/states/choice"
27
24
  require_relative "floe/workflow/states/fail"
@@ -55,17 +52,4 @@ module Floe
55
52
  def self.logger=(logger)
56
53
  @logger = logger
57
54
  end
58
-
59
- # Set the runner to use
60
- #
61
- # @example
62
- # Floe.set_runner "docker", kubernetes", {}
63
- # Floe.set_runner "docker", Floe::Workflow::Runner::Kubernetes.new({})
64
- #
65
- # @param scheme [String] scheme Protocol to register (e.g.: docker)
66
- # @param name_or_instance [String|Floe::Workflow::Runner] Name of runner to use for docker (e.g.: docker)
67
- # @param options [Hash] Options for constructor of the runner (optional)
68
- def self.set_runner(scheme, name_or_instance, options = {})
69
- Floe::Workflow::Runner.set_runner(scheme, name_or_instance, options)
70
- end
71
55
  end