kubernetes-deploy 0.29.0 → 1.0.0.pre.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/.buildkite/pipeline.nightly.yml +7 -0
  3. data/.rubocop.yml +0 -12
  4. data/.shopify-build/{kubernetes-deploy.yml → krane.yml} +8 -2
  5. data/1.0-Upgrade.md +109 -0
  6. data/CHANGELOG.md +60 -0
  7. data/CONTRIBUTING.md +2 -2
  8. data/Gemfile +1 -0
  9. data/README.md +86 -2
  10. data/dev.yml +3 -1
  11. data/dev/flamegraph-from-tests +1 -1
  12. data/exe/kubernetes-deploy +12 -9
  13. data/exe/kubernetes-render +9 -7
  14. data/exe/kubernetes-restart +3 -3
  15. data/exe/kubernetes-run +1 -1
  16. data/kubernetes-deploy.gemspec +5 -5
  17. data/lib/krane.rb +5 -3
  18. data/lib/{kubernetes-deploy → krane}/bindings_parser.rb +1 -1
  19. data/lib/krane/cli/deploy_command.rb +25 -13
  20. data/lib/krane/cli/global_deploy_command.rb +55 -0
  21. data/lib/krane/cli/krane.rb +12 -3
  22. data/lib/krane/cli/render_command.rb +19 -9
  23. data/lib/krane/cli/restart_command.rb +4 -4
  24. data/lib/krane/cli/run_command.rb +4 -4
  25. data/lib/krane/cli/version_command.rb +1 -1
  26. data/lib/krane/cluster_resource_discovery.rb +113 -0
  27. data/lib/{kubernetes-deploy → krane}/common.rb +8 -9
  28. data/lib/krane/concerns/template_reporting.rb +29 -0
  29. data/lib/{kubernetes-deploy → krane}/concurrency.rb +1 -1
  30. data/lib/{kubernetes-deploy → krane}/container_logs.rb +3 -2
  31. data/lib/{kubernetes-deploy → krane}/deferred_summary_logging.rb +2 -2
  32. data/lib/{kubernetes-deploy → krane}/delayed_exceptions.rb +0 -0
  33. data/lib/krane/deploy_task.rb +16 -0
  34. data/lib/krane/deploy_task_config_validator.rb +29 -0
  35. data/lib/krane/deprecated_deploy_task.rb +404 -0
  36. data/lib/{kubernetes-deploy → krane}/duration_parser.rb +1 -3
  37. data/lib/{kubernetes-deploy → krane}/ejson_secret_provisioner.rb +10 -13
  38. data/lib/krane/errors.rb +28 -0
  39. data/lib/{kubernetes-deploy → krane}/formatted_logger.rb +2 -2
  40. data/lib/krane/global_deploy_task.rb +210 -0
  41. data/lib/krane/global_deploy_task_config_validator.rb +12 -0
  42. data/lib/{kubernetes-deploy → krane}/kubeclient_builder.rb +13 -5
  43. data/lib/{kubernetes-deploy → krane}/kubectl.rb +14 -16
  44. data/lib/{kubernetes-deploy → krane}/kubernetes_resource.rb +110 -27
  45. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/cloudsql.rb +1 -1
  46. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/config_map.rb +1 -1
  47. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/cron_job.rb +1 -1
  48. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/custom_resource.rb +2 -2
  49. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/custom_resource_definition.rb +1 -5
  50. data/lib/krane/kubernetes_resource/daemon_set.rb +90 -0
  51. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/deployment.rb +2 -2
  52. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/horizontal_pod_autoscaler.rb +1 -1
  53. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/ingress.rb +1 -1
  54. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/job.rb +1 -1
  55. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/network_policy.rb +1 -1
  56. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/persistent_volume_claim.rb +1 -1
  57. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod.rb +6 -2
  58. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod_disruption_budget.rb +2 -2
  59. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod_set_base.rb +3 -3
  60. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod_template.rb +1 -1
  61. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/replica_set.rb +2 -2
  62. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/resource_quota.rb +1 -1
  63. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/role.rb +1 -1
  64. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/role_binding.rb +1 -1
  65. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/secret.rb +1 -1
  66. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/service.rb +2 -2
  67. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/service_account.rb +1 -1
  68. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/stateful_set.rb +2 -2
  69. data/lib/{kubernetes-deploy → krane}/label_selector.rb +1 -1
  70. data/lib/{kubernetes-deploy → krane}/oj.rb +0 -0
  71. data/lib/{kubernetes-deploy → krane}/options_helper.rb +2 -2
  72. data/lib/{kubernetes-deploy → krane}/remote_logs.rb +2 -2
  73. data/lib/krane/render_task.rb +149 -0
  74. data/lib/{kubernetes-deploy → krane}/renderer.rb +1 -1
  75. data/lib/{kubernetes-deploy → krane}/resource_cache.rb +10 -9
  76. data/lib/krane/resource_deployer.rb +265 -0
  77. data/lib/{kubernetes-deploy → krane}/resource_watcher.rb +24 -25
  78. data/lib/krane/restart_task.rb +228 -0
  79. data/lib/{kubernetes-deploy → krane}/rollout_conditions.rb +1 -1
  80. data/lib/krane/runner_task.rb +212 -0
  81. data/lib/{kubernetes-deploy → krane}/runner_task_config_validator.rb +1 -1
  82. data/lib/{kubernetes-deploy → krane}/statsd.rb +13 -27
  83. data/lib/krane/task_config.rb +22 -0
  84. data/lib/{kubernetes-deploy → krane}/task_config_validator.rb +1 -1
  85. data/lib/{kubernetes-deploy → krane}/template_sets.rb +5 -5
  86. data/lib/krane/version.rb +4 -0
  87. data/lib/kubernetes-deploy/deploy_task.rb +6 -608
  88. data/lib/kubernetes-deploy/errors.rb +1 -26
  89. data/lib/kubernetes-deploy/render_task.rb +5 -122
  90. data/lib/kubernetes-deploy/rescue_krane_exceptions.rb +18 -0
  91. data/lib/kubernetes-deploy/restart_task.rb +6 -198
  92. data/lib/kubernetes-deploy/runner_task.rb +6 -184
  93. metadata +96 -70
  94. data/lib/kubernetes-deploy/cluster_resource_discovery.rb +0 -34
  95. data/lib/kubernetes-deploy/kubernetes_resource/daemon_set.rb +0 -54
  96. data/lib/kubernetes-deploy/task_config.rb +0 -16
  97. data/lib/kubernetes-deploy/version.rb +0 -4
@@ -1,23 +1,22 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'kubernetes-deploy/concurrency'
4
- require 'kubernetes-deploy/resource_cache'
3
+ require 'krane/concurrency'
4
+ require 'krane/resource_cache'
5
5
 
6
- module KubernetesDeploy
6
+ module Krane
7
7
  class ResourceWatcher
8
- extend KubernetesDeploy::StatsD::MeasureMethods
8
+ extend Krane::StatsD::MeasureMethods
9
+ delegate :namespace, :context, :logger, to: :@task_config
9
10
 
10
- def initialize(resources:, logger:, context:, namespace:,
11
- deploy_started_at: Time.now.utc, operation_name: "deploy", timeout: nil, sha: nil)
11
+ def initialize(resources:, task_config:, deploy_started_at: Time.now.utc,
12
+ operation_name: "deploy", timeout: nil, sha: nil)
12
13
  unless resources.is_a?(Enumerable)
13
14
  raise ArgumentError, <<~MSG
14
15
  ResourceWatcher expects Enumerable collection, got `#{resources.class}` instead
15
16
  MSG
16
17
  end
17
18
  @resources = resources
18
- @logger = logger
19
- @namespace = namespace
20
- @context = context
19
+ @task_config = task_config
21
20
  @deploy_started_at = deploy_started_at
22
21
  @operation_name = operation_name
23
22
  @timeout = timeout
@@ -53,16 +52,16 @@ module KubernetesDeploy
53
52
  private
54
53
 
55
54
  def sync_resources(resources)
56
- cache = ResourceCache.new(@namespace, @context, @logger)
57
- KubernetesDeploy::Concurrency.split_across_threads(resources) { |r| r.sync(cache) }
55
+ cache = ResourceCache.new(@task_config)
56
+ Krane::Concurrency.split_across_threads(resources) { |r| r.sync(cache) }
58
57
  resources.each(&:after_sync)
59
58
  end
60
59
  measure_method(:sync_resources, "sync.duration")
61
60
 
62
61
  def statsd_tags
63
62
  {
64
- namespace: @namespace,
65
- context: @context,
63
+ namespace: namespace,
64
+ context: context,
66
65
  sha: @sha,
67
66
  }
68
67
  end
@@ -83,18 +82,18 @@ module KubernetesDeploy
83
82
  watch_time = (Time.now.utc - @deploy_started_at).round(1)
84
83
  new_failures.each do |resource|
85
84
  resource.report_status_to_statsd(watch_time)
86
- @logger.error("#{resource.id} failed to #{@operation_name} after #{watch_time}s")
85
+ logger.error("#{resource.id} failed to #{@operation_name} after #{watch_time}s")
87
86
  end
88
87
 
89
88
  new_timeouts.each do |resource|
90
89
  resource.report_status_to_statsd(watch_time)
91
- @logger.error("#{resource.id} rollout timed out after #{watch_time}s")
90
+ logger.error("#{resource.id} rollout timed out after #{watch_time}s")
92
91
  end
93
92
 
94
93
  if new_successes.present?
95
94
  new_successes.each { |r| r.report_status_to_statsd(watch_time) }
96
95
  success_string = ColorizedString.new("Successfully #{past_tense_operation} in #{watch_time}s:").green
97
- @logger.info("#{success_string} #{new_successes.map(&:id).join(', ')}")
96
+ logger.info("#{success_string} #{new_successes.map(&:id).join(', ')}")
98
97
  end
99
98
  end
100
99
 
@@ -102,7 +101,7 @@ module KubernetesDeploy
102
101
  return unless resources.present?
103
102
  resource_list = resources.map(&:id).join(', ')
104
103
  msg = reminder ? "Still waiting for: #{resource_list}" : "Continuing to wait for: #{resource_list}"
105
- @logger.info(msg)
104
+ logger.info(msg)
106
105
  end
107
106
 
108
107
  def report_and_give_up(remaining_resources)
@@ -130,34 +129,34 @@ module KubernetesDeploy
130
129
  timeouts, failures = failed_resources.partition(&:deploy_timed_out?)
131
130
  timeouts += global_timeouts
132
131
  if timeouts.present?
133
- @logger.summary.add_action(
132
+ logger.summary.add_action(
134
133
  "timed out waiting for #{timeouts.length} #{'resource'.pluralize(timeouts.length)} to #{@operation_name}"
135
134
  )
136
135
  end
137
136
 
138
137
  if failures.present?
139
- @logger.summary.add_action(
138
+ logger.summary.add_action(
140
139
  "failed to #{@operation_name} #{failures.length} #{'resource'.pluralize(failures.length)}"
141
140
  )
142
141
  end
143
142
 
144
- kubectl = Kubectl.new(namespace: @namespace, context: @context, logger: @logger, log_failure_by_default: false)
145
- KubernetesDeploy::Concurrency.split_across_threads(failed_resources + global_timeouts) do |r|
143
+ kubectl = Kubectl.new(task_config: @task_config, log_failure_by_default: false)
144
+ Krane::Concurrency.split_across_threads(failed_resources + global_timeouts) do |r|
146
145
  r.sync_debug_info(kubectl)
147
146
  end
148
147
 
149
- failed_resources.each { |r| @logger.summary.add_paragraph(r.debug_message) }
150
- global_timeouts.each { |r| @logger.summary.add_paragraph(r.debug_message(:gave_up, timeout: @timeout)) }
148
+ failed_resources.each { |r| logger.summary.add_paragraph(r.debug_message) }
149
+ global_timeouts.each { |r| logger.summary.add_paragraph(r.debug_message(:gave_up, timeout: @timeout)) }
151
150
  end
152
151
  end
153
152
 
154
153
  def record_success_statuses(successful_resources)
155
154
  success_count = successful_resources.length
156
155
  if success_count > 0
157
- @logger.summary.add_action("successfully #{past_tense_operation} #{success_count} "\
156
+ logger.summary.add_action("successfully #{past_tense_operation} #{success_count} "\
158
157
  "#{'resource'.pluralize(success_count)}")
159
158
  final_statuses = successful_resources.map(&:pretty_status).join("\n")
160
- @logger.summary.add_paragraph("#{ColorizedString.new('Successful resources').green}\n#{final_statuses}")
159
+ logger.summary.add_paragraph("#{ColorizedString.new('Successful resources').green}\n#{final_statuses}")
161
160
  end
162
161
  end
163
162
 
@@ -0,0 +1,228 @@
1
+ # frozen_string_literal: true
2
+ require 'krane/common'
3
+ require 'krane/kubernetes_resource'
4
+ require 'krane/kubernetes_resource/deployment'
5
+ require 'krane/kubeclient_builder'
6
+ require 'krane/resource_watcher'
7
+ require 'krane/kubectl'
8
+
9
+ module Krane
10
+ # Restart the pods in one or more deployments
11
+ class RestartTask
12
+ class FatalRestartError < FatalDeploymentError; end
13
+
14
# Raised when the Kubernetes API rejects the patch request that triggers a restart.
class RestartAPIError < FatalRestartError
  # @param deployment_name [String] name of the deployment that could not be restarted
  # @param response [#code, #body, #to_s] the failed API response; a plain error message
  #   is also accepted for callers that only have the exception text
  def initialize(deployment_name, response)
    details = if response.respond_to?(:code) && response.respond_to?(:body)
      "API returned non-200 response code (#{response.code})\n" \
        "Response:\n#{response.body}"
    else
      # A String (e.g. an exception message) has neither #code nor #body; calling them
      # would raise NoMethodError and hide the real API failure.
      response.to_s
    end
    super("Failed to restart #{deployment_name}. #{details}")
  end
end
21
+
22
+ HTTP_OK_RANGE = 200..299
23
+ ANNOTATION = "shipit.shopify.io/restart"
24
+
25
+ # Initializes the restart task
26
+ #
27
+ # @param context [String] Kubernetes context / cluster
28
+ # @param namespace [String] Kubernetes namespace
29
+ # @param logger [Object] Logger object (defaults to an instance of Krane::FormattedLogger)
30
+ # @param max_watch_seconds [Integer] Timeout in seconds
31
def initialize(context:, namespace:, logger: nil, max_watch_seconds: nil)
  @context = context
  @namespace = namespace
  @max_watch_seconds = max_watch_seconds
  # Use the caller's logger when given, otherwise build the default formatted logger.
  @logger = logger || Krane::FormattedLogger.build(namespace, context)
  # Bundles context, namespace and logger for the collaborators that take a task_config.
  @task_config = Krane::TaskConfig.new(context, namespace, @logger)
end
38
+
39
+ # Runs the task, returning a boolean representing success or failure
40
+ #
41
+ # @return [Boolean]
42
def run(*args, **kwargs)
  # Keywords are forwarded explicitly: since Ruby 3.0 a bare `*args` splat hands
  # `selector:` / `verify_result:` to run! as one positional Hash, raising ArgumentError.
  perform!(*args, **kwargs)
  true
rescue FatalDeploymentError
  false
end
48
+ alias_method :perform, :run
49
+
50
+ # Runs the task, raising exceptions in case of issues
51
+ #
52
+ # @param deployments_names [Array<String>] Array of workload names to restart
53
+ # @param selector [Hash] Selector(s) parsed by Krane::LabelSelector
54
+ # @param verify_result [Boolean] Wait for completion and verify success
55
+ #
56
+ # @return [nil]
57
def run!(deployments_names = nil, selector: nil, verify_result: true)
  started_at = Time.now.utc
  targets = nil # still nil in the rescue clauses if we fail before targets are resolved
  report_duration = lambda do |status|
    StatsD.client.distribution('restart.duration', StatsD.duration(started_at), tags: tags(status, targets))
  end
  @logger.reset

  @logger.phase_heading("Initializing restart")
  verify_config!
  targets = identify_target_deployments(deployments_names, selector: selector)

  @logger.phase_heading("Triggering restart by touching ENV[RESTARTED_AT]")
  patch_kubeclient_deployments(targets)

  if verify_result
    @logger.phase_heading("Waiting for rollout")
    verify_restart(build_watchables(targets, started_at))
  else
    notice = "Result verification is disabled for this task"
    @logger.summary.add_paragraph(ColorizedString.new(notice).yellow)
  end
  report_duration.call('success')
  @logger.print_summary(:success)
rescue DeploymentTimeoutError
  # Listed before FatalDeploymentError so timeouts get their own status and summary.
  report_duration.call('timeout')
  @logger.print_summary(:timed_out)
  raise
rescue FatalDeploymentError => failure
  report_duration.call('failure')
  # A message equal to the class name means none was supplied; don't echo it.
  @logger.summary.add_action(failure.message) unless failure.message == failure.class.to_s
  @logger.print_summary(:failure)
  raise
end
88
+ alias_method :perform!, :run!
89
+
90
+ private
91
+
92
# Builds the StatsD tag list attached to the restart.duration metric.
#
# @param status [String] outcome label; callers pass 'success', 'timeout' or 'failure'
# @param deployments [#to_a, nil] the targeted deployments; nil counts as zero
# @return [Array<String>] tags in "key:value" form
def tags(status, deployments)
  # The literal used to end in a stray `}`, which leaked into the last tag ("deployments:2}").
  %W(namespace:#{@namespace} context:#{@context} status:#{status} deployments:#{deployments.to_a.length})
end
95
+
96
# Resolves the deployments this restart should act on.
#
# With no names, every deployment in the namespace (optionally narrowed by a label
# selector) that carries the ANNOTATION annotation is targeted. With names, exactly
# those deployments are fetched; names and a selector cannot be combined.
#
# @param deployment_names [Array<String>, nil] explicit deployment names, or nil
# @param selector [#to_s, nil] label selector used only when no names are given
# @return [Array] the deployment records returned by the apps/v1 client
# @raise [FatalRestartError] on an empty name list, names combined with a selector,
#   or when nothing matches
def identify_target_deployments(deployment_names, selector: nil)
  if deployment_names.nil?
    deployments = if selector.nil?
      @logger.info("Configured to restart all deployments with the `#{ANNOTATION}` annotation")
      apps_v1_kubeclient.get_deployments(namespace: @namespace)
    else
      selector_string = selector.to_s
      @logger.info(
        "Configured to restart all deployments with the `#{ANNOTATION}` annotation and #{selector_string} selector"
      )
      apps_v1_kubeclient.get_deployments(namespace: @namespace, label_selector: selector_string)
    end
    deployments.select! do |d|
      # A deployment without any annotations has no annotations object at all; treat it
      # as "not opted in" rather than calling [] on nil.
      annotations = d.metadata.annotations
      annotations && annotations[ANNOTATION]
    end

    if deployments.none?
      raise FatalRestartError, "no deployments with the `#{ANNOTATION}` annotation found in namespace #{@namespace}"
    end
  elsif deployment_names.empty?
    raise FatalRestartError, "Configured to restart deployments by name, but list of names was blank"
  elsif !selector.nil?
    raise FatalRestartError, "Can't specify deployment names and selector at the same time"
  else
    deployment_names = deployment_names.uniq
    list = deployment_names.join(', ')
    @logger.info("Configured to restart deployments by name: #{list}")

    deployments = fetch_deployments(deployment_names)
    if deployments.none?
      raise FatalRestartError, "no deployments with names #{list} found in namespace #{@namespace}"
    end
  end
  deployments
end
129
+
130
# Wraps each fetched deployment record in a Deployment resource that can be watched.
#
# @param kubeclient_resources [Enumerable] deployment records as returned by the API client
# @param started [Time] when the restart began; recorded as each resource's deploy start
# @return [Array<Deployment>]
def build_watchables(kubeclient_resources, started)
  kubeclient_resources.map do |record|
    Deployment.new(
      namespace: @namespace,
      context: @context,
      definition: record.to_h.deep_stringify_keys,
      logger: @logger
    ).tap do |watchable|
      # we don't care what happened to the resource before the restart cmd ran
      watchable.deploy_started_at = started
    end
  end
end
138
+
139
# Sends the restart patch for a single deployment record.
#
# @param record [Object] deployment record exposing metadata.name and the pod template spec
# @return [Object] whatever the apps/v1 client's patch_deployment returns
def patch_deployment_with_restart(record)
  client = apps_v1_kubeclient
  deployment_name = record.metadata.name
  patch = build_patch_payload(record)
  client.patch_deployment(deployment_name, patch, @namespace)
end
146
+
147
# Triggers a restart of every given deployment, stopping at the first API failure.
#
# @param deployments [Enumerable] deployment records to patch
# @return [Enumerable] the collection that was passed in
# @raise [RestartAPIError] when the API rejects a patch request
def patch_kubeclient_deployments(deployments)
  deployments.each do |record|
    begin
      patch_deployment_with_restart(record)
      @logger.info("Triggered `#{record.metadata.name}` restart")
    rescue Kubeclient::HttpError => e
      # RestartAPIError reads #code and #body from its second argument. Passing the bare
      # message String made it fail with NoMethodError, so hand over a small response-like
      # object carrying the HTTP status and the API's error message instead.
      failure = Struct.new(:code, :body).new(e.error_code, e.message)
      raise RestartAPIError.new(record.metadata.name, failure)
    end
  end
end
157
+
158
# Looks up each named deployment in the task's namespace.
#
# @param list [Array<String>] deployment names
# @return [Array] one record per name, in the same order
# @raise [FatalRestartError] as soon as one of the names does not exist
def fetch_deployments(list)
  list.map do |name|
    begin
      apps_v1_kubeclient.get_deployment(name, @namespace)
    rescue Kubeclient::ResourceNotFoundError
      raise FatalRestartError, "Deployment `#{name}` not found in namespace `#{@namespace}`"
    end
  end
end
169
+
170
# Builds the patch body that sets RESTARTED_AT on every container of the pod template.
#
# @param deployment [Object] record exposing spec.template.spec.containers
# @return [Hash] nested patch hash keyed by :spec / :template / :spec / :containers
def build_patch_payload(deployment)
  container_patches = deployment.spec.template.spec.containers.map do |container|
    # Current Unix time as a string; evaluated once per container.
    restart_marker = { name: "RESTARTED_AT", value: Time.now.to_i.to_s }
    { name: container.name, env: [restart_marker] }
  end

  { spec: { template: { spec: { containers: container_patches } } } }
end
187
+
188
# Watches the restarted resources and raises unless every one of them succeeded.
#
# @param resources [Array] watchable resources built for the restarted deployments
# @return [nil]
# @raise [DeploymentTimeoutError] when every unsuccessful resource timed out
# @raise [FatalDeploymentError] when at least one resource failed for another reason
def verify_restart(resources)
  watcher = ResourceWatcher.new(
    resources: resources,
    operation_name: "restart",
    timeout: @max_watch_seconds,
    task_config: @task_config
  )
  watcher.run

  unsuccessful = resources.reject(&:deploy_succeeded?)
  return if unsuccessful.empty?

  raise DeploymentTimeoutError if unsuccessful.all?(&:deploy_timed_out?)
  raise FatalDeploymentError
end
198
+
199
# Validates the task configuration before any cluster changes are made.
#
# @return [nil] when the configuration is valid
# @raise [Krane::TaskConfigurationError] after recording each validation error in the summary
def verify_config!
  validator = TaskConfigValidator.new(@task_config, kubectl, kubeclient_builder)
  return if validator.valid?

  @logger.summary.add_action("Configuration invalid")
  # One "- " bullet per validation error.
  @logger.summary.add_paragraph(validator.errors.map { |problem| "- #{problem}" }.join("\n"))
  raise Krane::TaskConfigurationError
end
207
+
208
# Memoized apps/v1 API client for the task's context.
def apps_v1_kubeclient
  return @apps_v1_kubeclient if @apps_v1_kubeclient

  @apps_v1_kubeclient = kubeclient_builder.build_apps_v1_kubeclient(@context)
end
211
+
212
# Memoized core v1 API client for the task's context.
def kubeclient
  return @kubeclient if @kubeclient

  @kubeclient = kubeclient_builder.build_v1_kubeclient(@context)
end
215
+
216
# Memoized Kubectl wrapper bound to this task's config, with failure logging on by default.
def kubectl
  return @kubectl if @kubectl

  @kubectl = Kubectl.new(task_config: @task_config, log_failure_by_default: true)
end
219
+
220
# Memoized v1beta1 API client for the task's context.
def v1beta1_kubeclient
  return @v1beta1_kubeclient if @v1beta1_kubeclient

  @v1beta1_kubeclient = kubeclient_builder.build_v1beta1_kubeclient(@context)
end
223
+
224
# Memoized factory used to build the API clients above.
def kubeclient_builder
  return @kubeclient_builder if @kubeclient_builder

  @kubeclient_builder = KubeclientBuilder.new
end
227
+ end
228
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
- module KubernetesDeploy
2
+ module Krane
3
3
  class RolloutConditionsError < StandardError
4
4
  end
5
5
 
@@ -0,0 +1,212 @@
1
+ # frozen_string_literal: true
2
+ require 'tempfile'
3
+
4
+ require 'krane/common'
5
+ require 'krane/kubeclient_builder'
6
+ require 'krane/kubectl'
7
+ require 'krane/resource_cache'
8
+ require 'krane/resource_watcher'
9
+ require 'krane/kubernetes_resource'
10
+ require 'krane/kubernetes_resource/pod'
11
+ require 'krane/runner_task_config_validator'
12
+
13
+ module Krane
14
+ # Run a pod that exits upon completing a task
15
+ class RunnerTask
16
+ class TaskTemplateMissingError < TaskConfigurationError; end
17
+
18
+ attr_reader :pod_name
19
+
20
+ # Initializes the runner task
21
+ #
22
+ # @param namespace [String] Kubernetes namespace
23
+ # @param context [String] Kubernetes context / cluster
24
+ # @param logger [Object] Logger object (defaults to an instance of Krane::FormattedLogger)
25
+ # @param max_watch_seconds [Integer] Timeout in seconds
26
def initialize(namespace:, context:, logger: nil, max_watch_seconds: nil)
  @namespace = namespace
  @context = context
  @max_watch_seconds = max_watch_seconds
  # Use the caller's logger when given, otherwise build the default formatted logger.
  @logger = logger || Krane::FormattedLogger.build(namespace, context)
  # Bundles context, namespace and logger for the collaborators that take a task_config.
  @task_config = Krane::TaskConfig.new(context, namespace, @logger)
end
33
+
34
+ # Runs the task, returning a boolean representing success or failure
35
+ #
36
+ # @return [Boolean]
37
def run(*args, **kwargs)
  # run! accepts keyword arguments only. Since Ruby 3.0 a bare `*args` splat would pass
  # them on as one positional Hash and raise ArgumentError, so forward keywords explicitly.
  run!(*args, **kwargs)
  true
rescue DeploymentTimeoutError, FatalDeploymentError
  false
end
43
+
44
+ # Runs the task, raising exceptions in case of issues
45
+ #
46
+ # @param task_template [String] The template file you'll be rendering
47
+ # @param entrypoint [Array<String>] Override the default command in the container image
48
+ # @param args [Array<String>] Override the default arguments for the command
49
+ # @param env_vars [Array<String>] List of env vars
50
+ # @param verify_result [Boolean] Wait for completion and verify pod success
51
+ #
52
+ # @return [nil]
53
def run!(task_template:, entrypoint:, args:, env_vars: [], verify_result: true)
  started_at = Time.now.utc
  report_duration = lambda do |status|
    StatsD.client.distribution('task_runner.duration', StatsD.duration(started_at), tags: statsd_tags(status))
  end
  @logger.reset

  @logger.phase_heading("Initializing task")

  @logger.info("Validating configuration")
  verify_config!(task_template, args)
  @logger.info("Using namespace '#{@namespace}' in context '#{@context}'")

  task_pod = build_pod(task_template, entrypoint, args, env_vars, verify_result)
  validate_pod(task_pod)

  @logger.phase_heading("Running pod")
  create_pod(task_pod)

  if verify_result
    @logger.phase_heading("Streaming logs")
    watch_pod(task_pod)
  else
    # No waiting: just capture whatever status the pod has right after creation.
    record_status_once(task_pod)
  end
  report_duration.call('success')
  @logger.print_summary(:success)
rescue DeploymentTimeoutError
  # Listed before FatalDeploymentError so timeouts get their own status and summary.
  report_duration.call('timeout')
  @logger.print_summary(:timed_out)
  raise
rescue FatalDeploymentError
  report_duration.call('failure')
  @logger.print_summary(:failure)
  raise
end
86
+
87
+ private
88
+
89
# Creates the pod through the API and remembers its name in @pod_name.
#
# @param pod [Pod] the resource to create; its deploy_started_at is set to the current UTC time
# @raise [FatalDeploymentError] when the API call fails with Kubeclient::HttpError
def create_pod(pod)
  @logger.info("Creating pod '#{pod.name}'")
  pod.deploy_started_at = Time.now.utc
  kubeclient.create_pod(pod.to_kubeclient_resource)
  # Only recorded once the API call has gone through.
  @pod_name = pod.name
  @logger.info("Pod creation succeeded")
rescue Kubeclient::HttpError => api_error
  failure = "Failed to create pod: #{api_error.class.name}: #{api_error.message}"
  @logger.summary.add_paragraph(failure)
  raise FatalDeploymentError, failure
end
100
+
101
# Turns the named pod template into a Pod resource with the requested overrides applied.
#
# @param template_name [String] name of the pod template to fetch
# @param entrypoint [Array<String>, nil] command override for the task container
# @param args [Array<String>, nil] argument override for the task container
# @param env_vars [Array<String>] extra environment variables in KEY=VALUE form
# @param verify_result [Boolean] whether the run will be watched to completion
# @return [Pod]
def build_pod(template_name, entrypoint, args, env_vars, verify_result)
  base_template = get_template(template_name)
  @logger.info("Using template '#{template_name}'")

  definition = build_pod_definition(base_template)
  set_container_overrides!(definition, entrypoint, args, env_vars)
  ensure_valid_restart_policy!(definition, verify_result)

  Pod.new(
    namespace: @namespace,
    context: @context,
    logger: @logger,
    stream_logs: true,
    definition: definition.to_hash.deep_stringify_keys,
    statsd_tags: []
  )
end
110
+
111
# Asks the pod to validate its own definition, handing it this task's kubectl wrapper.
#
# @param pod [Pod]
# @return [Object] whatever Pod#validate_definition returns
def validate_pod(pod)
  cli = kubectl
  pod.validate_definition(cli)
end
114
+
115
# Watches the pod until the watcher returns, then raises if it did not succeed.
#
# @param pod [Pod]
# @return [nil]
# @raise [DeploymentTimeoutError] when the pod timed out
# @raise [FatalDeploymentError] when the pod failed
def watch_pod(pod)
  watcher = ResourceWatcher.new(
    resources: [pod],
    timeout: @max_watch_seconds,
    operation_name: "run",
    task_config: @task_config
  )
  watcher.run(delay_sync: 1, reminder_interval: 30.seconds)

  # The timeout check comes first, so a timed-out pod is never reported as a plain failure.
  raise DeploymentTimeoutError if pod.deploy_timed_out?
  raise FatalDeploymentError if pod.deploy_failed?
end
122
+
123
# Syncs the pod a single time and records its status in the summary; used when
# result verification is disabled.
#
# @param pod [Pod]
def record_status_once(pod)
  pod.sync(ResourceCache.new(@task_config))

  headline = ColorizedString.new('Result verification is disabled for this task.').yellow
  report = "#{headline}\n" \
    "The following status was observed immediately after pod creation:\n" \
    "#{pod.pretty_status}\n"
  @logger.summary.add_paragraph(report)
end
133
+
134
# Validates the runner configuration before a pod is built.
#
# @param task_template [String] name of the pod template to run
# @param args [Array<String>, nil] arguments passed to the validator
# @return [nil] when the configuration is valid
# @raise [Krane::TaskConfigurationError] after recording each validation error in the summary
def verify_config!(task_template, args)
  task_config_validator = RunnerTaskConfigValidator.new(task_template, args, @task_config, kubectl,
    kubeclient_builder)
  unless task_config_validator.valid?
    @logger.summary.add_action("Configuration invalid")
    # Array() rather than [errors]: wrapping the error list in another array printed a single
    # bullet containing the inspected array instead of one bullet per error.
    bullets = Array(task_config_validator.errors).map { |err| "- #{err}" }
    @logger.summary.add_paragraph(bullets.join("\n"))
    raise Krane::TaskConfigurationError
  end
end
143
+
144
# Fetches the named pod template from the task's namespace and returns its `template` part.
#
# @param template_name [String]
# @return [Object] the template attribute of the fetched pod template
# @raise [TaskTemplateMissingError] when the pod template does not exist
# @raise [FatalKubeAPIError] on any other Kubeclient::HttpError
def get_template(template_name)
  kubeclient.get_pod_template(template_name, @namespace).template
rescue Kubeclient::ResourceNotFoundError
  not_found = "Pod template `#{template_name}` not found in namespace `#{@namespace}`, context `#{@context}`"
  @logger.summary.add_paragraph(not_found)
  raise TaskTemplateMissingError, not_found
rescue Kubeclient::HttpError => api_error
  raise FatalKubeAPIError, "Error retrieving pod template: #{api_error.class.name}: #{api_error.message}"
end
154
+
155
# Derives a Pod definition from a pod template and gives it a unique, length-limited name.
#
# @param base_template [Object] the template to copy; must expose kind=, apiVersion= and metadata
# @return [Object] the copy, marked as a v1 Pod in the task's namespace and renamed to
#   "<template name>-<16 hex chars>", capped at 63 characters
def build_pod_definition(base_template)
  pod_definition = base_template.dup
  pod_definition.kind = 'Pod'
  pod_definition.apiVersion = 'v1'
  pod_definition.metadata.namespace = @namespace

  max_length = 63
  suffix = "-#{SecureRandom.hex(8)}"
  base_name = pod_definition.metadata.name
  unique_name = base_name + suffix
  if unique_name.length > max_length
    # Shorten the template name, not the combined name: cutting the combined string ate into
    # the random suffix (losing uniqueness) and could leave the name ending in "-".
    trimmed = base_name[0, max_length - suffix.length].sub(/[^A-Za-z0-9]+\z/, '')
    unique_name = trimmed + suffix
    @logger.warn("Name is too long, using '#{unique_name}'")
  end
  pod_definition.metadata.name = unique_name

  pod_definition
end
167
+
168
# Applies command, argument and environment overrides to the 'task-runner' container.
#
# @param pod_definition [Object] pod definition exposing spec.containers
# @param entrypoint [Array<String>, nil] replaces the container command when given
# @param args [Array<String>, nil] replaces the container args when given
# @param env_vars [Array<String>] KEY=VALUE strings appended to the container env
# @return [Array<Hash>] the container's resulting env list
# @raise [TaskConfigurationError] when no container is named 'task-runner'
def set_container_overrides!(pod_definition, entrypoint, args, env_vars)
  runner = pod_definition.spec.containers.find { |candidate| candidate.name == 'task-runner' }
  unless runner
    problem = "Pod spec does not contain a template container called 'task-runner'"
    @logger.summary.add_paragraph(problem)
    raise TaskConfigurationError, problem
  end

  runner.command = entrypoint if entrypoint
  runner.args = args if args

  extra_env = env_vars.map do |pair|
    # Split on the first '=' only, so values may themselves contain '='.
    name, value = pair.split('=', 2)
    { name: name, value: value }
  end
  existing_env = (runner.env || []).map(&:to_h)
  runner.env = existing_env + extra_env
end
186
+
187
# Forces the pod's restart policy to 'Never' when the run's result will be verified.
#
# @param template [Object] pod definition exposing spec.restartPolicy
# @param verify [Boolean] whether result verification is enabled
# @return [String, nil] "Never" when the policy was changed, nil otherwise
def ensure_valid_restart_policy!(template, verify)
  current_policy = template.spec.restartPolicy
  return if !verify || current_policy == "Never"

  @logger.warn("Changed Pod RestartPolicy from '#{current_policy}' to 'Never'. Disable "\
    "result verification to use '#{current_policy}'.")
  template.spec.restartPolicy = "Never"
end
195
+
196
# Memoized Kubectl wrapper bound to this task's config, with failure logging on by default.
def kubectl
  return @kubectl if @kubectl

  @kubectl = Kubectl.new(task_config: @task_config, log_failure_by_default: true)
end
199
+
200
# Memoized core v1 API client for the task's context.
def kubeclient
  return @kubeclient if @kubeclient

  @kubeclient = kubeclient_builder.build_v1_kubeclient(@context)
end
203
+
204
# Memoized factory used to build the API client.
def kubeclient_builder
  return @kubeclient_builder if @kubeclient_builder

  @kubeclient_builder = KubeclientBuilder.new
end
207
+
208
# Builds the StatsD tag list attached to the task_runner.duration metric.
#
# @param status [String] outcome label; callers pass 'success', 'timeout' or 'failure'
# @return [Array<String>] tags in "key:value" form
def statsd_tags(status)
  [
    "namespace:#{@namespace}",
    "context:#{@context}",
    "status:#{status}",
  ]
end
211
+ end
212
+ end