kubernetes-deploy 0.30.0 → 0.31.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/CHANGELOG.md +15 -0
  4. data/CONTRIBUTING.md +2 -2
  5. data/README.md +1 -1
  6. data/dev.yml +1 -1
  7. data/dev/flamegraph-from-tests +1 -1
  8. data/exe/kubernetes-deploy +11 -9
  9. data/exe/kubernetes-render +9 -7
  10. data/exe/kubernetes-restart +3 -3
  11. data/exe/kubernetes-run +1 -1
  12. data/kubernetes-deploy.gemspec +3 -3
  13. data/lib/krane.rb +5 -3
  14. data/lib/{kubernetes-deploy → krane}/bindings_parser.rb +1 -1
  15. data/lib/krane/cli/deploy_command.rb +14 -11
  16. data/lib/krane/cli/global_deploy_command.rb +47 -0
  17. data/lib/krane/cli/krane.rb +12 -3
  18. data/lib/krane/cli/render_command.rb +11 -9
  19. data/lib/krane/cli/restart_command.rb +4 -4
  20. data/lib/krane/cli/run_command.rb +3 -3
  21. data/lib/krane/cli/version_command.rb +1 -1
  22. data/lib/krane/cluster_resource_discovery.rb +102 -0
  23. data/lib/{kubernetes-deploy → krane}/common.rb +8 -9
  24. data/lib/krane/concerns/template_reporting.rb +29 -0
  25. data/lib/{kubernetes-deploy → krane}/concurrency.rb +1 -1
  26. data/lib/{kubernetes-deploy → krane}/container_logs.rb +1 -1
  27. data/lib/{kubernetes-deploy → krane}/deferred_summary_logging.rb +2 -2
  28. data/lib/{kubernetes-deploy → krane}/delayed_exceptions.rb +0 -0
  29. data/lib/krane/deploy_task.rb +2 -2
  30. data/lib/{kubernetes-deploy → krane}/deploy_task_config_validator.rb +1 -1
  31. data/lib/krane/deprecated_deploy_task.rb +404 -0
  32. data/lib/{kubernetes-deploy → krane}/duration_parser.rb +1 -1
  33. data/lib/{kubernetes-deploy → krane}/ejson_secret_provisioner.rb +3 -3
  34. data/lib/krane/errors.rb +28 -0
  35. data/lib/{kubernetes-deploy → krane}/formatted_logger.rb +2 -2
  36. data/lib/krane/global_deploy_task.rb +210 -0
  37. data/lib/krane/global_deploy_task_config_validator.rb +12 -0
  38. data/lib/{kubernetes-deploy → krane}/kubeclient_builder.rb +11 -3
  39. data/lib/{kubernetes-deploy → krane}/kubectl.rb +2 -2
  40. data/lib/{kubernetes-deploy → krane}/kubernetes_resource.rb +54 -22
  41. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/cloudsql.rb +1 -1
  42. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/config_map.rb +1 -1
  43. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/cron_job.rb +1 -1
  44. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/custom_resource.rb +2 -2
  45. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/custom_resource_definition.rb +1 -5
  46. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/daemon_set.rb +7 -4
  47. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/deployment.rb +2 -2
  48. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/horizontal_pod_autoscaler.rb +1 -1
  49. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/ingress.rb +1 -1
  50. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/job.rb +1 -1
  51. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/network_policy.rb +1 -1
  52. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/persistent_volume_claim.rb +1 -1
  53. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod.rb +2 -2
  54. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod_disruption_budget.rb +2 -2
  55. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod_set_base.rb +3 -3
  56. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod_template.rb +1 -1
  57. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/replica_set.rb +2 -2
  58. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/resource_quota.rb +1 -1
  59. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/role.rb +1 -1
  60. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/role_binding.rb +1 -1
  61. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/secret.rb +1 -1
  62. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/service.rb +2 -2
  63. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/service_account.rb +1 -1
  64. data/lib/{kubernetes-deploy → krane}/kubernetes_resource/stateful_set.rb +2 -2
  65. data/lib/{kubernetes-deploy → krane}/label_selector.rb +1 -1
  66. data/lib/{kubernetes-deploy → krane}/oj.rb +0 -0
  67. data/lib/{kubernetes-deploy → krane}/options_helper.rb +2 -2
  68. data/lib/{kubernetes-deploy → krane}/remote_logs.rb +2 -2
  69. data/lib/krane/render_task.rb +149 -0
  70. data/lib/{kubernetes-deploy → krane}/renderer.rb +1 -1
  71. data/lib/{kubernetes-deploy → krane}/resource_cache.rb +4 -3
  72. data/lib/krane/resource_deployer.rb +265 -0
  73. data/lib/{kubernetes-deploy → krane}/resource_watcher.rb +6 -6
  74. data/lib/krane/restart_task.rb +224 -0
  75. data/lib/{kubernetes-deploy → krane}/rollout_conditions.rb +1 -1
  76. data/lib/krane/runner_task.rb +212 -0
  77. data/lib/{kubernetes-deploy → krane}/runner_task_config_validator.rb +1 -1
  78. data/lib/{kubernetes-deploy → krane}/statsd.rb +13 -27
  79. data/lib/krane/task_config.rb +19 -0
  80. data/lib/{kubernetes-deploy → krane}/task_config_validator.rb +1 -1
  81. data/lib/{kubernetes-deploy → krane}/template_sets.rb +5 -5
  82. data/lib/krane/version.rb +4 -0
  83. data/lib/kubernetes-deploy/deploy_task.rb +6 -603
  84. data/lib/kubernetes-deploy/errors.rb +1 -26
  85. data/lib/kubernetes-deploy/render_task.rb +5 -139
  86. data/lib/kubernetes-deploy/rescue_krane_exceptions.rb +18 -0
  87. data/lib/kubernetes-deploy/restart_task.rb +6 -215
  88. data/lib/kubernetes-deploy/runner_task.rb +6 -203
  89. metadata +75 -58
  90. data/lib/kubernetes-deploy/cluster_resource_discovery.rb +0 -57
  91. data/lib/kubernetes-deploy/task_config.rb +0 -16
  92. data/lib/kubernetes-deploy/version.rb +0 -4
@@ -5,7 +5,7 @@ require 'securerandom'
5
5
  require 'yaml'
6
6
  require 'json'
7
7
 
8
- module KubernetesDeploy
8
+ module Krane
9
9
  class Renderer
10
10
  class InvalidPartialError < InvalidTemplateError
11
11
  attr_accessor :parents, :content, :filename
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'concurrent/hash'
4
4
 
5
- module KubernetesDeploy
5
+ module Krane
6
6
  class ResourceCache
7
7
  delegate :namespace, :context, :logger, to: :@task_config
8
8
 
@@ -17,7 +17,7 @@ module KubernetesDeploy
17
17
  def get_instance(kind, resource_name, raise_if_not_found: false)
18
18
  instance = use_or_populate_cache(kind).fetch(resource_name, {})
19
19
  if instance.blank? && raise_if_not_found
20
- raise KubernetesDeploy::Kubectl::ResourceNotFoundError, "Resource does not exist (used cache for kind #{kind})"
20
+ raise Krane::Kubectl::ResourceNotFoundError, "Resource does not exist (used cache for kind #{kind})"
21
21
  end
22
22
  instance
23
23
  rescue KubectlError
@@ -51,9 +51,10 @@ module KubernetesDeploy
51
51
 
52
52
  def fetch_by_kind(kind)
53
53
  resource_class = KubernetesResource.class_for_kind(kind)
54
+ global_kind = @task_config.global_kinds.map(&:downcase).include?(kind.downcase)
54
55
  output_is_sensitive = resource_class.nil? ? false : resource_class::SENSITIVE_TEMPLATE_CONTENT
55
56
  raw_json, _, st = @kubectl.run("get", kind, "--chunk-size=0", attempts: 5, output: "json",
56
- output_is_sensitive: output_is_sensitive)
57
+ output_is_sensitive: output_is_sensitive, use_namespace: !global_kind)
57
58
  raise KubectlError unless st.success?
58
59
 
59
60
  instances = {}
@@ -0,0 +1,265 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'krane/resource_watcher'
4
+ require 'krane/concerns/template_reporting'
5
+
6
+ module Krane
7
+ class ResourceDeployer
8
+ extend Krane::StatsD::MeasureMethods
9
+ include Krane::TemplateReporting
10
+
11
+ delegate :logger, to: :@task_config
12
+ attr_reader :statsd_tags
13
+
14
+ def initialize(task_config:, prune_whitelist:, max_watch_seconds:, current_sha: nil, selector:, statsd_tags:)
15
+ @task_config = task_config
16
+ @prune_whitelist = prune_whitelist
17
+ @max_watch_seconds = max_watch_seconds
18
+ @current_sha = current_sha
19
+ @selector = selector
20
+ @statsd_tags = statsd_tags
21
+ end
22
+
23
+ def deploy!(resources, verify_result, prune)
24
+ if verify_result
25
+ deploy_all_resources(resources, prune: prune, verify: true)
26
+ failed_resources = resources.reject(&:deploy_succeeded?)
27
+ success = failed_resources.empty?
28
+ if !success && failed_resources.all?(&:deploy_timed_out?)
29
+ raise DeploymentTimeoutError
30
+ end
31
+ raise FatalDeploymentError unless success
32
+ else
33
+ deploy_all_resources(resources, prune: prune, verify: false)
34
+ logger.summary.add_action("deployed #{resources.length} #{'resource'.pluralize(resources.length)}")
35
+ warning = <<~MSG
36
+ Deploy result verification is disabled for this deploy.
37
+ This means the desired changes were communicated to Kubernetes, but the deploy did not make sure they actually succeeded.
38
+ MSG
39
+ logger.summary.add_paragraph(ColorizedString.new(warning).yellow)
40
+ end
41
+ end
42
+
43
+ def predeploy_priority_resources(resource_list, predeploy_sequence)
44
+ bare_pods = resource_list.select { |resource| resource.is_a?(Pod) }
45
+ if bare_pods.count == 1
46
+ bare_pods.first.stream_logs = true
47
+ end
48
+
49
+ predeploy_sequence.each do |resource_type|
50
+ matching_resources = resource_list.select { |r| r.type == resource_type }
51
+ next if matching_resources.empty?
52
+ deploy_resources(matching_resources, verify: true, record_summary: false)
53
+
54
+ failed_resources = matching_resources.reject(&:deploy_succeeded?)
55
+ fail_count = failed_resources.length
56
+ if fail_count > 0
57
+ Krane::Concurrency.split_across_threads(failed_resources) do |r|
58
+ r.sync_debug_info(kubectl)
59
+ end
60
+ failed_resources.each { |r| logger.summary.add_paragraph(r.debug_message) }
61
+ raise FatalDeploymentError, "Failed to deploy #{fail_count} priority #{'resource'.pluralize(fail_count)}"
62
+ end
63
+ logger.blank_line
64
+ end
65
+ end
66
+ measure_method(:predeploy_priority_resources, 'priority_resources.duration')
67
+
68
+ private
69
+
70
+ def deploy_all_resources(resources, prune: false, verify:, record_summary: true)
71
+ deploy_resources(resources, prune: prune, verify: verify, record_summary: record_summary)
72
+ end
73
+ measure_method(:deploy_all_resources, 'normal_resources.duration')
74
+
75
+ def deploy_resources(resources, prune: false, verify:, record_summary: true)
76
+ return if resources.empty?
77
+ deploy_started_at = Time.now.utc
78
+
79
+ if resources.length > 1
80
+ logger.info("Deploying resources:")
81
+ resources.each do |r|
82
+ logger.info("- #{r.id} (#{r.pretty_timeout_type})")
83
+ end
84
+ else
85
+ resource = resources.first
86
+ logger.info("Deploying #{resource.id} (#{resource.pretty_timeout_type})")
87
+ end
88
+
89
+ # Apply can be done in one large batch, the rest have to be done individually
90
+ applyables, individuals = resources.partition { |r| r.deploy_method == :apply }
91
+ # Prunable resources should also applied so that they can be pruned
92
+ pruneable_types = @prune_whitelist.map { |t| t.split("/").last }
93
+ applyables += individuals.select { |r| pruneable_types.include?(r.type) }
94
+
95
+ individuals.each do |individual_resource|
96
+ individual_resource.deploy_started_at = Time.now.utc
97
+
98
+ case individual_resource.deploy_method
99
+ when :create
100
+ err, status = create_resource(individual_resource)
101
+ when :replace
102
+ err, status = replace_or_create_resource(individual_resource)
103
+ when :replace_force
104
+ err, status = replace_or_create_resource(individual_resource, force: true)
105
+ else
106
+ # Fail Fast! This is a programmer mistake.
107
+ raise ArgumentError, "Unexpected deploy method! (#{individual_resource.deploy_method.inspect})"
108
+ end
109
+
110
+ next if status.success?
111
+
112
+ raise FatalDeploymentError, <<~MSG
113
+ Failed to replace or create resource: #{individual_resource.id}
114
+ #{individual_resource.sensitive_template_content? ? '<suppressed sensitive output>' : err}
115
+ MSG
116
+ end
117
+
118
+ apply_all(applyables, prune)
119
+
120
+ if verify
121
+ watcher = Krane::ResourceWatcher.new(resources: resources, deploy_started_at: deploy_started_at,
122
+ timeout: @max_watch_seconds, task_config: @task_config, sha: @current_sha)
123
+ watcher.run(record_summary: record_summary)
124
+ end
125
+ end
126
+
127
+ def apply_all(resources, prune)
128
+ return unless resources.present?
129
+ command = %w(apply)
130
+
131
+ Dir.mktmpdir do |tmp_dir|
132
+ resources.each do |r|
133
+ FileUtils.symlink(r.file_path, tmp_dir)
134
+ r.deploy_started_at = Time.now.utc
135
+ end
136
+ command.push("-f", tmp_dir)
137
+
138
+ if prune && @prune_whitelist.present?
139
+ command.push("--prune")
140
+ if @selector
141
+ command.push("--selector", @selector.to_s)
142
+ else
143
+ command.push("--all")
144
+ end
145
+ @prune_whitelist.each { |type| command.push("--prune-whitelist=#{type}") }
146
+ end
147
+
148
+ output_is_sensitive = resources.any?(&:sensitive_template_content?)
149
+ global_mode = resources.all?(&:global?)
150
+ out, err, st = kubectl.run(*command, log_failure: false, output_is_sensitive: output_is_sensitive,
151
+ use_namespace: !global_mode)
152
+
153
+ if st.success?
154
+ log_pruning(out) if prune
155
+ else
156
+ record_apply_failure(err, resources: resources)
157
+ raise FatalDeploymentError, "Command failed: #{Shellwords.join(command)}"
158
+ end
159
+ end
160
+ end
161
+ measure_method(:apply_all)
162
+
163
+ def log_pruning(kubectl_output)
164
+ pruned = kubectl_output.scan(/^(.*) pruned$/)
165
+ return unless pruned.present?
166
+
167
+ logger.info("The following resources were pruned: #{pruned.join(', ')}")
168
+ logger.summary.add_action("pruned #{pruned.length} #{'resource'.pluralize(pruned.length)}")
169
+ end
170
+
171
+ def record_apply_failure(err, resources: [])
172
+ warn_msg = "WARNING: Any resources not mentioned in the error(s) below were likely created/updated. " \
173
+ "You may wish to roll back this deploy."
174
+ logger.summary.add_paragraph(ColorizedString.new(warn_msg).yellow)
175
+
176
+ unidentified_errors = []
177
+ filenames_with_sensitive_content = resources
178
+ .select(&:sensitive_template_content?)
179
+ .map { |r| File.basename(r.file_path) }
180
+
181
+ server_dry_run_validated_resource = resources
182
+ .select(&:server_dry_run_validated?)
183
+ .map { |r| File.basename(r.file_path) }
184
+
185
+ err.each_line do |line|
186
+ bad_files = find_bad_files_from_kubectl_output(line)
187
+ unless bad_files.present?
188
+ unidentified_errors << line
189
+ next
190
+ end
191
+
192
+ bad_files.each do |f|
193
+ err_msg = f[:err]
194
+ if filenames_with_sensitive_content.include?(f[:filename])
195
+ # Hide the error and template contents in case it has sensitive information
196
+ # we display full error messages as we assume there's no sensitive info leak after server-dry-run
197
+ err_msg = "SUPPRESSED FOR SECURITY" unless server_dry_run_validated_resource.include?(f[:filename])
198
+ record_invalid_template(logger: logger, err: err_msg, filename: f[:filename], content: nil)
199
+ else
200
+ record_invalid_template(logger: logger, err: err_msg, filename: f[:filename], content: f[:content])
201
+ end
202
+ end
203
+ end
204
+ return unless unidentified_errors.any?
205
+
206
+ if (filenames_with_sensitive_content - server_dry_run_validated_resource).present?
207
+ warn_msg = "WARNING: There was an error applying some or all resources. The raw output may be sensitive and " \
208
+ "so cannot be displayed."
209
+ logger.summary.add_paragraph(ColorizedString.new(warn_msg).yellow)
210
+ else
211
+ heading = ColorizedString.new('Unidentified error(s):').red
212
+ msg = FormattedLogger.indent_four(unidentified_errors.join)
213
+ logger.summary.add_paragraph("#{heading}\n#{msg}")
214
+ end
215
+ end
216
+
217
+ def replace_or_create_resource(resource, force: false)
218
+ args = if force
219
+ ["replace", "--force", "--cascade", "-f", resource.file_path]
220
+ else
221
+ ["replace", "-f", resource.file_path]
222
+ end
223
+
224
+ _, err, status = kubectl.run(*args, log_failure: false, output_is_sensitive: resource.sensitive_template_content?,
225
+ raise_if_not_found: true, use_namespace: !resource.global?)
226
+
227
+ [err, status]
228
+ rescue Krane::Kubectl::ResourceNotFoundError
229
+ # it doesn't exist so we can't replace it, we try to create it
230
+ create_resource(resource)
231
+ end
232
+
233
+ def create_resource(resource)
234
+ out, err, status = kubectl.run("create", "-f", resource.file_path, log_failure: false,
235
+ output: 'json', output_is_sensitive: resource.sensitive_template_content?,
236
+ use_namespace: !resource.global?)
237
+
238
+ # For resources that rely on a generateName attribute, we get the `name` from the result of the call to `create`
239
+ # We must explicitly set this name value so that the `apply` step for pruning can run successfully
240
+ if status.success? && resource.uses_generate_name?
241
+ resource.use_generated_name(JSON.parse(out))
242
+ end
243
+
244
+ [err, status]
245
+ end
246
+
247
+ # Inspect the file referenced in the kubectl stderr
248
+ # to make it easier for developer to understand what's going on
249
+ def find_bad_files_from_kubectl_output(line)
250
+ # stderr often contains one or more lines like the following, from which we can extract the file path(s):
251
+ # Error from server (TypeOfError): error when creating "/path/to/service-gqq5oh.yml": Service "web" is invalid:
252
+
253
+ line.scan(%r{"(/\S+\.ya?ml\S*)"}).each_with_object([]) do |matches, bad_files|
254
+ matches.each do |path|
255
+ content = File.read(path) if File.file?(path)
256
+ bad_files << { filename: File.basename(path), err: line, content: content }
257
+ end
258
+ end
259
+ end
260
+
261
+ def kubectl
262
+ @kubectl ||= Kubectl.new(task_config: @task_config, log_failure_by_default: true)
263
+ end
264
+ end
265
+ end
@@ -1,11 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'kubernetes-deploy/concurrency'
4
- require 'kubernetes-deploy/resource_cache'
3
+ require 'krane/concurrency'
4
+ require 'krane/resource_cache'
5
5
 
6
- module KubernetesDeploy
6
+ module Krane
7
7
  class ResourceWatcher
8
- extend KubernetesDeploy::StatsD::MeasureMethods
8
+ extend Krane::StatsD::MeasureMethods
9
9
  delegate :namespace, :context, :logger, to: :@task_config
10
10
 
11
11
  def initialize(resources:, task_config:, deploy_started_at: Time.now.utc,
@@ -53,7 +53,7 @@ module KubernetesDeploy
53
53
 
54
54
  def sync_resources(resources)
55
55
  cache = ResourceCache.new(@task_config)
56
- KubernetesDeploy::Concurrency.split_across_threads(resources) { |r| r.sync(cache) }
56
+ Krane::Concurrency.split_across_threads(resources) { |r| r.sync(cache) }
57
57
  resources.each(&:after_sync)
58
58
  end
59
59
  measure_method(:sync_resources, "sync.duration")
@@ -141,7 +141,7 @@ module KubernetesDeploy
141
141
  end
142
142
 
143
143
  kubectl = Kubectl.new(task_config: @task_config, log_failure_by_default: false)
144
- KubernetesDeploy::Concurrency.split_across_threads(failed_resources + global_timeouts) do |r|
144
+ Krane::Concurrency.split_across_threads(failed_resources + global_timeouts) do |r|
145
145
  r.sync_debug_info(kubectl)
146
146
  end
147
147
 
@@ -0,0 +1,224 @@
1
+ # frozen_string_literal: true
2
+ require 'krane/common'
3
+ require 'krane/kubernetes_resource'
4
+ require 'krane/kubernetes_resource/deployment'
5
+ require 'krane/kubeclient_builder'
6
+ require 'krane/resource_watcher'
7
+ require 'krane/kubectl'
8
+
9
+ module Krane
10
+ # Restart the pods in one or more deployments
11
+ class RestartTask
12
+ class FatalRestartError < FatalDeploymentError; end
13
+
14
+ class RestartAPIError < FatalRestartError
15
+ def initialize(deployment_name, response)
16
+ super("Failed to restart #{deployment_name}. " \
17
+ "API returned non-200 response code (#{response.code})\n" \
18
+ "Response:\n#{response.body}")
19
+ end
20
+ end
21
+
22
+ HTTP_OK_RANGE = 200..299
23
+ ANNOTATION = "shipit.shopify.io/restart"
24
+
25
+ # Initializes the restart task
26
+ #
27
+ # @param context [String] Kubernetes context / cluster
28
+ # @param namespace [String] Kubernetes namespace
29
+ # @param logger [Object] Logger object (defaults to an instance of Krane::FormattedLogger)
30
+ # @param max_watch_seconds [Integer] Timeout in seconds
31
+ def initialize(context:, namespace:, logger: nil, max_watch_seconds: nil)
32
+ @logger = logger || Krane::FormattedLogger.build(namespace, context)
33
+ @task_config = Krane::TaskConfig.new(context, namespace, @logger)
34
+ @context = context
35
+ @namespace = namespace
36
+ @max_watch_seconds = max_watch_seconds
37
+ end
38
+
39
+ # Runs the task, returning a boolean representing success or failure
40
+ #
41
+ # @return [Boolean]
42
+ def run(*args)
43
+ perform!(*args)
44
+ true
45
+ rescue FatalDeploymentError
46
+ false
47
+ end
48
+ alias_method :perform, :run
49
+
50
+ # Runs the task, raising exceptions in case of issues
51
+ #
52
+ # @param deployments_names [Array<String>] Array of workload names to restart
53
+ # @param selector [Hash] Selector(s) parsed by Krane::LabelSelector
54
+ # @param verify_result [Boolean] Wait for completion and verify success
55
+ #
56
+ # @return [nil]
57
+ def run!(deployments_names = nil, selector: nil, verify_result: true)
58
+ start = Time.now.utc
59
+ @logger.reset
60
+
61
+ @logger.phase_heading("Initializing restart")
62
+ verify_config!
63
+ deployments = identify_target_deployments(deployments_names, selector: selector)
64
+
65
+ @logger.phase_heading("Triggering restart by touching ENV[RESTARTED_AT]")
66
+ patch_kubeclient_deployments(deployments)
67
+
68
+ if verify_result
69
+ @logger.phase_heading("Waiting for rollout")
70
+ resources = build_watchables(deployments, start)
71
+ verify_restart(resources)
72
+ else
73
+ warning = "Result verification is disabled for this task"
74
+ @logger.summary.add_paragraph(ColorizedString.new(warning).yellow)
75
+ end
76
+ StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('success', deployments))
77
+ @logger.print_summary(:success)
78
+ rescue DeploymentTimeoutError
79
+ StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('timeout', deployments))
80
+ @logger.print_summary(:timed_out)
81
+ raise
82
+ rescue FatalDeploymentError => error
83
+ StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('failure', deployments))
84
+ @logger.summary.add_action(error.message) if error.message != error.class.to_s
85
+ @logger.print_summary(:failure)
86
+ raise
87
+ end
88
+ alias_method :perform!, :run!
89
+
90
+ private
91
+
92
+ def tags(status, deployments)
93
+ %W(namespace:#{@namespace} context:#{@context} status:#{status} deployments:#{deployments.to_a.length}})
94
+ end
95
+
96
+ def identify_target_deployments(deployment_names, selector: nil)
97
+ if deployment_names.nil?
98
+ deployments = if selector.nil?
99
+ @logger.info("Configured to restart all deployments with the `#{ANNOTATION}` annotation")
100
+ v1beta1_kubeclient.get_deployments(namespace: @namespace)
101
+ else
102
+ selector_string = selector.to_s
103
+ @logger.info(
104
+ "Configured to restart all deployments with the `#{ANNOTATION}` annotation and #{selector_string} selector"
105
+ )
106
+ v1beta1_kubeclient.get_deployments(namespace: @namespace, label_selector: selector_string)
107
+ end
108
+ deployments.select! { |d| d.metadata.annotations[ANNOTATION] }
109
+
110
+ if deployments.none?
111
+ raise FatalRestartError, "no deployments with the `#{ANNOTATION}` annotation found in namespace #{@namespace}"
112
+ end
113
+ elsif deployment_names.empty?
114
+ raise FatalRestartError, "Configured to restart deployments by name, but list of names was blank"
115
+ elsif !selector.nil?
116
+ raise FatalRestartError, "Can't specify deployment names and selector at the same time"
117
+ else
118
+ deployment_names = deployment_names.uniq
119
+ list = deployment_names.join(', ')
120
+ @logger.info("Configured to restart deployments by name: #{list}")
121
+
122
+ deployments = fetch_deployments(deployment_names)
123
+ if deployments.none?
124
+ raise FatalRestartError, "no deployments with names #{list} found in namespace #{@namespace}"
125
+ end
126
+ end
127
+ deployments
128
+ end
129
+
130
+ def build_watchables(kubeclient_resources, started)
131
+ kubeclient_resources.map do |d|
132
+ definition = d.to_h.deep_stringify_keys
133
+ r = Deployment.new(namespace: @namespace, context: @context, definition: definition, logger: @logger)
134
+ r.deploy_started_at = started # we don't care what happened to the resource before the restart cmd ran
135
+ r
136
+ end
137
+ end
138
+
139
+ def patch_deployment_with_restart(record)
140
+ v1beta1_kubeclient.patch_deployment(
141
+ record.metadata.name,
142
+ build_patch_payload(record),
143
+ @namespace
144
+ )
145
+ end
146
+
147
+ def patch_kubeclient_deployments(deployments)
148
+ deployments.each do |record|
149
+ begin
150
+ patch_deployment_with_restart(record)
151
+ @logger.info("Triggered `#{record.metadata.name}` restart")
152
+ rescue Kubeclient::HttpError => e
153
+ raise RestartAPIError.new(record.metadata.name, e.message)
154
+ end
155
+ end
156
+ end
157
+
158
+ def fetch_deployments(list)
159
+ list.map do |name|
160
+ record = nil
161
+ begin
162
+ record = v1beta1_kubeclient.get_deployment(name, @namespace)
163
+ rescue Kubeclient::ResourceNotFoundError
164
+ raise FatalRestartError, "Deployment `#{name}` not found in namespace `#{@namespace}`"
165
+ end
166
+ record
167
+ end
168
+ end
169
+
170
+ def build_patch_payload(deployment)
171
+ containers = deployment.spec.template.spec.containers
172
+ {
173
+ spec: {
174
+ template: {
175
+ spec: {
176
+ containers: containers.map do |container|
177
+ {
178
+ name: container.name,
179
+ env: [{ name: "RESTARTED_AT", value: Time.now.to_i.to_s }],
180
+ }
181
+ end,
182
+ },
183
+ },
184
+ },
185
+ }
186
+ end
187
+
188
+ def verify_restart(resources)
189
+ ResourceWatcher.new(resources: resources, operation_name: "restart",
190
+ timeout: @max_watch_seconds, task_config: @task_config).run
191
+ failed_resources = resources.reject(&:deploy_succeeded?)
192
+ success = failed_resources.empty?
193
+ if !success && failed_resources.all?(&:deploy_timed_out?)
194
+ raise DeploymentTimeoutError
195
+ end
196
+ raise FatalDeploymentError unless success
197
+ end
198
+
199
+ def verify_config!
200
+ task_config_validator = TaskConfigValidator.new(@task_config, kubectl, kubeclient_builder)
201
+ unless task_config_validator.valid?
202
+ @logger.summary.add_action("Configuration invalid")
203
+ @logger.summary.add_paragraph(task_config_validator.errors.map { |err| "- #{err}" }.join("\n"))
204
+ raise Krane::TaskConfigurationError
205
+ end
206
+ end
207
+
208
+ def kubeclient
209
+ @kubeclient ||= kubeclient_builder.build_v1_kubeclient(@context)
210
+ end
211
+
212
+ def kubectl
213
+ @kubectl ||= Kubectl.new(task_config: @task_config, log_failure_by_default: true)
214
+ end
215
+
216
+ def v1beta1_kubeclient
217
+ @v1beta1_kubeclient ||= kubeclient_builder.build_v1beta1_kubeclient(@context)
218
+ end
219
+
220
+ def kubeclient_builder
221
+ @kubeclient_builder ||= KubeclientBuilder.new
222
+ end
223
+ end
224
+ end