kubernetes-deploy 0.22.0 → 0.23.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +8 -0
  3. data/CHANGELOG.md +16 -0
  4. data/README.md +32 -0
  5. data/exe/kubernetes-deploy +2 -15
  6. data/exe/kubernetes-render +32 -0
  7. data/kubernetes-deploy.gemspec +5 -3
  8. data/lib/kubernetes-deploy.rb +5 -3
  9. data/lib/kubernetes-deploy/cluster_resource_discovery.rb +34 -0
  10. data/lib/kubernetes-deploy/container_logs.rb +25 -13
  11. data/lib/kubernetes-deploy/deploy_task.rb +68 -50
  12. data/lib/kubernetes-deploy/errors.rb +1 -0
  13. data/lib/kubernetes-deploy/formatted_logger.rb +16 -2
  14. data/lib/kubernetes-deploy/kubeclient_builder/google_friendly_config.rb +4 -6
  15. data/lib/kubernetes-deploy/kubectl.rb +20 -9
  16. data/lib/kubernetes-deploy/kubernetes_resource.rb +5 -6
  17. data/lib/kubernetes-deploy/kubernetes_resource/cloudsql.rb +3 -4
  18. data/lib/kubernetes-deploy/kubernetes_resource/daemon_set.rb +4 -5
  19. data/lib/kubernetes-deploy/kubernetes_resource/deployment.rb +7 -8
  20. data/lib/kubernetes-deploy/kubernetes_resource/memcached.rb +4 -5
  21. data/lib/kubernetes-deploy/kubernetes_resource/pod.rb +7 -5
  22. data/lib/kubernetes-deploy/kubernetes_resource/pod_set_base.rb +12 -6
  23. data/lib/kubernetes-deploy/kubernetes_resource/redis.rb +5 -6
  24. data/lib/kubernetes-deploy/kubernetes_resource/replica_set.rb +23 -5
  25. data/lib/kubernetes-deploy/kubernetes_resource/role.rb +22 -0
  26. data/lib/kubernetes-deploy/kubernetes_resource/service.rb +8 -4
  27. data/lib/kubernetes-deploy/kubernetes_resource/stateful_set.rb +2 -3
  28. data/lib/kubernetes-deploy/oj.rb +4 -0
  29. data/lib/kubernetes-deploy/options_helper.rb +27 -0
  30. data/lib/kubernetes-deploy/remote_logs.rb +10 -4
  31. data/lib/kubernetes-deploy/render_task.rb +119 -0
  32. data/lib/kubernetes-deploy/renderer.rb +1 -1
  33. data/lib/kubernetes-deploy/resource_cache.rb +64 -0
  34. data/lib/kubernetes-deploy/resource_watcher.rb +27 -6
  35. data/lib/kubernetes-deploy/restart_task.rb +5 -6
  36. data/lib/kubernetes-deploy/runner_task.rb +6 -10
  37. data/lib/kubernetes-deploy/statsd.rb +60 -7
  38. data/lib/kubernetes-deploy/template_discovery.rb +15 -0
  39. data/lib/kubernetes-deploy/version.rb +1 -1
  40. data/pull_request_template.md +8 -0
  41. metadata +47 -5
  42. data/lib/kubernetes-deploy/resource_discovery.rb +0 -19
  43. data/lib/kubernetes-deploy/sync_mediator.rb +0 -80
@@ -3,6 +3,7 @@ module KubernetesDeploy
3
3
  class FatalDeploymentError < StandardError; end
4
4
  class FatalKubeAPIError < FatalDeploymentError; end
5
5
  class KubectlError < StandardError; end
6
+ class TaskConfigurationError < FatalDeploymentError; end
6
7
 
7
8
  class InvalidTemplateError < FatalDeploymentError
8
9
  attr_reader :content
@@ -6,12 +6,26 @@ module KubernetesDeploy
6
6
  class FormattedLogger < Logger
7
7
  include DeferredSummaryLogging
8
8
 
9
- def self.build(namespace, context, stream = $stderr, verbose_prefix: false)
9
+ def self.indent_four(str)
10
+ " " + str.to_s.gsub("\n", "\n ")
11
+ end
12
+
13
+ def self.build(namespace = nil, context = nil, stream = $stderr, verbose_prefix: false)
10
14
  l = new(stream)
11
15
  l.level = level_from_env
12
16
 
17
+ middle = if verbose_prefix
18
+ if namespace.blank?
19
+ raise ArgumentError, 'Must pass a namespace if logging verbosely'
20
+ end
21
+ if context.blank?
22
+ raise ArgumentError, 'Must pass a context if logging verbosely'
23
+ end
24
+
25
+ "[#{context}][#{namespace}]"
26
+ end
27
+
13
28
  l.formatter = proc do |severity, datetime, _progname, msg|
14
- middle = verbose_prefix ? "[#{context}][#{namespace}]" : ""
15
29
  colorized_line = ColorizedString.new("[#{severity}][#{datetime}]#{middle}\t#{msg}\n")
16
30
 
17
31
  case severity
@@ -32,12 +32,10 @@ module KubernetesDeploy
32
32
  private
33
33
 
34
34
  def json_error_message(body)
35
- json_error_msg = begin
36
- JSON.parse(body || '') || {}
37
- rescue JSON::ParserError
38
- {}
39
- end
40
- json_error_msg['message']
35
+ err = JSON.parse(body || '') || {}
36
+ err['message']
37
+ rescue JSON::ParserError
38
+ nil
41
39
  end
42
40
  end
43
41
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module KubernetesDeploy
4
4
  class Kubectl
5
- DEFAULT_TIMEOUT = 30
5
+ DEFAULT_TIMEOUT = 15
6
6
  NOT_FOUND_ERROR_TEXT = 'NotFound'
7
7
 
8
8
  class ResourceNotFoundError < StandardError; end
@@ -20,32 +20,43 @@ module KubernetesDeploy
20
20
  raise ArgumentError, "context is required" if context.blank?
21
21
  end
22
22
 
23
- def run(*args, log_failure: nil, use_context: true, use_namespace: true, raise_if_not_found: false)
23
+ def run(*args, log_failure: nil, use_context: true, use_namespace: true, raise_if_not_found: false, attempts: 1)
24
24
  log_failure = @log_failure_by_default if log_failure.nil?
25
25
 
26
26
  args = args.unshift("kubectl")
27
27
  args.push("--namespace=#{@namespace}") if use_namespace
28
28
  args.push("--context=#{@context}") if use_context
29
29
  args.push("--request-timeout=#{@default_timeout}") if @default_timeout
30
+ out, err, st = nil
30
31
 
31
- @logger.debug Shellwords.join(args)
32
- out, err, st = Open3.capture3(*args)
33
- @logger.debug(out.shellescape) unless output_is_sensitive?
32
+ (1..attempts).to_a.each do |attempt|
33
+ @logger.debug "Running command (attempt #{attempt}): #{args.join(' ')}"
34
+ out, err, st = Open3.capture3(*args)
35
+ @logger.debug("Kubectl out: " + out.gsub(/\s+/, ' ')) unless output_is_sensitive?
36
+
37
+ break if st.success?
34
38
 
35
- unless st.success?
36
39
  if log_failure
37
- @logger.warn("The following command failed: #{Shellwords.join(args)}")
40
+ @logger.warn("The following command failed (attempt #{attempt}/#{attempts}): #{Shellwords.join(args)}")
38
41
  @logger.warn(err) unless output_is_sensitive?
39
42
  end
40
43
 
41
- if raise_if_not_found && err.match(NOT_FOUND_ERROR_TEXT)
42
- raise ResourceNotFoundError, err
44
+ if err.match(NOT_FOUND_ERROR_TEXT)
45
+ raise(ResourceNotFoundError, err) if raise_if_not_found
46
+ else
47
+ @logger.debug("Kubectl err: #{err}") unless output_is_sensitive?
48
+ StatsD.increment('kubectl.error', 1, tags: { context: @context, namespace: @namespace, cmd: args[1] })
43
49
  end
50
+ sleep retry_delay(attempt) unless attempt == attempts
44
51
  end
45
52
 
46
53
  [out.chomp, err.chomp, st]
47
54
  end
48
55
 
56
+ def retry_delay(attempt)
57
+ attempt
58
+ end
59
+
49
60
  def version_info
50
61
  @version_info ||=
51
62
  begin
@@ -121,8 +121,8 @@ module KubernetesDeploy
121
121
  file.path
122
122
  end
123
123
 
124
- def sync(mediator)
125
- @instance_data = mediator.get_instance(kubectl_resource_type, name, raise_if_not_found: true)
124
+ def sync(cache)
125
+ @instance_data = cache.get_instance(kubectl_resource_type, name, raise_if_not_found: true)
126
126
  rescue KubernetesDeploy::Kubectl::ResourceNotFoundError
127
127
  @disappeared = true if deploy_started?
128
128
  @instance_data = {}
@@ -195,7 +195,7 @@ module KubernetesDeploy
195
195
 
196
196
  def sync_debug_info(kubectl)
197
197
  @debug_events = fetch_events(kubectl) unless ENV[DISABLE_FETCHING_EVENT_INFO]
198
- @debug_logs = fetch_debug_logs(kubectl) if print_debug_logs? && !ENV[DISABLE_FETCHING_LOG_INFO]
198
+ @debug_logs = fetch_debug_logs if print_debug_logs? && !ENV[DISABLE_FETCHING_LOG_INFO]
199
199
  end
200
200
 
201
201
  def debug_message(cause = nil, info_hash = {})
@@ -293,7 +293,7 @@ module KubernetesDeploy
293
293
 
294
294
  def report_status_to_statsd(watch_time)
295
295
  unless @statsd_report_done
296
- ::StatsD.measure('resource.duration', watch_time, tags: statsd_tags)
296
+ StatsD.distribution('resource.duration', watch_time, tags: statsd_tags)
297
297
  @statsd_report_done = true
298
298
  end
299
299
  end
@@ -407,8 +407,7 @@ module KubernetesDeploy
407
407
  else
408
408
  "unknown"
409
409
  end
410
- tags = %W(context:#{context} namespace:#{namespace} resource:#{id}
411
- type:#{type} sha:#{ENV['REVISION']} status:#{status})
410
+ tags = %W(context:#{context} namespace:#{namespace} type:#{type} status:#{status})
412
411
  tags | @optional_statsd_tags
413
412
  end
414
413
  end
@@ -3,11 +3,10 @@ module KubernetesDeploy
3
3
  class Cloudsql < KubernetesResource
4
4
  TIMEOUT = 10.minutes
5
5
 
6
- SYNC_DEPENDENCIES = %w(Deployment Service)
7
- def sync(mediator)
6
+ def sync(cache)
8
7
  super
9
- @proxy_deployment = mediator.get_instance(Deployment.kind, "cloudsql-#{cloudsql_resource_uuid}")
10
- @proxy_service = mediator.get_instance(Service.kind, "cloudsql-#{@name}")
8
+ @proxy_deployment = cache.get_instance(Deployment.kind, "cloudsql-#{cloudsql_resource_uuid}")
9
+ @proxy_service = cache.get_instance(Service.kind, "cloudsql-#{@name}")
11
10
  end
12
11
 
13
12
  def status
@@ -5,10 +5,9 @@ module KubernetesDeploy
5
5
  TIMEOUT = 5.minutes
6
6
  attr_reader :pods
7
7
 
8
- SYNC_DEPENDENCIES = %w(Pod)
9
- def sync(mediator)
8
+ def sync(cache)
10
9
  super
11
- @pods = exists? ? find_pods(mediator) : []
10
+ @pods = exists? ? find_pods(cache) : []
12
11
  end
13
12
 
14
13
  def status
@@ -28,9 +27,9 @@ module KubernetesDeploy
28
27
  observed_generation == current_generation
29
28
  end
30
29
 
31
- def fetch_debug_logs(kubectl)
30
+ def fetch_debug_logs
32
31
  most_useful_pod = pods.find(&:deploy_failed?) || pods.find(&:deploy_timed_out?) || pods.first
33
- most_useful_pod.fetch_debug_logs(kubectl)
32
+ most_useful_pod.fetch_debug_logs
34
33
  end
35
34
 
36
35
  def print_debug_logs?
@@ -6,10 +6,9 @@ module KubernetesDeploy
6
6
  REQUIRED_ROLLOUT_TYPES = %w(maxUnavailable full none).freeze
7
7
  DEFAULT_REQUIRED_ROLLOUT = 'full'
8
8
 
9
- SYNC_DEPENDENCIES = %w(Pod ReplicaSet)
10
- def sync(mediator)
9
+ def sync(cache)
11
10
  super
12
- @latest_rs = exists? ? find_latest_rs(mediator) : nil
11
+ @latest_rs = exists? ? find_latest_rs(cache) : nil
13
12
  end
14
13
 
15
14
  def status
@@ -27,8 +26,8 @@ module KubernetesDeploy
27
26
  @latest_rs.present?
28
27
  end
29
28
 
30
- def fetch_debug_logs(kubectl)
31
- @latest_rs.fetch_debug_logs(kubectl)
29
+ def fetch_debug_logs
30
+ @latest_rs.fetch_debug_logs
32
31
  end
33
32
 
34
33
  def deploy_succeeded?
@@ -151,8 +150,8 @@ module KubernetesDeploy
151
150
  progress_condition["status"] == 'False'
152
151
  end
153
152
 
154
- def find_latest_rs(mediator)
155
- all_rs_data = mediator.get_all(ReplicaSet.kind, @instance_data["spec"]["selector"]["matchLabels"])
153
+ def find_latest_rs(cache)
154
+ all_rs_data = cache.get_all(ReplicaSet.kind, @instance_data["spec"]["selector"]["matchLabels"])
156
155
  current_revision = @instance_data["metadata"]["annotations"]["deployment.kubernetes.io/revision"]
157
156
 
158
157
  latest_rs_data = all_rs_data.find do |rs|
@@ -170,7 +169,7 @@ module KubernetesDeploy
170
169
  parent: "#{@name.capitalize} deployment",
171
170
  deploy_started_at: @deploy_started_at
172
171
  )
173
- rs.sync(mediator)
172
+ rs.sync(cache)
174
173
  rs
175
174
  end
176
175
 
@@ -4,12 +4,11 @@ module KubernetesDeploy
4
4
  TIMEOUT = 5.minutes
5
5
  CONFIGMAP_NAME = "memcached-url"
6
6
 
7
- SYNC_DEPENDENCIES = %w(Deployment Service ConfigMap)
8
- def sync(mediator)
7
+ def sync(cache)
9
8
  super
10
- @deployment = mediator.get_instance(Deployment.kind, "memcached-#{@name}")
11
- @service = mediator.get_instance(Service.kind, "memcached-#{@name}")
12
- @configmap = mediator.get_instance(ConfigMap.kind, CONFIGMAP_NAME)
9
+ @deployment = cache.get_instance(Deployment.kind, "memcached-#{@name}")
10
+ @service = cache.get_instance(Service.kind, "memcached-#{@name}")
11
+ @configmap = cache.get_instance(ConfigMap.kind, CONFIGMAP_NAME)
13
12
  end
14
13
 
15
14
  def status
@@ -25,12 +25,12 @@ module KubernetesDeploy
25
25
  logger: logger, statsd_tags: statsd_tags)
26
26
  end
27
27
 
28
- def sync(mediator)
28
+ def sync(_cache)
29
29
  super
30
30
  raise_predates_deploy_error if exists? && unmanaged? && !deploy_started?
31
31
 
32
32
  if exists?
33
- logs.sync(mediator.kubectl) if unmanaged?
33
+ logs.sync if unmanaged?
34
34
  update_container_statuses(@instance_data["status"])
35
35
  else # reset
36
36
  @containers.each(&:reset_status)
@@ -85,8 +85,8 @@ module KubernetesDeploy
85
85
  "#{phase_failure_message} #{container_problems}".strip.presence
86
86
  end
87
87
 
88
- def fetch_debug_logs(kubectl)
89
- logs.sync(kubectl)
88
+ def fetch_debug_logs
89
+ logs.sync
90
90
  logs
91
91
  end
92
92
 
@@ -123,7 +123,9 @@ module KubernetesDeploy
123
123
  @logs ||= KubernetesDeploy::RemoteLogs.new(
124
124
  logger: @logger,
125
125
  parent_id: id,
126
- container_names: @containers.map(&:name)
126
+ container_names: @containers.map(&:name),
127
+ namespace: @namespace,
128
+ context: @context
127
129
  )
128
130
  end
129
131
 
@@ -16,9 +16,15 @@ module KubernetesDeploy
16
16
  own_events.merge(most_useful_pod.fetch_events(kubectl))
17
17
  end
18
18
 
19
- def fetch_debug_logs(kubectl)
20
- logs = KubernetesDeploy::RemoteLogs.new(logger: @logger, parent_id: id, container_names: container_names)
21
- logs.sync(kubectl)
19
+ def fetch_debug_logs
20
+ logs = KubernetesDeploy::RemoteLogs.new(
21
+ logger: @logger,
22
+ parent_id: id,
23
+ container_names: container_names,
24
+ namespace: @namespace,
25
+ context: @context
26
+ )
27
+ logs.sync
22
28
  logs
23
29
  end
24
30
 
@@ -42,8 +48,8 @@ module KubernetesDeploy
42
48
  regular_containers + init_containers
43
49
  end
44
50
 
45
- def find_pods(mediator)
46
- all_pods = mediator.get_all(Pod.kind, @instance_data["spec"]["selector"]["matchLabels"])
51
+ def find_pods(cache)
52
+ all_pods = cache.get_all(Pod.kind, @instance_data["spec"]["selector"]["matchLabels"])
47
53
 
48
54
  all_pods.each_with_object([]) do |pod_data, relevant_pods|
49
55
  next unless parent_of_pod?(pod_data)
@@ -55,7 +61,7 @@ module KubernetesDeploy
55
61
  parent: "#{name.capitalize} #{type}",
56
62
  deploy_started_at: @deploy_started_at
57
63
  )
58
- pod.sync(mediator)
64
+ pod.sync(cache)
59
65
  relevant_pods << pod
60
66
  end
61
67
  end
@@ -4,15 +4,14 @@ module KubernetesDeploy
4
4
  TIMEOUT = 5.minutes
5
5
  UUID_ANNOTATION = "redis.stable.shopify.io/owner_uid"
6
6
 
7
- SYNC_DEPENDENCIES = %w(Deployment Service)
8
- def sync(mediator)
7
+ def sync(cache)
9
8
  super
10
9
 
11
- @deployment = mediator.get_instance(Deployment.kind, name)
12
- @deployment = mediator.get_instance(Deployment.kind, deprecated_name) if @deployment.empty?
10
+ @deployment = cache.get_instance(Deployment.kind, name)
11
+ @deployment = cache.get_instance(Deployment.kind, deprecated_name) if @deployment.empty?
13
12
 
14
- @service = mediator.get_instance(Service.kind, name)
15
- @service = mediator.get_instance(Service.kind, deprecated_name) if @service.empty?
13
+ @service = cache.get_instance(Service.kind, name)
14
+ @service = cache.get_instance(Service.kind, deprecated_name) if @service.empty?
16
15
  end
17
16
 
18
17
  def status
@@ -14,10 +14,9 @@ module KubernetesDeploy
14
14
  logger: logger, statsd_tags: statsd_tags)
15
15
  end
16
16
 
17
- SYNC_DEPENDENCIES = %w(Pod)
18
- def sync(mediator)
17
+ def sync(cache)
19
18
  super
20
- @pods = exists? ? find_pods(mediator) : []
19
+ @pods = fetch_pods_if_needed(cache) || []
21
20
  end
22
21
 
23
22
  def status
@@ -26,7 +25,7 @@ module KubernetesDeploy
26
25
  end
27
26
 
28
27
  def deploy_succeeded?
29
- observed_generation == current_generation &&
28
+ return false if stale_status?
30
29
  desired_replicas == rollout_data["availableReplicas"].to_i &&
31
30
  desired_replicas == rollout_data["readyReplicas"].to_i
32
31
  end
@@ -34,7 +33,7 @@ module KubernetesDeploy
34
33
  def deploy_failed?
35
34
  pods.present? &&
36
35
  pods.all?(&:deploy_failed?) &&
37
- observed_generation == current_generation
36
+ !stale_status?
38
37
  end
39
38
 
40
39
  def desired_replicas
@@ -54,6 +53,25 @@ module KubernetesDeploy
54
53
 
55
54
  private
56
55
 
56
+ def stale_status?
57
+ observed_generation != current_generation
58
+ end
59
+
60
+ def fetch_pods_if_needed(cache)
61
+ # If the ReplicaSet doesn't exist, its pods won't either
62
+ return unless exists?
63
+ # If the status hasn't been updated yet, we're not going to make a determination anyway
64
+ return if stale_status?
65
+ # If we don't want any pods at all, we don't need to look for them
66
+ return if desired_replicas == 0
67
+ # We only need to fetch pods so that deploy_failed? can check that they aren't ALL bad.
68
+ # If we can already tell some pods are ok from the RS data, don't bother fetching them (which can be expensive)
69
+ # Lower numbers here make us more susceptible to being fooled by replicas without probes briefly appearing ready
70
+ return if ready_replicas > 1
71
+
72
+ find_pods(cache)
73
+ end
74
+
57
75
  def rollout_data
58
76
  return { "replicas" => 0 } unless exists?
59
77
  { "replicas" => 0 }.merge(
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+ module KubernetesDeploy
3
+ class Role < KubernetesResource
4
+ TIMEOUT = 30.seconds
5
+
6
+ def status
7
+ exists? ? "Created" : "Unknown"
8
+ end
9
+
10
+ def deploy_succeeded?
11
+ exists?
12
+ end
13
+
14
+ def deploy_failed?
15
+ false
16
+ end
17
+
18
+ def timeout_message
19
+ UNUSUAL_FAILURE_MESSAGE
20
+ end
21
+ end
22
+ end
@@ -3,11 +3,15 @@ module KubernetesDeploy
3
3
  class Service < KubernetesResource
4
4
  TIMEOUT = 7.minutes
5
5
 
6
- SYNC_DEPENDENCIES = %w(Pod Deployment)
7
- def sync(mediator)
6
+ def sync(cache)
8
7
  super
9
- @related_deployments = selector.present? ? mediator.get_all(Deployment.kind, selector) : []
10
- @related_pods = selector.present? ? mediator.get_all(Pod.kind, selector) : []
8
+ if exists? && selector.present?
9
+ @related_deployments = cache.get_all(Deployment.kind, selector)
10
+ @related_pods = cache.get_all(Pod.kind, selector)
11
+ else
12
+ @related_deployments = []
13
+ @related_pods = []
14
+ end
11
15
  end
12
16
 
13
17
  def status