tobsch-krane 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +7 -0
  2. data/.buildkite/pipeline.nightly.yml +43 -0
  3. data/.github/probots.yml +2 -0
  4. data/.gitignore +20 -0
  5. data/.rubocop.yml +17 -0
  6. data/.shopify-build/VERSION +1 -0
  7. data/.shopify-build/kubernetes-deploy.yml +53 -0
  8. data/1.0-Upgrade.md +185 -0
  9. data/CHANGELOG.md +431 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +164 -0
  12. data/Gemfile +16 -0
  13. data/ISSUE_TEMPLATE.md +25 -0
  14. data/LICENSE.txt +21 -0
  15. data/README.md +655 -0
  16. data/Rakefile +36 -0
  17. data/bin/ci +21 -0
  18. data/bin/setup +16 -0
  19. data/bin/test +47 -0
  20. data/dev.yml +28 -0
  21. data/dev/flamegraph-from-tests +35 -0
  22. data/exe/krane +5 -0
  23. data/krane.gemspec +44 -0
  24. data/lib/krane.rb +7 -0
  25. data/lib/krane/bindings_parser.rb +88 -0
  26. data/lib/krane/cli/deploy_command.rb +75 -0
  27. data/lib/krane/cli/global_deploy_command.rb +54 -0
  28. data/lib/krane/cli/krane.rb +91 -0
  29. data/lib/krane/cli/render_command.rb +41 -0
  30. data/lib/krane/cli/restart_command.rb +34 -0
  31. data/lib/krane/cli/run_command.rb +54 -0
  32. data/lib/krane/cli/version_command.rb +13 -0
  33. data/lib/krane/cluster_resource_discovery.rb +113 -0
  34. data/lib/krane/common.rb +23 -0
  35. data/lib/krane/concerns/template_reporting.rb +29 -0
  36. data/lib/krane/concurrency.rb +18 -0
  37. data/lib/krane/container_logs.rb +106 -0
  38. data/lib/krane/deferred_summary_logging.rb +95 -0
  39. data/lib/krane/delayed_exceptions.rb +14 -0
  40. data/lib/krane/deploy_task.rb +363 -0
  41. data/lib/krane/deploy_task_config_validator.rb +29 -0
  42. data/lib/krane/duration_parser.rb +27 -0
  43. data/lib/krane/ejson_secret_provisioner.rb +154 -0
  44. data/lib/krane/errors.rb +28 -0
  45. data/lib/krane/formatted_logger.rb +57 -0
  46. data/lib/krane/global_deploy_task.rb +210 -0
  47. data/lib/krane/global_deploy_task_config_validator.rb +12 -0
  48. data/lib/krane/kubeclient_builder.rb +156 -0
  49. data/lib/krane/kubectl.rb +120 -0
  50. data/lib/krane/kubernetes_resource.rb +621 -0
  51. data/lib/krane/kubernetes_resource/cloudsql.rb +43 -0
  52. data/lib/krane/kubernetes_resource/config_map.rb +22 -0
  53. data/lib/krane/kubernetes_resource/cron_job.rb +18 -0
  54. data/lib/krane/kubernetes_resource/custom_resource.rb +87 -0
  55. data/lib/krane/kubernetes_resource/custom_resource_definition.rb +98 -0
  56. data/lib/krane/kubernetes_resource/daemon_set.rb +90 -0
  57. data/lib/krane/kubernetes_resource/deployment.rb +213 -0
  58. data/lib/krane/kubernetes_resource/horizontal_pod_autoscaler.rb +65 -0
  59. data/lib/krane/kubernetes_resource/ingress.rb +18 -0
  60. data/lib/krane/kubernetes_resource/job.rb +60 -0
  61. data/lib/krane/kubernetes_resource/network_policy.rb +22 -0
  62. data/lib/krane/kubernetes_resource/persistent_volume_claim.rb +80 -0
  63. data/lib/krane/kubernetes_resource/pod.rb +269 -0
  64. data/lib/krane/kubernetes_resource/pod_disruption_budget.rb +23 -0
  65. data/lib/krane/kubernetes_resource/pod_set_base.rb +71 -0
  66. data/lib/krane/kubernetes_resource/pod_template.rb +20 -0
  67. data/lib/krane/kubernetes_resource/replica_set.rb +92 -0
  68. data/lib/krane/kubernetes_resource/resource_quota.rb +22 -0
  69. data/lib/krane/kubernetes_resource/role.rb +22 -0
  70. data/lib/krane/kubernetes_resource/role_binding.rb +22 -0
  71. data/lib/krane/kubernetes_resource/secret.rb +24 -0
  72. data/lib/krane/kubernetes_resource/service.rb +104 -0
  73. data/lib/krane/kubernetes_resource/service_account.rb +22 -0
  74. data/lib/krane/kubernetes_resource/stateful_set.rb +70 -0
  75. data/lib/krane/label_selector.rb +42 -0
  76. data/lib/krane/oj.rb +4 -0
  77. data/lib/krane/options_helper.rb +39 -0
  78. data/lib/krane/remote_logs.rb +60 -0
  79. data/lib/krane/render_task.rb +118 -0
  80. data/lib/krane/renderer.rb +118 -0
  81. data/lib/krane/resource_cache.rb +68 -0
  82. data/lib/krane/resource_deployer.rb +265 -0
  83. data/lib/krane/resource_watcher.rb +171 -0
  84. data/lib/krane/restart_task.rb +228 -0
  85. data/lib/krane/rollout_conditions.rb +103 -0
  86. data/lib/krane/runner_task.rb +212 -0
  87. data/lib/krane/runner_task_config_validator.rb +18 -0
  88. data/lib/krane/statsd.rb +65 -0
  89. data/lib/krane/task_config.rb +22 -0
  90. data/lib/krane/task_config_validator.rb +96 -0
  91. data/lib/krane/template_sets.rb +173 -0
  92. data/lib/krane/version.rb +4 -0
  93. data/pull_request_template.md +8 -0
  94. data/screenshots/deploy-demo.gif +0 -0
  95. data/screenshots/migrate-logs.png +0 -0
  96. data/screenshots/missing-secret-fail.png +0 -0
  97. data/screenshots/success.png +0 -0
  98. data/screenshots/test-output.png +0 -0
  99. metadata +375 -0
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'krane/task_config_validator'
4
+
5
+ module Krane
6
# Task config validator variant that drops the :validate_namespace_exists check
# from the validation list set up by Krane::TaskConfigValidator.
class GlobalDeployTaskConfigValidator < Krane::TaskConfigValidator
  # Accepts exactly the arguments of the parent initializer and forwards them untouched.
  def initialize(*arguments)
    super
    @validations = @validations.reject { |check| check == :validate_namespace_exists }
  end
end
12
+ end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+ require 'kubeclient'
3
+
4
+ module Krane
5
+ class KubeclientBuilder
6
# Raised by build_kubeclient when none of the loaded kubeconfig files defines the requested context.
class ContextMissingError < FatalDeploymentError
  # context_name - name of the context that was requested
  # kubeconfig   - collection of kubeconfig file paths; listed comma-separated in the message
  def initialize(context_name, kubeconfig)
    searched_files = kubeconfig.join(', ')
    super("`#{context_name}` context must be configured in your KUBECONFIG file(s) (#{searched_files}).")
  end
end
12
+
13
# Records the kubeconfig files that clients will be built from.
#
# kubeconfig - path list in $KUBECONFIG format (default: ENV["KUBECONFIG"]).
#              When nil, "#{Dir.home}/.kube/config" is used instead.
#
# Each entry is stripped of surrounding whitespace and empty entries are dropped.
def initialize(kubeconfig: ENV["KUBECONFIG"])
  files = kubeconfig || "#{Dir.home}/.kube/config"
  # Where the platform's path-list separator is ";" (Windows), ":" is part of
  # drive-qualified paths such as "C:\Users\me\.kube\config", so splitting on it
  # would mangle every absolute path. On other platforms both ":" and ";" are
  # still accepted, exactly as before.
  separator = File::PATH_SEPARATOR == ";" ? ";" : /[:;]/
  @kubeconfig_files = files.split(separator).map(&:strip).reject(&:empty?)
end
18
+
19
# Public client factories. Each one delegates to build_kubeclient with a fixed
# API version and, except for the core "v1" client, a fixed endpoint path that is
# appended to the context's API endpoint.

# Core API, version "v1" (no endpoint path).
def build_v1_kubeclient(context)
  build_kubeclient(api_version: "v1", context: context)
end

# "/apis/extensions/", version "v1beta1".
def build_v1beta1_kubeclient(context)
  build_kubeclient(api_version: "v1beta1", context: context, endpoint_path: "/apis/extensions/")
end

# "/apis/batch/", version "v1beta1".
def build_batch_v1beta1_kubeclient(context)
  build_kubeclient(api_version: "v1beta1", context: context, endpoint_path: "/apis/batch/")
end

# "/apis/batch/", version "v1".
def build_batch_v1_kubeclient(context)
  build_kubeclient(api_version: "v1", context: context, endpoint_path: "/apis/batch/")
end

# "/apis/policy/", version "v1beta1".
def build_policy_v1beta1_kubeclient(context)
  build_kubeclient(api_version: "v1beta1", context: context, endpoint_path: "/apis/policy/")
end

# "/apis/apps", version "v1".
def build_apps_v1_kubeclient(context)
  build_kubeclient(api_version: "v1", context: context, endpoint_path: "/apis/apps")
end

# "/apis/apiextensions.k8s.io", version "v1beta1".
def build_apiextensions_v1beta1_kubeclient(context)
  build_kubeclient(api_version: "v1beta1", context: context, endpoint_path: "/apis/apiextensions.k8s.io")
end

# "/apis/autoscaling", version "v2beta1".
# NOTE(review): the method name says v1 but the client is built for "v2beta1" —
# confirm this is intentional before renaming or changing either.
def build_autoscaling_v1_kubeclient(context)
  build_kubeclient(api_version: "v2beta1", context: context, endpoint_path: "/apis/autoscaling")
end

# "/apis/rbac.authorization.k8s.io", version "v1".
def build_rbac_v1_kubeclient(context)
  build_kubeclient(api_version: "v1", context: context, endpoint_path: "/apis/rbac.authorization.k8s.io")
end

# "/apis/networking.k8s.io", version "v1".
def build_networking_v1_kubeclient(context)
  build_kubeclient(api_version: "v1", context: context, endpoint_path: "/apis/networking.k8s.io")
end

# "/apis/storage.k8s.io", version "v1".
def build_storage_v1_kubeclient(context)
  build_kubeclient(api_version: "v1", context: context, endpoint_path: "/apis/storage.k8s.io")
end

# "/apis/scheduling.k8s.io", version "v1beta1".
def build_scheduling_v1beta1_kubeclient(context)
  build_kubeclient(api_version: "v1beta1", context: context, endpoint_path: "/apis/scheduling.k8s.io")
end
113
+
114
# Checks the configured kubeconfig file list without raising.
#
# Returns an Array of error strings: a single "not set" message when the list is
# empty, otherwise one "not found" message per entry that is not a regular file.
# An empty Array means every configured file exists.
def validate_config_files
  return ["Kubeconfig file name(s) not set in $KUBECONFIG"] if @kubeconfig_files.empty?

  # Every missing file is reported: if any file in the list is not valid, we can't
  # be sure the merged context list is what the user intended.
  missing = @kubeconfig_files.reject { |path| File.file?(path) }
  missing.map { |path| "Kubeconfig not found at #{path}" }
end
126
+
127
# Raising variant of validate_config_files.
#
# Raises TaskConfigurationError with all error messages joined by ", " when
# validate_config_files reports any problem; returns nil otherwise.
def validate_config_files!
  problems = validate_config_files
  return if problems.blank?

  raise TaskConfigurationError, problems.join(', ')
end
131
+
132
+ private
133
+
134
# Builds a Kubeclient::Client for the given context and runs discovery on it.
#
# api_version   - API version string handed to Kubeclient::Client.new
# context       - name of the kubeconfig context to connect with
# endpoint_path - optional path appended to the context's API endpoint
#
# Raises TaskConfigurationError (via validate_config_files!) when the kubeconfig
# file list is invalid, and ContextMissingError when no loaded file defines the context.
def build_kubeclient(api_version:, context:, endpoint_path: nil)
  validate_config_files!

  # Parsed kubeconfig files are memoized for the lifetime of this builder.
  @kubeclient_configs ||= @kubeconfig_files.map { |path| Kubeclient::Config.read(path) }

  # The first file that knows the context by name wins.
  matching_config = @kubeclient_configs.find { |candidate| candidate.contexts.include?(context) }
  raise ContextMissingError.new(context, @kubeconfig_files) unless matching_config

  kube_context = matching_config.context(context)
  timeouts = { open: Krane::Kubectl::DEFAULT_TIMEOUT, read: Krane::Kubectl::DEFAULT_TIMEOUT }

  Kubeclient::Client.new(
    "#{kube_context.api_endpoint}#{endpoint_path}",
    api_version,
    ssl_options: kube_context.ssl_options,
    auth_options: kube_context.auth_options,
    timeouts: timeouts
  ).tap(&:discover)
end
155
+ end
156
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+ require 'open3'
3
+
4
+ module Krane
5
+ class Kubectl
6
+ ERROR_MATCHERS = {
7
+ not_found: /NotFound/,
8
+ client_timeout: /Client\.Timeout exceeded while awaiting headers/,
9
+ }
10
+ DEFAULT_TIMEOUT = 15
11
+ MAX_RETRY_DELAY = 16
12
+ SERVER_DRY_RUN_MIN_VERSION = "1.13"
13
+
14
+ class ResourceNotFoundError < StandardError; end
15
+
16
+ delegate :namespace, :context, :logger, to: :@task_config
17
+
18
+ def initialize(task_config:, log_failure_by_default:, default_timeout: DEFAULT_TIMEOUT,
19
+ output_is_sensitive_default: false)
20
+ @task_config = task_config
21
+ @log_failure_by_default = log_failure_by_default
22
+ @default_timeout = default_timeout
23
+ @output_is_sensitive_default = output_is_sensitive_default
24
+ end
25
+
26
# Runs `kubectl` with the given arguments, optionally retrying failures.
#
# args                - kubectl sub-command and its arguments
# log_failure         - warn on failure (true) or only debug-log it (false);
#                       nil falls back to the instance default
# use_context         - append --context
# use_namespace       - append --namespace (a blank namespace then raises ArgumentError)
# output              - value for --output, if any
# raise_if_not_found  - raise ResourceNotFoundError when stderr matches the NotFound matcher
# attempts            - maximum number of executions
# output_is_sensitive - suppress stdout/stderr in logs; nil falls back to the instance default
# retry_whitelist     - ERROR_MATCHERS keys that may be retried; nil means "anything but NotFound"
#
# Returns [stdout, stderr, status] of the last execution, with stdout/stderr chomped.
def run(*args, log_failure: nil, use_context: true, use_namespace: true, output: nil,
  raise_if_not_found: false, attempts: 1, output_is_sensitive: nil, retry_whitelist: nil)
  raise ArgumentError, "namespace is required" if namespace.blank? && use_namespace

  log_failure = @log_failure_by_default if log_failure.nil?
  output_is_sensitive = @output_is_sensitive_default if output_is_sensitive.nil?
  cmd = build_command_from_options(args, use_namespace, use_context, output)
  stdout = stderr = status = nil

  1.upto(attempts) do |attempt_number|
    logger.debug("Running command (attempt #{attempt_number}): #{cmd.join(' ')}")
    stdout, stderr, status = Open3.capture3(*cmd)
    logger.debug("Kubectl out: " + stdout.gsub(/\s+/, ' ')) unless output_is_sensitive

    break if status.success?
    raise(ResourceNotFoundError, stderr) if stderr.match(ERROR_MATCHERS[:not_found]) && raise_if_not_found

    if !log_failure
      logger.debug("Kubectl err: #{output_is_sensitive ? '<suppressed sensitive output>' : stderr}")
    else
      headline =
        if attempt_number == attempts
          "The following command failed (attempt #{attempt_number}/#{attempts})"
        elsif retriable_err?(stderr, retry_whitelist)
          "The following command failed and will be retried (attempt #{attempt_number}/#{attempts})"
        else
          "The following command failed and cannot be retried"
        end
      logger.warn("#{headline}: #{Shellwords.join(cmd)}")
      logger.warn(stderr) unless output_is_sensitive
    end
    StatsD.client.increment('kubectl.error', 1, tags: { context: context, namespace: namespace, cmd: cmd[1] })

    # Stop on the last attempt or as soon as the error is not considered retriable.
    break unless retriable_err?(stderr, retry_whitelist) && attempt_number < attempts
    sleep(retry_delay(attempt_number))
  end

  [stdout.chomp, stderr.chomp, status]
end
63
+
64
# Seconds to sleep before the next retry.
#
# attempt - 1-based number of the attempt that just failed
#
# The base delay doubles per attempt (1s, 2s, 4s, ...) and is capped at
# MAX_RETRY_DELAY; a random amount below 0.5s, rounded to one decimal, is subtracted.
def retry_delay(attempt)
  capped_backoff = [2**(attempt - 1), MAX_RETRY_DELAY].min
  jitter = Random.rand(0.5).round(1)
  capped_backoff - jitter
end
68
+
69
# Memoized result of `kubectl version`, parsed by
# extract_version_info_from_kubectl_response.
#
# Raises KubectlError when the command does not exit successfully.
def version_info
  return @version_info if @version_info

  response, _, status = run("version", use_namespace: false, log_failure: true)
  raise KubectlError, "Could not retrieve kubectl version info" unless status.success?

  @version_info = extract_version_info_from_kubectl_response(response)
end
77
+
78
# Version stored under :client in version_info (nil when absent).
def client_version
  version_info[:client]
end

# Version stored under :server in version_info (nil when absent).
def server_version
  version_info[:server]
end

# True when the server version is at least SERVER_DRY_RUN_MIN_VERSION.
def server_dry_run_enabled?
  minimum_version = Gem::Version.new(SERVER_DRY_RUN_MIN_VERSION)
  server_version >= minimum_version
end
89
+
90
+ private
91
+
92
# Assembles the argv array for a kubectl invocation.
#
# args          - Array of sub-command arguments placed right after "kubectl"
# use_namespace - append --namespace=<namespace>
# use_context   - append --context=<context>
# output        - value for --output; omitted when nil/false
#
# --request-timeout is appended last whenever @default_timeout is set.
def build_command_from_options(args, use_namespace, use_context, output)
  flags = []
  flags << "--namespace=#{namespace}" if use_namespace
  flags << "--context=#{context}" if use_context
  flags << "--output=#{output}" if output
  flags << "--request-timeout=#{@default_timeout}" if @default_timeout
  ["kubectl", *args, *flags]
end
100
+
101
# Decides whether a failed command may be retried, based on its stderr text.
#
# err             - stderr output of the failed command
# retry_whitelist - nil, or a list of ERROR_MATCHERS keys
#
# Without a whitelist every error except NotFound is retriable. With one, the
# error must match at least one of the listed matchers.
#
# Raises NotImplementedError when the whitelist names a key that has no matcher.
def retriable_err?(err, retry_whitelist)
  if retry_whitelist.nil?
    !err.match(ERROR_MATCHERS[:not_found])
  else
    retry_whitelist.any? do |error_kind|
      matcher = ERROR_MATCHERS.fetch(error_kind) do
        raise NotImplementedError, "No matcher defined for #{error_kind.inspect}"
      end
      err.match(matcher)
    end
  end
end
108
+
109
# Parses the text output of `kubectl version`.
#
# response - String containing lines that start with "Client" or "Server" and
#            carry a GitVersion:"vX.Y.Z..." field
#
# Returns a Hash with :client and/or :server keys mapped to Gem::Version objects
# built from the X.Y.Z part; lines that do not match are ignored.
def extract_version_info_from_kubectl_response(response)
  version_line = /^(?<kind>Client|Server).* GitVersion:"v(?<version>\d+\.\d+\.\d+)/

  response.each_line.each_with_object({}) do |line, versions|
    found = version_line.match(line)
    next unless found

    versions[found[:kind].downcase.to_sym] = Gem::Version.new(found[:version])
  end
end
119
+ end
120
+ end
@@ -0,0 +1,621 @@
1
+ # frozen_string_literal: true
2
+ require 'json'
3
+ require 'shellwords'
4
+
5
+ require 'krane/remote_logs'
6
+ require 'krane/duration_parser'
7
+ require 'krane/label_selector'
8
+ require 'krane/rollout_conditions'
9
+
10
+ module Krane
11
+ class KubernetesResource
12
+ attr_reader :name, :namespace, :context
13
+ attr_writer :type, :deploy_started_at, :global
14
+
15
+ GLOBAL = false
16
+ TIMEOUT = 5.minutes
17
+ LOG_LINE_COUNT = 250
18
+ SERVER_DRY_RUN_DISABLED_ERROR =
19
+ /(unknown flag: --server-dry-run)|(doesn't support dry-run)|(dryRun alpha feature is disabled)/
20
+
21
+ DISABLE_FETCHING_LOG_INFO = 'DISABLE_FETCHING_LOG_INFO'
22
+ DISABLE_FETCHING_EVENT_INFO = 'DISABLE_FETCHING_EVENT_INFO'
23
+ DISABLED_LOG_INFO_MESSAGE = "collection is disabled by the #{DISABLE_FETCHING_LOG_INFO} env var."
24
+ DISABLED_EVENT_INFO_MESSAGE = "collection is disabled by the #{DISABLE_FETCHING_EVENT_INFO} env var."
25
+ DEBUG_RESOURCE_NOT_FOUND_MESSAGE = "None found. Please check your usual logging service (e.g. Splunk)."
26
+ UNUSUAL_FAILURE_MESSAGE = <<~MSG
27
+ It is very unusual for this resource type to fail to deploy. Please try the deploy again.
28
+ If that new deploy also fails, contact your cluster administrator.
29
+ MSG
30
+ STANDARD_TIMEOUT_MESSAGE = <<~MSG
31
+ Kubernetes will continue to attempt to deploy this resource in the cluster, but at this point it is considered unlikely that it will succeed.
32
+ If you have reason to believe it will succeed, retry the deploy to continue to monitor the rollout.
33
+ MSG
34
+
35
+ TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX = "timeout-override"
36
+ TIMEOUT_OVERRIDE_ANNOTATION_DEPRECATED = "kubernetes-deploy.shopify.io/#{TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX}"
37
+ TIMEOUT_OVERRIDE_ANNOTATION = "krane.shopify.io/#{TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX}"
38
+ LAST_APPLIED_ANNOTATION = "kubectl.kubernetes.io/last-applied-configuration"
39
+ SENSITIVE_TEMPLATE_CONTENT = false
40
+ SERVER_DRY_RUNNABLE = false
41
+
42
+ class << self
43
# Factory for resource objects.
#
# Picks, in order: the Krane class named after the definition's kind, a
# CustomResource when a crd is given, or a plain instance of this class whose
# type is set to the kind and whose global flag is set when the kind appears
# (case-insensitively) in global_names.
#
# Raises InvalidTemplateError (via validate_definition_essentials) when the
# definition lacks a kind or a name/generateName.
def build(namespace: nil, context:, definition:, logger:, statsd_tags:, crd: nil, global_names: [])
  validate_definition_essentials(definition)
  opts = { namespace: namespace, context: context, definition: definition, logger: logger,
           statsd_tags: statsd_tags }

  specialized_class = class_for_kind(definition["kind"])
  return specialized_class.new(**opts) if specialized_class
  return CustomResource.new(crd: crd, **opts) if crd

  resource_kind = definition["kind"]
  new(**opts).tap do |generic_resource|
    generic_resource.type = resource_kind
    generic_resource.global = global_names.map(&:downcase).include?(resource_kind.downcase)
  end
end
60
+
61
# Looks up the Krane class that handles the given Kubernetes kind.
#
# kind - the template's "kind" value, e.g. "Deployment"
#
# Returns the constant defined directly under Krane with that name, or nil when
# there is none or when kind is not a valid constant name.
def class_for_kind(kind)
  # inherit = false restricts the lookup to Krane itself. With the default, a
  # module lookup also searches Object, so a kind such as "String" or "Thread"
  # would resolve to an unrelated top-level class.
  Krane.const_get(kind, false) if Krane.const_defined?(kind, false)
rescue NameError
  nil
end
68
+
69
+ def timeout
70
+ self::TIMEOUT
71
+ end
72
+
73
+ def kind
74
+ name.demodulize
75
+ end
76
+
77
+ private
78
+
79
# Ensures a template definition carries the minimum fields needed to build a resource.
#
# Raises InvalidTemplateError when 'kind' is blank, or when both 'metadata.name'
# and 'metadata.generateName' are blank. The error's content is a short summary
# (apiVersion, kind, metadata) rather than the template body.
def validate_definition_essentials(definition)
  debug_content = <<~STRING
    apiVersion: #{definition.fetch('apiVersion', '<missing>')}
    kind: #{definition.fetch('kind', '<missing>')}
    metadata: #{definition.fetch('metadata', {})}
    <Template body suppressed because content sensitivity could not be determined.>
  STRING

  problem =
    if definition["kind"].blank?
      "Template is missing required field 'kind'"
    elsif definition.dig('metadata', 'name').blank? && definition.dig('metadata', 'generateName').blank?
      "Template must specify one of 'metadata.name' or 'metadata.generateName'"
    end

  raise InvalidTemplateError.new(problem, content: debug_content) if problem
end
95
+ end
96
+
97
+ def timeout
98
+ return timeout_override if timeout_override.present?
99
+ self.class.timeout
100
+ end
101
+
102
# Timeout taken from the timeout-override annotation, as an integer.
#
# The value is computed once and memoized, including the nil that results when
# DurationParser raises a ParsingError for the annotation value.
def timeout_override
  unless defined?(@timeout_override)
    @timeout_override =
      begin
        DurationParser.new(krane_annotation_value(TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX)).parse!.to_i
      rescue DurationParser::ParsingError
        nil
      end
  end
  @timeout_override
end
109
+
110
+ def pretty_timeout_type
111
+ "timeout: #{timeout}s"
112
+ end
113
+
114
+ def initialize(namespace:, context:, definition:, logger:, statsd_tags: [])
115
+ # subclasses must also set these if they define their own initializer
116
+ @name = (definition.dig("metadata", "name") || definition.dig("metadata", "generateName")).to_s
117
+ @optional_statsd_tags = statsd_tags
118
+ @namespace = namespace
119
+ @context = context
120
+ @logger = logger
121
+ @definition = definition
122
+ @statsd_report_done = false
123
+ @disappeared = false
124
+ @validation_errors = []
125
+ @validation_warnings = []
126
+ @instance_data = {}
127
+ @server_dry_run_validated = false
128
+ end
129
+
130
+ def to_kubeclient_resource
131
+ Kubeclient::Resource.new(@definition)
132
+ end
133
+
134
+ def validate_definition(kubectl, selector: nil)
135
+ @validation_errors = []
136
+ @validation_warnings = []
137
+ validate_selector(selector) if selector
138
+ validate_timeout_annotation
139
+ validate_annotation_version
140
+ validate_spec_with_kubectl(kubectl)
141
+ @validation_errors.present?
142
+ end
143
+
144
+ def validation_warning_msg
145
+ @validation_warnings.join("\n")
146
+ end
147
+
148
+ def has_warnings?
149
+ @validation_warnings.present?
150
+ end
151
+
152
+ def validation_error_msg
153
+ @validation_errors.join("\n")
154
+ end
155
+
156
+ def validation_failed?
157
+ @validation_errors.present?
158
+ end
159
+
160
+ def id
161
+ "#{type}/#{name}"
162
+ end
163
+
164
+ def <=>(other)
165
+ id <=> other.id
166
+ end
167
+
168
+ def file_path
169
+ file.path
170
+ end
171
+
172
+ def sync(cache)
173
+ @instance_data = cache.get_instance(kubectl_resource_type, name, raise_if_not_found: true)
174
+ rescue Krane::Kubectl::ResourceNotFoundError
175
+ @disappeared = true if deploy_started?
176
+ @instance_data = {}
177
+ end
178
+
179
+ def after_sync
180
+ end
181
+
182
+ def terminating?
183
+ @instance_data.dig('metadata', 'deletionTimestamp').present?
184
+ end
185
+
186
+ def disappeared?
187
+ @disappeared
188
+ end
189
+
190
+ def deploy_failed?
191
+ false
192
+ end
193
+
194
+ def deploy_started?
195
+ @deploy_started_at.present?
196
+ end
197
+
198
+ def deploy_succeeded?
199
+ return false unless deploy_started?
200
+ unless @success_assumption_warning_shown
201
+ @logger.warn("Don't know how to monitor resources of type #{type}. Assuming #{id} deployed successfully.")
202
+ @success_assumption_warning_shown = true
203
+ end
204
+ true
205
+ end
206
+
207
+ def exists?
208
+ @instance_data.present?
209
+ end
210
+
211
+ def current_generation
212
+ return -1 unless exists? # must be different default than observed_generation
213
+ @instance_data.dig("metadata", "generation")
214
+ end
215
+
216
+ def observed_generation
217
+ return -2 unless exists?
218
+ # populating this is a best practice, but not all controllers actually do it
219
+ @instance_data.dig('status', 'observedGeneration')
220
+ end
221
+
222
+ def status
223
+ exists? ? "Exists" : "Not Found"
224
+ end
225
+
226
+ def type
227
+ @type || self.class.kind
228
+ end
229
+
230
+ def kubectl_resource_type
231
+ type
232
+ end
233
+
234
+ def deploy_timed_out?
235
+ return false unless deploy_started?
236
+ !deploy_succeeded? && !deploy_failed? && (Time.now.utc - @deploy_started_at > timeout)
237
+ end
238
+
239
+ # Expected values: :apply, :create, :replace, :replace_force
240
+ def deploy_method
241
+ if @definition.dig("metadata", "name").blank? && uses_generate_name?
242
+ :create
243
+ else
244
+ :apply
245
+ end
246
+ end
247
+
248
+ def sync_debug_info(kubectl)
249
+ @debug_events = fetch_events(kubectl) unless ENV[DISABLE_FETCHING_EVENT_INFO]
250
+ @debug_logs = fetch_debug_logs if print_debug_logs? && !ENV[DISABLE_FETCHING_LOG_INFO]
251
+ end
252
+
253
+ def debug_message(cause = nil, info_hash = {})
254
+ helpful_info = []
255
+ if cause == :gave_up
256
+ debug_heading = ColorizedString.new("#{id}: GLOBAL WATCH TIMEOUT (#{info_hash[:timeout]} seconds)").yellow
257
+ helpful_info << "If you expected it to take longer than #{info_hash[:timeout]} seconds for your deploy"\
258
+ " to roll out, increase --max-watch-seconds."
259
+ elsif deploy_failed?
260
+ debug_heading = ColorizedString.new("#{id}: FAILED").red
261
+ helpful_info << failure_message if failure_message.present?
262
+ elsif deploy_timed_out?
263
+ debug_heading = ColorizedString.new("#{id}: TIMED OUT (#{pretty_timeout_type})").yellow
264
+ helpful_info << timeout_message if timeout_message.present?
265
+ else
266
+ # Arriving in debug_message when we neither failed nor timed out is very unexpected. Dump all available info.
267
+ debug_heading = ColorizedString.new("#{id}: MONITORING ERROR").red
268
+ helpful_info << failure_message if failure_message.present?
269
+ helpful_info << timeout_message if timeout_message.present? && timeout_message != STANDARD_TIMEOUT_MESSAGE
270
+ end
271
+
272
+ final_status = " - Final status: #{status}"
273
+ final_status = "\n#{final_status}" if helpful_info.present? && !helpful_info.last.end_with?("\n")
274
+ helpful_info.prepend(debug_heading)
275
+ helpful_info << final_status
276
+
277
+ if @debug_events.present?
278
+ helpful_info << " - Events (common success events excluded):"
279
+ @debug_events.each do |identifier, event_hashes|
280
+ event_hashes.each { |event| helpful_info << " [#{identifier}]\t#{event}" }
281
+ end
282
+ elsif ENV[DISABLE_FETCHING_EVENT_INFO]
283
+ helpful_info << " - Events: #{DISABLED_EVENT_INFO_MESSAGE}"
284
+ else
285
+ helpful_info << " - Events: #{DEBUG_RESOURCE_NOT_FOUND_MESSAGE}"
286
+ end
287
+
288
+ if print_debug_logs?
289
+ if ENV[DISABLE_FETCHING_LOG_INFO]
290
+ helpful_info << " - Logs: #{DISABLED_LOG_INFO_MESSAGE}"
291
+ elsif @debug_logs.blank?
292
+ helpful_info << " - Logs: #{DEBUG_RESOURCE_NOT_FOUND_MESSAGE}"
293
+ else
294
+ container_logs = @debug_logs.container_logs.sort_by { |c| c.lines.length }
295
+ container_logs.each do |logs|
296
+ if logs.empty?
297
+ helpful_info << " - Logs from container '#{logs.container_name}': #{DEBUG_RESOURCE_NOT_FOUND_MESSAGE}"
298
+ next
299
+ end
300
+
301
+ if logs.lines.length == ContainerLogs::DEFAULT_LINE_LIMIT
302
+ truncated = " (last #{ContainerLogs::DEFAULT_LINE_LIMIT} lines shown)"
303
+ end
304
+ helpful_info << " - Logs from container '#{logs.container_name}'#{truncated}:"
305
+ logs.lines.each do |line|
306
+ helpful_info << " #{line}"
307
+ end
308
+ end
309
+ end
310
+ end
311
+
312
+ helpful_info.join("\n")
313
+ end
314
+
315
+ # Returns a hash in the following format:
316
+ # {
317
+ # "pod/web-1" => [
318
+ # "Pulling: pulling image "hello-world:latest" (1 events)",
319
+ # "Pulled: Successfully pulled image "hello-world:latest" (1 events)"
320
+ # ]
321
+ # }
322
# Fetches events for this resource via `kubectl get events` with a go-template.
#
# Returns {} when the resource does not exist or the command fails. Otherwise
# returns a Hash keyed by this resource's id whose value lists the string form
# of every event seen since five seconds before the deploy started.
def fetch_events(kubectl)
  return {} unless exists?

  out, _err, st = kubectl.run("get", "events", "--output=go-template=#{Event.go_template_for(type, name)}",
    log_failure: false, use_namespace: !global?)
  return {} unless st.success?

  recent_events = Event.extract_all_from_go_template_blob(out).select do |candidate|
    candidate.seen_since?(@deploy_started_at - 5.seconds)
  end

  events_by_id = Hash.new { |hash, key| hash[key] = [] }
  recent_events.each { |event| events_by_id[id] << event.to_s }
  events_by_id
end
333
+
334
+ def timeout_message
335
+ STANDARD_TIMEOUT_MESSAGE
336
+ end
337
+
338
+ def failure_message
339
+ end
340
+
341
+ def pretty_status
342
+ padding = " " * [50 - id.length, 1].max
343
+ "#{id}#{padding}#{status}"
344
+ end
345
+
346
+ def report_status_to_statsd(watch_time)
347
+ unless @statsd_report_done
348
+ StatsD.client.distribution('resource.duration', watch_time, tags: statsd_tags)
349
+ @statsd_report_done = true
350
+ end
351
+ end
352
+
353
+ def sensitive_template_content?
354
+ self.class::SENSITIVE_TEMPLATE_CONTENT
355
+ end
356
+
357
+ def server_dry_runnable_resource?
358
+ # generateName and server-side dry run are incompatible because the former only works with `create`
359
+ # and the latter only works with `apply`
360
+ self.class::SERVER_DRY_RUNNABLE && !uses_generate_name?
361
+ end
362
+
363
+ def uses_generate_name?
364
+ @definition.dig('metadata', 'generateName').present?
365
+ end
366
+
367
+ def server_dry_run_validated?
368
+ @server_dry_run_validated
369
+ end
370
+
371
+ # If a resource uses generateName, we don't know the full name of the resource until it's deployed to the cluster.
372
+ # In this case, we need to update our local definition with the realized name in order to accurately track the
373
+ # resource during deploy
374
+ def use_generated_name(instance_data)
375
+ @name = instance_data.dig('metadata', 'name')
376
+ @definition['metadata']['name'] = @name
377
+ @definition['metadata'].delete('generateName')
378
+ @file = create_definition_tempfile
379
+ end
380
+
381
+ class Event
382
+ EVENT_SEPARATOR = "ENDEVENT--BEGINEVENT"
383
+ FIELD_SEPARATOR = "ENDFIELD--BEGINFIELD"
384
+ FIELDS = %w(
385
+ .involvedObject.kind
386
+ .involvedObject.name
387
+ .count
388
+ .lastTimestamp
389
+ .reason
390
+ .message
391
+ .eventTime
392
+ .deprecatedCount
393
+ .deprecatedLastTimestamp
394
+ .series
395
+ )
396
+ FIELD_EMPTY_VALUE = '<no value>'
397
+
398
+ def self.go_template_for(kind, name)
399
+ and_conditions = [
400
+ %[(eq .involvedObject.kind "#{kind}")],
401
+ %[(eq .involvedObject.name "#{name}")],
402
+ '(ne .reason "Started")',
403
+ '(ne .reason "Created")',
404
+ '(ne .reason "SuccessfulCreate")',
405
+ '(ne .reason "Scheduled")',
406
+ '(ne .reason "Pulling")',
407
+ '(ne .reason "Pulled")',
408
+ ]
409
+ condition_start = "{{if and #{and_conditions.join(' ')}}}"
410
+ field_part = FIELDS.map { |f| "{{#{f}}}" }.join(%({{print "#{FIELD_SEPARATOR}"}}))
411
+ %({{range .items}}#{condition_start}#{field_part}{{print "#{EVENT_SEPARATOR}"}}{{end}}{{end}})
412
+ end
413
+
414
+ def self.extract_all_from_go_template_blob(blob)
415
+ blob.split(EVENT_SEPARATOR).map do |event_blob|
416
+ pieces = event_blob.split(FIELD_SEPARATOR, FIELDS.length)
417
+ count = extract_event_count(pieces)
418
+ timestamp = extract_event_timestamp(pieces)
419
+
420
+ new(
421
+ subject_kind: pieces[FIELDS.index(".involvedObject.kind")],
422
+ subject_name: pieces[FIELDS.index(".involvedObject.name")],
423
+ count: count,
424
+ last_timestamp: timestamp,
425
+ reason: pieces[FIELDS.index(".reason")],
426
+ message: pieces[FIELDS.index(".message")]
427
+ )
428
+ end
429
+ end
430
+
431
# Picks the event count out of the go-template fields of one event.
#
# pieces - Array of field values in FIELDS order
#
# Sources are tried in order: .count, the "count:" entry inside .series, then
# .deprecatedCount. A source is skipped when it is blank or FIELD_EMPTY_VALUE.
# Returns the count as a String, "1" when no source yields a value.
def self.extract_event_count(pieces)
  series = pieces[FIELDS.index(".series")]
  count = pieces[FIELDS.index(".count")]
  deprecated_count = pieces[FIELDS.index(".deprecatedCount")]

  # kubectl 1.16 uses Events/v1, which has the .series/.count field. The lookup
  # yields nil when the series text has no parsable count, so that case falls
  # through to the remaining sources instead of raising NoMethodError.
  series_count = nil
  if series.present? && series != FIELD_EMPTY_VALUE
    series_count = series[/count:(?<value>\S+?(?=\s))/, "value"]
  end

  # Find the right event count according to Kubernetes API and kubectl version
  if count.present? && count != FIELD_EMPTY_VALUE
    count # This is the default field, so let's try to use it first
  elsif series_count
    series_count
  elsif deprecated_count.present? && deprecated_count != FIELD_EMPTY_VALUE
    # kubectl < 1.16 uses events.k8s.io/v1beta1, which has .deprecatedCount
    deprecated_count
  else
    "1" # Fallback to 1 when all count fields are null
  end
end
450
+
451
# Picks the "last seen" timestamp out of the go-template fields of one event.
#
# pieces - Array of field values in FIELDS order
#
# Sources are tried in order: .lastTimestamp, the "lastObservedTime:" entry
# inside .series, then .deprecatedLastTimestamp. A source is skipped when it is
# blank or FIELD_EMPTY_VALUE. Returns the timestamp String, or the raw
# .eventTime field when no source yields a value.
def self.extract_event_timestamp(pieces)
  series = pieces[FIELDS.index(".series")]
  last_timestamp = pieces[FIELDS.index(".lastTimestamp")]
  deprecated_timestamp = pieces[FIELDS.index(".deprecatedLastTimestamp")]

  # kubectl 1.16 uses Events/v1, which has the .series/.lastObservedTime field.
  # The pattern only matches when the value is directly followed by "]"; when it
  # does not match, the lookup yields nil and the remaining sources are used
  # instead of raising NoMethodError.
  series_timestamp = nil
  if series.present? && series != FIELD_EMPTY_VALUE
    series_timestamp = series[/lastObservedTime:(?<value>\S+?(?=\]))/, "value"]
  end

  # Find the right event timestamp according to Kubernetes API and kubectl version
  if last_timestamp.present? && last_timestamp != FIELD_EMPTY_VALUE
    last_timestamp # kubernetes 1.16 also exposes .last_timestamp field, so let's support it
  elsif series_timestamp
    series_timestamp
  elsif deprecated_timestamp.present? && deprecated_timestamp != FIELD_EMPTY_VALUE
    # kubectl < 1.16 uses events.k8s.io/v1beta1, which has .deprecatedLastTimestamp
    deprecated_timestamp
  else
    pieces[FIELDS.index(".eventTime")] # Fallback to eventTime when other timestamp fields are null
  end
end
470
+ private_class_method :extract_event_timestamp, :extract_event_count
471
+
472
+ def initialize(subject_kind:, last_timestamp:, reason:, message:, count:, subject_name:)
473
+ @subject_kind = subject_kind
474
+ @subject_name = subject_name
475
+ @last_timestamp = Time.parse(last_timestamp)
476
+ @reason = reason
477
+ @message = message.tr("\n", '')
478
+ @count = count.to_i
479
+ end
480
+
481
+ def seen_since?(time)
482
+ time.to_i <= @last_timestamp.to_i
483
+ end
484
+
485
+ def to_s
486
+ "#{@reason}: #{@message} (#{@count} events)"
487
+ end
488
+ end
489
+
490
+ def global?
491
+ @global || self.class::GLOBAL
492
+ end
493
+
494
+ private
495
+
496
# Validates the timeout-override annotation, if the template has one.
#
# Appends one message to @validation_errors when the value cannot be parsed by
# DurationParser, is not greater than zero, or exceeds 24 hours.
def validate_timeout_annotation
  raw_value = krane_annotation_value(TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX)
  annotation_key = krane_annotation_key(TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX)
  return if raw_value.nil?

  problem =
    begin
      duration = DurationParser.new(raw_value).parse!
      if duration <= 0
        "Value must be greater than 0"
      elsif duration > 24.hours
        "Value must be less than 24h"
      end
    rescue DurationParser::ParsingError => e
      e.to_s
    end

  @validation_errors << "#{annotation_key} annotation is invalid: #{problem}" if problem
end
510
+
511
# Adds a single deprecation warning when any annotation key contains the
# "kubernetes-deploy.shopify.io" prefix. Does nothing when such a warning is
# already present or when the template has no annotations.
def validate_annotation_version
  return if validation_warning_msg.include?("annotations is deprecated")

  annotation_keys = @definition.dig("metadata", "annotations")&.keys
  deprecated_key = annotation_keys&.find { |key| key.include?("kubernetes-deploy.shopify.io") }
  return unless deprecated_key

  # Only the first offending annotation is reported.
  annotation_prefix = deprecated_key.split('/').first
  @validation_warnings << "#{annotation_prefix} as a prefix for annotations is deprecated: "\
    "Use the 'krane.shopify.io' annotation prefix instead"
  nil
end
523
+
524
+ def krane_annotation_value(suffix)
525
+ @definition.dig("metadata", "annotations", "kubernetes-deploy.shopify.io/#{suffix}") ||
526
+ @definition.dig("metadata", "annotations", "krane.shopify.io/#{suffix}")
527
+ end
528
+
529
+ def krane_annotation_key(suffix)
530
+ if @definition.dig("metadata", "annotations", "kubernetes-deploy.shopify.io/#{suffix}")
531
+ "kubernetes-deploy.shopify.io/#{suffix}"
532
+ elsif @definition.dig("metadata", "annotations", "krane.shopify.io/#{suffix}")
533
+ "krane.shopify.io/#{suffix}"
534
+ end
535
+ end
536
+
537
+ def validate_selector(selector)
538
+ if labels.nil?
539
+ @validation_errors << "selector #{selector} passed in, but no labels were defined"
540
+ return
541
+ end
542
+
543
+ unless selector.to_h <= labels
544
+ label_name = 'label'.pluralize(labels.size)
545
+ label_string = LabelSelector.new(labels).to_s
546
+ @validation_errors << "selector #{selector} does not match #{label_name} #{label_string}"
547
+ end
548
+ end
549
+
550
+ def validate_spec_with_kubectl(kubectl)
551
+ err = ""
552
+ if kubectl.server_dry_run_enabled? && server_dry_runnable_resource?
553
+ _, err, st = validate_with_server_side_dry_run(kubectl)
554
+ @server_dry_run_validated = st.success?
555
+ return true if st.success?
556
+ end
557
+
558
+ if err.empty? || err.match(SERVER_DRY_RUN_DISABLED_ERROR)
559
+ _, err, st = validate_with_local_dry_run(kubectl)
560
+ end
561
+
562
+ return true if st.success?
563
+ @validation_errors << if sensitive_template_content?
564
+ "Validation for #{id} failed. Detailed information is unavailable as the raw error may contain sensitive data."
565
+ else
566
+ err
567
+ end
568
+ end
569
+
570
+ # Server side dry run is only supported on apply
571
+ def validate_with_server_side_dry_run(kubectl)
572
+ command = ["apply", "-f", file_path, "--server-dry-run", "--output=name"]
573
+ kubectl.run(*command, log_failure: false, output_is_sensitive: sensitive_template_content?,
574
+ retry_whitelist: [:client_timeout], attempts: 3)
575
+ end
576
+
577
+ # Local dry run is supported on only create and apply
578
+ # If the deploy method is create, validating with apply will fail
579
+ # If the resource template uses generateName, validating with apply will fail
580
+ def validate_with_local_dry_run(kubectl)
581
+ verb = deploy_method == :apply ? "apply" : "create"
582
+ command = [verb, "-f", file_path, "--dry-run", "--output=name"]
583
+ kubectl.run(*command, log_failure: false, output_is_sensitive: sensitive_template_content?,
584
+ retry_whitelist: [:client_timeout], attempts: 3, use_namespace: !global?)
585
+ end
586
+
587
+ def labels
588
+ @definition.dig("metadata", "labels")
589
+ end
590
+
591
+ def file
592
+ @file ||= create_definition_tempfile
593
+ end
594
+
595
# Writes the resource definition as YAML to a new Tempfile named
# "<type>-<name>...yml" and returns the Tempfile, already closed.
def create_definition_tempfile
  tempfile = Tempfile.new(["#{type}-#{name}", ".yml"])
  begin
    tempfile.write(YAML.dump(@definition))
  ensure
    # Closed even when dumping or writing raises; the file itself is kept.
    tempfile.close
  end
  tempfile
end
602
+
603
+ def print_debug_logs?
604
+ false
605
+ end
606
+
607
# Tags attached to this resource's StatsD metrics: context, namespace, type and
# a status derived from the deploy outcome, merged (without duplicates) with the
# optional tags given at construction.
def statsd_tags
  outcome =
    if deploy_failed? then "failure"
    elsif deploy_timed_out? then "timeout"
    elsif deploy_succeeded? then "success"
    else "unknown"
    end

  base_tags = ["context:#{context}", "namespace:#{namespace}", "type:#{type}", "status:#{outcome}"]
  base_tags | @optional_statsd_tags
end
620
+ end
621
+ end