tobsch-krane 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. checksums.yaml +7 -0
  2. data/.buildkite/pipeline.nightly.yml +43 -0
  3. data/.github/probots.yml +2 -0
  4. data/.gitignore +20 -0
  5. data/.rubocop.yml +17 -0
  6. data/.shopify-build/VERSION +1 -0
  7. data/.shopify-build/kubernetes-deploy.yml +53 -0
  8. data/1.0-Upgrade.md +185 -0
  9. data/CHANGELOG.md +431 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +164 -0
  12. data/Gemfile +16 -0
  13. data/ISSUE_TEMPLATE.md +25 -0
  14. data/LICENSE.txt +21 -0
  15. data/README.md +655 -0
  16. data/Rakefile +36 -0
  17. data/bin/ci +21 -0
  18. data/bin/setup +16 -0
  19. data/bin/test +47 -0
  20. data/dev.yml +28 -0
  21. data/dev/flamegraph-from-tests +35 -0
  22. data/exe/krane +5 -0
  23. data/krane.gemspec +44 -0
  24. data/lib/krane.rb +7 -0
  25. data/lib/krane/bindings_parser.rb +88 -0
  26. data/lib/krane/cli/deploy_command.rb +75 -0
  27. data/lib/krane/cli/global_deploy_command.rb +54 -0
  28. data/lib/krane/cli/krane.rb +91 -0
  29. data/lib/krane/cli/render_command.rb +41 -0
  30. data/lib/krane/cli/restart_command.rb +34 -0
  31. data/lib/krane/cli/run_command.rb +54 -0
  32. data/lib/krane/cli/version_command.rb +13 -0
  33. data/lib/krane/cluster_resource_discovery.rb +113 -0
  34. data/lib/krane/common.rb +23 -0
  35. data/lib/krane/concerns/template_reporting.rb +29 -0
  36. data/lib/krane/concurrency.rb +18 -0
  37. data/lib/krane/container_logs.rb +106 -0
  38. data/lib/krane/deferred_summary_logging.rb +95 -0
  39. data/lib/krane/delayed_exceptions.rb +14 -0
  40. data/lib/krane/deploy_task.rb +363 -0
  41. data/lib/krane/deploy_task_config_validator.rb +29 -0
  42. data/lib/krane/duration_parser.rb +27 -0
  43. data/lib/krane/ejson_secret_provisioner.rb +154 -0
  44. data/lib/krane/errors.rb +28 -0
  45. data/lib/krane/formatted_logger.rb +57 -0
  46. data/lib/krane/global_deploy_task.rb +210 -0
  47. data/lib/krane/global_deploy_task_config_validator.rb +12 -0
  48. data/lib/krane/kubeclient_builder.rb +156 -0
  49. data/lib/krane/kubectl.rb +120 -0
  50. data/lib/krane/kubernetes_resource.rb +621 -0
  51. data/lib/krane/kubernetes_resource/cloudsql.rb +43 -0
  52. data/lib/krane/kubernetes_resource/config_map.rb +22 -0
  53. data/lib/krane/kubernetes_resource/cron_job.rb +18 -0
  54. data/lib/krane/kubernetes_resource/custom_resource.rb +87 -0
  55. data/lib/krane/kubernetes_resource/custom_resource_definition.rb +98 -0
  56. data/lib/krane/kubernetes_resource/daemon_set.rb +90 -0
  57. data/lib/krane/kubernetes_resource/deployment.rb +213 -0
  58. data/lib/krane/kubernetes_resource/horizontal_pod_autoscaler.rb +65 -0
  59. data/lib/krane/kubernetes_resource/ingress.rb +18 -0
  60. data/lib/krane/kubernetes_resource/job.rb +60 -0
  61. data/lib/krane/kubernetes_resource/network_policy.rb +22 -0
  62. data/lib/krane/kubernetes_resource/persistent_volume_claim.rb +80 -0
  63. data/lib/krane/kubernetes_resource/pod.rb +269 -0
  64. data/lib/krane/kubernetes_resource/pod_disruption_budget.rb +23 -0
  65. data/lib/krane/kubernetes_resource/pod_set_base.rb +71 -0
  66. data/lib/krane/kubernetes_resource/pod_template.rb +20 -0
  67. data/lib/krane/kubernetes_resource/replica_set.rb +92 -0
  68. data/lib/krane/kubernetes_resource/resource_quota.rb +22 -0
  69. data/lib/krane/kubernetes_resource/role.rb +22 -0
  70. data/lib/krane/kubernetes_resource/role_binding.rb +22 -0
  71. data/lib/krane/kubernetes_resource/secret.rb +24 -0
  72. data/lib/krane/kubernetes_resource/service.rb +104 -0
  73. data/lib/krane/kubernetes_resource/service_account.rb +22 -0
  74. data/lib/krane/kubernetes_resource/stateful_set.rb +70 -0
  75. data/lib/krane/label_selector.rb +42 -0
  76. data/lib/krane/oj.rb +4 -0
  77. data/lib/krane/options_helper.rb +39 -0
  78. data/lib/krane/remote_logs.rb +60 -0
  79. data/lib/krane/render_task.rb +118 -0
  80. data/lib/krane/renderer.rb +118 -0
  81. data/lib/krane/resource_cache.rb +68 -0
  82. data/lib/krane/resource_deployer.rb +265 -0
  83. data/lib/krane/resource_watcher.rb +171 -0
  84. data/lib/krane/restart_task.rb +228 -0
  85. data/lib/krane/rollout_conditions.rb +103 -0
  86. data/lib/krane/runner_task.rb +212 -0
  87. data/lib/krane/runner_task_config_validator.rb +18 -0
  88. data/lib/krane/statsd.rb +65 -0
  89. data/lib/krane/task_config.rb +22 -0
  90. data/lib/krane/task_config_validator.rb +96 -0
  91. data/lib/krane/template_sets.rb +173 -0
  92. data/lib/krane/version.rb +4 -0
  93. data/pull_request_template.md +8 -0
  94. data/screenshots/deploy-demo.gif +0 -0
  95. data/screenshots/migrate-logs.png +0 -0
  96. data/screenshots/missing-secret-fail.png +0 -0
  97. data/screenshots/success.png +0 -0
  98. data/screenshots/test-output.png +0 -0
  99. metadata +375 -0
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'krane/task_config_validator'
4
+
5
+ module Krane
6
# Task config validator variant that skips the :validate_namespace_exists check.
# NOTE(review): presumably used for global (non-namespaced) deploys, where no
# namespace is expected to exist - confirm against the callers.
class GlobalDeployTaskConfigValidator < Krane::TaskConfigValidator
  # Forwards every argument unchanged to the parent initializer, then removes
  # :validate_namespace_exists from the @validations list.
  def initialize(*arguments)
    super
    @validations = @validations - [:validate_namespace_exists]
  end
end
12
+ end
@@ -0,0 +1,156 @@
1
+ # frozen_string_literal: true
2
+ require 'kubeclient'
3
+
4
+ module Krane
5
# Builds Kubeclient::Client objects for individual Kubernetes API groups, using
# the contexts declared in one or more kubeconfig files.
class KubeclientBuilder
  # Raised when the requested context is not declared in any kubeconfig file.
  class ContextMissingError < FatalDeploymentError
    # @param context_name [String] the context that was looked up
    # @param kubeconfig [Array<String>] the kubeconfig paths that were searched
    def initialize(context_name, kubeconfig)
      searched = kubeconfig.join(', ')
      super("`#{context_name}` context must be configured in your KUBECONFIG file(s) (#{searched}).")
    end
  end

  # @param kubeconfig [String, nil] list of kubeconfig paths; defaults to
  #   $KUBECONFIG and falls back to ~/.kube/config when that is nil.
  def initialize(kubeconfig: ENV["KUBECONFIG"])
    path_list = kubeconfig || "#{Dir.home}/.kube/config"
    # Split the list by colon for Linux and Mac, and semicolon for Windows.
    @kubeconfig_files = path_list.split(/[:;]/).map(&:strip).reject(&:empty?)
  end

  # Core API group (no extra endpoint path), version v1.
  def build_v1_kubeclient(context)
    build_kubeclient(api_version: "v1", context: context)
  end

  def build_v1beta1_kubeclient(context)
    build_kubeclient(api_version: "v1beta1", context: context, endpoint_path: "/apis/extensions/")
  end

  def build_batch_v1beta1_kubeclient(context)
    build_kubeclient(api_version: "v1beta1", context: context, endpoint_path: "/apis/batch/")
  end

  def build_batch_v1_kubeclient(context)
    build_kubeclient(api_version: "v1", context: context, endpoint_path: "/apis/batch/")
  end

  def build_policy_v1beta1_kubeclient(context)
    build_kubeclient(api_version: "v1beta1", context: context, endpoint_path: "/apis/policy/")
  end

  def build_apps_v1_kubeclient(context)
    build_kubeclient(api_version: "v1", context: context, endpoint_path: "/apis/apps")
  end

  def build_apiextensions_v1beta1_kubeclient(context)
    build_kubeclient(api_version: "v1beta1", context: context, endpoint_path: "/apis/apiextensions.k8s.io")
  end

  # NOTE: despite the method name, this client is built for api_version "v2beta1".
  def build_autoscaling_v1_kubeclient(context)
    build_kubeclient(api_version: "v2beta1", context: context, endpoint_path: "/apis/autoscaling")
  end

  def build_rbac_v1_kubeclient(context)
    build_kubeclient(api_version: "v1", context: context, endpoint_path: "/apis/rbac.authorization.k8s.io")
  end

  def build_networking_v1_kubeclient(context)
    build_kubeclient(api_version: "v1", context: context, endpoint_path: "/apis/networking.k8s.io")
  end

  def build_storage_v1_kubeclient(context)
    build_kubeclient(api_version: "v1", context: context, endpoint_path: "/apis/storage.k8s.io")
  end

  def build_scheduling_v1beta1_kubeclient(context)
    build_kubeclient(api_version: "v1beta1", context: context, endpoint_path: "/apis/scheduling.k8s.io")
  end

  # Checks the configured kubeconfig paths.
  #
  # @return [Array<String>] one message per problem; empty when every path is a file
  def validate_config_files
    return ["Kubeconfig file name(s) not set in $KUBECONFIG"] if @kubeconfig_files.empty?

    # If any files in the list are not valid, we can't be sure the merged context list is what the user intended
    missing = @kubeconfig_files.reject { |path| File.file?(path) }
    missing.map { |path| "Kubeconfig not found at #{path}" }
  end

  # Same check as #validate_config_files, but raises instead of returning messages.
  #
  # @raise [TaskConfigurationError] listing every problem, comma separated
  def validate_config_files!
    problems = validate_config_files
    return unless problems.present?
    raise TaskConfigurationError, problems.join(', ')
  end

  private

  # Builds a client for `context` against "<api endpoint><endpoint_path>" and
  # runs discovery on it before returning it.
  #
  # @raise [TaskConfigurationError] when the kubeconfig files are invalid
  # @raise [ContextMissingError] when no kubeconfig file declares `context`
  def build_kubeclient(api_version:, context:, endpoint_path: nil)
    validate_config_files!
    # Parsed kubeconfigs are memoized for the lifetime of this builder.
    @kubeclient_configs ||= @kubeconfig_files.map { |path| Kubeclient::Config.read(path) }
    # Find a context defined in kube conf files that matches the input context by name
    owning_config = @kubeclient_configs.find { |candidate| candidate.contexts.include?(context) }
    raise ContextMissingError.new(context, @kubeconfig_files) if owning_config.nil?

    kube_context = owning_config.context(context)
    request_timeout = Krane::Kubectl::DEFAULT_TIMEOUT
    Kubeclient::Client.new(
      "#{kube_context.api_endpoint}#{endpoint_path}",
      api_version,
      ssl_options: kube_context.ssl_options,
      auth_options: kube_context.auth_options,
      timeouts: { open: request_timeout, read: request_timeout }
    ).tap(&:discover)
  end
end
156
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+ require 'open3'
3
+
4
+ module Krane
5
# Runs `kubectl` subcommands for the task's context and namespace, with optional
# retries, failure logging and StatsD error counting. Also exposes the client
# and server versions reported by `kubectl version`.
class Kubectl
  ERROR_MATCHERS = {
    not_found: /NotFound/,
    client_timeout: /Client\.Timeout exceeded while awaiting headers/,
  }.freeze
  DEFAULT_TIMEOUT = 15
  MAX_RETRY_DELAY = 16
  SERVER_DRY_RUN_MIN_VERSION = "1.13"
  # Matches both layouts of a `kubectl version` line:
  #   Client Version: version.Info{..., GitVersion:"v1.16.3", ...}
  #   Server Version: v1.27.3
  VERSION_LINE_PATTERN = /^(?<kind>Client|Server)\b.*?(?:GitVersion:"|Version: )v(?<version>\d+\.\d+\.\d+)/

  class ResourceNotFoundError < StandardError; end

  delegate :namespace, :context, :logger, to: :@task_config

  # @param task_config [Object] supplies namespace, context and logger (delegated)
  # @param log_failure_by_default [Boolean] used by #run when log_failure is nil
  # @param default_timeout [Integer, nil] value for --request-timeout; flag omitted when nil
  # @param output_is_sensitive_default [Boolean] used by #run when output_is_sensitive is nil
  def initialize(task_config:, log_failure_by_default:, default_timeout: DEFAULT_TIMEOUT,
    output_is_sensitive_default: false)
    @task_config = task_config
    @log_failure_by_default = log_failure_by_default
    @default_timeout = default_timeout
    @output_is_sensitive_default = output_is_sensitive_default
  end

  # Executes `kubectl <args>` and returns its captured output.
  #
  # @param args [Array<String>] subcommand and arguments
  # @param log_failure [Boolean, nil] warn on failure; nil uses the constructor default
  # @param use_context [Boolean] append --context
  # @param use_namespace [Boolean] append --namespace (a namespace must then be set)
  # @param output [String, nil] value for --output
  # @param raise_if_not_found [Boolean] raise when stderr matches the NotFound matcher
  # @param attempts [Integer] maximum number of attempts
  # @param output_is_sensitive [Boolean, nil] suppress stdout/stderr logging; nil uses the default
  # @param retry_whitelist [Array<Symbol>, nil] ERROR_MATCHERS keys that may be retried;
  #   nil retries anything except NotFound
  # @return [Array(String, String, Process::Status)] chomped stdout, chomped stderr, status
  # @raise [ArgumentError] when use_namespace is set but the namespace is blank
  # @raise [ResourceNotFoundError] see raise_if_not_found
  def run(*args, log_failure: nil, use_context: true, use_namespace: true, output: nil,
    raise_if_not_found: false, attempts: 1, output_is_sensitive: nil, retry_whitelist: nil)
    raise ArgumentError, "namespace is required" if namespace.blank? && use_namespace
    log_failure = @log_failure_by_default if log_failure.nil?
    output_is_sensitive = @output_is_sensitive_default if output_is_sensitive.nil?
    cmd = build_command_from_options(args, use_namespace, use_context, output)
    out, err, st = nil

    1.upto(attempts) do |current_attempt|
      logger.debug("Running command (attempt #{current_attempt}): #{cmd.join(' ')}")
      out, err, st = Open3.capture3(*cmd)
      logger.debug("Kubectl out: " + out.gsub(/\s+/, ' ')) unless output_is_sensitive

      break if st.success?
      raise(ResourceNotFoundError, err) if err.match(ERROR_MATCHERS[:not_found]) && raise_if_not_found

      if log_failure
        warning = if current_attempt == attempts
          "The following command failed (attempt #{current_attempt}/#{attempts})"
        elsif retriable_err?(err, retry_whitelist)
          "The following command failed and will be retried (attempt #{current_attempt}/#{attempts})"
        else
          "The following command failed and cannot be retried"
        end
        logger.warn("#{warning}: #{Shellwords.join(cmd)}")
        logger.warn(err) unless output_is_sensitive
      else
        logger.debug("Kubectl err: #{output_is_sensitive ? '<suppressed sensitive output>' : err}")
      end
      StatsD.client.increment('kubectl.error', 1, tags: { context: context, namespace: namespace, cmd: cmd[1] })

      break unless retriable_err?(err, retry_whitelist) && current_attempt < attempts
      sleep(retry_delay(current_attempt))
    end

    [out.chomp, err.chomp, st]
  end

  # Seconds to wait after the given (1-based) failed attempt.
  def retry_delay(attempt)
    # exponential backoff starting at 1s with cap at 16s, offset by up to 0.5s
    [2**(attempt - 1), MAX_RETRY_DELAY].min - Random.rand(0.5).round(1)
  end

  # Memoized hash of the versions parsed from `kubectl version`.
  #
  # @return [Hash{Symbol => Gem::Version}] keyed by :client and/or :server
  # @raise [KubectlError] when the command fails
  def version_info
    @version_info ||=
      begin
        response, _, status = run("version", use_namespace: false, log_failure: true)
        raise KubectlError, "Could not retrieve kubectl version info" unless status.success?
        extract_version_info_from_kubectl_response(response)
      end
  end

  def client_version
    version_info[:client]
  end

  def server_version
    version_info[:server]
  end

  # True when the server version is known and at least SERVER_DRY_RUN_MIN_VERSION.
  # An undetected server version is treated as "not enabled" rather than raising.
  def server_dry_run_enabled?
    detected = server_version
    return false if detected.nil?
    detected >= Gem::Version.new(SERVER_DRY_RUN_MIN_VERSION)
  end

  private

  # Assembles the argv array: "kubectl", the args, then the optional flags.
  def build_command_from_options(args, use_namespace, use_context, output)
    cmd = ["kubectl"] + args
    cmd.push("--namespace=#{namespace}") if use_namespace
    cmd.push("--context=#{context}") if use_context
    cmd.push("--output=#{output}") if output
    cmd.push("--request-timeout=#{@default_timeout}") if @default_timeout
    cmd
  end

  # Without a whitelist every error except NotFound is retriable; with one, the
  # error must match a listed matcher.
  #
  # @raise [NotImplementedError] when the whitelist names an unknown matcher
  def retriable_err?(err, retry_whitelist)
    return !err.match(ERROR_MATCHERS[:not_found]) if retry_whitelist.nil?
    retry_whitelist.any? do |retriable|
      raise NotImplementedError, "No matcher defined for #{retriable.inspect}" unless ERROR_MATCHERS.key?(retriable)
      err.match(ERROR_MATCHERS[retriable])
    end
  end

  # Collects the client/server versions from the lines of `kubectl version` output.
  # Lines that match neither supported layout are ignored.
  def extract_version_info_from_kubectl_response(response)
    response.each_line.with_object({}) do |line, info|
      match = line.match(VERSION_LINE_PATTERN)
      next unless match
      info[match[:kind].downcase.to_sym] = Gem::Version.new(match[:version])
    end
  end
end
120
+ end
@@ -0,0 +1,621 @@
1
+ # frozen_string_literal: true
2
+ require 'json'
3
+ require 'shellwords'
4
+
5
+ require 'krane/remote_logs'
6
+ require 'krane/duration_parser'
7
+ require 'krane/label_selector'
8
+ require 'krane/rollout_conditions'
9
+
10
+ module Krane
11
+ class KubernetesResource
12
+ attr_reader :name, :namespace, :context
13
+ attr_writer :type, :deploy_started_at, :global
14
+
15
+ GLOBAL = false
16
+ TIMEOUT = 5.minutes
17
+ LOG_LINE_COUNT = 250
18
+ SERVER_DRY_RUN_DISABLED_ERROR =
19
+ /(unknown flag: --server-dry-run)|(doesn't support dry-run)|(dryRun alpha feature is disabled)/
20
+
21
+ DISABLE_FETCHING_LOG_INFO = 'DISABLE_FETCHING_LOG_INFO'
22
+ DISABLE_FETCHING_EVENT_INFO = 'DISABLE_FETCHING_EVENT_INFO'
23
+ DISABLED_LOG_INFO_MESSAGE = "collection is disabled by the #{DISABLE_FETCHING_LOG_INFO} env var."
24
+ DISABLED_EVENT_INFO_MESSAGE = "collection is disabled by the #{DISABLE_FETCHING_EVENT_INFO} env var."
25
+ DEBUG_RESOURCE_NOT_FOUND_MESSAGE = "None found. Please check your usual logging service (e.g. Splunk)."
26
+ UNUSUAL_FAILURE_MESSAGE = <<~MSG
27
+ It is very unusual for this resource type to fail to deploy. Please try the deploy again.
28
+ If that new deploy also fails, contact your cluster administrator.
29
+ MSG
30
+ STANDARD_TIMEOUT_MESSAGE = <<~MSG
31
+ Kubernetes will continue to attempt to deploy this resource in the cluster, but at this point it is considered unlikely that it will succeed.
32
+ If you have reason to believe it will succeed, retry the deploy to continue to monitor the rollout.
33
+ MSG
34
+
35
+ TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX = "timeout-override"
36
+ TIMEOUT_OVERRIDE_ANNOTATION_DEPRECATED = "kubernetes-deploy.shopify.io/#{TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX}"
37
+ TIMEOUT_OVERRIDE_ANNOTATION = "krane.shopify.io/#{TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX}"
38
+ LAST_APPLIED_ANNOTATION = "kubectl.kubernetes.io/last-applied-configuration"
39
+ SENSITIVE_TEMPLATE_CONTENT = false
40
+ SERVER_DRY_RUNNABLE = false
41
+
42
class << self
  # Instantiates the object that should manage `definition`.
  #
  # Resolution order: a Krane resource class named after the kind, then
  # CustomResource when a `crd` is given, then a generic instance of the
  # receiver whose type is set to the kind and whose global flag is derived
  # from `global_names` (case-insensitive match).
  #
  # @raise [InvalidTemplateError] when kind or name/generateName is missing
  def build(namespace: nil, context:, definition:, logger:, statsd_tags:, crd: nil, global_names: [])
    validate_definition_essentials(definition)
    opts = { namespace: namespace, context: context, definition: definition, logger: logger,
             statsd_tags: statsd_tags }
    if (klass = class_for_kind(definition["kind"]))
      return klass.new(**opts)
    end
    if crd
      CustomResource.new(crd: crd, **opts)
    else
      type = definition["kind"]
      inst = new(**opts)
      inst.type = type
      inst.global = global_names.map(&:downcase).include?(type.downcase)
      inst
    end
  end

  # Looks up the Krane resource class named after `kind`.
  #
  # @return [Class, nil] nil when Krane defines no such constant, when the
  #   constant is not a KubernetesResource class, or when `kind` is not a valid
  #   constant name
  def class_for_kind(kind)
    # inherit=false restricts the lookup to constants defined directly on Krane,
    # so kinds such as "String" or "Queue" cannot resolve to top-level classes.
    return unless Krane.const_defined?(kind, false)
    candidate = Krane.const_get(kind, false)
    # Other Krane constants (e.g. Kubectl) are not resource implementations.
    candidate if candidate.is_a?(Class) && candidate <= Krane::KubernetesResource
  rescue NameError
    nil
  end

  # Default timeout of the receiving class (its TIMEOUT constant).
  def timeout
    self::TIMEOUT
  end

  # Class name without its module namespace.
  def kind
    name.demodulize
  end

  private

  # Rejects definitions that lack a kind, or both metadata.name and
  # metadata.generateName. The error content deliberately omits the template body.
  def validate_definition_essentials(definition)
    debug_content = <<~STRING
      apiVersion: #{definition.fetch('apiVersion', '<missing>')}
      kind: #{definition.fetch('kind', '<missing>')}
      metadata: #{definition.fetch('metadata', {})}
      <Template body suppressed because content sensitivity could not be determined.>
    STRING
    if definition["kind"].blank?
      raise InvalidTemplateError.new("Template is missing required field 'kind'", content: debug_content)
    end

    if definition.dig('metadata', 'name').blank? && definition.dig('metadata', 'generateName').blank?
      raise InvalidTemplateError.new("Template must specify one of 'metadata.name' or 'metadata.generateName'",
        content: debug_content)
    end
  end
end
96
+
97
# Effective timeout for this resource: the annotation override when one is
# present, otherwise the class-level default.
def timeout
  override = timeout_override
  override.present? ? override : self.class.timeout
end
101
+
102
# Timeout override taken from the timeout-override annotation, as an Integer.
# The result is memoized, including nil when the annotation does not parse.
def timeout_override
  unless defined?(@timeout_override)
    @timeout_override =
      begin
        annotation = krane_annotation_value(TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX)
        DurationParser.new(annotation).parse!.to_i
      rescue DurationParser::ParsingError
        nil
      end
  end
  @timeout_override
end
109
+
110
# Human-readable description of the effective timeout, e.g. "timeout: 300s".
def pretty_timeout_type
  format("timeout: %ss", timeout)
end
113
+
114
# Sets up the state tracked for one resource definition.
#
# The name comes from metadata.name, falling back to metadata.generateName,
# and is always stored as a String.
def initialize(namespace:, context:, definition:, logger:, statsd_tags: [])
  # subclasses must also set these if they define their own initializer
  explicit_name = definition.dig("metadata", "name")
  @name = (explicit_name || definition.dig("metadata", "generateName")).to_s

  # Collaborators and inputs
  @namespace = namespace
  @context = context
  @logger = logger
  @definition = definition
  @optional_statsd_tags = statsd_tags

  # Mutable tracking state
  @instance_data = {}
  @validation_errors = []
  @validation_warnings = []
  @statsd_report_done = false
  @disappeared = false
  @server_dry_run_validated = false
end
129
+
130
# Wraps the raw definition in a Kubeclient::Resource.
def to_kubeclient_resource
  definition = @definition
  Kubeclient::Resource.new(definition)
end
133
+
134
# Resets the collected errors and warnings, then runs every validation.
#
# @param kubectl [Object] passed through to the kubectl-based spec validation
# @param selector [Object, nil] when given, the labels are checked against it
# @return [Boolean] true when at least one validation ERROR was recorded
def validate_definition(kubectl, selector: nil)
  @validation_errors, @validation_warnings = [], []

  validate_selector(selector) if selector
  validate_timeout_annotation
  validate_annotation_version
  validate_spec_with_kubectl(kubectl)

  @validation_errors.present?
end
143
+
144
# All validation warnings as a single newline-separated string.
def validation_warning_msg
  warnings = @validation_warnings
  warnings.join("\n")
end
147
+
148
# True when validation recorded at least one warning.
def has_warnings?
  warnings = @validation_warnings
  warnings.present?
end
151
+
152
# All validation errors as a single newline-separated string.
def validation_error_msg
  errors = @validation_errors
  errors.join("\n")
end
155
+
156
# True when validation recorded at least one error.
def validation_failed?
  errors = @validation_errors
  errors.present?
end
159
+
160
# Identifier of the form "<type>/<name>".
def id
  [type, name].join("/")
end
163
+
164
# Orders resources by comparing their ids.
def <=>(other)
  own_id = id
  own_id <=> other.id
end
167
+
168
# Path of the file object returned by #file.
def file_path
  definition_file = file
  definition_file.path
end
171
+
172
# Refreshes @instance_data from the cache.
#
# When the instance is not found, the data is reset to an empty hash and, if
# the deploy has already started, the resource is flagged as disappeared.
def sync(cache)
  fetched = cache.get_instance(kubectl_resource_type, name, raise_if_not_found: true)
  @instance_data = fetched
rescue Krane::Kubectl::ResourceNotFoundError
  @disappeared = true if deploy_started?
  @instance_data = {}
end
178
+
179
# Intentionally empty; returns nil.
# NOTE(review): looks like a hook for subclasses to run work after #sync - confirm.
def after_sync
  nil
end
181
+
182
# True when the synced instance carries a metadata.deletionTimestamp.
def terminating?
  deletion_timestamp = @instance_data.dig('metadata', 'deletionTimestamp')
  deletion_timestamp.present?
end
185
+
186
# Value of the disappeared flag, which #sync sets when the instance is not
# found after the deploy has started.
def disappeared?
  @disappeared
end
189
+
190
# This implementation never reports a failed deploy.
def deploy_failed?
  false
end
193
+
194
# True once a deploy start time has been recorded.
def deploy_started?
  started_at = @deploy_started_at
  started_at.present?
end
197
+
198
# Generic success check: a started deploy is assumed to have succeeded.
# The first time that assumption is made, a warning is logged (once only).
def deploy_succeeded?
  started = deploy_started?
  if started && !@success_assumption_warning_shown
    @logger.warn("Don't know how to monitor resources of type #{type}. Assuming #{id} deployed successfully.")
    @success_assumption_warning_shown = true
  end
  started ? true : false
end
206
+
207
# True when the last sync produced any instance data.
def exists?
  data = @instance_data
  data.present?
end
210
+
211
# metadata.generation of the synced instance, or -1 when it does not exist.
def current_generation
  # must be different default than observed_generation
  exists? ? @instance_data.dig("metadata", "generation") : -1
end
215
+
216
# status.observedGeneration of the synced instance, or -2 when it does not exist.
def observed_generation
  # populating this is a best practice, but not all controllers actually do it
  exists? ? @instance_data.dig('status', 'observedGeneration') : -2
end
221
+
222
# Generic status text based only on whether the instance exists.
def status
  if exists?
    "Exists"
  else
    "Not Found"
  end
end
225
+
226
# Explicitly assigned type, falling back to the class-level kind.
def type
  return @type if @type
  self.class.kind
end
229
+
230
# Resource type name passed to the cache lookup in #sync; same as #type here.
def kubectl_resource_type
  resource_type = type
  resource_type
end
233
+
234
# True when a started deploy has neither succeeded nor failed and more than
# `timeout` seconds have elapsed since it started.
def deploy_timed_out?
  return false unless deploy_started?
  return false if deploy_succeeded? || deploy_failed?

  elapsed = Time.now.utc - @deploy_started_at
  elapsed > timeout
end
238
+
239
+ # Expected values: :apply, :create, :replace, :replace_force
240
def deploy_method
  # A definition with an explicit name is always applied; :create is only
  # chosen when the name is blank and generateName is in use.
  return :apply if @definition.dig("metadata", "name").present?
  uses_generate_name? ? :create : :apply
end
247
+
248
# Collects the events and logs later printed by #debug_message. Each kind of
# collection can be switched off through its DISABLE_FETCHING_* env var; logs
# are only fetched when print_debug_logs? is true.
def sync_debug_info(kubectl)
  events_enabled = !ENV[DISABLE_FETCHING_EVENT_INFO]
  @debug_events = fetch_events(kubectl) if events_enabled

  logs_enabled = print_debug_logs? && !ENV[DISABLE_FETCHING_LOG_INFO]
  @debug_logs = fetch_debug_logs if logs_enabled
end
252
+
253
# Builds the multi-line debugging report for this resource: a coloured heading,
# any failure/timeout explanation, the final status, then the collected events
# and (when print_debug_logs? is true) container logs.
#
# @param cause [Symbol, nil] :gave_up when the global watch timeout was hit
# @param info_hash [Hash] its :timeout value is printed for the :gave_up cause
# @return [String] the report, lines joined with "\n"
def debug_message(cause = nil, info_hash = {})
  report = []
  if cause == :gave_up
    heading = ColorizedString.new("#{id}: GLOBAL WATCH TIMEOUT (#{info_hash[:timeout]} seconds)").yellow
    report << "If you expected it to take longer than #{info_hash[:timeout]} seconds for your deploy"\
      " to roll out, increase --max-watch-seconds."
  elsif deploy_failed?
    heading = ColorizedString.new("#{id}: FAILED").red
    report << failure_message if failure_message.present?
  elsif deploy_timed_out?
    heading = ColorizedString.new("#{id}: TIMED OUT (#{pretty_timeout_type})").yellow
    report << timeout_message if timeout_message.present?
  else
    # Arriving in debug_message when we neither failed nor timed out is very unexpected. Dump all available info.
    heading = ColorizedString.new("#{id}: MONITORING ERROR").red
    report << failure_message if failure_message.present?
    report << timeout_message if timeout_message.present? && timeout_message != STANDARD_TIMEOUT_MESSAGE
  end

  status_line = " - Final status: #{status}"
  # Separate the status from the explanation unless that text already ends in a newline.
  needs_spacer = report.present? && !report.last.end_with?("\n")
  status_line = "\n#{status_line}" if needs_spacer
  report.unshift(heading)
  report.push(status_line)

  if @debug_events.present?
    report << " - Events (common success events excluded):"
    @debug_events.each do |identifier, events|
      report.concat(events.map { |event| " [#{identifier}]\t#{event}" })
    end
  elsif ENV[DISABLE_FETCHING_EVENT_INFO]
    report << " - Events: #{DISABLED_EVENT_INFO_MESSAGE}"
  else
    report << " - Events: #{DEBUG_RESOURCE_NOT_FOUND_MESSAGE}"
  end

  if print_debug_logs?
    if ENV[DISABLE_FETCHING_LOG_INFO]
      report << " - Logs: #{DISABLED_LOG_INFO_MESSAGE}"
    elsif @debug_logs.blank?
      report << " - Logs: #{DEBUG_RESOURCE_NOT_FOUND_MESSAGE}"
    else
      # Containers with the fewest log lines are printed first.
      @debug_logs.container_logs.sort_by { |container| container.lines.length }.each do |container|
        if container.empty?
          report << " - Logs from container '#{container.container_name}': #{DEBUG_RESOURCE_NOT_FOUND_MESSAGE}"
        else
          hit_limit = container.lines.length == ContainerLogs::DEFAULT_LINE_LIMIT
          note = hit_limit ? " (last #{ContainerLogs::DEFAULT_LINE_LIMIT} lines shown)" : ""
          report << " - Logs from container '#{container.container_name}'#{note}:"
          container.lines.each { |log_line| report << " #{log_line}" }
        end
      end
    end
  end

  report.join("\n")
end
314
+
315
+ # Returns a hash in the following format:
316
+ # {
317
+ # "pod/web-1" => [
318
+ # "Pulling: pulling image "hello-world:latest" (1 events)",
319
+ # "Pulled: Successfully pulled image "hello-world:latest" (1 events)"
320
+ # ]
321
+ # }
322
def fetch_events(kubectl)
  return {} unless exists?

  template = Event.go_template_for(type, name)
  out, _err, st = kubectl.run("get", "events", "--output=go-template=#{template}",
    log_failure: false, use_namespace: !global?)
  return {} unless st.success?

  # Only events seen since shortly (5 seconds) before the deploy started are kept.
  collected = Hash.new { |hash, key| hash[key] = [] }
  Event.extract_all_from_go_template_blob(out).each do |event|
    next unless event.seen_since?(@deploy_started_at - 5.seconds)
    collected[id] << event.to_s
  end
  collected
end
333
+
334
# Explanation shown by #debug_message when the deploy timed out; this
# implementation returns the standard text.
def timeout_message
  message = STANDARD_TIMEOUT_MESSAGE
  message
end
337
+
338
# Explanation shown by #debug_message when the deploy failed; this
# implementation has none and returns nil.
def failure_message
  nil
end
340
+
341
# One-line "<id> <status>" summary. The id is padded to 50 characters; longer
# ids are still followed by a single space.
def pretty_status
  column_width = [50, id.length + 1].max
  "#{id.ljust(column_width)}#{status}"
end
345
+
346
# Reports the watch duration as a 'resource.duration' distribution. Only the
# first call sends anything; later calls are no-ops.
def report_status_to_statsd(watch_time)
  return if @statsd_report_done

  StatsD.client.distribution('resource.duration', watch_time, tags: statsd_tags)
  @statsd_report_done = true
end
352
+
353
# Value of the class-level SENSITIVE_TEMPLATE_CONTENT flag.
def sensitive_template_content?
  resource_class = self.class
  resource_class::SENSITIVE_TEMPLATE_CONTENT
end
356
+
357
# True when the class is flagged SERVER_DRY_RUNNABLE and the definition does
# not use generateName.
def server_dry_runnable_resource?
  # generateName and server-side dry run are incompatible because the former only works with `create`
  # and the latter only works with `apply`
  class_supports_dry_run = self.class::SERVER_DRY_RUNNABLE
  class_supports_dry_run && !uses_generate_name?
end
362
+
363
# True when the definition sets metadata.generateName.
def uses_generate_name?
  generate_name = @definition.dig('metadata', 'generateName')
  generate_name.present?
end
366
+
367
# Value of the flag recording whether server-side dry-run validation succeeded;
# it starts out false.
def server_dry_run_validated?
  @server_dry_run_validated
end
370
+
371
+ # If a resource uses generateName, we don't know the full name of the resource until it's deployed to the cluster.
372
+ # In this case, we need to update our local definition with the realized name in order to accurately track the
373
+ # resource during deploy
374
def use_generated_name(instance_data)
  realized_name = instance_data.dig('metadata', 'name')
  metadata = @definition['metadata']

  @name = realized_name
  metadata['name'] = realized_name
  metadata.delete('generateName')
  # Regenerate the definition file so it carries the realized name.
  @file = create_definition_tempfile
end
380
+
381
# One Kubernetes event, plus class-level helpers that build the kubectl
# go-template used to fetch events and parse that template's output.
class Event
  EVENT_SEPARATOR = "ENDEVENT--BEGINEVENT"
  FIELD_SEPARATOR = "ENDFIELD--BEGINFIELD"
  FIELDS = %w(
    .involvedObject.kind
    .involvedObject.name
    .count
    .lastTimestamp
    .reason
    .message
    .eventTime
    .deprecatedCount
    .deprecatedLastTimestamp
    .series
  )
  FIELD_EMPTY_VALUE = '<no value>'
  # Values embedded in the rendered `.series` field, e.g.
  # "map[count:3 lastObservedTime:2020-01-01T00:00:00Z]". The patterns do not
  # depend on the position of the key inside the map.
  SERIES_COUNT_PATTERN = /count:(?<value>\d+)/
  SERIES_TIMESTAMP_PATTERN = /lastObservedTime:(?<value>[^\s\]]+)/

  # Go template that prints FIELDS for every event of the given object, skipping
  # the listed routine reasons (Started, Created, SuccessfulCreate, Scheduled,
  # Pulling, Pulled).
  def self.go_template_for(kind, name)
    and_conditions = [
      %[(eq .involvedObject.kind "#{kind}")],
      %[(eq .involvedObject.name "#{name}")],
      '(ne .reason "Started")',
      '(ne .reason "Created")',
      '(ne .reason "SuccessfulCreate")',
      '(ne .reason "Scheduled")',
      '(ne .reason "Pulling")',
      '(ne .reason "Pulled")',
    ]
    condition_start = "{{if and #{and_conditions.join(' ')}}}"
    field_part = FIELDS.map { |f| "{{#{f}}}" }.join(%({{print "#{FIELD_SEPARATOR}"}}))
    %({{range .items}}#{condition_start}#{field_part}{{print "#{EVENT_SEPARATOR}"}}{{end}}{{end}})
  end

  # Parses the output produced by the template from .go_template_for.
  #
  # @return [Array<Event>] one Event per rendered event
  def self.extract_all_from_go_template_blob(blob)
    blob.split(EVENT_SEPARATOR).map do |event_blob|
      pieces = event_blob.split(FIELD_SEPARATOR, FIELDS.length)
      count = extract_event_count(pieces)
      timestamp = extract_event_timestamp(pieces)

      new(
        subject_kind: pieces[FIELDS.index(".involvedObject.kind")],
        subject_name: pieces[FIELDS.index(".involvedObject.name")],
        count: count,
        last_timestamp: timestamp,
        reason: pieces[FIELDS.index(".reason")],
        message: pieces[FIELDS.index(".message")]
      )
    end
  end

  # Value of `field` in `pieces`, or nil when it is blank or the template's
  # '<no value>' placeholder.
  def self.present_field(pieces, field)
    value = pieces[FIELDS.index(field)]
    value if value.present? && value != FIELD_EMPTY_VALUE
  end

  # Value captured by `pattern` in the `.series` field, or nil when the field is
  # empty or the pattern does not match (callers then try the next source).
  def self.series_value(pieces, pattern)
    series = present_field(pieces, ".series")
    return unless series
    match = pattern.match(series)
    match && match['value']
  end

  # Find the right event count according to Kubernetes API and kubectl version
  def self.extract_event_count(pieces)
    # .count is the default field, so it is tried first.
    # kubectl 1.16 uses Events/v1, which has the .series/.count field.
    # kubectl < 1.16 uses events.k8s.io/v1beta1, which has .deprecatedCount.
    # Fallback to 1 when all count fields are null.
    present_field(pieces, ".count") ||
      series_value(pieces, SERIES_COUNT_PATTERN) ||
      present_field(pieces, ".deprecatedCount") ||
      "1"
  end

  # Find the right event timestamp according to Kubernetes API and kubectl version
  def self.extract_event_timestamp(pieces)
    # kubernetes 1.16 also exposes .lastTimestamp, so it is tried first.
    # kubectl 1.16 uses Events/v1, which has the .series/.lastObservedTime field.
    # kubectl < 1.16 uses events.k8s.io/v1beta1, which has .deprecatedLastTimestamp.
    # Fallback to the raw eventTime when other timestamp fields are null.
    present_field(pieces, ".lastTimestamp") ||
      series_value(pieces, SERIES_TIMESTAMP_PATTERN) ||
      present_field(pieces, ".deprecatedLastTimestamp") ||
      pieces[FIELDS.index(".eventTime")]
  end
  private_class_method :extract_event_timestamp, :extract_event_count, :present_field, :series_value

  # @param last_timestamp [String] parsed with Time.parse
  # @param message [String] stored with newlines removed
  # @param count [String, Integer] stored as an Integer
  def initialize(subject_kind:, last_timestamp:, reason:, message:, count:, subject_name:)
    @subject_kind = subject_kind
    @subject_name = subject_name
    @last_timestamp = Time.parse(last_timestamp)
    @reason = reason
    @message = message.tr("\n", '')
    @count = count.to_i
  end

  # True when the event was last seen at or after `time` (whole-second precision).
  def seen_since?(time)
    time.to_i <= @last_timestamp.to_i
  end

  def to_s
    "#{@reason}: #{@message} (#{@count} events)"
  end
end
489
+
490
# True-ish when this instance was flagged global, otherwise the class-level
# GLOBAL constant.
def global?
  return @global if @global
  self.class::GLOBAL
end
493
+
494
+ private
495
+
496
# Records a validation error when the timeout-override annotation is set but
# unparsable, not greater than 0, or greater than 24 hours. Does nothing when
# the annotation is absent.
def validate_timeout_annotation
  raw_value = krane_annotation_value(TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX)
  annotation_key = krane_annotation_key(TIMEOUT_OVERRIDE_ANNOTATION_SUFFIX)
  return if raw_value.nil?

  begin
    duration = DurationParser.new(raw_value).parse!
    problem =
      if duration <= 0
        "Value must be greater than 0"
      elsif duration > 24.hours
        "Value must be less than 24h"
      end
  rescue DurationParser::ParsingError => e
    problem = e
  end

  @validation_errors << "#{annotation_key} annotation is invalid: #{problem}" if problem
end
510
+
511
# Adds a single deprecation warning when any annotation key still contains
# the old 'kubernetes-deploy.shopify.io' prefix. Does nothing if such a
# warning is already present in the current warning message.
def validate_annotation_version
  return if validation_warning_msg.include?("annotations is deprecated")

  keys = @definition.dig("metadata", "annotations")&.keys
  keys&.each do |key|
    next unless key.include?("kubernetes-deploy.shopify.io")

    prefix = key.split('/').first
    @validation_warnings << "#{prefix} as a prefix for annotations is deprecated: "\
      "Use the 'krane.shopify.io' annotation prefix instead"
    # One warning is enough; stop at the first legacy key.
    return
  end
end
523
+
524
# Looks up an annotation value by suffix, preferring the legacy
# 'kubernetes-deploy.shopify.io' prefix over 'krane.shopify.io'.
#
# @param suffix [String] annotation name without its prefix
# @return [Object, nil] the annotation value, or nil when neither key is set
def krane_annotation_value(suffix)
  annotations_path = %w(metadata annotations)
  legacy_value = @definition.dig(*annotations_path, "kubernetes-deploy.shopify.io/#{suffix}")
  return legacy_value if legacy_value

  @definition.dig(*annotations_path, "krane.shopify.io/#{suffix}")
end
528
+
529
# Returns the full annotation key under which a value for +suffix+ is set,
# checking the legacy 'kubernetes-deploy.shopify.io' prefix first.
#
# @param suffix [String] annotation name without its prefix
# @return [String, nil] the matching key, or nil when neither is set
def krane_annotation_key(suffix)
  candidates = ["kubernetes-deploy.shopify.io/#{suffix}", "krane.shopify.io/#{suffix}"]
  candidates.find { |key| @definition.dig("metadata", "annotations", key) }
end
536
+
537
# Records a validation error unless every key/value pair of +selector+ is
# also present in this resource's labels.
#
# @param selector [#to_h] selector the resource is expected to match
def validate_selector(selector)
  resource_labels = labels
  if resource_labels.nil?
    @validation_errors << "selector #{selector} passed in, but no labels were defined"
    return
  end

  # Hash#<= is a subset test: selector pairs must all appear in the labels.
  return if selector.to_h <= resource_labels

  label_name = 'label'.pluralize(resource_labels.size)
  label_string = LabelSelector.new(resource_labels).to_s
  @validation_errors << "selector #{selector} does not match #{label_name} #{label_string}"
end
549
+
550
# Validates the rendered definition through kubectl dry runs.
#
# A server-side dry run is attempted when kubectl reports it as enabled and
# the resource is server-dry-runnable. A local dry run is used when no
# server-side error text was captured, or when that text matches
# SERVER_DRY_RUN_DISABLED_ERROR. On failure an entry is appended to
# @validation_errors; the raw error is withheld for sensitive templates.
#
# @param kubectl [Object] kubectl wrapper handed to the dry-run helpers
# @return [true, Array] true on success, otherwise @validation_errors
def validate_spec_with_kubectl(kubectl)
  error_text = ""
  status = nil

  if kubectl.server_dry_run_enabled? && server_dry_runnable_resource?
    _, error_text, status = validate_with_server_side_dry_run(kubectl)
    @server_dry_run_validated = status.success?
    return true if status.success?
  end

  use_local_dry_run = error_text.empty? || error_text.match(SERVER_DRY_RUN_DISABLED_ERROR)
  _, error_text, status = validate_with_local_dry_run(kubectl) if use_local_dry_run
  return true if status.success?

  failure_message =
    if sensitive_template_content?
      "Validation for #{id} failed. Detailed information is unavailable as the raw error may contain sensitive data."
    else
      error_text
    end
  @validation_errors << failure_message
end
569
+
570
# Server side dry run is only supported on apply, so the verb is fixed.
#
# @param kubectl [#run] kubectl wrapper used to execute the command
# @return whatever kubectl.run returns for the dry-run command
def validate_with_server_side_dry_run(kubectl)
  kubectl.run(
    "apply", "-f", file_path, "--server-dry-run", "--output=name",
    log_failure: false,
    output_is_sensitive: sensitive_template_content?,
    retry_whitelist: [:client_timeout],
    attempts: 3
  )
end
576
+
577
# Local dry run is supported only on create and apply.
# Validating with apply fails when the deploy method is create, and also
# when the resource template uses generateName, so apply is used only when
# the deploy method itself is :apply.
#
# @param kubectl [#run] kubectl wrapper used to execute the command
# @return whatever kubectl.run returns for the dry-run command
def validate_with_local_dry_run(kubectl)
  verb = "create"
  verb = "apply" if deploy_method == :apply

  kubectl.run(
    verb, "-f", file_path, "--dry-run", "--output=name",
    log_failure: false,
    output_is_sensitive: sensitive_template_content?,
    retry_whitelist: [:client_timeout],
    attempts: 3,
    use_namespace: !global?
  )
end
586
+
587
# Labels declared under metadata.labels of the definition.
#
# @return [Object, nil] the labels entry, or nil when it is absent
def labels
  label_path = %w(metadata labels)
  @definition.dig(*label_path)
end
590
+
591
# Memoized tempfile holding the definition; created on first access.
def file
  return @file if @file

  @file = create_definition_tempfile
end
594
+
595
# Writes the definition as YAML into a new tempfile named after the
# resource's type and name, and returns the (already closed) tempfile.
#
# @return [Tempfile]
def create_definition_tempfile
  tempfile = Tempfile.new(["#{type}-#{name}", ".yml"])
  tempfile.tap { |handle| handle.write(YAML.dump(@definition)) }
ensure
  # Always close the handle, whether or not the write succeeded.
  tempfile&.close
end
602
+
603
# Debug log printing is off in this implementation: always returns false.
#
# @return [false]
def print_debug_logs?
  false
end
606
+
607
# Builds the StatsD tag list for this resource: context, namespace, type
# and a deploy status, merged with @optional_statsd_tags without duplicates.
#
# The status is the first match of failure, timeout, success; otherwise
# "unknown".
#
# @return [Array<String>]
def statsd_tags
  outcome =
    case
    when deploy_failed? then "failure"
    when deploy_timed_out? then "timeout"
    when deploy_succeeded? then "success"
    else "unknown"
    end

  base_tags = [
    "context:#{context}",
    "namespace:#{namespace}",
    "type:#{type}",
    "status:#{outcome}",
  ]
  base_tags | @optional_statsd_tags
end
620
+ end
621
+ end