kubernetes-deploy 0.29.0 → 1.0.0.pre.2
This page shows the file-by-file diff between the two gem versions; changed files and their added/removed line counts are listed below.
- checksums.yaml +4 -4
- data/.buildkite/pipeline.nightly.yml +7 -0
- data/.rubocop.yml +0 -12
- data/.shopify-build/{kubernetes-deploy.yml → krane.yml} +8 -2
- data/1.0-Upgrade.md +109 -0
- data/CHANGELOG.md +60 -0
- data/CONTRIBUTING.md +2 -2
- data/Gemfile +1 -0
- data/README.md +86 -2
- data/dev.yml +3 -1
- data/dev/flamegraph-from-tests +1 -1
- data/exe/kubernetes-deploy +12 -9
- data/exe/kubernetes-render +9 -7
- data/exe/kubernetes-restart +3 -3
- data/exe/kubernetes-run +1 -1
- data/kubernetes-deploy.gemspec +5 -5
- data/lib/krane.rb +5 -3
- data/lib/{kubernetes-deploy → krane}/bindings_parser.rb +1 -1
- data/lib/krane/cli/deploy_command.rb +25 -13
- data/lib/krane/cli/global_deploy_command.rb +55 -0
- data/lib/krane/cli/krane.rb +12 -3
- data/lib/krane/cli/render_command.rb +19 -9
- data/lib/krane/cli/restart_command.rb +4 -4
- data/lib/krane/cli/run_command.rb +4 -4
- data/lib/krane/cli/version_command.rb +1 -1
- data/lib/krane/cluster_resource_discovery.rb +113 -0
- data/lib/{kubernetes-deploy → krane}/common.rb +8 -9
- data/lib/krane/concerns/template_reporting.rb +29 -0
- data/lib/{kubernetes-deploy → krane}/concurrency.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/container_logs.rb +3 -2
- data/lib/{kubernetes-deploy → krane}/deferred_summary_logging.rb +2 -2
- data/lib/{kubernetes-deploy → krane}/delayed_exceptions.rb +0 -0
- data/lib/krane/deploy_task.rb +16 -0
- data/lib/krane/deploy_task_config_validator.rb +29 -0
- data/lib/krane/deprecated_deploy_task.rb +404 -0
- data/lib/{kubernetes-deploy → krane}/duration_parser.rb +1 -3
- data/lib/{kubernetes-deploy → krane}/ejson_secret_provisioner.rb +10 -13
- data/lib/krane/errors.rb +28 -0
- data/lib/{kubernetes-deploy → krane}/formatted_logger.rb +2 -2
- data/lib/krane/global_deploy_task.rb +210 -0
- data/lib/krane/global_deploy_task_config_validator.rb +12 -0
- data/lib/{kubernetes-deploy → krane}/kubeclient_builder.rb +13 -5
- data/lib/{kubernetes-deploy → krane}/kubectl.rb +14 -16
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource.rb +110 -27
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/cloudsql.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/config_map.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/cron_job.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/custom_resource.rb +2 -2
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/custom_resource_definition.rb +1 -5
- data/lib/krane/kubernetes_resource/daemon_set.rb +90 -0
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/deployment.rb +2 -2
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/horizontal_pod_autoscaler.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/ingress.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/job.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/network_policy.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/persistent_volume_claim.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod.rb +6 -2
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod_disruption_budget.rb +2 -2
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod_set_base.rb +3 -3
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/pod_template.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/replica_set.rb +2 -2
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/resource_quota.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/role.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/role_binding.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/secret.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/service.rb +2 -2
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/service_account.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/kubernetes_resource/stateful_set.rb +2 -2
- data/lib/{kubernetes-deploy → krane}/label_selector.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/oj.rb +0 -0
- data/lib/{kubernetes-deploy → krane}/options_helper.rb +2 -2
- data/lib/{kubernetes-deploy → krane}/remote_logs.rb +2 -2
- data/lib/krane/render_task.rb +149 -0
- data/lib/{kubernetes-deploy → krane}/renderer.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/resource_cache.rb +10 -9
- data/lib/krane/resource_deployer.rb +265 -0
- data/lib/{kubernetes-deploy → krane}/resource_watcher.rb +24 -25
- data/lib/krane/restart_task.rb +228 -0
- data/lib/{kubernetes-deploy → krane}/rollout_conditions.rb +1 -1
- data/lib/krane/runner_task.rb +212 -0
- data/lib/{kubernetes-deploy → krane}/runner_task_config_validator.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/statsd.rb +13 -27
- data/lib/krane/task_config.rb +22 -0
- data/lib/{kubernetes-deploy → krane}/task_config_validator.rb +1 -1
- data/lib/{kubernetes-deploy → krane}/template_sets.rb +5 -5
- data/lib/krane/version.rb +4 -0
- data/lib/kubernetes-deploy/deploy_task.rb +6 -608
- data/lib/kubernetes-deploy/errors.rb +1 -26
- data/lib/kubernetes-deploy/render_task.rb +5 -122
- data/lib/kubernetes-deploy/rescue_krane_exceptions.rb +18 -0
- data/lib/kubernetes-deploy/restart_task.rb +6 -198
- data/lib/kubernetes-deploy/runner_task.rb +6 -184
- metadata +96 -70
- data/lib/kubernetes-deploy/cluster_resource_discovery.rb +0 -34
- data/lib/kubernetes-deploy/kubernetes_resource/daemon_set.rb +0 -54
- data/lib/kubernetes-deploy/task_config.rb +0 -16
- data/lib/kubernetes-deploy/version.rb +0 -4
@@ -1,23 +1,22 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require '
|
4
|
-
require '
|
3
|
+
require 'krane/concurrency'
|
4
|
+
require 'krane/resource_cache'
|
5
5
|
|
6
|
-
module
|
6
|
+
module Krane
|
7
7
|
class ResourceWatcher
|
8
|
-
extend
|
8
|
+
extend Krane::StatsD::MeasureMethods
|
9
|
+
delegate :namespace, :context, :logger, to: :@task_config
|
9
10
|
|
10
|
-
def initialize(resources:,
|
11
|
-
|
11
|
+
def initialize(resources:, task_config:, deploy_started_at: Time.now.utc,
|
12
|
+
operation_name: "deploy", timeout: nil, sha: nil)
|
12
13
|
unless resources.is_a?(Enumerable)
|
13
14
|
raise ArgumentError, <<~MSG
|
14
15
|
ResourceWatcher expects Enumerable collection, got `#{resources.class}` instead
|
15
16
|
MSG
|
16
17
|
end
|
17
18
|
@resources = resources
|
18
|
-
@
|
19
|
-
@namespace = namespace
|
20
|
-
@context = context
|
19
|
+
@task_config = task_config
|
21
20
|
@deploy_started_at = deploy_started_at
|
22
21
|
@operation_name = operation_name
|
23
22
|
@timeout = timeout
|
@@ -53,16 +52,16 @@ module KubernetesDeploy
|
|
53
52
|
private
|
54
53
|
|
55
54
|
def sync_resources(resources)
|
56
|
-
cache = ResourceCache.new(@
|
57
|
-
|
55
|
+
cache = ResourceCache.new(@task_config)
|
56
|
+
Krane::Concurrency.split_across_threads(resources) { |r| r.sync(cache) }
|
58
57
|
resources.each(&:after_sync)
|
59
58
|
end
|
60
59
|
measure_method(:sync_resources, "sync.duration")
|
61
60
|
|
62
61
|
def statsd_tags
|
63
62
|
{
|
64
|
-
namespace:
|
65
|
-
context:
|
63
|
+
namespace: namespace,
|
64
|
+
context: context,
|
66
65
|
sha: @sha,
|
67
66
|
}
|
68
67
|
end
|
@@ -83,18 +82,18 @@ module KubernetesDeploy
|
|
83
82
|
watch_time = (Time.now.utc - @deploy_started_at).round(1)
|
84
83
|
new_failures.each do |resource|
|
85
84
|
resource.report_status_to_statsd(watch_time)
|
86
|
-
|
85
|
+
logger.error("#{resource.id} failed to #{@operation_name} after #{watch_time}s")
|
87
86
|
end
|
88
87
|
|
89
88
|
new_timeouts.each do |resource|
|
90
89
|
resource.report_status_to_statsd(watch_time)
|
91
|
-
|
90
|
+
logger.error("#{resource.id} rollout timed out after #{watch_time}s")
|
92
91
|
end
|
93
92
|
|
94
93
|
if new_successes.present?
|
95
94
|
new_successes.each { |r| r.report_status_to_statsd(watch_time) }
|
96
95
|
success_string = ColorizedString.new("Successfully #{past_tense_operation} in #{watch_time}s:").green
|
97
|
-
|
96
|
+
logger.info("#{success_string} #{new_successes.map(&:id).join(', ')}")
|
98
97
|
end
|
99
98
|
end
|
100
99
|
|
@@ -102,7 +101,7 @@ module KubernetesDeploy
|
|
102
101
|
return unless resources.present?
|
103
102
|
resource_list = resources.map(&:id).join(', ')
|
104
103
|
msg = reminder ? "Still waiting for: #{resource_list}" : "Continuing to wait for: #{resource_list}"
|
105
|
-
|
104
|
+
logger.info(msg)
|
106
105
|
end
|
107
106
|
|
108
107
|
def report_and_give_up(remaining_resources)
|
@@ -130,34 +129,34 @@ module KubernetesDeploy
|
|
130
129
|
timeouts, failures = failed_resources.partition(&:deploy_timed_out?)
|
131
130
|
timeouts += global_timeouts
|
132
131
|
if timeouts.present?
|
133
|
-
|
132
|
+
logger.summary.add_action(
|
134
133
|
"timed out waiting for #{timeouts.length} #{'resource'.pluralize(timeouts.length)} to #{@operation_name}"
|
135
134
|
)
|
136
135
|
end
|
137
136
|
|
138
137
|
if failures.present?
|
139
|
-
|
138
|
+
logger.summary.add_action(
|
140
139
|
"failed to #{@operation_name} #{failures.length} #{'resource'.pluralize(failures.length)}"
|
141
140
|
)
|
142
141
|
end
|
143
142
|
|
144
|
-
kubectl = Kubectl.new(
|
145
|
-
|
143
|
+
kubectl = Kubectl.new(task_config: @task_config, log_failure_by_default: false)
|
144
|
+
Krane::Concurrency.split_across_threads(failed_resources + global_timeouts) do |r|
|
146
145
|
r.sync_debug_info(kubectl)
|
147
146
|
end
|
148
147
|
|
149
|
-
failed_resources.each { |r|
|
150
|
-
global_timeouts.each { |r|
|
148
|
+
failed_resources.each { |r| logger.summary.add_paragraph(r.debug_message) }
|
149
|
+
global_timeouts.each { |r| logger.summary.add_paragraph(r.debug_message(:gave_up, timeout: @timeout)) }
|
151
150
|
end
|
152
151
|
end
|
153
152
|
|
154
153
|
def record_success_statuses(successful_resources)
|
155
154
|
success_count = successful_resources.length
|
156
155
|
if success_count > 0
|
157
|
-
|
156
|
+
logger.summary.add_action("successfully #{past_tense_operation} #{success_count} "\
|
158
157
|
"#{'resource'.pluralize(success_count)}")
|
159
158
|
final_statuses = successful_resources.map(&:pretty_status).join("\n")
|
160
|
-
|
159
|
+
logger.summary.add_paragraph("#{ColorizedString.new('Successful resources').green}\n#{final_statuses}")
|
161
160
|
end
|
162
161
|
end
|
163
162
|
|
@@ -0,0 +1,228 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'krane/common'
|
3
|
+
require 'krane/kubernetes_resource'
|
4
|
+
require 'krane/kubernetes_resource/deployment'
|
5
|
+
require 'krane/kubeclient_builder'
|
6
|
+
require 'krane/resource_watcher'
|
7
|
+
require 'krane/kubectl'
|
8
|
+
|
9
|
+
module Krane
  # Restarts the pods in one or more deployments by patching a fresh
  # RESTARTED_AT env var into every container, which forces a new rollout.
  class RestartTask
    class FatalRestartError < FatalDeploymentError; end

    class RestartAPIError < FatalRestartError
      # NOTE(review): the only visible caller passes `e.message` (a String),
      # but this initializer calls `response.code` / `response.body`, which a
      # String does not respond to — this error path would itself raise
      # NoMethodError. Confirm what the caller should pass before changing
      # either side of this interface.
      def initialize(deployment_name, response)
        super("Failed to restart #{deployment_name}. " \
          "API returned non-200 response code (#{response.code})\n" \
          "Response:\n#{response.body}")
      end
    end

    HTTP_OK_RANGE = 200..299
    # Only deployments carrying this annotation are eligible for annotation-based restarts.
    ANNOTATION = "shipit.shopify.io/restart"

    # Initializes the restart task
    #
    # @param context [String] Kubernetes context / cluster
    # @param namespace [String] Kubernetes namespace
    # @param logger [Object] Logger object (defaults to an instance of Krane::FormattedLogger)
    # @param max_watch_seconds [Integer] Timeout in seconds
    def initialize(context:, namespace:, logger: nil, max_watch_seconds: nil)
      @logger = logger || Krane::FormattedLogger.build(namespace, context)
      @task_config = Krane::TaskConfig.new(context, namespace, @logger)
      @context = context
      @namespace = namespace
      @max_watch_seconds = max_watch_seconds
    end

    # Runs the task, returning a boolean representing success or failure
    #
    # @return [Boolean]
    def run(*args)
      perform!(*args)
      true
    rescue FatalDeploymentError
      false
    end
    alias_method :perform, :run

    # Runs the task, raising exceptions in case of issues
    #
    # @param deployments_names [Array<String>] Array of workload names to restart
    # @param selector [Hash] Selector(s) parsed by Krane::LabelSelector
    # @param verify_result [Boolean] Wait for completion and verify success
    #
    # @return [nil]
    def run!(deployments_names = nil, selector: nil, verify_result: true)
      start = Time.now.utc
      @logger.reset

      @logger.phase_heading("Initializing restart")
      verify_config!
      deployments = identify_target_deployments(deployments_names, selector: selector)

      @logger.phase_heading("Triggering restart by touching ENV[RESTARTED_AT]")
      patch_kubeclient_deployments(deployments)

      if verify_result
        @logger.phase_heading("Waiting for rollout")
        resources = build_watchables(deployments, start)
        verify_restart(resources)
      else
        warning = "Result verification is disabled for this task"
        @logger.summary.add_paragraph(ColorizedString.new(warning).yellow)
      end
      StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('success', deployments))
      @logger.print_summary(:success)
    rescue DeploymentTimeoutError
      StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('timeout', deployments))
      @logger.print_summary(:timed_out)
      raise
    rescue FatalDeploymentError => error
      StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('failure', deployments))
      # Suppress the action line when the message is just the default class name.
      @logger.summary.add_action(error.message) if error.message != error.class.to_s
      @logger.print_summary(:failure)
      raise
    end
    alias_method :perform!, :run!

    private

    # StatsD tags for the restart.duration distribution.
    # Fixed: the original had a stray trailing "}" inside the %W literal,
    # which was emitted as part of the tag value (e.g. "deployments:3}").
    def tags(status, deployments)
      %W(namespace:#{@namespace} context:#{@context} status:#{status} deployments:#{deployments.to_a.length})
    end

    # Resolves which deployments to restart. Either restarts an explicit list
    # of names, or (when no names are given) every annotated deployment,
    # optionally narrowed by a label selector. Raises FatalRestartError when
    # the inputs are contradictory or match nothing.
    def identify_target_deployments(deployment_names, selector: nil)
      if deployment_names.nil?
        deployments = if selector.nil?
          @logger.info("Configured to restart all deployments with the `#{ANNOTATION}` annotation")
          apps_v1_kubeclient.get_deployments(namespace: @namespace)
        else
          selector_string = selector.to_s
          @logger.info(
            "Configured to restart all deployments with the `#{ANNOTATION}` annotation and #{selector_string} selector"
          )
          apps_v1_kubeclient.get_deployments(namespace: @namespace, label_selector: selector_string)
        end
        deployments.select! { |d| d.metadata.annotations[ANNOTATION] }

        if deployments.none?
          raise FatalRestartError, "no deployments with the `#{ANNOTATION}` annotation found in namespace #{@namespace}"
        end
      elsif deployment_names.empty?
        raise FatalRestartError, "Configured to restart deployments by name, but list of names was blank"
      elsif !selector.nil?
        raise FatalRestartError, "Can't specify deployment names and selector at the same time"
      else
        deployment_names = deployment_names.uniq
        list = deployment_names.join(', ')
        @logger.info("Configured to restart deployments by name: #{list}")

        deployments = fetch_deployments(deployment_names)
        if deployments.none?
          raise FatalRestartError, "no deployments with names #{list} found in namespace #{@namespace}"
        end
      end
      deployments
    end

    # Wraps raw kubeclient deployment records in Krane::Deployment resources
    # so they can be fed to ResourceWatcher.
    def build_watchables(kubeclient_resources, started)
      kubeclient_resources.map do |d|
        definition = d.to_h.deep_stringify_keys
        r = Deployment.new(namespace: @namespace, context: @context, definition: definition, logger: @logger)
        r.deploy_started_at = started # we don't care what happened to the resource before the restart cmd ran
        r
      end
    end

    def patch_deployment_with_restart(record)
      apps_v1_kubeclient.patch_deployment(
        record.metadata.name,
        build_patch_payload(record),
        @namespace
      )
    end

    def patch_kubeclient_deployments(deployments)
      deployments.each do |record|
        begin
          patch_deployment_with_restart(record)
          @logger.info("Triggered `#{record.metadata.name}` restart")
        rescue Kubeclient::HttpError => e
          raise RestartAPIError.new(record.metadata.name, e.message)
        end
      end
    end

    # Fetches each named deployment, failing fast on the first missing one.
    def fetch_deployments(list)
      list.map do |name|
        record = nil
        begin
          record = apps_v1_kubeclient.get_deployment(name, @namespace)
        rescue Kubeclient::ResourceNotFoundError
          raise FatalRestartError, "Deployment `#{name}` not found in namespace `#{@namespace}`"
        end
        record
      end
    end

    # Builds a strategic-merge patch that stamps RESTARTED_AT on every
    # container, which changes the pod template and triggers a rollout.
    def build_patch_payload(deployment)
      containers = deployment.spec.template.spec.containers
      {
        spec: {
          template: {
            spec: {
              containers: containers.map do |container|
                {
                  name: container.name,
                  env: [{ name: "RESTARTED_AT", value: Time.now.to_i.to_s }],
                }
              end,
            },
          },
        },
      }
    end

    # Watches the patched deployments and raises if any failed or timed out.
    def verify_restart(resources)
      ResourceWatcher.new(resources: resources, operation_name: "restart",
        timeout: @max_watch_seconds, task_config: @task_config).run
      failed_resources = resources.reject(&:deploy_succeeded?)
      success = failed_resources.empty?
      if !success && failed_resources.all?(&:deploy_timed_out?)
        raise DeploymentTimeoutError
      end
      raise FatalDeploymentError unless success
    end

    # Validates context/namespace reachability before doing any work.
    def verify_config!
      task_config_validator = TaskConfigValidator.new(@task_config, kubectl, kubeclient_builder)
      unless task_config_validator.valid?
        @logger.summary.add_action("Configuration invalid")
        @logger.summary.add_paragraph(task_config_validator.errors.map { |err| "- #{err}" }.join("\n"))
        raise Krane::TaskConfigurationError
      end
    end

    def apps_v1_kubeclient
      @apps_v1_kubeclient ||= kubeclient_builder.build_apps_v1_kubeclient(@context)
    end

    def kubeclient
      @kubeclient ||= kubeclient_builder.build_v1_kubeclient(@context)
    end

    def kubectl
      @kubectl ||= Kubectl.new(task_config: @task_config, log_failure_by_default: true)
    end

    # NOTE(review): not referenced anywhere in this class — possibly kept for
    # backward compatibility; confirm before removing.
    def v1beta1_kubeclient
      @v1beta1_kubeclient ||= kubeclient_builder.build_v1beta1_kubeclient(@context)
    end

    def kubeclient_builder
      @kubeclient_builder ||= KubeclientBuilder.new
    end
  end
end
@@ -0,0 +1,212 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
require 'krane/common'
|
5
|
+
require 'krane/kubeclient_builder'
|
6
|
+
require 'krane/kubectl'
|
7
|
+
require 'krane/resource_cache'
|
8
|
+
require 'krane/resource_watcher'
|
9
|
+
require 'krane/kubernetes_resource'
|
10
|
+
require 'krane/kubernetes_resource/pod'
|
11
|
+
require 'krane/runner_task_config_validator'
|
12
|
+
|
13
|
+
module Krane
  # Run a pod that exits upon completing a task
  class RunnerTask
    class TaskTemplateMissingError < TaskConfigurationError; end

    attr_reader :pod_name

    # Initializes the runner task
    #
    # @param namespace [String] Kubernetes namespace
    # @param context [String] Kubernetes context / cluster
    # @param logger [Object] Logger object (defaults to an instance of Krane::FormattedLogger)
    # @param max_watch_seconds [Integer] Timeout in seconds
    def initialize(namespace:, context:, logger: nil, max_watch_seconds: nil)
      @logger = logger || Krane::FormattedLogger.build(namespace, context)
      @task_config = Krane::TaskConfig.new(context, namespace, @logger)
      @namespace = namespace
      @context = context
      @max_watch_seconds = max_watch_seconds
    end

    # Runs the task, returning a boolean representing success or failure
    #
    # @return [Boolean]
    def run(*args)
      run!(*args)
      true
    rescue DeploymentTimeoutError, FatalDeploymentError
      false
    end

    # Runs the task, raising exceptions in case of issues
    #
    # @param task_template [String] The template file you'll be rendering
    # @param entrypoint [Array<String>] Override the default command in the container image
    # @param args [Array<String>] Override the default arguments for the command
    # @param env_vars [Array<String>] List of env vars
    # @param verify_result [Boolean] Wait for completion and verify pod success
    #
    # @return [nil]
    def run!(task_template:, entrypoint:, args:, env_vars: [], verify_result: true)
      start = Time.now.utc
      @logger.reset

      @logger.phase_heading("Initializing task")

      @logger.info("Validating configuration")
      verify_config!(task_template, args)
      @logger.info("Using namespace '#{@namespace}' in context '#{@context}'")

      pod = build_pod(task_template, entrypoint, args, env_vars, verify_result)
      validate_pod(pod)

      @logger.phase_heading("Running pod")
      create_pod(pod)

      if verify_result
        @logger.phase_heading("Streaming logs")
        watch_pod(pod)
      else
        record_status_once(pod)
      end
      StatsD.client.distribution('task_runner.duration', StatsD.duration(start), tags: statsd_tags('success'))
      @logger.print_summary(:success)
    rescue DeploymentTimeoutError
      StatsD.client.distribution('task_runner.duration', StatsD.duration(start), tags: statsd_tags('timeout'))
      @logger.print_summary(:timed_out)
      raise
    rescue FatalDeploymentError
      StatsD.client.distribution('task_runner.duration', StatsD.duration(start), tags: statsd_tags('failure'))
      @logger.print_summary(:failure)
      raise
    end

    private

    # Creates the pod via the Kubernetes API; remembers its name so callers
    # can look it up through #pod_name.
    def create_pod(pod)
      @logger.info("Creating pod '#{pod.name}'")
      pod.deploy_started_at = Time.now.utc
      kubeclient.create_pod(pod.to_kubeclient_resource)
      @pod_name = pod.name
      @logger.info("Pod creation succeeded")
    rescue Kubeclient::HttpError => e
      msg = "Failed to create pod: #{e.class.name}: #{e.message}"
      @logger.summary.add_paragraph(msg)
      raise FatalDeploymentError, msg
    end

    # Builds the Krane::Pod resource from the named PodTemplate, applying the
    # entrypoint/args/env overrides and restart-policy normalization.
    def build_pod(template_name, entrypoint, args, env_vars, verify_result)
      task_template = get_template(template_name)
      @logger.info("Using template '#{template_name}'")
      pod_template = build_pod_definition(task_template)
      set_container_overrides!(pod_template, entrypoint, args, env_vars)
      ensure_valid_restart_policy!(pod_template, verify_result)
      Pod.new(namespace: @namespace, context: @context, logger: @logger, stream_logs: true,
        definition: pod_template.to_hash.deep_stringify_keys, statsd_tags: [])
    end

    def validate_pod(pod)
      pod.validate_definition(kubectl)
    end

    # Watches the pod to completion, raising on timeout or failure.
    def watch_pod(pod)
      rw = ResourceWatcher.new(resources: [pod], timeout: @max_watch_seconds,
        operation_name: "run", task_config: @task_config)
      rw.run(delay_sync: 1, reminder_interval: 30.seconds)
      raise DeploymentTimeoutError if pod.deploy_timed_out?
      raise FatalDeploymentError if pod.deploy_failed?
    end

    # When verification is disabled, capture a single status snapshot for the
    # summary instead of watching the pod.
    def record_status_once(pod)
      cache = ResourceCache.new(@task_config)
      pod.sync(cache)
      warning = <<~STRING
        #{ColorizedString.new('Result verification is disabled for this task.').yellow}
        The following status was observed immediately after pod creation:
        #{pod.pretty_status}
      STRING
      @logger.summary.add_paragraph(warning)
    end

    # Validates the task configuration before doing any work.
    # Fixed: the errors array was wrapped in another array
    # (`[validator.errors].map`), so the summary printed one line containing
    # the inspected array rather than one "- err" bullet per error. Now
    # matches the formatting used by RestartTask#verify_config!.
    def verify_config!(task_template, args)
      task_config_validator = RunnerTaskConfigValidator.new(task_template, args, @task_config, kubectl,
        kubeclient_builder)
      unless task_config_validator.valid?
        @logger.summary.add_action("Configuration invalid")
        @logger.summary.add_paragraph(task_config_validator.errors.map { |err| "- #{err}" }.join("\n"))
        raise Krane::TaskConfigurationError
      end
    end

    # Fetches the PodTemplate resource and returns its embedded pod template.
    def get_template(template_name)
      pod_template = kubeclient.get_pod_template(template_name, @namespace)
      pod_template.template
    rescue Kubeclient::ResourceNotFoundError
      msg = "Pod template `#{template_name}` not found in namespace `#{@namespace}`, context `#{@context}`"
      @logger.summary.add_paragraph(msg)
      raise TaskTemplateMissingError, msg
    rescue Kubeclient::HttpError => error
      raise FatalKubeAPIError, "Error retrieving pod template: #{error.class.name}: #{error.message}"
    end

    # Turns the pod template into a standalone Pod definition with a unique,
    # length-limited name (Kubernetes caps names at 63 characters).
    def build_pod_definition(base_template)
      pod_definition = base_template.dup
      pod_definition.kind = 'Pod'
      pod_definition.apiVersion = 'v1'
      pod_definition.metadata.namespace = @namespace

      unique_name = pod_definition.metadata.name + "-" + SecureRandom.hex(8)
      @logger.warn("Name is too long, using '#{unique_name[0..62]}'") if unique_name.length > 63
      pod_definition.metadata.name = unique_name[0..62]

      pod_definition
    end

    # Applies command/args/env overrides to the container named 'task-runner'.
    def set_container_overrides!(pod_definition, entrypoint, args, env_vars)
      container = pod_definition.spec.containers.find { |cont| cont.name == 'task-runner' }
      if container.nil?
        message = "Pod spec does not contain a template container called 'task-runner'"
        @logger.summary.add_paragraph(message)
        raise TaskConfigurationError, message
      end

      container.command = entrypoint if entrypoint
      container.args = args if args

      # env_vars entries are "KEY=VALUE"; split on the first '=' only so
      # values may themselves contain '='.
      env_args = env_vars.map do |env|
        key, value = env.split('=', 2)
        { name: key, value: value }
      end
      container.env ||= []
      container.env = container.env.map(&:to_h) + env_args
    end

    # Result verification requires restartPolicy=Never so the pod terminates
    # instead of being restarted in place; warn and override when needed.
    def ensure_valid_restart_policy!(template, verify)
      restart_policy = template.spec.restartPolicy
      if verify && restart_policy != "Never"
        @logger.warn("Changed Pod RestartPolicy from '#{restart_policy}' to 'Never'. Disable "\
          "result verification to use '#{restart_policy}'.")
        template.spec.restartPolicy = "Never"
      end
    end

    def kubectl
      @kubectl ||= Kubectl.new(task_config: @task_config, log_failure_by_default: true)
    end

    def kubeclient
      @kubeclient ||= kubeclient_builder.build_v1_kubeclient(@context)
    end

    def kubeclient_builder
      @kubeclient_builder ||= KubeclientBuilder.new
    end

    def statsd_tags(status)
      %W(namespace:#{@namespace} context:#{@context} status:#{status})
    end
  end
end