tobsch-krane 1.0.0
- checksums.yaml +7 -0
- data/.buildkite/pipeline.nightly.yml +43 -0
- data/.github/probots.yml +2 -0
- data/.gitignore +20 -0
- data/.rubocop.yml +17 -0
- data/.shopify-build/VERSION +1 -0
- data/.shopify-build/kubernetes-deploy.yml +53 -0
- data/1.0-Upgrade.md +185 -0
- data/CHANGELOG.md +431 -0
- data/CODE_OF_CONDUCT.md +46 -0
- data/CONTRIBUTING.md +164 -0
- data/Gemfile +16 -0
- data/ISSUE_TEMPLATE.md +25 -0
- data/LICENSE.txt +21 -0
- data/README.md +655 -0
- data/Rakefile +36 -0
- data/bin/ci +21 -0
- data/bin/setup +16 -0
- data/bin/test +47 -0
- data/dev.yml +28 -0
- data/dev/flamegraph-from-tests +35 -0
- data/exe/krane +5 -0
- data/krane.gemspec +44 -0
- data/lib/krane.rb +7 -0
- data/lib/krane/bindings_parser.rb +88 -0
- data/lib/krane/cli/deploy_command.rb +75 -0
- data/lib/krane/cli/global_deploy_command.rb +54 -0
- data/lib/krane/cli/krane.rb +91 -0
- data/lib/krane/cli/render_command.rb +41 -0
- data/lib/krane/cli/restart_command.rb +34 -0
- data/lib/krane/cli/run_command.rb +54 -0
- data/lib/krane/cli/version_command.rb +13 -0
- data/lib/krane/cluster_resource_discovery.rb +113 -0
- data/lib/krane/common.rb +23 -0
- data/lib/krane/concerns/template_reporting.rb +29 -0
- data/lib/krane/concurrency.rb +18 -0
- data/lib/krane/container_logs.rb +106 -0
- data/lib/krane/deferred_summary_logging.rb +95 -0
- data/lib/krane/delayed_exceptions.rb +14 -0
- data/lib/krane/deploy_task.rb +363 -0
- data/lib/krane/deploy_task_config_validator.rb +29 -0
- data/lib/krane/duration_parser.rb +27 -0
- data/lib/krane/ejson_secret_provisioner.rb +154 -0
- data/lib/krane/errors.rb +28 -0
- data/lib/krane/formatted_logger.rb +57 -0
- data/lib/krane/global_deploy_task.rb +210 -0
- data/lib/krane/global_deploy_task_config_validator.rb +12 -0
- data/lib/krane/kubeclient_builder.rb +156 -0
- data/lib/krane/kubectl.rb +120 -0
- data/lib/krane/kubernetes_resource.rb +621 -0
- data/lib/krane/kubernetes_resource/cloudsql.rb +43 -0
- data/lib/krane/kubernetes_resource/config_map.rb +22 -0
- data/lib/krane/kubernetes_resource/cron_job.rb +18 -0
- data/lib/krane/kubernetes_resource/custom_resource.rb +87 -0
- data/lib/krane/kubernetes_resource/custom_resource_definition.rb +98 -0
- data/lib/krane/kubernetes_resource/daemon_set.rb +90 -0
- data/lib/krane/kubernetes_resource/deployment.rb +213 -0
- data/lib/krane/kubernetes_resource/horizontal_pod_autoscaler.rb +65 -0
- data/lib/krane/kubernetes_resource/ingress.rb +18 -0
- data/lib/krane/kubernetes_resource/job.rb +60 -0
- data/lib/krane/kubernetes_resource/network_policy.rb +22 -0
- data/lib/krane/kubernetes_resource/persistent_volume_claim.rb +80 -0
- data/lib/krane/kubernetes_resource/pod.rb +269 -0
- data/lib/krane/kubernetes_resource/pod_disruption_budget.rb +23 -0
- data/lib/krane/kubernetes_resource/pod_set_base.rb +71 -0
- data/lib/krane/kubernetes_resource/pod_template.rb +20 -0
- data/lib/krane/kubernetes_resource/replica_set.rb +92 -0
- data/lib/krane/kubernetes_resource/resource_quota.rb +22 -0
- data/lib/krane/kubernetes_resource/role.rb +22 -0
- data/lib/krane/kubernetes_resource/role_binding.rb +22 -0
- data/lib/krane/kubernetes_resource/secret.rb +24 -0
- data/lib/krane/kubernetes_resource/service.rb +104 -0
- data/lib/krane/kubernetes_resource/service_account.rb +22 -0
- data/lib/krane/kubernetes_resource/stateful_set.rb +70 -0
- data/lib/krane/label_selector.rb +42 -0
- data/lib/krane/oj.rb +4 -0
- data/lib/krane/options_helper.rb +39 -0
- data/lib/krane/remote_logs.rb +60 -0
- data/lib/krane/render_task.rb +118 -0
- data/lib/krane/renderer.rb +118 -0
- data/lib/krane/resource_cache.rb +68 -0
- data/lib/krane/resource_deployer.rb +265 -0
- data/lib/krane/resource_watcher.rb +171 -0
- data/lib/krane/restart_task.rb +228 -0
- data/lib/krane/rollout_conditions.rb +103 -0
- data/lib/krane/runner_task.rb +212 -0
- data/lib/krane/runner_task_config_validator.rb +18 -0
- data/lib/krane/statsd.rb +65 -0
- data/lib/krane/task_config.rb +22 -0
- data/lib/krane/task_config_validator.rb +96 -0
- data/lib/krane/template_sets.rb +173 -0
- data/lib/krane/version.rb +4 -0
- data/pull_request_template.md +8 -0
- data/screenshots/deploy-demo.gif +0 -0
- data/screenshots/migrate-logs.png +0 -0
- data/screenshots/missing-secret-fail.png +0 -0
- data/screenshots/success.png +0 -0
- data/screenshots/test-output.png +0 -0
- metadata +375 -0
data/lib/krane/kubernetes_resource/horizontal_pod_autoscaler.rb
@@ -0,0 +1,65 @@
+# frozen_string_literal: true
+module Krane
+  class HorizontalPodAutoscaler < KubernetesResource
+    TIMEOUT = 3.minutes
+    RECOVERABLE_CONDITION_PREFIX = "FailedGet"
+
+    def deploy_succeeded?
+      scaling_active_condition["status"] == "True" || scaling_disabled?
+    end
+
+    def deploy_failed?
+      return false unless exists?
+      return false if scaling_disabled?
+      scaling_active_condition["status"] == "False" &&
+        !scaling_active_condition.fetch("reason", "").start_with?(RECOVERABLE_CONDITION_PREFIX)
+    end
+
+    def kubectl_resource_type
+      'hpa.v2beta1.autoscaling'
+    end
+
+    def status
+      if !exists?
+        super
+      elsif scaling_disabled?
+        "ScalingDisabled"
+      elsif deploy_succeeded?
+        "Configured"
+      elsif scaling_active_condition.present? || able_to_scale_condition.present?
+        condition = scaling_active_condition.presence || able_to_scale_condition
+        condition['reason']
+      else
+        "Unknown"
+      end
+    end
+
+    def failure_message
+      condition = scaling_active_condition.presence || able_to_scale_condition.presence || {}
+      condition['message']
+    end
+
+    def timeout_message
+      failure_message.presence || super
+    end
+
+    private
+
+    def scaling_disabled?
+      scaling_active_condition["status"] == "False" &&
+        scaling_active_condition["reason"] == "ScalingDisabled"
+    end
+
+    def conditions
+      @instance_data.dig("status", "conditions") || []
+    end
+
+    def able_to_scale_condition
+      conditions.detect { |c| c["type"] == "AbleToScale" } || {}
+    end
+
+    def scaling_active_condition
+      conditions.detect { |c| c["type"] == "ScalingActive" } || {}
+    end
+  end
+end
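The predicates above read the HPA's status.conditions as reported by the v2beta1 autoscaling API: "ScalingActive" must be "True" for success, and a "False" status only counts as a failure when its reason is not a recoverable "FailedGet*" metrics error. A minimal sketch of that evaluation in plain Ruby, using an illustrative conditions payload (the values are made up, not from a real cluster):

require "json"

# Illustrative HPA status; field names follow the v2beta1 API,
# but the condition values are invented for this example.
instance_data = {
  "status" => {
    "conditions" => [
      { "type" => "AbleToScale", "status" => "True", "reason" => "ReadyForNewScale" },
      { "type" => "ScalingActive", "status" => "False",
        "reason" => "FailedGetResourceMetric",
        "message" => "the HPA was unable to compute the replica count" },
    ],
  },
}

conditions     = instance_data.dig("status", "conditions") || []
scaling_active = conditions.detect { |c| c["type"] == "ScalingActive" } || {}

# "FailedGet*" reasons are metrics-fetch hiccups, treated as recoverable.
recoverable = scaling_active.fetch("reason", "").start_with?("FailedGet")
succeeded   = scaling_active["status"] == "True"
failed      = scaling_active["status"] == "False" && !recoverable

puts({ succeeded: succeeded, failed: failed, recoverable: recoverable }.to_json)
# => {"succeeded":false,"failed":false,"recoverable":true}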
data/lib/krane/kubernetes_resource/ingress.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+module Krane
+  class Ingress < KubernetesResource
+    TIMEOUT = 30.seconds
+
+    def status
+      exists? ? "Created" : "Not Found"
+    end
+
+    def deploy_succeeded?
+      exists?
+    end
+
+    def deploy_failed?
+      false
+    end
+  end
+end
data/lib/krane/kubernetes_resource/job.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+module Krane
+  class Job < KubernetesResource
+    TIMEOUT = 10.minutes
+
+    def deploy_succeeded?
+      # Don't block deploys for long-running jobs;
+      # instead, report success when there is at least 1 active pod.
+      return false unless deploy_started?
+      done? || running?
+    end
+
+    def deploy_failed?
+      return false unless deploy_started?
+      return true if failed_status_condition
+      return false unless @instance_data.dig("spec", "backoffLimit").present?
+      (@instance_data.dig("status", "failed") || 0) >= @instance_data.dig("spec", "backoffLimit")
+    end
+
+    def status
+      if !exists?
+        super
+      elsif done?
+        "Succeeded"
+      elsif running?
+        "Started"
+      elsif deploy_failed?
+        "Failed"
+      else
+        "Unknown"
+      end
+    end
+
+    def failure_message
+      if (condition = failed_status_condition.presence)
+        "#{condition['reason']} (#{condition['message']})"
+      end
+    end
+
+    private
+
+    def failed_status_condition
+      @instance_data.dig("status", "conditions")&.detect do |condition|
+        condition["type"] == 'Failed' && condition['status'] == "True"
+      end
+    end
+
+    def done?
+      (@instance_data.dig("status", "succeeded") || 0) == @instance_data.dig("spec", "completions")
+    end
+
+    def running?
+      now = Time.now.utc
+      start_time = @instance_data.dig("status", "startTime")
+      # Wait 5 seconds to ensure the job doesn't immediately fail.
+      return false if !start_time.present? || now - Time.parse(start_time) < 5.seconds
+      (@instance_data.dig("status", "active") || 0) >= 1
+    end
+  end
+end
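The deploy_failed? logic above fails a Job in two ways: a status condition of type "Failed", or a failed-pod count that has reached spec.backoffLimit. A minimal sketch of the backoffLimit branch in plain Ruby, against a hand-written Job fragment (the numbers are illustrative):

# Illustrative Job manifest fragment; values are invented.
instance_data = {
  "spec"   => { "backoffLimit" => 3, "completions" => 1 },
  "status" => { "failed" => 3, "succeeded" => 0, "active" => 0 },
}

backoff_limit = instance_data.dig("spec", "backoffLimit")
failed_count  = instance_data.dig("status", "failed") || 0

# The Job counts as failed once its failed-pod count reaches backoffLimit.
puts failed_count >= backoff_limit # => true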
data/lib/krane/kubernetes_resource/network_policy.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+module Krane
+  class NetworkPolicy < KubernetesResource
+    TIMEOUT = 30.seconds
+
+    def status
+      exists? ? "Created" : "Not Found"
+    end
+
+    def deploy_succeeded?
+      exists?
+    end
+
+    def deploy_failed?
+      false
+    end
+
+    def timeout_message
+      UNUSUAL_FAILURE_MESSAGE
+    end
+  end
+end
data/lib/krane/kubernetes_resource/persistent_volume_claim.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+module Krane
+  class PersistentVolumeClaim < KubernetesResource
+    TIMEOUT = 5.minutes
+
+    def sync(cache)
+      super
+      @storage_classes = cache.get_all("StorageClass").map { |sc| StorageClass.new(sc) }
+    end
+
+    def status
+      exists? ? @instance_data["status"]["phase"] : "Not Found"
+    end
+
+    def deploy_succeeded?
+      return true if status == "Bound"
+
+      # If the StorageClass has volumeBindingMode: WaitForFirstConsumer,
+      # the PVC won't bind until after a Pod mounts it. But it must be pre-deployed,
+      # as the Pod requires it, so 'Pending' must be treated as a success state.
+      if storage_class&.volume_binding_mode == "WaitForFirstConsumer"
+        return status == "Pending" || status == "Bound"
+      end
+      false
+    end
+
+    def deploy_failed?
+      status == "Lost" || failure_message.present?
+    end
+
+    def failure_message
+      if storage_class_name.nil? && @storage_classes.count(&:default?) > 1
+        "PVC has no StorageClass specified and there are multiple StorageClasses " \
+        "annotated as default. This is an invalid cluster configuration."
+      end
+    end
+
+    def timeout_message
+      return STANDARD_TIMEOUT_MESSAGE unless storage_class_name.present? && !storage_class
+      "PVC specified a StorageClass of #{storage_class_name} but the resource does not exist"
+    end
+
+    private
+
+    def storage_class_name
+      @definition.dig("spec", "storageClassName")
+    end
+
+    def storage_class
+      if storage_class_name.present?
+        @storage_classes.detect { |sc| sc.name == storage_class_name }
+      # storage_class_name = "" is an explicit request for no storage class
+      # storage_class_name = nil is an implicit request for the default storage class
+      elsif storage_class_name != ""
+        @storage_classes.detect(&:default?)
+      end
+    end
+
+    class StorageClass < KubernetesResource
+      DEFAULT_CLASS_ANNOTATION = "storageclass.kubernetes.io/is-default-class"
+      DEFAULT_CLASS_BETA_ANNOTATION = "storageclass.beta.kubernetes.io/is-default-class"
+
+      attr_reader :name
+
+      def initialize(definition)
+        @definition = definition
+        @name = definition.dig("metadata", "name").to_s
+      end
+
+      def volume_binding_mode
+        @definition.dig("volumeBindingMode")
+      end
+
+      def default?
+        @definition.dig("metadata", "annotations", DEFAULT_CLASS_ANNOTATION) == "true" ||
+          @definition.dig("metadata", "annotations", DEFAULT_CLASS_BETA_ANNOTATION) == "true"
+      end
+    end
+  end
+end
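StorageClass#default? above recognizes a default StorageClass via either the GA or the beta is-default-class annotation, and failure_message treats more than one default as an invalid cluster configuration. A minimal sketch of that detection in plain Ruby, over illustrative StorageClass definitions:

# Both annotation keys that mark a StorageClass as the cluster default.
DEFAULT_ANNOTATIONS = %w(
  storageclass.kubernetes.io/is-default-class
  storageclass.beta.kubernetes.io/is-default-class
)

# Illustrative StorageClass definitions; names and annotations are invented.
storage_classes = [
  { "metadata" => { "name" => "ssd",
                    "annotations" => { "storageclass.kubernetes.io/is-default-class" => "true" } } },
  { "metadata" => { "name" => "slow", "annotations" => {} } },
]

defaults = storage_classes.select do |sc|
  DEFAULT_ANNOTATIONS.any? { |a| sc.dig("metadata", "annotations", a) == "true" }
end

# A PVC with no storageClassName is only unambiguous if there is exactly one default.
puts defaults.map { |sc| sc.dig("metadata", "name") }.inspect # => ["ssd"]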
data/lib/krane/kubernetes_resource/pod.rb
@@ -0,0 +1,269 @@
+# frozen_string_literal: true
+module Krane
+  class Pod < KubernetesResource
+    TIMEOUT = 10.minutes
+
+    FAILED_PHASE_NAME = "Failed"
+    TRANSIENT_FAILURE_REASONS = %w(
+      Evicted
+      Preempting
+    )
+
+    attr_accessor :stream_logs
+
+    def initialize(namespace:, context:, definition:, logger:,
+      statsd_tags: nil, parent: nil, deploy_started_at: nil, stream_logs: false)
+      @parent = parent
+      @deploy_started_at = deploy_started_at
+
+      @containers = definition.fetch("spec", {}).fetch("containers", []).map { |c| Container.new(c) }
+      unless @containers.present?
+        logger.summary.add_paragraph("Rendered template content:\n#{definition.to_yaml}")
+        raise FatalDeploymentError, "Template is missing required field spec.containers"
+      end
+      @containers += definition["spec"].fetch("initContainers", []).map { |c| Container.new(c, init_container: true) }
+      @stream_logs = stream_logs
+      super(namespace: namespace, context: context, definition: definition,
+            logger: logger, statsd_tags: statsd_tags)
+    end
+
+    def sync(_cache)
+      super
+      raise_predates_deploy_error if exists? && unmanaged? && !deploy_started?
+
+      if exists?
+        logs.sync if unmanaged?
+        update_container_statuses(@instance_data["status"])
+      else # reset
+        @containers.each(&:reset_status)
+      end
+    end
+
+    def after_sync
+      if @stream_logs
+        logs.print_latest
+      elsif unmanaged? && deploy_succeeded?
+        logs.print_all
+      end
+    end
+
+    def status
+      return phase if reason.blank?
+      "#{phase} (Reason: #{reason})"
+    end
+
+    def deploy_succeeded?
+      if unmanaged?
+        phase == "Succeeded"
+      else
+        phase == "Running" && ready?
+      end
+    end
+
+    def deploy_failed?
+      failure_message.present?
+    end
+
+    def timeout_message
+      if readiness_probe_failure?
+        probe_failure_msgs = @containers.map(&:readiness_fail_reason).compact
+        header = "The following containers have not passed their readiness probes on at least one pod:\n"
+        header + probe_failure_msgs.join("\n")
+      elsif failed_schedule_reason.present?
+        "Pod could not be scheduled because #{failed_schedule_reason}"
+      else
+        STANDARD_TIMEOUT_MESSAGE
+      end
+    end
+
+    def failure_message
+      doomed_containers = @containers.select(&:doomed?)
+      if doomed_containers.present?
+        container_problems = if unmanaged?
+          "The following containers encountered errors:\n"
+        else
+          "The following containers are in a state that is unlikely to be recoverable:\n"
+        end
+        doomed_containers.each do |c|
+          red_name = ColorizedString.new(c.name).red
+          container_problems += "> #{red_name}: #{c.doom_reason}\n"
+        end
+      end
+      "#{phase_failure_message} #{container_problems}".strip.presence
+    end
+
+    def fetch_debug_logs
+      logs.sync
+      logs
+    end
+
+    def print_debug_logs?
+      exists? && !@stream_logs # don't print them a second time
+    end
+
+    def node_name
+      @instance_data.dig('spec', 'nodeName')
+    end
+
+    private
+
+    def failed_schedule_reason
+      if phase == "Pending"
+        conditions = @instance_data.dig('status', 'conditions') || []
+        unschedulable = conditions.find do |condition|
+          condition["type"] == "PodScheduled" && condition["status"] == "False"
+        end
+        unschedulable&.dig('message')
+      end
+    end
+
+    def failed_phase?
+      phase == FAILED_PHASE_NAME
+    end
+
+    def transient_failure_reason?
+      return false if unmanaged?
+      TRANSIENT_FAILURE_REASONS.include?(reason)
+    end
+
+    def phase_failure_message
+      if failed_phase? && !transient_failure_reason?
+        return "Pod status: #{status}."
+      end
+
+      return unless unmanaged?
+
+      if terminating?
+        "Pod status: Terminating."
+      elsif disappeared?
+        "Pod status: Disappeared."
+      end
+    end
+
+    def logs
+      @logs ||= Krane::RemoteLogs.new(
+        logger: @logger,
+        parent_id: id,
+        container_names: @containers.map(&:name),
+        namespace: @namespace,
+        context: @context
+      )
+    end
+
+    def phase
+      @instance_data.dig("status", "phase") || "Unknown"
+    end
+
+    def reason
+      @instance_data.dig('status', 'reason')
+    end
+
+    def readiness_probe_failure?
+      return false if ready? || unmanaged?
+      return false if phase != "Running"
+      @containers.any?(&:readiness_fail_reason)
+    end
+
+    def ready?
+      return false unless (status_data = @instance_data["status"])
+      ready_condition = status_data.fetch("conditions", []).find { |condition| condition["type"] == "Ready" }
+      ready_condition.present? && (ready_condition["status"] == "True")
+    end
+
+    def update_container_statuses(status_data)
+      @containers.each do |c|
+        key = c.init_container? ? "initContainerStatuses" : "containerStatuses"
+        if status_data.key?(key)
+          data = status_data[key].find { |st| st["name"] == c.name }
+          c.update_status(data)
+        else
+          c.reset_status
+        end
+      end
+    end
+
+    def unmanaged?
+      @parent.blank?
+    end
+
+    def raise_predates_deploy_error
+      example_color = :green
+      msg = <<-STRING.strip_heredoc
+        Unmanaged pods like #{id} must have unique names on every deploy in order to work as intended.
+        The recommended way to achieve this is to include "<%= deployment_id %>" in the pod's name, like this:
+          #{ColorizedString.new('kind: Pod').colorize(example_color)}
+          #{ColorizedString.new('metadata:').colorize(example_color)}
+            #{ColorizedString.new("name: #{@name}-<%= deployment_id %>").colorize(example_color)}
+      STRING
+      @logger.summary.add_paragraph(msg)
+      raise FatalDeploymentError, "#{id} existed before the deploy started"
+    end
+
+    class Container
+      attr_reader :name
+
+      def initialize(definition, init_container: false)
+        @init_container = init_container
+        @name = definition["name"]
+        @image = definition["image"]
+        @http_probe_location = definition.dig("readinessProbe", "httpGet", "path")
+        @exec_probe_command = definition.dig("readinessProbe", "exec", "command")
+        @status = {}
+      end
+
+      def doomed?
+        doom_reason.present?
+      end
+
+      def doom_reason
+        limbo_reason = @status.dig("state", "waiting", "reason")
+        limbo_message = @status.dig("state", "waiting", "message")
+
+        if @status.dig("lastState", "terminated", "reason") == "ContainerCannotRun"
+          # ref: https://github.com/kubernetes/kubernetes/blob/562e721ece8a16e05c7e7d6bdd6334c910733ab2/pkg/kubelet/dockershim/docker_container.go#L353
+          exit_code = @status.dig('lastState', 'terminated', 'exitCode')
+          "Failed to start (exit #{exit_code}): #{@status.dig('lastState', 'terminated', 'message')}"
+        elsif @status.dig("state", "terminated", "reason") == "ContainerCannotRun"
+          exit_code = @status.dig('state', 'terminated', 'exitCode')
+          "Failed to start (exit #{exit_code}): #{@status.dig('state', 'terminated', 'message')}"
+        elsif limbo_reason == "CrashLoopBackOff"
+          exit_code = @status.dig('lastState', 'terminated', 'exitCode')
+          "Crashing repeatedly (exit #{exit_code}). See logs for more information."
+        elsif limbo_reason == "ErrImagePull" && limbo_message.match(/not found/i)
+          "Failed to pull image #{@image}. "\
+          "Did you wait for it to be built and pushed to the registry before deploying?"
+        elsif limbo_reason == "CreateContainerConfigError"
+          "Failed to generate container configuration: #{limbo_message}"
+        end
+      end
+
+      def readiness_fail_reason
+        return if ready? || init_container?
+        return unless (@http_probe_location || @exec_probe_command).present?
+
+        yellow_name = ColorizedString.new(name).yellow
+        if @http_probe_location
+          "> #{yellow_name} must respond with a good status code at '#{@http_probe_location}'"
+        elsif @exec_probe_command
+          "> #{yellow_name} must exit 0 from the following command: '#{@exec_probe_command.join(' ')}'"
+        end
+      end
+
+      def ready?
+        @status['ready'] == true
+      end
+
+      def init_container?
+        @init_container
+      end
+
+      def update_status(data)
+        @status = data || {}
+      end
+
+      def reset_status
+        @status = {}
+      end
+    end
+  end
+end
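Container#doom_reason above classifies a container as unrecoverable by pattern-matching its containerStatuses entry: ContainerCannotRun terminations, CrashLoopBackOff, missing images on ErrImagePull, and CreateContainerConfigError. A minimal sketch of the CrashLoopBackOff branch in plain Ruby, evaluated against an illustrative status entry (the values are invented):

# Illustrative containerStatuses entry for a crash-looping container.
status = {
  "name"  => "app",
  "state" => { "waiting" => { "reason" => "CrashLoopBackOff",
                              "message" => "back-off 40s restarting failed container" } },
  "lastState" => { "terminated" => { "exitCode" => 1, "reason" => "Error" } },
}

limbo_reason = status.dig("state", "waiting", "reason")
if limbo_reason == "CrashLoopBackOff"
  # The exit code of the previous crash lives under lastState.terminated.
  exit_code = status.dig("lastState", "terminated", "exitCode")
  puts "Crashing repeatedly (exit #{exit_code}). See logs for more information."
end
# => Crashing repeatedly (exit 1). See logs for more information.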