tobsch-krane 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (99) hide show
  1. checksums.yaml +7 -0
  2. data/.buildkite/pipeline.nightly.yml +43 -0
  3. data/.github/probots.yml +2 -0
  4. data/.gitignore +20 -0
  5. data/.rubocop.yml +17 -0
  6. data/.shopify-build/VERSION +1 -0
  7. data/.shopify-build/kubernetes-deploy.yml +53 -0
  8. data/1.0-Upgrade.md +185 -0
  9. data/CHANGELOG.md +431 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +164 -0
  12. data/Gemfile +16 -0
  13. data/ISSUE_TEMPLATE.md +25 -0
  14. data/LICENSE.txt +21 -0
  15. data/README.md +655 -0
  16. data/Rakefile +36 -0
  17. data/bin/ci +21 -0
  18. data/bin/setup +16 -0
  19. data/bin/test +47 -0
  20. data/dev.yml +28 -0
  21. data/dev/flamegraph-from-tests +35 -0
  22. data/exe/krane +5 -0
  23. data/krane.gemspec +44 -0
  24. data/lib/krane.rb +7 -0
  25. data/lib/krane/bindings_parser.rb +88 -0
  26. data/lib/krane/cli/deploy_command.rb +75 -0
  27. data/lib/krane/cli/global_deploy_command.rb +54 -0
  28. data/lib/krane/cli/krane.rb +91 -0
  29. data/lib/krane/cli/render_command.rb +41 -0
  30. data/lib/krane/cli/restart_command.rb +34 -0
  31. data/lib/krane/cli/run_command.rb +54 -0
  32. data/lib/krane/cli/version_command.rb +13 -0
  33. data/lib/krane/cluster_resource_discovery.rb +113 -0
  34. data/lib/krane/common.rb +23 -0
  35. data/lib/krane/concerns/template_reporting.rb +29 -0
  36. data/lib/krane/concurrency.rb +18 -0
  37. data/lib/krane/container_logs.rb +106 -0
  38. data/lib/krane/deferred_summary_logging.rb +95 -0
  39. data/lib/krane/delayed_exceptions.rb +14 -0
  40. data/lib/krane/deploy_task.rb +363 -0
  41. data/lib/krane/deploy_task_config_validator.rb +29 -0
  42. data/lib/krane/duration_parser.rb +27 -0
  43. data/lib/krane/ejson_secret_provisioner.rb +154 -0
  44. data/lib/krane/errors.rb +28 -0
  45. data/lib/krane/formatted_logger.rb +57 -0
  46. data/lib/krane/global_deploy_task.rb +210 -0
  47. data/lib/krane/global_deploy_task_config_validator.rb +12 -0
  48. data/lib/krane/kubeclient_builder.rb +156 -0
  49. data/lib/krane/kubectl.rb +120 -0
  50. data/lib/krane/kubernetes_resource.rb +621 -0
  51. data/lib/krane/kubernetes_resource/cloudsql.rb +43 -0
  52. data/lib/krane/kubernetes_resource/config_map.rb +22 -0
  53. data/lib/krane/kubernetes_resource/cron_job.rb +18 -0
  54. data/lib/krane/kubernetes_resource/custom_resource.rb +87 -0
  55. data/lib/krane/kubernetes_resource/custom_resource_definition.rb +98 -0
  56. data/lib/krane/kubernetes_resource/daemon_set.rb +90 -0
  57. data/lib/krane/kubernetes_resource/deployment.rb +213 -0
  58. data/lib/krane/kubernetes_resource/horizontal_pod_autoscaler.rb +65 -0
  59. data/lib/krane/kubernetes_resource/ingress.rb +18 -0
  60. data/lib/krane/kubernetes_resource/job.rb +60 -0
  61. data/lib/krane/kubernetes_resource/network_policy.rb +22 -0
  62. data/lib/krane/kubernetes_resource/persistent_volume_claim.rb +80 -0
  63. data/lib/krane/kubernetes_resource/pod.rb +269 -0
  64. data/lib/krane/kubernetes_resource/pod_disruption_budget.rb +23 -0
  65. data/lib/krane/kubernetes_resource/pod_set_base.rb +71 -0
  66. data/lib/krane/kubernetes_resource/pod_template.rb +20 -0
  67. data/lib/krane/kubernetes_resource/replica_set.rb +92 -0
  68. data/lib/krane/kubernetes_resource/resource_quota.rb +22 -0
  69. data/lib/krane/kubernetes_resource/role.rb +22 -0
  70. data/lib/krane/kubernetes_resource/role_binding.rb +22 -0
  71. data/lib/krane/kubernetes_resource/secret.rb +24 -0
  72. data/lib/krane/kubernetes_resource/service.rb +104 -0
  73. data/lib/krane/kubernetes_resource/service_account.rb +22 -0
  74. data/lib/krane/kubernetes_resource/stateful_set.rb +70 -0
  75. data/lib/krane/label_selector.rb +42 -0
  76. data/lib/krane/oj.rb +4 -0
  77. data/lib/krane/options_helper.rb +39 -0
  78. data/lib/krane/remote_logs.rb +60 -0
  79. data/lib/krane/render_task.rb +118 -0
  80. data/lib/krane/renderer.rb +118 -0
  81. data/lib/krane/resource_cache.rb +68 -0
  82. data/lib/krane/resource_deployer.rb +265 -0
  83. data/lib/krane/resource_watcher.rb +171 -0
  84. data/lib/krane/restart_task.rb +228 -0
  85. data/lib/krane/rollout_conditions.rb +103 -0
  86. data/lib/krane/runner_task.rb +212 -0
  87. data/lib/krane/runner_task_config_validator.rb +18 -0
  88. data/lib/krane/statsd.rb +65 -0
  89. data/lib/krane/task_config.rb +22 -0
  90. data/lib/krane/task_config_validator.rb +96 -0
  91. data/lib/krane/template_sets.rb +173 -0
  92. data/lib/krane/version.rb +4 -0
  93. data/pull_request_template.md +8 -0
  94. data/screenshots/deploy-demo.gif +0 -0
  95. data/screenshots/migrate-logs.png +0 -0
  96. data/screenshots/missing-secret-fail.png +0 -0
  97. data/screenshots/success.png +0 -0
  98. data/screenshots/test-output.png +0 -0
  99. metadata +375 -0
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Resource class for PodDisruptionBudget objects.
  class PodDisruptionBudget < KubernetesResource
    TIMEOUT = 10.seconds

    # Human-readable state, derived only from whether the resource exists.
    def status
      return "Not Found" unless exists?
      "Available"
    end

    # True once the resource exists and its observed generation matches
    # the current generation.
    def deploy_succeeded?
      return false unless exists?
      observed_generation == current_generation
    end

    # Picks :create for generate-name resources and :replace_force otherwise.
    def deploy_method
      # Required until https://github.com/kubernetes/kubernetes/issues/45398 changes
      return :create if uses_generate_name?
      :replace_force
    end

    # Message reported when the deploy of this resource times out.
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+ require 'krane/kubernetes_resource/pod'
3
+
4
module Krane
  # Shared behaviour for resources that own a set of pods.
  # Subclasses must provide a `pods` accessor and a `parent_of_pod?` predicate.
  class PodSetBase < KubernetesResource
    # Distinct failure messages of the owned pods, one per line.
    def failure_message
      pods.map(&:failure_message).compact.uniq.join("\n")
    end

    # Distinct timeout messages of the owned pods, one per line.
    def timeout_message
      pods.map(&:timeout_message).compact.uniq.join("\n")
    end

    # Returns this resource's own events merged with the events of one pod.
    # The pod is chosen by preference: a failed pod, then a timed-out pod,
    # then simply the first pod.
    def fetch_events(kubectl)
      own_events = super
      return own_events unless pods.present?
      most_useful_pod = pods.find(&:deploy_failed?) || pods.find(&:deploy_timed_out?) || pods.first
      own_events.merge(most_useful_pod.fetch_events(kubectl))
    end

    # Builds a RemoteLogs object for this resource's containers, syncs it
    # and returns it.
    def fetch_debug_logs
      logs = Krane::RemoteLogs.new(
        logger: @logger,
        parent_id: id,
        container_names: container_names,
        namespace: @namespace,
        context: @context
      )
      logs.sync
      logs
    end

    def print_debug_logs?
      pods.present? # the kubectl command times out if no pods exist
    end

    private

    def pods
      raise NotImplementedError, "Subclasses must define a `pods` accessor"
    end

    def parent_of_pod?(_)
      raise NotImplementedError, "Subclasses must define a `parent_of_pod?` method"
    end

    # Names of all containers in the pod template: regular containers first,
    # then init containers.
    def container_names
      pod_spec = @definition["spec"]["template"]["spec"]
      regular_containers = pod_spec["containers"].map { |c| c["name"] }
      # initContainers is a list, so the fallback for a missing key is an
      # empty Array rather than an empty Hash.
      init_containers = pod_spec.fetch("initContainers", []).map { |c| c["name"] }
      regular_containers + init_containers
    end

    # Fetches every pod matching this resource's matchLabels selector from the
    # cache, keeps those for which `parent_of_pod?` is true, and returns them
    # as synced Pod objects.
    def find_pods(cache)
      all_pods = cache.get_all(Pod.kind, @instance_data["spec"]["selector"]["matchLabels"])

      all_pods.each_with_object([]) do |pod_data, relevant_pods|
        next unless parent_of_pod?(pod_data)
        pod = Pod.new(
          namespace: namespace,
          context: context,
          definition: pod_data,
          logger: @logger,
          parent: "#{name.capitalize} #{type}",
          deploy_started_at: @deploy_started_at
        )
        pod.sync(cache)
        relevant_pods << pod
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Resource class for PodTemplate objects; deployed as soon as it exists.
  class PodTemplate < KubernetesResource
    # Human-readable state, derived only from whether the resource exists.
    def status
      return "Not Found" unless exists?
      "Available"
    end

    # Existence is the only success criterion.
    def deploy_succeeded?
      exists?
    end

    # This resource never reports a deploy failure.
    def deploy_failed?
      false
    end

    # Message reported when the deploy of this resource times out.
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+ require 'krane/kubernetes_resource/pod_set_base'
3
+
4
module Krane
  # Resource class for ReplicaSet objects, including the pods they own.
  class ReplicaSet < PodSetBase
    TIMEOUT = 5.minutes
    attr_reader :pods

    # Records the optional parent and deploy start time, starts with no pods,
    # and forwards the remaining arguments to the superclass.
    def initialize(namespace:, context:, definition:, logger:, statsd_tags: nil,
      parent: nil, deploy_started_at: nil)
      @parent = parent
      @deploy_started_at = deploy_started_at
      @pods = []
      super(namespace: namespace, context: context, definition: definition,
        logger: logger, statsd_tags: statsd_tags)
    end

    # Refreshes the instance data, then the pod list (empty when no lookup was needed).
    def sync(cache)
      super
      fetched_pods = fetch_pods_if_needed(cache)
      @pods = fetched_pods || []
    end

    # Comma-separated replica counts, e.g. built from "replicas",
    # "availableReplicas" and "readyReplicas".
    def status
      return super unless rollout_data.present?
      counts = rollout_data.map do |state_replicas, num|
        "#{num} #{state_replicas.chop.pluralize(num)}"
      end
      counts.join(", ")
    end

    # True when the status is current and both the available and the ready
    # replica counts equal the desired count.
    def deploy_succeeded?
      return false if stale_status?
      wanted = desired_replicas
      counts = rollout_data
      %w(availableReplicas readyReplicas).all? { |key| wanted == counts[key].to_i }
    end

    # True when there are pods, every one of them has failed, and the status is current.
    def deploy_failed?
      return false unless pods.present?
      return false unless pods.all?(&:deploy_failed?)
      !stale_status?
    end

    # Replica count from the spec, or -1 when the resource does not exist.
    def desired_replicas
      exists? ? @instance_data["spec"]["replicas"].to_i : -1
    end

    # Ready replica count, or -1 when the resource does not exist.
    def ready_replicas
      exists? ? rollout_data['readyReplicas'].to_i : -1
    end

    # Available replica count, or -1 when the resource does not exist.
    def available_replicas
      exists? ? rollout_data["availableReplicas"].to_i : -1
    end

    private

    # The status is stale while the observed generation lags the current one.
    def stale_status?
      observed_generation != current_generation
    end

    # Returns the owned pods, or nil when looking them up would be pointless.
    def fetch_pods_if_needed(cache)
      # A ReplicaSet that doesn't exist has no pods
      return unless exists?
      # With an outdated status no determination will be made anyway
      return if stale_status?
      # Nothing to look for when zero pods are wanted
      return if desired_replicas == 0
      # Pods are only fetched so that deploy_failed? can check that they aren't ALL bad.
      # When the RS data already shows some pods are ok, skip the (potentially expensive) fetch.
      # Lower numbers here make us more susceptible to being fooled by replicas without probes briefly appearing ready
      return if ready_replicas > 1

      find_pods(cache)
    end

    # Replica counters from the status, with "replicas" defaulting to 0.
    def rollout_data
      defaults = { "replicas" => 0 }
      return defaults unless exists?
      observed = @instance_data["status"].slice("replicas", "availableReplicas", "readyReplicas")
      defaults.merge(observed)
    end

    # A pod belongs to this ReplicaSet when one of its owner references carries our uid.
    def parent_of_pod?(pod_data)
      owner_refs = pod_data.dig("metadata", "ownerReferences")
      return false unless owner_refs
      owner_refs.any? { |ref| ref["uid"] == @instance_data["metadata"]["uid"] }
    end

    # True when no parent was given at construction time.
    def unmanaged?
      @parent.blank?
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Resource class for ResourceQuota objects.
  class ResourceQuota < KubernetesResource
    TIMEOUT = 30.seconds

    # Human-readable state, derived only from whether the resource exists.
    def status
      exists? ? "In effect" : "Not Found"
    end

    # True once the resource exists and the hard limits in its status equal
    # the hard limits in its spec.
    def deploy_succeeded?
      # Check existence first: when neither path is present both `dig` calls
      # return nil, and nil == nil would otherwise be reported as success.
      return false unless exists?
      @instance_data.dig("spec", "hard") == @instance_data.dig("status", "hard")
    end

    # This resource never reports a deploy failure.
    def deploy_failed?
      false
    end

    # Message reported when the deploy of this resource times out.
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Resource class for Role objects; deployed as soon as it exists.
  class Role < KubernetesResource
    TIMEOUT = 30.seconds

    # Human-readable state, derived only from whether the resource exists.
    def status
      return "Not Found" unless exists?
      "Created"
    end

    # Existence is the only success criterion.
    def deploy_succeeded?
      exists?
    end

    # This resource never reports a deploy failure.
    def deploy_failed?
      false
    end

    # Message reported when the deploy of this resource times out.
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Resource class for RoleBinding objects; deployed as soon as it exists.
  class RoleBinding < KubernetesResource
    TIMEOUT = 30.seconds

    # Human-readable state, derived only from whether the resource exists.
    def status
      return "Not Found" unless exists?
      "Created"
    end

    # Existence is the only success criterion.
    def deploy_succeeded?
      exists?
    end

    # This resource never reports a deploy failure.
    def deploy_failed?
      false
    end

    # Message reported when the deploy of this resource times out.
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Resource class for Secret objects; deployed as soon as it exists.
  class Secret < KubernetesResource
    TIMEOUT = 30.seconds
    SENSITIVE_TEMPLATE_CONTENT = true
    SERVER_DRY_RUNNABLE = true

    # Human-readable state, derived only from whether the resource exists.
    def status
      return "Not Found" unless exists?
      "Available"
    end

    # Existence is the only success criterion.
    def deploy_succeeded?
      exists?
    end

    # This resource never reports a deploy failure.
    def deploy_failed?
      false
    end

    # Message reported when the deploy of this resource times out.
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+ require 'krane/kubernetes_resource/pod'
3
+
4
module Krane
  # Resource class for Service objects. Success depends on the service type,
  # on the pods its selector matches, and on the replica counts of the
  # Deployments/StatefulSets whose pod templates match that selector.
  class Service < KubernetesResource
    TIMEOUT = 7.minutes

    # Refreshes the instance data, then the related pods and workloads.
    # Both lists are empty when the service is missing or has no selector.
    def sync(cache)
      super
      selects_pods = exists? && selector.present?
      @related_pods = selects_pods ? cache.get_all(Pod.kind, selector) : []
      @related_workloads = selects_pods ? fetch_related_workloads(cache) : []
    end

    # Human-readable state of the service.
    def status
      return "Not found" unless exists?
      return "LoadBalancer IP address is not provisioned yet" if requires_publishing? && !published?
      return "Doesn't require any endpoints" unless requires_endpoints?
      selects_some_pods? ? "Selects at least 1 pod" : "Selects 0 pods"
    end

    # Success rules, in order: the service must exist; a LoadBalancer must be
    # published; a service that needs no endpoints only has to exist;
    # otherwise it must expose a zero-replica workload or select some pods.
    def deploy_succeeded?
      if !exists?
        false
      elsif requires_publishing?
        published?
      elsif !requires_endpoints?
        exists?
      else
        # We can't use endpoints if we want the service to be able to fail fast when the pods are down
        exposes_zero_replica_workload? || selects_some_pods?
      end
    end

    # This resource never reports a deploy failure.
    def deploy_failed?
      false
    end

    # Message reported when the deploy of this resource times out.
    def timeout_message
      "This service does not seem to select any pods and this is likely invalid. "\
      "Please confirm the spec.selector is correct and the targeted workload is healthy."
    end

    private

    # Deployments and StatefulSets whose pod template labels satisfy every
    # key/value pair of the selector.
    def fetch_related_workloads(cache)
      candidates = cache.get_all(Deployment.kind) + cache.get_all(StatefulSet.kind)
      candidates.select do |workload|
        template_labels = workload['spec']['template']['metadata']['labels']
        selector.all? { |k, v| template_labels[k] == v }
      end
    end

    # True only when a replica count is known and it is zero.
    def exposes_zero_replica_workload?
      count = related_replica_count
      count ? count == 0 : false
    end

    def requires_endpoints?
      # services of type External don't have endpoints
      return false if external_name_svc?

      count = related_replica_count
      # problem counting replicas - by default, assume endpoints are required
      count.blank? || count > 0
    end

    # True when a selector is defined and it matched at least one pod.
    def selects_some_pods?
      selector.present? && @related_pods.present?
    end

    # The spec's selector, or an empty Hash when none is defined.
    def selector
      @definition["spec"].fetch("selector", {})
    end

    # Sum of the related workloads' replicas: 0 without a selector, nil when
    # no related workloads were found.
    def related_replica_count
      return 0 unless selector.present?
      return unless @related_workloads.present?

      @related_workloads.reduce(0) { |total, workload| total + workload["spec"]["replicas"].to_i }
    end

    def external_name_svc?
      @definition["spec"]["type"] == "ExternalName"
    end

    def requires_publishing?
      @definition["spec"]["type"] == "LoadBalancer"
    end

    # True when the status lists at least one load balancer ingress entry.
    def published?
      @instance_data.dig('status', 'loadBalancer', 'ingress').present?
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Resource class for ServiceAccount objects; deployed as soon as it exists.
  class ServiceAccount < KubernetesResource
    TIMEOUT = 30.seconds

    # Human-readable state, derived only from whether the resource exists.
    def status
      return "Not Found" unless exists?
      "Created"
    end

    # Existence is the only success criterion.
    def deploy_succeeded?
      exists?
    end

    # This resource never reports a deploy failure.
    def deploy_failed?
      false
    end

    # Message reported when the deploy of this resource times out.
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+ require 'krane/kubernetes_resource/pod_set_base'
3
module Krane
  # Resource class for StatefulSet objects, including the pods they own.
  class StatefulSet < PodSetBase
    TIMEOUT = 10.minutes
    ONDELETE = 'OnDelete'
    attr_reader :pods

    # Refreshes the instance data, then the pod list (empty when the resource is missing).
    def sync(cache)
      super
      @pods = if exists?
        find_pods(cache)
      else
        []
      end
    end

    # Comma-separated replica counts built from the status, or the inherited
    # status when no status data is present.
    def status
      return super unless @instance_data["status"].present?
      rollout_data = @instance_data["status"].slice("replicas", "readyReplicas", "currentReplicas")
      counts = rollout_data.map do |state_replicas, num|
        "#{num} #{state_replicas.chop.pluralize(num)}"
      end
      counts.join(", ")
    end

    # With the OnDelete strategy success is assumed (after a one-time warning).
    # Otherwise the generation must be current, the current and update
    # revisions must match, and ready/current replicas must equal the desired count.
    def deploy_succeeded?
      unless update_strategy == ONDELETE
        return observed_generation == current_generation &&
          status_data['currentRevision'] == status_data['updateRevision'] &&
          desired_replicas == status_data['readyReplicas'].to_i &&
          desired_replicas == status_data['currentReplicas'].to_i
      end

      # Gem cannot monitor update since it doesn't occur until delete
      unless @success_assumption_warning_shown
        @logger.warn("WARNING: Your StatefulSet's updateStrategy is set to OnDelete, "\
          "which means updates will not be applied until its pods are deleted. "\
          "Consider switching to rollingUpdate.")
        @success_assumption_warning_shown = true
      end
      true
    end

    # Never fails under OnDelete; otherwise fails when any pod has failed and
    # the observed generation is current.
    def deploy_failed?
      return false if update_strategy == ONDELETE
      return false unless pods.present? && pods.any?(&:deploy_failed?)
      observed_generation == current_generation
    end

    private

    # The updateStrategy type from the live spec, or 'Unknown' when the resource is missing.
    def update_strategy
      return 'Unknown' unless exists?
      @instance_data['spec']['updateStrategy']['type']
    end

    # The live status, or placeholder replica counts when the resource is missing.
    def status_data
      if exists?
        @instance_data["status"]
      else
        { 'readyReplicas' => '-1', 'currentReplicas' => '-2' }
      end
    end

    # Replica count from the spec, or -1 when the resource does not exist.
    def desired_replicas
      exists? ? @instance_data["spec"]["replicas"].to_i : -1
    end

    # A pod belongs to this StatefulSet when an owner reference carries our uid
    # and its controller-revision-hash label equals our current revision.
    def parent_of_pod?(pod_data)
      owner_refs = pod_data.dig("metadata", "ownerReferences")
      return false unless owner_refs
      owned_by_us = owner_refs.any? { |ref| ref["uid"] == @instance_data["metadata"]["uid"] }
      owned_by_us &&
        @instance_data["status"]["currentRevision"] == pod_data["metadata"]["labels"]["controller-revision-hash"]
    end
  end
end