tobsch-krane 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. checksums.yaml +7 -0
  2. data/.buildkite/pipeline.nightly.yml +43 -0
  3. data/.github/probots.yml +2 -0
  4. data/.gitignore +20 -0
  5. data/.rubocop.yml +17 -0
  6. data/.shopify-build/VERSION +1 -0
  7. data/.shopify-build/kubernetes-deploy.yml +53 -0
  8. data/1.0-Upgrade.md +185 -0
  9. data/CHANGELOG.md +431 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +164 -0
  12. data/Gemfile +16 -0
  13. data/ISSUE_TEMPLATE.md +25 -0
  14. data/LICENSE.txt +21 -0
  15. data/README.md +655 -0
  16. data/Rakefile +36 -0
  17. data/bin/ci +21 -0
  18. data/bin/setup +16 -0
  19. data/bin/test +47 -0
  20. data/dev.yml +28 -0
  21. data/dev/flamegraph-from-tests +35 -0
  22. data/exe/krane +5 -0
  23. data/krane.gemspec +44 -0
  24. data/lib/krane.rb +7 -0
  25. data/lib/krane/bindings_parser.rb +88 -0
  26. data/lib/krane/cli/deploy_command.rb +75 -0
  27. data/lib/krane/cli/global_deploy_command.rb +54 -0
  28. data/lib/krane/cli/krane.rb +91 -0
  29. data/lib/krane/cli/render_command.rb +41 -0
  30. data/lib/krane/cli/restart_command.rb +34 -0
  31. data/lib/krane/cli/run_command.rb +54 -0
  32. data/lib/krane/cli/version_command.rb +13 -0
  33. data/lib/krane/cluster_resource_discovery.rb +113 -0
  34. data/lib/krane/common.rb +23 -0
  35. data/lib/krane/concerns/template_reporting.rb +29 -0
  36. data/lib/krane/concurrency.rb +18 -0
  37. data/lib/krane/container_logs.rb +106 -0
  38. data/lib/krane/deferred_summary_logging.rb +95 -0
  39. data/lib/krane/delayed_exceptions.rb +14 -0
  40. data/lib/krane/deploy_task.rb +363 -0
  41. data/lib/krane/deploy_task_config_validator.rb +29 -0
  42. data/lib/krane/duration_parser.rb +27 -0
  43. data/lib/krane/ejson_secret_provisioner.rb +154 -0
  44. data/lib/krane/errors.rb +28 -0
  45. data/lib/krane/formatted_logger.rb +57 -0
  46. data/lib/krane/global_deploy_task.rb +210 -0
  47. data/lib/krane/global_deploy_task_config_validator.rb +12 -0
  48. data/lib/krane/kubeclient_builder.rb +156 -0
  49. data/lib/krane/kubectl.rb +120 -0
  50. data/lib/krane/kubernetes_resource.rb +621 -0
  51. data/lib/krane/kubernetes_resource/cloudsql.rb +43 -0
  52. data/lib/krane/kubernetes_resource/config_map.rb +22 -0
  53. data/lib/krane/kubernetes_resource/cron_job.rb +18 -0
  54. data/lib/krane/kubernetes_resource/custom_resource.rb +87 -0
  55. data/lib/krane/kubernetes_resource/custom_resource_definition.rb +98 -0
  56. data/lib/krane/kubernetes_resource/daemon_set.rb +90 -0
  57. data/lib/krane/kubernetes_resource/deployment.rb +213 -0
  58. data/lib/krane/kubernetes_resource/horizontal_pod_autoscaler.rb +65 -0
  59. data/lib/krane/kubernetes_resource/ingress.rb +18 -0
  60. data/lib/krane/kubernetes_resource/job.rb +60 -0
  61. data/lib/krane/kubernetes_resource/network_policy.rb +22 -0
  62. data/lib/krane/kubernetes_resource/persistent_volume_claim.rb +80 -0
  63. data/lib/krane/kubernetes_resource/pod.rb +269 -0
  64. data/lib/krane/kubernetes_resource/pod_disruption_budget.rb +23 -0
  65. data/lib/krane/kubernetes_resource/pod_set_base.rb +71 -0
  66. data/lib/krane/kubernetes_resource/pod_template.rb +20 -0
  67. data/lib/krane/kubernetes_resource/replica_set.rb +92 -0
  68. data/lib/krane/kubernetes_resource/resource_quota.rb +22 -0
  69. data/lib/krane/kubernetes_resource/role.rb +22 -0
  70. data/lib/krane/kubernetes_resource/role_binding.rb +22 -0
  71. data/lib/krane/kubernetes_resource/secret.rb +24 -0
  72. data/lib/krane/kubernetes_resource/service.rb +104 -0
  73. data/lib/krane/kubernetes_resource/service_account.rb +22 -0
  74. data/lib/krane/kubernetes_resource/stateful_set.rb +70 -0
  75. data/lib/krane/label_selector.rb +42 -0
  76. data/lib/krane/oj.rb +4 -0
  77. data/lib/krane/options_helper.rb +39 -0
  78. data/lib/krane/remote_logs.rb +60 -0
  79. data/lib/krane/render_task.rb +118 -0
  80. data/lib/krane/renderer.rb +118 -0
  81. data/lib/krane/resource_cache.rb +68 -0
  82. data/lib/krane/resource_deployer.rb +265 -0
  83. data/lib/krane/resource_watcher.rb +171 -0
  84. data/lib/krane/restart_task.rb +228 -0
  85. data/lib/krane/rollout_conditions.rb +103 -0
  86. data/lib/krane/runner_task.rb +212 -0
  87. data/lib/krane/runner_task_config_validator.rb +18 -0
  88. data/lib/krane/statsd.rb +65 -0
  89. data/lib/krane/task_config.rb +22 -0
  90. data/lib/krane/task_config_validator.rb +96 -0
  91. data/lib/krane/template_sets.rb +173 -0
  92. data/lib/krane/version.rb +4 -0
  93. data/pull_request_template.md +8 -0
  94. data/screenshots/deploy-demo.gif +0 -0
  95. data/screenshots/migrate-logs.png +0 -0
  96. data/screenshots/missing-secret-fail.png +0 -0
  97. data/screenshots/success.png +0 -0
  98. data/screenshots/test-output.png +0 -0
  99. metadata +375 -0
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Kubernetes PodDisruptionBudget resource. Rollout success is judged by existence
  # plus the observed generation having caught up with the current generation.
  class PodDisruptionBudget < KubernetesResource
    TIMEOUT = 10.seconds

    # @return [String] "Available" when the resource exists, "Not Found" otherwise
    def status
      return "Not Found" unless exists?
      "Available"
    end

    # @return [Boolean] truthy once the resource exists and its observed generation
    #   equals its current generation
    def deploy_succeeded?
      found = exists?
      found && observed_generation == current_generation
    end

    # Chooses how the resource is sent to the cluster.
    #
    # @return [Symbol] :create when the template uses generateName, :replace_force otherwise
    def deploy_method
      # Required until https://github.com/kubernetes/kubernetes/issues/45398 changes
      return :create if uses_generate_name?
      :replace_force
    end

    # @return [String] the shared UNUSUAL_FAILURE_MESSAGE constant (defined outside this file)
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+ require 'krane/kubernetes_resource/pod'
3
+
4
module Krane
  # Shared behaviour for resources that manage a set of pods.
  #
  # Subclasses are expected to provide a `pods` accessor and a `parent_of_pod?`
  # predicate; the defaults defined here raise NotImplementedError.
  class PodSetBase < KubernetesResource
    # @return [String] the distinct, non-nil failure messages of the pods, one per line
    def failure_message
      pods.map(&:failure_message).compact.uniq.join("\n")
    end

    # @return [String] the distinct, non-nil timeout messages of the pods, one per line
    def timeout_message
      pods.map(&:timeout_message).compact.uniq.join("\n")
    end

    # Fetches this resource's own events and merges in the events of one pod.
    # The pod is picked in this order: a failed pod, a timed-out pod, the first pod.
    #
    # @param kubectl the kubectl wrapper passed through to `super` and to the pod
    # @return the events from `super`, merged with the chosen pod's events when pods exist
    def fetch_events(kubectl)
      own_events = super
      return own_events unless pods.present?
      most_useful_pod = pods.find(&:deploy_failed?) || pods.find(&:deploy_timed_out?) || pods.first
      own_events.merge(most_useful_pod.fetch_events(kubectl))
    end

    # Builds a RemoteLogs object for every container of the pod template and syncs it.
    #
    # @return [Krane::RemoteLogs] the synced logs object
    def fetch_debug_logs
      logs = Krane::RemoteLogs.new(
        logger: @logger,
        parent_id: id,
        container_names: container_names,
        namespace: @namespace,
        context: @context
      )
      logs.sync
      logs
    end

    # @return [Boolean] whether debug logs should be printed
    def print_debug_logs?
      pods.present? # the kubectl command times out if no pods exist
    end

    private

    # @raise [NotImplementedError] always; subclasses must override
    def pods
      raise NotImplementedError, "Subclasses must define a `pods` accessor"
    end

    # @raise [NotImplementedError] always; subclasses must override
    def parent_of_pod?(_)
      raise NotImplementedError, "Subclasses must define a `parent_of_pod?` method"
    end

    # Names of all containers declared in the pod template, regular containers
    # first and init containers after them.
    #
    # @return [Array<String>]
    def container_names
      pod_spec = @definition["spec"]["template"]["spec"]
      regular_containers = pod_spec["containers"].map { |c| c["name"] }
      # `initContainers` may be missing or explicitly null in the template; both
      # cases mean "no init containers" and must not raise.
      init_containers = (pod_spec["initContainers"] || []).map { |c| c["name"] }
      regular_containers + init_containers
    end

    # Looks up the pods matching this resource's `spec.selector.matchLabels` in the
    # cache, keeps those accepted by `parent_of_pod?`, and wraps each in a synced Pod.
    #
    # @param cache the resource cache queried via `get_all`
    # @return [Array<Pod>] the synced pods belonging to this resource
    def find_pods(cache)
      all_pods = cache.get_all(Pod.kind, @instance_data["spec"]["selector"]["matchLabels"])

      all_pods.each_with_object([]) do |pod_data, relevant_pods|
        next unless parent_of_pod?(pod_data)
        pod = Pod.new(
          namespace: namespace,
          context: context,
          definition: pod_data,
          logger: @logger,
          parent: "#{name.capitalize} #{type}",
          deploy_started_at: @deploy_started_at
        )
        pod.sync(cache)
        relevant_pods << pod
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Kubernetes PodTemplate resource. It counts as deployed as soon as it exists
  # and never reports a failure of its own.
  class PodTemplate < KubernetesResource
    # @return [String] "Available" when the resource exists, "Not Found" otherwise
    def status
      return "Not Found" unless exists?
      "Available"
    end

    # @return [Boolean] whatever `exists?` reports
    def deploy_succeeded?
      exists?
    end

    # @return [false] this resource type never reports failure
    def deploy_failed?
      false
    end

    # @return [String] the shared UNUSUAL_FAILURE_MESSAGE constant (defined outside this file)
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+ require 'krane/kubernetes_resource/pod_set_base'
3
+
4
module Krane
  # Kubernetes ReplicaSet resource. Success is judged from the replica counters in
  # the resource's status; failure is judged from its pods, which are only fetched
  # when the counters alone are not conclusive.
  class ReplicaSet < PodSetBase
    TIMEOUT = 5.minutes
    attr_reader :pods

    # @param parent [Object, nil] owner of this ReplicaSet, if any (see `unmanaged?`)
    # @param deploy_started_at [Object, nil] stored in @deploy_started_at
    # Remaining keyword arguments are forwarded to the superclass initializer.
    def initialize(namespace:, context:, definition:, logger:, statsd_tags: nil,
      parent: nil, deploy_started_at: nil)
      # State is set before calling super, exactly as in the original ordering.
      @pods = []
      @deploy_started_at = deploy_started_at
      @parent = parent
      super(
        namespace: namespace,
        context: context,
        definition: definition,
        logger: logger,
        statsd_tags: statsd_tags
      )
    end

    # Refreshes the resource via `super`, then refreshes the pod list
    # (empty when fetching pods is deemed unnecessary).
    def sync(cache)
      super
      fetched = fetch_pods_if_needed(cache)
      @pods = fetched || []
    end

    # @return [String] a comma-separated summary such as "3 replicas, 2 availableReplicas",
    #   or the superclass status when there is no rollout data
    def status
      data = rollout_data
      return super if data.blank?
      summary = data.map do |state_replicas, num|
        "#{num} #{state_replicas.chop.pluralize(num)}"
      end
      summary.join(", ")
    end

    # @return [Boolean] true when the status is current and both the available and
    #   ready counters equal the desired replica count
    def deploy_succeeded?
      return false if stale_status?
      wanted = desired_replicas
      wanted == rollout_data["availableReplicas"].to_i &&
        wanted == rollout_data["readyReplicas"].to_i
    end

    # @return [Boolean] true when pods were fetched, every one of them failed,
    #   and the status is current
    def deploy_failed?
      return false if pods.blank?
      return false unless pods.all?(&:deploy_failed?)
      !stale_status?
    end

    # @return [Integer] `spec.replicas`, or -1 when the resource does not exist
    def desired_replicas
      exists? ? @instance_data["spec"]["replicas"].to_i : -1
    end

    # @return [Integer] the readyReplicas counter, or -1 when the resource does not exist
    def ready_replicas
      exists? ? rollout_data['readyReplicas'].to_i : -1
    end

    # @return [Integer] the availableReplicas counter, or -1 when the resource does not exist
    def available_replicas
      exists? ? rollout_data["availableReplicas"].to_i : -1
    end

    private

    # @return [Boolean] true while the observed generation lags the current generation
    def stale_status?
      observed_generation != current_generation
    end

    # Fetches pods only when they are needed to decide `deploy_failed?`.
    #
    # @return [Array<Pod>, nil] the pods, or nil when fetching was skipped
    def fetch_pods_if_needed(cache)
      # If the ReplicaSet doesn't exist, its pods won't either
      return unless exists?
      # If the status hasn't been updated yet, we're not going to make a determination anyway
      # If we don't want any pods at all, we don't need to look for them
      return if stale_status? || desired_replicas == 0
      # We only need to fetch pods so that deploy_failed? can check that they aren't ALL bad.
      # If we can already tell some pods are ok from the RS data, don't bother fetching them (which can be expensive)
      # Lower numbers here make us more susceptible to being fooled by replicas without probes briefly appearing ready
      return if ready_replicas > 1

      find_pods(cache)
    end

    # @return [Hash] the replica counters from the status, with "replicas" defaulting to 0
    def rollout_data
      defaults = { "replicas" => 0 }
      return defaults unless exists?
      reported = @instance_data["status"].slice("replicas", "availableReplicas", "readyReplicas")
      defaults.merge(reported)
    end

    # @param pod_data [Hash] raw pod data
    # @return [Boolean] true when one of the pod's owner references carries this resource's uid
    def parent_of_pod?(pod_data)
      owners = pod_data.dig("metadata", "ownerReferences")
      return false unless owners
      owners.any? { |ref| ref["uid"] == @instance_data["metadata"]["uid"] }
    end

    # @return [Boolean] true when no parent was given at construction
    def unmanaged?
      @parent.blank?
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Kubernetes ResourceQuota resource. It is considered deployed once the hard
  # limits reported in its status match the hard limits in its spec.
  class ResourceQuota < KubernetesResource
    TIMEOUT = 30.seconds

    # @return [String] "In effect" when the resource exists, "Not Found" otherwise
    def status
      exists? ? "In effect" : "Not Found"
    end

    # @return [Boolean] true when the resource exists and `status.hard` equals `spec.hard`
    def deploy_succeeded?
      # Without this guard, a resource with no `spec.hard` and no `status.hard`
      # compares nil == nil and is reported as deployed. NOTE(review): this presumes
      # @instance_data carries neither key when the resource was not found; confirm
      # against KubernetesResource#sync.
      return false unless exists?
      @instance_data.dig("spec", "hard") == @instance_data.dig("status", "hard")
    end

    # @return [false] this resource type never reports failure
    def deploy_failed?
      false
    end

    # @return [String] the shared UNUSUAL_FAILURE_MESSAGE constant (defined outside this file)
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Kubernetes Role resource. It counts as deployed as soon as it exists and
  # never reports a failure of its own.
  class Role < KubernetesResource
    TIMEOUT = 30.seconds

    # @return [String] "Created" when the resource exists, "Not Found" otherwise
    def status
      return "Not Found" unless exists?
      "Created"
    end

    # @return [Boolean] whatever `exists?` reports
    def deploy_succeeded?
      exists?
    end

    # @return [false] this resource type never reports failure
    def deploy_failed?
      false
    end

    # @return [String] the shared UNUSUAL_FAILURE_MESSAGE constant (defined outside this file)
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Kubernetes RoleBinding resource. It counts as deployed as soon as it exists
  # and never reports a failure of its own.
  class RoleBinding < KubernetesResource
    TIMEOUT = 30.seconds

    # @return [String] "Created" when the resource exists, "Not Found" otherwise
    def status
      return "Not Found" unless exists?
      "Created"
    end

    # @return [Boolean] whatever `exists?` reports
    def deploy_succeeded?
      exists?
    end

    # @return [false] this resource type never reports failure
    def deploy_failed?
      false
    end

    # @return [String] the shared UNUSUAL_FAILURE_MESSAGE constant (defined outside this file)
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Kubernetes Secret resource. It counts as deployed as soon as it exists and
  # never reports a failure of its own.
  class Secret < KubernetesResource
    TIMEOUT = 30.seconds
    # NOTE(review): presumably read by the base class to avoid printing template
    # content for this kind; the consumer is not visible here.
    SENSITIVE_TEMPLATE_CONTENT = true
    # NOTE(review): presumably marks this kind as eligible for server-side dry run;
    # the consumer is not visible here.
    SERVER_DRY_RUNNABLE = true

    # @return [String] "Available" when the resource exists, "Not Found" otherwise
    def status
      return "Not Found" unless exists?
      "Available"
    end

    # @return [Boolean] whatever `exists?` reports
    def deploy_succeeded?
      exists?
    end

    # @return [false] this resource type never reports failure
    def deploy_failed?
      false
    end

    # @return [String] the shared UNUSUAL_FAILURE_MESSAGE constant (defined outside this file)
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+ require 'krane/kubernetes_resource/pod'
3
+
4
module Krane
  # Kubernetes Service resource. Success depends on the service type: LoadBalancer
  # services must be published, ExternalName services need no endpoints, and other
  # services must select at least one pod unless they front zero-replica workloads.
  class Service < KubernetesResource
    TIMEOUT = 7.minutes

    # Refreshes the resource via `super`, then caches the pods matched by the
    # selector and the Deployments/StatefulSets whose pod template matches it.
    # Both collections are empty when the service is missing or has no selector.
    def sync(cache)
      super
      selectable = exists? && selector.present?
      @related_pods = selectable ? cache.get_all(Pod.kind, selector) : []
      @related_workloads = selectable ? fetch_related_workloads(cache) : []
    end

    # @return [String] a one-line description of the service's current state
    def status
      return "Not found" unless exists?
      return "LoadBalancer IP address is not provisioned yet" if requires_publishing? && !published?
      return "Doesn't require any endpoints" unless requires_endpoints?
      selects_some_pods? ? "Selects at least 1 pod" : "Selects 0 pods"
    end

    # @return [Boolean] whether the service is considered successfully deployed
    def deploy_succeeded?
      if !exists?
        false
      elsif requires_publishing?
        published?
      elsif !requires_endpoints?
        exists?
      else
        # We can't use endpoints if we want the service to be able to fail fast when the pods are down
        exposes_zero_replica_workload? || selects_some_pods?
      end
    end

    # @return [false] this resource type never reports failure
    def deploy_failed?
      false
    end

    # @return [String] guidance shown when the rollout times out
    def timeout_message
      "This service does not seem to select any pods and this is likely invalid. "\
      "Please confirm the spec.selector is correct and the targeted workload is healthy."
    end

    private

    # Collects every Deployment and StatefulSet in the cache whose pod template
    # labels satisfy all of this service's selector pairs.
    #
    # @return [Array<Hash>] raw workload data
    def fetch_related_workloads(cache)
      candidates = cache.get_all(Deployment.kind) + cache.get_all(StatefulSet.kind)
      candidates.select do |workload|
        selector.all? do |label, expected|
          workload['spec']['template']['metadata']['labels'][label] == expected
        end
      end
    end

    # @return [Boolean] true only when a replica count is known and it is zero
    def exposes_zero_replica_workload?
      count = related_replica_count
      count ? count == 0 : false
    end

    # @return [Boolean] whether the service is expected to have endpoints
    def requires_endpoints?
      if external_name_svc?
        # services of type External don't have endpoints
        false
      else
        # problem counting replicas - by default, assume endpoints are required
        count = related_replica_count
        count.blank? || count > 0
      end
    end

    # @return [Boolean] true when a selector is defined and it matched at least one pod
    def selects_some_pods?
      selector.present? && @related_pods.present?
    end

    # @return [Hash] `spec.selector` from the template, or an empty hash when absent
    def selector
      @definition["spec"].fetch("selector") { {} }
    end

    # Total replicas requested by the related workloads.
    #
    # @return [Integer, nil] 0 without a selector, nil when no related workloads
    #   were found, otherwise the sum of their `spec.replicas`
    def related_replica_count
      return 0 unless selector.present?
      return unless @related_workloads.present?

      @related_workloads.reduce(0) do |total, workload|
        total + workload["spec"]["replicas"].to_i
      end
    end

    # @return [Boolean] true when `spec.type` is "ExternalName"
    def external_name_svc?
      service_kind = @definition["spec"]["type"]
      service_kind == "ExternalName"
    end

    # @return [Boolean] true when `spec.type` is "LoadBalancer"
    def requires_publishing?
      service_kind = @definition["spec"]["type"]
      service_kind == "LoadBalancer"
    end

    # @return [Boolean] true when the live status lists at least one load balancer ingress
    def published?
      ingress = @instance_data.dig('status', 'loadBalancer', 'ingress')
      ingress.present?
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
module Krane
  # Kubernetes ServiceAccount resource. It counts as deployed as soon as it exists
  # and never reports a failure of its own.
  class ServiceAccount < KubernetesResource
    TIMEOUT = 30.seconds

    # @return [String] "Created" when the resource exists, "Not Found" otherwise
    def status
      return "Not Found" unless exists?
      "Created"
    end

    # @return [Boolean] whatever `exists?` reports
    def deploy_succeeded?
      exists?
    end

    # @return [false] this resource type never reports failure
    def deploy_failed?
      false
    end

    # @return [String] the shared UNUSUAL_FAILURE_MESSAGE constant (defined outside this file)
    def timeout_message
      UNUSUAL_FAILURE_MESSAGE
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+ require 'krane/kubernetes_resource/pod_set_base'
3
module Krane
  # Kubernetes StatefulSet resource. With the OnDelete update strategy the rollout
  # cannot be observed, so success is assumed (with a one-time warning); otherwise
  # success is judged from generation, revision and replica counters in the status.
  class StatefulSet < PodSetBase
    TIMEOUT = 10.minutes
    ONDELETE = 'OnDelete'
    attr_reader :pods

    # Refreshes the resource via `super`, then refreshes the pod list
    # (empty when the StatefulSet does not exist).
    def sync(cache)
      super
      @pods = if exists?
        find_pods(cache)
      else
        []
      end
    end

    # @return [String] a comma-separated summary of the replica counters, or the
    #   superclass status when the live data has no status section
    def status
      reported = @instance_data["status"]
      return super unless reported.present?
      counters = reported.slice("replicas", "readyReplicas", "currentReplicas")
      counters.map { |state_replicas, num| "#{num} #{state_replicas.chop.pluralize(num)}" }.join(", ")
    end

    # @return [Boolean] true for OnDelete sets; otherwise true when the status is
    #   current, the revisions match, and ready/current replicas equal the desired count
    def deploy_succeeded?
      unless update_strategy == ONDELETE
        return observed_generation == current_generation &&
          status_data['currentRevision'] == status_data['updateRevision'] &&
          desired_replicas == status_data['readyReplicas'].to_i &&
          desired_replicas == status_data['currentReplicas'].to_i
      end

      # Gem cannot monitor update since it doesn't occur until delete
      warn_about_on_delete_strategy
      true
    end

    # @return [Boolean] false for OnDelete sets; otherwise true when at least one
    #   fetched pod failed and the status is current
    def deploy_failed?
      return false if update_strategy == ONDELETE
      return false unless pods.present?
      pods.any?(&:deploy_failed?) && observed_generation == current_generation
    end

    private

    # Logs the OnDelete warning the first time it is called and stays silent afterwards.
    def warn_about_on_delete_strategy
      return if @success_assumption_warning_shown
      @logger.warn("WARNING: Your StatefulSet's updateStrategy is set to OnDelete, "\
        "which means updates will not be applied until its pods are deleted. "\
        "Consider switching to rollingUpdate.")
      @success_assumption_warning_shown = true
    end

    # @return [String] `spec.updateStrategy.type` from the live data, or 'Unknown'
    #   when the resource does not exist
    def update_strategy
      return 'Unknown' unless exists?
      @instance_data['spec']['updateStrategy']['type']
    end

    # @return [Hash] the live status, or placeholder counters that can never match
    #   a desired replica count when the resource does not exist
    def status_data
      return @instance_data["status"] if exists?
      { 'readyReplicas' => '-1', 'currentReplicas' => '-2' }
    end

    # @return [Integer] `spec.replicas`, or -1 when the resource does not exist
    def desired_replicas
      exists? ? @instance_data["spec"]["replicas"].to_i : -1
    end

    # @param pod_data [Hash] raw pod data
    # @return [Boolean] true when the pod is owned by this StatefulSet (by uid) and its
    #   controller-revision-hash label equals the set's current revision
    def parent_of_pod?(pod_data)
      owners = pod_data.dig("metadata", "ownerReferences")
      return false unless owners
      owned = owners.any? { |ref| ref["uid"] == @instance_data["metadata"]["uid"] }
      owned &&
        @instance_data["status"]["currentRevision"] == pod_data["metadata"]["labels"]["controller-revision-hash"]
    end
  end
end