tobsch-krane 1.0.0

Files changed (99)
  1. checksums.yaml +7 -0
  2. data/.buildkite/pipeline.nightly.yml +43 -0
  3. data/.github/probots.yml +2 -0
  4. data/.gitignore +20 -0
  5. data/.rubocop.yml +17 -0
  6. data/.shopify-build/VERSION +1 -0
  7. data/.shopify-build/kubernetes-deploy.yml +53 -0
  8. data/1.0-Upgrade.md +185 -0
  9. data/CHANGELOG.md +431 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/CONTRIBUTING.md +164 -0
  12. data/Gemfile +16 -0
  13. data/ISSUE_TEMPLATE.md +25 -0
  14. data/LICENSE.txt +21 -0
  15. data/README.md +655 -0
  16. data/Rakefile +36 -0
  17. data/bin/ci +21 -0
  18. data/bin/setup +16 -0
  19. data/bin/test +47 -0
  20. data/dev.yml +28 -0
  21. data/dev/flamegraph-from-tests +35 -0
  22. data/exe/krane +5 -0
  23. data/krane.gemspec +44 -0
  24. data/lib/krane.rb +7 -0
  25. data/lib/krane/bindings_parser.rb +88 -0
  26. data/lib/krane/cli/deploy_command.rb +75 -0
  27. data/lib/krane/cli/global_deploy_command.rb +54 -0
  28. data/lib/krane/cli/krane.rb +91 -0
  29. data/lib/krane/cli/render_command.rb +41 -0
  30. data/lib/krane/cli/restart_command.rb +34 -0
  31. data/lib/krane/cli/run_command.rb +54 -0
  32. data/lib/krane/cli/version_command.rb +13 -0
  33. data/lib/krane/cluster_resource_discovery.rb +113 -0
  34. data/lib/krane/common.rb +23 -0
  35. data/lib/krane/concerns/template_reporting.rb +29 -0
  36. data/lib/krane/concurrency.rb +18 -0
  37. data/lib/krane/container_logs.rb +106 -0
  38. data/lib/krane/deferred_summary_logging.rb +95 -0
  39. data/lib/krane/delayed_exceptions.rb +14 -0
  40. data/lib/krane/deploy_task.rb +363 -0
  41. data/lib/krane/deploy_task_config_validator.rb +29 -0
  42. data/lib/krane/duration_parser.rb +27 -0
  43. data/lib/krane/ejson_secret_provisioner.rb +154 -0
  44. data/lib/krane/errors.rb +28 -0
  45. data/lib/krane/formatted_logger.rb +57 -0
  46. data/lib/krane/global_deploy_task.rb +210 -0
  47. data/lib/krane/global_deploy_task_config_validator.rb +12 -0
  48. data/lib/krane/kubeclient_builder.rb +156 -0
  49. data/lib/krane/kubectl.rb +120 -0
  50. data/lib/krane/kubernetes_resource.rb +621 -0
  51. data/lib/krane/kubernetes_resource/cloudsql.rb +43 -0
  52. data/lib/krane/kubernetes_resource/config_map.rb +22 -0
  53. data/lib/krane/kubernetes_resource/cron_job.rb +18 -0
  54. data/lib/krane/kubernetes_resource/custom_resource.rb +87 -0
  55. data/lib/krane/kubernetes_resource/custom_resource_definition.rb +98 -0
  56. data/lib/krane/kubernetes_resource/daemon_set.rb +90 -0
  57. data/lib/krane/kubernetes_resource/deployment.rb +213 -0
  58. data/lib/krane/kubernetes_resource/horizontal_pod_autoscaler.rb +65 -0
  59. data/lib/krane/kubernetes_resource/ingress.rb +18 -0
  60. data/lib/krane/kubernetes_resource/job.rb +60 -0
  61. data/lib/krane/kubernetes_resource/network_policy.rb +22 -0
  62. data/lib/krane/kubernetes_resource/persistent_volume_claim.rb +80 -0
  63. data/lib/krane/kubernetes_resource/pod.rb +269 -0
  64. data/lib/krane/kubernetes_resource/pod_disruption_budget.rb +23 -0
  65. data/lib/krane/kubernetes_resource/pod_set_base.rb +71 -0
  66. data/lib/krane/kubernetes_resource/pod_template.rb +20 -0
  67. data/lib/krane/kubernetes_resource/replica_set.rb +92 -0
  68. data/lib/krane/kubernetes_resource/resource_quota.rb +22 -0
  69. data/lib/krane/kubernetes_resource/role.rb +22 -0
  70. data/lib/krane/kubernetes_resource/role_binding.rb +22 -0
  71. data/lib/krane/kubernetes_resource/secret.rb +24 -0
  72. data/lib/krane/kubernetes_resource/service.rb +104 -0
  73. data/lib/krane/kubernetes_resource/service_account.rb +22 -0
  74. data/lib/krane/kubernetes_resource/stateful_set.rb +70 -0
  75. data/lib/krane/label_selector.rb +42 -0
  76. data/lib/krane/oj.rb +4 -0
  77. data/lib/krane/options_helper.rb +39 -0
  78. data/lib/krane/remote_logs.rb +60 -0
  79. data/lib/krane/render_task.rb +118 -0
  80. data/lib/krane/renderer.rb +118 -0
  81. data/lib/krane/resource_cache.rb +68 -0
  82. data/lib/krane/resource_deployer.rb +265 -0
  83. data/lib/krane/resource_watcher.rb +171 -0
  84. data/lib/krane/restart_task.rb +228 -0
  85. data/lib/krane/rollout_conditions.rb +103 -0
  86. data/lib/krane/runner_task.rb +212 -0
  87. data/lib/krane/runner_task_config_validator.rb +18 -0
  88. data/lib/krane/statsd.rb +65 -0
  89. data/lib/krane/task_config.rb +22 -0
  90. data/lib/krane/task_config_validator.rb +96 -0
  91. data/lib/krane/template_sets.rb +173 -0
  92. data/lib/krane/version.rb +4 -0
  93. data/pull_request_template.md +8 -0
  94. data/screenshots/deploy-demo.gif +0 -0
  95. data/screenshots/migrate-logs.png +0 -0
  96. data/screenshots/missing-secret-fail.png +0 -0
  97. data/screenshots/success.png +0 -0
  98. data/screenshots/test-output.png +0 -0
  99. metadata +375 -0

data/lib/krane/kubernetes_resource/horizontal_pod_autoscaler.rb
@@ -0,0 +1,65 @@
+# frozen_string_literal: true
+module Krane
+  class HorizontalPodAutoscaler < KubernetesResource
+    TIMEOUT = 3.minutes
+    RECOVERABLE_CONDITION_PREFIX = "FailedGet"
+
+    def deploy_succeeded?
+      scaling_active_condition["status"] == "True" || scaling_disabled?
+    end
+
+    def deploy_failed?
+      return false unless exists?
+      return false if scaling_disabled?
+      scaling_active_condition["status"] == "False" &&
+        !scaling_active_condition.fetch("reason", "").start_with?(RECOVERABLE_CONDITION_PREFIX)
+    end
+
+    def kubectl_resource_type
+      'hpa.v2beta1.autoscaling'
+    end
+
+    def status
+      if !exists?
+        super
+      elsif scaling_disabled?
+        "ScalingDisabled"
+      elsif deploy_succeeded?
+        "Configured"
+      elsif scaling_active_condition.present? || able_to_scale_condition.present?
+        condition = scaling_active_condition.presence || able_to_scale_condition
+        condition['reason']
+      else
+        "Unknown"
+      end
+    end
+
+    def failure_message
+      condition = scaling_active_condition.presence || able_to_scale_condition.presence || {}
+      condition['message']
+    end
+
+    def timeout_message
+      failure_message.presence || super
+    end
+
+    private
+
+    def scaling_disabled?
+      scaling_active_condition["status"] == "False" &&
+        scaling_active_condition["reason"] == "ScalingDisabled"
+    end
+
+    def conditions
+      @instance_data.dig("status", "conditions") || []
+    end
+
+    def able_to_scale_condition
+      conditions.detect { |c| c["type"] == "AbleToScale" } || {}
+    end
+
+    def scaling_active_condition
+      conditions.detect { |c| c["type"] == "ScalingActive" } || {}
+    end
+  end
+end
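
For context, here is a minimal sketch (plain Ruby, not part of the gem; the condition data is hypothetical) of the v2beta1 HPA status.conditions shape that scaling_active_condition and able_to_scale_condition read, and how the RECOVERABLE_CONDITION_PREFIX check keeps transient metric-fetch errors from failing a deploy:

    # Hypothetical HPA status fragment, shaped like the fields the class digs into.
    status = {
      "conditions" => [
        { "type" => "AbleToScale",   "status" => "True",  "reason" => "ReadyForNewScale" },
        { "type" => "ScalingActive", "status" => "False", "reason" => "FailedGetResourceMetric" },
      ],
    }
    scaling_active = status["conditions"].detect { |c| c["type"] == "ScalingActive" } || {}
    recoverable = scaling_active.fetch("reason", "").start_with?("FailedGet")
    puts recoverable # => true, so deploy_failed? stays false despite status == "False"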

data/lib/krane/kubernetes_resource/ingress.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+module Krane
+  class Ingress < KubernetesResource
+    TIMEOUT = 30.seconds
+
+    def status
+      exists? ? "Created" : "Not Found"
+    end
+
+    def deploy_succeeded?
+      exists?
+    end
+
+    def deploy_failed?
+      false
+    end
+  end
+end

data/lib/krane/kubernetes_resource/job.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+module Krane
+  class Job < KubernetesResource
+    TIMEOUT = 10.minutes
+
+    def deploy_succeeded?
+      # Don't block deploys on long-running jobs;
+      # instead, report success once there is at least one active pod.
+      return false unless deploy_started?
+      done? || running?
+    end
+
+    def deploy_failed?
+      return false unless deploy_started?
+      return true if failed_status_condition
+      return false unless @instance_data.dig("spec", "backoffLimit").present?
+      (@instance_data.dig("status", "failed") || 0) >= @instance_data.dig("spec", "backoffLimit")
+    end
+
+    def status
+      if !exists?
+        super
+      elsif done?
+        "Succeeded"
+      elsif running?
+        "Started"
+      elsif deploy_failed?
+        "Failed"
+      else
+        "Unknown"
+      end
+    end
+
+    def failure_message
+      if (condition = failed_status_condition.presence)
+        "#{condition['reason']} (#{condition['message']})"
+      end
+    end
+
+    private
+
+    def failed_status_condition
+      @instance_data.dig("status", "conditions")&.detect do |condition|
+        condition["type"] == 'Failed' && condition['status'] == "True"
+      end
+    end
+
+    def done?
+      (@instance_data.dig("status", "succeeded") || 0) == @instance_data.dig("spec", "completions")
+    end
+
+    def running?
+      now = Time.now.utc
+      start_time = @instance_data.dig("status", "startTime")
+      # Wait 5 seconds to ensure the job doesn't immediately fail.
+      return false if !start_time.present? || now - Time.parse(start_time) < 5.seconds
+      (@instance_data.dig("status", "active") || 0) >= 1
+    end
+  end
+end
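
A quick sketch of the backoffLimit check in deploy_failed?, using hypothetical spec/status fragments in plain Ruby: when no Failed condition is present, the job only counts as failed once the failure count reaches spec.backoffLimit.

    # Hypothetical Job spec/status fragments.
    spec   = { "backoffLimit" => 3, "completions" => 1 }
    status = { "failed" => 3, "succeeded" => 0 }
    failed = !spec["backoffLimit"].nil? && (status["failed"] || 0) >= spec["backoffLimit"]
    puts failed # => true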

data/lib/krane/kubernetes_resource/network_policy.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+module Krane
+  class NetworkPolicy < KubernetesResource
+    TIMEOUT = 30.seconds
+
+    def status
+      exists? ? "Created" : "Not Found"
+    end
+
+    def deploy_succeeded?
+      exists?
+    end
+
+    def deploy_failed?
+      false
+    end
+
+    def timeout_message
+      UNUSUAL_FAILURE_MESSAGE
+    end
+  end
+end

data/lib/krane/kubernetes_resource/persistent_volume_claim.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+module Krane
+  class PersistentVolumeClaim < KubernetesResource
+    TIMEOUT = 5.minutes
+
+    def sync(cache)
+      super
+      @storage_classes = cache.get_all("StorageClass").map { |sc| StorageClass.new(sc) }
+    end
+
+    def status
+      exists? ? @instance_data["status"]["phase"] : "Not Found"
+    end
+
+    def deploy_succeeded?
+      return true if status == "Bound"
+
+      # If the StorageClass has volumeBindingMode: WaitForFirstConsumer,
+      # the PVC won't bind until a Pod mounts it. But it must be pre-deployed,
+      # since the Pod requires it, so 'Pending' must be treated as a success state.
+      if storage_class&.volume_binding_mode == "WaitForFirstConsumer"
+        return status == "Pending" || status == "Bound"
+      end
+      false
+    end
+
+    def deploy_failed?
+      status == "Lost" || failure_message.present?
+    end
+
+    def failure_message
+      if storage_class_name.nil? && @storage_classes.count(&:default?) > 1
+        "PVC has no StorageClass specified and there are multiple StorageClasses " \
+          "annotated as default. This is an invalid cluster configuration."
+      end
+    end
+
+    def timeout_message
+      return STANDARD_TIMEOUT_MESSAGE unless storage_class_name.present? && !storage_class
+      "PVC specified a StorageClass of #{storage_class_name} but the resource does not exist"
+    end
+
+    private
+
+    def storage_class_name
+      @definition.dig("spec", "storageClassName")
+    end
+
+    def storage_class
+      if storage_class_name.present?
+        @storage_classes.detect { |sc| sc.name == storage_class_name }
+      # storage_class_name = "" is an explicit request for no storage class;
+      # storage_class_name = nil is an implicit request for the default storage class
+      elsif storage_class_name != ""
+        @storage_classes.detect(&:default?)
+      end
+    end
+
+    class StorageClass < KubernetesResource
+      DEFAULT_CLASS_ANNOTATION = "storageclass.kubernetes.io/is-default-class"
+      DEFAULT_CLASS_BETA_ANNOTATION = "storageclass.beta.kubernetes.io/is-default-class"
+
+      attr_reader :name
+
+      def initialize(definition)
+        @definition = definition
+        @name = definition.dig("metadata", "name").to_s
+      end
+
+      def volume_binding_mode
+        @definition.dig("volumeBindingMode")
+      end
+
+      def default?
+        @definition.dig("metadata", "annotations", DEFAULT_CLASS_ANNOTATION) == "true" ||
+          @definition.dig("metadata", "annotations", DEFAULT_CLASS_BETA_ANNOTATION) == "true"
+      end
+    end
+  end
+end
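
To illustrate the failure_message check above, a self-contained plain-Ruby sketch (the class names and definitions are hypothetical) of how multiple default StorageClasses are detected; either the GA or the beta annotation marks a class as default:

    # Two hypothetical StorageClasses, each marked default via a different annotation.
    classes = [
      { "metadata" => { "name" => "fast", "annotations" =>
          { "storageclass.kubernetes.io/is-default-class" => "true" } } },
      { "metadata" => { "name" => "slow", "annotations" =>
          { "storageclass.beta.kubernetes.io/is-default-class" => "true" } } },
    ]
    defaults = classes.count do |sc|
      anns = sc.dig("metadata", "annotations") || {}
      anns.values_at("storageclass.kubernetes.io/is-default-class",
        "storageclass.beta.kubernetes.io/is-default-class").include?("true")
    end
    puts defaults > 1 # => true: an invalid cluster configuration per failure_message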

data/lib/krane/kubernetes_resource/pod.rb
@@ -0,0 +1,269 @@
+# frozen_string_literal: true
+module Krane
+  class Pod < KubernetesResource
+    TIMEOUT = 10.minutes
+
+    FAILED_PHASE_NAME = "Failed"
+    TRANSIENT_FAILURE_REASONS = %w(
+      Evicted
+      Preempting
+    )
+
+    attr_accessor :stream_logs
+
+    def initialize(namespace:, context:, definition:, logger:,
+      statsd_tags: nil, parent: nil, deploy_started_at: nil, stream_logs: false)
+      @parent = parent
+      @deploy_started_at = deploy_started_at
+
+      @containers = definition.fetch("spec", {}).fetch("containers", []).map { |c| Container.new(c) }
+      unless @containers.present?
+        logger.summary.add_paragraph("Rendered template content:\n#{definition.to_yaml}")
+        raise FatalDeploymentError, "Template is missing required field spec.containers"
+      end
+      @containers += definition["spec"].fetch("initContainers", []).map { |c| Container.new(c, init_container: true) }
+      @stream_logs = stream_logs
+      super(namespace: namespace, context: context, definition: definition,
+        logger: logger, statsd_tags: statsd_tags)
+    end
+
+    def sync(_cache)
+      super
+      raise_predates_deploy_error if exists? && unmanaged? && !deploy_started?
+
+      if exists?
+        logs.sync if unmanaged?
+        update_container_statuses(@instance_data["status"])
+      else # reset
+        @containers.each(&:reset_status)
+      end
+    end
+
+    def after_sync
+      if @stream_logs
+        logs.print_latest
+      elsif unmanaged? && deploy_succeeded?
+        logs.print_all
+      end
+    end
+
+    def status
+      return phase if reason.blank?
+      "#{phase} (Reason: #{reason})"
+    end
+
+    def deploy_succeeded?
+      if unmanaged?
+        phase == "Succeeded"
+      else
+        phase == "Running" && ready?
+      end
+    end
+
+    def deploy_failed?
+      failure_message.present?
+    end
+
+    def timeout_message
+      if readiness_probe_failure?
+        probe_failure_msgs = @containers.map(&:readiness_fail_reason).compact
+        header = "The following containers have not passed their readiness probes on at least one pod:\n"
+        header + probe_failure_msgs.join("\n")
+      elsif failed_schedule_reason.present?
+        "Pod could not be scheduled because #{failed_schedule_reason}"
+      else
+        STANDARD_TIMEOUT_MESSAGE
+      end
+    end
+
+    def failure_message
+      doomed_containers = @containers.select(&:doomed?)
+      if doomed_containers.present?
+        container_problems = if unmanaged?
+          "The following containers encountered errors:\n"
+        else
+          "The following containers are in a state that is unlikely to be recoverable:\n"
+        end
+        doomed_containers.each do |c|
+          red_name = ColorizedString.new(c.name).red
+          container_problems += "> #{red_name}: #{c.doom_reason}\n"
+        end
+      end
+      "#{phase_failure_message} #{container_problems}".strip.presence
+    end
+
+    def fetch_debug_logs
+      logs.sync
+      logs
+    end
+
+    def print_debug_logs?
+      exists? && !@stream_logs # don't print them a second time
+    end
+
+    def node_name
+      @instance_data.dig('spec', 'nodeName')
+    end
+
+    private

+    def failed_schedule_reason
+      if phase == "Pending"
+        conditions = @instance_data.dig('status', 'conditions') || []
+        unschedulable = conditions.find do |condition|
+          condition["type"] == "PodScheduled" && condition["status"] == "False"
+        end
+        unschedulable&.dig('message')
+      end
+    end
+
+    def failed_phase?
+      phase == FAILED_PHASE_NAME
+    end
+
+    def transient_failure_reason?
+      return false if unmanaged?
+      TRANSIENT_FAILURE_REASONS.include?(reason)
+    end
+
+    def phase_failure_message
+      if failed_phase? && !transient_failure_reason?
+        return "Pod status: #{status}."
+      end
+
+      return unless unmanaged?
+
+      if terminating?
+        "Pod status: Terminating."
+      elsif disappeared?
+        "Pod status: Disappeared."
+      end
+    end
+
+    def logs
+      @logs ||= Krane::RemoteLogs.new(
+        logger: @logger,
+        parent_id: id,
+        container_names: @containers.map(&:name),
+        namespace: @namespace,
+        context: @context
+      )
+    end
+
+    def phase
+      @instance_data.dig("status", "phase") || "Unknown"
+    end
+
+    def reason
+      @instance_data.dig('status', 'reason')
+    end
+
+    def readiness_probe_failure?
+      return false if ready? || unmanaged?
+      return false if phase != "Running"
+      @containers.any?(&:readiness_fail_reason)
+    end
+
+    def ready?
+      return false unless (status_data = @instance_data["status"])
+      ready_condition = status_data.fetch("conditions", []).find { |condition| condition["type"] == "Ready" }
+      ready_condition.present? && (ready_condition["status"] == "True")
+    end
+
+    def update_container_statuses(status_data)
+      @containers.each do |c|
+        key = c.init_container? ? "initContainerStatuses" : "containerStatuses"
+        if status_data.key?(key)
+          data = status_data[key].find { |st| st["name"] == c.name }
+          c.update_status(data)
+        else
+          c.reset_status
+        end
+      end
+    end
+
+    def unmanaged?
+      @parent.blank?
+    end
+
+    def raise_predates_deploy_error
+      example_color = :green
+      msg = <<-STRING.strip_heredoc
+        Unmanaged pods like #{id} must have unique names on every deploy in order to work as intended.
+        The recommended way to achieve this is to include "<%= deployment_id %>" in the pod's name, like this:
+          #{ColorizedString.new('kind: Pod').colorize(example_color)}
+          #{ColorizedString.new('metadata:').colorize(example_color)}
+            #{ColorizedString.new("name: #{@name}-<%= deployment_id %>").colorize(example_color)}
+      STRING
+      @logger.summary.add_paragraph(msg)
+      raise FatalDeploymentError, "#{id} existed before the deploy started"
+    end
+
+    class Container
+      attr_reader :name
+
+      def initialize(definition, init_container: false)
+        @init_container = init_container
+        @name = definition["name"]
+        @image = definition["image"]
+        @http_probe_location = definition.dig("readinessProbe", "httpGet", "path")
+        @exec_probe_command = definition.dig("readinessProbe", "exec", "command")
+        @status = {}
+      end
+
+      def doomed?
+        doom_reason.present?
+      end
+
+      def doom_reason
+        limbo_reason = @status.dig("state", "waiting", "reason")
+        limbo_message = @status.dig("state", "waiting", "message")
+
+        if @status.dig("lastState", "terminated", "reason") == "ContainerCannotRun"
+          # ref: https://github.com/kubernetes/kubernetes/blob/562e721ece8a16e05c7e7d6bdd6334c910733ab2/pkg/kubelet/dockershim/docker_container.go#L353
+          exit_code = @status.dig('lastState', 'terminated', 'exitCode')
+          "Failed to start (exit #{exit_code}): #{@status.dig('lastState', 'terminated', 'message')}"
+        elsif @status.dig("state", "terminated", "reason") == "ContainerCannotRun"
+          exit_code = @status.dig('state', 'terminated', 'exitCode')
+          "Failed to start (exit #{exit_code}): #{@status.dig('state', 'terminated', 'message')}"
+        elsif limbo_reason == "CrashLoopBackOff"
+          exit_code = @status.dig('lastState', 'terminated', 'exitCode')
+          "Crashing repeatedly (exit #{exit_code}). See logs for more information."
+        elsif limbo_reason == "ErrImagePull" && limbo_message.match(/not found/i)
+          "Failed to pull image #{@image}. "\
+          "Did you wait for it to be built and pushed to the registry before deploying?"
+        elsif limbo_reason == "CreateContainerConfigError"
+          "Failed to generate container configuration: #{limbo_message}"
+        end
+      end
+
+      def readiness_fail_reason
+        return if ready? || init_container?
+        return unless (@http_probe_location || @exec_probe_command).present?
+
+        yellow_name = ColorizedString.new(name).yellow
+        if @http_probe_location
+          "> #{yellow_name} must respond with a good status code at '#{@http_probe_location}'"
+        elsif @exec_probe_command
+          "> #{yellow_name} must exit 0 from the following command: '#{@exec_probe_command.join(' ')}'"
+        end
+      end
+
+      def ready?
+        @status['ready'] == true
+      end
+
+      def init_container?
+        @init_container
+      end
+
+      def update_status(data)
+        @status = data || {}
+      end
+
+      def reset_status
+        @status = {}
+      end
+    end
+  end
+end
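
As a worked example of Container#doom_reason, a plain-Ruby sketch with a hypothetical containerStatuses entry: a CrashLoopBackOff reason in state.waiting surfaces the exit code from lastState.terminated, and the resulting message is what makes the pod's deploy_failed? true via failure_message.

    # Hypothetical containerStatuses entry for a crash-looping container.
    status = {
      "state"     => { "waiting"    => { "reason" => "CrashLoopBackOff" } },
      "lastState" => { "terminated" => { "exitCode" => 1 } },
    }
    if status.dig("state", "waiting", "reason") == "CrashLoopBackOff"
      exit_code = status.dig("lastState", "terminated", "exitCode")
      puts "Crashing repeatedly (exit #{exit_code}). See logs for more information."
    end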