seira 0.3.2 → 0.3.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: c324b547f1bcefcdf4273e878da27dacf362c79a
- data.tar.gz: 740dbe2c8657affa050e6642d927fb82fa983bb5
+ metadata.gz: ee194647d3997370da2e9494debbfc5348064e26
+ data.tar.gz: 8b77f1bc88782a90081fc930615f261f6a40f971
  SHA512:
- metadata.gz: becbc4ed3755d1ec7c151ee9e1cf3da0df292e5c6cf9be9c2898342cf7c33f9b668b429a3cd727c1b513ae3048e3c78955dae24f5202e4a73cc0dfd473ca821c
- data.tar.gz: 0c8e1d381d3b043b56b78c13483280d379c66617492f94f779946260a62b9d687e1a6fcb94cae8f316534ceedb707caf46f80d515cd282a72b185b3719f9ec31
+ metadata.gz: e4dde3d0e4310683be3ec407d8090799319b88f6ca97eea0bb3a27748e52d7d8c2972533f2f56c9a2e3ef952eccf39b302988bcbe213e7057294c0ca2189c50e
+ data.tar.gz: bbfc6070333c8852503003a73435a150f216cb9b8ab4e0237b57171d274ab16b56be2cb386821d5fcf75bd496f487c8855b3fbaa2d186a4b6d966d399d2d00fa
data/lib/seira.rb CHANGED
@@ -17,6 +17,7 @@ require 'seira/redis'
  require 'seira/secrets'
  require 'seira/settings'
  require 'seira/setup'
+ require 'seira/node_pools'
 
  # A base runner class that does base checks and then delegates the actual
  # work for the command to a class in lib/seira folder.
@@ -32,7 +33,8 @@ module Seira
  'app' => Seira::App,
  'cluster' => Seira::Cluster,
  'proxy' => Seira::Proxy,
- 'setup' => Seira::Setup
+ 'setup' => Seira::Setup,
+ 'node-pools' => Seira::NodePools
  }.freeze
 
  attr_reader :project, :cluster, :app, :category, :action, :args
@@ -45,7 +47,7 @@ module Seira
 
  reversed_args = ARGV.reverse.map(&:chomp)
 
- # The cluster and proxy command are not specific to any app, so that
+ # The cluster, node-pools and proxy command are not specific to any app, so that
  # arg is not in the ARGV array and should be skipped over
  if ARGV[0] == 'help'
  @category = reversed_args.pop
@@ -54,6 +56,11 @@ module Seira
  @category = reversed_args.pop
  @action = reversed_args.pop
  @args = reversed_args.reverse
+ elsif ARGV[1] == 'node-pools'
+ cluster = reversed_args.pop
+ @category = reversed_args.pop
+ @action = reversed_args.pop
+ @args = reversed_args.reverse
  elsif ARGV[1] == 'proxy'
  cluster = reversed_args.pop
  @category = reversed_args.pop
@@ -103,6 +110,9 @@ module Seira
  if category == 'cluster'
  perform_action_validation(klass: command_class, action: action)
  command_class.new(action: action, args: args, context: passed_context, settings: settings).run
+ elsif category == 'node-pools'
+ perform_action_validation(klass: command_class, action: action)
+ command_class.new(action: action, args: args, context: passed_context, settings: settings).run
  elsif category == 'proxy'
  command_class.new.run
  else
@@ -132,7 +142,7 @@ module Seira
  def perform_action_validation(klass:, action:)
  return true if simple_cluster_change?
 
- unless klass == Seira::Cluster || settings.applications.include?(app)
+ unless klass == Seira::Cluster || klass == Seira::NodePools || settings.applications.include?(app)
  puts "Invalid app name specified"
  exit(1)
  end
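
The runner changes above register a new 'node-pools' category that, like 'cluster' and 'proxy', is scoped to a cluster rather than an app. A minimal sketch of the dispatch this is expected to produce (illustrative only; the cluster name 'internal' and the nil settings are placeholders, not part of the diff):

    # A command line such as `seira internal node-pools list` should reach a call
    # like this one, mirroring the cluster/proxy handling shown in the hunks above.
    require 'seira'

    Seira::NodePools.new(
      action: 'list',                    # third positional CLI argument
      args: [],                          # any remaining flags/arguments
      context: { cluster: 'internal' },  # placeholder cluster name
      settings: nil                      # the runner normally supplies a Seira::Settings object
    ).run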
data/lib/seira/app.rb CHANGED
@@ -42,7 +42,7 @@ module Seira
  puts "\n\n"
  puts "Possible actions:\n\n"
  puts "bootstrap: Create new app with main secret, cloudsql secret, and gcr secret in the new namespace."
- puts "apply: Apply the configuration in kubernetes/<cluster-name>/<app-name> using REVISION environment variable to find/replace REVISION in the YAML."
+ puts "apply: Apply the configuration in kubernetes/<cluster-name>/<app-name> using first argument or REVISION environment variable to find/replace REVISION in the YAML."
  puts "restart: Forces a rolling deploy for any deployment making use of RESTARTED_AT_VALUE in the deployment."
  puts "scale: Scales the given tier deployment to the specified number of instances."
  end
@@ -84,7 +84,7 @@ module Seira
 
  Dir.mktmpdir do |dir|
  destination = "#{dir}/#{context[:cluster]}/#{app}"
- revision = ENV['REVISION']
+ revision = args.first || ENV['REVISION']
 
  if revision.nil?
  current_revision = ask_cluster_for_current_revision
@@ -220,7 +220,7 @@ module Seira
  def load_configs
  directory = "kubernetes/#{context[:cluster]}/#{app}/"
  Dir.new(directory).flat_map do |filename|
- next if ['.', '..'].include? filename
+ next if File.directory?(File.join(directory, filename))
  YAML.load_stream(File.read(File.join(directory, filename)))
  end.compact
  end
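
The apply change above lets the target revision come from the first positional argument, falling back to the REVISION environment variable, and load_configs now skips any subdirectory rather than only '.' and '..'. A small sketch of the new precedence (illustrative only; the revision values and the cluster/app names are made up):

    # With something like `seira staging myapp app apply abc123` the positional
    # argument wins; without it, the REVISION environment variable is used as before.
    args = ['abc123']
    ENV['REVISION'] = 'def456'

    revision = args.first || ENV['REVISION']
    puts revision  # => "abc123"; an empty args array would fall back to "def456"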
data/lib/seira/cluster.rb CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
  # Example usages:
  module Seira
  class Cluster
- VALID_ACTIONS = %w[help bootstrap upgrade].freeze
+ VALID_ACTIONS = %w[help bootstrap upgrade-master].freeze
  SUMMARY = "For managing whole clusters.".freeze
 
  attr_reader :action, :args, :context, :settings
@@ -23,8 +23,8 @@ module Seira
  run_help
  when 'bootstrap'
  run_bootstrap
- when 'upgrade'
- run_upgrade
+ when 'upgrade-master'
+ run_upgrade_master
  else
  fail "Unknown command encountered"
  end
@@ -81,13 +81,13 @@ module Seira
  puts `kubectl create secret generic cloudsql-credentials --namespace default --from-file=credentials.json=#{cloudsql_credentials_location}`
  end
 
- def run_upgrade
+ def run_upgrade_master
  cluster = context[:cluster]
+ new_version = args.first
 
  # Take a single argument, which is the version to upgrade to
- new_version = args[0]
  if new_version.nil?
- puts 'must specify version to upgrade to'
+ puts 'Please specify version to upgrade to'
  exit(1)
  end
 
@@ -103,106 +103,18 @@ module Seira
  cluster_config = JSON.parse(`gcloud container clusters describe #{cluster} --format json`)
 
  # Update the master node first
- puts 'updating master (this may take a while)'
+ exit(1) unless Highline.agree("Are you sure you want to upgrade cluster #{cluster} master to version #{new_version}? Services should continue to run fine, but the cluster control plane will be offline.")
+
+ puts 'Updating master (this may take a while)'
  if cluster_config['currentMasterVersion'] == new_version
  # Master has already been updated; this step is not needed
- puts 'already up to date'
+ puts 'Already up to date!'
  elsif system("gcloud container clusters upgrade #{cluster} --cluster-version=#{new_version} --master")
- puts 'master updated successfully'
+ puts 'Master updated successfully!'
  else
- puts 'failed to update master'
+ puts 'Failed to update master.'
  exit(1)
  end
-
- # Figure out what our current node pool setup is. The goal here is to be able to re-run this
- # command if it fails partway through, and have it pick up where it left off.
- pools = JSON.parse(`gcloud container node-pools list --cluster #{cluster} --format json`)
- if pools.length == 2
- # We have two node pools. Assume this is due to the upgrade process already being started,
- # so we have one pool with the old version and one pool with the new version.
- old_pool = pools.find { |p| p['version'] != new_version }
- new_pool = pools.find { |p| p['version'] == new_version }
- if old_pool.nil? || new_pool.nil?
- # Turns out the two pools are not the result of a partially-finished upgrade; in this
- # case we give up and the upgrade will have to proceed manually.
- puts 'Unsupported node pool setup: could not find old and new pool'
- exit(1)
- end
- elsif pools.length == 1
- # Only one pool is the normal case; set old_pool and that's it.
- old_pool = pools.first
- else
- # If we have three or more or zero pools, upgrade will have to proceed manually.
- puts 'Unsupported node pool setup: unexpected number of pools'
- exit(1)
- end
- # Get names of the nodes in the old node pool
- old_nodes = `kubectl get nodes -l cloud.google.com/gke-nodepool=#{old_pool['name']} -o name`.split("\n")
-
- # If we don't already have a new pool (i.e. one with the new version), create one
- if new_pool.nil?
- # Pick a name for the new pool, alternating between blue and green
- new_pool_name = old_pool['name'] == 'blue' ? 'green' : 'blue'
-
- # Create a new node pool with all the same settings as the old one. The version of the new
- # pool will match the master version, which has already been updated.
- puts 'creating new node pool'
- command =
- "gcloud container node-pools create #{new_pool_name} \
- --cluster=#{cluster} \
- --disk-size=#{old_pool['config']['diskSizeGb']} \
- --image-type=#{old_pool['config']['imageType']} \
- --machine-type=#{old_pool['config']['machineType']} \
- --num-nodes=#{old_nodes.count} \
- --service-account=#{old_pool['serviceAccount']}"
- # TODO: support autoscaling if old pool has it turned on
- if system(command)
- puts 'new pool created successfully'
- else
- puts 'failed to create new pool'
- exit(1)
- end
- end
-
- # Cordon all the nodes in the old pool, preventing new workloads from being sent to them
- puts 'cordoning old nodes'
- old_nodes.each do |node|
- unless system("kubectl cordon #{node}")
- puts "failed to cordon node #{node}"
- exit(1)
- end
- end
-
- # Drain all the nodes in the old pool, moving workloads off of them gradually while
- # respecting maxUnavailable etc.
- puts 'draining old nodes'
- old_nodes.each do |node|
- # --force deletes pods that aren't managed by a ReplicationController, Job, or DaemonSet,
- # which shouldn't be any besides manually created temp pods
- # --ignore-daemonsets prevents failing due to presence of DaemonSets, which cannot be moved
- # because they're tied to a specific node
- # --delete-local-data prevents failing due to presence of local data, which cannot be moved
- # but is bad practice to use for anything that can't be lost
- puts "draining #{node}"
- unless system("kubectl drain --force --ignore-daemonsets --delete-local-data #{node}")
- puts "failed to drain node #{node}"
- exit(1)
- end
- end
-
- # All workloads which can be moved have been moved off of old node pool have been moved, so
- # that node pool can be deleted, leaving only the new pool with the new version
- if HighLine.agree('Delete old node pool?')
- puts 'deleting old node pool'
- if system("gcloud container node-pools delete #{old_pool['name']} --cluster #{cluster}")
- puts 'old pool deleted successfully'
- else
- puts 'failed to delete old pool'
- exit(1)
- end
- end
-
- puts 'upgrade complete!'
  end
  end
  end
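
With this change `cluster upgrade` becomes `upgrade-master`: it now asks for confirmation, upgrades only the control plane, and drops the node-pool rotation logic, which reappears as the standalone node-pools command added below. A small sketch of the guard that keeps the action safe to re-run (illustrative only; the describe output and version string are made up):

    # `gcloud container clusters describe` reports the current master version; if it
    # already matches the requested version, the upgrade call is skipped entirely.
    cluster_config = { 'currentMasterVersion' => '1.9.3-gke.0' }  # made-up describe output
    new_version = '1.9.3-gke.0'

    if cluster_config['currentMasterVersion'] == new_version
      puts 'Already up to date!'  # re-running after a successful upgrade is a no-op
    end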
data/lib/seira/memcached.rb CHANGED
@@ -64,12 +64,16 @@
  }
  }
 
+ replica_count = 3 # The default
+
  args.each do |arg|
  puts "Applying arg #{arg} to values"
  if arg.start_with?('--memory=')
  values[:resources][:requests][:memory] = arg.split('=')[1]
  elsif arg.start_with?('--cpu=')
  values[:resources][:requests][:cpu] = arg.split('=')[1]
+ elsif arg.start_with?('--replicas=')
+ replica_count = arg.split('=')[1]
  elsif arg.start_with?('--size=')
  size = arg.split('=')[1]
  case size
@@ -103,10 +107,16 @@
  end
  end
 
+ # Make sure that pdbMinAvailable is always 1 less than total replica count
+ # so that we can properly cordon and drain a node.
+ values[:replicaCount] = replica_count
+ values[:pdbMinAvailable] = replica_count - 1
+
+ unique_name = Seira::Random.unique_name(existing_instances)
+ name = "#{app}-memcached-#{unique_name}"
+
  Dir.mktmpdir do |dir|
  file_name = write_config(dir: dir, values: values)
- unique_name = Seira::Random.unique_name(existing_instances)
- name = "#{app}-memcached-#{unique_name}"
  puts `helm install --namespace #{app} --name #{name} --wait -f #{file_name} stable/memcached`
  end
 
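The memcached hunk above adds a --replicas flag and derives pdbMinAvailable from it, so the chart's PodDisruptionBudget always leaves one replica of headroom for cordoning and draining a node. A worked example of the relationship (illustrative only; the flag value is made up, and .to_i is assumed here because the raw value arrives from the command line as a string):

    arg = '--replicas=5'
    replica_count = arg.split('=')[1].to_i  # integer coercion assumed for the arithmetic below

    values = {}
    values[:replicaCount]    = replica_count      # => 5
    values[:pdbMinAvailable] = replica_count - 1  # => 4, so one node can always be drained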
data/lib/seira/node_pools.rb ADDED
@@ -0,0 +1,182 @@
+ require 'json'
+ require 'base64'
+ require 'fileutils'
+
+ # Example usages:
+ module Seira
+ class NodePools
+ VALID_ACTIONS = %w[help list list-nodes add cordon drain delete].freeze
+ SUMMARY = "For managing node pools for a cluster.".freeze
+
+ attr_reader :action, :args, :context, :settings
+
+ def initialize(action:, args:, context:, settings:)
+ @action = action
+ @args = args
+ @context = context
+ @settings = settings
+ end
+
+ def run
+ case action
+ when 'help'
+ run_help
+ when 'list'
+ run_list
+ when 'list-nodes'
+ run_list_nodes
+ when 'add'
+ run_add
+ when 'cordon'
+ run_cordon
+ when 'drain'
+ run_drain
+ when 'delete'
+ run_delete
+ else
+ fail "Unknown command encountered"
+ end
+ end
+
+ private
+
+ def run_help
+ puts SUMMARY
+ puts "\n\n"
+ puts "Possible actions:\n\n"
+ puts "list: List the node pools for this cluster: `node-pools list`"
+ puts "list-nodes: List the nodes in specified node pool: `node-pools list-nodes <node-pool-name>`"
+ puts "add: Create a node pool. First arg is the name to use, and use --copy to specify the existing node pool to copy."
+ puts " `node-pools add <node-pool-name> --copy=<existing-node-pool-name>`"
+ puts "cordon: Cordon nodes in specified node pool: `node-pools cordon <node-pool-name>`"
+ puts "drain: Drain all pods from specified node pool: `node-pools drain <node-pool-name>`"
+ puts "delete: Delete a node pool. Will force-run cordon and drain, first: `node-pools delete <node-pool-name>`"
+ end
+
+ # TODO: Info about what is running on it?
+ # TODO: What information do we get in the json format we could include here?
+ def run_list
+ puts `gcloud container node-pools list --cluster #{context[:cluster]}`
+ end
+
+ def run_list_nodes
+ puts nodes_for_pool(args.first)
+ end
+
+ def run_add
+ new_pool_name = args.shift
+ disk_size = nil
+ image_type = nil
+ machine_type = nil
+ service_account = nil
+ num_nodes = nil
+
+ args.each do |arg|
+ if arg.start_with? '--copy='
+ node_pool_name_to_copy = arg.split('=')[1]
+ node_pool_to_copy = node_pools.find { |p| p['name'] == node_pool_name_to_copy }
+
+ fail "Could not find node pool with name #{node_pool_name_to_copy} to copy from." if node_pool_to_copy.nil?
+
+ disk_size = node_pool_to_copy['config']['diskSizeGb']
+ image_type = node_pool_to_copy['config']['imageType']
+ machine_type = node_pool_to_copy['config']['machineType']
+ service_account = node_pool_to_copy['serviceAccount']
+ num_nodes = nodes_for_pool(node_pool_name_to_copy).count
+ else
+ puts "Warning: Unrecognized argument '#{arg}'"
+ end
+ end
+
+ command =
+ "gcloud container node-pools create #{new_pool_name} \
+ --cluster=#{context[:cluster]} \
+ --disk-size=#{disk_size} \
+ --image-type=#{image_type} \
+ --machine-type=#{machine_type} \
+ --num-nodes=#{num_nodes} \
+ --service-account=#{service_account}"
+
+ if system(command)
+ puts 'New pool created successfully'
+ else
+ puts 'Failed to create new pool'
+ exit(1)
+ end
+ end
+
+ def run_cordon
+ fail_if_lone_node_pool
+
+ node_pool_name = args.first
+ nodes = nodes_for_pool(node_pool_name)
+
+ nodes.each do |node|
+ unless system("kubectl cordon #{node}")
+ puts "Failed to cordon node #{node}"
+ exit(1)
+ end
+ end
+
+ puts "Successfully cordoned node pool #{node_pool_name}. No new workloads will be placed on #{node_pool_name} nodes."
+ end
+
+ def run_drain
+ fail_if_lone_node_pool
+
+ node_pool_name = args.first
+ nodes = nodes_for_pool(node_pool_name)
+
+ nodes.each do |node|
+ # --force deletes pods that aren't managed by a ReplicationController, Job, or DaemonSet,
+ # which shouldn't be any besides manually created temp pods
+ # --ignore-daemonsets prevents failing due to presence of DaemonSets, which cannot be moved
+ # because they're tied to a specific node
+ # --delete-local-data prevents failing due to presence of local data, which cannot be moved
+ # but is bad practice to use for anything that can't be lost
+ puts "Draining #{node}"
+ unless system("kubectl drain --force --ignore-daemonsets --delete-local-data #{node}")
+ puts "Failed to drain node #{node}"
+ exit(1)
+ end
+ end
+
+ puts "Successfully drained all nodes in node pool #{node_pool_name}. No pods are running on #{node_pool_name} nodes."
+ end
+
+ def run_delete
+ fail_if_lone_node_pool
+
+ node_pool_name = args.first
+
+ puts "Running cordon and drain as a safety measure first. If you haven't run these yet, please do so separately before deleting this node pool."
+ run_cordon
+ run_drain
+
+ exit(1) unless HighLine.agree "Node pool has successfully been cordoned and drained, and should be safe to delete. Continue deleting node pool #{node_pool_name}?"
+
+ if system("gcloud container node-pools delete #{node_pool_name} --cluster #{context[:cluster]}")
+ puts 'Node pool deleted successfully'
+ else
+ puts 'Failed to delete old pool'
+ exit(1)
+ end
+ end
+
+ # TODO: Represent by a ruby object?
+ def node_pools
+ JSON.parse(`gcloud container node-pools list --cluster #{context[:cluster]} --format json`)
+ end
+
+ def nodes_for_pool(pool_name)
+ `kubectl get nodes -l cloud.google.com/gke-nodepool=#{pool_name} -o name`.split("\n")
+ end
+
+ def fail_if_lone_node_pool
+ return if node_pools.count > 1
+
+ puts "Operation is unsafe to run with only one node pool. Please add a new node pool first to ensure services in cluster can continue running."
+ exit(1)
+ end
+ end
+ end
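
The new Seira::NodePools command wraps the gcloud and kubectl calls needed to rotate a GKE node pool: create a copy of the old pool, cordon it, drain it, then delete it. A sketch of the intended flow using the actions defined above (illustrative only; cluster and pool names 'internal', 'blue', 'green' are placeholders, and running this would actually shell out to gcloud/kubectl):

    #   seira internal node-pools list
    #   seira internal node-pools add green --copy=blue   # clone the old pool's settings
    #   seira internal node-pools cordon blue             # stop scheduling onto old nodes
    #   seira internal node-pools drain blue              # move workloads to the new pool
    #   seira internal node-pools delete blue             # re-runs cordon/drain, then deletes
    #
    # The same rotation invoked programmatically:
    require 'seira'

    %w[cordon drain delete].each do |action|
      Seira::NodePools.new(
        action: action,
        args: ['blue'],
        context: { cluster: 'internal' },
        settings: nil  # the runner normally supplies a Seira::Settings object
      ).run
    end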
data/lib/seira/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Seira
- VERSION = "0.3.2".freeze
+ VERSION = "0.3.3".freeze
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: seira
  version: !ruby/object:Gem::Version
- version: 0.3.2
+ version: 0.3.3
  platform: ruby
  authors:
  - Scott Ringwelski
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-02-28 00:00:00.000000000 Z
+ date: 2018-03-09 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: highline
@@ -124,6 +124,7 @@ files:
  - lib/seira/db/create.rb
  - lib/seira/jobs.rb
  - lib/seira/memcached.rb
+ - lib/seira/node_pools.rb
  - lib/seira/pods.rb
  - lib/seira/proxy.rb
  - lib/seira/random.rb