seira 0.3.2 → 0.3.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: c324b547f1bcefcdf4273e878da27dacf362c79a
- data.tar.gz: 740dbe2c8657affa050e6642d927fb82fa983bb5
+ metadata.gz: ee194647d3997370da2e9494debbfc5348064e26
+ data.tar.gz: 8b77f1bc88782a90081fc930615f261f6a40f971
  SHA512:
- metadata.gz: becbc4ed3755d1ec7c151ee9e1cf3da0df292e5c6cf9be9c2898342cf7c33f9b668b429a3cd727c1b513ae3048e3c78955dae24f5202e4a73cc0dfd473ca821c
- data.tar.gz: 0c8e1d381d3b043b56b78c13483280d379c66617492f94f779946260a62b9d687e1a6fcb94cae8f316534ceedb707caf46f80d515cd282a72b185b3719f9ec31
+ metadata.gz: e4dde3d0e4310683be3ec407d8090799319b88f6ca97eea0bb3a27748e52d7d8c2972533f2f56c9a2e3ef952eccf39b302988bcbe213e7057294c0ca2189c50e
+ data.tar.gz: bbfc6070333c8852503003a73435a150f216cb9b8ab4e0237b57171d274ab16b56be2cb386821d5fcf75bd496f487c8855b3fbaa2d186a4b6d966d399d2d00fa
data/lib/seira.rb CHANGED
@@ -17,6 +17,7 @@ require 'seira/redis'
  require 'seira/secrets'
  require 'seira/settings'
  require 'seira/setup'
+ require 'seira/node_pools'
 
  # A base runner class that does base checks and then delegates the actual
  # work for the command to a class in lib/seira folder.
@@ -32,7 +33,8 @@ module Seira
  'app' => Seira::App,
  'cluster' => Seira::Cluster,
  'proxy' => Seira::Proxy,
- 'setup' => Seira::Setup
+ 'setup' => Seira::Setup,
+ 'node-pools' => Seira::NodePools
  }.freeze
 
  attr_reader :project, :cluster, :app, :category, :action, :args
@@ -45,7 +47,7 @@ module Seira
 
  reversed_args = ARGV.reverse.map(&:chomp)
 
- # The cluster and proxy command are not specific to any app, so that
+ # The cluster, node-pools and proxy command are not specific to any app, so that
  # arg is not in the ARGV array and should be skipped over
  if ARGV[0] == 'help'
  @category = reversed_args.pop
@@ -54,6 +56,11 @@ module Seira
  @category = reversed_args.pop
  @action = reversed_args.pop
  @args = reversed_args.reverse
+ elsif ARGV[1] == 'node-pools'
+ cluster = reversed_args.pop
+ @category = reversed_args.pop
+ @action = reversed_args.pop
+ @args = reversed_args.reverse
  elsif ARGV[1] == 'proxy'
  cluster = reversed_args.pop
  @category = reversed_args.pop
@@ -103,6 +110,9 @@ module Seira
  if category == 'cluster'
  perform_action_validation(klass: command_class, action: action)
  command_class.new(action: action, args: args, context: passed_context, settings: settings).run
+ elsif category == 'node-pools'
+ perform_action_validation(klass: command_class, action: action)
+ command_class.new(action: action, args: args, context: passed_context, settings: settings).run
  elsif category == 'proxy'
  command_class.new.run
  else
@@ -132,7 +142,7 @@ module Seira
  def perform_action_validation(klass:, action:)
  return true if simple_cluster_change?
 
- unless klass == Seira::Cluster || settings.applications.include?(app)
+ unless klass == Seira::Cluster || klass == Seira::NodePools || settings.applications.include?(app)
  puts "Invalid app name specified"
  exit(1)
  end
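
The runner changes above register a new 'node-pools' category that, like 'cluster' and 'proxy', is scoped to a cluster rather than an app. A minimal sketch of the dispatch this is expected to produce (illustrative only; the cluster name 'internal' and the nil settings are placeholders, not part of the diff):

    # A command line such as `seira internal node-pools list` should reach a call
    # like this one, mirroring the cluster/proxy handling shown in the hunks above.
    require 'seira'

    Seira::NodePools.new(
      action: 'list',                    # third positional CLI argument
      args: [],                          # any remaining flags/arguments
      context: { cluster: 'internal' },  # placeholder cluster name
      settings: nil                      # the runner normally supplies a Seira::Settings object
    ).run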
data/lib/seira/app.rb CHANGED
@@ -42,7 +42,7 @@ module Seira
  puts "\n\n"
  puts "Possible actions:\n\n"
  puts "bootstrap: Create new app with main secret, cloudsql secret, and gcr secret in the new namespace."
- puts "apply: Apply the configuration in kubernetes/<cluster-name>/<app-name> using REVISION environment variable to find/replace REVISION in the YAML."
+ puts "apply: Apply the configuration in kubernetes/<cluster-name>/<app-name> using first argument or REVISION environment variable to find/replace REVISION in the YAML."
  puts "restart: Forces a rolling deploy for any deployment making use of RESTARTED_AT_VALUE in the deployment."
  puts "scale: Scales the given tier deployment to the specified number of instances."
  end
@@ -84,7 +84,7 @@ module Seira
 
  Dir.mktmpdir do |dir|
  destination = "#{dir}/#{context[:cluster]}/#{app}"
- revision = ENV['REVISION']
+ revision = args.first || ENV['REVISION']
 
  if revision.nil?
  current_revision = ask_cluster_for_current_revision
@@ -220,7 +220,7 @@ module Seira
  def load_configs
  directory = "kubernetes/#{context[:cluster]}/#{app}/"
  Dir.new(directory).flat_map do |filename|
- next if ['.', '..'].include? filename
+ next if File.directory?(File.join(directory, filename))
  YAML.load_stream(File.read(File.join(directory, filename)))
  end.compact
  end
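
The apply change above lets the target revision come from the first positional argument, falling back to the REVISION environment variable, and load_configs now skips any subdirectory rather than only '.' and '..'. A small sketch of the new precedence (illustrative only; the revision values and the cluster/app names are made up):

    # With something like `seira staging myapp app apply abc123` the positional
    # argument wins; without it, the REVISION environment variable is used as before.
    args = ['abc123']
    ENV['REVISION'] = 'def456'

    revision = args.first || ENV['REVISION']
    puts revision  # => "abc123"; an empty args array would fall back to "def456"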
data/lib/seira/cluster.rb CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
  # Example usages:
  module Seira
  class Cluster
- VALID_ACTIONS = %w[help bootstrap upgrade].freeze
+ VALID_ACTIONS = %w[help bootstrap upgrade-master].freeze
  SUMMARY = "For managing whole clusters.".freeze
 
  attr_reader :action, :args, :context, :settings
@@ -23,8 +23,8 @@ module Seira
  run_help
  when 'bootstrap'
  run_bootstrap
- when 'upgrade'
- run_upgrade
+ when 'upgrade-master'
+ run_upgrade_master
  else
  fail "Unknown command encountered"
  end
@@ -81,13 +81,13 @@ module Seira
  puts `kubectl create secret generic cloudsql-credentials --namespace default --from-file=credentials.json=#{cloudsql_credentials_location}`
  end
 
- def run_upgrade
+ def run_upgrade_master
  cluster = context[:cluster]
+ new_version = args.first
 
  # Take a single argument, which is the version to upgrade to
- new_version = args[0]
  if new_version.nil?
- puts 'must specify version to upgrade to'
+ puts 'Please specify version to upgrade to'
  exit(1)
  end
 
@@ -103,106 +103,18 @@ module Seira
  cluster_config = JSON.parse(`gcloud container clusters describe #{cluster} --format json`)
 
  # Update the master node first
- puts 'updating master (this may take a while)'
+ exit(1) unless Highline.agree("Are you sure you want to upgrade cluster #{cluster} master to version #{new_version}? Services should continue to run fine, but the cluster control plane will be offline.")
+
+ puts 'Updating master (this may take a while)'
  if cluster_config['currentMasterVersion'] == new_version
  # Master has already been updated; this step is not needed
- puts 'already up to date'
+ puts 'Already up to date!'
  elsif system("gcloud container clusters upgrade #{cluster} --cluster-version=#{new_version} --master")
- puts 'master updated successfully'
+ puts 'Master updated successfully!'
  else
- puts 'failed to update master'
+ puts 'Failed to update master.'
  exit(1)
  end
-
- # Figure out what our current node pool setup is. The goal here is to be able to re-run this
- # command if it fails partway through, and have it pick up where it left off.
- pools = JSON.parse(`gcloud container node-pools list --cluster #{cluster} --format json`)
- if pools.length == 2
- # We have two node pools. Assume this is due to the upgrade process already being started,
- # so we have one pool with the old version and one pool with the new version.
- old_pool = pools.find { |p| p['version'] != new_version }
- new_pool = pools.find { |p| p['version'] == new_version }
- if old_pool.nil? || new_pool.nil?
- # Turns out the two pools are not the result of a partially-finished upgrade; in this
- # case we give up and the upgrade will have to proceed manually.
- puts 'Unsupported node pool setup: could not find old and new pool'
- exit(1)
- end
- elsif pools.length == 1
- # Only one pool is the normal case; set old_pool and that's it.
- old_pool = pools.first
- else
- # If we have three or more or zero pools, upgrade will have to proceed manually.
- puts 'Unsupported node pool setup: unexpected number of pools'
- exit(1)
- end
- # Get names of the nodes in the old node pool
- old_nodes = `kubectl get nodes -l cloud.google.com/gke-nodepool=#{old_pool['name']} -o name`.split("\n")
-
- # If we don't already have a new pool (i.e. one with the new version), create one
- if new_pool.nil?
- # Pick a name for the new pool, alternating between blue and green
- new_pool_name = old_pool['name'] == 'blue' ? 'green' : 'blue'
-
- # Create a new node pool with all the same settings as the old one. The version of the new
- # pool will match the master version, which has already been updated.
- puts 'creating new node pool'
- command =
- "gcloud container node-pools create #{new_pool_name} \
- --cluster=#{cluster} \
- --disk-size=#{old_pool['config']['diskSizeGb']} \
- --image-type=#{old_pool['config']['imageType']} \
- --machine-type=#{old_pool['config']['machineType']} \
- --num-nodes=#{old_nodes.count} \
- --service-account=#{old_pool['serviceAccount']}"
- # TODO: support autoscaling if old pool has it turned on
- if system(command)
- puts 'new pool created successfully'
- else
- puts 'failed to create new pool'
- exit(1)
- end
- end
-
- # Cordon all the nodes in the old pool, preventing new workloads from being sent to them
- puts 'cordoning old nodes'
- old_nodes.each do |node|
- unless system("kubectl cordon #{node}")
- puts "failed to cordon node #{node}"
- exit(1)
- end
- end
-
- # Drain all the nodes in the old pool, moving workloads off of them gradually while
- # respecting maxUnavailable etc.
- puts 'draining old nodes'
- old_nodes.each do |node|
- # --force deletes pods that aren't managed by a ReplicationController, Job, or DaemonSet,
- # which shouldn't be any besides manually created temp pods
- # --ignore-daemonsets prevents failing due to presence of DaemonSets, which cannot be moved
- # because they're tied to a specific node
- # --delete-local-data prevents failing due to presence of local data, which cannot be moved
- # but is bad practice to use for anything that can't be lost
- puts "draining #{node}"
- unless system("kubectl drain --force --ignore-daemonsets --delete-local-data #{node}")
- puts "failed to drain node #{node}"
- exit(1)
- end
- end
-
- # All workloads which can be moved have been moved off of old node pool have been moved, so
- # that node pool can be deleted, leaving only the new pool with the new version
- if HighLine.agree('Delete old node pool?')
- puts 'deleting old node pool'
- if system("gcloud container node-pools delete #{old_pool['name']} --cluster #{cluster}")
- puts 'old pool deleted successfully'
- else
- puts 'failed to delete old pool'
- exit(1)
- end
- end
-
- puts 'upgrade complete!'
  end
  end
  end
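
With this change `cluster upgrade` becomes `upgrade-master`: it now asks for confirmation, upgrades only the control plane, and drops the node-pool rotation logic, which reappears as the standalone node-pools command added below. A small sketch of the guard that keeps the action safe to re-run (illustrative only; the describe output and version string are made up):

    # `gcloud container clusters describe` reports the current master version; if it
    # already matches the requested version, the upgrade call is skipped entirely.
    cluster_config = { 'currentMasterVersion' => '1.9.3-gke.0' }  # made-up describe output
    new_version = '1.9.3-gke.0'

    if cluster_config['currentMasterVersion'] == new_version
      puts 'Already up to date!'  # re-running after a successful upgrade is a no-op
    end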
data/lib/seira/memcached.rb CHANGED
@@ -64,12 +64,16 @@
  }
  }
 
+ replica_count = 3 # The default
+
  args.each do |arg|
  puts "Applying arg #{arg} to values"
  if arg.start_with?('--memory=')
  values[:resources][:requests][:memory] = arg.split('=')[1]
  elsif arg.start_with?('--cpu=')
  values[:resources][:requests][:cpu] = arg.split('=')[1]
+ elsif arg.start_with?('--replicas=')
+ replica_count = arg.split('=')[1]
  elsif arg.start_with?('--size=')
  size = arg.split('=')[1]
  case size
@@ -103,10 +107,16 @@
  end
  end
 
+ # Make sure that pdbMinAvailable is always 1 less than total replica count
+ # so that we can properly cordon and drain a node.
+ values[:replicaCount] = replica_count
+ values[:pdbMinAvailable] = replica_count - 1
+
+ unique_name = Seira::Random.unique_name(existing_instances)
+ name = "#{app}-memcached-#{unique_name}"
+
  Dir.mktmpdir do |dir|
  file_name = write_config(dir: dir, values: values)
- unique_name = Seira::Random.unique_name(existing_instances)
- name = "#{app}-memcached-#{unique_name}"
  puts `helm install --namespace #{app} --name #{name} --wait -f #{file_name} stable/memcached`
  end
 
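The memcached hunk above adds a --replicas flag and derives pdbMinAvailable from it, so the chart's PodDisruptionBudget always leaves one replica of headroom for cordoning and draining a node. A worked example of the relationship (illustrative only; the flag value is made up, and .to_i is assumed here because the raw value arrives from the command line as a string):

    arg = '--replicas=5'
    replica_count = arg.split('=')[1].to_i  # integer coercion assumed for the arithmetic below

    values = {}
    values[:replicaCount]    = replica_count      # => 5
    values[:pdbMinAvailable] = replica_count - 1  # => 4, so one node can always be drained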
data/lib/seira/node_pools.rb ADDED
@@ -0,0 +1,182 @@
+ require 'json'
+ require 'base64'
+ require 'fileutils'
+
+ # Example usages:
+ module Seira
+ class NodePools
+ VALID_ACTIONS = %w[help list list-nodes add cordon drain delete].freeze
+ SUMMARY = "For managing node pools for a cluster.".freeze
+
+ attr_reader :action, :args, :context, :settings
+
+ def initialize(action:, args:, context:, settings:)
+ @action = action
+ @args = args
+ @context = context
+ @settings = settings
+ end
+
+ def run
+ case action
+ when 'help'
+ run_help
+ when 'list'
+ run_list
+ when 'list-nodes'
+ run_list_nodes
+ when 'add'
+ run_add
+ when 'cordon'
+ run_cordon
+ when 'drain'
+ run_drain
+ when 'delete'
+ run_delete
+ else
+ fail "Unknown command encountered"
+ end
+ end
+
+ private
+
+ def run_help
+ puts SUMMARY
+ puts "\n\n"
+ puts "Possible actions:\n\n"
+ puts "list: List the node pools for this cluster: `node-pools list`"
+ puts "list-nodes: List the nodes in specified node pool: `node-pools list-nodes <node-pool-name>`"
+ puts "add: Create a node pool. First arg is the name to use, and use --copy to specify the existing node pool to copy."
+ puts " `node-pools add <node-pool-name> --copy=<existing-node-pool-name>`"
+ puts "cordon: Cordon nodes in specified node pool: `node-pools cordon <node-pool-name>`"
+ puts "drain: Drain all pods from specified node pool: `node-pools drain <node-pool-name>`"
+ puts "delete: Delete a node pool. Will force-run cordon and drain, first: `node-pools delete <node-pool-name>`"
+ end
+
+ # TODO: Info about what is running on it?
+ # TODO: What information do we get in the json format we could include here?
+ def run_list
+ puts `gcloud container node-pools list --cluster #{context[:cluster]}`
+ end
+
+ def run_list_nodes
+ puts nodes_for_pool(args.first)
+ end
+
+ def run_add
+ new_pool_name = args.shift
+ disk_size = nil
+ image_type = nil
+ machine_type = nil
+ service_account = nil
+ num_nodes = nil
+
+ args.each do |arg|
+ if arg.start_with? '--copy='
+ node_pool_name_to_copy = arg.split('=')[1]
+ node_pool_to_copy = node_pools.find { |p| p['name'] == node_pool_name_to_copy }
+
+ fail "Could not find node pool with name #{node_pool_name_to_copy} to copy from." if node_pool_to_copy.nil?
+
+ disk_size = node_pool_to_copy['config']['diskSizeGb']
+ image_type = node_pool_to_copy['config']['imageType']
+ machine_type = node_pool_to_copy['config']['machineType']
+ service_account = node_pool_to_copy['serviceAccount']
+ num_nodes = nodes_for_pool(node_pool_name_to_copy).count
+ else
+ puts "Warning: Unrecognized argument '#{arg}'"
+ end
+ end
+
+ command =
+ "gcloud container node-pools create #{new_pool_name} \
+ --cluster=#{context[:cluster]} \
+ --disk-size=#{disk_size} \
+ --image-type=#{image_type} \
+ --machine-type=#{machine_type} \
+ --num-nodes=#{num_nodes} \
+ --service-account=#{service_account}"
+
+ if system(command)
+ puts 'New pool created successfully'
+ else
+ puts 'Failed to create new pool'
+ exit(1)
+ end
+ end
+
+ def run_cordon
+ fail_if_lone_node_pool
+
+ node_pool_name = args.first
+ nodes = nodes_for_pool(node_pool_name)
+
+ nodes.each do |node|
+ unless system("kubectl cordon #{node}")
+ puts "Failed to cordon node #{node}"
+ exit(1)
+ end
+ end
+
+ puts "Successfully cordoned node pool #{node_pool_name}. No new workloads will be placed on #{node_pool_name} nodes."
+ end
+
+ def run_drain
+ fail_if_lone_node_pool
+
+ node_pool_name = args.first
+ nodes = nodes_for_pool(node_pool_name)
+
+ nodes.each do |node|
+ # --force deletes pods that aren't managed by a ReplicationController, Job, or DaemonSet,
+ # which shouldn't be any besides manually created temp pods
+ # --ignore-daemonsets prevents failing due to presence of DaemonSets, which cannot be moved
+ # because they're tied to a specific node
+ # --delete-local-data prevents failing due to presence of local data, which cannot be moved
+ # but is bad practice to use for anything that can't be lost
+ puts "Draining #{node}"
+ unless system("kubectl drain --force --ignore-daemonsets --delete-local-data #{node}")
+ puts "Failed to drain node #{node}"
+ exit(1)
+ end
+ end
+
+ puts "Successfully drained all nodes in node pool #{node_pool_name}. No pods are running on #{node_pool_name} nodes."
+ end
+
+ def run_delete
+ fail_if_lone_node_pool
+
+ node_pool_name = args.first
+
+ puts "Running cordon and drain as a safety measure first. If you haven't run these yet, please do so separately before deleting this node pool."
+ run_cordon
+ run_drain
+
+ exit(1) unless HighLine.agree "Node pool has successfully been cordoned and drained, and should be safe to delete. Continue deleting node pool #{node_pool_name}?"
+
+ if system("gcloud container node-pools delete #{node_pool_name} --cluster #{context[:cluster]}")
+ puts 'Node pool deleted successfully'
+ else
+ puts 'Failed to delete old pool'
+ exit(1)
+ end
+ end
+
+ # TODO: Represent by a ruby object?
+ def node_pools
+ JSON.parse(`gcloud container node-pools list --cluster #{context[:cluster]} --format json`)
+ end
+
+ def nodes_for_pool(pool_name)
+ `kubectl get nodes -l cloud.google.com/gke-nodepool=#{pool_name} -o name`.split("\n")
+ end
+
+ def fail_if_lone_node_pool
+ return if node_pools.count > 1
+
+ puts "Operation is unsafe to run with only one node pool. Please add a new node pool first to ensure services in cluster can continue running."
+ exit(1)
+ end
+ end
+ end
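
The new Seira::NodePools command wraps the gcloud and kubectl calls needed to rotate a GKE node pool: create a copy of the old pool, cordon it, drain it, then delete it. A sketch of the intended flow using the actions defined above (illustrative only; cluster and pool names 'internal', 'blue', 'green' are placeholders, and running this would actually shell out to gcloud/kubectl):

    #   seira internal node-pools list
    #   seira internal node-pools add green --copy=blue   # clone the old pool's settings
    #   seira internal node-pools cordon blue             # stop scheduling onto old nodes
    #   seira internal node-pools drain blue              # move workloads to the new pool
    #   seira internal node-pools delete blue             # re-runs cordon/drain, then deletes
    #
    # The same rotation invoked programmatically:
    require 'seira'

    %w[cordon drain delete].each do |action|
      Seira::NodePools.new(
        action: action,
        args: ['blue'],
        context: { cluster: 'internal' },
        settings: nil  # the runner normally supplies a Seira::Settings object
      ).run
    end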
data/lib/seira/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Seira
- VERSION = "0.3.2".freeze
+ VERSION = "0.3.3".freeze
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: seira
  version: !ruby/object:Gem::Version
- version: 0.3.2
+ version: 0.3.3
  platform: ruby
  authors:
  - Scott Ringwelski
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-02-28 00:00:00.000000000 Z
+ date: 2018-03-09 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: highline
@@ -124,6 +124,7 @@ files:
  - lib/seira/db/create.rb
  - lib/seira/jobs.rb
  - lib/seira/memcached.rb
+ - lib/seira/node_pools.rb
  - lib/seira/pods.rb
  - lib/seira/proxy.rb
  - lib/seira/random.rb