seira 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: c324b547f1bcefcdf4273e878da27dacf362c79a
- data.tar.gz: 740dbe2c8657affa050e6642d927fb82fa983bb5
+ metadata.gz: ee194647d3997370da2e9494debbfc5348064e26
+ data.tar.gz: 8b77f1bc88782a90081fc930615f261f6a40f971
  SHA512:
- metadata.gz: becbc4ed3755d1ec7c151ee9e1cf3da0df292e5c6cf9be9c2898342cf7c33f9b668b429a3cd727c1b513ae3048e3c78955dae24f5202e4a73cc0dfd473ca821c
- data.tar.gz: 0c8e1d381d3b043b56b78c13483280d379c66617492f94f779946260a62b9d687e1a6fcb94cae8f316534ceedb707caf46f80d515cd282a72b185b3719f9ec31
+ metadata.gz: e4dde3d0e4310683be3ec407d8090799319b88f6ca97eea0bb3a27748e52d7d8c2972533f2f56c9a2e3ef952eccf39b302988bcbe213e7057294c0ca2189c50e
+ data.tar.gz: bbfc6070333c8852503003a73435a150f216cb9b8ab4e0237b57171d274ab16b56be2cb386821d5fcf75bd496f487c8855b3fbaa2d186a4b6d966d399d2d00fa
data/lib/seira.rb CHANGED
@@ -17,6 +17,7 @@ require 'seira/redis'
  require 'seira/secrets'
  require 'seira/settings'
  require 'seira/setup'
+ require 'seira/node_pools'
 
  # A base runner class that does base checks and then delegates the actual
  # work for the command to a class in lib/seira folder.
@@ -32,7 +33,8 @@ module Seira
  'app' => Seira::App,
  'cluster' => Seira::Cluster,
  'proxy' => Seira::Proxy,
- 'setup' => Seira::Setup
+ 'setup' => Seira::Setup,
+ 'node-pools' => Seira::NodePools
  }.freeze
 
  attr_reader :project, :cluster, :app, :category, :action, :args
@@ -45,7 +47,7 @@ module Seira
 
  reversed_args = ARGV.reverse.map(&:chomp)
 
- # The cluster and proxy command are not specific to any app, so that
+ # The cluster, node-pools and proxy command are not specific to any app, so that
  # arg is not in the ARGV array and should be skipped over
  if ARGV[0] == 'help'
  @category = reversed_args.pop
@@ -54,6 +56,11 @@ module Seira
  @category = reversed_args.pop
  @action = reversed_args.pop
  @args = reversed_args.reverse
+ elsif ARGV[1] == 'node-pools'
+ cluster = reversed_args.pop
+ @category = reversed_args.pop
+ @action = reversed_args.pop
+ @args = reversed_args.reverse
  elsif ARGV[1] == 'proxy'
  cluster = reversed_args.pop
  @category = reversed_args.pop
@@ -103,6 +110,9 @@ module Seira
  if category == 'cluster'
  perform_action_validation(klass: command_class, action: action)
  command_class.new(action: action, args: args, context: passed_context, settings: settings).run
+ elsif category == 'node-pools'
+ perform_action_validation(klass: command_class, action: action)
+ command_class.new(action: action, args: args, context: passed_context, settings: settings).run
  elsif category == 'proxy'
  command_class.new.run
  else
@@ -132,7 +142,7 @@ module Seira
  def perform_action_validation(klass:, action:)
  return true if simple_cluster_change?
 
- unless klass == Seira::Cluster || settings.applications.include?(app)
+ unless klass == Seira::Cluster || klass == Seira::NodePools || settings.applications.include?(app)
  puts "Invalid app name specified"
  exit(1)
  end
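
Note: based on the argument parsing above, node-pools is routed like cluster and proxy, so it takes a cluster name in the position where app commands take an app name. A plausible invocation, assuming the gem's seira executable and an illustrative cluster name:

    seira my-cluster node-pools list
    seira my-cluster node-pools list-nodes my-pool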
data/lib/seira/app.rb CHANGED
@@ -42,7 +42,7 @@ module Seira
  puts "\n\n"
  puts "Possible actions:\n\n"
  puts "bootstrap: Create new app with main secret, cloudsql secret, and gcr secret in the new namespace."
- puts "apply: Apply the configuration in kubernetes/<cluster-name>/<app-name> using REVISION environment variable to find/replace REVISION in the YAML."
+ puts "apply: Apply the configuration in kubernetes/<cluster-name>/<app-name> using first argument or REVISION environment variable to find/replace REVISION in the YAML."
  puts "restart: Forces a rolling deploy for any deployment making use of RESTARTED_AT_VALUE in the deployment."
  puts "scale: Scales the given tier deployment to the specified number of instances."
  end
@@ -84,7 +84,7 @@ module Seira
 
  Dir.mktmpdir do |dir|
  destination = "#{dir}/#{context[:cluster]}/#{app}"
- revision = ENV['REVISION']
+ revision = args.first || ENV['REVISION']
 
  if revision.nil?
  current_revision = ask_cluster_for_current_revision
@@ -220,7 +220,7 @@ module Seira
  def load_configs
  directory = "kubernetes/#{context[:cluster]}/#{app}/"
  Dir.new(directory).flat_map do |filename|
- next if ['.', '..'].include? filename
+ next if File.directory?(File.join(directory, filename))
  YAML.load_stream(File.read(File.join(directory, filename)))
  end.compact
  end
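
Note: the apply change above means a revision passed as the first argument now takes precedence over the REVISION environment variable, and when neither is present apply still falls back to the revision the cluster reports. A minimal Ruby sketch of that ordering; resolve_revision is a hypothetical helper used only for illustration, not part of the gem:

    # Hypothetical helper mirroring the lookup order in the diff above.
    def resolve_revision(args, env)
      args.first || env['REVISION'] # explicit argument wins over the environment
    end

    resolve_revision(['abc123'], { 'REVISION' => 'def456' }) # => "abc123"
    resolve_revision([],         { 'REVISION' => 'def456' }) # => "def456"
    resolve_revision([],         {})                         # => nil, so apply asks the cluster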
data/lib/seira/cluster.rb CHANGED
@@ -5,7 +5,7 @@ require 'fileutils'
  # Example usages:
  module Seira
  class Cluster
- VALID_ACTIONS = %w[help bootstrap upgrade].freeze
+ VALID_ACTIONS = %w[help bootstrap upgrade-master].freeze
  SUMMARY = "For managing whole clusters.".freeze
 
  attr_reader :action, :args, :context, :settings
@@ -23,8 +23,8 @@ module Seira
  run_help
  when 'bootstrap'
  run_bootstrap
- when 'upgrade'
- run_upgrade
+ when 'upgrade-master'
+ run_upgrade_master
  else
  fail "Unknown command encountered"
  end
@@ -81,13 +81,13 @@ module Seira
  puts `kubectl create secret generic cloudsql-credentials --namespace default --from-file=credentials.json=#{cloudsql_credentials_location}`
  end
 
- def run_upgrade
+ def run_upgrade_master
  cluster = context[:cluster]
+ new_version = args.first
 
  # Take a single argument, which is the version to upgrade to
- new_version = args[0]
  if new_version.nil?
- puts 'must specify version to upgrade to'
+ puts 'Please specify version to upgrade to'
  exit(1)
  end
 
@@ -103,106 +103,18 @@ module Seira
  cluster_config = JSON.parse(`gcloud container clusters describe #{cluster} --format json`)
 
  # Update the master node first
- puts 'updating master (this may take a while)'
+ exit(1) unless Highline.agree("Are you sure you want to upgrade cluster #{cluster} master to version #{new_version}? Services should continue to run fine, but the cluster control plane will be offline.")
+
+ puts 'Updating master (this may take a while)'
  if cluster_config['currentMasterVersion'] == new_version
  # Master has already been updated; this step is not needed
- puts 'already up to date'
+ puts 'Already up to date!'
  elsif system("gcloud container clusters upgrade #{cluster} --cluster-version=#{new_version} --master")
- puts 'master updated successfully'
+ puts 'Master updated successfully!'
  else
- puts 'failed to update master'
+ puts 'Failed to update master.'
  exit(1)
  end
-
- # Figure out what our current node pool setup is. The goal here is to be able to re-run this
- # command if it fails partway through, and have it pick up where it left off.
- pools = JSON.parse(`gcloud container node-pools list --cluster #{cluster} --format json`)
- if pools.length == 2
- # We have two node pools. Assume this is due to the upgrade process already being started,
- # so we have one pool with the old version and one pool with the new version.
- old_pool = pools.find { |p| p['version'] != new_version }
- new_pool = pools.find { |p| p['version'] == new_version }
- if old_pool.nil? || new_pool.nil?
- # Turns out the two pools are not the result of a partially-finished upgrade; in this
- # case we give up and the upgrade will have to proceed manually.
- puts 'Unsupported node pool setup: could not find old and new pool'
- exit(1)
- end
- elsif pools.length == 1
- # Only one pool is the normal case; set old_pool and that's it.
- old_pool = pools.first
- else
- # If we have three or more or zero pools, upgrade will have to proceed manually.
- puts 'Unsupported node pool setup: unexpected number of pools'
- exit(1)
- end
- # Get names of the nodes in the old node pool
- old_nodes = `kubectl get nodes -l cloud.google.com/gke-nodepool=#{old_pool['name']} -o name`.split("\n")
-
- # If we don't already have a new pool (i.e. one with the new version), create one
- if new_pool.nil?
- # Pick a name for the new pool, alternating between blue and green
- new_pool_name = old_pool['name'] == 'blue' ? 'green' : 'blue'
-
- # Create a new node pool with all the same settings as the old one. The version of the new
- # pool will match the master version, which has already been updated.
- puts 'creating new node pool'
- command =
- "gcloud container node-pools create #{new_pool_name} \
- --cluster=#{cluster} \
- --disk-size=#{old_pool['config']['diskSizeGb']} \
- --image-type=#{old_pool['config']['imageType']} \
- --machine-type=#{old_pool['config']['machineType']} \
- --num-nodes=#{old_nodes.count} \
- --service-account=#{old_pool['serviceAccount']}"
- # TODO: support autoscaling if old pool has it turned on
- if system(command)
- puts 'new pool created successfully'
- else
- puts 'failed to create new pool'
- exit(1)
- end
- end
-
- # Cordon all the nodes in the old pool, preventing new workloads from being sent to them
- puts 'cordoning old nodes'
- old_nodes.each do |node|
- unless system("kubectl cordon #{node}")
- puts "failed to cordon node #{node}"
- exit(1)
- end
- end
-
- # Drain all the nodes in the old pool, moving workloads off of them gradually while
- # respecting maxUnavailable etc.
- puts 'draining old nodes'
- old_nodes.each do |node|
- # --force deletes pods that aren't managed by a ReplicationController, Job, or DaemonSet,
- # which shouldn't be any besides manually created temp pods
- # --ignore-daemonsets prevents failing due to presence of DaemonSets, which cannot be moved
- # because they're tied to a specific node
- # --delete-local-data prevents failing due to presence of local data, which cannot be moved
- # but is bad practice to use for anything that can't be lost
- puts "draining #{node}"
- unless system("kubectl drain --force --ignore-daemonsets --delete-local-data #{node}")
- puts "failed to drain node #{node}"
- exit(1)
- end
- end
-
- # All workloads which can be moved have been moved off of old node pool have been moved, so
- # that node pool can be deleted, leaving only the new pool with the new version
- if HighLine.agree('Delete old node pool?')
- puts 'deleting old node pool'
- if system("gcloud container node-pools delete #{old_pool['name']} --cluster #{cluster}")
- puts 'old pool deleted successfully'
- else
- puts 'failed to delete old pool'
- exit(1)
- end
- end
-
- puts 'upgrade complete!'
  end
  end
  end
data/lib/seira/memcached.rb CHANGED
@@ -64,12 +64,16 @@ module Seira
  }
  }
 
+ replica_count = 3 # The default
+
  args.each do |arg|
  puts "Applying arg #{arg} to values"
  if arg.start_with?('--memory=')
  values[:resources][:requests][:memory] = arg.split('=')[1]
  elsif arg.start_with?('--cpu=')
  values[:resources][:requests][:cpu] = arg.split('=')[1]
+ elsif arg.start_with?('--replicas=')
+ replica_count = arg.split('=')[1]
  elsif arg.start_with?('--size=')
  size = arg.split('=')[1]
  case size
@@ -103,10 +107,16 @@ module Seira
  end
  end
 
+ # Make sure that pdbMinAvailable is always 1 less than total replica count
+ # so that we can properly cordon and drain a node.
+ values[:replicaCount] = replica_count
+ values[:pdbMinAvailable] = replica_count - 1
+
+ unique_name = Seira::Random.unique_name(existing_instances)
+ name = "#{app}-memcached-#{unique_name}"
+
  Dir.mktmpdir do |dir|
  file_name = write_config(dir: dir, values: values)
- unique_name = Seira::Random.unique_name(existing_instances)
- name = "#{app}-memcached-#{unique_name}"
  puts `helm install --namespace #{app} --name #{name} --wait -f #{file_name} stable/memcached`
  end
 
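Note: the memcached change above keeps the PodDisruptionBudget one below the replica count so a node hosting a memcached pod can still be cordoned and drained. A small Ruby sketch of the resulting values, using the default replica count shown in the diff:

    # With the default of 3 replicas, at most one pod may be unavailable at a time.
    replica_count = 3
    values = {
      replicaCount: replica_count,
      pdbMinAvailable: replica_count - 1 # => 2
    }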
data/lib/seira/node_pools.rb ADDED
@@ -0,0 +1,182 @@
+ require 'json'
+ require 'base64'
+ require 'fileutils'
+
+ # Example usages:
+ module Seira
+ class NodePools
+ VALID_ACTIONS = %w[help list list-nodes add cordon drain delete].freeze
+ SUMMARY = "For managing node pools for a cluster.".freeze
+
+ attr_reader :action, :args, :context, :settings
+
+ def initialize(action:, args:, context:, settings:)
+ @action = action
+ @args = args
+ @context = context
+ @settings = settings
+ end
+
+ def run
+ case action
+ when 'help'
+ run_help
+ when 'list'
+ run_list
+ when 'list-nodes'
+ run_list_nodes
+ when 'add'
+ run_add
+ when 'cordon'
+ run_cordon
+ when 'drain'
+ run_drain
+ when 'delete'
+ run_delete
+ else
+ fail "Unknown command encountered"
+ end
+ end
+
+ private
+
+ def run_help
+ puts SUMMARY
+ puts "\n\n"
+ puts "Possible actions:\n\n"
+ puts "list: List the node pools for this cluster: `node-pools list`"
+ puts "list-nodes: List the nodes in specified node pool: `node-pools list-nodes <node-pool-name>`"
+ puts "add: Create a node pool. First arg is the name to use, and use --copy to specify the existing node pool to copy."
+ puts " `node-pools add <node-pool-name> --copy=<existing-node-pool-name>`"
+ puts "cordon: Cordon nodes in specified node pool: `node-pools cordon <node-pool-name>`"
+ puts "drain: Drain all pods from specified node pool: `node-pools drain <node-pool-name>`"
+ puts "delete: Delete a node pool. Will force-run cordon and drain, first: `node-pools delete <node-pool-name>`"
+ end
+
+ # TODO: Info about what is running on it?
+ # TODO: What information do we get in the json format we could include here?
+ def run_list
+ puts `gcloud container node-pools list --cluster #{context[:cluster]}`
+ end
+
+ def run_list_nodes
+ puts nodes_for_pool(args.first)
+ end
+
+ def run_add
+ new_pool_name = args.shift
+ disk_size = nil
+ image_type = nil
+ machine_type = nil
+ service_account = nil
+ num_nodes = nil
+
+ args.each do |arg|
+ if arg.start_with? '--copy='
+ node_pool_name_to_copy = arg.split('=')[1]
+ node_pool_to_copy = node_pools.find { |p| p['name'] == node_pool_name_to_copy }
+
+ fail "Could not find node pool with name #{node_pool_name_to_copy} to copy from." if node_pool_to_copy.nil?
+
+ disk_size = node_pool_to_copy['config']['diskSizeGb']
+ image_type = node_pool_to_copy['config']['imageType']
+ machine_type = node_pool_to_copy['config']['machineType']
+ service_account = node_pool_to_copy['serviceAccount']
+ num_nodes = nodes_for_pool(node_pool_name_to_copy).count
+ else
+ puts "Warning: Unrecognized argument '#{arg}'"
+ end
+ end
+
+ command =
+ "gcloud container node-pools create #{new_pool_name} \
+ --cluster=#{context[:cluster]} \
+ --disk-size=#{disk_size} \
+ --image-type=#{image_type} \
+ --machine-type=#{machine_type} \
+ --num-nodes=#{num_nodes} \
+ --service-account=#{service_account}"
+
+ if system(command)
+ puts 'New pool created successfully'
+ else
+ puts 'Failed to create new pool'
+ exit(1)
+ end
+ end
+
+ def run_cordon
+ fail_if_lone_node_pool
+
+ node_pool_name = args.first
+ nodes = nodes_for_pool(node_pool_name)
+
+ nodes.each do |node|
+ unless system("kubectl cordon #{node}")
+ puts "Failed to cordon node #{node}"
+ exit(1)
+ end
+ end
+
+ puts "Successfully cordoned node pool #{node_pool_name}. No new workloads will be placed on #{node_pool_name} nodes."
+ end
+
+ def run_drain
+ fail_if_lone_node_pool
+
+ node_pool_name = args.first
+ nodes = nodes_for_pool(node_pool_name)
+
+ nodes.each do |node|
+ # --force deletes pods that aren't managed by a ReplicationController, Job, or DaemonSet,
+ # which shouldn't be any besides manually created temp pods
+ # --ignore-daemonsets prevents failing due to presence of DaemonSets, which cannot be moved
+ # because they're tied to a specific node
+ # --delete-local-data prevents failing due to presence of local data, which cannot be moved
+ # but is bad practice to use for anything that can't be lost
+ puts "Draining #{node}"
+ unless system("kubectl drain --force --ignore-daemonsets --delete-local-data #{node}")
+ puts "Failed to drain node #{node}"
+ exit(1)
+ end
+ end
+
+ puts "Successfully drained all nodes in node pool #{node_pool_name}. No pods are running on #{node_pool_name} nodes."
+ end
+
+ def run_delete
+ fail_if_lone_node_pool
+
+ node_pool_name = args.first
+
+ puts "Running cordon and drain as a safety measure first. If you haven't run these yet, please do so separately before deleting this node pool."
+ run_cordon
+ run_drain
+
+ exit(1) unless HighLine.agree "Node pool has successfully been cordoned and drained, and should be safe to delete. Continue deleting node pool #{node_pool_name}?"
+
+ if system("gcloud container node-pools delete #{node_pool_name} --cluster #{context[:cluster]}")
+ puts 'Node pool deleted successfully'
+ else
+ puts 'Failed to delete old pool'
+ exit(1)
+ end
+ end
+
+ # TODO: Represent by a ruby object?
+ def node_pools
+ JSON.parse(`gcloud container node-pools list --cluster #{context[:cluster]} --format json`)
+ end
+
+ def nodes_for_pool(pool_name)
+ `kubectl get nodes -l cloud.google.com/gke-nodepool=#{pool_name} -o name`.split("\n")
+ end
+
+ def fail_if_lone_node_pool
+ return if node_pools.count > 1
+
+ puts "Operation is unsafe to run with only one node pool. Please add a new node pool first to ensure services in cluster can continue running."
+ exit(1)
+ end
+ end
+ end
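
Note: together with the cluster upgrade-master change above, the node-pool rotation that run_upgrade used to perform in one step is now an explicit sequence of node-pools commands. A plausible end-to-end flow, where the executable, cluster name, pool names, and version are illustrative placeholders:

    seira my-cluster cluster upgrade-master <new-version>
    seira my-cluster node-pools add green --copy=blue
    seira my-cluster node-pools cordon blue
    seira my-cluster node-pools drain blue
    seira my-cluster node-pools delete blue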
data/lib/seira/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Seira
- VERSION = "0.3.2".freeze
+ VERSION = "0.3.3".freeze
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: seira
  version: !ruby/object:Gem::Version
- version: 0.3.2
+ version: 0.3.3
  platform: ruby
  authors:
  - Scott Ringwelski
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-02-28 00:00:00.000000000 Z
+ date: 2018-03-09 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: highline
@@ -124,6 +124,7 @@ files:
  - lib/seira/db/create.rb
  - lib/seira/jobs.rb
  - lib/seira/memcached.rb
+ - lib/seira/node_pools.rb
  - lib/seira/pods.rb
  - lib/seira/proxy.rb
  - lib/seira/random.rb