hetzner-k3s 0.4.7 → 0.5.1
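In this release the `Cluster` class drops the k8s-ruby client in favour of shelling out to kubectl (via a new `Utils#run` helper guarded by `check_kubectl`), creates a placement group per worker node pool, adds optional IPsec encryption for the flannel backend (`enable_ipsec_encryption`), supports an `additional_packages` setting, and bumps the System Upgrade Controller from v0.8.0 to v0.8.1. The code is also reworked for RuboCop style (frozen string literals, single-quoted strings) and uses the shorthand keyword-argument syntax (`hetzner_client:, cluster_name:`) introduced in Ruby 3.1.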

@@ -1,22 +1,24 @@
- require 'thread'
+ # frozen_string_literal: true
+
  require 'net/ssh'
- require "securerandom"
- require "base64"
- require "k8s-ruby"
+ require 'securerandom'
+ require 'base64'
  require 'timeout'
+ require 'subprocess'

- require_relative "../infra/client"
- require_relative "../infra/firewall"
- require_relative "../infra/network"
- require_relative "../infra/ssh_key"
- require_relative "../infra/server"
- require_relative "../infra/load_balancer"
- require_relative "../infra/placement_group"
-
- require_relative "../k3s/client_patch"
+ require_relative '../infra/client'
+ require_relative '../infra/firewall'
+ require_relative '../infra/network'
+ require_relative '../infra/ssh_key'
+ require_relative '../infra/server'
+ require_relative '../infra/load_balancer'
+ require_relative '../infra/placement_group'

+ require_relative '../utils'

  class Cluster
+   include Utils
+
    def initialize(hetzner_client:, hetzner_token:)
      @hetzner_client = hetzner_client
      @hetzner_token = hetzner_token
@@ -24,18 +26,19 @@ class Cluster

    def create(configuration:)
      @configuration = configuration
-     @cluster_name = configuration.dig("cluster_name")
-     @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
-     @public_ssh_key_path = File.expand_path(configuration.dig("public_ssh_key_path"))
-     private_ssh_key_path = configuration.dig("private_ssh_key_path")
-     @private_ssh_key_path = File.expand_path(private_ssh_key_path) if private_ssh_key_path
-     @k3s_version = configuration.dig("k3s_version")
-     @masters_config = configuration.dig("masters")
+     @cluster_name = configuration['cluster_name']
+     @kubeconfig_path = File.expand_path(configuration['kubeconfig_path'])
+     @public_ssh_key_path = File.expand_path(configuration['public_ssh_key_path'])
+     private_ssh_key_path = configuration['private_ssh_key_path']
+     @private_ssh_key_path = private_ssh_key_path && File.expand_path(private_ssh_key_path)
+     @k3s_version = configuration['k3s_version']
+     @masters_config = configuration['masters']
      @worker_node_pools = find_worker_node_pools(configuration)
-     @location = configuration.dig("location")
-     @verify_host_key = configuration.fetch("verify_host_key", false)
+     @location = configuration['location']
+     @verify_host_key = configuration.fetch('verify_host_key', false)
      @servers = []
-     @networks = configuration.dig("ssh_allowed_networks")
+     @networks = configuration['ssh_allowed_networks']
+     @enable_ipsec_encryption = configuration.fetch('enable_ipsec_encryption', false)

      create_resources

@@ -49,17 +52,20 @@ class Cluster
    end

    def delete(configuration:)
-     @cluster_name = configuration.dig("cluster_name")
-     @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
-     @public_ssh_key_path = File.expand_path(configuration.dig("public_ssh_key_path"))
+     @configuration = configuration
+     @cluster_name = configuration['cluster_name']
+     @kubeconfig_path = File.expand_path(configuration['kubeconfig_path'])
+     @public_ssh_key_path = File.expand_path(configuration['public_ssh_key_path'])
+     @masters_config = configuration['masters']
+     @worker_node_pools = find_worker_node_pools(configuration)

      delete_resources
    end

    def upgrade(configuration:, new_k3s_version:, config_file:)
      @configuration = configuration
-     @cluster_name = configuration.dig("cluster_name")
-     @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
+     @cluster_name = configuration['cluster_name']
+     @kubeconfig_path = File.expand_path(configuration['kubeconfig_path'])
      @new_k3s_version = new_k3s_version
      @config_file = config_file

@@ -68,597 +74,510 @@ class Cluster

    private

-   def find_worker_node_pools(configuration)
-     configuration.fetch("worker_node_pools", [])
-   end
-
-   attr_accessor :servers
-
-   attr_reader :hetzner_client, :cluster_name, :kubeconfig_path, :k3s_version,
-               :masters_config, :worker_node_pools,
-               :location, :public_ssh_key_path, :kubernetes_client,
-               :hetzner_token, :tls_sans, :new_k3s_version, :configuration,
-               :config_file, :verify_host_key, :networks, :private_ssh_key_path, :configuration
-
-
-   def latest_k3s_version
-     response = HTTP.get("https://api.github.com/repos/k3s-io/k3s/tags").body
-     JSON.parse(response).first["name"]
-   end
-
-   def create_resources
-     master_instance_type = masters_config["instance_type"]
-     masters_count = masters_config["instance_count"]
-
-     placement_group_id = Hetzner::PlacementGroup.new(
-       hetzner_client: hetzner_client,
-       cluster_name: cluster_name
-     ).create
-
-     firewall_id = Hetzner::Firewall.new(
-       hetzner_client: hetzner_client,
-       cluster_name: cluster_name
-     ).create(ha: (masters_count > 1), networks: networks)
-
-     network_id = Hetzner::Network.new(
-       hetzner_client: hetzner_client,
-       cluster_name: cluster_name
-     ).create(location: location)
-
-     ssh_key_id = Hetzner::SSHKey.new(
-       hetzner_client: hetzner_client,
-       cluster_name: cluster_name
-     ).create(public_ssh_key_path: public_ssh_key_path)
-
-     server_configs = []
-
-     masters_count.times do |i|
-       server_configs << {
-         location: location,
-         instance_type: master_instance_type,
-         instance_id: "master#{i+1}",
-         firewall_id: firewall_id,
-         network_id: network_id,
-         ssh_key_id: ssh_key_id,
-         placement_group_id: placement_group_id,
-         image: image
-       }
-     end
-
-     if masters_count > 1
-       Hetzner::LoadBalancer.new(
-         hetzner_client: hetzner_client,
-         cluster_name: cluster_name
-       ).create(location: location, network_id: network_id)
-     end
-
-     worker_node_pools.each do |worker_node_pool|
-       worker_node_pool_name = worker_node_pool["name"]
-       worker_instance_type = worker_node_pool["instance_type"]
-       worker_count = worker_node_pool["instance_count"]
-
-       worker_count.times do |i|
-         server_configs << {
-           location: location,
-           instance_type: worker_instance_type,
-           instance_id: "pool-#{worker_node_pool_name}-worker#{i+1}",
-           firewall_id: firewall_id,
-           network_id: network_id,
-           ssh_key_id: ssh_key_id,
-           placement_group_id: placement_group_id,
-           image: image
-         }
-       end
-     end
-
-     threads = server_configs.map do |server_config|
-       Thread.new do
-         servers << Hetzner::Server.new(hetzner_client: hetzner_client, cluster_name: cluster_name).create(server_config)
-       end
-     end
+   attr_accessor :servers

-     threads.each(&:join) unless threads.empty?
+   attr_reader :hetzner_client, :cluster_name, :kubeconfig_path, :k3s_version,
+               :masters_config, :worker_node_pools,
+               :location, :public_ssh_key_path,
+               :hetzner_token, :new_k3s_version, :configuration,
+               :config_file, :verify_host_key, :networks, :private_ssh_key_path,
+               :enable_ipsec_encryption

-     while servers.size != server_configs.size
-       sleep 1
-     end
+   def find_worker_node_pools(configuration)
+     configuration.fetch('worker_node_pools', [])
+   end

-     puts
-     threads = servers.map do |server|
-       Thread.new { wait_for_ssh server }
-     end
+   def latest_k3s_version
+     response = HTTP.get('https://api.github.com/repos/k3s-io/k3s/tags').body
+     JSON.parse(response).first['name']
+   end

-     threads.each(&:join) unless threads.empty?
-   end
+   def create_resources
+     create_servers
+     create_load_balancer if masters.size > 1
+   end

-   def delete_resources
-     Hetzner::PlacementGroup.new(
-       hetzner_client: hetzner_client,
-       cluster_name: cluster_name
-     ).delete
-
-     Hetzner::LoadBalancer.new(
-       hetzner_client: hetzner_client,
-       cluster_name: cluster_name
-     ).delete(ha: (masters.size > 1))
-
-     Hetzner::Firewall.new(
-       hetzner_client: hetzner_client,
-       cluster_name: cluster_name
-     ).delete(all_servers)
-
-     Hetzner::Network.new(
-       hetzner_client: hetzner_client,
-       cluster_name: cluster_name
-     ).delete
-
-     Hetzner::SSHKey.new(
-       hetzner_client: hetzner_client,
-       cluster_name: cluster_name
-     ).delete(public_ssh_key_path: public_ssh_key_path)
-
-     threads = all_servers.map do |server|
-       Thread.new do
-         Hetzner::Server.new(hetzner_client: hetzner_client, cluster_name: cluster_name).delete(server_name: server["name"])
-       end
-     end
+   def delete_placement_groups
+     Hetzner::PlacementGroup.new(hetzner_client:, cluster_name:).delete

-     threads.each(&:join) unless threads.empty?
+     worker_node_pools.each do |pool|
+       pool_name = pool['name']
+       Hetzner::PlacementGroup.new(hetzner_client:, cluster_name:, pool_name:).delete
      end
+   end

-   def upgrade_cluster
-     resources = K8s::Resource.from_files(ugrade_plan_manifest_path)
+   def delete_resources
+     Hetzner::LoadBalancer.new(hetzner_client:, cluster_name:).delete(high_availability: (masters.size > 1))

-     begin
-       kubernetes_client.api("upgrade.cattle.io/v1").resource("plans").get("k3s-server", namespace: "system-upgrade")
+     Hetzner::Firewall.new(hetzner_client:, cluster_name:).delete(all_servers)

-       puts "Aborting - an upgrade is already in progress."
+     Hetzner::Network.new(hetzner_client:, cluster_name:).delete

-     rescue K8s::Error::NotFound
-       resources.each do |resource|
-         kubernetes_client.create_resource(resource)
-       end
+     Hetzner::SSHKey.new(hetzner_client:, cluster_name:).delete(public_ssh_key_path:)

-       puts "Upgrade will now start. Run `watch kubectl get nodes` to see the nodes being upgraded. This should take a few minutes for a small cluster."
-       puts "The API server may be briefly unavailable during the upgrade of the controlplane."
+     delete_placement_groups
+     delete_servers
+   end

-       configuration["k3s_version"] = new_k3s_version
+   def upgrade_cluster
+     worker_upgrade_concurrency = workers.size - 1
+     worker_upgrade_concurrency = 1 if worker_upgrade_concurrency.zero?

-       File.write(config_file, configuration.to_yaml)
-     end
-   end
+     cmd = <<~BASH
+       kubectl apply -f - <<-EOF
+       apiVersion: upgrade.cattle.io/v1
+       kind: Plan
+       metadata:
+         name: k3s-server
+         namespace: system-upgrade
+         labels:
+           k3s-upgrade: server
+       spec:
+         concurrency: 1
+         version: #{new_k3s_version}
+         nodeSelector:
+           matchExpressions:
+             - {key: node-role.kubernetes.io/master, operator: In, values: ["true"]}
+         serviceAccountName: system-upgrade
+         tolerations:
+           - key: "CriticalAddonsOnly"
+             operator: "Equal"
+             value: "true"
+             effect: "NoExecute"
+         cordon: true
+         upgrade:
+           image: rancher/k3s-upgrade
+       EOF
+     BASH

+     run cmd, kubeconfig_path: kubeconfig_path

-   def master_script(master)
-     server = master == first_master ? " --cluster-init " : " --server https://#{first_master_private_ip}:6443 "
-     flannel_interface = find_flannel_interface(master)
-
-     taint = schedule_workloads_on_masters? ? " " : " --node-taint CriticalAddonsOnly=true:NoExecute "
-
-     <<~EOF
-       curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="#{k3s_version}" K3S_TOKEN="#{k3s_token}" INSTALL_K3S_EXEC="server \
-       --disable-cloud-controller \
-       --disable servicelb \
-       --disable traefik \
-       --disable local-storage \
-       --disable metrics-server \
-       --write-kubeconfig-mode=644 \
-       --node-name="$(hostname -f)" \
-       --cluster-cidr=10.244.0.0/16 \
-       --etcd-expose-metrics=true \
-       --kube-controller-manager-arg="address=0.0.0.0" \
-       --kube-controller-manager-arg="bind-address=0.0.0.0" \
-       --kube-proxy-arg="metrics-bind-address=0.0.0.0" \
-       --kube-scheduler-arg="address=0.0.0.0" \
-       --kube-scheduler-arg="bind-address=0.0.0.0" \
-       #{taint} \
-       --kubelet-arg="cloud-provider=external" \
-       --advertise-address=$(hostname -I | awk '{print $2}') \
-       --node-ip=$(hostname -I | awk '{print $2}') \
-       --node-external-ip=$(hostname -I | awk '{print $1}') \
-       --flannel-iface=#{flannel_interface} \
-       #{server} #{tls_sans}" sh -
+     cmd = <<~BASH
+       kubectl apply -f - <<-EOF
+       apiVersion: upgrade.cattle.io/v1
+       kind: Plan
+       metadata:
+         name: k3s-agent
+         namespace: system-upgrade
+         labels:
+           k3s-upgrade: agent
+       spec:
+         concurrency: #{worker_upgrade_concurrency}
+         version: #{new_k3s_version}
+         nodeSelector:
+           matchExpressions:
+             - {key: node-role.kubernetes.io/master, operator: NotIn, values: ["true"]}
+         serviceAccountName: system-upgrade
+         prepare:
+           image: rancher/k3s-upgrade
+           args: ["prepare", "k3s-server"]
+         cordon: true
+         upgrade:
+           image: rancher/k3s-upgrade
       EOF
-   end
+     BASH

-   def worker_script(worker)
-     flannel_interface = find_flannel_interface(worker)
+     run cmd, kubeconfig_path: kubeconfig_path

-     <<~EOF
-       curl -sfL https://get.k3s.io | K3S_TOKEN="#{k3s_token}" INSTALL_K3S_VERSION="#{k3s_version}" K3S_URL=https://#{first_master_private_ip}:6443 INSTALL_K3S_EXEC="agent \
-       --node-name="$(hostname -f)" \
-       --kubelet-arg="cloud-provider=external" \
-       --node-ip=$(hostname -I | awk '{print $2}') \
-       --node-external-ip=$(hostname -I | awk '{print $1}') \
-       --flannel-iface=#{flannel_interface}" sh -
-     EOF
-   end
+     puts 'Upgrade will now start. Run `watch kubectl get nodes` to see the nodes being upgraded. This should take a few minutes for a small cluster.'
+     puts 'The API server may be briefly unavailable during the upgrade of the controlplane.'

-   def deploy_kubernetes
-     puts
-     puts "Deploying k3s to first master (#{first_master["name"]})..."
+     configuration['k3s_version'] = new_k3s_version

-     ssh first_master, master_script(first_master), print_output: true
+     File.write(config_file, configuration.to_yaml)
+   end

-     puts
-     puts "...k3s has been deployed to first master."
+   def master_script(master)
+     server = master == first_master ? ' --cluster-init ' : " --server https://#{first_master_private_ip}:6443 "
+     flannel_interface = find_flannel_interface(master)
+     flannel_ipsec = enable_ipsec_encryption ? ' --flannel-backend=ipsec ' : ' '
+
+     taint = schedule_workloads_on_masters? ? ' ' : ' --node-taint CriticalAddonsOnly=true:NoExecute '
+
+     <<~SCRIPT
+       curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="#{k3s_version}" K3S_TOKEN="#{k3s_token}" INSTALL_K3S_EXEC="server \
+       --disable-cloud-controller \
+       --disable servicelb \
+       --disable traefik \
+       --disable local-storage \
+       --disable metrics-server \
+       --write-kubeconfig-mode=644 \
+       --node-name="$(hostname -f)" \
+       --cluster-cidr=10.244.0.0/16 \
+       --etcd-expose-metrics=true \
+       #{flannel_ipsec} \
+       --kube-controller-manager-arg="address=0.0.0.0" \
+       --kube-controller-manager-arg="bind-address=0.0.0.0" \
+       --kube-proxy-arg="metrics-bind-address=0.0.0.0" \
+       --kube-scheduler-arg="address=0.0.0.0" \
+       --kube-scheduler-arg="bind-address=0.0.0.0" \
+       #{taint} \
+       --kubelet-arg="cloud-provider=external" \
+       --advertise-address=$(hostname -I | awk '{print $2}') \
+       --node-ip=$(hostname -I | awk '{print $2}') \
+       --node-external-ip=$(hostname -I | awk '{print $1}') \
+       --flannel-iface=#{flannel_interface} \
+       #{server} #{tls_sans}" sh -
+     SCRIPT
+   end

-     save_kubeconfig
+   def worker_script(worker)
+     flannel_interface = find_flannel_interface(worker)
+
+     <<~BASH
+       curl -sfL https://get.k3s.io | K3S_TOKEN="#{k3s_token}" INSTALL_K3S_VERSION="#{k3s_version}" K3S_URL=https://#{first_master_private_ip}:6443 INSTALL_K3S_EXEC="agent \
+       --node-name="$(hostname -f)" \
+       --kubelet-arg="cloud-provider=external" \
+       --node-ip=$(hostname -I | awk '{print $2}') \
+       --node-external-ip=$(hostname -I | awk '{print $1}') \
+       --flannel-iface=#{flannel_interface}" sh -
+     BASH
+   end

-     if masters.size > 1
-       threads = masters[1..-1].map do |master|
-         Thread.new do
-           puts
-           puts "Deploying k3s to master #{master["name"]}..."
+   def deploy_kubernetes
+     puts
+     puts "Deploying k3s to first master (#{first_master['name']})..."

-           ssh master, master_script(master), print_output: true
+     ssh first_master, master_script(first_master), print_output: true

-           puts
-           puts "...k3s has been deployed to master #{master["name"]}."
-         end
-       end
+     puts
+     puts '...k3s has been deployed to first master.'

-       threads.each(&:join) unless threads.empty?
-     end
+     save_kubeconfig

-     threads = workers.map do |worker|
+     if masters.size > 1
+       threads = masters[1..].map do |master|
          Thread.new do
            puts
-           puts "Deploying k3s to worker (#{worker["name"]})..."
+           puts "Deploying k3s to master #{master['name']}..."

-           ssh worker, worker_script(worker), print_output: true
+           ssh master, master_script(master), print_output: true

            puts
-           puts "...k3s has been deployed to worker (#{worker["name"]})."
+           puts "...k3s has been deployed to master #{master['name']}."
          end
        end

        threads.each(&:join) unless threads.empty?
      end

-   def deploy_cloud_controller_manager
-     puts
-     puts "Deploying Hetzner Cloud Controller Manager..."
-
-     begin
-       kubernetes_client.api("v1").resource("secrets").get("hcloud", namespace: "kube-system")
-
-     rescue K8s::Error::NotFound
-       secret = K8s::Resource.new(
-         apiVersion: "v1",
-         kind: "Secret",
-         metadata: {
-           namespace: 'kube-system',
-           name: 'hcloud',
-         },
-         data: {
-           network: Base64.encode64(cluster_name),
-           token: Base64.encode64(hetzner_token)
-         }
-       )
-
-       kubernetes_client.api('v1').resource('secrets').create_resource(secret)
-     end
-
+     threads = workers.map do |worker|
+       Thread.new do
+         puts
+         puts "Deploying k3s to worker (#{worker['name']})..."

-     manifest = fetch_manifest("https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/latest/download/ccm-networks.yaml")
+         ssh worker, worker_script(worker), print_output: true

-     File.write("/tmp/cloud-controller-manager.yaml", manifest)
+         puts
+         puts "...k3s has been deployed to worker (#{worker['name']})."
+       end
+     end

-     resources = K8s::Resource.from_files("/tmp/cloud-controller-manager.yaml")
+     threads.each(&:join) unless threads.empty?
+   end

-     begin
-       kubernetes_client.api("apps/v1").resource("deployments").get("hcloud-cloud-controller-manager", namespace: "kube-system")
+   def deploy_cloud_controller_manager
+     check_kubectl

-       resources.each do |resource|
-         kubernetes_client.update_resource(resource)
-       end
+     puts
+     puts 'Deploying Hetzner Cloud Controller Manager...'

-     rescue K8s::Error::NotFound
-       resources.each do |resource|
-         kubernetes_client.create_resource(resource)
-       end
+     cmd = <<~BASH
+       kubectl apply -f - <<-EOF
+       apiVersion: "v1"
+       kind: "Secret"
+       metadata:
+         namespace: 'kube-system'
+         name: 'hcloud'
+       stringData:
+         network: "#{cluster_name}"
+         token: "#{hetzner_token}"
+       EOF
+     BASH

-     end
+     run cmd, kubeconfig_path: kubeconfig_path

-     puts "...Cloud Controller Manager deployed"
-   rescue Excon::Error::Socket
-     retry
-   end
+     cmd = 'kubectl apply -f https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/latest/download/ccm-networks.yaml'

-   def fetch_manifest(url)
-     retries ||= 1
-     HTTP.follow.get(url).body
-   rescue
-     retry if (retries += 1) <= 10
-   end
+     run cmd, kubeconfig_path: kubeconfig_path

-   def deploy_system_upgrade_controller
-     puts
-     puts "Deploying k3s System Upgrade Controller..."
+     puts '...Cloud Controller Manager deployed'
+   end

-     manifest = HTTP.follow.get("https://github.com/rancher/system-upgrade-controller/releases/download/v0.8.0/system-upgrade-controller.yaml").body
+   def deploy_system_upgrade_controller
+     check_kubectl

-     File.write("/tmp/system-upgrade-controller.yaml", manifest)
+     puts
+     puts 'Deploying k3s System Upgrade Controller...'

-     resources = K8s::Resource.from_files("/tmp/system-upgrade-controller.yaml")
+     cmd = 'kubectl apply -f https://github.com/rancher/system-upgrade-controller/releases/download/v0.8.1/system-upgrade-controller.yaml'

-     begin
-       kubernetes_client.api("apps/v1").resource("deployments").get("system-upgrade-controller", namespace: "system-upgrade")
+     run cmd, kubeconfig_path: kubeconfig_path

-       resources.each do |resource|
-         kubernetes_client.update_resource(resource)
-       end
+     puts '...k3s System Upgrade Controller deployed'
+   end

-     rescue K8s::Error::NotFound
-       resources.each do |resource|
-         kubernetes_client.create_resource(resource)
-       end
+   def deploy_csi_driver
+     check_kubectl

-     end
+     puts
+     puts 'Deploying Hetzner CSI Driver...'

-     puts "...k3s System Upgrade Controller deployed"
-   rescue Excon::Error::Socket
-     retry
-   end
+     cmd = <<~BASH
+       kubectl apply -f - <<-EOF
+       apiVersion: "v1"
+       kind: "Secret"
+       metadata:
+         namespace: 'kube-system'
+         name: 'hcloud-csi'
+       stringData:
+         token: "#{hetzner_token}"
+       EOF
+     BASH

-   def deploy_csi_driver
-     puts
-     puts "Deploying Hetzner CSI Driver..."
-
-     begin
-       kubernetes_client.api("v1").resource("secrets").get("hcloud-csi", namespace: "kube-system")
-
-     rescue K8s::Error::NotFound
-       secret = K8s::Resource.new(
-         apiVersion: "v1",
-         kind: "Secret",
-         metadata: {
-           namespace: 'kube-system',
-           name: 'hcloud-csi',
-         },
-         data: {
-           token: Base64.encode64(hetzner_token)
-         }
-       )
-
-       kubernetes_client.api('v1').resource('secrets').create_resource(secret)
-     end
+     run cmd, kubeconfig_path: kubeconfig_path

+     cmd = 'kubectl apply -f https://raw.githubusercontent.com/hetznercloud/csi-driver/v1.6.0/deploy/kubernetes/hcloud-csi.yml'

-     manifest = HTTP.follow.get("https://raw.githubusercontent.com/hetznercloud/csi-driver/v1.6.0/deploy/kubernetes/hcloud-csi.yml").body
+     run cmd, kubeconfig_path: kubeconfig_path

-     File.write("/tmp/csi-driver.yaml", manifest)
+     puts '...CSI Driver deployed'
+   end

-     resources = K8s::Resource.from_files("/tmp/csi-driver.yaml")
+   def find_flannel_interface(server)
+     if ssh(server, 'lscpu | grep Vendor') =~ /Intel/
+       'ens10'
+     else
+       'enp7s0'
+     end
+   end

-     begin
-       kubernetes_client.api("apps/v1").resource("daemonsets").get("hcloud-csi-node", namespace: "kube-system")
+   def all_servers
+     @all_servers ||= hetzner_client.get('/servers?sort=created:desc')['servers'].select do |server|
+       belongs_to_cluster?(server) == true
+     end
+   end

+   def masters
+     @masters ||= all_servers.select { |server| server['name'] =~ /master\d+\Z/ }.sort { |a, b| a['name'] <=> b['name'] }
+   end

-     resources.each do |resource|
-       begin
-         kubernetes_client.update_resource(resource)
-       rescue K8s::Error::Invalid => e
-         raise e unless e.message =~ /must be specified/i
-       end
-     end
+   def workers
+     @workers = all_servers.select { |server| server['name'] =~ /worker\d+\Z/ }.sort { |a, b| a['name'] <=> b['name'] }
+   end

-     rescue K8s::Error::NotFound
-       resources.each do |resource|
-         kubernetes_client.create_resource(resource)
-       end
+   def k3s_token
+     @k3s_token ||= begin
+       token = ssh(first_master, '{ TOKEN=$(< /var/lib/rancher/k3s/server/node-token); } 2> /dev/null; echo $TOKEN')

+       if token.empty?
+         SecureRandom.hex
+       else
+         token.split(':').last
        end
-
-     puts "...CSI Driver deployed"
-   rescue Excon::Error::Socket
-     retry
     end
+   end

-   def wait_for_ssh(server)
-     Timeout::timeout(5) do
-       server_name = server["name"]
+   def first_master_private_ip
+     @first_master_private_ip ||= first_master['private_net'][0]['ip']
+   end

-       puts "Waiting for server #{server_name} to be up..."
+   def first_master
+     masters.first
+   end

-       loop do
-         result = ssh(server, "echo UP")
-         break if result == "UP"
-       end
+   def api_server_ip
+     return @api_server_ip if @api_server_ip
+
+     @api_server_ip = if masters.size > 1
+       load_balancer_name = "#{cluster_name}-api"
+       load_balancer = hetzner_client.get('/load_balancers')['load_balancers'].detect do |lb|
+         lb['name'] == load_balancer_name
+       end
+       load_balancer['public_net']['ipv4']['ip']
+     else
+       first_master_public_ip
+     end
+   end

-       puts "...server #{server_name} is now up."
-     end
-   rescue Errno::ENETUNREACH, Errno::EHOSTUNREACH, Timeout::Error, IOError
-     retry
+   def tls_sans
+     sans = " --tls-san=#{api_server_ip} "
+
+     masters.each do |master|
+       master_private_ip = master['private_net'][0]['ip']
+       sans << " --tls-san=#{master_private_ip} "
      end

-   def ssh(server, command, print_output: false)
-     public_ip = server.dig("public_net", "ipv4", "ip")
-     output = ""
+     sans
+   end

-     params = { verify_host_key: (verify_host_key ? :always : :never) }
+   def first_master_public_ip
+     @first_master_public_ip ||= first_master.dig('public_net', 'ipv4', 'ip')
+   end

-     if private_ssh_key_path
-       params[:keys] = [private_ssh_key_path]
-     end
+   def save_kubeconfig
+     kubeconfig = ssh(first_master, 'cat /etc/rancher/k3s/k3s.yaml')
+                  .gsub('127.0.0.1', api_server_ip)
+                  .gsub('default', cluster_name)

-     Net::SSH.start(public_ip, "root", params) do |session|
-       session.exec!(command) do |channel, stream, data|
-         output << data
-         puts data if print_output
-       end
-     end
-     output.chop
-   rescue Net::SSH::Disconnect => e
-     retry unless e.message =~ /Too many authentication failures/
-   rescue Net::SSH::ConnectionTimeout, Errno::ECONNREFUSED, Errno::ENETUNREACH, Errno::EHOSTUNREACH
-     retry
-   rescue Net::SSH::AuthenticationFailed
-     puts
-     puts "Cannot continue: SSH authentication failed. Please ensure that the private SSH key is correct."
-     exit 1
-   rescue Net::SSH::HostKeyMismatch
-     puts
-     puts "Cannot continue: Unable to SSH into server with IP #{public_ip} because the existing fingerprint in the known_hosts file does not match that of the actual host key."
-     puts "This is due to a security check but can also happen when creating a new server that gets assigned the same IP address as another server you've owned in the past."
-     puts "If are sure no security is being violated here and you're just creating new servers, you can eiher remove the relevant lines from your known_hosts (see IPs from the cloud console) or disable host key verification by setting the option 'verify_host_key' to false in the configuration file for the cluster."
-     exit 1
-   end
+     File.write(kubeconfig_path, kubeconfig)

-   def kubernetes_client
-     return @kubernetes_client if @kubernetes_client
+     FileUtils.chmod 'go-r', kubeconfig_path
+   end

-     config_hash = YAML.load_file(kubeconfig_path)
-     config_hash['current-context'] = cluster_name
-     @kubernetes_client = K8s::Client.config(K8s::Config.new(config_hash))
-   end
+   def belongs_to_cluster?(server)
+     server.dig('labels', 'cluster') == cluster_name
+   end

-   def find_flannel_interface(server)
-     if ssh(server, "lscpu | grep Vendor") =~ /Intel/
-       "ens10"
-     else
-       "enp7s0"
-     end
-   end
+   def schedule_workloads_on_masters?
+     schedule_workloads_on_masters = configuration['schedule_workloads_on_masters']
+     schedule_workloads_on_masters ? !!schedule_workloads_on_masters : false
+   end

-   def all_servers
-     @all_servers ||= hetzner_client.get("/servers")["servers"].select{ |server| belongs_to_cluster?(server) == true }
-   end
+   def image
+     configuration['image'] || 'ubuntu-20.04'
+   end

-   def masters
-     @masters ||= all_servers.select{ |server| server["name"] =~ /master\d+\Z/ }.sort{ |a, b| a["name"] <=> b["name"] }
-   end
+   def additional_packages
+     configuration['additional_packages'] || []
+   end

-   def workers
-     @workers = all_servers.select{ |server| server["name"] =~ /worker\d+\Z/ }.sort{ |a, b| a["name"] <=> b["name"] }
-   end
+   def check_kubectl
+     return if which('kubectl')

-   def k3s_token
-     @k3s_token ||= begin
-       token = ssh(first_master, "{ TOKEN=$(< /var/lib/rancher/k3s/server/node-token); } 2> /dev/null; echo $TOKEN")
+     puts 'Please ensure kubectl is installed and in your PATH.'
+     exit 1
+   end

-       if token.empty?
-         SecureRandom.hex
-       else
-         token.split(":").last
-       end
-     end
-   end
+   def placement_group_id(pool_name = nil)
+     @placement_groups ||= {}
+     @placement_groups[pool_name || '__masters__'] ||= Hetzner::PlacementGroup.new(hetzner_client:, cluster_name:, pool_name:).create
+   end

-   def first_master_private_ip
-     @first_master_private_ip ||= first_master["private_net"][0]["ip"]
-   end
+   def master_instance_type
+     @master_instance_type ||= masters_config['instance_type']
+   end

-   def first_master
-     masters.first
-   end
+   def masters_count
+     @masters_count ||= masters_config['instance_count']
+   end

-   def api_server_ip
-     return @api_server_ip if @api_server_ip
+   def firewall_id
+     @firewall_id ||= Hetzner::Firewall.new(hetzner_client:, cluster_name:).create(high_availability: (masters_count > 1), networks:)
+   end

-     @api_server_ip = if masters.size > 1
-       load_balancer_name = "#{cluster_name}-api"
-       load_balancer = hetzner_client.get("/load_balancers")["load_balancers"].detect{ |load_balancer| load_balancer["name"] == load_balancer_name }
-       load_balancer["public_net"]["ipv4"]["ip"]
-     else
-       first_master_public_ip
-     end
+   def network_id
+     @network_id ||= Hetzner::Network.new(hetzner_client:, cluster_name:).create(location:)
+   end
+
+   def ssh_key_id
+     @ssh_key_id ||= Hetzner::SSHKey.new(hetzner_client:, cluster_name:).create(public_ssh_key_path:)
+   end
+
+   def master_definitions_for_create
+     definitions = []
+
+     masters_count.times do |i|
+       definitions << {
+         instance_type: master_instance_type,
+         instance_id: "master#{i + 1}",
+         placement_group_id:,
+         location:,
+         firewall_id:,
+         network_id:,
+         ssh_key_id:,
+         image:,
+         additional_packages:
+       }
      end

-   def tls_sans
-     sans = " --tls-san=#{api_server_ip} "
+     definitions
+   end

-     masters.each do |master|
-       master_private_ip = master["private_net"][0]["ip"]
-       sans << " --tls-san=#{master_private_ip} "
-     end
+   def master_definitions_for_delete
+     definitions = []

-     sans
+     masters_count.times do |i|
+       definitions << {
+         instance_type: master_instance_type,
+         instance_id: "master#{i + 1}"
+       }
      end

-   def first_master_public_ip
-     @first_master_public_ip ||= first_master.dig("public_net", "ipv4", "ip")
+     definitions
+   end
+
+   def worker_node_pool_definitions(worker_node_pool)
+     worker_node_pool_name = worker_node_pool['name']
+     worker_instance_type = worker_node_pool['instance_type']
+     worker_count = worker_node_pool['instance_count']
+
+     definitions = []
+
+     worker_count.times do |i|
+       definitions << {
+         instance_type: worker_instance_type,
+         instance_id: "pool-#{worker_node_pool_name}-worker#{i + 1}",
+         placement_group_id: placement_group_id(worker_node_pool_name),
+         location:,
+         firewall_id:,
+         network_id:,
+         ssh_key_id:,
+         image:,
+         additional_packages:
+       }
      end

-   def save_kubeconfig
-     kubeconfig = ssh(first_master, "cat /etc/rancher/k3s/k3s.yaml").
-       gsub("127.0.0.1", api_server_ip).
-       gsub("default", cluster_name)
+     definitions
+   end
+
+   def create_load_balancer
+     Hetzner::LoadBalancer.new(hetzner_client:, cluster_name:).create(location:, network_id:)
+   end
+
+   def server_configs
+     return @server_configs if @server_configs

-     File.write(kubeconfig_path, kubeconfig)
+     @server_configs = master_definitions_for_create

-     FileUtils.chmod "go-r", kubeconfig_path
+     worker_node_pools.each do |worker_node_pool|
+       @server_configs += worker_node_pool_definitions(worker_node_pool)
      end

-   def ugrade_plan_manifest_path
-     worker_upgrade_concurrency = workers.size - 1
-     worker_upgrade_concurrency = 1 if worker_upgrade_concurrency == 0
+     @server_configs
+   end

-     manifest = <<~EOF
-       apiVersion: upgrade.cattle.io/v1
-       kind: Plan
-       metadata:
-         name: k3s-server
-         namespace: system-upgrade
-         labels:
-           k3s-upgrade: server
-       spec:
-         concurrency: 1
-         version: #{new_k3s_version}
-         nodeSelector:
-           matchExpressions:
-             - {key: node-role.kubernetes.io/master, operator: In, values: ["true"]}
-         serviceAccountName: system-upgrade
-         tolerations:
-           - key: "CriticalAddonsOnly"
-             operator: "Equal"
-             value: "true"
-             effect: "NoExecute"
-         cordon: true
-         upgrade:
-           image: rancher/k3s-upgrade
-       ---
-       apiVersion: upgrade.cattle.io/v1
-       kind: Plan
-       metadata:
-         name: k3s-agent
-         namespace: system-upgrade
-         labels:
-           k3s-upgrade: agent
-       spec:
-         concurrency: #{worker_upgrade_concurrency}
-         version: #{new_k3s_version}
-         nodeSelector:
-           matchExpressions:
-             - {key: node-role.kubernetes.io/master, operator: NotIn, values: ["true"]}
-         serviceAccountName: system-upgrade
-         prepare:
-           image: rancher/k3s-upgrade
-           args: ["prepare", "k3s-server"]
-         cordon: true
-         upgrade:
-           image: rancher/k3s-upgrade
-     EOF
+   def create_servers
+     servers = []
+
+     threads = server_configs.map do |server_config|
+       Thread.new do
+         servers << Hetzner::Server.new(hetzner_client:, cluster_name:).create(**server_config)
+       end
+     end

-     temp_file_path = "/tmp/k3s-upgrade-plan.yaml"
+     threads.each(&:join) unless threads.empty?

-     File.write(temp_file_path, manifest)
+     sleep 1 while servers.size != server_configs.size

-     temp_file_path
-   end
+     wait_for_servers(servers)
+   end

-   def belongs_to_cluster?(server)
-     server.dig("labels", "cluster") == cluster_name
+   def wait_for_servers(servers)
+     threads = servers.map do |server|
+       Thread.new { wait_for_ssh server }
      end

-   def schedule_workloads_on_masters?
-     schedule_workloads_on_masters = configuration.dig("schedule_workloads_on_masters")
-     schedule_workloads_on_masters ? !!schedule_workloads_on_masters : false
-   end
+     threads.each(&:join) unless threads.empty?
+   end

-   def image
-     configuration.dig("image") || "ubuntu-20.04"
+   def delete_servers
+     threads = all_servers.map do |server|
+       Thread.new do
+         Hetzner::Server.new(hetzner_client:, cluster_name:).delete(server_name: server['name'])
+       end
      end

+     threads.each(&:join) unless threads.empty?
+   end
  end
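
The refactored code above depends on a `Utils` module (required as `../utils` but not included in this diff) that must provide at least `which` and `run`. The following is a minimal sketch of what such helpers might look like, inferred purely from the call sites above (`run cmd, kubeconfig_path: kubeconfig_path` and `which('kubectl')`); the gem's actual implementation may differ:

module Utils
  # Locate an executable in PATH; returns its full path, or nil if not found.
  def which(cmd)
    exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
    ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
      exts.each do |ext|
        exe = File.join(path, "#{cmd}#{ext}")
        return exe if File.executable?(exe) && !File.directory?(exe)
      end
    end
    nil
  end

  # Run a shell command (such as the kubectl heredocs built above) with
  # KUBECONFIG pointing at the cluster's kubeconfig; abort on failure.
  def run(command, kubeconfig_path:)
    env = ENV.to_h.merge('KUBECONFIG' => kubeconfig_path)
    system(env, 'bash', '-c', command) || exit(1)
  end
end

Kernel#system is used here only to keep the sketch dependency-free; given the new `require 'subprocess'` at the top of the file, the real helper presumably delegates to the subprocess gem (e.g. `Subprocess.check_call`) instead.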