hetzner-k3s 0.1.0

@@ -0,0 +1,57 @@
module Hetzner
  class SSHKey
    def initialize(hetzner_client:, cluster_name:)
      @hetzner_client = hetzner_client
      @cluster_name = cluster_name
    end

    def create(ssh_key_path:)
      @ssh_key_path = ssh_key_path

      puts

      if (ssh_key = find_ssh_key)
        puts "SSH key already exists, skipping."
        puts
        return ssh_key["id"]
      end

      puts "Creating SSH key..."

      response = hetzner_client.post("/ssh_keys", ssh_key_config).body

      puts "...SSH key created."
      puts

      JSON.parse(response)["ssh_key"]["id"]
    end

    def delete
      if (ssh_key = find_ssh_key)
        puts "Deleting SSH key..."
        hetzner_client.delete("/ssh_keys", ssh_key["id"])
        puts "...SSH key deleted."
      else
        puts "SSH key no longer exists, skipping."
      end

      puts
    end

    private

    attr_reader :hetzner_client, :cluster_name, :ssh_key_path

    def ssh_key_config
      {
        name: cluster_name,
        public_key: File.read(ssh_key_path)
      }
    end

    def find_ssh_key
      hetzner_client.get("/ssh_keys")["ssh_keys"].detect { |ssh_key| ssh_key["name"] == cluster_name }
    end
  end
end
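
A minimal usage sketch for this class. The Hetzner::Client constructor with a token: keyword is how the CLI below instantiates the API client; the environment variable name and key path here are placeholders:

client = Hetzner::Client.new(token: ENV.fetch("HCLOUD_TOKEN"))

ssh_key = Hetzner::SSHKey.new(hetzner_client: client, cluster_name: "test")

# Uploads the public key, or returns the id of an existing key whose name
# matches the cluster name:
ssh_key_id = ssh_key.create(ssh_key_path: File.expand_path("~/.ssh/id_rsa.pub"))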
@@ -0,0 +1,303 @@
require "thor"
require "http"
require "sshkey"
require "yaml"
require "json"

require_relative "cluster"

module Hetzner
  module K3s
    class CLI < Thor
      def self.exit_on_failure?
        true
      end

      desc "create-cluster", "Create a k3s cluster in Hetzner Cloud"
      option :config_file, required: true

      def create_cluster
        validate_config_file :create

        Cluster.new(hetzner_client: hetzner_client).create configuration: configuration
      end

      desc "delete-cluster", "Delete an existing k3s cluster in Hetzner Cloud"
      option :config_file, required: true

      def delete_cluster
        validate_config_file :delete
        Cluster.new(hetzner_client: hetzner_client).delete configuration: configuration
      end

      desc "upgrade-cluster", "Upgrade an existing k3s cluster in Hetzner Cloud to a new version"
      option :config_file, required: true
      option :new_k3s_version, required: true
      option :force, default: "false"

      def upgrade_cluster
        validate_config_file :upgrade
        Cluster.new(hetzner_client: hetzner_client).upgrade configuration: configuration, new_k3s_version: options[:new_k3s_version], config_file: options[:config_file]
      end

      desc "releases", "List available k3s releases"
      def releases
        find_available_releases.each do |release|
          puts release
        end
      end

      private

      attr_reader :configuration, :hetzner_client, :k3s_version
      attr_accessor :errors, :used_server_types

      def validate_config_file(action)
        config_file_path = options[:config_file]

        if File.exist?(config_file_path)
          begin
            @configuration = YAML.load_file(options[:config_file])
            raise "invalid" unless configuration.is_a? Hash
          rescue
            puts "Please ensure that the config file is a correct YAML manifest."
            return
          end
        else
          puts "Please specify a correct path for the config file."
          return
        end

        @errors = []
        @used_server_types = []

        validate_token
        validate_cluster_name
        validate_kubeconfig_path

        case action
        when :create
          validate_ssh_key
          validate_location
          validate_k3s_version
          validate_masters
          validate_worker_node_pools
          validate_all_nodes_must_be_of_same_series
        when :delete
          validate_kubeconfig_path_must_exist
        when :upgrade
          validate_kubeconfig_path_must_exist
          validate_new_k3s_version
          validate_new_k3s_version_must_be_more_recent
        end

        errors.flatten!

        unless errors.empty?
          puts "Some information in the configuration file requires your attention:"
          errors.each do |error|
            puts " - #{error}"
          end

          exit 1
        end
      end

      def validate_token
        token = configuration.dig("hetzner_token")
        @hetzner_client = Hetzner::Client.new(token: token)
        hetzner_client.get("/locations")
      rescue
        errors << "Invalid Hetzner Cloud token"
      end

      def validate_cluster_name
        errors << "Cluster name is in an invalid format" unless configuration["cluster_name"] =~ /\A([A-Za-z0-9\-\_]+)\Z/
      end

      def validate_kubeconfig_path
        path = File.expand_path(configuration.dig("kubeconfig_path"))
        errors << "kubeconfig path cannot be a directory" and return if File.directory? path

        directory = File.dirname(path)
        errors << "Directory #{directory} doesn't exist" unless File.exist? directory
      rescue
        errors << "Invalid path for the kubeconfig"
      end

      def validate_ssh_key
        path = File.expand_path(configuration.dig("ssh_key_path"))
        errors << "Invalid public SSH key path" and return unless File.exist? path

        key = File.read(path)
        errors << "Public SSH key is invalid" unless ::SSHKey.valid_ssh_public_key? key
      rescue
        errors << "Invalid public SSH key path"
      end

      def validate_kubeconfig_path_must_exist
        path = File.expand_path configuration.dig("kubeconfig_path")
        errors << "kubeconfig path is invalid" and return unless File.exist? path
        errors << "kubeconfig path cannot be a directory" if File.directory? path
      rescue
        errors << "Invalid kubeconfig path"
      end

      def server_types
        @server_types ||= hetzner_client.get("/server_types")["server_types"].map { |server_type| server_type["name"] }
      rescue
        @errors << "Cannot fetch server types with Hetzner API, please try again later"
        false
      end

      def locations
        @locations ||= hetzner_client.get("/locations")["locations"].map { |location| location["name"] }
      rescue
        @errors << "Cannot fetch locations with Hetzner API, please try again later"
        false
      end

      def validate_location
        errors << "Invalid location - available locations: nbg1 (Nuremberg, Germany), fsn1 (Falkenstein, Germany), hel1 (Helsinki, Finland)" unless locations.include? configuration.dig("location")
      end

      def find_available_releases
        @available_releases ||= begin
          response = HTTP.get("https://api.github.com/repos/k3s-io/k3s/tags").body
          JSON.parse(response).map { |hash| hash["name"] }
        end
      rescue
        errors << "Cannot fetch the k3s releases from GitHub, please try again later"
      end

      def validate_k3s_version
        k3s_version = configuration.dig("k3s_version")
        available_releases = find_available_releases
        errors << "Invalid k3s version" unless available_releases.include? k3s_version
      end

      def validate_new_k3s_version
        new_k3s_version = options[:new_k3s_version]
        available_releases = find_available_releases
        errors << "The new k3s version is invalid" unless available_releases.include? new_k3s_version
      end

      def validate_masters
        masters_pool = nil

        begin
          masters_pool = configuration.dig("masters")
        rescue
          errors << "Invalid masters configuration"
          return
        end

        if masters_pool.nil?
          errors << "Invalid masters configuration"
          return
        end

        validate_instance_group masters_pool, workers: false
      end

      def validate_worker_node_pools
        worker_node_pools = nil

        begin
          worker_node_pools = configuration.dig("worker_node_pools")
        rescue
          errors << "Invalid node pools configuration"
          return
        end

        if !worker_node_pools.is_a? Array
          errors << "Invalid node pools configuration"
        elsif worker_node_pools.size == 0
          errors << "At least one node pool is required in order to schedule workloads"
        elsif worker_node_pools.map { |worker_node_pool| worker_node_pool["name"] }.uniq.size != worker_node_pools.size
          errors << "Each node pool must have a unique name"
        elsif server_types
          worker_node_pools.each do |worker_node_pool|
            validate_instance_group worker_node_pool
          end
        end
      end

      def validate_all_nodes_must_be_of_same_series
        series = used_server_types.map { |used_server_type| used_server_type[0..1] }
        errors << "Master and worker node pools must all be of the same server series for networking to function properly (available series: cx, cp, ccx)" unless series.uniq.size == 1
      end

      def validate_new_k3s_version_must_be_more_recent
        return if options[:force] == "true"
        return unless kubernetes_client

        begin
          Timeout::timeout(5) do
            servers = kubernetes_client.api("v1").resource("nodes").list

            if servers.size == 0
              errors << "The cluster seems to have no nodes, nothing to upgrade"
            else
              available_releases = find_available_releases

              current_k3s_version = servers.first.dig(:status, :nodeInfo, :kubeletVersion)
              current_k3s_version_index = available_releases.index(current_k3s_version) || 1000

              new_k3s_version = options[:new_k3s_version]
              new_k3s_version_index = available_releases.index(new_k3s_version) || 1000

              unless new_k3s_version_index < current_k3s_version_index
                errors << "The new k3s version must be more recent than the current one"
              end
            end
          end
        rescue Timeout::Error
          puts "Cannot upgrade: Unable to fetch nodes from Kubernetes API. Is the cluster online?"
        end
      end

      def validate_instance_group(instance_group, workers: true)
        instance_group_errors = []

        instance_group_type = workers ? "Worker node pool #{instance_group["name"]}" : "Masters pool"

        unless !workers || instance_group["name"] =~ /\A([A-Za-z0-9\-\_]+)\Z/
          instance_group_errors << "#{instance_group_type} has an invalid name"
        end

        unless instance_group.is_a? Hash
          instance_group_errors << "#{instance_group_type} is in an invalid format"
        end

        unless server_types.include?(instance_group["instance_type"])
          instance_group_errors << "#{instance_group_type} has an invalid instance type"
        end

        if instance_group["instance_count"].is_a? Integer
          if instance_group["instance_count"] < 1
            instance_group_errors << "#{instance_group_type} must have at least one node"
          elsif !workers
            instance_group_errors << "Masters count must be 1 for a non-HA cluster or an odd number (3 recommended) for an HA cluster" unless instance_group["instance_count"].odd?
          end
        else
          instance_group_errors << "#{instance_group_type} has an invalid instance count"
        end

        used_server_types << instance_group["instance_type"]

        errors << instance_group_errors
      end

      def kubernetes_client
        return @kubernetes_client if @kubernetes_client

        config_hash = YAML.load_file(File.expand_path(configuration["kubeconfig_path"]))
        config_hash["current-context"] = configuration["cluster_name"]
        @kubernetes_client = K8s::Client.config(K8s::Config.new(config_hash))
      rescue
        errors << "Cannot connect to the Kubernetes cluster"
        false
      end
    end
  end
end
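
For reference, a config file of the shape these validations expect looks roughly like this. The key names are taken from the configuration.dig calls above; all values are placeholders:

hetzner_token: <your Hetzner Cloud API token>
cluster_name: test
kubeconfig_path: "./kubeconfig"
ssh_key_path: "~/.ssh/id_rsa.pub"
location: nbg1
k3s_version: v1.21.3+k3s1
masters:
  instance_type: cpx21
  instance_count: 3
worker_node_pools:
  - name: small
    instance_type: cpx21
    instance_count: 4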
@@ -0,0 +1,38 @@
module K8s
  class ResourceClient
    def initialize(transport, api_client, api_resource, namespace: nil, resource_class: K8s::Resource)
      @transport = transport
      @api_client = api_client
      @api_resource = api_resource
      @namespace = namespace
      @resource_class = resource_class

      if @api_resource.name.include? '/'
        @resource, @subresource = @api_resource.name.split('/', 2)
      else
        @resource = @api_resource.name
        @subresource = nil
      end

      # fail "Resource #{api_resource.name} is not namespaced" unless api_resource.namespaced || !namespace
    end

    def path(name = nil, subresource: @subresource, namespace: @namespace)
      namespace_part = namespace ? ['namespaces', namespace] : []

      if namespaced?
        if name && subresource
          @api_client.path(*namespace_part, @resource, name, subresource)
        elsif name
          @api_client.path(*namespace_part, @resource, name)
        else
          @api_client.path(*namespace_part, @resource)
        end
      elsif name
        @api_client.path(@resource, name)
      else
        @api_client.path(@resource)
      end
    end
  end
end
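
This patch reopens k8s-ruby's K8s::ResourceClient so that resource names containing a subresource (e.g. "nodes/status") are split and the request path is assembled accordingly. Illustrative calls, assuming `client` is a configured K8s::Client; the resource names are hypothetical and the exact path prefix comes from the upstream api_client:

# Cluster-scoped resource:
client.api("v1").resource("nodes").path("master1")
# => roughly "/api/v1/nodes/master1"

# Namespaced resource with an explicit subresource:
client.api("v1").resource("pods", namespace: "kube-system").path("hcloud-csi-node-abc12", subresource: "status")
# => roughly "/api/v1/namespaces/kube-system/pods/hcloud-csi-node-abc12/status"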
@@ -0,0 +1,609 @@
require "thread"
require "net/ssh"
require "securerandom"
require "base64"
require "k8s-ruby"
require "timeout"
require "http"
require "json"

require_relative "../infra/client"
require_relative "../infra/firewall"
require_relative "../infra/network"
require_relative "../infra/ssh_key"
require_relative "../infra/server"
require_relative "../infra/load_balancer"

require_relative "../k3s/client_patch"

class Cluster
  def initialize(hetzner_client:)
    @hetzner_client = hetzner_client
  end

  def create(configuration:)
    @hetzner_token = configuration.dig("hetzner_token")
    @cluster_name = configuration.dig("cluster_name")
    @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
    @ssh_key_path = File.expand_path(configuration.dig("ssh_key_path"))
    @k3s_version = configuration.dig("k3s_version")
    @masters_config = configuration.dig("masters")
    @worker_node_pools = configuration.dig("worker_node_pools")
    @location = configuration.dig("location")
    @flannel_interface = find_flannel_interface(configuration.dig("masters")["instance_type"])
    @servers = []

    create_resources

    deploy_kubernetes

    sleep 10

    deploy_cloud_controller_manager
    deploy_csi_driver
    deploy_system_upgrade_controller
  end

  def delete(configuration:)
    @cluster_name = configuration.dig("cluster_name")
    @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))

    delete_resources
  end

  def upgrade(configuration:, new_k3s_version:, config_file:)
    @configuration = configuration
    @cluster_name = configuration.dig("cluster_name")
    @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
    @new_k3s_version = new_k3s_version
    @config_file = config_file

    upgrade_cluster
  end

  private

  attr_accessor :servers

  attr_reader :hetzner_client, :cluster_name, :kubeconfig_path, :k3s_version,
              :masters_config, :worker_node_pools,
              :location, :flannel_interface, :ssh_key_path, :kubernetes_client,
              :hetzner_token, :tls_sans, :new_k3s_version, :configuration,
              :config_file

  def latest_k3s_version
    response = HTTP.get("https://api.github.com/repos/k3s-io/k3s/tags").body
    JSON.parse(response).first["name"]
  end

  def create_resources
    firewall_id = Hetzner::Firewall.new(
      hetzner_client: hetzner_client,
      cluster_name: cluster_name
    ).create

    network_id = Hetzner::Network.new(
      hetzner_client: hetzner_client,
      cluster_name: cluster_name
    ).create

    ssh_key_id = Hetzner::SSHKey.new(
      hetzner_client: hetzner_client,
      cluster_name: cluster_name
    ).create(ssh_key_path: ssh_key_path)

    server_configs = []

    master_instance_type = masters_config["instance_type"]
    masters_count = masters_config["instance_count"]

    masters_count.times do |i|
      server_configs << {
        location: location,
        instance_type: master_instance_type,
        instance_id: "master#{i+1}",
        firewall_id: firewall_id,
        network_id: network_id,
        ssh_key_id: ssh_key_id
      }
    end

    if masters_count > 1
      Hetzner::LoadBalancer.new(
        hetzner_client: hetzner_client,
        cluster_name: cluster_name
      ).create(location: location, network_id: network_id)
    end

    worker_node_pools.each do |worker_node_pool|
      worker_node_pool_name = worker_node_pool["name"]
      worker_instance_type = worker_node_pool["instance_type"]
      worker_count = worker_node_pool["instance_count"]

      worker_count.times do |i|
        server_configs << {
          location: location,
          instance_type: worker_instance_type,
          instance_id: "pool-#{worker_node_pool_name}-worker#{i+1}",
          firewall_id: firewall_id,
          network_id: network_id,
          ssh_key_id: ssh_key_id
        }
      end
    end

    threads = server_configs.map do |server_config|
      Thread.new do
        servers << Hetzner::Server.new(hetzner_client: hetzner_client, cluster_name: cluster_name).create(server_config)
      end
    end

    threads.each(&:join)

    puts

    threads = servers.map do |server|
      Thread.new { wait_for_ssh server }
    end

    threads.each(&:join)
  end

  def delete_resources
    begin
      Timeout::timeout(5) do
        servers = kubernetes_client.api("v1").resource("nodes").list

        threads = servers.map do |node|
          Thread.new do
            Hetzner::Server.new(hetzner_client: hetzner_client, cluster_name: cluster_name).delete(server_name: node.metadata[:name])
          end
        end

        threads.each(&:join)
      end
    rescue Timeout::Error
      puts "Unable to fetch nodes from Kubernetes API. Is the cluster online?"
    end

    puts

    sleep 5 # give time for the servers to actually be deleted

    Hetzner::Firewall.new(
      hetzner_client: hetzner_client,
      cluster_name: cluster_name
    ).delete

    Hetzner::Network.new(
      hetzner_client: hetzner_client,
      cluster_name: cluster_name
    ).delete

    Hetzner::SSHKey.new(
      hetzner_client: hetzner_client,
      cluster_name: cluster_name
    ).delete

    Hetzner::LoadBalancer.new(
      hetzner_client: hetzner_client,
      cluster_name: cluster_name
    ).delete
  end

  def upgrade_cluster
    resources = K8s::Resource.from_files(upgrade_plan_manifest_path)

    begin
      kubernetes_client.api("upgrade.cattle.io/v1").resource("plans").get("k3s-server", namespace: "system-upgrade")

      puts "Aborting - an upgrade is already in progress."
    rescue K8s::Error::NotFound
      resources.each do |resource|
        kubernetes_client.create_resource(resource)
      end

      puts "Upgrade will now start. Run `watch kubectl get nodes` to see the nodes being upgraded. This should take a few minutes for a small cluster."
      puts "The API server may be briefly unavailable during the upgrade of the controlplane."

      configuration["k3s_version"] = new_k3s_version

      File.write(config_file, configuration.to_yaml)
    end
  end

  def master_script(master)
    server = master == first_master ? " --cluster-init " : " --server https://#{first_master_private_ip}:6443 "

    <<~EOF
      curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="#{k3s_version}" K3S_TOKEN="#{k3s_token}" INSTALL_K3S_EXEC="server \
      --disable-cloud-controller \
      --disable servicelb \
      --disable traefik \
      --disable local-storage \
      --disable metrics-server \
      --write-kubeconfig-mode=644 \
      --node-name="$(hostname -f)" \
      --cluster-cidr=10.244.0.0/16 \
      --etcd-expose-metrics=true \
      --kube-controller-manager-arg="address=0.0.0.0" \
      --kube-controller-manager-arg="bind-address=0.0.0.0" \
      --kube-proxy-arg="metrics-bind-address=0.0.0.0" \
      --kube-scheduler-arg="address=0.0.0.0" \
      --kube-scheduler-arg="bind-address=0.0.0.0" \
      --node-taint CriticalAddonsOnly=true:NoExecute \
      --kubelet-arg="cloud-provider=external" \
      --node-ip=$(hostname -I | awk '{print $2}') \
      --node-external-ip=$(hostname -I | awk '{print $1}') \
      --flannel-iface=#{flannel_interface} \
      #{server} #{tls_sans}" sh -
    EOF
  end
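
  # For illustration, with hypothetical example values the interpolations
  # above render a command roughly like:
  #
  #   curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="v1.21.3+k3s1" \
  #     K3S_TOKEN="<token>" INSTALL_K3S_EXEC="server --cluster-init ... \
  #     --flannel-iface=enp7s0 --tls-san=<api server IP> ..." sh -
  #
  # The first master receives " --cluster-init " and bootstraps the embedded
  # etcd cluster; every additional master joins it through
  # " --server https://<first master private IP>:6443 " instead.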

  def worker_script
    <<~EOF
      curl -sfL https://get.k3s.io | K3S_TOKEN="#{k3s_token}" INSTALL_K3S_VERSION="#{k3s_version}" K3S_URL=https://#{first_master_private_ip}:6443 INSTALL_K3S_EXEC="agent \
      --node-name="$(hostname -f)" \
      --kubelet-arg="cloud-provider=external" \
      --node-ip=$(hostname -I | awk '{print $2}') \
      --node-external-ip=$(hostname -I | awk '{print $1}') \
      --flannel-iface=#{flannel_interface}" sh -
    EOF
  end

  def deploy_kubernetes
    puts
    puts "Deploying k3s to first master (#{first_master["name"]})..."

    ssh first_master, master_script(first_master), print_output: true

    puts
    puts "...k3s has been deployed to first master."

    save_kubeconfig

    if masters.size > 1
      threads = masters[1..-1].map do |master|
        Thread.new do
          puts
          puts "Deploying k3s to master #{master["name"]}..."

          ssh master, master_script(master), print_output: true

          puts
          puts "...k3s has been deployed to master #{master["name"]}."
        end
      end

      threads.each(&:join)
    end

    threads = workers.map do |worker|
      Thread.new do
        puts
        puts "Deploying k3s to worker (#{worker["name"]})..."

        ssh worker, worker_script, print_output: true

        puts
        puts "...k3s has been deployed to worker (#{worker["name"]})."
      end
    end

    threads.each(&:join)
  end

  def deploy_cloud_controller_manager
    puts
    puts "Deploying Hetzner Cloud Controller Manager..."

    begin
      kubernetes_client.api("v1").resource("secrets").get("hcloud", namespace: "kube-system")
    rescue K8s::Error::NotFound
      secret = K8s::Resource.new(
        apiVersion: "v1",
        kind: "Secret",
        metadata: {
          namespace: "kube-system",
          name: "hcloud"
        },
        data: {
          network: Base64.encode64(cluster_name),
          token: Base64.encode64(hetzner_token)
        }
      )

      kubernetes_client.api("v1").resource("secrets").create_resource(secret)
    end

    manifest = HTTP.follow.get("https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/latest/download/ccm-networks.yaml").body

    File.write("/tmp/cloud-controller-manager.yaml", manifest)

    resources = K8s::Resource.from_files("/tmp/cloud-controller-manager.yaml")

    begin
      kubernetes_client.api("apps/v1").resource("deployments").get("hcloud-cloud-controller-manager", namespace: "kube-system")

      resources.each do |resource|
        kubernetes_client.update_resource(resource)
      end
    rescue K8s::Error::NotFound
      resources.each do |resource|
        kubernetes_client.create_resource(resource)
      end
    end

    puts "...Cloud Controller Manager deployed"
  rescue Excon::Error::Socket
    retry
  end

  def deploy_system_upgrade_controller
    puts
    puts "Deploying k3s System Upgrade Controller..."

    manifest = HTTP.follow.get("https://github.com/rancher/system-upgrade-controller/releases/download/v0.7.3/system-upgrade-controller.yaml").body

    File.write("/tmp/system-upgrade-controller.yaml", manifest)

    resources = K8s::Resource.from_files("/tmp/system-upgrade-controller.yaml")

    begin
      kubernetes_client.api("apps/v1").resource("deployments").get("system-upgrade-controller", namespace: "system-upgrade")

      resources.each do |resource|
        kubernetes_client.update_resource(resource)
      end
    rescue K8s::Error::NotFound
      resources.each do |resource|
        kubernetes_client.create_resource(resource)
      end
    end

    puts "...k3s System Upgrade Controller deployed"
  rescue Excon::Error::Socket
    retry
  end

  def deploy_csi_driver
    puts
    puts "Deploying Hetzner CSI Driver..."

    begin
      kubernetes_client.api("v1").resource("secrets").get("hcloud-csi", namespace: "kube-system")
    rescue K8s::Error::NotFound
      secret = K8s::Resource.new(
        apiVersion: "v1",
        kind: "Secret",
        metadata: {
          namespace: "kube-system",
          name: "hcloud-csi"
        },
        data: {
          token: Base64.encode64(hetzner_token)
        }
      )

      kubernetes_client.api("v1").resource("secrets").create_resource(secret)
    end

    manifest = HTTP.follow.get("https://raw.githubusercontent.com/hetznercloud/csi-driver/v1.5.3/deploy/kubernetes/hcloud-csi.yml").body

    File.write("/tmp/csi-driver.yaml", manifest)

    resources = K8s::Resource.from_files("/tmp/csi-driver.yaml")

    begin
      kubernetes_client.api("apps/v1").resource("daemonsets").get("hcloud-csi-node", namespace: "kube-system")

      resources.each do |resource|
        begin
          kubernetes_client.update_resource(resource)
        rescue K8s::Error::Invalid => e
          raise e unless e.message =~ /must be specified/i
        end
      end
    rescue K8s::Error::NotFound
      resources.each do |resource|
        kubernetes_client.create_resource(resource)
      end
    end

    puts "...CSI Driver deployed"
  rescue Excon::Error::Socket
    retry
  end

  def wait_for_ssh(server)
    server_name = server["name"]

    puts "Waiting for server #{server_name} to be up..."

    loop do
      result = ssh(server, "echo UP")
      break if result == "UP"
    end

    puts "...server #{server_name} is now up."
  rescue Errno::ENETUNREACH
    retry
  end

  def ssh(server, command, print_output: false)
    public_ip = server.dig("public_net", "ipv4", "ip")
    output = ""

    Net::SSH.start(public_ip, "root") do |session|
      session.exec!(command) do |channel, stream, data|
        output << data
        puts data if print_output
      end
    end

    output.chop
  rescue Net::SSH::ConnectionTimeout
    retry
  rescue Net::SSH::Disconnect => e
    retry unless e.message =~ /Too many authentication failures/
  rescue Errno::ECONNREFUSED
    retry
  end

  def kubernetes_client
    return @kubernetes_client if @kubernetes_client

    config_hash = YAML.load_file(kubeconfig_path)
    config_hash["current-context"] = cluster_name
    @kubernetes_client = K8s::Client.config(K8s::Config.new(config_hash))
  end

  def find_flannel_interface(server_type)
    case server_type[0..1]
    when "cp", "cc"
      "enp7s0"
    when "cx"
      "ens10"
    end
  end
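
  # The private-network interface name depends on the Hetzner server series:
  # CPX and CCX instances expose it as enp7s0, CX instances as ens10. Flannel
  # is pinned to a single interface name for the whole cluster, which is why
  # the CLI validates that all node pools belong to the same series.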

  def all_servers
    @all_servers ||= hetzner_client.get("/servers")["servers"]
  end

  def masters
    @masters ||= all_servers.select { |server| server["name"] =~ /master\d+\Z/ }.sort { |a, b| a["name"] <=> b["name"] }
  end

  def workers
    @workers ||= all_servers.select { |server| server["name"] =~ /worker\d+\Z/ }.sort { |a, b| a["name"] <=> b["name"] }
  end

  def k3s_token
    @k3s_token ||= begin
      token = ssh(first_master, "{ TOKEN=$(< /var/lib/rancher/k3s/server/node-token); } 2> /dev/null; echo $TOKEN")

      if token.empty?
        SecureRandom.hex
      else
        token.split(":").last
      end
    end
  end
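
  # On a fresh server the node-token file does not exist yet, so the command
  # above prints an empty string and a random token is generated for the new
  # cluster. On subsequent runs the token already present on the first master
  # is reused, so new nodes can join the existing cluster.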

  def first_master_private_ip
    @first_master_private_ip ||= first_master["private_net"][0]["ip"]
  end

  def first_master
    masters.first
  end

  def api_server_ip
    return @api_server_ip if @api_server_ip

    @api_server_ip = if masters.size > 1
      load_balancer_name = "#{cluster_name}-api"
      load_balancer = hetzner_client.get("/load_balancers")["load_balancers"].detect { |load_balancer| load_balancer["name"] == load_balancer_name }
      load_balancer["public_net"]["ipv4"]["ip"]
    else
      first_master_public_ip
    end
  end

  def tls_sans
    sans = " --tls-san=#{api_server_ip} "

    masters.each do |master|
      master_private_ip = master["private_net"][0]["ip"]
      sans << " --tls-san=#{master_private_ip} "
    end

    sans
  end
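
  # For a three-master cluster this produces flags along the lines of
  # (IPs are hypothetical):
  #   --tls-san=167.233.1.1 --tls-san=10.0.0.2 --tls-san=10.0.0.3 --tls-san=10.0.0.4
  # so the API server certificate is valid both for the load balancer IP and
  # for each master's private IP.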

  def first_master_public_ip
    @first_master_public_ip ||= first_master.dig("public_net", "ipv4", "ip")
  end

  def save_kubeconfig
    kubeconfig = ssh(first_master, "cat /etc/rancher/k3s/k3s.yaml").
      gsub("127.0.0.1", api_server_ip).
      gsub("default", cluster_name)

    File.write(kubeconfig_path, kubeconfig)
  end

  def upgrade_plan_manifest_path
    worker_upgrade_concurrency = workers.size - 1
    worker_upgrade_concurrency = 1 if worker_upgrade_concurrency == 0

    manifest = <<~EOF
      apiVersion: upgrade.cattle.io/v1
      kind: Plan
      metadata:
        name: k3s-server
        namespace: system-upgrade
        labels:
          k3s-upgrade: server
      spec:
        concurrency: 1
        version: #{new_k3s_version}
        nodeSelector:
          matchExpressions:
            - {key: node-role.kubernetes.io/master, operator: In, values: ["true"]}
        serviceAccountName: system-upgrade
        tolerations:
        - key: "CriticalAddonsOnly"
          operator: "Equal"
          value: "true"
          effect: "NoExecute"
        cordon: true
        upgrade:
          image: rancher/k3s-upgrade
      ---
      apiVersion: upgrade.cattle.io/v1
      kind: Plan
      metadata:
        name: k3s-agent
        namespace: system-upgrade
        labels:
          k3s-upgrade: agent
      spec:
        concurrency: #{worker_upgrade_concurrency}
        version: #{new_k3s_version}
        nodeSelector:
          matchExpressions:
            - {key: node-role.kubernetes.io/master, operator: NotIn, values: ["true"]}
        serviceAccountName: system-upgrade
        prepare:
          image: rancher/k3s-upgrade
          args: ["prepare", "k3s-server"]
        cordon: true
        upgrade:
          image: rancher/k3s-upgrade
    EOF

    temp_file_path = "/tmp/k3s-upgrade-plan.yaml"

    File.write(temp_file_path, manifest)

    temp_file_path
  end
end
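
The gem is meant to be driven through the Thor CLI above (create-cluster, delete-cluster, upgrade-cluster, releases), but the Cluster class can also be used directly. A minimal sketch, assuming a config file shaped like the sample shown earlier; the require paths are inferred from the require_relative calls and may not match the gem's actual file layout:

require "yaml"
require_relative "lib/hetzner/infra/client"   # assumed path to Hetzner::Client
require_relative "lib/hetzner/k3s/cluster"    # assumed path to Cluster

configuration = YAML.load_file("cluster_config.yaml")

hetzner_client = Hetzner::Client.new(token: configuration["hetzner_token"])

# Provisions the servers, installs k3s, and deploys the Hetzner Cloud
# Controller Manager, the CSI driver, and the System Upgrade Controller:
Cluster.new(hetzner_client: hetzner_client).create(configuration: configuration)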