hetzner-k3s 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
+ module Hetzner
+   class SSHKey
+     def initialize(hetzner_client:, cluster_name:)
+       @hetzner_client = hetzner_client
+       @cluster_name = cluster_name
+     end
+
+     def create(ssh_key_path:)
+       @ssh_key_path = ssh_key_path
+
+       puts
+
+       if ssh_key = find_ssh_key
+         puts "SSH key already exists, skipping."
+         puts
+         return ssh_key["id"]
+       end
+
+       puts "Creating SSH key..."
+
+       response = hetzner_client.post("/ssh_keys", ssh_key_config).body
+
+       puts "...SSH key created."
+       puts
+
+       JSON.parse(response)["ssh_key"]["id"]
+     end
+
+     def delete
+       if ssh_key = find_ssh_key
+         puts "Deleting SSH key..."
+         hetzner_client.delete("/ssh_keys", ssh_key["id"])
+         puts "...SSH key deleted."
+       else
+         puts "SSH key no longer exists, skipping."
+       end
+
+       puts
+     end
+
+     private
+
+     attr_reader :hetzner_client, :cluster_name, :ssh_key_path
+
+     def ssh_key_config
+       {
+         name: cluster_name,
+         public_key: File.read(ssh_key_path)
+       }
+     end
+
+     def find_ssh_key
+       hetzner_client.get("/ssh_keys")["ssh_keys"].detect { |ssh_key| ssh_key["name"] == cluster_name }
+     end
+   end
+ end
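
This class is idempotent in both directions: create returns the ID of an existing key named after the cluster instead of uploading a duplicate, and delete skips silently when the key is already gone. A minimal usage sketch follows, assuming Hetzner::Client is the thin API wrapper from ../infra/client exposing the get/post/delete calls used above; the require paths and token variable are illustrative, since the gem's file layout is not shown in this diff:

    require_relative "hetzner/infra/client"   # illustrative path
    require_relative "hetzner/infra/ssh_key"  # illustrative path

    client  = Hetzner::Client.new(token: ENV["HETZNER_TOKEN"])
    ssh_key = Hetzner::SSHKey.new(hetzner_client: client, cluster_name: "demo")

    # Uploads the public key, or returns the existing key's ID on a second run.
    key_id = ssh_key.create(ssh_key_path: File.expand_path("~/.ssh/id_rsa.pub"))

    ssh_key.delete # safe to call even if the key was already removed
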
@@ -0,0 +1,303 @@
+ require "thor"
+ require "http"
+ require "sshkey"
+ require "yaml"
+ require "json"
+
+ require_relative "cluster"
+
+ module Hetzner
+   module K3s
+     class CLI < Thor
+       def self.exit_on_failure?
+         true
+       end
+
+       desc "create-cluster", "Create a k3s cluster in Hetzner Cloud"
+       option :config_file, required: true
+
+       def create_cluster
+         validate_config_file :create
+
+         Cluster.new(hetzner_client: hetzner_client).create configuration: configuration
+       end
+
+       desc "delete-cluster", "Delete an existing k3s cluster in Hetzner Cloud"
+       option :config_file, required: true
+
+       def delete_cluster
+         validate_config_file :delete
+         Cluster.new(hetzner_client: hetzner_client).delete configuration: configuration
+       end
+
+       desc "upgrade-cluster", "Upgrade an existing k3s cluster in Hetzner Cloud to a new version"
+       option :config_file, required: true
+       option :new_k3s_version, required: true
+       option :force, default: "false"
+
+       def upgrade_cluster
+         validate_config_file :upgrade
+         Cluster.new(hetzner_client: hetzner_client).upgrade configuration: configuration, new_k3s_version: options[:new_k3s_version], config_file: options[:config_file]
+       end
+
+       desc "releases", "List available k3s releases"
+       def releases
+         find_available_releases.each do |release|
+           puts release
+         end
+       end
+
+       private
+
+       attr_reader :configuration, :hetzner_client, :k3s_version
+       attr_accessor :errors, :used_server_types
+
+       def validate_config_file(action)
+         config_file_path = options[:config_file]
+
+         if File.exist?(config_file_path)
+           begin
+             @configuration = YAML.load_file(options[:config_file])
+             raise "invalid" unless configuration.is_a? Hash
+           rescue
+             puts "Please ensure that the config file is a correct YAML manifest."
+             return
+           end
+         else
+           puts "Please specify a correct path for the config file."
+           return
+         end
+
+         @errors = []
+         @used_server_types = []
+
+         validate_token
+         validate_cluster_name
+         validate_kubeconfig_path
+
+         case action
+         when :create
+           validate_ssh_key
+           validate_location
+           validate_k3s_version
+           validate_masters
+           validate_worker_node_pools
+           validate_all_nodes_must_be_of_same_series
+         when :delete
+           validate_kubeconfig_path_must_exist
+         when :upgrade
+           validate_kubeconfig_path_must_exist
+           validate_new_k3s_version
+           validate_new_k3s_version_must_be_more_recent
+         end
+
+         errors.flatten!
+
+         unless errors.empty?
+           puts "Some information in the configuration file requires your attention:"
+           errors.each do |error|
+             puts " - #{error}"
+           end
+
+           exit 1
+         end
+       end
+
+       def validate_token
+         token = configuration.dig("hetzner_token")
+         @hetzner_client = Hetzner::Client.new(token: token)
+         hetzner_client.get("/locations")
+       rescue
+         errors << "Invalid Hetzner Cloud token"
+       end
+
+       def validate_cluster_name
+         errors << "Cluster name is in an invalid format" unless configuration["cluster_name"] =~ /\A([A-Za-z0-9\-\_]+)\Z/
+       end
+
+       def validate_kubeconfig_path
+         path = File.expand_path(configuration.dig("kubeconfig_path"))
+         errors << "kubeconfig path cannot be a directory" and return if File.directory? path
+
+         directory = File.dirname(path)
+         errors << "Directory #{directory} doesn't exist" unless File.exist? directory
+       rescue
+         errors << "Invalid path for the kubeconfig"
+       end
+
+       def validate_ssh_key
+         path = File.expand_path(configuration.dig("ssh_key_path"))
+         errors << "Invalid public SSH key path" and return unless File.exist? path
+
+         key = File.read(path)
+         errors << "Public SSH key is invalid" unless ::SSHKey.valid_ssh_public_key? key
+       rescue
+         errors << "Invalid public SSH key path"
+       end
+
+       def validate_kubeconfig_path_must_exist
+         path = File.expand_path configuration.dig("kubeconfig_path")
+         errors << "kubeconfig path is invalid" and return unless File.exist? path
+         errors << "kubeconfig path cannot be a directory" if File.directory? path
+       rescue
+         errors << "Invalid kubeconfig path"
+       end
+
+       def server_types
+         @server_types ||= hetzner_client.get("/server_types")["server_types"].map { |server_type| server_type["name"] }
+       rescue
+         @errors << "Cannot fetch server types from the Hetzner API, please try again later"
+         false
+       end
+
+       def locations
+         @locations ||= hetzner_client.get("/locations")["locations"].map { |location| location["name"] }
+       rescue
+         @errors << "Cannot fetch locations from the Hetzner API, please try again later"
+         false
+       end
+
+       def validate_location
+         errors << "Invalid location - available locations: nbg1 (Nuremberg, Germany), fsn1 (Falkenstein, Germany), hel1 (Helsinki, Finland)" unless locations.include? configuration.dig("location")
+       end
+
+       def find_available_releases
+         @available_releases ||= begin
+           response = HTTP.get("https://api.github.com/repos/k3s-io/k3s/tags").body
+           JSON.parse(response).map { |hash| hash["name"] }
+         end
+       rescue
+         errors << "Cannot fetch the k3s releases from GitHub, please try again later"
+       end
+
+       def validate_k3s_version
+         k3s_version = configuration.dig("k3s_version")
+         available_releases = find_available_releases
+         errors << "Invalid k3s version" unless available_releases.include? k3s_version
+       end
+
+       def validate_new_k3s_version
+         new_k3s_version = options[:new_k3s_version]
+         available_releases = find_available_releases
+         errors << "The new k3s version is invalid" unless available_releases.include? new_k3s_version
+       end
+
+       def validate_masters
+         masters_pool = nil
+
+         begin
+           masters_pool = configuration.dig("masters")
+         rescue
+           errors << "Invalid masters configuration"
+           return
+         end
+
+         if masters_pool.nil?
+           errors << "Invalid masters configuration"
+           return
+         end
+
+         validate_instance_group masters_pool, workers: false
+       end
+
+       def validate_worker_node_pools
+         worker_node_pools = nil
+
+         begin
+           worker_node_pools = configuration.dig("worker_node_pools")
+         rescue
+           errors << "Invalid node pools configuration"
+           return
+         end
+
+         if !worker_node_pools.is_a?(Array)
+           errors << "Invalid node pools configuration"
+         elsif worker_node_pools.size == 0
+           errors << "At least one node pool is required in order to schedule workloads"
+         elsif worker_node_pools.map { |worker_node_pool| worker_node_pool["name"] }.uniq.size != worker_node_pools.size
+           errors << "Each node pool must have a unique name"
+         elsif server_types
+           worker_node_pools.each do |worker_node_pool|
+             validate_instance_group worker_node_pool
+           end
+         end
+       end
+
+       def validate_all_nodes_must_be_of_same_series
+         series = used_server_types.map { |used_server_type| used_server_type[0..1] }
+         errors << "Master and worker node pools must all be of the same server series for networking to function properly (available series: cx, cpx, ccx)" unless series.uniq.size == 1
+       end
+
+       def validate_new_k3s_version_must_be_more_recent
+         return if options[:force] == "true"
+         return unless kubernetes_client
+
+         begin
+           Timeout.timeout(5) do
+             servers = kubernetes_client.api("v1").resource("nodes").list
+
+             if servers.size == 0
+               errors << "The cluster seems to have no nodes, nothing to upgrade"
+             else
+               available_releases = find_available_releases
+
+               # GitHub lists tags newest first, so a lower index means a more recent release.
+               current_k3s_version = servers.first.dig(:status, :nodeInfo, :kubeletVersion)
+               current_k3s_version_index = available_releases.index(current_k3s_version) || 1000
+
+               new_k3s_version = options[:new_k3s_version]
+               new_k3s_version_index = available_releases.index(new_k3s_version) || 1000
+
+               unless new_k3s_version_index < current_k3s_version_index
+                 errors << "The new k3s version must be more recent than the current one"
+               end
+             end
+           end
+         rescue Timeout::Error
+           puts "Cannot upgrade: Unable to fetch nodes from the Kubernetes API. Is the cluster online?"
+         end
+       end
+
+       def validate_instance_group(instance_group, workers: true)
+         instance_group_errors = []
+
+         instance_group_type = workers ? "Worker node pool #{instance_group["name"]}" : "Masters pool"
+
+         unless !workers || instance_group["name"] =~ /\A([A-Za-z0-9\-\_]+)\Z/
+           instance_group_errors << "#{instance_group_type} has an invalid name"
+         end
+
+         unless instance_group.is_a? Hash
+           instance_group_errors << "#{instance_group_type} is in an invalid format"
+         end
+
+         unless server_types.include?(instance_group["instance_type"])
+           instance_group_errors << "#{instance_group_type} has an invalid instance type"
+         end
+
+         if instance_group["instance_count"].is_a? Integer
+           if instance_group["instance_count"] < 1
+             instance_group_errors << "#{instance_group_type} must have at least one node"
+           elsif !workers
+             instance_group_errors << "Masters count must be 1 for a non-HA cluster or an odd number (3 is recommended) for an HA cluster" unless instance_group["instance_count"].odd?
+           end
+         else
+           instance_group_errors << "#{instance_group_type} has an invalid instance count"
+         end
+
+         used_server_types << instance_group["instance_type"]
+
+         errors << instance_group_errors
+       end
+
+       def kubernetes_client
+         return @kubernetes_client if @kubernetes_client
+
+         config_hash = YAML.load_file(File.expand_path(configuration["kubeconfig_path"]))
+         config_hash["current-context"] = configuration["cluster_name"]
+         @kubernetes_client = K8s::Client.config(K8s::Config.new(config_hash))
+       rescue
+         errors << "Cannot connect to the Kubernetes cluster"
+         false
+       end
+     end
+   end
+ end
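
For reference, here is an illustrative config file providing every key the validators above read. The token is a placeholder; the instance type, location, and version strings must be valid for your Hetzner project and the k3s releases list at the time you run the tool; masters and workers must share a server series, and an HA cluster needs an odd number of masters:

    hetzner_token: <your Hetzner Cloud API token>
    cluster_name: demo
    kubeconfig_path: "./kubeconfig"
    ssh_key_path: "~/.ssh/id_rsa.pub"
    k3s_version: v1.21.3+k3s1
    location: nbg1
    masters:
      instance_type: cpx21
      instance_count: 3
    worker_node_pools:
    - name: small
      instance_type: cpx21
      instance_count: 4

Assuming the gem installs a hetzner-k3s executable that starts this Thor class, the subcommands map directly onto the desc declarations above (pick version strings from the releases output):

    hetzner-k3s releases
    hetzner-k3s create-cluster --config-file config.yaml
    hetzner-k3s upgrade-cluster --config-file config.yaml --new-k3s-version v1.21.4+k3s1
    hetzner-k3s delete-cluster --config-file config.yaml
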
@@ -0,0 +1,38 @@
+ module K8s
+   class ResourceClient
+     def initialize(transport, api_client, api_resource, namespace: nil, resource_class: K8s::Resource)
+       @transport = transport
+       @api_client = api_client
+       @api_resource = api_resource
+       @namespace = namespace
+       @resource_class = resource_class
+
+       if @api_resource.name.include? '/'
+         @resource, @subresource = @api_resource.name.split('/', 2)
+       else
+         @resource = @api_resource.name
+         @subresource = nil
+       end
+
+       # fail "Resource #{api_resource.name} is not namespaced" unless api_resource.namespaced || !namespace
+     end
+
+     def path(name = nil, subresource: @subresource, namespace: @namespace)
+       namespace_part = namespace ? ['namespaces', namespace] : []
+
+       if namespaced?
+         if name && subresource
+           @api_client.path(*namespace_part, @resource, name, subresource)
+         elsif name
+           @api_client.path(*namespace_part, @resource, name)
+         else
+           @api_client.path(*namespace_part, @resource)
+         end
+       elsif name
+         @api_client.path(@resource, name)
+       else
+         @api_client.path(@resource)
+       end
+     end
+   end
+ end
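
This file reopens k8s-ruby's K8s::ResourceClient: the upstream guard that rejected a namespace on non-namespaced resources is commented out, and path now injects the namespaces/<ns> segment only when the resource is actually namespaced. An illustrative sketch of the resulting paths, assuming a client built the way the rest of this gem builds them (resource names, namespace, and the "# =>" outputs are illustrative; the /api/v1 prefix comes from the underlying api_client):

    pods  = kubernetes_client.api("v1").resource("pods", namespace: "default")
    nodes = kubernetes_client.api("v1").resource("nodes")

    pods.path("web")    # => "/api/v1/namespaces/default/pods/web"
    nodes.path("node1") # => "/api/v1/nodes/node1" (no namespaces segment injected)
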
@@ -0,0 +1,609 @@
+ require "thread"
+ require "net/ssh"
+ require "securerandom"
+ require "base64"
+ require "k8s-ruby"
+ require "timeout"
+
+ require_relative "../infra/client"
+ require_relative "../infra/firewall"
+ require_relative "../infra/network"
+ require_relative "../infra/ssh_key"
+ require_relative "../infra/server"
+ require_relative "../infra/load_balancer"
+
+ require_relative "../k3s/client_patch"
+
+ class Cluster
+   def initialize(hetzner_client:)
+     @hetzner_client = hetzner_client
+   end
+
+   def create(configuration:)
+     @hetzner_token = configuration.dig("hetzner_token")
+     @cluster_name = configuration.dig("cluster_name")
+     @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
+     @ssh_key_path = File.expand_path(configuration.dig("ssh_key_path"))
+     @k3s_version = configuration.dig("k3s_version")
+     @masters_config = configuration.dig("masters")
+     @worker_node_pools = configuration.dig("worker_node_pools")
+     @location = configuration.dig("location")
+     @flannel_interface = find_flannel_interface(configuration.dig("masters")["instance_type"])
+     @servers = []
+
+     create_resources
+
+     deploy_kubernetes
+
+     sleep 10 # give the new cluster a moment to settle before deploying the addons
+
+     deploy_cloud_controller_manager
+     deploy_csi_driver
+     deploy_system_upgrade_controller
+   end
+
+   def delete(configuration:)
+     @cluster_name = configuration.dig("cluster_name")
+     @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
+
+     delete_resources
+   end
+
+   def upgrade(configuration:, new_k3s_version:, config_file:)
+     @configuration = configuration
+     @cluster_name = configuration.dig("cluster_name")
+     @kubeconfig_path = File.expand_path(configuration.dig("kubeconfig_path"))
+     @new_k3s_version = new_k3s_version
+     @config_file = config_file
+
+     upgrade_cluster
+   end
+
+   private
+
+   attr_accessor :servers
+
+   attr_reader :hetzner_client, :cluster_name, :kubeconfig_path, :k3s_version,
+               :masters_config, :worker_node_pools,
+               :location, :flannel_interface, :ssh_key_path, :kubernetes_client,
+               :hetzner_token, :tls_sans, :new_k3s_version, :configuration,
+               :config_file
+
+   def latest_k3s_version
+     response = HTTP.get("https://api.github.com/repos/k3s-io/k3s/tags").body
+     JSON.parse(response).first["name"]
+   end
+
+   def create_resources
+     firewall_id = Hetzner::Firewall.new(
+       hetzner_client: hetzner_client,
+       cluster_name: cluster_name
+     ).create
+
+     network_id = Hetzner::Network.new(
+       hetzner_client: hetzner_client,
+       cluster_name: cluster_name
+     ).create
+
+     ssh_key_id = Hetzner::SSHKey.new(
+       hetzner_client: hetzner_client,
+       cluster_name: cluster_name
+     ).create(ssh_key_path: ssh_key_path)
+
+     server_configs = []
+
+     master_instance_type = masters_config["instance_type"]
+     masters_count = masters_config["instance_count"]
+
+     masters_count.times do |i|
+       server_configs << {
+         location: location,
+         instance_type: master_instance_type,
+         instance_id: "master#{i+1}",
+         firewall_id: firewall_id,
+         network_id: network_id,
+         ssh_key_id: ssh_key_id
+       }
+     end
+
+     if masters_count > 1
+       Hetzner::LoadBalancer.new(
+         hetzner_client: hetzner_client,
+         cluster_name: cluster_name
+       ).create(location: location, network_id: network_id)
+     end
+
+     worker_node_pools.each do |worker_node_pool|
+       worker_node_pool_name = worker_node_pool["name"]
+       worker_instance_type = worker_node_pool["instance_type"]
+       worker_count = worker_node_pool["instance_count"]
+
+       worker_count.times do |i|
+         server_configs << {
+           location: location,
+           instance_type: worker_instance_type,
+           instance_id: "pool-#{worker_node_pool_name}-worker#{i+1}",
+           firewall_id: firewall_id,
+           network_id: network_id,
+           ssh_key_id: ssh_key_id
+         }
+       end
+     end
+
+     threads = server_configs.map do |server_config|
+       Thread.new do
+         servers << Hetzner::Server.new(hetzner_client: hetzner_client, cluster_name: cluster_name).create(server_config)
+       end
+     end
+
+     threads.each(&:join)
+
+     puts
+
+     threads = servers.map do |server|
+       Thread.new { wait_for_ssh server }
+     end
+
+     threads.each(&:join)
+   end
+
+   def delete_resources
+     begin
+       Timeout.timeout(5) do
+         servers = kubernetes_client.api("v1").resource("nodes").list
+
+         threads = servers.map do |node|
+           Thread.new do
+             Hetzner::Server.new(hetzner_client: hetzner_client, cluster_name: cluster_name).delete(server_name: node.metadata[:name])
+           end
+         end
+
+         threads.each(&:join)
+       end
+     rescue Timeout::Error
+       puts "Unable to fetch nodes from the Kubernetes API. Is the cluster online?"
+     end
+
+     puts
+
+     sleep 5 # give the servers time to actually be deleted
+
+     Hetzner::Firewall.new(
+       hetzner_client: hetzner_client,
+       cluster_name: cluster_name
+     ).delete
+
+     Hetzner::Network.new(
+       hetzner_client: hetzner_client,
+       cluster_name: cluster_name
+     ).delete
+
+     Hetzner::SSHKey.new(
+       hetzner_client: hetzner_client,
+       cluster_name: cluster_name
+     ).delete
+
+     Hetzner::LoadBalancer.new(
+       hetzner_client: hetzner_client,
+       cluster_name: cluster_name
+     ).delete
+   end
+
+   def upgrade_cluster
+     resources = K8s::Resource.from_files(upgrade_plan_manifest_path)
+
+     begin
+       kubernetes_client.api("upgrade.cattle.io/v1").resource("plans").get("k3s-server", namespace: "system-upgrade")
+
+       puts "Aborting - an upgrade is already in progress."
+     rescue K8s::Error::NotFound
+       resources.each do |resource|
+         kubernetes_client.create_resource(resource)
+       end
+
+       puts "Upgrade will now start. Run `watch kubectl get nodes` to see the nodes being upgraded. This should take a few minutes for a small cluster."
+       puts "The API server may be briefly unavailable during the upgrade of the controlplane."
+
+       configuration["k3s_version"] = new_k3s_version
+
+       File.write(config_file, configuration.to_yaml)
+     end
+   end
+
+   def master_script(master)
+     server = master == first_master ? " --cluster-init " : " --server https://#{first_master_private_ip}:6443 "
+
+     <<~EOF
+       curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="#{k3s_version}" K3S_TOKEN="#{k3s_token}" INSTALL_K3S_EXEC="server \
+       --disable-cloud-controller \
+       --disable servicelb \
+       --disable traefik \
+       --disable local-storage \
+       --disable metrics-server \
+       --write-kubeconfig-mode=644 \
+       --node-name="$(hostname -f)" \
+       --cluster-cidr=10.244.0.0/16 \
+       --etcd-expose-metrics=true \
+       --kube-controller-manager-arg="address=0.0.0.0" \
+       --kube-controller-manager-arg="bind-address=0.0.0.0" \
+       --kube-proxy-arg="metrics-bind-address=0.0.0.0" \
+       --kube-scheduler-arg="address=0.0.0.0" \
+       --kube-scheduler-arg="bind-address=0.0.0.0" \
+       --node-taint CriticalAddonsOnly=true:NoExecute \
+       --kubelet-arg="cloud-provider=external" \
+       --node-ip=$(hostname -I | awk '{print $2}') \
+       --node-external-ip=$(hostname -I | awk '{print $1}') \
+       --flannel-iface=#{flannel_interface} \
+       #{server} #{tls_sans}" sh -
+     EOF
+   end
+
+   def worker_script
+     <<~EOF
+       curl -sfL https://get.k3s.io | K3S_TOKEN="#{k3s_token}" INSTALL_K3S_VERSION="#{k3s_version}" K3S_URL=https://#{first_master_private_ip}:6443 INSTALL_K3S_EXEC="agent \
+       --node-name="$(hostname -f)" \
+       --kubelet-arg="cloud-provider=external" \
+       --node-ip=$(hostname -I | awk '{print $2}') \
+       --node-external-ip=$(hostname -I | awk '{print $1}') \
+       --flannel-iface=#{flannel_interface}" sh -
+     EOF
+   end
+
+   def deploy_kubernetes
+     puts
+     puts "Deploying k3s to first master (#{first_master["name"]})..."
+
+     ssh first_master, master_script(first_master), print_output: true
+
+     puts
+     puts "...k3s has been deployed to first master."
+
+     save_kubeconfig
+
+     if masters.size > 1
+       threads = masters[1..-1].map do |master|
+         Thread.new do
+           puts
+           puts "Deploying k3s to master #{master["name"]}..."
+
+           ssh master, master_script(master), print_output: true
+
+           puts
+           puts "...k3s has been deployed to master #{master["name"]}."
+         end
+       end
+
+       threads.each(&:join)
+     end
+
+     threads = workers.map do |worker|
+       Thread.new do
+         puts
+         puts "Deploying k3s to worker (#{worker["name"]})..."
+
+         ssh worker, worker_script, print_output: true
+
+         puts
+         puts "...k3s has been deployed to worker (#{worker["name"]})."
+       end
+     end
+
+     threads.each(&:join)
+   end
+
+   def deploy_cloud_controller_manager
+     puts
+     puts "Deploying Hetzner Cloud Controller Manager..."
+
+     begin
+       kubernetes_client.api("v1").resource("secrets").get("hcloud", namespace: "kube-system")
+     rescue K8s::Error::NotFound
+       secret = K8s::Resource.new(
+         apiVersion: "v1",
+         kind: "Secret",
+         metadata: {
+           namespace: "kube-system",
+           name: "hcloud",
+         },
+         data: {
+           network: Base64.encode64(cluster_name),
+           token: Base64.encode64(hetzner_token)
+         }
+       )
+
+       kubernetes_client.api("v1").resource("secrets").create_resource(secret)
+     end
+
+     manifest = HTTP.follow.get("https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/latest/download/ccm-networks.yaml").body
+
+     File.write("/tmp/cloud-controller-manager.yaml", manifest)
+
+     resources = K8s::Resource.from_files("/tmp/cloud-controller-manager.yaml")
+
+     begin
+       kubernetes_client.api("apps/v1").resource("deployments").get("hcloud-cloud-controller-manager", namespace: "kube-system")
+
+       resources.each do |resource|
+         kubernetes_client.update_resource(resource)
+       end
+     rescue K8s::Error::NotFound
+       resources.each do |resource|
+         kubernetes_client.create_resource(resource)
+       end
+     end
+
+     puts "...Cloud Controller Manager deployed"
+   rescue Excon::Error::Socket
+     retry
+   end
+
+   def deploy_system_upgrade_controller
+     puts
+     puts "Deploying k3s System Upgrade Controller..."
+
+     manifest = HTTP.follow.get("https://github.com/rancher/system-upgrade-controller/releases/download/v0.7.3/system-upgrade-controller.yaml").body
+
+     File.write("/tmp/system-upgrade-controller.yaml", manifest)
+
+     resources = K8s::Resource.from_files("/tmp/system-upgrade-controller.yaml")
+
+     begin
+       kubernetes_client.api("apps/v1").resource("deployments").get("system-upgrade-controller", namespace: "system-upgrade")
+
+       resources.each do |resource|
+         kubernetes_client.update_resource(resource)
+       end
+     rescue K8s::Error::NotFound
+       resources.each do |resource|
+         kubernetes_client.create_resource(resource)
+       end
+     end
+
+     puts "...k3s System Upgrade Controller deployed"
+   rescue Excon::Error::Socket
+     retry
+   end
+
+   def deploy_csi_driver
+     puts
+     puts "Deploying Hetzner CSI Driver..."
+
+     begin
+       kubernetes_client.api("v1").resource("secrets").get("hcloud-csi", namespace: "kube-system")
+     rescue K8s::Error::NotFound
+       secret = K8s::Resource.new(
+         apiVersion: "v1",
+         kind: "Secret",
+         metadata: {
+           namespace: "kube-system",
+           name: "hcloud-csi",
+         },
+         data: {
+           token: Base64.encode64(hetzner_token)
+         }
+       )
+
+       kubernetes_client.api("v1").resource("secrets").create_resource(secret)
+     end
+
+     manifest = HTTP.follow.get("https://raw.githubusercontent.com/hetznercloud/csi-driver/v1.5.3/deploy/kubernetes/hcloud-csi.yml").body
+
+     File.write("/tmp/csi-driver.yaml", manifest)
+
+     resources = K8s::Resource.from_files("/tmp/csi-driver.yaml")
+
+     begin
+       kubernetes_client.api("apps/v1").resource("daemonsets").get("hcloud-csi-node", namespace: "kube-system")
+
+       resources.each do |resource|
+         begin
+           kubernetes_client.update_resource(resource)
+         rescue K8s::Error::Invalid => e
+           raise e unless e.message =~ /must be specified/i
+         end
+       end
+     rescue K8s::Error::NotFound
+       resources.each do |resource|
+         kubernetes_client.create_resource(resource)
+       end
+     end
+
+     puts "...CSI Driver deployed"
+   rescue Excon::Error::Socket
+     retry
+   end
+
+   def wait_for_ssh(server)
+     server_name = server["name"]
+
+     puts "Waiting for server #{server_name} to be up..."
+
+     loop do
+       result = ssh(server, "echo UP")
+       break if result == "UP"
+     end
+
+     puts "...server #{server_name} is now up."
+   rescue Errno::ENETUNREACH
+     retry
+   end
+
+   def ssh(server, command, print_output: false)
+     public_ip = server.dig("public_net", "ipv4", "ip")
+     output = ""
+
+     Net::SSH.start(public_ip, "root") do |session|
+       session.exec!(command) do |channel, stream, data|
+         output << data
+         puts data if print_output
+       end
+     end
+
+     output.chop
+   rescue Net::SSH::ConnectionTimeout
+     retry
+   rescue Net::SSH::Disconnect => e
+     retry unless e.message =~ /Too many authentication failures/
+   rescue Errno::ECONNREFUSED
+     retry
+   end
+
+   def kubernetes_client
+     return @kubernetes_client if @kubernetes_client
+
+     config_hash = YAML.load_file(kubeconfig_path)
+     config_hash["current-context"] = cluster_name
+     @kubernetes_client = K8s::Client.config(K8s::Config.new(config_hash))
+   end
+
+   def find_flannel_interface(server_type)
+     # The name of the private network interface depends on the server series.
+     case server_type[0..1]
+     when "cp", "cc"
+       "enp7s0"
+     when "cx"
+       "ens10"
+     end
+   end
+
+   def all_servers
+     @all_servers ||= hetzner_client.get("/servers")["servers"]
+   end
+
+   def masters
+     @masters ||= all_servers.select { |server| server["name"] =~ /master\d+\Z/ }.sort { |a, b| a["name"] <=> b["name"] }
+   end
+
+   def workers
+     @workers ||= all_servers.select { |server| server["name"] =~ /worker\d+\Z/ }.sort { |a, b| a["name"] <=> b["name"] }
+   end
+
+   def k3s_token
+     @k3s_token ||= begin
+       # Reuse the token from the first master if k3s is already installed there,
+       # otherwise generate a new one.
+       token = ssh(first_master, "{ TOKEN=$(< /var/lib/rancher/k3s/server/node-token); } 2> /dev/null; echo $TOKEN")
+
+       if token.empty?
+         SecureRandom.hex
+       else
+         token.split(":").last
+       end
+     end
+   end
+
+   def first_master_private_ip
+     @first_master_private_ip ||= first_master["private_net"][0]["ip"]
+   end
+
+   def first_master
+     masters.first
+   end
+
+   def api_server_ip
+     return @api_server_ip if @api_server_ip
+
+     # With multiple masters the API is reached through the load balancer,
+     # otherwise through the first master's public IP.
+     @api_server_ip = if masters.size > 1
+       load_balancer_name = "#{cluster_name}-api"
+       load_balancer = hetzner_client.get("/load_balancers")["load_balancers"].detect { |load_balancer| load_balancer["name"] == load_balancer_name }
+       load_balancer["public_net"]["ipv4"]["ip"]
+     else
+       first_master_public_ip
+     end
+   end
+
+   def tls_sans
+     sans = " --tls-san=#{api_server_ip} "
+
+     masters.each do |master|
+       master_private_ip = master["private_net"][0]["ip"]
+       sans << " --tls-san=#{master_private_ip} "
+     end
+
+     sans
+   end
+
+   def first_master_public_ip
+     @first_master_public_ip ||= first_master.dig("public_net", "ipv4", "ip")
+   end
+
+   def save_kubeconfig
+     kubeconfig = ssh(first_master, "cat /etc/rancher/k3s/k3s.yaml").
+       gsub("127.0.0.1", api_server_ip).
+       gsub("default", cluster_name)
+
+     File.write(kubeconfig_path, kubeconfig)
+   end
+
+   def upgrade_plan_manifest_path
+     worker_upgrade_concurrency = workers.size - 1
+     worker_upgrade_concurrency = 1 if worker_upgrade_concurrency == 0
+
+     manifest = <<~EOF
+       apiVersion: upgrade.cattle.io/v1
+       kind: Plan
+       metadata:
+         name: k3s-server
+         namespace: system-upgrade
+         labels:
+           k3s-upgrade: server
+       spec:
+         concurrency: 1
+         version: #{new_k3s_version}
+         nodeSelector:
+           matchExpressions:
+             - {key: node-role.kubernetes.io/master, operator: In, values: ["true"]}
+         serviceAccountName: system-upgrade
+         tolerations:
+         - key: "CriticalAddonsOnly"
+           operator: "Equal"
+           value: "true"
+           effect: "NoExecute"
+         cordon: true
+         upgrade:
+           image: rancher/k3s-upgrade
+       ---
+       apiVersion: upgrade.cattle.io/v1
+       kind: Plan
+       metadata:
+         name: k3s-agent
+         namespace: system-upgrade
+         labels:
+           k3s-upgrade: agent
+       spec:
+         concurrency: #{worker_upgrade_concurrency}
+         version: #{new_k3s_version}
+         nodeSelector:
+           matchExpressions:
+             - {key: node-role.kubernetes.io/master, operator: NotIn, values: ["true"]}
+         serviceAccountName: system-upgrade
+         prepare:
+           image: rancher/k3s-upgrade
+           args: ["prepare", "k3s-server"]
+         cordon: true
+         upgrade:
+           image: rancher/k3s-upgrade
+     EOF
+
+     temp_file_path = "/tmp/k3s-upgrade-plan.yaml"
+
+     File.write(temp_file_path, manifest)
+
+     temp_file_path
+   end
+ end
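
Cluster is normally driven by the CLI above, but its public create/delete/upgrade API can also be exercised directly. A minimal sketch, assuming a config file of the shape the CLI validates; the require paths and version string are illustrative, since the gem's file layout is not shown in this diff:

    require "yaml"
    require_relative "hetzner/infra/client"        # illustrative path
    require_relative "hetzner/kubernetes/cluster"  # illustrative path

    configuration  = YAML.load_file("config.yaml")
    hetzner_client = Hetzner::Client.new(token: configuration["hetzner_token"])

    cluster = Cluster.new(hetzner_client: hetzner_client)
    cluster.create(configuration: configuration)

    # Later, to upgrade (placeholder version) and eventually tear everything down:
    # cluster.upgrade(configuration: configuration, new_k3s_version: "v1.21.4+k3s1", config_file: "config.yaml")
    # cluster.delete(configuration: configuration)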