nvoi 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/todo/refactor/00-overview.md +171 -0
- data/.claude/todo/refactor/01-objects.md +96 -0
- data/.claude/todo/refactor/02-utils.md +143 -0
- data/.claude/todo/refactor/03-external-cloud.md +164 -0
- data/.claude/todo/refactor/04-external-dns.md +104 -0
- data/.claude/todo/refactor/05-external.md +133 -0
- data/.claude/todo/refactor/06-cli.md +123 -0
- data/.claude/todo/refactor/07-cli-deploy-command.md +177 -0
- data/.claude/todo/refactor/08-cli-deploy-steps.md +201 -0
- data/.claude/todo/refactor/09-cli-delete-command.md +169 -0
- data/.claude/todo/refactor/10-cli-exec-command.md +157 -0
- data/.claude/todo/refactor/11-cli-credentials-command.md +190 -0
- data/.claude/todo/refactor/12-cli-db-command.md +128 -0
- data/.claude/todo/refactor/_target.md +79 -0
- data/.claude/todo/refactor-execution/00-entrypoint.md +49 -0
- data/.claude/todo/refactor-execution/01-objects.md +42 -0
- data/.claude/todo/refactor-execution/02-utils.md +41 -0
- data/.claude/todo/refactor-execution/03-external-cloud.md +38 -0
- data/.claude/todo/refactor-execution/04-external-dns.md +35 -0
- data/.claude/todo/refactor-execution/05-external-other.md +46 -0
- data/.claude/todo/refactor-execution/06-cli-deploy.md +45 -0
- data/.claude/todo/refactor-execution/07-cli-delete.md +43 -0
- data/.claude/todo/refactor-execution/08-cli-exec.md +30 -0
- data/.claude/todo/refactor-execution/09-cli-credentials.md +34 -0
- data/.claude/todo/refactor-execution/10-cli-db.md +31 -0
- data/.claude/todo/refactor-execution/11-cli-router.md +44 -0
- data/.claude/todo/refactor-execution/12-cleanup.md +120 -0
- data/.claude/todo/refactor-execution/_monitoring-strategy.md +126 -0
- data/.claude/todo/scaleway.impl.md +644 -0
- data/.claude/todo/scaleway.reference.md +520 -0
- data/.claude/todos.md +550 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +46 -5
- data/Rakefile +1 -1
- data/doc/config-schema.yaml +44 -11
- data/examples/golang/deploy.enc +0 -0
- data/examples/golang/main.go +18 -0
- data/exe/nvoi +3 -1
- data/ingest +0 -0
- data/lib/nvoi/cli/config/command.rb +219 -0
- data/lib/nvoi/cli/credentials/edit/command.rb +384 -0
- data/lib/nvoi/cli/credentials/show/command.rb +35 -0
- data/lib/nvoi/cli/db/command.rb +308 -0
- data/lib/nvoi/cli/delete/command.rb +75 -0
- data/lib/nvoi/cli/delete/steps/detach_volumes.rb +98 -0
- data/lib/nvoi/cli/delete/steps/teardown_dns.rb +50 -0
- data/lib/nvoi/cli/delete/steps/teardown_firewall.rb +46 -0
- data/lib/nvoi/cli/delete/steps/teardown_network.rb +30 -0
- data/lib/nvoi/cli/delete/steps/teardown_server.rb +50 -0
- data/lib/nvoi/cli/delete/steps/teardown_tunnel.rb +44 -0
- data/lib/nvoi/cli/delete/steps/teardown_volume.rb +61 -0
- data/lib/nvoi/cli/deploy/command.rb +184 -0
- data/lib/nvoi/cli/deploy/steps/build_image.rb +27 -0
- data/lib/nvoi/cli/deploy/steps/cleanup_images.rb +42 -0
- data/lib/nvoi/cli/deploy/steps/configure_tunnel.rb +102 -0
- data/lib/nvoi/cli/deploy/steps/deploy_service.rb +399 -0
- data/lib/nvoi/cli/deploy/steps/provision_network.rb +44 -0
- data/lib/nvoi/cli/deploy/steps/provision_server.rb +143 -0
- data/lib/nvoi/cli/deploy/steps/provision_volume.rb +171 -0
- data/lib/nvoi/cli/deploy/steps/setup_k3s.rb +490 -0
- data/lib/nvoi/cli/exec/command.rb +173 -0
- data/lib/nvoi/cli/logs/command.rb +66 -0
- data/lib/nvoi/cli/onboard/command.rb +761 -0
- data/lib/nvoi/cli/unlock/command.rb +72 -0
- data/lib/nvoi/cli.rb +339 -141
- data/lib/nvoi/config_api/actions/app.rb +53 -0
- data/lib/nvoi/config_api/actions/compute_provider.rb +55 -0
- data/lib/nvoi/config_api/actions/database.rb +70 -0
- data/lib/nvoi/config_api/actions/domain_provider.rb +40 -0
- data/lib/nvoi/config_api/actions/env.rb +32 -0
- data/lib/nvoi/config_api/actions/init.rb +67 -0
- data/lib/nvoi/config_api/actions/secret.rb +32 -0
- data/lib/nvoi/config_api/actions/server.rb +66 -0
- data/lib/nvoi/config_api/actions/service.rb +52 -0
- data/lib/nvoi/config_api/actions/volume.rb +40 -0
- data/lib/nvoi/config_api/base.rb +38 -0
- data/lib/nvoi/config_api/result.rb +26 -0
- data/lib/nvoi/config_api.rb +93 -0
- data/lib/nvoi/errors.rb +68 -50
- data/lib/nvoi/external/cloud/aws.rb +450 -0
- data/lib/nvoi/external/cloud/base.rb +99 -0
- data/lib/nvoi/external/cloud/factory.rb +48 -0
- data/lib/nvoi/external/cloud/hetzner.rb +402 -0
- data/lib/nvoi/external/cloud/scaleway.rb +559 -0
- data/lib/nvoi/external/cloud.rb +15 -0
- data/lib/nvoi/external/containerd.rb +86 -0
- data/lib/nvoi/external/database/mysql.rb +84 -0
- data/lib/nvoi/external/database/postgres.rb +82 -0
- data/lib/nvoi/external/database/provider.rb +65 -0
- data/lib/nvoi/external/database/sqlite.rb +72 -0
- data/lib/nvoi/external/database.rb +22 -0
- data/lib/nvoi/external/dns/cloudflare.rb +310 -0
- data/lib/nvoi/external/kubectl.rb +65 -0
- data/lib/nvoi/external/ssh.rb +106 -0
- data/lib/nvoi/objects/config_override.rb +60 -0
- data/lib/nvoi/objects/configuration.rb +483 -0
- data/lib/nvoi/objects/database.rb +56 -0
- data/lib/nvoi/objects/dns.rb +14 -0
- data/lib/nvoi/objects/firewall.rb +11 -0
- data/lib/nvoi/objects/network.rb +11 -0
- data/lib/nvoi/objects/server.rb +14 -0
- data/lib/nvoi/objects/service_spec.rb +26 -0
- data/lib/nvoi/objects/tunnel.rb +14 -0
- data/lib/nvoi/objects/volume.rb +17 -0
- data/lib/nvoi/utils/config_loader.rb +172 -0
- data/lib/nvoi/utils/constants.rb +61 -0
- data/lib/nvoi/{credentials/manager.rb → utils/credential_store.rb} +16 -16
- data/lib/nvoi/{credentials → utils}/crypto.rb +8 -5
- data/lib/nvoi/{config → utils}/env_resolver.rb +10 -2
- data/lib/nvoi/utils/logger.rb +84 -0
- data/lib/nvoi/{config/naming.rb → utils/namer.rb} +37 -25
- data/lib/nvoi/{deployer → utils}/retry.rb +23 -3
- data/lib/nvoi/utils/templates.rb +62 -0
- data/lib/nvoi/version.rb +1 -1
- data/lib/nvoi.rb +27 -55
- data/templates/app-ingress.yaml.erb +3 -1
- data/templates/error-backend.yaml.erb +134 -0
- metadata +121 -44
- data/examples/golang/deploy.yml +0 -54
- data/lib/nvoi/cloudflare/client.rb +0 -287
- data/lib/nvoi/config/config.rb +0 -248
- data/lib/nvoi/config/loader.rb +0 -102
- data/lib/nvoi/config/ssh_keys.rb +0 -82
- data/lib/nvoi/config/types.rb +0 -274
- data/lib/nvoi/constants.rb +0 -59
- data/lib/nvoi/credentials/editor.rb +0 -272
- data/lib/nvoi/deployer/cleaner.rb +0 -36
- data/lib/nvoi/deployer/image_builder.rb +0 -23
- data/lib/nvoi/deployer/infrastructure.rb +0 -126
- data/lib/nvoi/deployer/orchestrator.rb +0 -146
- data/lib/nvoi/deployer/service_deployer.rb +0 -311
- data/lib/nvoi/deployer/tunnel_manager.rb +0 -57
- data/lib/nvoi/deployer/types.rb +0 -8
- data/lib/nvoi/k8s/renderer.rb +0 -44
- data/lib/nvoi/k8s/templates.rb +0 -29
- data/lib/nvoi/logger.rb +0 -72
- data/lib/nvoi/providers/aws.rb +0 -403
- data/lib/nvoi/providers/base.rb +0 -111
- data/lib/nvoi/providers/hetzner.rb +0 -288
- data/lib/nvoi/providers/hetzner_client.rb +0 -170
- data/lib/nvoi/remote/docker_manager.rb +0 -203
- data/lib/nvoi/remote/ssh_executor.rb +0 -72
- data/lib/nvoi/remote/volume_manager.rb +0 -103
- data/lib/nvoi/service/delete.rb +0 -234
- data/lib/nvoi/service/deploy.rb +0 -80
- data/lib/nvoi/service/exec.rb +0 -144
- data/lib/nvoi/service/provider.rb +0 -36
- data/lib/nvoi/steps/application_deployer.rb +0 -26
- data/lib/nvoi/steps/database_provisioner.rb +0 -60
- data/lib/nvoi/steps/k3s_cluster_setup.rb +0 -105
- data/lib/nvoi/steps/k3s_provisioner.rb +0 -351
- data/lib/nvoi/steps/server_provisioner.rb +0 -43
- data/lib/nvoi/steps/services_provisioner.rb +0 -29
- data/lib/nvoi/steps/tunnel_configurator.rb +0 -66
- data/lib/nvoi/steps/volume_provisioner.rb +0 -154
data/lib/nvoi/cli/deploy/steps/provision_volume.rb

@@ -0,0 +1,171 @@

# frozen_string_literal: true

module Nvoi
  class Cli
    module Deploy
      module Steps
        # ProvisionVolume handles block storage volume provisioning
        class ProvisionVolume
          def initialize(config, provider, log)
            @config = config
            @provider = provider
            @log = log
            @namer = config.namer
          end

          def run
            volumes = collect_volumes
            return if volumes.empty?

            @log.info "Provisioning %d volume(s)", volumes.size

            volumes.each do |vol_config|
              provision_volume(vol_config)
            end

            @log.success "All volumes provisioned"
          end

          private

          def collect_volumes
            volumes = []

            @config.deploy.application.servers.each do |server_group, server_config|
              next unless server_config.volumes && !server_config.volumes.empty?

              resolved_server = @namer.server_name(server_group, 1)

              server_config.volumes.each do |vol_name, vol_config|
                full_name = @namer.server_volume_name(server_group, vol_name)
                volumes << {
                  name: full_name,
                  server_name: resolved_server,
                  mount_path: @namer.server_volume_host_path(server_group, vol_name),
                  size: vol_config.size
                }
              end
            end

            volumes
          end

          def provision_volume(vol_config)
            @log.info "Provisioning volume: %s", vol_config[:name]

            # Check if volume already exists
            existing = @provider.get_volume_by_name(vol_config[:name])
            if existing
              @log.info "Volume already exists: %s", vol_config[:name]
              ensure_attached_and_mounted(existing, vol_config)
              return
            end

            # Find server to attach to
            server = @provider.find_server(vol_config[:server_name])
            raise Errors::VolumeError, "server not found: #{vol_config[:server_name]}" unless server

            # Create volume
            opts = Objects::Volume::CreateOptions.new(
              name: vol_config[:name],
              size: vol_config[:size],
              server_id: server.id
            )
            volume = @provider.create_volume(opts)

            # Attach volume
            @log.info "Attaching volume to server..."
            @provider.attach_volume(volume.id, server.id)

            # Mount volume on server
            mount_volume(server.public_ipv4, volume, vol_config[:mount_path])

            @log.success "Volume provisioned and mounted: %s", vol_config[:name]
          end

          def ensure_attached_and_mounted(volume, vol_config)
            server = @provider.find_server(vol_config[:server_name])
            return unless server

            # Attach if not attached
            if volume.server_id.nil? || volume.server_id.empty?
              @log.info "Attaching existing volume to server..."
              @provider.attach_volume(volume.id, server.id)
              volume = @provider.get_volume(volume.id)
            else
              @log.info "Volume already attached to server"
            end

            # Mount if not mounted
            mount_volume(server.public_ipv4, volume, vol_config[:mount_path])
          end

          def mount_volume(server_ip, volume, mount_path)
            ssh = External::Ssh.new(server_ip, @config.ssh_key_path)

            # Get device path from provider
            @log.info "Waiting for device path..."
            device_path = @provider.wait_for_device_path(volume.id, ssh)
            raise Errors::VolumeError, "volume #{volume.id} has no device path after attachment" unless device_path

            @log.info "Device path: %s", device_path
            @log.info "Waiting for device to be available on server..."

            # Wait for device to be available
            wait_for_device(ssh, device_path)

            @log.info "Mounting volume at %s", mount_path

            # Check if already mounted at target path
            mount_check = ssh.execute("mountpoint -q #{mount_path} && echo 'mounted' || echo 'not'").strip
            if mount_check == "mounted"
              @log.info "Volume already mounted at %s", mount_path
              return
            end

            # Create mount point
            ssh.execute("sudo mkdir -p #{mount_path}")

            # Check if device has filesystem
            fs_check = ssh.execute("sudo blkid #{device_path} || true")
            if fs_check.empty? || !fs_check.include?("TYPE=")
              # Format with XFS
              @log.info "Formatting volume with XFS"
              ssh.execute("sudo mkfs.xfs #{device_path}")
            end

            # Mount
            ssh.execute("sudo mount #{device_path} #{mount_path}")

            # Add to fstab using UUID (more reliable than device path)
            fstab_check = ssh.execute("grep '#{mount_path}' /etc/fstab || true")
            if fstab_check.empty?
              cmd = "UUID=$(sudo blkid -s UUID -o value #{device_path}) && " \
                    "echo \"UUID=$UUID #{mount_path} xfs defaults,nofail 0 2\" | sudo tee -a /etc/fstab"
              ssh.execute(cmd)
            end

            # Verify mount succeeded
            verify_mount(ssh, mount_path)

            @log.success "Volume mounted at %s", mount_path
          end

          def wait_for_device(ssh, device_path)
            ready = Utils::Retry.poll(max_attempts: 30, interval: 2) do
              check = ssh.execute("test -b #{device_path} && echo 'ready' || true")
              check.strip == "ready"
            end

            raise Errors::VolumeError, "device not available: #{device_path}" unless ready
          end

          def verify_mount(ssh, mount_path)
            check = ssh.execute("mountpoint -q #{mount_path} && echo 'mounted' || echo 'not mounted'")
            raise Errors::VolumeError, "volume not mounted at #{mount_path}" unless check.strip == "mounted"
          end
        end
      end
    end
  end
end
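The wait_for_device helper above, like the readiness checks in the next file, relies on Utils::Retry.poll from data/lib/nvoi/utils/retry.rb (renamed from deployer/retry.rb in this release, per the file list). That helper's body is not included in this excerpt; the sketch below only illustrates the contract the callers assume — run the block until it returns a truthy value or attempts run out, and report the outcome as a boolean.

# Illustrative sketch only; the real helper lives in data/lib/nvoi/utils/retry.rb
# and is not shown in this diff excerpt.
module Nvoi
  module Utils
    module Retry
      # Run the block up to max_attempts times, sleeping `interval` seconds
      # between attempts; return true on the first truthy result, else false.
      def self.poll(max_attempts:, interval:)
        max_attempts.times do
          return true if yield
          sleep interval
        end
        false
      end
    end
  end
end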
data/lib/nvoi/cli/deploy/steps/setup_k3s.rb

@@ -0,0 +1,490 @@

# frozen_string_literal: true

module Nvoi
  class Cli
    module Deploy
      module Steps
        # SetupK3s handles K3s cluster installation and configuration
        class SetupK3s
          def initialize(config, provider, log, main_server_ip)
            @config = config
            @provider = provider
            @log = log
            @main_server_ip = main_server_ip
          end

          def run
            @log.info "Setting up K3s cluster"

            # Find master server group
            master_group, master_config = find_master_group
            raise Errors::K8sError, "no master server group found" unless master_group

            # Setup K3s on master
            master_name = @config.namer.server_name(master_group, 1)
            master = @provider.find_server(master_name)
            raise Errors::K8sError, "master server not found: #{master_name}" unless master

            master_ssh = External::Ssh.new(master.public_ipv4, @config.ssh_key_path)

            # Provision master
            cluster_token, master_private_ip = provision_master(master_ssh, master_group, master_name, master.private_ipv4)

            # Setup workers
            @config.deploy.application.servers.each do |group_name, group_config|
              next if group_name == master_group
              next unless group_config

              count = group_config.count.positive? ? group_config.count : 1

              (1..count).each do |i|
                worker_name = @config.namer.server_name(group_name, i)
                setup_worker(worker_name, group_name, cluster_token, master_private_ip, master_ssh)
              end
            end

            @log.success "K3s cluster setup complete"
          end

          private

          def find_master_group
            @config.deploy.application.servers.each do |name, cfg|
              return [name, cfg] if cfg&.master
            end

            # If only one group, use it as master
            if @config.deploy.application.servers.size == 1
              return @config.deploy.application.servers.first
            end

            nil
          end

          def provision_master(ssh, server_role, server_name, private_ip)
            wait_for_cloud_init(ssh)

            # Discover private IP via SSH if not provided by provider
            private_ip ||= discover_private_ip(ssh)
            raise Errors::K8sError, "server has no private IP - ensure network is attached" unless private_ip

            # Check if K3s is already running
            begin
              ssh.execute("systemctl is-active k3s")
              @log.info "K3s already running, skipping installation"
              setup_kubeconfig(ssh)
              token = get_cluster_token(ssh)
              return [token, private_ip]
            rescue Errors::SshCommandError
              # Not running, continue installation
            end

            @log.info "Installing K3s server"

            private_iface = get_interface_for_ip(ssh, private_ip)

            @log.info "Installing k3s on private IP: %s, interface: %s", private_ip, private_iface

            # Install Docker for image building
            install_docker(ssh, private_ip)

            # Configure k3s registries
            configure_registries(ssh)

            # Install K3s
            install_cmd = <<~CMD
              curl -sfL https://get.k3s.io | sudo sh -s - server \
                --bind-address=#{private_ip} \
                --advertise-address=#{private_ip} \
                --node-ip=#{private_ip} \
                --tls-san=#{private_ip} \
                --flannel-iface=#{private_iface} \
                --flannel-backend=wireguard-native \
                --disable=traefik \
                --write-kubeconfig-mode=644 \
                --cluster-cidr=10.42.0.0/16 \
                --service-cidr=10.43.0.0/16
            CMD

            ssh.execute(install_cmd, stream: true)
            @log.success "K3s server installed"

            setup_kubeconfig(ssh, private_ip)
            wait_for_k3s_ready(ssh)

            # Label master node
            label_node(ssh, server_name, { "nvoi.io/server-name" => server_role })

            # Setup registry and ingress
            setup_registry(ssh)
            setup_ingress_controller(ssh)

            token = get_cluster_token(ssh)
            [token, private_ip]
          end

          def setup_worker(worker_name, group_name, cluster_token, master_private_ip, master_ssh)
            @log.info "Setting up K3s worker: %s", worker_name

            worker = @provider.find_server(worker_name)
            unless worker
              @log.warning "Worker server not found: %s", worker_name
              return
            end

            worker_ssh = External::Ssh.new(worker.public_ipv4, @config.ssh_key_path)
            wait_for_cloud_init(worker_ssh)

            # Discover private IP via SSH if not provided by provider
            private_ip = worker.private_ipv4 || discover_private_ip(worker_ssh)
            unless private_ip
              @log.warning "Worker %s has no private IP, skipping", worker_name
              return
            end

            # Check if K3s agent is already running
            begin
              worker_ssh.execute("systemctl is-active k3s-agent")
              @log.info "K3s agent already running on %s", worker_name
              return
            rescue Errors::SshCommandError
              # Not running, continue
            end

            @log.info "Installing K3s agent on %s", worker_name

            private_iface = get_interface_for_ip(worker_ssh, private_ip)

            cmd = <<~CMD
              curl -sfL https://get.k3s.io | K3S_URL="https://#{master_private_ip}:6443" K3S_TOKEN="#{cluster_token}" sh -s - agent \
                --node-ip=#{private_ip} \
                --flannel-iface=#{private_iface} \
                --node-name=#{worker_name}
            CMD

            worker_ssh.execute(cmd, stream: true)
            @log.success "K3s agent installed on %s", worker_name

            # Label worker node from master
            label_worker_from_master(master_ssh, worker_name, group_name)
          end

          def wait_for_cloud_init(ssh)
            @log.info "Waiting for cloud-init to complete"

            ready = Utils::Retry.poll(max_attempts: 60, interval: 5) do
              begin
                output = ssh.execute("test -f /var/lib/cloud/instance/boot-finished && echo 'ready'")
                output.include?("ready")
              rescue Errors::SshCommandError
                false
              end
            end

            raise Errors::K8sError, "cloud-init timeout" unless ready

            @log.success "Cloud-init complete"
          end

          def get_cluster_token(ssh)
            @log.info "Retrieving K3s cluster token"
            output = ssh.execute("sudo cat /var/lib/rancher/k3s/server/node-token")
            token = output.strip
            raise Errors::K8sError, "cluster token is empty" if token.empty?

            @log.success "Cluster token retrieved"
            token
          end

          def discover_private_ip(ssh)
            # Match RFC1918 private ranges, exclude docker/bridge interfaces
            output = ssh.execute("ip addr show | grep -v 'docker\\|br-\\|veth' | grep -E 'inet (10\\.|172\\.(1[6-9]|2[0-9]|3[01])\\.|192\\.168\\.)' | awk '{print $2}' | cut -d/ -f1 | head -1")
            ip = output.strip
            ip.empty? ? nil : ip
          end

          def get_interface_for_ip(ssh, ip)
            # Find the interface that has this IP
            output = ssh.execute("ip addr show | grep 'inet #{ip}/' | awk '{print $NF}'").strip
            return output unless output.empty?

            # Fallback: find any interface with the IP prefix
            prefix = ip.split(".")[0..2].join(".")
            output = ssh.execute("ip addr show | grep -v 'docker\\|br-\\|veth' | grep 'inet #{prefix}\\.' | awk '{print $NF}' | head -1").strip
            output.empty? ? nil : output
          end

          def install_docker(ssh, private_ip)
            begin
              ssh.execute("systemctl is-active docker")
              @log.info "Docker already running, skipping installation"
            rescue Errors::SshCommandError
              docker_install = <<~CMD
                sudo apt-get update && sudo apt-get install -y docker.io
                sudo systemctl start docker
                sudo systemctl enable docker
                sudo usermod -aG docker deploy
              CMD

              ssh.execute(docker_install, stream: true)
            end

            # Configure Docker for insecure registry
            docker_config = <<~CMD
              sudo mkdir -p /etc/docker
              sudo tee /etc/docker/daemon.json > /dev/null <<EOF
              {"insecure-registries": ["#{private_ip}:5001", "localhost:30500"]}
              EOF
              sudo systemctl restart docker
            CMD

            ssh.execute(docker_config)

            # Add registry domain to /etc/hosts
            ssh.execute('grep -q "nvoi-registry.default.svc.cluster.local" /etc/hosts || echo "127.0.0.1 nvoi-registry.default.svc.cluster.local" | sudo tee -a /etc/hosts')
          end

          def configure_registries(ssh)
            config = <<~CMD
              sudo mkdir -p /etc/rancher/k3s
              sudo tee /etc/rancher/k3s/registries.yaml > /dev/null <<'REGEOF'
              mirrors:
                "nvoi-registry.default.svc.cluster.local:5000":
                  endpoint:
                    - "http://localhost:30500"
                "localhost:30500":
                  endpoint:
                    - "http://localhost:30500"
              configs:
                "nvoi-registry.default.svc.cluster.local:5000":
                  tls:
                    insecure_skip_verify: true
                "localhost:30500":
                  tls:
                    insecure_skip_verify: true
              REGEOF
            CMD

            ssh.execute(config)
          end

          def setup_kubeconfig(ssh, private_ip = nil)
            private_ip ||= discover_private_ip(ssh)

            cmd = <<~CMD
              sudo mkdir -p /home/deploy/.kube
              sudo cp /etc/rancher/k3s/k3s.yaml /home/deploy/.kube/config
              sudo sed -i "s/127.0.0.1/#{private_ip}/g" /home/deploy/.kube/config
              sudo chown -R deploy:deploy /home/deploy/.kube
            CMD

            ssh.execute(cmd)
          end

          def wait_for_k3s_ready(ssh)
            @log.info "Waiting for K3s to be ready"

            ready = Utils::Retry.poll(max_attempts: 60, interval: 5) do
              begin
                output = ssh.execute("kubectl get nodes")
                output.include?("Ready")
              rescue Errors::SshCommandError
                false
              end
            end

            raise Errors::K8sError, "K3s failed to become ready" unless ready

            @log.success "K3s is ready"
          end

          def label_node(ssh, node_name, labels)
            actual_node = ssh.execute("kubectl get nodes -o jsonpath='{.items[0].metadata.name}'").strip

            labels.each do |key, value|
              ssh.execute("kubectl label node #{actual_node} #{key}=#{value} --overwrite")
            end
          end

          def label_worker_from_master(master_ssh, worker_name, group_name)
            @log.info "Labeling worker node: %s", worker_name

            joined = Utils::Retry.poll(max_attempts: 30, interval: 5) do
              begin
                output = master_ssh.execute("kubectl get nodes -o name")
                output.include?(worker_name)
              rescue Errors::SshCommandError
                false
              end
            end

            unless joined
              @log.warning "Worker node did not join cluster in time: %s", worker_name
              return
            end

            master_ssh.execute("kubectl label node #{worker_name} nvoi.io/server-name=#{group_name} --overwrite")
            @log.success "Worker labeled: %s", worker_name
          end

          def setup_registry(ssh)
            @log.info "Setting up in-cluster registry"

            manifest = <<~YAML
              apiVersion: v1
              kind: Namespace
              metadata:
                name: nvoi-system
              ---
              apiVersion: apps/v1
              kind: Deployment
              metadata:
                name: nvoi-registry
                namespace: default
              spec:
                replicas: 1
                selector:
                  matchLabels:
                    app: nvoi-registry
                template:
                  metadata:
                    labels:
                      app: nvoi-registry
                  spec:
                    containers:
                      - name: registry
                        image: registry:2
                        ports:
                          - containerPort: 5000
                            protocol: TCP
                        env:
                          - name: REGISTRY_HTTP_ADDR
                            value: "0.0.0.0:5000"
                        volumeMounts:
                          - name: registry-storage
                            mountPath: /var/lib/registry
                    volumes:
                      - name: registry-storage
                        emptyDir: {}
              ---
              apiVersion: v1
              kind: Service
              metadata:
                name: nvoi-registry
                namespace: default
              spec:
                type: NodePort
                ports:
                  - port: 5000
                    targetPort: 5000
                    nodePort: 30500
                selector:
                  app: nvoi-registry
            YAML

            ssh.execute("cat <<'EOF' | kubectl apply -f -\n#{manifest}\nEOF")

            # Wait for registry to be ready
            @log.info "Waiting for registry to be ready"

            ready = Utils::Retry.poll(max_attempts: 24, interval: 5) do
              begin
                output = ssh.execute("kubectl get deployment nvoi-registry -n default -o jsonpath='{.status.readyReplicas}'")
                output.strip == "1"
              rescue Errors::SshCommandError
                false
              end
            end

            raise Errors::K8sError, "registry failed to become ready" unless ready

            @log.success "In-cluster registry running on :30500"
          end

          def setup_ingress_controller(ssh)
            @log.info "Setting up NGINX Ingress Controller"

            ssh.execute("kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.10.0/deploy/static/provider/baremetal/deploy.yaml", stream: true)

            @log.info "Waiting for NGINX Ingress Controller to be ready"

            ready = Utils::Retry.poll(max_attempts: 60, interval: 10) do
              begin
                ready_replicas = ssh.execute("kubectl get deployment ingress-nginx-controller -n ingress-nginx -o jsonpath='{.status.readyReplicas}'").strip
                desired_replicas = ssh.execute("kubectl get deployment ingress-nginx-controller -n ingress-nginx -o jsonpath='{.spec.replicas}'").strip

                !ready_replicas.empty? && !desired_replicas.empty? && ready_replicas == desired_replicas
              rescue Errors::SshCommandError
                false
              end
            end

            raise Errors::K8sError, "NGINX Ingress Controller failed to become ready" unless ready

            @log.success "NGINX Ingress Controller is ready"
            deploy_error_backend(ssh)
            configure_custom_error_pages(ssh)
          end

          def deploy_error_backend(ssh)
            @log.info "Deploying custom error backend"

            Utils::Templates.apply_manifest(ssh, "error-backend.yaml", {})

            ready = Utils::Retry.poll(max_attempts: 30, interval: 2) do
              begin
                replicas = ssh.execute("kubectl get deployment nvoi-error-backend -n ingress-nginx -o jsonpath='{.status.readyReplicas}'").strip
                replicas == "1"
              rescue Errors::SshCommandError
                false
              end
            end

            raise Errors::K8sError, "Error backend failed to become ready" unless ready

            @log.success "Error backend is ready"
          end

          def configure_custom_error_pages(ssh)
            @log.info "Configuring custom error pages for 502, 503, 504"

            patch_cmd = <<~CMD
              kubectl patch configmap ingress-nginx-controller -n ingress-nginx --type merge -p '{"data":{"custom-http-errors":"502,503,504"}}'
            CMD

            ssh.execute(patch_cmd)

            check_cmd = "kubectl get deployment ingress-nginx-controller -n ingress-nginx -o jsonpath='{.spec.template.spec.containers[0].args}'"
            current_args = ssh.execute(check_cmd)

            unless current_args.include?("--default-backend-service")
              patch_deployment = <<~CMD
                kubectl patch deployment ingress-nginx-controller -n ingress-nginx --type=json -p='[
                  {"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--default-backend-service=ingress-nginx/nvoi-error-backend"}
                ]'
              CMD

              ssh.execute(patch_deployment)

              @log.info "Waiting for ingress controller to restart..."
              ready = Utils::Retry.poll(max_attempts: 60, interval: 2) do
                begin
                  ready_replicas = ssh.execute("kubectl get deployment ingress-nginx-controller -n ingress-nginx -o jsonpath='{.status.readyReplicas}'").strip
                  desired_replicas = ssh.execute("kubectl get deployment ingress-nginx-controller -n ingress-nginx -o jsonpath='{.spec.replicas}'").strip
                  !ready_replicas.empty? && !desired_replicas.empty? && ready_replicas == desired_replicas
                rescue Errors::SshCommandError
                  false
                end
              end
              raise Errors::K8sError, "Ingress controller failed to restart" unless ready
            else
              @log.info "Custom error backend already configured"
            end

            @log.success "Custom error pages configured"
          end
        end
      end
    end
  end
end
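Every remote action in SetupK3s goes through External::Ssh (data/lib/nvoi/external/ssh.rb, +106 lines in this release but not shown in this excerpt). From the call sites, the assumed contract is: execute returns the command's stdout, raises Errors::SshCommandError on a non-zero exit (which the idempotency checks such as `systemctl is-active k3s` rescue), and accepts stream: true for long-running installs. The sketch below is only an illustration under those assumptions; the remote user name and ssh options are guesses, not the gem's actual implementation.

# Illustrative sketch only; the real wrapper is data/lib/nvoi/external/ssh.rb.
# Errors::SshCommandError is defined in data/lib/nvoi/errors.rb.
require "open3"

module Nvoi
  module External
    class Ssh
      def initialize(host, key_path, user: "deploy") # user name is an assumption
        @host = host
        @key_path = key_path
        @user = user
      end

      # Run a command on the remote host and return its stdout.
      # Raises Errors::SshCommandError when the command exits non-zero.
      def execute(command, stream: false)
        args = ["ssh", "-i", @key_path, "-o", "StrictHostKeyChecking=accept-new",
                "#{@user}@#{@host}", command]
        stdout, stderr, status = Open3.capture3(*args)
        $stdout.puts(stdout) if stream # echoes captured output; the real version likely streams live
        raise Errors::SshCommandError, stderr unless status.success?
        stdout
      end
    end
  end
end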