pangea-kubernetes 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/auto-bump.yml +11 -0
- data/.github/workflows/ci.yml +7 -0
- data/.github/workflows/release.yml +22 -0
- data/.gitignore +6 -0
- data/.rspec +3 -0
- data/AGENTS.md +3 -0
- data/CLAUDE.md +370 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +128 -0
- data/README.md +42 -0
- data/Rakefile +8 -0
- data/flake.lock +2144 -0
- data/flake.nix +30 -0
- data/gemset.nix +312 -0
- data/lib/pangea/kubernetes/architecture.rb +383 -0
- data/lib/pangea/kubernetes/backend_registry.rb +117 -0
- data/lib/pangea/kubernetes/backends/aws_eks.rb +203 -0
- data/lib/pangea/kubernetes/backends/aws_nixos.rb +1347 -0
- data/lib/pangea/kubernetes/backends/azure_aks.rb +145 -0
- data/lib/pangea/kubernetes/backends/azure_nixos.rb +275 -0
- data/lib/pangea/kubernetes/backends/base.rb +116 -0
- data/lib/pangea/kubernetes/backends/gcp_gke.rb +176 -0
- data/lib/pangea/kubernetes/backends/gcp_nixos.rb +240 -0
- data/lib/pangea/kubernetes/backends/hcloud_k3s.rb +181 -0
- data/lib/pangea/kubernetes/backends/nixos_base.rb +235 -0
- data/lib/pangea/kubernetes/bare_metal/cloud_init.rb +196 -0
- data/lib/pangea/kubernetes/bare_metal/cluster_reference.rb +72 -0
- data/lib/pangea/kubernetes/load_balancer.rb +157 -0
- data/lib/pangea/kubernetes/network_backend_registry.rb +54 -0
- data/lib/pangea/kubernetes/network_backends/base.rb +78 -0
- data/lib/pangea/kubernetes/network_backends/cilium.rb +105 -0
- data/lib/pangea/kubernetes/network_backends/vpc_cni.rb +36 -0
- data/lib/pangea/kubernetes/types/argocd_config.rb +55 -0
- data/lib/pangea/kubernetes/types/control_plane_config.rb +65 -0
- data/lib/pangea/kubernetes/types/etcd_config.rb +64 -0
- data/lib/pangea/kubernetes/types/firewall_config.rb +39 -0
- data/lib/pangea/kubernetes/types/k3s_config.rb +112 -0
- data/lib/pangea/kubernetes/types/kernel_config.rb +31 -0
- data/lib/pangea/kubernetes/types/kubernetes_config.rb +129 -0
- data/lib/pangea/kubernetes/types/persistent_state_config.rb +100 -0
- data/lib/pangea/kubernetes/types/pki_config.rb +48 -0
- data/lib/pangea/kubernetes/types/secrets_config.rb +41 -0
- data/lib/pangea/kubernetes/types/vpn_config.rb +188 -0
- data/lib/pangea/kubernetes/types/wait_for_dns_config.rb +35 -0
- data/lib/pangea/kubernetes/types.rb +521 -0
- data/lib/pangea-kubernetes/version.rb +5 -0
- data/lib/pangea-kubernetes.rb +43 -0
- data/pangea-kubernetes.gemspec +33 -0
- metadata +192 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright 2025 The Pangea Authors
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
require 'pangea/kubernetes/bare_metal/cloud_init'
|
|
18
|
+
|
|
19
|
+
module Pangea
|
|
20
|
+
module Kubernetes
|
|
21
|
+
module Backends
|
|
22
|
+
# Template method module for NixOS backends.
|
|
23
|
+
# Extracts shared logic for all 4 NixOS backends (AWS, GCP, Azure, Hetzner).
|
|
24
|
+
#
|
|
25
|
+
# Shared methods (implemented here):
|
|
26
|
+
# - nixos_create_cluster: control plane server loop + cloud-init (firewall setup is left to the create_firewall_resources hook)
|
|
27
|
+
# - nixos_create_node_pool: worker cloud-init + worker pool delegation
|
|
28
|
+
# - build_server_cloud_init: full option passthrough from config.nixos
|
|
29
|
+
# - build_agent_cloud_init: worker cloud-init with join_server
|
|
30
|
+
# - base_firewall_ports: cloud-agnostic port definitions
|
|
31
|
+
# - build_secrets_hash: extracts path references from config
|
|
32
|
+
#
|
|
33
|
+
# Template hooks (subclasses implement):
|
|
34
|
+
# - create_compute_instance(ctx, resource_name, config, result, cloud_init, index, tags)
|
|
35
|
+
# - create_worker_pool(ctx, name, cluster_ref, pool_config, cloud_init, tags)
|
|
36
|
+
# - create_firewall_resources(ctx, name, config, network_result, tags)
|
|
37
|
+
# - resolve_image(config)
|
|
38
|
+
# - post_create_instance(ctx, name, server, result, index, tags)
|
|
39
|
+
module NixosBase
|
|
40
|
+
# Firewall port catalogue shared by every NixOS backend. Each entry carries
# a port (Integer, or a 'from-to' String range), a protocol, a public flag
# and a human-readable description.
COMMON_PORTS = {
  ssh: { port: 22, protocol: :tcp, public: true, description: 'SSH' },
  http: { port: 80, protocol: :tcp, public: true, description: 'HTTP' },
  https: { port: 443, protocol: :tcp, public: true, description: 'HTTPS' },
  api: { port: 6443, protocol: :tcp, public: true, description: 'K8s API' },
  kubelet: { port: 10_250, protocol: :tcp, public: false, description: 'Kubelet' },
  etcd: { port: '2379-2380', protocol: :tcp, public: false, description: 'etcd' },
  vxlan: { port: 8472, protocol: :udp, public: false, description: 'VXLAN' },
  wireguard: { port: 51_820, protocol: :udp, public: false, description: 'WireGuard VPN' }
}.freeze

# Ports added on top of COMMON_PORTS for the vanilla Kubernetes distribution.
VANILLA_K8S_PORTS = {
  controller_manager: { port: 10_257, protocol: :tcp, public: false, description: 'controller-manager' },
  scheduler: { port: 10_259, protocol: :tcp, public: false, description: 'scheduler' }
}.freeze

# Build the firewall port table for a distribution.
#
# @param distribution [Symbol, String] distribution name; only :kubernetes
#   adds VANILLA_K8S_PORTS to the common set
# @return [Hash{Symbol => Hash}] a new, unfrozen hash on every call
def base_firewall_ports(distribution)
  return COMMON_PORTS.merge(VANILLA_K8S_PORTS) if distribution.to_sym == :kubernetes

  COMMON_PORTS.dup
end
|
|
64
|
+
|
|
65
|
+
# Provision the control plane servers through the template hooks.
#
# The number of servers is config.system_node_pool.min_size with a floor of
# one. For each index the method builds the server cloud-init, calls the
# create_compute_instance hook, then the post_create_instance hook.
#
# @param ctx passed through to the hooks
# @param name cluster name, passed through to the hooks
# @param config cluster configuration (must respond to system_node_pool)
# @param result results of earlier phases, passed through to the hooks
# @param tags passed through to the hooks
# @return the server created for index 0
def nixos_create_cluster(ctx, name, config, result, tags)
  control_plane_size = [config.system_node_pool.min_size, 1].max

  control_plane = control_plane_size.times.map do |slot|
    user_data = build_server_cloud_init(name, config, slot, result)
    instance = create_compute_instance(ctx, name, config, result, user_data, slot, tags)
    post_create_instance(ctx, name, instance, result, slot, tags)
    instance
  end

  control_plane.first
end
|
|
83
|
+
|
|
84
|
+
# Sentinel written into agent cloud-init in place of the join server address.
# Backends that leave user_data encoding to Terraform (e.g., AWS with
# terraform_base64encode) substitute the real Terraform expression for this
# value at synthesis time via replace().
JOIN_SERVER_PLACEHOLDER = '__PANGEA_JOIN_SERVER__'

# Provision a worker node pool through the create_worker_pool hook.
#
# Builds the agent cloud-init with default options and hands it, together
# with the untouched arguments, to create_worker_pool.
#
# @return whatever create_worker_pool returns
def nixos_create_node_pool(ctx, name, cluster_ref, pool_config, tags)
  create_worker_pool(
    ctx, name, cluster_ref, pool_config,
    build_agent_cloud_init(name, tags, cluster_ref),
    tags
  )
end
|
|
96
|
+
|
|
97
|
+
# Build cloud-init for a control plane server with full option passthrough.
#
# Distribution and GitOps operator are normalised with to_sym before being
# compared, so String values ('k3s', 'fluxcd') select the same option blocks
# as their Symbol forms. Only the block matching the configured distribution
# (k3s or kubernetes) and the configured operator (fluxcd or argocd) is
# forwarded; the others are passed as nil.
#
# @param name cluster name (converted with to_s)
# @param config cluster configuration object
# @param index [Integer] server index; index 0 is the cluster-init node
# @param result results of earlier phases; result.network[:network].id is
#   used as the network id when present
# @return whatever BareMetal::CloudInit.generate returns
def build_server_cloud_init(name, config, index, result)
  distribution = config.distribution&.to_sym
  operator = config.gitops_operator&.to_sym
  nixos = config.nixos

  BareMetal::CloudInit.generate(
    cluster_name: name.to_s,
    distribution: config.distribution,
    profile: config.profile,
    distribution_track: config.distribution_track || config.kubernetes_version,
    role: 'server',
    node_index: index,
    cluster_init: index.zero?,
    network_id: result.network&.dig(:network)&.id,
    fluxcd: operator == :fluxcd ? config.fluxcd&.to_h : nil,
    argocd: operator == :argocd ? config.argocd&.to_h : nil,
    k3s: distribution == :k3s ? nixos&.k3s&.to_h : nil,
    kubernetes: distribution == :kubernetes ? nixos&.kubernetes&.to_h : nil,
    secrets: build_secrets_hash(config),
    vpn: config.vpn&.to_h,
    bootstrap_secrets: build_bootstrap_secrets(config),
    persistent_state: config.persistent_state&.to_h
  )
end
|
|
123
|
+
|
|
124
|
+
# Build cloud-init for a worker/agent node.
#
# Bootstrap secrets are taken from cluster_ref.agent_bootstrap_secrets when
# the reference provides that method; otherwise none are passed.
#
# The distribution track comes from cluster_ref.distribution_track when
# available and set, else from tags[:DistributionTrack], else '1.34'.
# Distribution and profile come from tags[:Distribution] / tags[:Profile],
# defaulting to :k3s and 'cloud-server'.
#
# node_index defaults to 'dynamic': the generated shell script then queries
# EC2 instance metadata at boot to derive an index from the instance ID, so
# that instances sharing one launch template do not end up with the same
# index. Backends that create individual resources (e.g., Hetzner) can pass
# a static index instead.
#
# With use_join_placeholder: true the join server is emitted as
# JOIN_SERVER_PLACEHOLDER instead of cluster_ref.ipv4_address, so a backend
# can inject a Terraform expression via replace() at apply time rather than
# having Ruby encode a ${...} reference prematurely.
#
# @return whatever BareMetal::CloudInit.generate returns
def build_agent_cloud_init(name, tags, cluster_ref, node_index: 'dynamic', use_join_placeholder: false)
  track = cluster_ref.distribution_track if cluster_ref.respond_to?(:distribution_track)
  track ||= tags[:DistributionTrack] || '1.34'

  agent_secrets = nil
  agent_secrets = cluster_ref.agent_bootstrap_secrets if cluster_ref.respond_to?(:agent_bootstrap_secrets)

  join_server = if use_join_placeholder
                  JOIN_SERVER_PLACEHOLDER
                else
                  cluster_ref.ipv4_address
                end

  BareMetal::CloudInit.generate(
    cluster_name: name.to_s,
    distribution: tags[:Distribution]&.to_sym || :k3s,
    profile: tags[:Profile] || 'cloud-server',
    distribution_track: track,
    role: 'agent',
    node_index: node_index,
    cluster_init: false,
    join_server: join_server,
    bootstrap_secrets: agent_secrets
  )
end
|
|
164
|
+
|
|
165
|
+
# Collect secret path references from the configuration.
#
# Paths declared on config.fluxcd take precedence; config.nixos.secrets only
# fills in the flux/sops keys that are still unset, always contributes
# join_token_path when set, and finally merges extra_paths (which may
# overwrite earlier keys) when it is non-empty.
#
# @param config cluster configuration (responds to fluxcd and nixos)
# @return [Hash, nil] path references, or nil when none are configured
def build_secrets_hash(config)
  collected = {}

  if (flux = config.fluxcd)
    flux_readers = {
      flux_ssh_key_path: :source_ssh_key_file,
      flux_token_path: :source_token_file,
      sops_age_key_path: :sops_age_key_file
    }
    flux_readers.each do |key, reader|
      file = flux.public_send(reader)
      collected[key] = file if file
    end
  end

  if (node_secrets = config.nixos&.secrets)
    %i[flux_ssh_key_path flux_token_path sops_age_key_path].each do |key|
      fallback = node_secrets.public_send(key)
      collected[key] ||= fallback if fallback
    end

    join_token = node_secrets.join_token_path
    collected[:join_token_path] = join_token if join_token

    extras = node_secrets.extra_paths
    collected.merge!(extras) if extras.any?
  end

  collected unless collected.empty?
end
|
|
187
|
+
|
|
188
|
+
# Pull the bootstrap secrets out of the config for delivery via cloud-init.
# These are written to disk at first boot before sops-nix activates.
#
# The hash is returned as-is (same object) as soon as at least one value is
# neither nil nor an empty String.
#
# @param config cluster configuration (responds to bootstrap_secrets)
# @return [Hash, nil] the configured secrets, or nil when bootstrap_secrets
#   is not a Hash, is empty, or holds only nil / empty-string values
def build_bootstrap_secrets(config)
  secrets = config.bootstrap_secrets
  return nil unless secrets.is_a?(Hash)

  has_content = secrets.values.any? do |value|
    !(value.nil? || (value.is_a?(String) && value.empty?))
  end

  has_content ? secrets : nil
end
|
|
198
|
+
|
|
199
|
+
# Keys of config.bootstrap_secrets that worker nodes are allowed to receive:
# - k3s_server_token: cluster join authentication
# - nix_github_token: access private flake inputs during nixos-rebuild
# Everything else (flux tokens, SOPS keys, VPN keys, admin passwords) is
# withheld from workers.
AGENT_BOOTSTRAP_KEYS = %i[k3s_server_token nix_github_token].freeze

# Extract only the secrets workers need to join the cluster.
#
# A value counts as "not configured" when it is nil/false or an empty
# String - the same notion of blank that build_bootstrap_secrets uses - so an
# empty token is never forwarded to a worker.
#
# @param config cluster configuration (responds to bootstrap_secrets)
# @return [Hash, nil] the allowed, non-blank secrets keyed by Symbol, or nil
#   when bootstrap_secrets is not a Hash or none of the allowed keys is set
def build_agent_bootstrap_secrets(config)
  bs = config.bootstrap_secrets
  return nil unless bs.is_a?(Hash)

  agent_secrets = AGENT_BOOTSTRAP_KEYS.each_with_object({}) do |key, picked|
    value = bs[key]
    next unless value
    next if value.is_a?(String) && value.empty?

    picked[key] = value
  end

  agent_secrets.empty? ? nil : agent_secrets
end
|
|
215
|
+
|
|
216
|
+
# --- Template hooks (subclasses override) ---

# Hook: create a single compute instance and return a resource reference.
#
# @raise [NotImplementedError] always, until a backend overrides it
def create_compute_instance(_ctx, _name, _config, _result, _cloud_init, _index, _tags)
  message = "#{self} must implement create_compute_instance"
  raise NotImplementedError, message
end

# Hook: create a worker pool (ASG, MIG, VMSS, or server loop) and return a
# resource reference.
#
# @raise [NotImplementedError] always, until a backend overrides it
def create_worker_pool(_ctx, _name, _cluster_ref, _pool_config, _cloud_init, _tags)
  message = "#{self} must implement create_worker_pool"
  raise NotImplementedError, message
end

# Hook: runs after each instance is created (e.g., Hetzner network
# attachment). Does nothing unless a backend overrides it.
#
# @return [nil]
def post_create_instance(_ctx, _name, _server, _result, _index, _tags)
  nil
end
|
|
232
|
+
end
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
end
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright 2025 The Pangea Authors
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
require 'json'
|
|
18
|
+
|
|
19
|
+
module Pangea
|
|
20
|
+
module Kubernetes
|
|
21
|
+
module BareMetal
|
|
22
|
+
# Generates user_data for NixOS servers running k3s or vanilla Kubernetes
|
|
23
|
+
# via blackmatter-kubernetes modules.
|
|
24
|
+
#
|
|
25
|
+
# The NixOS boot sequence reads /etc/pangea/cluster-config.json and applies
|
|
26
|
+
# the corresponding blackmatter-kubernetes module (k3s or kubernetes).
|
|
27
|
+
#
|
|
28
|
+
# Config is cloud-agnostic — the same JSON drives k3s/k8s setup on
|
|
29
|
+
# AWS EC2, GCP GCE, Azure VMs, and Hetzner servers.
|
|
30
|
+
#
|
|
31
|
+
# Two output formats:
|
|
32
|
+
# :shell — bash script (NixOS AMIs with amazon-init, default)
|
|
33
|
+
# :cloud_config — #cloud-config YAML (providers with real cloud-init)
|
|
34
|
+
module CloudInit
|
|
35
|
+
class << self
|
|
36
|
+
# Generate user_data for a NixOS Kubernetes node.
#
# Assembles the cluster config (string keys) and renders it with either the
# shell-script or the cloud-config generator. network_id, join_server and
# fluxcd are included whenever they are truthy; every other optional section
# is included only when it is non-empty, with its keys stringified.
#
# @param cluster_name [String] Name of the cluster
# @param distribution [Symbol] :k3s or :kubernetes
# @param profile [String] blackmatter-kubernetes profile (e.g., 'cilium-standard')
# @param distribution_track [String] version track (e.g., '1.34')
# @param role [String] 'server'/'agent' (k3s) or 'control-plane'/'worker' (k8s)
# @param node_index [Integer, String] Index within the role group, or the
#   'dynamic' sentinel resolved at boot by the shell-script format
# @param cluster_init [Boolean] Whether this is the first server (cluster-init)
# @param network_id [String, nil] Cloud network ID for private networking
# @param join_server [String, nil] IP/hostname of the server to join
# @param fluxcd [Hash, nil] FluxCD bootstrap configuration
# @param argocd [Hash, nil] ArgoCD bootstrap configuration
# @param k3s [Hash, nil] K3s distribution options (full passthrough)
# @param kubernetes [Hash, nil] Vanilla Kubernetes options (full passthrough)
# @param secrets [Hash, nil] Secrets path references (sops-nix)
# @param vpn [Hash, nil] VPN configuration (WireGuard links)
# @param bootstrap_secrets [Hash, nil] Bootstrap secrets (age key, tokens) written at first boot
# @param persistent_state [Hash, nil] Persistent EBS-volume mount config - kindling discovers + attaches + mounts before k3s starts
# @param format [Symbol] :cloud_config (real cloud-init); any other value
#   selects the shell-script format (NixOS AMIs)
# @return [String] user_data string
def generate(cluster_name:, distribution: :k3s, profile: 'cloud-server',
             distribution_track: '1.34', role: 'server', node_index: 0,
             cluster_init: false, network_id: nil, join_server: nil,
             fluxcd: nil, argocd: nil, k3s: nil, kubernetes: nil, secrets: nil,
             vpn: nil, bootstrap_secrets: nil, persistent_state: nil,
             format: :shell)
  payload = {
    'cluster_name' => cluster_name,
    'distribution' => distribution.to_s,
    'profile' => profile,
    'distribution_track' => distribution_track,
    'role' => normalize_role(distribution, role),
    'node_index' => node_index,
    'cluster_init' => cluster_init
  }

  # Copied verbatim whenever present.
  passthrough = { 'network_id' => network_id, 'join_server' => join_server, 'fluxcd' => fluxcd }
  passthrough.each { |key, value| payload[key] = value if value }

  # Skipped when nil or empty; keys are stringified for JSON.
  sections = {
    'argocd' => argocd,
    'k3s' => k3s,
    'kubernetes' => kubernetes,
    'secrets' => secrets,
    'vpn' => vpn,
    'bootstrap_secrets' => bootstrap_secrets,
    'persistent_state' => persistent_state
  }
  sections.each do |key, section|
    next unless section
    next if section.empty?

    payload[key] = stringify_keys_recursive(section)
  end

  return generate_cloud_config(payload) if format.to_sym == :cloud_config

  generate_shell_script(payload)
end
|
|
91
|
+
|
|
92
|
+
private
|
|
93
|
+
|
|
94
|
+
# Translate a role name into the vocabulary of the target distribution:
#   k3s:        server/agent (role is returned untouched)
#   kubernetes: control-plane/worker ('server' and 'agent' are mapped, any
#               other role is returned as a String)
#
# @param distribution [Symbol, String]
# @param role [String, Symbol]
# @return [String, Symbol] the role unchanged for k3s, otherwise a String
def normalize_role(distribution, role)
  if distribution.to_sym == :k3s
    role
  else
    label = role.to_s
    { 'server' => 'control-plane', 'agent' => 'worker' }.fetch(label, label)
  end
end
|
|
106
|
+
|
|
107
|
+
# Absolute path of the cluster config JSON written onto the node.
#
# @return [String]
def config_path
  '/etc/pangea/cluster-config.json'
end

# Deep-copy hashes and arrays, turning every hash key into a String so the
# structure serialises to JSON with plain string keys. Any other object is
# returned unchanged.
#
# @param obj [Object]
# @return [Object]
def stringify_keys_recursive(obj)
  if obj.is_a?(Hash)
    obj.map { |key, value| [key.to_s, stringify_keys_recursive(value)] }.to_h
  elsif obj.is_a?(Array)
    obj.map { |element| stringify_keys_recursive(element) }
  else
    obj
  end
end
|
|
122
|
+
|
|
123
|
+
# Shell script format - for NixOS AMIs with amazon-init, which executes
# user_data directly as a shell script.
#
# The script ONLY writes the cluster config JSON (via a quoted heredoc, so
# the shell performs no expansion inside the JSON) and sets its mode to
# 0640. The pre-installed kindling-server-bootstrap.service (baked into the
# AMI) detects the file via ExecCondition and runs the bootstrap
# automatically, which avoids a slow `nix run` that would re-download/build
# kindling.
#
# When config['node_index'] is "dynamic", the script additionally resolves a
# node index from EC2 instance metadata before writing the file and patches
# it into the JSON afterwards. This is needed for ASG-based workers where all
# instances share the same launch template and cannot have a Terraform-time
# unique index.
#
# @param config [Hash] cluster config with string keys
# @return [String] bash script
def generate_shell_script(config)
  path = config_path
  payload = config.to_json

  resolve_index, patch_index =
    if config['node_index'] == 'dynamic'
      [dynamic_index_snippet, dynamic_index_sed_snippet]
    else
      ['', '']
    end

  <<~SHELL
    #!/usr/bin/env bash
    set -euo pipefail
    #{resolve_index}
    mkdir -p "$(dirname '#{path}')"
    cat > '#{path}' << 'PANGEA_CONFIG_EOF'
    #{payload}
    PANGEA_CONFIG_EOF
    #{patch_index}
    chmod 0640 '#{path}'
  SHELL
end
|
|
150
|
+
|
|
151
|
+
# Shell snippet that derives NODE_INDEX from EC2 instance metadata: the last
# 8 characters of the instance ID are read as hex, converted to decimal and
# reduced modulo 10000 to give a reasonable hostname suffix.
#
# @return [String] bash lines without a trailing newline
def dynamic_index_snippet
  # The heredoc is single-quoted so Ruby does NOT interpolate; the shell
  # expands ${} at runtime.
  # IMPORTANT: avoid bash 16#hex syntax - the # breaks Terraform's
  # expression parser when embedded in base64encode(replace(...)).
  # printf '%d' "0x..." is used for the hex-to-decimal conversion instead.
  script = <<~'BASH'
    IMDS_TOKEN=$(curl -sf -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30" 2>/dev/null || true)
    INSTANCE_ID=$(curl -sf -H "X-aws-ec2-metadata-token: ${IMDS_TOKEN}" "http://169.254.169.254/latest/meta-data/instance-id" 2>/dev/null || echo "i-unknown0000")
    HEX_SUFFIX="${INSTANCE_ID: -8}"
    NODE_INDEX=$(printf '%d' "0x${HEX_SUFFIX}" 2>/dev/null || echo 0)
    NODE_INDEX=$((NODE_INDEX % 10000))
  BASH
  script.chomp
end

# Shell command that swaps the "dynamic" sentinel in the written config JSON
# for the resolved NODE_INDEX value. The config path is interpolated by
# Ruby; ${NODE_INDEX} is left for the shell to expand.
#
# @return [String] a single sed invocation
def dynamic_index_sed_snippet
  %(sed -i "s/\\"node_index\\":\\"dynamic\\"/\\"node_index\\":${NODE_INDEX}/" '#{config_path}')
end
|
|
177
|
+
|
|
178
|
+
# cloud-config YAML format - for providers with real cloud-init
# (Hetzner, GCP, Azure, etc.).
#
# Only writes the config file (mode 0640). The pre-installed
# kindling-server-bootstrap service handles the actual bootstrap after
# cloud-init completes.
#
# The JSON is embedded in a YAML single-quoted scalar. That style has exactly
# one escape - a literal ' must be written as '' - so apostrophes in the JSON
# are doubled here; without that, any ' in a value (cluster name, secret,
# ...) would terminate the scalar early and yield invalid YAML. The JSON
# itself is a single line because to_json escapes newlines.
#
# @param config [Hash] cluster config with string keys
# @return [String] #cloud-config document
def generate_cloud_config(config)
  yaml_safe_json = config.to_json.gsub("'", "''")

  <<~YAML
    #cloud-config
    write_files:
      - path: #{config_path}
        content: '#{yaml_safe_json}'
        permissions: '0640'
  YAML
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright 2025 The Pangea Authors
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
module Pangea
|
|
18
|
+
module Kubernetes
|
|
19
|
+
module BareMetal
|
|
20
|
+
# Synthetic cluster reference for unmanaged Kubernetes (k3s, kubeadm, etc.).
# Unlike managed K8s (EKS, GKE, AKS), bare-metal clusters don't have a
# single terraform resource representing the cluster. This provides a
# compatible interface using the primary control plane server, i.e. the
# first entry of control_plane_servers.
#
# Server objects only need to respond to ipv4_address.
class ClusterReference
  attr_reader :name, :control_plane_servers, :worker_servers, :config

  # @param name cluster name
  # @param control_plane_servers [Array] control plane server references
  # @param worker_servers [Array] worker server references
  # @param config [Hash] stored as-is and exposed through the reader
  def initialize(name:, control_plane_servers:, worker_servers: [], config: {})
    @name = name
    @control_plane_servers = control_plane_servers
    @worker_servers = worker_servers
    @config = config
  end

  # Primary control plane endpoint.
  #
  # @return the primary server's ipv4_address, or nil when there is no
  #   control plane server
  def endpoint
    primary_server&.ipv4_address
  end

  # API port (fixed at 6443).
  #
  # @return [Integer]
  def api_port
    6443
  end

  # Full API endpoint URL.
  #
  # @return [String, nil] "https://<endpoint>:<api_port>", or nil when no
  #   endpoint is available (rather than a URL with an empty host)
  def api_endpoint
    host = endpoint
    return nil unless host

    "https://#{host}:#{api_port}"
  end

  # All node IPs, control plane first, then workers.
  #
  # @return [Array]
  def all_node_ips
    (control_plane_servers + worker_servers).map(&:ipv4_address)
  end

  # Summary of the cluster reference.
  #
  # @return [Hash] name, endpoint, api_port, control_plane_count, worker_count
  def to_h
    {
      name: name,
      endpoint: endpoint,
      api_port: api_port,
      control_plane_count: control_plane_servers.size,
      worker_count: worker_servers.size
    }
  end

  private

  # First control plane server, or nil when the list is empty.
  def primary_server
    control_plane_servers.first
  end
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright 2025 The Pangea Authors
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
require 'pangea/kubernetes/types'
|
|
18
|
+
require 'pangea/kubernetes/bare_metal/cloud_init'
|
|
19
|
+
|
|
20
|
+
module Pangea
|
|
21
|
+
module Kubernetes
|
|
22
|
+
# Elastic load balancer tier composition.
|
|
23
|
+
#
|
|
24
|
+
# Two-tier architecture:
|
|
25
|
+
# Tier 1 (External): Fleet of NixOS HAProxy VMs behind Hetzner Cloud LB
|
|
26
|
+
# Tier 2 (In-Cluster): Cilium eBPF (L4) + Istio Gateway (L7)
|
|
27
|
+
#
|
|
28
|
+
# Traffic flow:
|
|
29
|
+
# DNS → Hetzner Cloud LB → NixOS HAProxy fleet → K8s NodePort → Istio Gateway
|
|
30
|
+
#
|
|
31
|
+
# For bare metal: replace Hetzner Cloud LB with NixOS BIRD BGP + keepalived VRRP
|
|
32
|
+
module LoadBalancer
|
|
33
|
+
# Create an elastic load balancer tier for a Kubernetes cluster.
#
# Always creates the HAProxy fleet; unless the config reports bare_metal?,
# also creates a Hetzner Cloud LB in front of that fleet. Label keys are
# derived from the tag names by lower-casing them and replacing every
# character outside [a-z0-9_] with an underscore.
#
# @param name [Symbol] LB tier name
# @param attributes [Hash] Load balancer configuration (see Types::LoadBalancerConfig)
# @return [Hash] Created resource references: :haproxy_servers, plus
#   :cloud_lb when not in bare-metal mode
def elastic_load_balancer(name, attributes = {})
  config = Types::LoadBalancerConfig.new(attributes)

  base_tags = { LoadBalancer: name.to_s, Mode: config.mode, ManagedBy: 'Pangea' }
  hcloud_labels = base_tags.merge(config.tags).transform_keys do |tag_name|
    tag_name.to_s.downcase.gsub(/[^a-z0-9_]/, '_')
  end

  fleet = create_haproxy_fleet(name, config, hcloud_labels)
  resources = { haproxy_servers: fleet }

  # Managed mode only: Hetzner Cloud LB in front of the HAProxy fleet.
  resources[:cloud_lb] = create_hetzner_cloud_lb(name, config, fleet, hcloud_labels) unless config.bare_metal?

  resources
end
|
|
60
|
+
|
|
61
|
+
private
|
|
62
|
+
|
|
63
|
+
# Create config.instance_count HAProxy servers, each with its own cloud-init
# and with 'role' / 'node_index' labels added to the shared labels.
#
# @param name LB tier name, used in resource and server names
# @param config load balancer configuration (instance_count, instance_type, region)
# @param labels [Hash] labels applied to every server
# @return [Array] one hcloud_server reference per instance, in index order
def create_haproxy_fleet(name, config, labels)
  config.instance_count.times.map do |slot|
    bootstrap = generate_haproxy_cloud_init(name, config, slot)
    server_labels = labels.merge('role' => 'haproxy', 'node_index' => slot.to_s)

    hcloud_server(
      :"#{name}_haproxy_#{slot}",
      name: "#{name}-haproxy-#{slot}",
      server_type: config.instance_type,
      image: 'ubuntu-24.04',
      location: config.region,
      user_data: bootstrap,
      labels: server_labels
    )
  end
end
|
|
87
|
+
|
|
88
|
+
# Create the Hetzner Cloud LB (type 'lb11') in front of the HAProxy fleet:
# one server target per HAProxy server and one service per frontend port,
# each with a TCP health check on the same port.
#
# NOTE(review): port 443 is registered with protocol 'https' and no
# certificate settings are passed here - confirm the provider accepts that,
# or whether 'tcp' passthrough is intended. TODO confirm.
#
# @param name LB tier name, used in resource names
# @param config load balancer configuration (region, frontend_ports,
#   health_check_interval)
# @param haproxy_servers [Array] server references responding to id
# @param labels [Hash] labels applied to the load balancer
# @return the hcloud_load_balancer reference
def create_hetzner_cloud_lb(name, config, haproxy_servers, labels)
  balancer = hcloud_load_balancer(
    :"#{name}_cloud_lb",
    name: "#{name}-cloud-lb",
    load_balancer_type: 'lb11',
    location: config.region,
    labels: labels
  )

  # Targets: every HAProxy server.
  haproxy_servers.each.with_index do |backend, position|
    hcloud_load_balancer_target(
      :"#{name}_lb_target_#{position}",
      load_balancer_id: balancer.id,
      type: 'server',
      server_id: backend.id
    )
  end

  # Services: one per frontend port, listening and forwarding on that port.
  config.frontend_ports.each do |port|
    service_protocol = port == 443 ? 'https' : 'http'
    probe = { protocol: 'tcp', port: port, interval: config.health_check_interval.to_i }

    hcloud_load_balancer_service(
      :"#{name}_lb_service_#{port}",
      load_balancer_id: balancer.id,
      protocol: service_protocol,
      listen_port: port,
      destination_port: port,
      health_check: probe
    )
  end

  balancer
end
|
|
126
|
+
|
|
127
|
+
# Build the #cloud-config user_data for one HAProxy VM.
#
# Writes the HAProxy settings as JSON to /etc/pangea/haproxy-config.json
# (mode 0644) and starts the pangea-haproxy-bootstrap unit. BGP / VRRP /
# virtual IP settings are included only in bare-metal mode, and only when
# set.
#
# The JSON is embedded in a YAML single-quoted scalar, whose only escape is
# '' for a literal '. Apostrophes are therefore doubled; otherwise a ' in
# any value (e.g. a backend name) would end the scalar early and produce
# invalid YAML.
#
# @param name LB tier name (written as cluster_name)
# @param config load balancer configuration
# @param index [Integer] position of this VM within the fleet
# @return [String] #cloud-config document
def generate_haproxy_cloud_init(name, config, index)
  haproxy_config = {
    'cluster_name' => name.to_s,
    'role' => 'haproxy',
    'node_index' => index,
    'mode' => config.mode,
    'max_connections' => config.max_connections,
    'frontend_ports' => config.frontend_ports,
    'backends' => config.backends
  }

  if config.bare_metal?
    haproxy_config['bgp_asn'] = config.bgp_asn if config.bgp_asn
    haproxy_config['bgp_neighbor'] = config.bgp_neighbor if config.bgp_neighbor
    haproxy_config['vrrp_interface'] = config.vrrp_interface if config.vrrp_interface
    haproxy_config['virtual_ips'] = config.virtual_ips if config.virtual_ips.any?
  end

  yaml_safe_json = haproxy_config.to_json.gsub("'", "''")

  <<~YAML
    #cloud-config
    write_files:
      - path: /etc/pangea/haproxy-config.json
        content: '#{yaml_safe_json}'
        permissions: '0644'
    runcmd:
      - ['systemctl', 'start', 'pangea-haproxy-bootstrap']
  YAML
end
|
|
155
|
+
end
|
|
156
|
+
end
|
|
157
|
+
end
|