pangea-kubernetes 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/auto-bump.yml +11 -0
  3. data/.github/workflows/ci.yml +7 -0
  4. data/.github/workflows/release.yml +22 -0
  5. data/.gitignore +6 -0
  6. data/.rspec +3 -0
  7. data/AGENTS.md +3 -0
  8. data/CLAUDE.md +370 -0
  9. data/Gemfile +9 -0
  10. data/Gemfile.lock +128 -0
  11. data/README.md +42 -0
  12. data/Rakefile +8 -0
  13. data/flake.lock +2144 -0
  14. data/flake.nix +30 -0
  15. data/gemset.nix +312 -0
  16. data/lib/pangea/kubernetes/architecture.rb +383 -0
  17. data/lib/pangea/kubernetes/backend_registry.rb +117 -0
  18. data/lib/pangea/kubernetes/backends/aws_eks.rb +203 -0
  19. data/lib/pangea/kubernetes/backends/aws_nixos.rb +1347 -0
  20. data/lib/pangea/kubernetes/backends/azure_aks.rb +145 -0
  21. data/lib/pangea/kubernetes/backends/azure_nixos.rb +275 -0
  22. data/lib/pangea/kubernetes/backends/base.rb +116 -0
  23. data/lib/pangea/kubernetes/backends/gcp_gke.rb +176 -0
  24. data/lib/pangea/kubernetes/backends/gcp_nixos.rb +240 -0
  25. data/lib/pangea/kubernetes/backends/hcloud_k3s.rb +181 -0
  26. data/lib/pangea/kubernetes/backends/nixos_base.rb +235 -0
  27. data/lib/pangea/kubernetes/bare_metal/cloud_init.rb +196 -0
  28. data/lib/pangea/kubernetes/bare_metal/cluster_reference.rb +72 -0
  29. data/lib/pangea/kubernetes/load_balancer.rb +157 -0
  30. data/lib/pangea/kubernetes/network_backend_registry.rb +54 -0
  31. data/lib/pangea/kubernetes/network_backends/base.rb +78 -0
  32. data/lib/pangea/kubernetes/network_backends/cilium.rb +105 -0
  33. data/lib/pangea/kubernetes/network_backends/vpc_cni.rb +36 -0
  34. data/lib/pangea/kubernetes/types/argocd_config.rb +55 -0
  35. data/lib/pangea/kubernetes/types/control_plane_config.rb +65 -0
  36. data/lib/pangea/kubernetes/types/etcd_config.rb +64 -0
  37. data/lib/pangea/kubernetes/types/firewall_config.rb +39 -0
  38. data/lib/pangea/kubernetes/types/k3s_config.rb +112 -0
  39. data/lib/pangea/kubernetes/types/kernel_config.rb +31 -0
  40. data/lib/pangea/kubernetes/types/kubernetes_config.rb +129 -0
  41. data/lib/pangea/kubernetes/types/persistent_state_config.rb +100 -0
  42. data/lib/pangea/kubernetes/types/pki_config.rb +48 -0
  43. data/lib/pangea/kubernetes/types/secrets_config.rb +41 -0
  44. data/lib/pangea/kubernetes/types/vpn_config.rb +188 -0
  45. data/lib/pangea/kubernetes/types/wait_for_dns_config.rb +35 -0
  46. data/lib/pangea/kubernetes/types.rb +521 -0
  47. data/lib/pangea-kubernetes/version.rb +5 -0
  48. data/lib/pangea-kubernetes.rb +43 -0
  49. data/pangea-kubernetes.gemspec +33 -0
  50. metadata +192 -0
@@ -0,0 +1,235 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright 2025 The Pangea Authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'pangea/kubernetes/bare_metal/cloud_init'
18
+
19
+ module Pangea
20
+ module Kubernetes
21
+ module Backends
22
# Template method module for NixOS backends.
# Extracts shared logic for all 4 NixOS backends (AWS, GCP, Azure, Hetzner).
#
# Shared methods (implemented here):
#   - nixos_create_cluster: control plane server loop + cloud-init
#   - nixos_create_node_pool: worker cloud-init + worker pool delegation
#   - build_server_cloud_init: full option passthrough from config.nixos
#   - build_agent_cloud_init: worker cloud-init with join_server
#   - base_firewall_ports: cloud-agnostic port definitions
#   - build_secrets_hash: extracts path references from config
#   - build_bootstrap_secrets / build_agent_bootstrap_secrets: first-boot secrets
#
# Template hooks called by this module (subclasses implement):
#   - create_compute_instance(ctx, name, config, result, cloud_init, index, tags)
#   - create_worker_pool(ctx, name, cluster_ref, pool_config, cloud_init, tags)
#   - post_create_instance(ctx, name, server, result, index, tags) (optional, no-op default)
#
# NOTE(review): create_firewall_resources(ctx, name, config, network_result, tags)
# and resolve_image(config) were also listed as hooks, but this module neither
# defines a default for them nor calls them.
module NixosBase
  # Kubernetes port definitions shared across all NixOS backends
  COMMON_PORTS = {
    ssh: { port: 22, protocol: :tcp, public: true, description: 'SSH' },
    http: { port: 80, protocol: :tcp, public: true, description: 'HTTP' },
    https: { port: 443, protocol: :tcp, public: true, description: 'HTTPS' },
    api: { port: 6443, protocol: :tcp, public: true, description: 'K8s API' },
    kubelet: { port: 10_250, protocol: :tcp, public: false, description: 'Kubelet' },
    etcd: { port: '2379-2380', protocol: :tcp, public: false, description: 'etcd' },
    vxlan: { port: 8472, protocol: :udp, public: false, description: 'VXLAN' },
    wireguard: { port: 51_820, protocol: :udp, public: false, description: 'WireGuard VPN' }
  }.freeze

  # Additional ports for vanilla Kubernetes
  VANILLA_K8S_PORTS = {
    controller_manager: { port: 10_257, protocol: :tcp, public: false, description: 'controller-manager' },
    scheduler: { port: 10_259, protocol: :tcp, public: false, description: 'scheduler' }
  }.freeze

  # Placeholder used in agent cloud-init for the join server address.
  # Backends that defer user_data encoding to Terraform (e.g., AWS with
  # terraform_base64encode) replace this with the actual Terraform
  # expression at synthesis time via replace().
  JOIN_SERVER_PLACEHOLDER = '__PANGEA_JOIN_SERVER__'

  # Bootstrap secrets that worker nodes are allowed to receive:
  #   - k3s_server_token: cluster join authentication
  #   - nix_github_token: access private flake inputs during nixos-rebuild
  # Workers do NOT receive flux tokens, SOPS keys, VPN keys, or admin passwords.
  AGENT_BOOTSTRAP_KEYS = %i[k3s_server_token nix_github_token].freeze

  # Returns all firewall ports for the given distribution.
  #
  # @param distribution [Symbol, String] :k3s or :kubernetes
  # @return [Hash] a new, unfrozen hash of port definitions (the per-port
  #   hashes are shared with the constants, so do not mutate them)
  def base_firewall_ports(distribution)
    return COMMON_PORTS.merge(VANILLA_K8S_PORTS) if distribution.to_sym == :kubernetes

    COMMON_PORTS.dup
  end

  # Create control plane server(s) via template hooks.
  # Calls create_compute_instance and post_create_instance once per server;
  # at least one server is always created.
  #
  # @return the first (index 0) server reference
  def nixos_create_cluster(ctx, name, config, result, tags)
    # to_i keeps an unset (nil) min_size from raising inside Array#max.
    cp_count = [config.system_node_pool.min_size.to_i, 1].max

    servers = Array.new(cp_count) do |idx|
      cloud_init = build_server_cloud_init(name, config, idx, result)
      server = create_compute_instance(ctx, name, config, result, cloud_init, idx, tags)
      post_create_instance(ctx, name, server, result, idx, tags)
      server
    end

    servers.first
  end

  # Create worker node pool via template hooks.
  # Subclasses override create_worker_pool.
  def nixos_create_node_pool(ctx, name, cluster_ref, pool_config, tags)
    cloud_init = build_agent_cloud_init(name, tags, cluster_ref)
    create_worker_pool(ctx, name, cluster_ref, pool_config, cloud_init, tags)
  end

  # Build cloud-init for a control plane server with full option passthrough.
  #
  # Only the section matching the configured distribution (k3s or kubernetes)
  # and the configured GitOps operator (fluxcd or argocd) is forwarded.
  # Both selectors are normalized with to_sym so String values behave the
  # same as Symbols, consistent with base_firewall_ports.
  def build_server_cloud_init(name, config, index, result)
    distribution = config.distribution&.to_sym
    operator = config.gitops_operator&.to_sym

    BareMetal::CloudInit.generate(
      cluster_name: name.to_s,
      distribution: config.distribution,
      profile: config.profile,
      distribution_track: config.distribution_track || config.kubernetes_version,
      role: 'server',
      node_index: index,
      # Only the first control plane server initializes the cluster.
      cluster_init: index.zero?,
      network_id: result.network&.dig(:network)&.id,
      fluxcd: operator == :fluxcd ? config.fluxcd&.to_h : nil,
      argocd: operator == :argocd ? config.argocd&.to_h : nil,
      k3s: distribution == :k3s ? config.nixos&.k3s&.to_h : nil,
      kubernetes: distribution == :kubernetes ? config.nixos&.kubernetes&.to_h : nil,
      secrets: build_secrets_hash(config),
      vpn: config.vpn&.to_h,
      bootstrap_secrets: build_bootstrap_secrets(config),
      persistent_state: config.persistent_state&.to_h
    )
  end

  # Build cloud-init for a worker/agent node.
  #
  # Bootstrap secrets are taken from cluster_ref.agent_bootstrap_secrets when
  # the reference provides that method; otherwise none are delivered.
  #
  # node_index defaults to 'dynamic' — the generated shell script queries
  # EC2 instance metadata at boot time to derive a unique index from the
  # instance ID. This prevents duplicate hostnames when multiple ASG
  # instances share the same launch template. Backends that create
  # individual resources (e.g., Hetzner) can override with a static index.
  #
  # When use_join_placeholder is true, the join_server value is replaced
  # with JOIN_SERVER_PLACEHOLDER. This allows backends that use Terraform
  # functions (e.g., base64encode) to inject the actual Terraform
  # expression via replace() at apply time, avoiding premature encoding
  # of ${...} references by Ruby.
  def build_agent_cloud_init(name, tags, cluster_ref, node_index: 'dynamic', use_join_placeholder: false)
    ref_track = cluster_ref.distribution_track if cluster_ref.respond_to?(:distribution_track)
    track = ref_track || tags[:DistributionTrack] || '1.34'

    agent_secrets = cluster_ref.agent_bootstrap_secrets if cluster_ref.respond_to?(:agent_bootstrap_secrets)

    join_server = use_join_placeholder ? JOIN_SERVER_PLACEHOLDER : cluster_ref.ipv4_address

    BareMetal::CloudInit.generate(
      cluster_name: name.to_s,
      distribution: tags[:Distribution]&.to_sym || :k3s,
      profile: tags[:Profile] || 'cloud-server',
      distribution_track: track,
      role: 'agent',
      node_index: node_index,
      cluster_init: false,
      join_server: join_server,
      bootstrap_secrets: agent_secrets
    )
  end

  # Extract secrets path references from config.
  #
  # Paths set on config.fluxcd take precedence over the same keys on
  # config.nixos.secrets; extra_paths are merged last and may override both.
  #
  # @return [Hash, nil] nil when no secrets are configured
  def build_secrets_hash(config)
    paths = {}

    flux = config.fluxcd
    if flux
      paths[:flux_ssh_key_path] = flux.source_ssh_key_file if flux.source_ssh_key_file
      paths[:flux_token_path] = flux.source_token_file if flux.source_token_file
      paths[:sops_age_key_path] = flux.sops_age_key_file if flux.sops_age_key_file
    end

    secrets = config.nixos&.secrets
    if secrets
      %i[flux_ssh_key_path flux_token_path sops_age_key_path].each do |key|
        value = secrets.public_send(key)
        # Guard on value so a missing path never inserts a nil-valued key.
        paths[key] ||= value if value
      end
      paths[:join_token_path] = secrets.join_token_path if secrets.join_token_path

      extras = secrets.extra_paths
      paths.merge!(extras) if extras && !extras.empty?
    end

    paths.empty? ? nil : paths
  end

  # Extract bootstrap secrets from config for cloud-init delivery.
  # These are written to disk at first boot before sops-nix activates.
  #
  # @return [Hash, nil] the configured hash unchanged, or nil when it is not
  #   a Hash, is empty, or every value is nil / an empty String
  def build_bootstrap_secrets(config)
    bs = config.bootstrap_secrets
    return nil unless bs.is_a?(Hash)
    return nil if bs.each_value.all? { |v| v.nil? || (v.is_a?(String) && v.empty?) }

    bs
  end

  # Extract only the secrets workers need to join the cluster
  # (see AGENT_BOOTSTRAP_KEYS). Unset and empty-string values are skipped,
  # matching what build_bootstrap_secrets considers "not configured".
  #
  # @return [Hash, nil] nil when no agent secret is configured
  def build_agent_bootstrap_secrets(config)
    bs = config.bootstrap_secrets
    return nil unless bs.is_a?(Hash)

    agent_secrets = AGENT_BOOTSTRAP_KEYS.each_with_object({}) do |key, picked|
      value = bs[key]
      picked[key] = value if value && !(value.is_a?(String) && value.empty?)
    end
    agent_secrets.empty? ? nil : agent_secrets
  end

  # --- Template hooks (subclasses override) ---

  # Create a single compute instance. Returns a resource reference.
  def create_compute_instance(_ctx, _name, _config, _result, _cloud_init, _index, _tags)
    raise NotImplementedError, "#{self} must implement create_compute_instance"
  end

  # Create a worker pool (ASG, MIG, VMSS, or server loop). Returns a resource reference.
  def create_worker_pool(_ctx, _name, _cluster_ref, _pool_config, _cloud_init, _tags)
    raise NotImplementedError, "#{self} must implement create_worker_pool"
  end

  # Post-instance creation hook (e.g., Hetzner network attachment). No-op by default.
  def post_create_instance(_ctx, _name, _server, _result, _index, _tags)
    # no-op — subclasses override when needed
  end
end
233
+ end
234
+ end
235
+ end
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright 2025 The Pangea Authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'json'
18
+
19
+ module Pangea
20
+ module Kubernetes
21
+ module BareMetal
22
# Generates user_data for NixOS servers running k3s or vanilla Kubernetes
# via blackmatter-kubernetes modules.
#
# The NixOS boot sequence reads /etc/pangea/cluster-config.json and applies
# the corresponding blackmatter-kubernetes module (k3s or kubernetes).
#
# Config is cloud-agnostic — the same JSON drives k3s/k8s setup on
# AWS EC2, GCP GCE, Azure VMs, and Hetzner servers.
#
# Two output formats:
#   :shell        — bash script (NixOS AMIs with amazon-init, default)
#   :cloud_config — #cloud-config YAML (providers with real cloud-init)
module CloudInit
  class << self
    # Generate user_data for a NixOS Kubernetes node.
    #
    # Optional sections (fluxcd, argocd, k3s, kubernetes, secrets, vpn,
    # bootstrap_secrets, persistent_state) are emitted only when present
    # and non-empty, and always with string keys.
    #
    # @param cluster_name [String] Name of the cluster
    # @param distribution [Symbol] :k3s or :kubernetes
    # @param profile [String] blackmatter-kubernetes profile (e.g., 'cilium-standard')
    # @param distribution_track [String] version track (e.g., '1.34')
    # @param role [String] 'server'/'agent' (k3s) or 'control-plane'/'worker' (k8s)
    # @param node_index [Integer, String] Index within the role group, or the
    #   sentinel 'dynamic' to resolve it at boot (shell format only)
    # @param cluster_init [Boolean] Whether this is the first server (cluster-init)
    # @param network_id [String, nil] Cloud network ID for private networking
    # @param join_server [String, nil] IP/hostname of the server to join
    # @param fluxcd [Hash, nil] FluxCD bootstrap configuration
    # @param argocd [Hash, nil] ArgoCD bootstrap configuration
    # @param k3s [Hash, nil] K3s distribution options (full passthrough)
    # @param kubernetes [Hash, nil] Vanilla Kubernetes options (full passthrough)
    # @param secrets [Hash, nil] Secrets path references (sops-nix)
    # @param vpn [Hash, nil] VPN configuration (WireGuard links)
    # @param bootstrap_secrets [Hash, nil] Bootstrap secrets (age key, tokens) written at first boot
    # @param persistent_state [Hash, nil] Persistent EBS-volume mount config — kindling discovers + attaches + mounts before k3s starts
    # @param format [Symbol] :cloud_config (real cloud-init); any other value
    #   produces the :shell format (NixOS AMIs)
    # @return [String] user_data string
    def generate(cluster_name:, distribution: :k3s, profile: 'cloud-server',
                 distribution_track: '1.34', role: 'server', node_index: 0,
                 cluster_init: false, network_id: nil, join_server: nil,
                 fluxcd: nil, argocd: nil, k3s: nil, kubernetes: nil, secrets: nil,
                 vpn: nil, bootstrap_secrets: nil, persistent_state: nil,
                 format: :shell)
      config = {
        'cluster_name' => cluster_name,
        'distribution' => distribution.to_s,
        'profile' => profile,
        'distribution_track' => distribution_track,
        'role' => normalize_role(distribution, role),
        'node_index' => node_index,
        'cluster_init' => cluster_init
      }

      config['network_id'] = network_id if network_id
      config['join_server'] = join_server if join_server

      # Insertion order here is the key order of the emitted JSON.
      optional_sections = {
        'fluxcd' => fluxcd,
        'argocd' => argocd,
        'k3s' => k3s,
        'kubernetes' => kubernetes,
        'secrets' => secrets,
        'vpn' => vpn,
        'bootstrap_secrets' => bootstrap_secrets,
        'persistent_state' => persistent_state
      }
      optional_sections.each do |key, section|
        next unless section
        next if section.respond_to?(:empty?) && section.empty?

        config[key] = stringify_keys_recursive(section)
      end

      if format.to_sym == :cloud_config
        generate_cloud_config(config)
      else
        generate_shell_script(config)
      end
    end

    private

    # Normalize role names across distributions:
    #   k3s: server/agent (passed through)
    #   kubernetes: control-plane/worker
    #
    # @return [String] always a String, regardless of the input type
    def normalize_role(distribution, role)
      role_name = role.to_s
      return role_name if distribution.to_sym == :k3s

      case role_name
      when 'server' then 'control-plane'
      when 'agent' then 'worker'
      else role_name
      end
    end

    # Path on the node where the cluster config JSON is written.
    def config_path
      '/etc/pangea/cluster-config.json'
    end

    # Recursively convert hash keys to strings for JSON serialization.
    # Arrays are walked; every other value is returned as-is.
    def stringify_keys_recursive(obj)
      case obj
      when Hash
        obj.each_with_object({}) { |(k, v), h| h[k.to_s] = stringify_keys_recursive(v) }
      when Array
        obj.map { |v| stringify_keys_recursive(v) }
      else
        obj
      end
    end

    # Shell script format — for NixOS AMIs with amazon-init.
    # amazon-init executes user_data as a shell script directly.
    #
    # The script ONLY writes the cluster config JSON. The pre-installed
    # kindling-server-bootstrap.service (baked into the AMI) detects the
    # file via ExecCondition and runs the 13-phase bootstrap automatically.
    # This avoids a slow `nix run` that would re-download/build kindling.
    #
    # When node_index is "dynamic", the script queries EC2 instance
    # metadata to derive a unique index from the instance ID. This is
    # needed for ASG-based workers where all instances share the same
    # launch template and cannot have a Terraform-time unique index.
    #
    # NOTE(review): the file may hold bootstrap secrets and is created
    # before the chmod runs, so its initial mode depends on the umask of
    # whatever executes user_data — confirm that is acceptable. The script
    # text is intentionally left unchanged here, since any edit alters the
    # user_data of already-deployed nodes.
    def generate_shell_script(config)
      dynamic_index = config['node_index'] == 'dynamic'
      json = config.to_json
      <<~SHELL
        #!/usr/bin/env bash
        set -euo pipefail
        #{dynamic_index_snippet if dynamic_index}
        mkdir -p "$(dirname '#{config_path}')"
        cat > '#{config_path}' << 'PANGEA_CONFIG_EOF'
        #{json}
        PANGEA_CONFIG_EOF
        #{dynamic_index_sed_snippet if dynamic_index}
        chmod 0640 '#{config_path}'
      SHELL
    end

    # Shell snippet that resolves a unique node index from EC2 instance
    # metadata. Uses the last 8 characters of the instance ID, converted
    # from hex to decimal, modulo 10000 for a reasonable hostname suffix.
    def dynamic_index_snippet
      # Uses single-quoted heredoc so Ruby does NOT interpolate.
      # Shell expands ${} at runtime.
      # IMPORTANT: avoid bash 16#hex syntax — the # breaks Terraform's
      # expression parser when embedded in base64encode(replace(...)).
      # Use printf '%d' "0x..." instead for hex-to-decimal conversion.
      <<~'BASH'.chomp
        IMDS_TOKEN=$(curl -sf -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30" 2>/dev/null || true)
        INSTANCE_ID=$(curl -sf -H "X-aws-ec2-metadata-token: ${IMDS_TOKEN}" "http://169.254.169.254/latest/meta-data/instance-id" 2>/dev/null || echo "i-unknown0000")
        HEX_SUFFIX="${INSTANCE_ID: -8}"
        NODE_INDEX=$(printf '%d' "0x${HEX_SUFFIX}" 2>/dev/null || echo 0)
        NODE_INDEX=$((NODE_INDEX % 10000))
      BASH
    end

    # Shell snippet that replaces the "dynamic" sentinel in the config JSON
    # with the resolved NODE_INDEX value. Uses Ruby interpolation for the
    # config path, but shell interpolation for NODE_INDEX.
    def dynamic_index_sed_snippet
      # rubocop:disable Style/StringLiterals
      "sed -i \"s/\\\"node_index\\\":\\\"dynamic\\\"/\\\"node_index\\\":${NODE_INDEX}/\" '#{config_path}'"
      # rubocop:enable Style/StringLiterals
    end

    # cloud-config YAML format — for providers with real cloud-init
    # (Hetzner, GCP, Azure, etc.).
    #
    # Only writes the config file. The pre-installed kindling-server-bootstrap
    # service handles the actual bootstrap after cloud-init completes.
    #
    # The JSON is embedded as a YAML single-quoted scalar. Inside such a
    # scalar the only character needing escaping is the single quote, which
    # is written doubled; without this an apostrophe in any config value
    # would produce invalid YAML.
    def generate_cloud_config(config)
      quoted_json = config.to_json.gsub("'", "''")
      <<~YAML
        #cloud-config
        write_files:
          - path: #{config_path}
            content: '#{quoted_json}'
            permissions: '0640'
      YAML
    end
  end
end
194
+ end
195
+ end
196
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright 2025 The Pangea Authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ module Pangea
18
+ module Kubernetes
19
+ module BareMetal
20
# Stand-in cluster handle for unmanaged Kubernetes (k3s, kubeadm, etc.).
# Managed offerings (EKS, GKE, AKS) expose one terraform resource for the
# whole cluster; bare-metal clusters do not, so this object offers a
# compatible interface backed by the first control plane server.
class ClusterReference
  attr_reader :name
  attr_reader :control_plane_servers
  attr_reader :worker_servers
  attr_reader :config

  # @param name cluster name
  # @param control_plane_servers [Array] server references; the first one is
  #   treated as the primary
  # @param worker_servers [Array] worker server references
  # @param config [Hash] stored as given and exposed through #config
  def initialize(name:, control_plane_servers:, worker_servers: [], config: {})
    @name, @config = name, config
    @control_plane_servers, @worker_servers = control_plane_servers, worker_servers
  end

  # Address of the primary control plane server, or nil when there is none.
  def endpoint
    primary = control_plane_servers.first
    primary&.ipv4_address
  end

  # Port used to build the API endpoint URL.
  def api_port
    6443
  end

  # HTTPS URL assembled from #endpoint and #api_port.
  def api_endpoint
    format('https://%s:%s', endpoint, api_port)
  end

  # Addresses of every node, control plane servers first, then workers.
  def all_node_ips
    nodes = control_plane_servers + worker_servers
    nodes.map { |node| node.ipv4_address }
  end

  # Summary hash: name, endpoint, port and node counts.
  def to_h
    summary = { name: name, endpoint: endpoint, api_port: api_port }
    summary[:control_plane_count] = control_plane_servers.size
    summary[:worker_count] = worker_servers.size
    summary
  end
end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright 2025 The Pangea Authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'pangea/kubernetes/types'
18
+ require 'pangea/kubernetes/bare_metal/cloud_init'
19
+
20
+ module Pangea
21
+ module Kubernetes
22
# Elastic load balancer tier composition.
#
# Two-tier architecture:
#   Tier 1 (External): Fleet of NixOS HAProxy VMs behind Hetzner Cloud LB
#   Tier 2 (In-Cluster): Cilium eBPF (L4) + Istio Gateway (L7)
#
# Traffic flow:
#   DNS → Hetzner Cloud LB → NixOS HAProxy fleet → K8s NodePort → Istio Gateway
#
# For bare metal: replace Hetzner Cloud LB with NixOS BIRD BGP + keepalived VRRP
#
# This module expects the including context to provide the hcloud_server,
# hcloud_load_balancer, hcloud_load_balancer_target and
# hcloud_load_balancer_service resource functions.
module LoadBalancer
  # Create an elastic load balancer tier for a Kubernetes cluster.
  #
  # @param name [Symbol] LB tier name
  # @param attributes [Hash] Load balancer configuration (see Types::LoadBalancerConfig)
  # @return [Hash] Created resource references: :haproxy_servers always,
  #   :cloud_lb only when the config is not bare metal
  def elastic_load_balancer(name, attributes = {})
    config = Types::LoadBalancerConfig.new(attributes)

    tags = {
      LoadBalancer: name.to_s,
      Mode: config.mode,
      ManagedBy: 'Pangea'
    }.merge(config.tags)

    # Label keys are lowercased and anything outside [a-z0-9_] becomes '_'.
    hcloud_labels = tags.transform_keys { |k| k.to_s.downcase.gsub(/[^a-z0-9_]/, '_') }

    # Create HAProxy VMs
    result = { haproxy_servers: create_haproxy_fleet(name, config, hcloud_labels) }

    # Create Hetzner Cloud LB in front of HAProxy fleet (managed mode)
    unless config.bare_metal?
      result[:cloud_lb] = create_hetzner_cloud_lb(name, config, result[:haproxy_servers], hcloud_labels)
    end

    result
  end

  private

  # Create config.instance_count HAProxy servers, each with its own
  # cloud-init payload and role/node_index labels.
  #
  # NOTE(review): the image is 'ubuntu-24.04' although the module
  # description speaks of NixOS HAProxy VMs — confirm which is intended.
  #
  # @return [Array] server references in index order
  def create_haproxy_fleet(name, config, labels)
    config.instance_count.times.map do |idx|
      hcloud_server(
        :"#{name}_haproxy_#{idx}",
        name: "#{name}-haproxy-#{idx}",
        server_type: config.instance_type,
        image: 'ubuntu-24.04',
        location: config.region,
        user_data: generate_haproxy_cloud_init(name, config, idx),
        labels: labels.merge(
          'role' => 'haproxy',
          'node_index' => idx.to_s
        )
      )
    end
  end

  # Create the cloud load balancer, attach every HAProxy server as a target
  # and add one service per frontend port.
  #
  # NOTE(review): port 443 is registered with protocol 'https' and no
  # certificate settings are passed here — confirm the provider accepts
  # that, or whether plain TCP passthrough to HAProxy is what is wanted.
  #
  # @return the load balancer reference
  def create_hetzner_cloud_lb(name, config, haproxy_servers, labels)
    lb = hcloud_load_balancer(
      :"#{name}_cloud_lb",
      name: "#{name}-cloud-lb",
      load_balancer_type: 'lb11',
      location: config.region,
      labels: labels
    )

    # Add targets (HAProxy servers)
    haproxy_servers.each_with_index do |server, idx|
      hcloud_load_balancer_target(
        :"#{name}_lb_target_#{idx}",
        load_balancer_id: lb.id,
        type: 'server',
        server_id: server.id
      )
    end

    # Add services for each frontend port
    config.frontend_ports.each do |port|
      hcloud_load_balancer_service(
        :"#{name}_lb_service_#{port}",
        load_balancer_id: lb.id,
        protocol: port == 443 ? 'https' : 'http',
        listen_port: port,
        destination_port: port,
        health_check: {
          protocol: 'tcp',
          port: port,
          interval: config.health_check_interval.to_i
        }
      )
    end

    lb
  end

  # Build the #cloud-config user_data for one HAProxy node: writes
  # /etc/pangea/haproxy-config.json and starts pangea-haproxy-bootstrap.
  #
  # BGP/VRRP settings are included only for bare-metal configs, and only
  # the ones that are set.
  #
  # The JSON is embedded as a YAML single-quoted scalar, so single quotes
  # are doubled; otherwise an apostrophe in any value (e.g. a backend
  # name) would produce invalid YAML.
  #
  # @return [String] cloud-config YAML
  def generate_haproxy_cloud_init(name, config, index)
    haproxy_config = {
      'cluster_name' => name.to_s,
      'role' => 'haproxy',
      'node_index' => index,
      'mode' => config.mode,
      'max_connections' => config.max_connections,
      'frontend_ports' => config.frontend_ports,
      'backends' => config.backends
    }

    if config.bare_metal?
      haproxy_config['bgp_asn'] = config.bgp_asn if config.bgp_asn
      haproxy_config['bgp_neighbor'] = config.bgp_neighbor if config.bgp_neighbor
      haproxy_config['vrrp_interface'] = config.vrrp_interface if config.vrrp_interface
      haproxy_config['virtual_ips'] = config.virtual_ips if config.virtual_ips.any?
    end

    quoted_json = haproxy_config.to_json.gsub("'", "''")

    <<~YAML
      #cloud-config
      write_files:
        - path: /etc/pangea/haproxy-config.json
          content: '#{quoted_json}'
          permissions: '0644'
      runcmd:
        - ['systemctl', 'start', 'pangea-haproxy-bootstrap']
    YAML
  end
end
156
+ end
157
+ end