pangea-kubernetes 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/auto-bump.yml +11 -0
- data/.github/workflows/ci.yml +7 -0
- data/.github/workflows/release.yml +22 -0
- data/.gitignore +6 -0
- data/.rspec +3 -0
- data/AGENTS.md +3 -0
- data/CLAUDE.md +370 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +128 -0
- data/README.md +42 -0
- data/Rakefile +8 -0
- data/flake.lock +2144 -0
- data/flake.nix +30 -0
- data/gemset.nix +312 -0
- data/lib/pangea/kubernetes/architecture.rb +383 -0
- data/lib/pangea/kubernetes/backend_registry.rb +117 -0
- data/lib/pangea/kubernetes/backends/aws_eks.rb +203 -0
- data/lib/pangea/kubernetes/backends/aws_nixos.rb +1347 -0
- data/lib/pangea/kubernetes/backends/azure_aks.rb +145 -0
- data/lib/pangea/kubernetes/backends/azure_nixos.rb +275 -0
- data/lib/pangea/kubernetes/backends/base.rb +116 -0
- data/lib/pangea/kubernetes/backends/gcp_gke.rb +176 -0
- data/lib/pangea/kubernetes/backends/gcp_nixos.rb +240 -0
- data/lib/pangea/kubernetes/backends/hcloud_k3s.rb +181 -0
- data/lib/pangea/kubernetes/backends/nixos_base.rb +235 -0
- data/lib/pangea/kubernetes/bare_metal/cloud_init.rb +196 -0
- data/lib/pangea/kubernetes/bare_metal/cluster_reference.rb +72 -0
- data/lib/pangea/kubernetes/load_balancer.rb +157 -0
- data/lib/pangea/kubernetes/network_backend_registry.rb +54 -0
- data/lib/pangea/kubernetes/network_backends/base.rb +78 -0
- data/lib/pangea/kubernetes/network_backends/cilium.rb +105 -0
- data/lib/pangea/kubernetes/network_backends/vpc_cni.rb +36 -0
- data/lib/pangea/kubernetes/types/argocd_config.rb +55 -0
- data/lib/pangea/kubernetes/types/control_plane_config.rb +65 -0
- data/lib/pangea/kubernetes/types/etcd_config.rb +64 -0
- data/lib/pangea/kubernetes/types/firewall_config.rb +39 -0
- data/lib/pangea/kubernetes/types/k3s_config.rb +112 -0
- data/lib/pangea/kubernetes/types/kernel_config.rb +31 -0
- data/lib/pangea/kubernetes/types/kubernetes_config.rb +129 -0
- data/lib/pangea/kubernetes/types/persistent_state_config.rb +100 -0
- data/lib/pangea/kubernetes/types/pki_config.rb +48 -0
- data/lib/pangea/kubernetes/types/secrets_config.rb +41 -0
- data/lib/pangea/kubernetes/types/vpn_config.rb +188 -0
- data/lib/pangea/kubernetes/types/wait_for_dns_config.rb +35 -0
- data/lib/pangea/kubernetes/types.rb +521 -0
- data/lib/pangea-kubernetes/version.rb +5 -0
- data/lib/pangea-kubernetes.rb +43 -0
- data/pangea-kubernetes.gemspec +33 -0
- metadata +192 -0
|
@@ -0,0 +1,1347 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright 2025 The Pangea Authors
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
|
|
17
|
+
require 'json'
|
|
18
|
+
require 'pangea/kubernetes/backends/base'
|
|
19
|
+
require 'pangea/kubernetes/backends/nixos_base'
|
|
20
|
+
|
|
21
|
+
module Pangea
|
|
22
|
+
module Kubernetes
|
|
23
|
+
module Backends
|
|
24
|
+
# AWS NixOS backend — EC2 instances running NixOS with k3s/k8s
|
|
25
|
+
# via blackmatter-kubernetes modules.
|
|
26
|
+
#
|
|
27
|
+
# Security invariants (enforced by this backend):
|
|
28
|
+
# - NO wildcard IAM actions — every action listed individually
|
|
29
|
+
# - NO public SSH or K8s API — restricted to VPC CIDR
|
|
30
|
+
# - prevent_destroy on stateful resources (IAM role, VPC)
|
|
31
|
+
# - IMDSv2 required on all instances (SSRF protection)
|
|
32
|
+
# - Session duration capped at 1 hour
|
|
33
|
+
# - 5 least-privilege IAM policies (ECR, S3, CloudWatch, EC2, SSM)
|
|
34
|
+
# - CloudWatch log group with 30-day retention
|
|
35
|
+
module AwsNixos
|
|
36
|
+
include Base
|
|
37
|
+
extend NixosBase
|
|
38
|
+
|
|
39
|
+
# Value object bundling every resource handle that makes up the control
# plane (NLB, ASG, launch template, target groups, optional ingress ALB and
# VPN NLB, plus the identifiers needed to launch further nodes).
#
# The convenience readers below all delegate to the control-plane NLB so the
# struct can be used wherever a single "server-like" reference is expected.
ControlPlaneRef = Struct.new(
  *%i[
    nlb asg lt tg listener asg_tg
    subnet_ids sg_id instance_profile_name ami_id key_name
    ingress_alb ingress_alb_tg ingress_alb_https_listener ingress_alb_http_listener
    ingress_alb_sg
    vpn_nlb vpn_nlb_tg vpn_nlb_listener
    public_subnet_ids
    distribution_track
    agent_bootstrap_secrets
  ],
  keyword_init: true
) do
  # Address used to reach the control plane: the NLB's DNS name.
  def ipv4_address = nlb.dns_name

  # Identifier of the control-plane NLB.
  def id = nlb.id

  # ARN of the control-plane NLB.
  def arn = nlb.arn
end
|
|
62
|
+
|
|
63
|
+
class << self
|
|
64
|
+
# Symbolic identifier of this backend.
def backend_name
  :aws_nixos
end
|
|
65
|
+
# This backend reports itself as not being a managed Kubernetes offering.
def managed_kubernetes?
  false
end
|
|
66
|
+
# Name of the provider gem that load_provider! requires.
def required_gem
  'pangea-aws'
end
|
|
67
|
+
|
|
68
|
+
# Requires the provider gem named by required_gem.
#
# @return [Boolean] whatever Kernel#require returns
# @raise [LoadError] re-raised with installation guidance when the gem
#   cannot be loaded; the original error message is appended
def load_provider!
  begin
    require required_gem
  rescue LoadError => load_failure
    guidance = "Backend :aws_nixos requires gem 'pangea-aws'. " \
               "Add it to your Gemfile: gem 'pangea-aws'\n" \
               "Original error: #{load_failure.message}"
    raise LoadError, guidance
  end
end
|
|
76
|
+
|
|
77
|
+
# ── Phase 1: Network + Storage ────────────────────────────────
|
|
78
|
+
# Declares the network and storage layer for the cluster and returns the
# collected handles.
#
# Resources declared, in order:
#   * optional etcd-backup S3 bucket (versioning optional, SSE + public
#     access block always)
#   * VPC, internet gateway, public route table + default route
#   * three public, three web and three data subnets (AZ suffixes a/b/c)
#   * NAT: one gateway per AZ when config.nat_per_az, otherwise a single
#     gateway in the first public subnet
#   * cluster security group (via ensure_security_group)
#   * optional VPC flow logs, SSM logs bucket and persistent-state EBS volume
#
# Subnet CIDRs are /24s carved from the first two octets of the VPC CIDR
# (previously hard-coded to 10.0.x.0/24, which fell outside any
# non-default VPC range). This assumes the VPC CIDR is a /16 or wider
# block aligned on its first two octets.
#
# @param ctx [Object] synthesis context exposing the aws_* resource helpers
# @param name [String, Symbol] cluster name, used as the resource-name prefix
# @param config [Object] cluster configuration (region, network, feature flags)
# @param tags [Hash] base tags merged into every taggable resource
# @return [Architecture::NetworkResult] handles for the declared resources
def create_network(ctx, name, config, tags)
  validate_cidr_restrictions!(config)
  network = Architecture::NetworkResult.new

  # S3 bucket for etcd backups (optional — disable for dev clusters)
  if config.etcd_backup_enabled
    etcd_bucket = config.etcd_backup_bucket || "#{name}-etcd-backups"
    network.etcd_bucket = ctx.aws_s3_bucket(
      :"#{name}_etcd",
      bucket: etcd_bucket,
      tags: tags.merge(Name: etcd_bucket)
    )
    if config.etcd_backup_versioning
      ctx.aws_s3_bucket_versioning(
        :"#{name}_etcd_versioning",
        bucket: network.etcd_bucket.id,
        versioning_configuration: { status: 'Enabled' }
      )
    end
    ctx.aws_s3_bucket_server_side_encryption_configuration(
      :"#{name}_etcd_encryption",
      bucket: network.etcd_bucket.id,
      rule: [{ apply_server_side_encryption_by_default: { sse_algorithm: 'AES256' } }]
    )
    ctx.aws_s3_bucket_public_access_block(
      :"#{name}_etcd_public_access",
      bucket: network.etcd_bucket.id,
      block_public_acls: true,
      block_public_policy: true,
      ignore_public_acls: true,
      restrict_public_buckets: true
    )
  end

  vpc_cidr = config.network&.vpc_cidr || '10.0.0.0/16'
  # First two octets of the VPC CIDR ("10.0" for the default); every tier
  # subnet below is "<prefix>.<n>.0/24" so it stays inside the VPC range.
  cidr_prefix = vpc_cidr.to_s.split('.').first(2).join('.')

  # ── VPC ─────────────────────────────────────────────────
  # NOTE(review): the header comment of this backend lists prevent_destroy
  # on the VPC as an invariant, but it is explicitly disabled here — confirm
  # which one is intended.
  network.vpc = ctx.aws_vpc(
    :"#{name}_vpc",
    cidr_block: vpc_cidr,
    enable_dns_hostnames: true,
    enable_dns_support: true,
    tags: tags.merge(Name: "#{name}-vpc"),
    lifecycle: { prevent_destroy: false }
  )

  # ── Internet Gateway ────────────────────────────────────
  network.igw = ctx.aws_internet_gateway(
    :"#{name}_igw",
    vpc_id: network.vpc.id,
    tags: tags.merge(Name: "#{name}-igw")
  )

  # ── Public Route Table (IGW → internet) ─────────────────
  public_rt = ctx.aws_route_table(
    :"#{name}_public_rt",
    vpc_id: network.vpc.id,
    tags: tags.merge(Name: "#{name}-public-rt")
  )
  network.route_table = public_rt

  ctx.aws_route(
    :"#{name}_public_default_route",
    route_table_id: public_rt.id,
    destination_cidr_block: '0.0.0.0/0',
    gateway_id: network.igw.id
  )

  # ── CIDR Layout (organized by tier × AZ) ───────────────
  #
  # Shown for the default VPC 10.0.0.0/16 in us-east-1; the "10.0" prefix
  # follows the configured VPC CIDR and the AZ follows config.region.
  #
  # Public tier (NLBs, NAT, bastions — internet-facing):
  #   10.0.0.0/24   public-a   us-east-1a
  #   10.0.1.0/24   public-b   us-east-1b
  #   10.0.2.0/24   public-c   us-east-1c
  #
  # Web tier (K8s nodes, apps — private, NAT egress):
  #   10.0.10.0/24  web-a      us-east-1a
  #   10.0.11.0/24  web-b      us-east-1b
  #   10.0.12.0/24  web-c      us-east-1c
  #
  # Data tier (databases, caches — private, no internet):
  #   10.0.20.0/24  data-a     us-east-1a
  #   10.0.21.0/24  data-b     us-east-1b
  #   10.0.22.0/24  data-c     us-east-1c
  #
  azs = %w[a b c]

  # ── Public Subnets ──────────────────────────────────────
  azs.each_with_index do |az, idx|
    subnet = ctx.aws_subnet(
      :"#{name}_public_#{az}",
      vpc_id: network.vpc.id,
      cidr_block: "#{cidr_prefix}.#{idx}.0/24",
      availability_zone: "#{config.region}#{az}",
      map_public_ip_on_launch: true,
      tags: tags.merge(Name: "#{name}-public-#{az}", Tier: 'public')
    )
    network.add_subnet(:"public_#{az}", subnet, tier: :public)

    ctx.aws_route_table_association(
      :"#{name}_public_rta_#{az}",
      subnet_id: subnet.id,
      route_table_id: public_rt.id
    )
  end

  # ── Web Subnets (created before NAT so we can associate per-AZ) ─
  web_subnets = []
  azs.each_with_index do |az, idx|
    subnet = ctx.aws_subnet(
      :"#{name}_web_#{az}",
      vpc_id: network.vpc.id,
      cidr_block: "#{cidr_prefix}.#{10 + idx}.0/24",
      availability_zone: "#{config.region}#{az}",
      map_public_ip_on_launch: false,
      tags: tags.merge(Name: "#{name}-web-#{az}", Tier: 'web')
    )
    network.add_subnet(:"web_#{az}", subnet, tier: :web)
    web_subnets << subnet
  end

  if config.nat_per_az
    # ── Per-AZ NAT Gateways (HA) ────────────────────────────
    # Each web subnet gets its own route table pointing at the NAT gateway
    # that lives in the public subnet of the same AZ.
    azs.each_with_index do |az, idx|
      eip = ctx.aws_eip(
        :"#{name}_nat_eip_#{az}",
        tags: tags.merge(Name: "#{name}-nat-eip-#{az}")
      )
      nat = ctx.aws_nat_gateway(
        :"#{name}_nat_#{az}",
        subnet_id: network.public_subnets[idx].id,
        allocation_id: eip.id,
        tags: tags.merge(Name: "#{name}-nat-#{az}")
      )
      web_rt = ctx.aws_route_table(
        :"#{name}_web_rt_#{az}",
        vpc_id: network.vpc.id,
        tags: tags.merge(Name: "#{name}-web-rt-#{az}")
      )
      ctx.aws_route(
        :"#{name}_web_default_route_#{az}",
        route_table_id: web_rt.id,
        destination_cidr_block: '0.0.0.0/0',
        nat_gateway_id: nat.id
      )
      ctx.aws_route_table_association(
        :"#{name}_web_rta_#{az}",
        subnet_id: web_subnets[idx].id,
        route_table_id: web_rt.id
      )
    end
  else
    # ── Single NAT Gateway (in public-a) ────────────────────
    eip = ctx.aws_eip(
      :"#{name}_nat_eip",
      tags: tags.merge(Name: "#{name}-nat-eip")
    )

    nat_gw = ctx.aws_nat_gateway(
      :"#{name}_nat",
      allocation_id: eip.id,
      subnet_id: network.public_subnets.first.id,
      tags: tags.merge(Name: "#{name}-nat")
    )

    web_rt = ctx.aws_route_table(
      :"#{name}_web_rt",
      vpc_id: network.vpc.id,
      tags: tags.merge(Name: "#{name}-web-rt")
    )

    ctx.aws_route(
      :"#{name}_web_default_route",
      route_table_id: web_rt.id,
      destination_cidr_block: '0.0.0.0/0',
      nat_gateway_id: nat_gw.id
    )

    web_subnets.each_with_index do |subnet, idx|
      az = azs[idx]
      ctx.aws_route_table_association(
        :"#{name}_web_rta_#{az}",
        subnet_id: subnet.id,
        route_table_id: web_rt.id
      )
    end
  end

  # ── Data Tier Route Table (no internet, VPC-local only) ─
  # No default route is added to this table.
  data_rt = ctx.aws_route_table(
    :"#{name}_data_rt",
    vpc_id: network.vpc.id,
    tags: tags.merge(Name: "#{name}-data-rt")
  )

  # ── Data Subnets ────────────────────────────────────────
  azs.each_with_index do |az, idx|
    subnet = ctx.aws_subnet(
      :"#{name}_data_#{az}",
      vpc_id: network.vpc.id,
      cidr_block: "#{cidr_prefix}.#{20 + idx}.0/24",
      availability_zone: "#{config.region}#{az}",
      map_public_ip_on_launch: false,
      tags: tags.merge(Name: "#{name}-data-#{az}", Tier: 'data')
    )
    network.add_subnet(:"data_#{az}", subnet, tier: :data)

    ctx.aws_route_table_association(
      :"#{name}_data_rta_#{az}",
      subnet_id: subnet.id,
      route_table_id: data_rt.id
    )
  end

  # Security group — K3s ports restricted to VPC CIDR
  ensure_security_group(ctx, name, config, network, vpc_cidr, tags)

  # ── VPC Flow Logs (optional — network traffic auditing) ───
  if config.flow_logs_enabled
    flow_trust = JSON.generate({
      Version: '2012-10-17',
      Statement: [{ Effect: 'Allow',
                    Principal: { Service: 'vpc-flow-logs.amazonaws.com' },
                    Action: 'sts:AssumeRole' }]
    })
    flow_role = ctx.aws_iam_role(:"#{name}_flow_log_role",
      assume_role_policy: flow_trust,
      tags: tags.merge(Name: "#{name}-flow-log-role"))

    flow_policy = ctx.aws_iam_policy(:"#{name}_flow_log_policy",
      policy: JSON.generate({ Version: '2012-10-17',
        Statement: [{ Effect: 'Allow',
          Action: %w[logs:CreateLogGroup logs:CreateLogStream logs:PutLogEvents
                     logs:DescribeLogGroups logs:DescribeLogStreams],
          Resource: ["arn:aws:logs:#{config.region}:#{config.account_id}:log-group:/vpc/#{name}*"] }]
      }), tags: tags)

    ctx.aws_iam_role_policy_attachment(:"#{name}_flow_log_attach",
      role: flow_role.name, policy_arn: flow_policy.arn)

    # NOTE(review): no explicit name: is passed for this log group while the
    # policy above is scoped to /vpc/<name>* — confirm the resource helper
    # derives a matching name.
    flow_log_group = ctx.aws_cloudwatch_log_group(:"#{name}_flow_logs",
      retention_in_days: config.flow_logs_retention_days,
      tags: tags.merge(Name: "#{name}-flow-logs"))

    network.flow_log = ctx.aws_flow_log(:"#{name}_vpc_flow_log",
      vpc_id: network.vpc.id,
      traffic_type: config.flow_logs_traffic_type,
      log_destination_type: 'cloud-watch-logs',
      log_group_name: flow_log_group.name,
      iam_role_arn: flow_role.arn,
      tags: tags.merge(Name: "#{name}-vpc-flow-log"))
    network.flow_log_role = flow_role
  end

  # ── SSM Logs Bucket (optional — separate from etcd) ───────
  if config.ssm_logs_bucket
    network.ssm_logs_bucket = ctx.aws_s3_bucket(:"#{name}_ssm_logs",
      bucket: config.ssm_logs_bucket,
      tags: tags.merge(Name: config.ssm_logs_bucket))
    ctx.aws_s3_bucket_server_side_encryption_configuration(:"#{name}_ssm_logs_sse",
      bucket: network.ssm_logs_bucket.id,
      rule: [{ apply_server_side_encryption_by_default: { sse_algorithm: 'AES256' } }])
    ctx.aws_s3_bucket_public_access_block(:"#{name}_ssm_logs_pab",
      bucket: network.ssm_logs_bucket.id,
      block_public_acls: true, block_public_policy: true,
      ignore_public_acls: true, restrict_public_buckets: true)
  end

  # ── Persistent state volume (opt-in) ────────────────────
  # When config.persistent_state is set, provision a
  # separately-managed EBS volume tagged for discovery from
  # within the cluster instance. lifecycle.prevent_destroy is
  # ON — only an explicit operator action with the lifecycle
  # block removed (or terraform state rm) can destroy this
  # volume. The volume survives ASG sleep/wake, instance
  # replacement, and even a full `pangea destroy` of the
  # cluster template.
  if config.persistent_state
    persistent_az = persistent_state_az(config)
    ps = config.persistent_state
    vol_attrs = {
      availability_zone: persistent_az,
      size: ps.size_gb,
      type: ps.volume_type,
      encrypted: ps.encrypted,
      tags: tags.merge(
        Name: "#{name}-persistent-state",
        Role: 'persistent-state',
        Cluster: name.to_s,
        ps.discovery_tag.to_sym => name.to_s
      ),
      lifecycle: { prevent_destroy: true }
    }
    # Optional attributes are only sent when configured.
    vol_attrs[:kms_key_id] = ps.kms_key_id if ps.kms_key_id
    vol_attrs[:iops] = ps.iops if ps.iops
    vol_attrs[:throughput] = ps.throughput if ps.throughput
    network.persistent_state_volume = ctx.aws_ebs_volume(
      :"#{name}_persistent_state",
      **vol_attrs
    )
  end

  network
end
|
|
384
|
+
|
|
385
|
+
# ── Persistent state AZ selection ─────────────────────────
|
|
386
|
+
# When config.persistent_state.availability_zone is explicit,
|
|
387
|
+
# use it. Otherwise default to the first web-tier AZ — i.e.
|
|
388
|
+
# `<region>a` — matching the convention that the system pool
|
|
389
|
+
# ASG launches into web-a when persistent_state is set.
|
|
390
|
+
# Picks the availability zone for the persistent-state volume.
#
# @param config [Object] cluster configuration; reads
#   persistent_state.availability_zone and region
# @return [String] the configured AZ when it is present and non-empty,
#   otherwise "<region>a"
def persistent_state_az(config)
  pinned_az = config.persistent_state&.availability_zone
  use_default = !pinned_az || pinned_az.empty?

  use_default ? "#{config.region}a" : pinned_az
end
|
|
396
|
+
|
|
397
|
+
# ── Phase 2: IAM (least-privilege) ───────────────────────────
|
|
398
|
+
# Declares the IAM layer for cluster nodes and returns the collected handles.
#
# Resources declared: the node role + instance profile, one policy (with its
# role attachment) each for ECR read, etcd backup (only when
# config.etcd_backup_enabled), CloudWatch logs, EC2 describe and SSM, an
# optional KMS key/alias for log encryption, the CloudWatch log group, an
# optional Karpenter role/profile, and an optional tag-scoped policy for the
# persistent-state EBS volume.
#
# The account id is validated before anything is declared: it is interpolated
# into policy ARNs, so nil, the 'CHANGEME' placeholder and blank strings are
# all rejected (a blank value would yield malformed ARNs).
#
# @param ctx [Object] synthesis context exposing the aws_* resource helpers
# @param name [String, Symbol] cluster name, used as the resource-name prefix
# @param config [Object] cluster configuration (account_id, region, flags)
# @param tags [Hash] base tags merged into every taggable resource
# @return [Architecture::IamResult] handles for the declared resources
# @raise [ArgumentError] when config.account_id is missing, blank or 'CHANGEME'
def create_iam(ctx, name, config, tags)
  account_id = config.account_id
  if account_id.to_s.strip.empty? || account_id == 'CHANGEME'
    raise ArgumentError,
          "account_id is required for IAM policy scoping. " \
          "Set ACCOUNT_ID env var or pass account_id in tags."
  end

  iam = Architecture::IamResult.new
  region = config.region
  etcd_bucket = config.etcd_backup_bucket || "#{name}-etcd-backups"
  log_group = "/k3s/#{name}"

  # EC2-only assume-role trust policy (JSON String per Terraform schema)
  assume_role_policy = JSON.generate({
    Version: '2012-10-17',
    Statement: [{
      Effect: 'Allow',
      Principal: { Service: 'ec2.amazonaws.com' },
      Action: 'sts:AssumeRole'
    }]
  })

  iam.role = ctx.aws_iam_role(
    :"#{name}_node_role",
    description: "Least-privilege role for #{name} K3s cluster nodes",
    assume_role_policy: assume_role_policy,
    max_session_duration: 3600, # one hour
    tags: tags.merge(Name: "#{name}-node-role")
  )

  iam.instance_profile = ctx.aws_iam_instance_profile(
    :"#{name}_node_profile",
    role: iam.role.ref(:name),
    tags: tags.merge(Name: "#{name}-node-profile")
  )

  # ── Policy: ECR Read-Only ────────────────────────────────
  ecr_resource = ["arn:aws:ecr:#{region}:#{account_id}:repository/*"]

  iam.ecr_policy = ctx.aws_iam_policy(
    :"#{name}_ecr_read",
    description: "ECR read-only for #{name} K3s nodes",
    policy: JSON.generate({
      Version: '2012-10-17',
      Statement: [{
        Sid: 'ECRReadOnly',
        Effect: 'Allow',
        Action: %w[
          ecr:GetDownloadUrlForLayer
          ecr:BatchGetImage
          ecr:BatchCheckLayerAvailability
          ecr:DescribeRepositories
          ecr:ListImages
        ],
        Resource: ecr_resource
      }, {
        Sid: 'ECRAuth',
        Effect: 'Allow',
        Action: ['ecr:GetAuthorizationToken'],
        Resource: ['*']
      }]
    }),
    tags: tags
  )
  ctx.aws_iam_role_policy_attachment(:"#{name}_ecr_read",
    role: iam.role.ref(:name), policy_arn: iam.ecr_policy.ref(:arn))

  # ── Policy: S3 Etcd Backup (conditional) ─────────────────
  if config.etcd_backup_enabled
    iam.etcd_policy = ctx.aws_iam_policy(
      :"#{name}_etcd_backup",
      description: "S3 etcd backup access for #{name} K3s nodes",
      policy: JSON.generate({
        Version: '2012-10-17',
        Statement: [{
          Sid: 'EtcdBackupReadWrite',
          Effect: 'Allow',
          Action: %w[s3:GetObject s3:PutObject s3:ListBucket],
          Resource: ["arn:aws:s3:::#{etcd_bucket}", "arn:aws:s3:::#{etcd_bucket}/*"]
        }]
      }),
      tags: tags
    )
    ctx.aws_iam_role_policy_attachment(:"#{name}_etcd_backup",
      role: iam.role.ref(:name), policy_arn: iam.etcd_policy.ref(:arn))
  end

  # ── Policy: CloudWatch Logs ──────────────────────────────
  logs_resource = ["arn:aws:logs:#{region}:#{account_id}:log-group:#{log_group}:*"]

  iam.logs_policy = ctx.aws_iam_policy(
    :"#{name}_logs",
    description: "CloudWatch log access for #{name} K3s nodes",
    policy: JSON.generate({
      Version: '2012-10-17',
      Statement: [{
        Sid: 'CloudWatchLogs',
        Effect: 'Allow',
        Action: %w[logs:CreateLogStream logs:PutLogEvents logs:DescribeLogStreams],
        Resource: logs_resource
      }]
    }),
    tags: tags
  )
  ctx.aws_iam_role_policy_attachment(:"#{name}_logs",
    role: iam.role.ref(:name), policy_arn: iam.logs_policy.ref(:arn))

  # ── Policy: EC2 Describe (node discovery) ────────────────
  # Resource is '*' but the statement is conditioned on the cluster region.
  ec2_statement = {
    Sid: 'EC2Describe',
    Effect: 'Allow',
    Action: %w[
      ec2:DescribeInstances
      ec2:DescribeTags
      ec2:DescribeVolumes
      ec2:DescribeNetworkInterfaces
      ec2:DescribeSecurityGroups
      ec2:DescribeSubnets
      ec2:DescribeVpcs
    ],
    Resource: ['*'],
    Condition: { StringEquals: { 'ec2:Region': region } }
  }

  iam.ec2_policy = ctx.aws_iam_policy(
    :"#{name}_ec2_describe",
    description: "EC2 read-only metadata for #{name} K3s nodes",
    policy: JSON.generate({ Version: '2012-10-17', Statement: [ec2_statement] }),
    tags: tags
  )
  ctx.aws_iam_role_policy_attachment(:"#{name}_ec2_describe",
    role: iam.role.ref(:name), policy_arn: iam.ec2_policy.ref(:arn))

  # ── Policy: SSM Session Manager ──────────────────────────
  # Session logs go to the dedicated SSM bucket when configured, otherwise
  # under the ssm-logs/ prefix of the etcd bucket name.
  ssm_bucket = config.ssm_logs_bucket || etcd_bucket
  iam.ssm_policy = ctx.aws_iam_policy(
    :"#{name}_ssm",
    description: "SSM session access for #{name} K3s nodes",
    policy: JSON.generate({
      Version: '2012-10-17',
      Statement: [{
        Sid: 'SSMCore',
        Effect: 'Allow',
        Action: %w[
          ssm:UpdateInstanceInformation
          ssmmessages:CreateControlChannel
          ssmmessages:CreateDataChannel
          ssmmessages:OpenControlChannel
          ssmmessages:OpenDataChannel
        ],
        Resource: ['*']
      }, {
        Sid: 'SSMSessionLogs',
        Effect: 'Allow',
        Action: ['s3:PutObject'],
        Resource: ["arn:aws:s3:::#{ssm_bucket}/ssm-logs/*"]
      }]
    }),
    tags: tags
  )
  ctx.aws_iam_role_policy_attachment(:"#{name}_ssm",
    role: iam.role.ref(:name), policy_arn: iam.ssm_policy.ref(:arn))

  # ── KMS Key for CloudWatch Logs (optional) ─────────────────
  # A caller-supplied key ARN wins; otherwise a dedicated key with rotation
  # enabled (plus an alias) is declared.
  kms_key_id = nil
  if config.kms_logs_enabled
    if config.kms_key_arn
      kms_key_id = config.kms_key_arn
    else
      kms_key = ctx.aws_kms_key(:"#{name}_logs_kms",
        description: "KMS key for #{name} CloudWatch logs",
        enable_key_rotation: true,
        policy: kms_cloudwatch_policy(account_id, config.region),
        tags: tags.merge(Name: "#{name}-logs-kms"))
      ctx.aws_kms_alias(:"#{name}_logs_kms_alias",
        name: "alias/#{name}-logs", target_key_id: kms_key.id)
      kms_key_id = kms_key.arn
    end
  end

  # ── CloudWatch Log Group ─────────────────────────────────
  # NOTE(review): no explicit name: is passed here while the logs policy
  # above is scoped to /k3s/<name> — confirm the resource helper derives a
  # matching name.
  log_group_attrs = {
    retention_in_days: 30,
    tags: tags.merge(Name: "#{name}-logs")
  }
  log_group_attrs[:kms_key_id] = kms_key_id if kms_key_id

  iam.log_group = ctx.aws_cloudwatch_log_group(
    :"#{name}_logs",
    **log_group_attrs
  )

  # ── Karpenter IRSA role (opt-in, deployed post-cluster via GitOps)
  # NOTE(review): labelled IRSA, but the trust policy below allows the EC2
  # service principal rather than an OIDC provider — confirm intent.
  if config.karpenter_enabled
    karpenter_assume = JSON.generate({
      Version: '2012-10-17',
      Statement: [{
        Effect: 'Allow',
        Principal: { Service: 'ec2.amazonaws.com' },
        Action: 'sts:AssumeRole'
      }]
    })

    iam.karpenter_role = ctx.aws_iam_role(
      :"#{name}_karpenter_role",
      description: "Karpenter node role for #{name} (IRSA)",
      assume_role_policy: karpenter_assume,
      max_session_duration: 3600,
      tags: tags.merge(Name: "#{name}-karpenter-role")
    )

    iam.karpenter_profile = ctx.aws_iam_instance_profile(
      :"#{name}_karpenter_profile",
      role: iam.karpenter_role.ref(:name),
      tags: tags.merge(Name: "#{name}-karpenter-profile")
    )
  end

  # ── Policy: Persistent state volume attach/detach ──────
  # When persistent_state is configured, the node role needs
  # to be able to (a) describe the cluster's tagged EBS
  # volume to find its VolumeId, and (b) attach/detach it
  # to/from this instance. Tag-scoped so the role cannot
  # touch unrelated volumes in the account.
  if config.persistent_state
    ps = config.persistent_state
    tag_condition = {
      StringEquals: {
        "aws:ResourceTag/#{ps.discovery_tag}" => name.to_s
      }
    }
    iam.persistent_state_policy = ctx.aws_iam_policy(
      :"#{name}_persistent_state",
      description: "Discover + attach the persistent-state EBS volume for #{name}",
      policy: JSON.generate({
        Version: '2012-10-17',
        Statement: [{
          Sid: 'DescribeVolumes',
          Effect: 'Allow',
          Action: %w[ec2:DescribeVolumes ec2:DescribeInstances],
          Resource: ['*']
        }, {
          Sid: 'AttachDetachTaggedVolume',
          Effect: 'Allow',
          Action: %w[ec2:AttachVolume ec2:DetachVolume],
          Resource: [
            "arn:aws:ec2:#{region}:#{account_id}:volume/*",
            "arn:aws:ec2:#{region}:#{account_id}:instance/*"
          ],
          Condition: tag_condition
        }]
      }),
      tags: tags
    )
    ctx.aws_iam_role_policy_attachment(:"#{name}_persistent_state",
      role: iam.role.ref(:name),
      policy_arn: iam.persistent_state_policy.ref(:arn))
  end

  iam
end
|
|
659
|
+
|
|
660
|
+
# ── Phase 3: Cluster (control plane via LT+ASG+NLB) ────────────
#
# Synthesizes the control-plane resources for the cluster: a launch
# template, an autoscaling group, an internal NLB with a TCP/6443 target
# group and listener, and (when enabled in config) an internet-facing
# ingress ALB with an optional ACM certificate, plus a UDP VPN NLB.
#
# @param ctx    synthesis context exposing the aws_* resource methods
# @param name   cluster name; prefixes every resource identifier
# @param config cluster configuration (system pool, AMI, ingress/VPN options)
# @param result accumulated phase results; result.network and result.iam are read
# @param tags   [Hash] base tags merged into every taggable resource
# @return [ControlPlaneRef] references handed to the node-pool phase
def create_cluster(ctx, name, config, result, tags)
  # Ensure cluster SG exists — when external_network is used,
  # create_network was skipped so network.sg may be nil.
  if result.network && result.network.sg.nil?
    vpc_cidr = config.network&.vpc_cidr || '10.0.0.0/16'
    ensure_security_group(ctx, name, config, result.network, vpc_cidr, tags)
  end

  system_pool = config.system_node_pool
  instance_type = system_pool.instance_types.first
  # AMI precedence: explicit ami_id, then SSM parameter lookup, then the
  # NixOS image id (or a fixed fallback string).
  ami_id = if config.ami_id
             config.ami_id
           elsif config.ssm_ami_parameter
             ctx.extend(Pangea::Resources::AWS) unless ctx.respond_to?(:data_aws_ssm_parameter)
             ssm_data = ctx.data_aws_ssm_parameter(:"#{name}-ami", name: config.ssm_ami_parameter)
             ssm_data.value
           else
             config.nixos&.image_id || 'ami-nixos-latest'
           end
  subnet_ids = resolve_subnet_ids(config, result)
  # AZ binding: EBS volumes are AZ-scoped. When persistent_state
  # is configured the control plane ASG must launch into the
  # same AZ as the volume — otherwise attach fails. Filter the
  # multi-AZ subnet list down to the persistent_state AZ.
  subnet_ids = filter_subnets_to_persistent_az(subnet_ids, config, result) if config.persistent_state
  sg_id = result.network&.sg&.id
  instance_profile_name = result.iam&.instance_profile&.ref(:name)
  key_name = config.key_pair

  cloud_init = build_server_cloud_init(name, config, 0, result)

  # No SSH key pair is attached when the cluster is SSM-only.
  effective_key_name = config.ssm_only ? nil : key_name
  cp_lt_attrs = {
    image_id: ami_id,
    instance_type: instance_type,
    user_data: terraform_base64encode(cloud_init),
    iam_instance_profile: instance_profile_name ? { name: instance_profile_name } : nil,
    vpc_security_group_ids: sg_id ? [sg_id] : [],
    metadata_options: {
      http_endpoint: 'enabled',
      http_tokens: 'required',
      http_put_response_hop_limit: 1,
      instance_metadata_tags: 'enabled',
    },
    block_device_mappings: [{
      device_name: '/dev/xvda',
      ebs: {
        volume_size: system_pool.disk_size_gb,
        volume_type: 'gp3',
        encrypted: true,
      }
    }],
    tag_specifications: [{
      resource_type: 'instance',
      tags: tags.merge(
        Name: "#{name}-cp",
        Role: 'control-plane',
        Distribution: config.distribution.to_s
      )
    }],
    tags: tags.merge(Name: "#{name}-cp-lt")
  }
  cp_lt_attrs[:key_name] = effective_key_name if effective_key_name

  lt = ctx.aws_launch_template(:"#{name}_cp_lt", **cp_lt_attrs)

  # min_size=0 allows parked mode (all instances off, infra preserved)
  cp_desired = system_pool.min_size || 1
  max_cp = system_pool.max_size || [cp_desired, 1].max
  cp_asg_attrs = {
    min_size: cp_desired,
    max_size: [max_cp, cp_desired].max,
    launch_template: { id: lt.id, version: '$Latest' },
    vpc_zone_identifier: subnet_ids,
    health_check_grace_period: 300,
    tag: [
      { key: 'Name', value: "#{name}-cp", propagate_at_launch: true },
      { key: 'KubernetesCluster', value: name.to_s, propagate_at_launch: true },
      { key: 'Role', value: 'control-plane', propagate_at_launch: true }
    ]
  }
  cp_asg_attrs[:desired_capacity] = system_pool.desired_size if system_pool.desired_size

  asg = ctx.aws_autoscaling_group(:"#{name}_cp_asg", **cp_asg_attrs)

  nlb = ctx.aws_lb(
    :"#{name}_cp_nlb",
    name: "#{name}-cp-nlb",
    internal: true,
    load_balancer_type: 'network',
    subnets: subnet_ids,
    tags: tags.merge(Name: "#{name}-cp-nlb")
  )

  tg = ctx.aws_lb_target_group(
    :"#{name}_cp_tg",
    name: "#{name}-cp-tg",
    port: 6443,
    protocol: 'TCP',
    vpc_id: result.network&.vpc&.id,
    target_type: 'instance',
    health_check: {
      protocol: 'TCP',
      port: '6443',
      healthy_threshold: 3,
      unhealthy_threshold: 3,
      interval: 30,
    },
    tags: tags.merge(Name: "#{name}-cp-tg")
  )

  listener = ctx.aws_lb_listener(
    :"#{name}_cp_listener",
    load_balancer_arn: nlb.arn,
    port: 6443,
    protocol: 'TCP',
    default_action: [{ type: 'forward', target_group_arn: tg.arn }]
  )

  asg_tg = ctx.aws_autoscaling_attachment(
    :"#{name}_cp_asg_tg",
    autoscaling_group_name: asg.id,
    lb_target_group_arn: tg.arn
  )

  # ── Ingress ALB (optional — HTTP/HTTPS for services) ────
  ingress_alb = nil
  ingress_alb_tg = nil
  ingress_alb_https_listener = nil
  ingress_alb_http_listener = nil
  alb_sg = nil
  public_subnet_ids = resolve_public_subnet_ids(config, result)

  # ── ACM Certificate (optional — auto-create for ALB HTTPS) ─
  # Only created when the ALB is enabled, a domain is given and no
  # certificate ARN was supplied.
  effective_cert_arn = config.ingress_alb_certificate_arn
  if config.ingress_alb_enabled && config.ingress_alb_domain && !effective_cert_arn
    acm_cert = ctx.aws_acm_certificate(:"#{name}_ingress_cert",
      domain_name: config.ingress_alb_domain,
      validation_method: 'DNS',
      tags: tags.merge(Name: "#{name}-ingress-cert"))
    if config.ingress_alb_zone_id
      ctx.aws_acm_certificate_validation(:"#{name}_ingress_cert_validation",
        certificate_arn: acm_cert.arn)
    end
    effective_cert_arn = acm_cert.arn
  end

  if config.ingress_alb_enabled
    # ALB security group — allows 80/443 from ingress_source_cidr
    ingress_cidr = config.ingress_source_cidr || '0.0.0.0/0'
    alb_sg = ctx.aws_security_group(
      :"#{name}_alb_sg",
      description: "ALB security group for #{name} ingress",
      vpc_id: result.network&.vpc&.id,
      tags: tags.merge(Name: "#{name}-alb-sg")
    )

    ctx.aws_security_group_rule(
      :"#{name}_alb_sg_https",
      type: 'ingress', from_port: 443, to_port: 443, protocol: 'tcp',
      cidr_blocks: [ingress_cidr],
      security_group_id: alb_sg.id,
      description: 'HTTPS ingress'
    )

    ctx.aws_security_group_rule(
      :"#{name}_alb_sg_http",
      type: 'ingress', from_port: 80, to_port: 80, protocol: 'tcp',
      cidr_blocks: [ingress_cidr],
      security_group_id: alb_sg.id,
      description: 'HTTP ingress (redirect to HTTPS)'
    )

    ctx.aws_security_group_rule(
      :"#{name}_alb_sg_egress",
      type: 'egress', from_port: 0, to_port: 0, protocol: '-1',
      cidr_blocks: ['0.0.0.0/0'],
      security_group_id: alb_sg.id
    )

    ingress_alb = ctx.aws_lb(
      :"#{name}_ingress_alb",
      name: "#{name}-ingress",
      internal: false,
      load_balancer_type: 'application',
      subnets: public_subnet_ids,
      security_groups: [alb_sg.id],
      idle_timeout: config.ingress_alb_idle_timeout,
      tags: tags.merge(Name: "#{name}-ingress-alb")
    )

    # Target group for ingress controller (HTTP on nodes)
    ingress_alb_tg = ctx.aws_lb_target_group(
      :"#{name}_ingress_tg",
      name: "#{name}-ingress-tg",
      port: 80,
      protocol: 'HTTP',
      vpc_id: result.network&.vpc&.id,
      target_type: 'instance',
      health_check: {
        protocol: 'HTTP',
        port: '80',
        path: '/healthz',
        healthy_threshold: 2,
        unhealthy_threshold: 3,
        interval: 15,
      },
      tags: tags.merge(Name: "#{name}-ingress-tg")
    )

    # HTTPS listener (TLS termination at ALB)
    if effective_cert_arn
      ingress_alb_https_listener = ctx.aws_lb_listener(
        :"#{name}_ingress_https",
        load_balancer_arn: ingress_alb.arn,
        port: 443,
        protocol: 'HTTPS',
        ssl_policy: 'ELBSecurityPolicy-TLS13-1-2-2021-06',
        certificate_arn: effective_cert_arn,
        default_action: [{ type: 'forward', target_group_arn: ingress_alb_tg.arn }]
      )
    end

    # HTTP listener (redirect to HTTPS or forward)
    if config.ingress_alb_http_redirect && effective_cert_arn
      ingress_alb_http_listener = ctx.aws_lb_listener(
        :"#{name}_ingress_http",
        load_balancer_arn: ingress_alb.arn,
        port: 80,
        protocol: 'HTTP',
        default_action: [{
          type: 'redirect',
          redirect: { port: '443', protocol: 'HTTPS', status_code: 'HTTP_301' }
        }]
      )
    else
      ingress_alb_http_listener = ctx.aws_lb_listener(
        :"#{name}_ingress_http",
        load_balancer_arn: ingress_alb.arn,
        port: 80,
        protocol: 'HTTP',
        default_action: [{ type: 'forward', target_group_arn: ingress_alb_tg.arn }]
      )
    end

    # Attach worker ASG to ingress target group (done in create_node_pool)

    # SG-to-SG rules for HTTP/HTTPS when restricted to ALB
    if config.sg_restrict_http_to_alb
      ctx.aws_security_group_rule(:"#{name}_sg_http_from_alb",
        type: 'ingress', from_port: 80, to_port: 80, protocol: 'tcp',
        source_security_group_id: alb_sg.id,
        security_group_id: result.network.sg.id,
        description: 'HTTP from ALB only')
      ctx.aws_security_group_rule(:"#{name}_sg_https_from_alb",
        type: 'ingress', from_port: 443, to_port: 443, protocol: 'tcp',
        source_security_group_id: alb_sg.id,
        security_group_id: result.network.sg.id,
        description: 'HTTPS from ALB only')
    end
  end

  # ── VPN NLB (optional — WireGuard operator access) ──────
  vpn_nlb = nil
  vpn_nlb_tg = nil
  vpn_nlb_listener = nil

  if config.vpn_nlb_enabled
    vpn_port = config.vpn_nlb_port.to_i

    # When EIP allocation IDs are provided, use subnet_mapping to
    # attach Elastic IPs to the NLB. This gives the VPN endpoint a
    # permanent public IP that survives NLB recreation. Otherwise
    # fall back to plain subnets (dynamic DNS-based endpoint).
    # Array() keeps an unset (nil) attribute from crashing the .any? check.
    vpn_eip_ids = config.respond_to?(:vpn_eip_allocation_ids) ? Array(config.vpn_eip_allocation_ids) : []
    vpn_nlb_attrs = {
      name: "#{name}-vpn",
      internal: false,
      load_balancer_type: 'network',
      tags: tags.merge(Name: "#{name}-vpn-nlb"),
    }
    if vpn_eip_ids.any?
      # Subnets beyond the number of EIPs are mapped without an allocation_id.
      vpn_nlb_attrs[:subnet_mapping] = public_subnet_ids.zip(vpn_eip_ids).map do |subnet_id, eip_id|
        mapping = { subnet_id: subnet_id }
        mapping[:allocation_id] = eip_id if eip_id
        mapping
      end
    else
      vpn_nlb_attrs[:subnets] = public_subnet_ids
    end

    vpn_nlb = ctx.aws_lb(
      :"#{name}_vpn_nlb",
      **vpn_nlb_attrs
    )

    # Health check runs over TCP, on a dedicated port when one is configured.
    health_port = (config.vpn_health_check_port || vpn_port).to_s
    vpn_nlb_tg = ctx.aws_lb_target_group(
      :"#{name}_vpn_tg",
      name: "#{name}-vpn-wg",
      port: vpn_port,
      protocol: 'UDP',
      vpc_id: result.network&.vpc&.id,
      target_type: 'instance',
      health_check: {
        protocol: 'TCP',
        port: health_port,
        healthy_threshold: 3,
        unhealthy_threshold: 3,
        interval: 30,
      },
      tags: tags.merge(Name: "#{name}-vpn-tg")
    )

    vpn_nlb_listener = ctx.aws_lb_listener(
      :"#{name}_vpn_listener",
      load_balancer_arn: vpn_nlb.arn,
      port: vpn_port,
      protocol: 'UDP',
      default_action: [{ type: 'forward', target_group_arn: vpn_nlb_tg.arn }]
    )

    # Attach control plane ASG to VPN target group
    ctx.aws_autoscaling_attachment(
      :"#{name}_vpn_asg_tg",
      autoscaling_group_name: asg.id,
      lb_target_group_arn: vpn_nlb_tg.arn
    )

    # Security group rule for VPN ingress
    vpn_source = config.vpn_source_cidr || config.ingress_source_cidr || '0.0.0.0/0'
    ctx.aws_security_group_rule(
      :"#{name}_sg_vpn_ingress",
      type: 'ingress', from_port: vpn_port, to_port: vpn_port, protocol: 'udp',
      cidr_blocks: [vpn_source],
      security_group_id: sg_id,
      description: 'WireGuard VPN (internet-facing NLB)'
    )
  end

  ControlPlaneRef.new(
    nlb: nlb, asg: asg, lt: lt, tg: tg,
    listener: listener, asg_tg: asg_tg,
    subnet_ids: subnet_ids, sg_id: sg_id,
    instance_profile_name: instance_profile_name,
    ami_id: ami_id, key_name: effective_key_name,
    ingress_alb: ingress_alb, ingress_alb_tg: ingress_alb_tg,
    ingress_alb_https_listener: ingress_alb_https_listener,
    ingress_alb_http_listener: ingress_alb_http_listener,
    ingress_alb_sg: config.ingress_alb_enabled ? alb_sg : nil,
    vpn_nlb: vpn_nlb, vpn_nlb_tg: vpn_nlb_tg,
    vpn_nlb_listener: vpn_nlb_listener,
    public_subnet_ids: public_subnet_ids,
    distribution_track: config.distribution_track || config.kubernetes_version,
    agent_bootstrap_secrets: build_agent_bootstrap_secrets(config)
  )
end
|
|
1018
|
+
|
|
1019
|
+
# ── Phase 4: Node pools (workers) ────────────────────────────
# Builds the agent cloud-init with the join-server placeholder enabled and
# hands it to create_worker_pool, which swaps the placeholder for a
# Terraform expression when it encodes the user data.
#
# @return whatever create_worker_pool returns for this pool
def create_node_pool(ctx, name, cluster_ref, pool_config, tags)
  create_worker_pool(
    ctx, name, cluster_ref, pool_config,
    build_agent_cloud_init(name, tags, cluster_ref, use_join_placeholder: true),
    tags
  )
end
|
|
1027
|
+
|
|
1028
|
+
# --- NixosBase template hooks ---
|
|
1029
|
+
|
|
1030
|
+
# Synthesizes one worker pool: a launch template and an autoscaling group,
# plus an attachment to the ingress ALB target group when the cluster
# reference exposes one.
#
# AMI, key pair, subnets, security group and instance profile are read from
# cluster_ref whenever it responds to the matching accessor; AMI, key pair
# and subnets otherwise fall back to entries in tags.
#
# @param ctx         synthesis context exposing the aws_* resource methods
# @param name        cluster name used as resource-name prefix
# @param cluster_ref control-plane reference (must respond to ipv4_address)
# @param pool_config pool settings (name, instance_types, sizes, disk_size_gb)
# @param cloud_init  [String] cloud-init text containing JOIN_SERVER_PLACEHOLDER
# @param tags        [Hash] base tags merged into every taggable resource
# @return the autoscaling group returned by ctx.aws_autoscaling_group
def create_worker_pool(ctx, name, cluster_ref, pool_config, cloud_init, tags)
  pool_label = pool_config.name
  pool_name = :"#{name}_#{pool_label}"
  display_name = "#{name}-#{pool_label}"

  # Reads an attribute off cluster_ref when it exists, else the lazy fallback.
  from_cluster = lambda do |attr, &fallback|
    cluster_ref.respond_to?(attr) ? cluster_ref.public_send(attr) : fallback&.call
  end

  image = from_cluster.call(:ami_id) { tags[:AmiId] || 'ami-nixos-latest' }
  ssh_key = from_cluster.call(:key_name) { tags[:KeyPair] }
  subnets = from_cluster.call(:subnet_ids) { tags[:SubnetIds] || [] }
  node_sg = from_cluster.call(:sg_id)
  profile = from_cluster.call(:instance_profile_name)

  # The join address is a Terraform reference; strip its ${...} wrapper so it
  # can be passed as a bare expression in place of JOIN_SERVER_PLACEHOLDER.
  join_expr = strip_tf_interpolation(cluster_ref.ipv4_address.to_s)
  user_data = terraform_base64encode(cloud_init, { JOIN_SERVER_PLACEHOLDER => join_expr })

  lt_attrs = {
    image_id: image,
    instance_type: pool_config.instance_types.first,
    user_data: user_data,
    iam_instance_profile: (profile ? { name: profile } : nil),
    vpc_security_group_ids: (node_sg ? [node_sg] : []),
    metadata_options: {
      http_endpoint: 'enabled',
      http_tokens: 'required',
      http_put_response_hop_limit: 1,
      instance_metadata_tags: 'enabled'
    },
    block_device_mappings: [
      {
        device_name: '/dev/xvda',
        ebs: { volume_size: pool_config.disk_size_gb, volume_type: 'gp3', encrypted: true }
      }
    ],
    tag_specifications: [
      {
        resource_type: 'instance',
        tags: tags.merge(Name: display_name, Role: 'worker', NodePool: pool_label.to_s)
      }
    ],
    tags: tags.merge(Name: "#{display_name}-lt")
  }
  lt_attrs[:key_name] = ssh_key if ssh_key

  template = ctx.aws_launch_template(:"#{pool_name}_lt", **lt_attrs)

  propagated = {
    'Name' => display_name,
    'KubernetesCluster' => name.to_s,
    'NodePool' => pool_label.to_s
  }
  asg_attrs = {
    min_size: pool_config.min_size,
    max_size: pool_config.max_size,
    launch_template: { id: template.id, version: '$Latest' },
    vpc_zone_identifier: subnets,
    health_check_grace_period: 300,
    tag: propagated.map { |key, value| { key: key, value: value, propagate_at_launch: true } }
  }
  desired = pool_config.desired_size
  asg_attrs[:desired_capacity] = desired if desired

  worker_asg = ctx.aws_autoscaling_group(:"#{pool_name}_asg", **asg_attrs)

  # Attach workers to ingress ALB target group when present
  ingress_tg = cluster_ref.ingress_alb_tg if cluster_ref.respond_to?(:ingress_alb_tg)
  if ingress_tg
    ctx.aws_autoscaling_attachment(
      :"#{pool_name}_ingress_tg",
      autoscaling_group_name: worker_asg.id,
      lb_target_group_arn: ingress_tg.arn
    )
  end

  worker_asg
end
|
|
1109
|
+
|
|
1110
|
+
private
|
|
1111
|
+
|
|
1112
|
+
# Collapses a subnet list to the one web-tier subnet located in the
# availability zone reported by persistent_state_az(config).
#
# A web subnet matches when its availability_zone (as a string) ends with
# the target AZ string. The incoming list is returned untouched when the
# network exposes no web_subnets or none of them match.
#
# @param subnet_ids [Array] candidate subnet ids
# @param config     cluster configuration passed to persistent_state_az
# @param result     phase results; result.network.web_subnets is consulted
# @return [Array] a one-element list with the matching subnet id, or subnet_ids
def filter_subnets_to_persistent_az(subnet_ids, config, result)
  az_suffix = persistent_state_az(config).to_s
  network = result&.network
  return subnet_ids unless network&.respond_to?(:web_subnets)

  match = network.web_subnets.find do |subnet|
    subnet.respond_to?(:availability_zone) && subnet.availability_zone.to_s.end_with?(az_suffix)
  end

  match ? [match.id] : subnet_ids
end
|
|
1130
|
+
|
|
1131
|
+
# Picks the subnets that Kubernetes nodes launch into.
#
# Precedence: a non-empty config.network.subnet_ids, then the network's
# web-tier subnet ids, then all of the network's subnet ids. Yields an
# empty list when no network result exists or it exposes neither accessor.
#
# @param config cluster configuration (optional network.subnet_ids)
# @param result phase results (optional network)
# @return [Array] subnet ids, possibly empty
def resolve_subnet_ids(config, result)
  explicit = config.network&.subnet_ids
  return explicit if explicit&.any?

  network = result.network
  return [] unless network

  web_tier = network.respond_to?(:web_subnet_ids) ? network.web_subnet_ids : []
  return web_tier if web_tier.any?

  network.respond_to?(:subnet_ids) ? network.subnet_ids : []
end
|
|
1146
|
+
|
|
1147
|
+
# Picks the subnets for internet-facing load balancers: the network's
# public-tier subnet ids when there are any, otherwise whatever
# resolve_subnet_ids returns for the same arguments.
#
# @param config cluster configuration
# @param result phase results (optional network)
# @return [Array] subnet ids, possibly empty
def resolve_public_subnet_ids(config, result)
  network = result.network
  # nil responds to nothing, so a missing network falls through to the default.
  public_tier = network.respond_to?(:public_subnet_ids) ? network.public_subnet_ids : []
  public_tier.any? ? public_tier : resolve_subnet_ids(config, result)
end
|
|
1156
|
+
|
|
1157
|
+
# Guards against world-open management access: ssh_cidr, api_cidr and
# vpn_cidr may not be 0.0.0.0/0, and vpn_cidr must be set whenever the
# config carries VPN links.
#
# @param config cluster configuration (ssh_cidr, api_cidr, vpn_cidr, vpn)
# @raise [ArgumentError] on the first violated restriction
# @return [nil] when every check passes
def validate_cidr_restrictions!(config)
  anywhere = '0.0.0.0/0'
  ssh = config.ssh_cidr
  api = config.api_cidr
  vpn = config.vpn_cidr

  raise ArgumentError, "ssh_cidr must not be 0.0.0.0/0 — SSH must not be public" if ssh == anywhere
  raise ArgumentError, "api_cidr must not be 0.0.0.0/0 — K8s API must not be public" if api == anywhere
  raise ArgumentError, "vpn_cidr must not be 0.0.0.0/0 — WireGuard must not be public" if vpn == anywhere
  return unless vpn.nil? && config.vpn && config.vpn.links.any?

  raise ArgumentError, "vpn_cidr tag is required when VPN links are configured"
end
|
|
1175
|
+
|
|
1176
|
+
# Translates the distribution's base firewall ports into ingress rule
# hashes, choosing the source CIDR per port:
#   :ssh          ssh_cidr (or the VPC CIDR); omitted when config.ssm_only
#   :api          api_cidr (or the VPC CIDR)
#   :http/:https  ingress_source_cidr (or 0.0.0.0/0); omitted when HTTP is
#                 restricted to an enabled ingress ALB
#   :wireguard    vpn_cidr (or the VPC CIDR)
#   anything else the VPC CIDR
# The rule described as 'WireGuard VPN' is dropped when neither vpn_cidr
# nor config.vpn is set.
#
# @param config   cluster configuration
# @param vpc_cidr [String] CIDR used for private ports
# @return [Array<Hash>] hashes with from_port, to_port, protocol,
#   cidr_blocks and description
def aws_security_group_rules(config, vpc_cidr)
  ssh_source = config.ssh_cidr || vpc_cidr
  api_source = config.api_cidr || vpc_cidr
  vpn_source = config.vpn_cidr

  # Source CIDR list for a port, or nil when no rule should be emitted.
  sources_for = lambda do |port_name|
    case port_name
    when :ssh
      config.ssm_only ? nil : [ssh_source]
    when :api
      [api_source]
    when :http, :https
      # SG-source rules added in create_cluster
      alb_only = config.sg_restrict_http_to_alb && config.ingress_alb_enabled
      alb_only ? nil : [config.ingress_source_cidr || '0.0.0.0/0']
    when :wireguard
      [vpn_source || vpc_cidr]
    else
      [vpc_cidr]
    end
  end

  rules = base_firewall_ports(config.distribution).each_with_object([]) do |(port_name, port_def), acc|
    sources = sources_for.call(port_name)
    next unless sources

    acc << {
      from_port: port_range_start(port_def[:port]),
      to_port: port_range_end(port_def[:port]),
      protocol: port_def[:protocol].to_s,
      cidr_blocks: sources,
      description: port_def[:description]
    }
  end

  # Remove WireGuard rule entirely when no VPN is configured
  return rules if vpn_source || config.vpn

  rules.reject { |rule| rule[:description] == 'WireGuard VPN' }
end
|
|
1212
|
+
|
|
1213
|
+
# Creates the cluster security group and its rules unless network.sg is
# already set, storing the new group on network.sg.
#
# One ingress rule is emitted per entry from aws_security_group_rules; each
# rule's resource name is derived from its description (lower-cased, runs
# of non-alphanumerics collapsed to "_", trailing "_" removed) or from its
# index when there is no description. A single allow-all egress rule follows.
#
# Called from create_network (normal path) and create_cluster (when an
# external network is provided and network.sg is nil).
#
# @param ctx      synthesis context exposing the aws_* resource methods
# @param name     cluster name used as resource-name prefix
# @param config   cluster configuration forwarded to aws_security_group_rules
# @param network  network reference with vpc and a writable sg
# @param vpc_cidr [String] CIDR used for private ports
# @param tags     [Hash] base tags
# @return nil when the group already existed, else the egress rule resource
def ensure_security_group(ctx, name, config, network, vpc_cidr, tags)
  return if network.sg

  group_attrs = {
    description: "Security group for #{name} k8s/k3s NixOS nodes",
    vpc_id: network.vpc.id,
    tags: tags.merge(Name: "#{name}-sg")
  }
  network.sg = ctx.aws_security_group(:"#{name}_sg", **group_attrs)

  aws_security_group_rules(config, vpc_cidr).each.with_index do |rule, position|
    slug = rule[:description]
    slug &&= slug.downcase.gsub(/[^a-z0-9]+/, '_').gsub(/_+$/, '')
    slug ||= "rule_#{position}"

    ctx.aws_security_group_rule(
      :"#{name}_sg_ingress_#{slug}",
      type: 'ingress',
      security_group_id: network.sg.id,
      **%i[from_port to_port protocol cidr_blocks description].to_h { |key| [key, rule[key]] }
    )
  end

  egress_attrs = {
    type: 'egress',
    security_group_id: network.sg.id,
    from_port: 0,
    to_port: 0,
    protocol: '-1',
    cidr_blocks: ['0.0.0.0/0']
  }
  ctx.aws_security_group_rule(:"#{name}_sg_egress_all", **egress_attrs)
end
|
|
1250
|
+
|
|
1251
|
+
# Renders the KMS key policy as a JSON string with two statements: key
# administration for the account root principal, and encrypt/decrypt/
# data-key access for the regional CloudWatch Logs service principal.
#
# @param account_id AWS account id placed in the root principal ARN
# @param region     AWS region placed in the logs service principal
# @return [String] JSON policy document
def kms_cloudwatch_policy(account_id, region)
  admin_actions = %w[
    kms:Create* kms:Describe* kms:Enable* kms:List*
    kms:Put* kms:Update* kms:Revoke* kms:Disable*
    kms:Get* kms:Delete* kms:TagResource kms:UntagResource
    kms:ScheduleKeyDeletion kms:CancelKeyDeletion
  ]
  logs_actions = %w[kms:Encrypt kms:Decrypt kms:ReEncrypt* kms:GenerateDataKey* kms:DescribeKey]

  key_admin = {
    Sid: 'AllowKeyAdmin',
    Effect: 'Allow',
    Principal: { AWS: "arn:aws:iam::#{account_id}:root" },
    Action: admin_actions,
    Resource: '*'
  }
  cloudwatch_logs = {
    Sid: 'AllowCloudWatchLogs',
    Effect: 'Allow',
    Principal: { Service: "logs.#{region}.amazonaws.com" },
    Action: logs_actions,
    Resource: '*'
  }

  JSON.generate({ Version: '2012-10-17', Statement: [key_admin, cloudwatch_logs] })
end
|
|
1271
|
+
|
|
1272
|
+
# Produce the user_data value for a cloud-init script.
#
# Without replacements the script is base64-encoded in Ruby and the plain
# base64 string is returned.
#
# With replacements, the script is still base64-encoded in Ruby so that
# Terraform never has to parse the raw script text (no $, #, quote or
# newline escaping is needed). The returned Terraform expression then
# decodes it at apply time, substitutes each placeholder with the given
# Terraform expression, and re-encodes the result:
#
#   ${base64encode(replace(base64decode("<b64>"), "PLACEHOLDER", expr))}
#
# Substitution must happen on the decoded text: base64 works on 3-byte
# groups, so the encoded form of a placeholder only appears inside the
# encoded script when the placeholder happens to start on a 3-byte
# boundary, and splicing in a separately encoded value would put padding
# in the middle of the string.
#
# NOTE(review): Terraform's replace() treats a substring wrapped in
# forward slashes as a regular expression; placeholders are assumed not to
# be of that form.
#
# @param raw [String] the cloud-init script (may contain shell ${} vars)
# @param replacements [Hash<String,String>] placeholder => Terraform
#   expression; a surrounding ${...} on the expression is stripped
# @return [String] base64 text, or a "${...}" Terraform expression string
def terraform_base64encode(raw, replacements = {})
  # pack('m0') is strict (no line feeds) base64, same output as
  # Base64.strict_encode64 without needing the base64 library.
  encoded = [raw].pack('m0')
  return encoded if replacements.empty?

  decoded_expr = "base64decode(\"#{encoded}\")"
  substituted = replacements.reduce(decoded_expr) do |expr, (placeholder, tf_expression)|
    # Quote the placeholder as a Terraform string literal: escape backslashes
    # and quotes, and neutralize template sequences.
    literal = placeholder.to_s
                         .gsub(/[\\"]/) { |char| "\\#{char}" }
                         .gsub('${', '$${')
                         .gsub('%{', '%%{')
    bare_expr = tf_expression.to_s.delete_prefix('${').delete_suffix('}')
    "replace(#{expr}, \"#{literal}\", #{bare_expr})"
  end

  "${base64encode(#{substituted})}"
end
|
|
1319
|
+
|
|
1320
|
+
# Escape a string for use inside a double-quoted Terraform string literal:
# every backslash and every double quote is prefixed with a backslash.
#
# The block form of gsub is deliberate. In a replacement *string* Ruby
# reads "\\" as an escape for a single backslash, so
# gsub('\\', '\\\\') leaves backslashes undoubled.
#
# @param str [String] raw text
# @return [String] a new escaped string (template sequences such as ${
#   are not touched)
def escape_tf_string(str)
  str.gsub(/[\\"]/) { |char| "\\#{char}" }
end
|
|
1324
|
+
|
|
1325
|
+
# Unwraps a Terraform interpolation string into its bare expression,
# e.g. "${aws_lb.x.dns_name}" -> "aws_lb.x.dns_name". A string that does
# not both start with "${" and end with "}" is returned as-is.
#
# @param ref [String] possibly wrapped reference
# @return [String] the inner expression, or ref itself when not wrapped
def strip_tf_interpolation(ref)
  wrapped = ref.start_with?('${') && ref.end_with?('}')
  wrapped ? ref[2...-1] : ref
end
|
|
1335
|
+
|
|
1336
|
+
# First port of a port spec. A String such as "30000-32767" yields the
# integer before the first "-"; any non-String value is returned unchanged.
def port_range_start(port)
  return port unless port.is_a?(String)

  port.split('-')[0].to_i
end
|
|
1339
|
+
|
|
1340
|
+
# Last port of a port spec. A String such as "30000-32767" yields the
# integer after the last "-"; any non-String value is returned unchanged.
def port_range_end(port)
  return port unless port.is_a?(String)

  port.split('-')[-1].to_i
end
|
|
1343
|
+
end
|
|
1344
|
+
end
|
|
1345
|
+
end
|
|
1346
|
+
end
|
|
1347
|
+
end
|