pangea-kubernetes 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/auto-bump.yml +11 -0
  3. data/.github/workflows/ci.yml +7 -0
  4. data/.github/workflows/release.yml +22 -0
  5. data/.gitignore +6 -0
  6. data/.rspec +3 -0
  7. data/AGENTS.md +3 -0
  8. data/CLAUDE.md +370 -0
  9. data/Gemfile +9 -0
  10. data/Gemfile.lock +128 -0
  11. data/README.md +42 -0
  12. data/Rakefile +8 -0
  13. data/flake.lock +2144 -0
  14. data/flake.nix +30 -0
  15. data/gemset.nix +312 -0
  16. data/lib/pangea/kubernetes/architecture.rb +383 -0
  17. data/lib/pangea/kubernetes/backend_registry.rb +117 -0
  18. data/lib/pangea/kubernetes/backends/aws_eks.rb +203 -0
  19. data/lib/pangea/kubernetes/backends/aws_nixos.rb +1347 -0
  20. data/lib/pangea/kubernetes/backends/azure_aks.rb +145 -0
  21. data/lib/pangea/kubernetes/backends/azure_nixos.rb +275 -0
  22. data/lib/pangea/kubernetes/backends/base.rb +116 -0
  23. data/lib/pangea/kubernetes/backends/gcp_gke.rb +176 -0
  24. data/lib/pangea/kubernetes/backends/gcp_nixos.rb +240 -0
  25. data/lib/pangea/kubernetes/backends/hcloud_k3s.rb +181 -0
  26. data/lib/pangea/kubernetes/backends/nixos_base.rb +235 -0
  27. data/lib/pangea/kubernetes/bare_metal/cloud_init.rb +196 -0
  28. data/lib/pangea/kubernetes/bare_metal/cluster_reference.rb +72 -0
  29. data/lib/pangea/kubernetes/load_balancer.rb +157 -0
  30. data/lib/pangea/kubernetes/network_backend_registry.rb +54 -0
  31. data/lib/pangea/kubernetes/network_backends/base.rb +78 -0
  32. data/lib/pangea/kubernetes/network_backends/cilium.rb +105 -0
  33. data/lib/pangea/kubernetes/network_backends/vpc_cni.rb +36 -0
  34. data/lib/pangea/kubernetes/types/argocd_config.rb +55 -0
  35. data/lib/pangea/kubernetes/types/control_plane_config.rb +65 -0
  36. data/lib/pangea/kubernetes/types/etcd_config.rb +64 -0
  37. data/lib/pangea/kubernetes/types/firewall_config.rb +39 -0
  38. data/lib/pangea/kubernetes/types/k3s_config.rb +112 -0
  39. data/lib/pangea/kubernetes/types/kernel_config.rb +31 -0
  40. data/lib/pangea/kubernetes/types/kubernetes_config.rb +129 -0
  41. data/lib/pangea/kubernetes/types/persistent_state_config.rb +100 -0
  42. data/lib/pangea/kubernetes/types/pki_config.rb +48 -0
  43. data/lib/pangea/kubernetes/types/secrets_config.rb +41 -0
  44. data/lib/pangea/kubernetes/types/vpn_config.rb +188 -0
  45. data/lib/pangea/kubernetes/types/wait_for_dns_config.rb +35 -0
  46. data/lib/pangea/kubernetes/types.rb +521 -0
  47. data/lib/pangea-kubernetes/version.rb +5 -0
  48. data/lib/pangea-kubernetes.rb +43 -0
  49. data/pangea-kubernetes.gemspec +33 -0
  50. metadata +192 -0
@@ -0,0 +1,1347 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright 2025 The Pangea Authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ require 'json'
18
+ require 'pangea/kubernetes/backends/base'
19
+ require 'pangea/kubernetes/backends/nixos_base'
20
+
21
+ module Pangea
22
+ module Kubernetes
23
+ module Backends
24
+ # AWS NixOS backend — EC2 instances running NixOS with k3s/k8s
25
+ # via blackmatter-kubernetes modules.
26
+ #
27
+ # Security invariants (enforced by this backend):
28
+ # - NO wildcard IAM actions — every action listed individually
29
+ # - NO public SSH or K8s API — restricted to VPC CIDR
30
+ # - prevent_destroy on the persistent-state EBS volume (the VPC is declared with prevent_destroy: false)
31
+ # - IMDSv2 required on all instances (SSRF protection)
32
+ # - Session duration capped at 1 hour
33
+ # - 5 least-privilege IAM policies (ECR, S3, CloudWatch, EC2, SSM)
34
+ # - CloudWatch log group with 30-day retention
35
+ module AwsNixos
36
+ include Base
37
+ extend NixosBase
38
+
39
# Bundle of every resource handle produced for the control plane.
# Built with keyword arguments; members that are not supplied stay nil.
# The identity helpers below all delegate to the control-plane NLB.
ControlPlaneRef = Struct.new(
  *%i[
    nlb asg lt tg listener asg_tg
    subnet_ids sg_id instance_profile_name ami_id key_name
    ingress_alb ingress_alb_tg ingress_alb_https_listener ingress_alb_http_listener
    ingress_alb_sg
    vpn_nlb vpn_nlb_tg vpn_nlb_listener
    public_subnet_ids
    distribution_track
    agent_bootstrap_secrets
  ],
  keyword_init: true
) do
  # Address callers use to reach the control plane: the NLB DNS name.
  def ipv4_address = nlb.dns_name

  # Identifier of the control-plane NLB.
  def id = nlb.id

  # ARN of the control-plane NLB.
  def arn = nlb.arn
end
62
+
63
+ class << self
64
# Symbol identifying this backend.
def backend_name
  :aws_nixos
end
65
# Always false for this backend.
def managed_kubernetes?
  false
end
66
# Name of the gem that load_provider! requires.
def required_gem
  'pangea-aws'
end
67
+
68
# Requires the provider gem named by required_gem.
#
# @return [Boolean] whatever Kernel#require returns
# @raise [LoadError] with installation guidance (and the original
#   message appended) when the gem cannot be loaded
def load_provider!
  begin
    require required_gem
  rescue LoadError => load_failure
    guidance = [
      "Backend :aws_nixos requires gem 'pangea-aws'. ",
      "Add it to your Gemfile: gem 'pangea-aws'\n",
      "Original error: #{load_failure.message}"
    ].join
    raise LoadError, guidance
  end
end
76
+
77
+ # ── Phase 1: Network + Storage ────────────────────────────────
78
# Phase 1 — declare the network and storage primitives for the cluster.
#
# Resources are declared on +ctx+ in this order: optional etcd backup
# bucket, VPC, internet gateway, public route table, public and web
# subnets (one per AZ suffix a/b/c), NAT gateway(s), data route table
# and data subnets, the cluster security group (via
# ensure_security_group), optional VPC flow logs, optional SSM logs
# bucket and the optional persistent-state EBS volume.
#
# @param ctx    synthesis context exposing the aws_* resource functions
# @param name   cluster name, used as the prefix of every resource id
# @param config cluster configuration object
# @param tags   [Hash] base tags merged into every taggable resource
# @return [Architecture::NetworkResult] handles to the declared resources
def create_network(ctx, name, config, tags)
  validate_cidr_restrictions!(config)
  network = Architecture::NetworkResult.new

  # S3 bucket for etcd backups (optional — disable for dev clusters)
  if config.etcd_backup_enabled
    etcd_bucket = config.etcd_backup_bucket || "#{name}-etcd-backups"
    network.etcd_bucket = ctx.aws_s3_bucket(
      :"#{name}_etcd",
      bucket: etcd_bucket,
      tags: tags.merge(Name: etcd_bucket)
    )
    if config.etcd_backup_versioning
      ctx.aws_s3_bucket_versioning(
        :"#{name}_etcd_versioning",
        bucket: network.etcd_bucket.id,
        versioning_configuration: { status: 'Enabled' }
      )
    end
    ctx.aws_s3_bucket_server_side_encryption_configuration(
      :"#{name}_etcd_encryption",
      bucket: network.etcd_bucket.id,
      rule: [{ apply_server_side_encryption_by_default: { sse_algorithm: 'AES256' } }]
    )
    ctx.aws_s3_bucket_public_access_block(
      :"#{name}_etcd_public_access",
      bucket: network.etcd_bucket.id,
      block_public_acls: true,
      block_public_policy: true,
      ignore_public_acls: true,
      restrict_public_buckets: true
    )
  end

  vpc_cidr = config.network&.vpc_cidr || '10.0.0.0/16'

  # Subnets must live inside the VPC range, so their /24 blocks are
  # derived from the first two octets of the VPC CIDR instead of a
  # fixed 10.0.x.0/24. When the CIDR is not a plain dotted quad (for
  # example an interpolated reference) the historical 10.0 base is kept.
  # NOTE(review): this still assumes a /16-or-wider VPC; a narrower
  # range cannot hold the 0-2 / 10-12 / 20-22 third-octet layout.
  vpc_octets = vpc_cidr.to_s.split('/').first.to_s.split('.')
  dotted_quad = vpc_octets.size == 4 && vpc_octets.all? { |octet| octet.match?(/\A\d{1,3}\z/) }
  subnet_base = dotted_quad ? vpc_octets.first(2).join('.') : '10.0'
  tier_cidr = ->(third_octet) { "#{subnet_base}.#{third_octet}.0/24" }

  # ── VPC ─────────────────────────────────────────────────
  # NOTE(review): prevent_destroy is false here although the module
  # header lists the VPC as a protected resource — confirm intent.
  network.vpc = ctx.aws_vpc(
    :"#{name}_vpc",
    cidr_block: vpc_cidr,
    enable_dns_hostnames: true,
    enable_dns_support: true,
    tags: tags.merge(Name: "#{name}-vpc"),
    lifecycle: { prevent_destroy: false }
  )

  # ── Internet Gateway ────────────────────────────────────
  network.igw = ctx.aws_internet_gateway(
    :"#{name}_igw",
    vpc_id: network.vpc.id,
    tags: tags.merge(Name: "#{name}-igw")
  )

  # ── Public Route Table (IGW → internet) ─────────────────
  public_rt = ctx.aws_route_table(
    :"#{name}_public_rt",
    vpc_id: network.vpc.id,
    tags: tags.merge(Name: "#{name}-public-rt")
  )
  network.route_table = public_rt

  ctx.aws_route(
    :"#{name}_public_default_route",
    route_table_id: public_rt.id,
    destination_cidr_block: '0.0.0.0/0',
    gateway_id: network.igw.id
  )

  # ── CIDR Layout (organized by tier × AZ) ───────────────
  #
  # Shown for the default VPC 10.0.0.0/16; a custom VPC CIDR keeps the
  # same third-octet layout under its own first two octets.
  #
  # Public tier (NLBs, NAT, bastions — internet-facing):
  #   x.y.0.0/24   public-a   <region>a
  #   x.y.1.0/24   public-b   <region>b
  #   x.y.2.0/24   public-c   <region>c
  #
  # Web tier (K8s nodes, apps — private, NAT egress):
  #   x.y.10.0/24  web-a      <region>a
  #   x.y.11.0/24  web-b      <region>b
  #   x.y.12.0/24  web-c      <region>c
  #
  # Data tier (databases, caches — private, no internet):
  #   x.y.20.0/24  data-a     <region>a
  #   x.y.21.0/24  data-b     <region>b
  #   x.y.22.0/24  data-c     <region>c
  #
  azs = %w[a b c]

  # ── Public Subnets ──────────────────────────────────────
  azs.each_with_index do |az, idx|
    subnet = ctx.aws_subnet(
      :"#{name}_public_#{az}",
      vpc_id: network.vpc.id,
      cidr_block: tier_cidr.call(idx),
      availability_zone: "#{config.region}#{az}",
      map_public_ip_on_launch: true,
      tags: tags.merge(Name: "#{name}-public-#{az}", Tier: 'public')
    )
    network.add_subnet(:"public_#{az}", subnet, tier: :public)

    ctx.aws_route_table_association(
      :"#{name}_public_rta_#{az}",
      subnet_id: subnet.id,
      route_table_id: public_rt.id
    )
  end

  # ── Web Subnets (created before NAT so we can associate per-AZ) ─
  web_subnets = []
  azs.each_with_index do |az, idx|
    subnet = ctx.aws_subnet(
      :"#{name}_web_#{az}",
      vpc_id: network.vpc.id,
      cidr_block: tier_cidr.call(10 + idx),
      availability_zone: "#{config.region}#{az}",
      map_public_ip_on_launch: false,
      tags: tags.merge(Name: "#{name}-web-#{az}", Tier: 'web')
    )
    network.add_subnet(:"web_#{az}", subnet, tier: :web)
    web_subnets << subnet
  end

  if config.nat_per_az
    # ── Per-AZ NAT Gateways (HA) ────────────────────────────
    azs.each_with_index do |az, idx|
      eip = ctx.aws_eip(
        :"#{name}_nat_eip_#{az}",
        tags: tags.merge(Name: "#{name}-nat-eip-#{az}")
      )
      nat = ctx.aws_nat_gateway(
        :"#{name}_nat_#{az}",
        subnet_id: network.public_subnets[idx].id,
        allocation_id: eip.id,
        tags: tags.merge(Name: "#{name}-nat-#{az}")
      )
      web_rt = ctx.aws_route_table(
        :"#{name}_web_rt_#{az}",
        vpc_id: network.vpc.id,
        tags: tags.merge(Name: "#{name}-web-rt-#{az}")
      )
      ctx.aws_route(
        :"#{name}_web_default_route_#{az}",
        route_table_id: web_rt.id,
        destination_cidr_block: '0.0.0.0/0',
        nat_gateway_id: nat.id
      )
      ctx.aws_route_table_association(
        :"#{name}_web_rta_#{az}",
        subnet_id: web_subnets[idx].id,
        route_table_id: web_rt.id
      )
    end
  else
    # ── Single NAT Gateway (in public-a) ────────────────────
    eip = ctx.aws_eip(
      :"#{name}_nat_eip",
      tags: tags.merge(Name: "#{name}-nat-eip")
    )

    nat_gw = ctx.aws_nat_gateway(
      :"#{name}_nat",
      allocation_id: eip.id,
      subnet_id: network.public_subnets.first.id,
      tags: tags.merge(Name: "#{name}-nat")
    )

    web_rt = ctx.aws_route_table(
      :"#{name}_web_rt",
      vpc_id: network.vpc.id,
      tags: tags.merge(Name: "#{name}-web-rt")
    )

    ctx.aws_route(
      :"#{name}_web_default_route",
      route_table_id: web_rt.id,
      destination_cidr_block: '0.0.0.0/0',
      nat_gateway_id: nat_gw.id
    )

    web_subnets.each_with_index do |subnet, idx|
      az = azs[idx]
      ctx.aws_route_table_association(
        :"#{name}_web_rta_#{az}",
        subnet_id: subnet.id,
        route_table_id: web_rt.id
      )
    end
  end

  # ── Data Tier Route Table (no internet, VPC-local only) ─
  data_rt = ctx.aws_route_table(
    :"#{name}_data_rt",
    vpc_id: network.vpc.id,
    tags: tags.merge(Name: "#{name}-data-rt")
  )

  # ── Data Subnets ────────────────────────────────────────
  azs.each_with_index do |az, idx|
    subnet = ctx.aws_subnet(
      :"#{name}_data_#{az}",
      vpc_id: network.vpc.id,
      cidr_block: tier_cidr.call(20 + idx),
      availability_zone: "#{config.region}#{az}",
      map_public_ip_on_launch: false,
      tags: tags.merge(Name: "#{name}-data-#{az}", Tier: 'data')
    )
    network.add_subnet(:"data_#{az}", subnet, tier: :data)

    ctx.aws_route_table_association(
      :"#{name}_data_rta_#{az}",
      subnet_id: subnet.id,
      route_table_id: data_rt.id
    )
  end

  # Security group — K3s ports restricted to VPC CIDR
  ensure_security_group(ctx, name, config, network, vpc_cidr, tags)

  # ── VPC Flow Logs (optional — network traffic auditing) ───
  if config.flow_logs_enabled
    flow_trust = JSON.generate({
      Version: '2012-10-17',
      Statement: [{ Effect: 'Allow',
                    Principal: { Service: 'vpc-flow-logs.amazonaws.com' },
                    Action: 'sts:AssumeRole' }]
    })
    flow_role = ctx.aws_iam_role(:"#{name}_flow_log_role",
      assume_role_policy: flow_trust,
      tags: tags.merge(Name: "#{name}-flow-log-role"))

    flow_policy = ctx.aws_iam_policy(:"#{name}_flow_log_policy",
      policy: JSON.generate({ Version: '2012-10-17',
        Statement: [{ Effect: 'Allow',
          Action: %w[logs:CreateLogGroup logs:CreateLogStream logs:PutLogEvents
                     logs:DescribeLogGroups logs:DescribeLogStreams],
          Resource: ["arn:aws:logs:#{config.region}:#{config.account_id}:log-group:/vpc/#{name}*"] }]
      }), tags: tags)

    ctx.aws_iam_role_policy_attachment(:"#{name}_flow_log_attach",
      role: flow_role.name, policy_arn: flow_policy.arn)

    # The log group is named explicitly so that it falls under the
    # /vpc/<name>* prefix the policy above is scoped to; an unnamed
    # group would not be covered by that policy.
    flow_log_group = ctx.aws_cloudwatch_log_group(:"#{name}_flow_logs",
      name: "/vpc/#{name}/flow-logs",
      retention_in_days: config.flow_logs_retention_days,
      tags: tags.merge(Name: "#{name}-flow-logs"))

    network.flow_log = ctx.aws_flow_log(:"#{name}_vpc_flow_log",
      vpc_id: network.vpc.id,
      traffic_type: config.flow_logs_traffic_type,
      log_destination_type: 'cloud-watch-logs',
      log_group_name: flow_log_group.name,
      iam_role_arn: flow_role.arn,
      tags: tags.merge(Name: "#{name}-vpc-flow-log"))
    network.flow_log_role = flow_role
  end

  # ── SSM Logs Bucket (optional — separate from etcd) ───────
  if config.ssm_logs_bucket
    network.ssm_logs_bucket = ctx.aws_s3_bucket(:"#{name}_ssm_logs",
      bucket: config.ssm_logs_bucket,
      tags: tags.merge(Name: config.ssm_logs_bucket))
    ctx.aws_s3_bucket_server_side_encryption_configuration(:"#{name}_ssm_logs_sse",
      bucket: network.ssm_logs_bucket.id,
      rule: [{ apply_server_side_encryption_by_default: { sse_algorithm: 'AES256' } }])
    ctx.aws_s3_bucket_public_access_block(:"#{name}_ssm_logs_pab",
      bucket: network.ssm_logs_bucket.id,
      block_public_acls: true, block_public_policy: true,
      ignore_public_acls: true, restrict_public_buckets: true)
  end

  # ── Persistent state volume (opt-in) ────────────────────
  # When config.persistent_state is set, declare a separately-managed
  # EBS volume tagged for discovery from within the cluster instance.
  # lifecycle.prevent_destroy is ON, so the volume is protected from
  # destruction until the lifecycle block is removed (or the resource
  # is dropped from state).
  if config.persistent_state
    persistent_az = persistent_state_az(config)
    ps = config.persistent_state
    vol_attrs = {
      availability_zone: persistent_az,
      size: ps.size_gb,
      type: ps.volume_type,
      encrypted: ps.encrypted,
      tags: tags.merge(
        Name: "#{name}-persistent-state",
        Role: 'persistent-state',
        Cluster: name.to_s,
        ps.discovery_tag.to_sym => name.to_s
      ),
      lifecycle: { prevent_destroy: true }
    }
    # Optional tuning attributes are only emitted when configured.
    vol_attrs[:kms_key_id] = ps.kms_key_id if ps.kms_key_id
    vol_attrs[:iops] = ps.iops if ps.iops
    vol_attrs[:throughput] = ps.throughput if ps.throughput
    network.persistent_state_volume = ctx.aws_ebs_volume(
      :"#{name}_persistent_state",
      **vol_attrs
    )
  end

  network
end
384
+
385
# ── Persistent state AZ selection ─────────────────────────
# Picks the availability zone for the persistent-state volume.
# A non-empty config.persistent_state.availability_zone wins;
# otherwise the zone is "<region>a".
#
# @param config cluster configuration (reads persistent_state and region)
# @return [String] availability zone name
def persistent_state_az(config)
  pinned_zone = config.persistent_state&.availability_zone

  if !pinned_zone || pinned_zone.empty?
    "#{config.region}a"
  else
    pinned_zone
  end
end
396
+
397
+ # ── Phase 2: IAM (least-privilege) ───────────────────────────
398
# Phase 2 — declare the node IAM role, its instance profile, the
# scoped policies attached to it, and the cluster CloudWatch log group.
#
# Always declared: ECR read, CloudWatch logs, EC2 describe and SSM
# policies. Conditionally declared: etcd backup policy, KMS key for
# logs, Karpenter role/profile, persistent-state volume policy.
#
# @param ctx    synthesis context exposing the aws_* resource functions
# @param name   cluster name, used as the prefix of every resource id
# @param config cluster configuration object
# @param tags   [Hash] base tags merged into every taggable resource
# @return [Architecture::IamResult] handles to the declared resources
# @raise [ArgumentError] when account_id is missing, blank or 'CHANGEME'
def create_iam(ctx, name, config, tags)
  iam = Architecture::IamResult.new
  account_id = config.account_id
  # A blank account id would yield malformed ARNs in every policy below,
  # so it is rejected the same way as nil and the placeholder value.
  if account_id.to_s.strip.empty? || account_id == 'CHANGEME'
    raise ArgumentError,
          "account_id is required for IAM policy scoping. " \
          "Set ACCOUNT_ID env var or pass account_id in tags."
  end
  region = config.region
  etcd_bucket = config.etcd_backup_bucket || "#{name}-etcd-backups"
  log_group = "/k3s/#{name}"

  # EC2-only assume-role trust policy (JSON String per Terraform schema)
  assume_role_policy = JSON.generate({
    Version: '2012-10-17',
    Statement: [{
      Effect: 'Allow',
      Principal: { Service: 'ec2.amazonaws.com' },
      Action: 'sts:AssumeRole'
    }]
  })

  iam.role = ctx.aws_iam_role(
    :"#{name}_node_role",
    description: "Least-privilege role for #{name} K3s cluster nodes",
    assume_role_policy: assume_role_policy,
    max_session_duration: 3600,
    tags: tags.merge(Name: "#{name}-node-role")
  )

  iam.instance_profile = ctx.aws_iam_instance_profile(
    :"#{name}_node_profile",
    role: iam.role.ref(:name),
    tags: tags.merge(Name: "#{name}-node-profile")
  )

  # ── Policy: ECR Read-Only ────────────────────────────────
  ecr_resource = ["arn:aws:ecr:#{region}:#{account_id}:repository/*"]

  iam.ecr_policy = ctx.aws_iam_policy(
    :"#{name}_ecr_read",
    description: "ECR read-only for #{name} K3s nodes",
    policy: JSON.generate({
      Version: '2012-10-17',
      Statement: [{
        Sid: 'ECRReadOnly',
        Effect: 'Allow',
        Action: %w[
          ecr:GetDownloadUrlForLayer
          ecr:BatchGetImage
          ecr:BatchCheckLayerAvailability
          ecr:DescribeRepositories
          ecr:ListImages
        ],
        Resource: ecr_resource,
      }, {
        Sid: 'ECRAuth',
        Effect: 'Allow',
        Action: ['ecr:GetAuthorizationToken'],
        Resource: ['*'],
      }],
    }),
    tags: tags,
  )
  ctx.aws_iam_role_policy_attachment(:"#{name}_ecr_read",
    role: iam.role.ref(:name), policy_arn: iam.ecr_policy.ref(:arn))

  # ── Policy: S3 Etcd Backup (conditional) ─────────────────
  if config.etcd_backup_enabled
    iam.etcd_policy = ctx.aws_iam_policy(
      :"#{name}_etcd_backup",
      description: "S3 etcd backup access for #{name} K3s nodes",
      policy: JSON.generate({
        Version: '2012-10-17',
        Statement: [{
          Sid: 'EtcdBackupReadWrite',
          Effect: 'Allow',
          Action: %w[s3:GetObject s3:PutObject s3:ListBucket],
          Resource: ["arn:aws:s3:::#{etcd_bucket}", "arn:aws:s3:::#{etcd_bucket}/*"],
        }],
      }),
      tags: tags,
    )
    ctx.aws_iam_role_policy_attachment(:"#{name}_etcd_backup",
      role: iam.role.ref(:name), policy_arn: iam.etcd_policy.ref(:arn))
  end

  # ── Policy: CloudWatch Logs ──────────────────────────────
  logs_resource = ["arn:aws:logs:#{region}:#{account_id}:log-group:#{log_group}:*"]

  iam.logs_policy = ctx.aws_iam_policy(
    :"#{name}_logs",
    description: "CloudWatch log access for #{name} K3s nodes",
    policy: JSON.generate({
      Version: '2012-10-17',
      Statement: [{
        Sid: 'CloudWatchLogs',
        Effect: 'Allow',
        Action: %w[logs:CreateLogStream logs:PutLogEvents logs:DescribeLogStreams],
        Resource: logs_resource,
      }],
    }),
    tags: tags,
  )
  ctx.aws_iam_role_policy_attachment(:"#{name}_logs",
    role: iam.role.ref(:name), policy_arn: iam.logs_policy.ref(:arn))

  # ── Policy: EC2 Describe (node discovery) ────────────────
  ec2_statement = {
    Sid: 'EC2Describe',
    Effect: 'Allow',
    Action: %w[
      ec2:DescribeInstances
      ec2:DescribeTags
      ec2:DescribeVolumes
      ec2:DescribeNetworkInterfaces
      ec2:DescribeSecurityGroups
      ec2:DescribeSubnets
      ec2:DescribeVpcs
    ],
    Resource: ['*'],
  }
  # Describe actions take no resource ARN, so they are confined by region.
  ec2_statement[:Condition] = { StringEquals: { 'ec2:Region': region } }

  iam.ec2_policy = ctx.aws_iam_policy(
    :"#{name}_ec2_describe",
    description: "EC2 read-only metadata for #{name} K3s nodes",
    policy: JSON.generate({ Version: '2012-10-17', Statement: [ec2_statement] }),
    tags: tags,
  )
  ctx.aws_iam_role_policy_attachment(:"#{name}_ec2_describe",
    role: iam.role.ref(:name), policy_arn: iam.ec2_policy.ref(:arn))

  # ── Policy: SSM Session Manager ──────────────────────────
  # Session logs go to the dedicated SSM bucket when configured,
  # otherwise under ssm-logs/ in the etcd backup bucket name.
  ssm_bucket = config.ssm_logs_bucket || etcd_bucket
  iam.ssm_policy = ctx.aws_iam_policy(
    :"#{name}_ssm",
    description: "SSM session access for #{name} K3s nodes",
    policy: JSON.generate({
      Version: '2012-10-17',
      Statement: [{
        Sid: 'SSMCore',
        Effect: 'Allow',
        Action: %w[
          ssm:UpdateInstanceInformation
          ssmmessages:CreateControlChannel
          ssmmessages:CreateDataChannel
          ssmmessages:OpenControlChannel
          ssmmessages:OpenDataChannel
        ],
        Resource: ['*'],
      }, {
        Sid: 'SSMSessionLogs',
        Effect: 'Allow',
        Action: ['s3:PutObject'],
        Resource: ["arn:aws:s3:::#{ssm_bucket}/ssm-logs/*"],
      }],
    }),
    tags: tags,
  )
  ctx.aws_iam_role_policy_attachment(:"#{name}_ssm",
    role: iam.role.ref(:name), policy_arn: iam.ssm_policy.ref(:arn))

  # ── KMS Key for CloudWatch Logs (optional) ─────────────────
  # A caller-supplied key ARN is used as-is; otherwise a key and alias
  # are declared here.
  kms_key_id = nil
  if config.kms_logs_enabled
    if config.kms_key_arn
      kms_key_id = config.kms_key_arn
    else
      kms_key = ctx.aws_kms_key(:"#{name}_logs_kms",
        description: "KMS key for #{name} CloudWatch logs",
        enable_key_rotation: true,
        policy: kms_cloudwatch_policy(account_id, config.region),
        tags: tags.merge(Name: "#{name}-logs-kms"))
      ctx.aws_kms_alias(:"#{name}_logs_kms_alias",
        name: "alias/#{name}-logs", target_key_id: kms_key.id)
      kms_key_id = kms_key.arn
    end
  end

  # ── CloudWatch Log Group ─────────────────────────────────
  # Named explicitly so it is the group the CloudWatchLogs policy
  # above is scoped to (/k3s/<name>); an unnamed group would not
  # match that policy's resource ARN.
  log_group_attrs = {
    name: log_group,
    retention_in_days: 30,
    tags: tags.merge(Name: "#{name}-logs")
  }
  log_group_attrs[:kms_key_id] = kms_key_id if kms_key_id

  iam.log_group = ctx.aws_cloudwatch_log_group(
    :"#{name}_logs",
    **log_group_attrs
  )

  # ── Karpenter role (opt-in, deployed post-cluster via GitOps)
  # NOTE(review): the description below says IRSA, but the trust
  # policy is the EC2 service principal rather than an OIDC
  # provider — confirm which one is intended.
  if config.karpenter_enabled
    karpenter_assume = JSON.generate({
      Version: '2012-10-17',
      Statement: [{
        Effect: 'Allow',
        Principal: { Service: 'ec2.amazonaws.com' },
        Action: 'sts:AssumeRole'
      }]
    })

    iam.karpenter_role = ctx.aws_iam_role(
      :"#{name}_karpenter_role",
      description: "Karpenter node role for #{name} (IRSA)",
      assume_role_policy: karpenter_assume,
      max_session_duration: 3600,
      tags: tags.merge(Name: "#{name}-karpenter-role")
    )

    iam.karpenter_profile = ctx.aws_iam_instance_profile(
      :"#{name}_karpenter_profile",
      role: iam.karpenter_role.ref(:name),
      tags: tags.merge(Name: "#{name}-karpenter-profile")
    )
  end

  # ── Policy: Persistent state volume attach/detach ──────
  # When persistent_state is configured, the node role needs to
  # (a) describe volumes/instances to find the tagged volume and
  # (b) attach/detach it. The attach/detach statement is gated on
  # the discovery tag so untagged resources are out of reach.
  if config.persistent_state
    ps = config.persistent_state
    tag_condition = {
      StringEquals: {
        "aws:ResourceTag/#{ps.discovery_tag}" => name.to_s
      }
    }
    iam.persistent_state_policy = ctx.aws_iam_policy(
      :"#{name}_persistent_state",
      description: "Discover + attach the persistent-state EBS volume for #{name}",
      policy: JSON.generate({
        Version: '2012-10-17',
        Statement: [{
          Sid: 'DescribeVolumes',
          Effect: 'Allow',
          Action: %w[ec2:DescribeVolumes ec2:DescribeInstances],
          Resource: ['*']
        }, {
          Sid: 'AttachDetachTaggedVolume',
          Effect: 'Allow',
          Action: %w[ec2:AttachVolume ec2:DetachVolume],
          Resource: [
            "arn:aws:ec2:#{region}:#{account_id}:volume/*",
            "arn:aws:ec2:#{region}:#{account_id}:instance/*"
          ],
          Condition: tag_condition
        }]
      }),
      tags: tags
    )
    ctx.aws_iam_role_policy_attachment(:"#{name}_persistent_state",
      role: iam.role.ref(:name),
      policy_arn: iam.persistent_state_policy.ref(:arn))
  end

  iam
end
659
+
660
+ # ── Phase 3: Cluster (control plane via LT+ASG+NLB) ────────────
661
+ def create_cluster(ctx, name, config, result, tags)
662
+ # Ensure cluster SG exists — when external_network is used,
663
+ # create_network was skipped so network.sg may be nil.
664
+ if result.network && result.network.sg.nil?
665
+ vpc_cidr = config.network&.vpc_cidr || '10.0.0.0/16'
666
+ ensure_security_group(ctx, name, config, result.network, vpc_cidr, tags)
667
+ end
668
+
669
+ system_pool = config.system_node_pool
670
+ instance_type = system_pool.instance_types.first
671
+ ami_id = if config.ami_id
672
+ config.ami_id
673
+ elsif config.ssm_ami_parameter
674
+ ctx.extend(Pangea::Resources::AWS) unless ctx.respond_to?(:data_aws_ssm_parameter)
675
+ ssm_data = ctx.data_aws_ssm_parameter(:"#{name}-ami", name: config.ssm_ami_parameter)
676
+ ssm_data.value
677
+ else
678
+ config.nixos&.image_id || 'ami-nixos-latest'
679
+ end
680
+ subnet_ids = resolve_subnet_ids(config, result)
681
+ # AZ binding: EBS volumes are AZ-scoped. When persistent_state
682
+ # is configured the control plane ASG must launch into the
683
+ # same AZ as the volume — otherwise attach fails. Filter the
684
+ # multi-AZ subnet list down to the persistent_state AZ.
685
+ subnet_ids = filter_subnets_to_persistent_az(subnet_ids, config, result) if config.persistent_state
686
+ sg_id = result.network&.sg&.id
687
+ instance_profile_name = result.iam&.instance_profile&.ref(:name)
688
+ key_name = config.key_pair
689
+
690
+ cloud_init = build_server_cloud_init(name, config, 0, result)
691
+
692
+ effective_key_name = config.ssm_only ? nil : key_name
693
+ cp_lt_attrs = {
694
+ image_id: ami_id,
695
+ instance_type: instance_type,
696
+ user_data: terraform_base64encode(cloud_init),
697
+ iam_instance_profile: instance_profile_name ? { name: instance_profile_name } : nil,
698
+ vpc_security_group_ids: sg_id ? [sg_id] : [],
699
+ metadata_options: {
700
+ http_endpoint: 'enabled',
701
+ http_tokens: 'required',
702
+ http_put_response_hop_limit: 1,
703
+ instance_metadata_tags: 'enabled',
704
+ },
705
+ block_device_mappings: [{
706
+ device_name: '/dev/xvda',
707
+ ebs: {
708
+ volume_size: system_pool.disk_size_gb,
709
+ volume_type: 'gp3',
710
+ encrypted: true,
711
+ }
712
+ }],
713
+ tag_specifications: [{
714
+ resource_type: 'instance',
715
+ tags: tags.merge(
716
+ Name: "#{name}-cp",
717
+ Role: 'control-plane',
718
+ Distribution: config.distribution.to_s
719
+ )
720
+ }],
721
+ tags: tags.merge(Name: "#{name}-cp-lt")
722
+ }
723
+ cp_lt_attrs[:key_name] = effective_key_name if effective_key_name
724
+
725
+ lt = ctx.aws_launch_template(:"#{name}_cp_lt", **cp_lt_attrs)
726
+
727
+ # min_size=0 allows parked mode (all instances off, infra preserved)
728
+ cp_desired = system_pool.min_size || 1
729
+ max_cp = system_pool.max_size || [cp_desired, 1].max
730
+ cp_asg_attrs = {
731
+ min_size: cp_desired,
732
+ max_size: [max_cp, cp_desired].max,
733
+ launch_template: { id: lt.id, version: '$Latest' },
734
+ vpc_zone_identifier: subnet_ids,
735
+ health_check_grace_period: 300,
736
+ tag: [
737
+ { key: 'Name', value: "#{name}-cp", propagate_at_launch: true },
738
+ { key: 'KubernetesCluster', value: name.to_s, propagate_at_launch: true },
739
+ { key: 'Role', value: 'control-plane', propagate_at_launch: true }
740
+ ]
741
+ }
742
+ cp_asg_attrs[:desired_capacity] = system_pool.desired_size if system_pool.desired_size
743
+
744
+ asg = ctx.aws_autoscaling_group(:"#{name}_cp_asg", **cp_asg_attrs)
745
+
746
+ nlb = ctx.aws_lb(
747
+ :"#{name}_cp_nlb",
748
+ name: "#{name}-cp-nlb",
749
+ internal: true,
750
+ load_balancer_type: 'network',
751
+ subnets: subnet_ids,
752
+ tags: tags.merge(Name: "#{name}-cp-nlb")
753
+ )
754
+
755
+ tg = ctx.aws_lb_target_group(
756
+ :"#{name}_cp_tg",
757
+ name: "#{name}-cp-tg",
758
+ port: 6443,
759
+ protocol: 'TCP',
760
+ vpc_id: result.network&.vpc&.id,
761
+ target_type: 'instance',
762
+ health_check: {
763
+ protocol: 'TCP',
764
+ port: '6443',
765
+ healthy_threshold: 3,
766
+ unhealthy_threshold: 3,
767
+ interval: 30,
768
+ },
769
+ tags: tags.merge(Name: "#{name}-cp-tg")
770
+ )
771
+
772
+ listener = ctx.aws_lb_listener(
773
+ :"#{name}_cp_listener",
774
+ load_balancer_arn: nlb.arn,
775
+ port: 6443,
776
+ protocol: 'TCP',
777
+ default_action: [{ type: 'forward', target_group_arn: tg.arn }]
778
+ )
779
+
780
+ asg_tg = ctx.aws_autoscaling_attachment(
781
+ :"#{name}_cp_asg_tg",
782
+ autoscaling_group_name: asg.id,
783
+ lb_target_group_arn: tg.arn
784
+ )
785
+
786
+ # ── Ingress ALB (optional — HTTP/HTTPS for services) ────
787
+ ingress_alb = nil
788
+ ingress_alb_tg = nil
789
+ ingress_alb_https_listener = nil
790
+ ingress_alb_http_listener = nil
791
+ alb_sg = nil
792
+ public_subnet_ids = resolve_public_subnet_ids(config, result)
793
+
794
+ # ── ACM Certificate (optional — auto-create for ALB HTTPS) ─
795
+ effective_cert_arn = config.ingress_alb_certificate_arn
796
+ if config.ingress_alb_enabled && config.ingress_alb_domain && !effective_cert_arn
797
+ acm_cert = ctx.aws_acm_certificate(:"#{name}_ingress_cert",
798
+ domain_name: config.ingress_alb_domain,
799
+ validation_method: 'DNS',
800
+ tags: tags.merge(Name: "#{name}-ingress-cert"))
801
+ if config.ingress_alb_zone_id
802
+ ctx.aws_acm_certificate_validation(:"#{name}_ingress_cert_validation",
803
+ certificate_arn: acm_cert.arn)
804
+ end
805
+ effective_cert_arn = acm_cert.arn
806
+ end
807
+
808
+ if config.ingress_alb_enabled
809
+ # ALB security group — allows 80/443 from ingress_source_cidr
810
+ ingress_cidr = config.ingress_source_cidr || '0.0.0.0/0'
811
+ alb_sg = ctx.aws_security_group(
812
+ :"#{name}_alb_sg",
813
+ description: "ALB security group for #{name} ingress",
814
+ vpc_id: result.network&.vpc&.id,
815
+ tags: tags.merge(Name: "#{name}-alb-sg")
816
+ )
817
+
818
+ ctx.aws_security_group_rule(
819
+ :"#{name}_alb_sg_https",
820
+ type: 'ingress', from_port: 443, to_port: 443, protocol: 'tcp',
821
+ cidr_blocks: [ingress_cidr],
822
+ security_group_id: alb_sg.id,
823
+ description: 'HTTPS ingress'
824
+ )
825
+
826
+ ctx.aws_security_group_rule(
827
+ :"#{name}_alb_sg_http",
828
+ type: 'ingress', from_port: 80, to_port: 80, protocol: 'tcp',
829
+ cidr_blocks: [ingress_cidr],
830
+ security_group_id: alb_sg.id,
831
+ description: 'HTTP ingress (redirect to HTTPS)'
832
+ )
833
+
834
+ ctx.aws_security_group_rule(
835
+ :"#{name}_alb_sg_egress",
836
+ type: 'egress', from_port: 0, to_port: 0, protocol: '-1',
837
+ cidr_blocks: ['0.0.0.0/0'],
838
+ security_group_id: alb_sg.id
839
+ )
840
+
841
+ ingress_alb = ctx.aws_lb(
842
+ :"#{name}_ingress_alb",
843
+ name: "#{name}-ingress",
844
+ internal: false,
845
+ load_balancer_type: 'application',
846
+ subnets: public_subnet_ids,
847
+ security_groups: [alb_sg.id],
848
+ idle_timeout: config.ingress_alb_idle_timeout,
849
+ tags: tags.merge(Name: "#{name}-ingress-alb")
850
+ )
851
+
852
+ # Target group for ingress controller (HTTP on nodes)
853
+ ingress_alb_tg = ctx.aws_lb_target_group(
854
+ :"#{name}_ingress_tg",
855
+ name: "#{name}-ingress-tg",
856
+ port: 80,
857
+ protocol: 'HTTP',
858
+ vpc_id: result.network&.vpc&.id,
859
+ target_type: 'instance',
860
+ health_check: {
861
+ protocol: 'HTTP',
862
+ port: '80',
863
+ path: '/healthz',
864
+ healthy_threshold: 2,
865
+ unhealthy_threshold: 3,
866
+ interval: 15,
867
+ },
868
+ tags: tags.merge(Name: "#{name}-ingress-tg")
869
+ )
870
+
871
+ # HTTPS listener (TLS termination at ALB)
872
+ if effective_cert_arn
873
+ ingress_alb_https_listener = ctx.aws_lb_listener(
874
+ :"#{name}_ingress_https",
875
+ load_balancer_arn: ingress_alb.arn,
876
+ port: 443,
877
+ protocol: 'HTTPS',
878
+ ssl_policy: 'ELBSecurityPolicy-TLS13-1-2-2021-06',
879
+ certificate_arn: effective_cert_arn,
880
+ default_action: [{ type: 'forward', target_group_arn: ingress_alb_tg.arn }]
881
+ )
882
+ end
883
+
884
+ # HTTP listener (redirect to HTTPS or forward)
885
+ if config.ingress_alb_http_redirect && effective_cert_arn
886
+ ingress_alb_http_listener = ctx.aws_lb_listener(
887
+ :"#{name}_ingress_http",
888
+ load_balancer_arn: ingress_alb.arn,
889
+ port: 80,
890
+ protocol: 'HTTP',
891
+ default_action: [{
892
+ type: 'redirect',
893
+ redirect: { port: '443', protocol: 'HTTPS', status_code: 'HTTP_301' }
894
+ }]
895
+ )
896
+ else
897
+ ingress_alb_http_listener = ctx.aws_lb_listener(
898
+ :"#{name}_ingress_http",
899
+ load_balancer_arn: ingress_alb.arn,
900
+ port: 80,
901
+ protocol: 'HTTP',
902
+ default_action: [{ type: 'forward', target_group_arn: ingress_alb_tg.arn }]
903
+ )
904
+ end
905
+
906
+ # Attach worker ASG to ingress target group (done in create_node_pool)
907
+
908
+ # SG-to-SG rules for HTTP/HTTPS when restricted to ALB
909
+ if config.sg_restrict_http_to_alb
910
+ ctx.aws_security_group_rule(:"#{name}_sg_http_from_alb",
911
+ type: 'ingress', from_port: 80, to_port: 80, protocol: 'tcp',
912
+ source_security_group_id: alb_sg.id,
913
+ security_group_id: result.network.sg.id,
914
+ description: 'HTTP from ALB only')
915
+ ctx.aws_security_group_rule(:"#{name}_sg_https_from_alb",
916
+ type: 'ingress', from_port: 443, to_port: 443, protocol: 'tcp',
917
+ source_security_group_id: alb_sg.id,
918
+ security_group_id: result.network.sg.id,
919
+ description: 'HTTPS from ALB only')
920
+ end
921
+ end
922
+
923
+ # ── VPN NLB (optional — WireGuard operator access) ──────
924
+ vpn_nlb = nil
925
+ vpn_nlb_tg = nil
926
+ vpn_nlb_listener = nil
927
+
928
+ if config.vpn_nlb_enabled
929
+ vpn_port = config.vpn_nlb_port.to_i
930
+
931
+ # When EIP allocation IDs are provided, use subnet_mapping to
932
+ # attach Elastic IPs to the NLB. This gives the VPN endpoint a
933
+ # permanent public IP that survives NLB recreation. Otherwise
934
+ # fall back to plain subnets (dynamic DNS-based endpoint).
935
+ vpn_eip_ids = config.respond_to?(:vpn_eip_allocation_ids) ? config.vpn_eip_allocation_ids : []
936
+ vpn_nlb_attrs = {
937
+ name: "#{name}-vpn",
938
+ internal: false,
939
+ load_balancer_type: 'network',
940
+ tags: tags.merge(Name: "#{name}-vpn-nlb"),
941
+ }
942
+ if vpn_eip_ids.any?
943
+ vpn_nlb_attrs[:subnet_mapping] = public_subnet_ids.zip(vpn_eip_ids).map do |subnet_id, eip_id|
944
+ mapping = { subnet_id: subnet_id }
945
+ mapping[:allocation_id] = eip_id if eip_id
946
+ mapping
947
+ end
948
+ else
949
+ vpn_nlb_attrs[:subnets] = public_subnet_ids
950
+ end
951
+
952
+ vpn_nlb = ctx.aws_lb(
953
+ :"#{name}_vpn_nlb",
954
+ **vpn_nlb_attrs
955
+ )
956
+
957
+ health_port = (config.vpn_health_check_port || vpn_port).to_s
958
+ vpn_nlb_tg = ctx.aws_lb_target_group(
959
+ :"#{name}_vpn_tg",
960
+ name: "#{name}-vpn-wg",
961
+ port: vpn_port,
962
+ protocol: 'UDP',
963
+ vpc_id: result.network&.vpc&.id,
964
+ target_type: 'instance',
965
+ health_check: {
966
+ protocol: 'TCP',
967
+ port: health_port,
968
+ healthy_threshold: 3,
969
+ unhealthy_threshold: 3,
970
+ interval: 30,
971
+ },
972
+ tags: tags.merge(Name: "#{name}-vpn-tg")
973
+ )
974
+
975
+ vpn_nlb_listener = ctx.aws_lb_listener(
976
+ :"#{name}_vpn_listener",
977
+ load_balancer_arn: vpn_nlb.arn,
978
+ port: vpn_port,
979
+ protocol: 'UDP',
980
+ default_action: [{ type: 'forward', target_group_arn: vpn_nlb_tg.arn }]
981
+ )
982
+
983
+ # Attach control plane ASG to VPN target group
984
+ ctx.aws_autoscaling_attachment(
985
+ :"#{name}_vpn_asg_tg",
986
+ autoscaling_group_name: asg.id,
987
+ lb_target_group_arn: vpn_nlb_tg.arn
988
+ )
989
+
990
+ # Security group rule for VPN ingress
991
+ vpn_source = config.vpn_source_cidr || config.ingress_source_cidr || '0.0.0.0/0'
992
+ ctx.aws_security_group_rule(
993
+ :"#{name}_sg_vpn_ingress",
994
+ type: 'ingress', from_port: vpn_port, to_port: vpn_port, protocol: 'udp',
995
+ cidr_blocks: [vpn_source],
996
+ security_group_id: sg_id,
997
+ description: 'WireGuard VPN (internet-facing NLB)'
998
+ )
999
+ end
1000
+
1001
+ ControlPlaneRef.new(
1002
+ nlb: nlb, asg: asg, lt: lt, tg: tg,
1003
+ listener: listener, asg_tg: asg_tg,
1004
+ subnet_ids: subnet_ids, sg_id: sg_id,
1005
+ instance_profile_name: instance_profile_name,
1006
+ ami_id: ami_id, key_name: effective_key_name,
1007
+ ingress_alb: ingress_alb, ingress_alb_tg: ingress_alb_tg,
1008
+ ingress_alb_https_listener: ingress_alb_https_listener,
1009
+ ingress_alb_http_listener: ingress_alb_http_listener,
1010
+ ingress_alb_sg: config.ingress_alb_enabled ? alb_sg : nil,
1011
+ vpn_nlb: vpn_nlb, vpn_nlb_tg: vpn_nlb_tg,
1012
+ vpn_nlb_listener: vpn_nlb_listener,
1013
+ public_subnet_ids: public_subnet_ids,
1014
+ distribution_track: config.distribution_track || config.kubernetes_version,
1015
+ agent_bootstrap_secrets: build_agent_bootstrap_secrets(config)
1016
+ )
1017
+ end
1018
+
1019
+ # ── Phase 4: Node pools (workers) ────────────────────────────
1020
+ # Overrides nixos_create_node_pool to use JOIN_SERVER_PLACEHOLDER so
1021
+ # terraform_base64encode can inject the actual Terraform expression
1022
+ # via replace() — keeping ${...} references resolvable at apply time.
1023
# @param ctx [Object] passed through unchanged to create_worker_pool
# @param name [String, Symbol] cluster name, handed to both helpers
# @param cluster_ref [Object] control-plane reference, handed to both helpers
# @param pool_config [Object] node pool settings, passed through unchanged
# @param tags [Hash] tags handed to both helpers
# @return [Object] whatever create_worker_pool returns
def create_node_pool(ctx, name, cluster_ref, pool_config, tags)
  create_worker_pool(
    ctx, name, cluster_ref, pool_config,
    # Agent cloud-init is rendered with the join placeholder switched on.
    build_agent_cloud_init(name, tags, cluster_ref, use_join_placeholder: true),
    tags
  )
end
1027
+
1028
+ # --- NixosBase template hooks ---
1029
+
1030
# Declares the launch template and autoscaling group for one worker pool
# and, when the cluster reference exposes an ingress ALB target group,
# attaches the group to it.
#
# @param ctx [Object] resource DSL; receives aws_launch_template,
#   aws_autoscaling_group and (optionally) aws_autoscaling_attachment
# @param name [String, Symbol] cluster name used as the resource-name prefix
# @param cluster_ref [Object] must answer ipv4_address; ami_id, key_name,
#   subnet_ids, sg_id, instance_profile_name and ingress_alb_tg are read
#   only when the object responds to them
# @param pool_config [Object] answers name, instance_types, disk_size_gb,
#   min_size, max_size and desired_size
# @param cloud_init [String] rendered cloud-init containing the join placeholder
# @param tags [Hash] base tags merged into every tagged resource
# @return [Object] the autoscaling group returned by ctx.aws_autoscaling_group
def create_worker_pool(ctx, name, cluster_ref, pool_config, cloud_init, tags)
  resource_prefix = :"#{name}_#{pool_config.name}"
  machine_type = pool_config.instance_types.first

  # Optional readers on the cluster reference. The block supplies the value
  # used when the reference does not expose the reader at all; a reader that
  # exists but returns nil is NOT replaced by the fallback.
  ref_value = lambda do |reader, &fallback|
    cluster_ref.respond_to?(reader) ? cluster_ref.public_send(reader) : fallback&.call
  end
  image = ref_value.call(:ami_id) { tags[:AmiId] || 'ami-nixos-latest' }
  ssh_key = ref_value.call(:key_name) { tags[:KeyPair] }
  subnets = ref_value.call(:subnet_ids) { tags[:SubnetIds] || [] }
  security_group = ref_value.call(:sg_id)
  profile = ref_value.call(:instance_profile_name)

  # The join address is unwrapped from its ${...} form so it can be passed
  # to terraform_base64encode as the replacement for the placeholder.
  join_target = strip_tf_interpolation(cluster_ref.ipv4_address.to_s)
  encoded_user_data = terraform_base64encode(cloud_init, { JOIN_SERVER_PLACEHOLDER => join_target })

  template_attrs = {
    image_id: image,
    instance_type: machine_type,
    user_data: encoded_user_data,
    iam_instance_profile: ({ name: profile } if profile),
    vpc_security_group_ids: security_group ? [security_group] : [],
    metadata_options: {
      http_endpoint: 'enabled',
      http_tokens: 'required',
      http_put_response_hop_limit: 1,
      instance_metadata_tags: 'enabled'
    },
    block_device_mappings: [
      {
        device_name: '/dev/xvda',
        ebs: { volume_size: pool_config.disk_size_gb, volume_type: 'gp3', encrypted: true }
      }
    ],
    tag_specifications: [
      {
        resource_type: 'instance',
        tags: tags.merge(
          Name: "#{name}-#{pool_config.name}",
          Role: 'worker',
          NodePool: pool_config.name.to_s
        )
      }
    ],
    tags: tags.merge(Name: "#{name}-#{pool_config.name}-lt")
  }
  # key_name is only emitted when a key pair is actually known.
  template_attrs[:key_name] = ssh_key if ssh_key

  template = ctx.aws_launch_template(:"#{resource_prefix}_lt", **template_attrs)

  propagated_tags = [
    ['Name', "#{name}-#{pool_config.name}"],
    ['KubernetesCluster', name.to_s],
    ['NodePool', pool_config.name.to_s]
  ].map { |key, value| { key: key, value: value, propagate_at_launch: true } }

  group_attrs = {
    min_size: pool_config.min_size,
    max_size: pool_config.max_size,
    launch_template: { id: template.id, version: '$Latest' },
    vpc_zone_identifier: subnets,
    health_check_grace_period: 300,
    tag: propagated_tags
  }
  group_attrs[:desired_capacity] = pool_config.desired_size if pool_config.desired_size

  group = ctx.aws_autoscaling_group(:"#{resource_prefix}_asg", **group_attrs)

  # Workers join the ingress ALB target group only when one is present.
  ingress_tg = cluster_ref.ingress_alb_tg if cluster_ref.respond_to?(:ingress_alb_tg)
  if ingress_tg
    ctx.aws_autoscaling_attachment(
      :"#{resource_prefix}_ingress_tg",
      autoscaling_group_name: group.id,
      lb_target_group_arn: ingress_tg.arn
    )
  end

  group
end
1109
+
1110
+ private
1111
+
1112
+ # Narrow a multi-AZ subnet list down to the single AZ that the
1113
+ # cluster's persistent state volume lives in. Looks up the
1114
+ # web-tier subnet whose availability_zone matches
1115
+ # persistent_state_az(config). Returns the original list
1116
+ # unchanged if no match (e.g. operator passed an explicit
1117
+ # subnet list via config.network.subnet_ids).
1118
# @param subnet_ids [Array] candidate subnet IDs, returned untouched when
#   no narrowing is possible
# @param config [Object] forwarded to persistent_state_az
# @param result [Object, nil] its network is consulted only if it answers web_subnets
# @return [Array] a one-element list with the matching subnet's id, or subnet_ids
def filter_subnets_to_persistent_az(subnet_ids, config, result)
  wanted_az = persistent_state_az(config).to_s
  network = result&.network
  return subnet_ids unless network&.respond_to?(:web_subnets)

  # Suffix comparison: a subnet matches when its availability zone string
  # ends with the target value.
  match = network.web_subnets.find do |subnet|
    next false unless subnet.respond_to?(:availability_zone)

    subnet.availability_zone.to_s.end_with?(wanted_az)
  end

  match ? [match.id] : subnet_ids
end
1130
+
1131
+ # Resolve subnet IDs for K8s nodes — prefer web tier (private), fall back to all subnets.
1132
# Lookup order: explicit config.network.subnet_ids, then the result
# network's web_subnet_ids, then its subnet_ids, then an empty list.
#
# @param config [Object] answers network (may be nil)
# @param result [Object] answers network (may be nil)
# @return [Array] the first non-empty candidate list, or the last fallback
def resolve_subnet_ids(config, result)
  explicit = config.network&.subnet_ids
  return explicit if explicit&.any?

  network = result.network
  return [] unless network

  web_tier = network.respond_to?(:web_subnet_ids) ? network.web_subnet_ids : []
  return web_tier if web_tier.any?

  network.respond_to?(:subnet_ids) ? network.subnet_ids : []
end
1146
+
1147
+ # Resolve public subnet IDs for NLBs — prefer public tier.
1148
# @param config [Object] only used by the resolve_subnet_ids fallback
# @param result [Object] answers network (may be nil)
# @return [Array] the network's non-empty public_subnet_ids, otherwise
#   whatever resolve_subnet_ids(config, result) yields
def resolve_public_subnet_ids(config, result)
  network = result.network
  if network && network.respond_to?(:public_subnet_ids)
    public_tier = network.public_subnet_ids
    return public_tier if public_tier.any?
  end

  # No usable public tier: defer to the general subnet resolution.
  resolve_subnet_ids(config, result)
end
1156
+
1157
+ # Reject 0.0.0.0/0 for SSH, K8s API, and VPN — these must never be public.
1158
# @param config [Object] answers ssh_cidr, api_cidr, vpn_cidr and vpn
# @return [nil] when every check passes
# @raise [ArgumentError] if any of the three CIDRs equals '0.0.0.0/0', or
#   if vpn_cidr is nil while config.vpn has at least one link
def validate_cidr_restrictions!(config)
  # All three values are read up front; checks run in ssh, api, vpn order.
  guarded = {
    'ssh_cidr' => [config.ssh_cidr, 'SSH'],
    'api_cidr' => [config.api_cidr, 'K8s API'],
    'vpn_cidr' => [config.vpn_cidr, 'WireGuard']
  }

  guarded.each do |field, (cidr, label)|
    next unless cidr == '0.0.0.0/0'

    raise ArgumentError, "#{field} must not be 0.0.0.0/0 — #{label} must not be public"
  end

  vpn_cidr = guarded['vpn_cidr'].first
  return unless vpn_cidr.nil? && config.vpn && config.vpn.links.any?

  raise ArgumentError, "vpn_cidr tag is required when VPN links are configured"
end
1175
+
1176
+ # Security group rules — private ports restricted to VPC CIDR,
1177
+ # SSH restricted to VPC, only HTTP/HTTPS public for ingress.
1178
# Translates the distribution's firewall port table into ingress rule
# hashes, choosing the allowed source CIDR per port.
#
# Per-port source selection:
# - :ssh          config.ssh_cidr or the VPC CIDR; omitted when config.ssm_only
# - :api          config.api_cidr or the VPC CIDR
# - :http/:https  config.ingress_source_cidr or '0.0.0.0/0'; omitted when
#                 HTTP is restricted to the ALB and the ALB is enabled
# - :wireguard    config.vpn_cidr or the VPC CIDR; omitted when neither
#                 vpn_cidr nor config.vpn is set
# - anything else the VPC CIDR
#
# The WireGuard omission is decided by the port's key (:wireguard). Matching
# on the literal description 'WireGuard VPN' is kept as well, so ports that
# were previously dropped through their description are still dropped.
#
# @param config [Object] answers ssh_cidr, api_cidr, vpn_cidr, vpn,
#   distribution, ssm_only, sg_restrict_http_to_alb, ingress_alb_enabled
#   and ingress_source_cidr
# @param vpc_cidr [String] CIDR used for every private port
# @return [Array<Hash>] hashes with from_port, to_port, protocol,
#   cidr_blocks and description
def aws_security_group_rules(config, vpc_cidr)
  ssh_cidr = config.ssh_cidr || vpc_cidr
  api_cidr = config.api_cidr || vpc_cidr
  vpn_cidr = config.vpn_cidr
  vpn_configured = vpn_cidr || config.vpn

  base_firewall_ports(config.distribution).filter_map do |port_name, port_def|
    # Drop the WireGuard port entirely when no VPN is configured, whatever
    # its description happens to say.
    wireguard_port = port_name == :wireguard || port_def[:description] == 'WireGuard VPN'
    next if wireguard_port && !vpn_configured

    cidr_blocks =
      case port_name
      when :ssh
        next if config.ssm_only

        [ssh_cidr]
      when :api then [api_cidr]
      when :http, :https
        # SG-source rules are added elsewhere when traffic must come via the ALB.
        next if config.sg_restrict_http_to_alb && config.ingress_alb_enabled

        [config.ingress_source_cidr || '0.0.0.0/0']
      when :wireguard then vpn_cidr ? [vpn_cidr] : [vpc_cidr]
      else [vpc_cidr]
      end

    {
      from_port: port_range_start(port_def[:port]),
      to_port: port_range_end(port_def[:port]),
      protocol: port_def[:protocol].to_s,
      cidr_blocks: cidr_blocks,
      description: port_def[:description]
    }
  end
end
1212
+
1213
+ # Create cluster security group with K3s/K8s port rules.
1214
+ # Called from create_network (normal path) and create_cluster
1215
+ # (when external_network is provided and network.sg is nil).
1216
# No-op when network.sg is already set. Otherwise declares the group,
# stores it on network.sg, adds one ingress rule per entry from
# aws_security_group_rules, and finally an allow-all egress rule.
#
# @param ctx [Object] resource DSL; receives aws_security_group and
#   aws_security_group_rule
# @param name [String, Symbol] cluster name used as the resource-name prefix
# @param config [Object] forwarded to aws_security_group_rules
# @param network [Object] answers sg, sg= and vpc
# @param vpc_cidr [String] forwarded to aws_security_group_rules
# @param tags [Hash] base tags for the security group
# @return [Object, nil] nil when the group already existed, otherwise the
#   value of the final (egress) aws_security_group_rule call
def ensure_security_group(ctx, name, config, network, vpc_cidr, tags)
  return if network.sg

  network.sg = ctx.aws_security_group(
    :"#{name}_sg",
    description: "Security group for #{name} k8s/k3s NixOS nodes",
    vpc_id: network.vpc.id,
    tags: tags.merge(Name: "#{name}-sg")
  )
  group_id = network.sg.id

  aws_security_group_rules(config, vpc_cidr).each.with_index do |rule, position|
    # Resource-name suffix: the description reduced to [a-z0-9_] with any
    # trailing underscores removed; positional name when there is none.
    label = rule[:description]
    slug = if label.nil?
             "rule_#{position}"
           else
             label.downcase.gsub(/[^a-z0-9]+/, '_').gsub(/_+$/, '')
           end

    ctx.aws_security_group_rule(
      :"#{name}_sg_ingress_#{slug}",
      type: 'ingress',
      security_group_id: group_id,
      from_port: rule[:from_port],
      to_port: rule[:to_port],
      protocol: rule[:protocol],
      cidr_blocks: rule[:cidr_blocks],
      description: rule[:description]
    )
  end

  ctx.aws_security_group_rule(
    :"#{name}_sg_egress_all",
    type: 'egress',
    security_group_id: group_id,
    from_port: 0,
    to_port: 0,
    protocol: '-1',
    cidr_blocks: ['0.0.0.0/0']
  )
end
1250
+
1251
# Renders a two-statement KMS key policy as a JSON string: key
# administration for the account root principal, and encrypt/decrypt
# usage for the regional CloudWatch Logs service principal.
#
# @param account_id [String] interpolated into the root principal ARN
# @param region [String] interpolated into the logs service principal
# @return [String] the policy document serialized with JSON.generate
def kms_cloudwatch_policy(account_id, region)
  admin_actions = %w[
    kms:Create* kms:Describe* kms:Enable* kms:List*
    kms:Put* kms:Update* kms:Revoke* kms:Disable*
    kms:Get* kms:Delete* kms:TagResource kms:UntagResource
    kms:ScheduleKeyDeletion kms:CancelKeyDeletion
  ]
  logs_actions = %w[kms:Encrypt kms:Decrypt kms:ReEncrypt* kms:GenerateDataKey* kms:DescribeKey]

  key_admin = {
    Sid: 'AllowKeyAdmin',
    Effect: 'Allow',
    Principal: { AWS: "arn:aws:iam::#{account_id}:root" },
    Action: admin_actions,
    Resource: '*'
  }
  cloudwatch_logs = {
    Sid: 'AllowCloudWatchLogs',
    Effect: 'Allow',
    Principal: { Service: "logs.#{region}.amazonaws.com" },
    Action: logs_actions,
    Resource: '*'
  }

  policy = { Version: '2012-10-17', Statement: [key_admin, cloudwatch_logs] }
  JSON.generate(policy)
end
1271
+
1272
# Produce the user_data value for a cloud-init script, optionally deferring
# placeholder substitution to Terraform.
#
# With no replacements the script is simply Base64-encoded in Ruby and the
# encoded string is returned as-is.
#
# With replacements the result is a Terraform interpolation of the form
#
#   ${base64encode(replace(base64decode("<b64 of raw>"), "<placeholder>", <expr>))}
#
# (one nested replace() per entry). The script travels through the Terraform
# expression as an opaque Base64 literal, so nothing in it ($, #, quotes,
# newlines, shell ${...}) needs escaping. Terraform decodes it, swaps each
# placeholder for the value of its expression, and encodes the final text.
#
# The substitution is deliberately done on the DECODED text. Replacing the
# Base64 form of a placeholder inside the Base64 form of the script only
# matches when the placeholder happens to start on a 3-byte boundary and
# has a length divisible by 3, and splicing in an independently encoded
# value can leave padding in the middle of the stream.
#
# NOTE(review): Terraform's base64decode expects the decoded bytes to be
# valid UTF-8, and replace() treats a substring wrapped in forward slashes
# as a regular expression — placeholders should be plain text.
#
# @param raw [String] the cloud-init script (may contain shell ${} vars)
# @param replacements [Hash<String,String>] placeholder => Terraform
#   expression, either bare or wrapped in ${...}
# @return [String] Base64 text (no replacements) or a ${...} Terraform
#   expression string for the user_data attribute
def terraform_base64encode(raw, replacements = {})
  require 'base64'

  encoded = Base64.strict_encode64(raw)
  return encoded if replacements.empty?

  decoded_expr = "base64decode(\"#{encoded}\")"

  replacements.each do |placeholder, tf_expression|
    # Only a complete ${...} wrapper is removed; a bare expression that
    # merely ends in "}" is left intact.
    bare_expr = tf_expression.to_s
    bare_expr = bare_expr[2..-2] if bare_expr.start_with?('${') && bare_expr.end_with?('}')

    # The placeholder becomes a Terraform string literal: escape backslashes
    # and quotes, and neutralize template introducers.
    literal = placeholder.to_s
                         .gsub(/[\\"]/) { |char| "\\#{char}" }
                         .gsub('${', '$${')
                         .gsub('%{', '%%{')

    decoded_expr = "replace(#{decoded_expr}, \"#{literal}\", #{bare_expr})"
  end

  "${base64encode(#{decoded_expr})}"
end
1319
+
1320
+ # Escape a string for use inside a Terraform string literal
1321
# Backslash-escapes every backslash and double quote in +str+.
#
# The block form of gsub is required: in a replacement STRING, "\\\\"
# denotes a single literal backslash, so a string-replacement version
# would leave backslashes undoubled.
#
# Template introducers (${ and %{) are not touched by this helper.
#
# @param str [String] raw text
# @return [String] a new string with \ => \\ and " => \"
def escape_tf_string(str)
  str.gsub(/[\\"]/) { |char| "\\#{char}" }
end
1324
+
1325
+ # Strip ${...} wrapper from a Terraform interpolation string to get
1326
+ # a bare expression. E.g., "${aws_lb.x.dns_name}" -> "aws_lb.x.dns_name"
1327
+ # Returns the original string unchanged if not wrapped.
1328
# @param ref [String] a Terraform reference, wrapped in ${...} or bare
# @return [String] the text between "${" and the final "}" when both are
#   present; otherwise +ref+ itself
def strip_tf_interpolation(ref)
  wrapped = ref.start_with?('${') && ref.end_with?('}')
  return ref unless wrapped

  # Drop the two leading characters and the single trailing brace.
  ref[2...-1]
end
1335
+
1336
# @param port [Integer, String] a single port, or a "low-high" range string
# @return [Integer, Object] for a String, the integer value of the text
#   before the first "-"; any other value is returned as given
def port_range_start(port)
  return port unless port.is_a?(String)

  lower, = port.split('-')
  lower.to_i
end
1339
+
1340
# @param port [Integer, String] a single port, or a "low-high" range string
# @return [Integer, Object] for a String, the integer value of the text
#   after the last "-"; any other value is returned as given
def port_range_end(port)
  return port unless port.is_a?(String)

  port.split('-')[-1].to_i
end
1343
+ end
1344
+ end
1345
+ end
1346
+ end
1347
+ end