@rulebricks/cli 2.1.3 → 2.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,14 +24,19 @@ export declare function terraformPlan(deploymentName: string): Promise<void>;
24
24
  */
25
25
  export declare function terraformApply(deploymentName: string): Promise<void>;
26
26
  /**
27
- * Cleans up orphaned cloud resources that may linger after a failed deploy or
28
- * incomplete destroy. Best-effort: failures are logged but never thrown.
27
+ * Lightweight pre-deploy cleanup for the CloudWatch log group that the EKS module
28
+ * no longer manages (create_cloudwatch_log_group = false). Safe to call before
29
+ * terraform apply since it targets a resource outside terraform's control.
29
30
  */
30
31
  export declare function cleanupOrphanedResources(provider: CloudProvider, clusterName: string, region: string): Promise<void>;
31
32
  /**
32
- * Destroys Terraform infrastructure with retry logic.
33
- * Runs init first to ensure .terraform folder exists (handles partial deployments).
34
- * After all attempts, sweeps orphaned cloud resources that Terraform doesn't manage.
33
+ * Destroys Terraform infrastructure, then sweeps remaining cloud resources.
34
+ *
35
+ * Flow:
36
+ * 1. terraform destroy (single attempt)
37
+ * 2. Cloud-native cleanup ALWAYS runs (terraform can report success while
38
+ * resources still exist)
39
+ * 3. If terraform reported failure, try once more now that blockers are gone
35
40
  */
36
41
  export declare function terraformDestroy(deploymentName: string, cloudContext?: {
37
42
  provider: CloudProvider;
@@ -169,14 +169,12 @@ export async function terraformApply(deploymentName) {
169
169
  }
170
170
  }
171
171
  /**
172
- * Cleans up orphaned cloud resources that may linger after a failed deploy or
173
- * incomplete destroy. Best-effort: failures are logged but never thrown.
172
+ * Lightweight pre-deploy cleanup for the CloudWatch log group that the EKS module
173
+ * no longer manages (create_cloudwatch_log_group = false). Safe to call before
174
+ * terraform apply since it targets a resource outside terraform's control.
174
175
  */
175
176
  export async function cleanupOrphanedResources(provider, clusterName, region) {
176
177
  if (provider === 'aws') {
177
- // The EKS module (or AWS itself) creates /aws/eks/<cluster>/cluster.
178
- // Since we disabled Terraform management of this log group, we must
179
- // delete it ourselves to ensure a clean slate.
180
178
  const logGroupName = `/aws/eks/${clusterName}/cluster`;
181
179
  try {
182
180
  await execa('aws', [
@@ -189,14 +187,368 @@ export async function cleanupOrphanedResources(provider, clusterName, region) {
189
187
  // Log group may not exist — that's fine
190
188
  }
191
189
  }
192
- // GCP and Azure don't have an equivalent orphan problem today
193
190
  }
194
- const DESTROY_MAX_ATTEMPTS = 3;
195
- const DESTROY_RETRY_DELAY_MS = 5_000;
191
+ // ============================================================================
192
+ // Post-destroy cloud-native cleanup (AWS)
193
+ //
194
+ // Handles every uniquely-named resource that terraform tends to leave behind
195
+ // after a failed destroy or partial apply. Runs unconditionally after every
196
+ // terraform destroy since terraform can report success while resources linger.
197
+ // Every step is best-effort: failures are silently swallowed.
198
+ // ============================================================================
/**
 * Requests deletion of every node group that `aws eks list-nodegroups` reports
 * for the cluster, then waits for each of those deletions to complete.
 *
 * Best-effort: if the listing fails the function returns immediately, and any
 * failing delete/wait call is ignored so the remaining node groups are still
 * processed.
 */
async function deleteAwsEksNodeGroups(clusterName, region) {
    // Builds the CLI arguments shared by the delete and wait invocations.
    const nodegroupArgs = (subcommand, nodegroupName) => [
        'eks', ...subcommand,
        '--cluster-name', clusterName,
        '--nodegroup-name', nodegroupName,
        '--region', region,
    ];
    let names;
    try {
        const listing = await execa('aws', [
            'eks', 'list-nodegroups',
            '--cluster-name', clusterName,
            '--region', region,
            '--output', 'json',
        ]);
        names = JSON.parse(listing.stdout).nodegroups ?? [];
    }
    catch {
        // Listing failed (e.g. the cluster does not exist) -- nothing to delete.
        return;
    }
    // Phase 1 issues every delete request; phase 2 waits on each node group.
    // All deletes are requested before the first wait starts.
    const phases = [['delete-nodegroup'], ['wait', 'nodegroup-deleted']];
    for (const subcommand of phases) {
        for (const name of names) {
            try {
                await execa('aws', nodegroupArgs(subcommand, name));
            }
            catch { /* already gone, or the waiter timed out */ }
        }
    }
}
/**
 * Requests deletion of the EKS cluster and, when the request was accepted,
 * waits until the cluster is reported as deleted.
 *
 * Best-effort: a failed delete request (for example because the cluster does
 * not exist) skips the wait, and a failed wait is ignored.
 */
async function deleteAwsEksCluster(clusterName, region) {
    const target = ['--name', clusterName, '--region', region];
    let deletionRequested = true;
    try {
        await execa('aws', ['eks', 'delete-cluster', ...target]);
    }
    catch {
        deletionRequested = false;
    }
    if (!deletionRequested) {
        return;
    }
    try {
        await execa('aws', ['eks', 'wait', 'cluster-deleted', ...target]);
    }
    catch { /* waiter timed out or the cluster is already gone */ }
}
/**
 * Deletes the CloudWatch log group `/aws/eks/<clusterName>/cluster`.
 * Best-effort: any CLI failure (such as a missing log group) is ignored.
 */
async function deleteAwsCloudWatchLogGroup(clusterName, region) {
    const logGroupName = `/aws/eks/${clusterName}/cluster`;
    const cliArgs = [
        'logs', 'delete-log-group',
        '--log-group-name', logGroupName,
        '--region', region,
    ];
    try {
        await execa('aws', cliArgs);
    }
    catch { /* log group may not exist */ }
}
/**
 * Reads the OIDC issuer URL of an EKS cluster via `aws eks describe-cluster`.
 *
 * The issuer URL embeds a generated ID rather than the cluster name, so it has
 * to be captured while the cluster still exists in order to locate the
 * matching IAM OIDC provider afterwards.
 *
 * @returns the issuer URL, or `undefined` when the CLI call fails or prints
 *          nothing / the literal `None`.
 */
async function getEksOidcIssuer(clusterName, region) {
    try {
        const result = await execa('aws', [
            'eks', 'describe-cluster',
            '--name', clusterName,
            '--region', region,
            '--query', 'cluster.identity.oidc.issuer',
            '--output', 'text',
        ]);
        const issuer = result.stdout.trim();
        // With `--output text` the CLI prints "None" for a null query result.
        if (issuer === '' || issuer === 'None') {
            return undefined;
        }
        return issuer;
    }
    catch {
        return undefined;
    }
}
/**
 * Deletes the IAM OIDC provider whose URL equals the given EKS issuer URL.
 *
 * IAM OIDC providers are account-global, so no region is passed. Every
 * provider in the account is inspected; only one whose normalised URL is
 * exactly equal to the normalised issuer URL is deleted. (A substring match is
 * deliberately avoided: deleting an identity provider is destructive and must
 * never hit a provider that merely shares part of the URL.)
 *
 * Best-effort: listing or per-provider failures are ignored.
 *
 * @param oidcIssuerUrl issuer URL captured from the cluster, or `undefined`
 *                      when none could be read (in which case nothing happens).
 */
async function deleteAwsOidcProvider(oidcIssuerUrl) {
    if (!oidcIssuerUrl)
        return;
    // Compare URLs without scheme and without trailing slashes, so that the
    // issuer (reported with an https:// prefix) and the IAM provider URL are
    // in the same shape regardless of how each side is formatted.
    const normalize = (url) => url.trim().replace(/^https:\/\//i, '').replace(/\/+$/, '');
    const issuer = normalize(oidcIssuerUrl);
    if (!issuer)
        return;
    let providerArns;
    try {
        const { stdout } = await execa('aws', [
            'iam', 'list-open-id-connect-providers',
            '--output', 'json',
        ]);
        const parsed = JSON.parse(stdout);
        providerArns = (parsed.OpenIDConnectProviderList ?? []).map((p) => p.Arn);
    }
    catch {
        return;
    }
    for (const arn of providerArns) {
        try {
            const { stdout } = await execa('aws', [
                'iam', 'get-open-id-connect-provider',
                '--open-id-connect-provider-arn', arn,
                '--output', 'json',
            ]);
            const provider = JSON.parse(stdout);
            if (typeof provider.Url === 'string' && normalize(provider.Url) === issuer) {
                await execa('aws', [
                    'iam', 'delete-open-id-connect-provider',
                    '--open-id-connect-provider-arn', arn,
                ]);
            }
        }
        catch { /* skip providers that cannot be read or deleted */ }
    }
}
/**
 * Releases unassociated Elastic IPs whose `Name` tag contains the cluster name.
 *
 * Only addresses without an association are selected (the JMESPath query
 * keeps entries whose `AssociationId` is null). Best-effort: all CLI failures
 * are ignored.
 *
 * NOTE(review): the tag filter is a `*<clusterName>*` wildcard, so addresses
 * belonging to another deployment whose name contains this cluster name would
 * also match -- confirm that cluster names are never substrings of each other.
 */
async function releaseAwsElasticIps(clusterName, region) {
    // An empty name would turn the filter into `**` and release every
    // unassociated, Name-tagged address in the region.
    if (!clusterName)
        return;
    let allocationIds;
    try {
        const { stdout } = await execa('aws', [
            'ec2', 'describe-addresses',
            '--filters', `Name=tag:Name,Values=*${clusterName}*`,
            '--region', region,
            '--query', 'Addresses[?AssociationId==null].AllocationId',
            '--output', 'json',
        ]);
        allocationIds = JSON.parse(stdout);
    }
    catch {
        return;
    }
    if (!Array.isArray(allocationIds))
        return;
    for (const id of allocationIds) {
        try {
            await execa('aws', [
                'ec2', 'release-address',
                '--allocation-id', id,
                '--region', region,
            ]);
        }
        catch { /* may already be released */ }
    }
}
/**
 * Deletes an IAM role after stripping its policies.
 *
 * Steps, each best-effort and independent of the others' outcome:
 *   1. detach every managed policy attached to the role,
 *   2. delete every inline policy of the role,
 *   3. delete the role itself.
 * IAM is account-global, so no region is passed.
 */
async function deleteAwsIamRole(roleName) {
    const roleArg = ['--role-name', roleName];
    // Runs an `aws iam <subcommand>` listing for this role and parses its JSON.
    const listForRole = async (subcommand) => {
        const { stdout } = await execa('aws', ['iam', subcommand, ...roleArg, '--output', 'json']);
        return JSON.parse(stdout);
    };
    // 1. Managed policies: detach each one.
    try {
        const listing = await listForRole('list-attached-role-policies');
        for (const attachment of listing.AttachedPolicies ?? []) {
            try {
                await execa('aws', [
                    'iam', 'detach-role-policy',
                    ...roleArg,
                    '--policy-arn', attachment.PolicyArn,
                ]);
            }
            catch { /* skip */ }
        }
    }
    catch { /* role may not exist */ }
    // 2. Inline policies: delete each one.
    try {
        const listing = await listForRole('list-role-policies');
        for (const inlineName of listing.PolicyNames ?? []) {
            try {
                await execa('aws', [
                    'iam', 'delete-role-policy',
                    ...roleArg,
                    '--policy-name', inlineName,
                ]);
            }
            catch { /* skip */ }
        }
    }
    catch { /* role may not exist */ }
    // 3. The role itself.
    try {
        await execa('aws', ['iam', 'delete-role', ...roleArg]);
    }
    catch { /* may not exist */ }
}
/**
 * Removes the KMS alias `alias/eks/<clusterName>` and schedules the key it
 * pointed at for deletion.
 *
 * The target key ID is looked up first, because it can no longer be resolved
 * through the alias once the alias is deleted. Every step is best-effort.
 */
async function deleteAwsKmsAlias(clusterName, region) {
    const aliasName = `alias/eks/${clusterName}`;
    const regionArg = ['--region', region];
    // Resolve the key behind the alias (text output prints "None" when absent).
    let targetKeyId;
    try {
        const lookup = await execa('aws', [
            'kms', 'list-aliases',
            '--query', `Aliases[?AliasName=='${aliasName}'].TargetKeyId | [0]`,
            '--output', 'text',
            ...regionArg,
        ]);
        const printed = lookup.stdout.trim();
        if (printed !== '' && printed !== 'None') {
            targetKeyId = printed;
        }
    }
    catch { /* skip */ }
    // Alias names are unique, so a leftover alias blocks a later re-deploy.
    try {
        await execa('aws', ['kms', 'delete-alias', '--alias-name', aliasName, ...regionArg]);
    }
    catch { /* may not exist */ }
    if (!targetKeyId) {
        return;
    }
    // 7 days is the shortest pending window this code requests.
    try {
        await execa('aws', [
            'kms', 'schedule-key-deletion',
            '--key-id', targetKeyId,
            '--pending-window-in-days', '7',
            ...regionArg,
        ]);
    }
    catch { /* key may already be pending deletion or not exist */ }
}
/**
 * Schedules deletion (7-day pending window) of customer-managed KMS keys in
 * the region that are still `Enabled` and whose description contains the
 * cluster name. Intended to catch keys that outlived their alias.
 *
 * Best-effort: listing failures and per-key failures are ignored.
 *
 * NOTE(review): matching is a substring test on the key description, so a key
 * of another deployment whose description contains this cluster name would
 * also be scheduled -- confirm cluster names are never substrings of each
 * other, or tighten the match to the exact description the module writes.
 */
async function scheduleAwsOrphanedKmsKeys(clusterName, region) {
    // `''.includes('')` is true for every description: without this guard an
    // empty cluster name would schedule deletion of ALL enabled
    // customer-managed keys in the region.
    if (!clusterName)
        return;
    let keyIds;
    try {
        const { stdout } = await execa('aws', [
            'kms', 'list-keys',
            '--region', region,
            '--query', 'Keys[].KeyId',
            '--output', 'json',
        ]);
        keyIds = JSON.parse(stdout);
    }
    catch {
        return;
    }
    if (!Array.isArray(keyIds))
        return;
    for (const keyId of keyIds) {
        try {
            const { stdout: meta } = await execa('aws', [
                'kms', 'describe-key',
                '--key-id', keyId,
                '--region', region,
                '--query', 'KeyMetadata.{State:KeyState,Desc:Description,Manager:KeyManager}',
                '--output', 'json',
            ]);
            const info = JSON.parse(meta);
            // Only touch keys we could own: customer-managed, still enabled,
            // and carrying a description that names this cluster.
            const describesCluster = typeof info.Desc === 'string' && info.Desc.includes(clusterName);
            if (info.Manager === 'CUSTOMER' && info.State === 'Enabled' && describesCluster) {
                await execa('aws', [
                    'kms', 'schedule-key-deletion',
                    '--key-id', keyId,
                    '--pending-window-in-days', '7',
                    '--region', region,
                ]);
            }
        }
        catch { /* skip individual key */ }
    }
}
/**
 * Deletes the EC2 launch templates in the region that carry the tag
 * `Environment=rulebricks`. Best-effort: all CLI failures are ignored.
 *
 * NOTE(review): `clusterName` is accepted but not used -- the selection is by
 * the `Environment` tag only, so templates of other deployments in the same
 * region that share this tag are selected too. Confirm whether that is
 * intended before relying on this in accounts with several deployments.
 */
async function deleteAwsLaunchTemplates(clusterName, region) {
    const regionArg = ['--region', region];
    try {
        const listing = await execa('aws', [
            'ec2', 'describe-launch-templates',
            '--filters', `Name=tag:Environment,Values=rulebricks`,
            ...regionArg,
            '--query', 'LaunchTemplates[].LaunchTemplateId',
            '--output', 'json',
        ]);
        const templateIds = JSON.parse(listing.stdout);
        for (const templateId of templateIds) {
            try {
                await execa('aws', [
                    'ec2', 'delete-launch-template',
                    '--launch-template-id', templateId,
                    ...regionArg,
                ]);
            }
            catch { /* may not exist or in use */ }
        }
    }
    catch { /* skip */ }
}
/**
 * Deletes the customer-managed IAM policy with the given name.
 *
 * - The lookup is restricted to `--scope Local` so that only customer-managed
 *   policies are enumerated, not the AWS-managed catalogue.
 * - All non-default versions are deleted first; IAM refuses to delete a policy
 *   that still has them.
 *
 * Best-effort: every CLI failure is ignored (the policy may not exist or may
 * still be attached somewhere). IAM is account-global, so no region is passed.
 */
async function deleteAwsIamPolicy(policyName) {
    if (!policyName)
        return;
    let matches;
    try {
        const { stdout } = await execa('aws', [
            'iam', 'list-policies',
            '--scope', 'Local',
            '--query', `Policies[?PolicyName=='${policyName}']`,
            '--output', 'json',
        ]);
        matches = JSON.parse(stdout);
    }
    catch {
        return;
    }
    if (!Array.isArray(matches))
        return;
    for (const policy of matches) {
        const arn = policy?.Arn;
        if (!arn)
            continue;
        // Remove non-default versions; the default one goes with the policy.
        try {
            const { stdout } = await execa('aws', [
                'iam', 'list-policy-versions',
                '--policy-arn', arn,
                '--output', 'json',
            ]);
            const versions = JSON.parse(stdout).Versions ?? [];
            for (const version of versions) {
                if (version.IsDefaultVersion)
                    continue;
                try {
                    await execa('aws', [
                        'iam', 'delete-policy-version',
                        '--policy-arn', arn,
                        '--version-id', version.VersionId,
                    ]);
                }
                catch { /* skip */ }
            }
        }
        catch { /* fall through and still attempt the delete */ }
        try {
            await execa('aws', ['iam', 'delete-policy', '--policy-arn', arn]);
        }
        catch { /* may have attachments or not exist */ }
    }
}
/**
 * Sweeps the AWS resources of one deployment by calling each cleanup helper
 * in dependency order. The helpers are awaited one after another; this
 * function adds no error handling of its own.
 */
async function cleanupAwsResources(clusterName, region) {
    // The issuer URL contains a generated ID, not the cluster name, so it must
    // be read while the cluster still exists.
    const issuerUrl = await getEksOidcIssuer(clusterName, region);
    // Node groups go first: they have to be gone before the cluster.
    await deleteAwsEksNodeGroups(clusterName, region);
    await deleteAwsEksCluster(clusterName, region);
    // Log group is removed only after the cluster has been dealt with.
    await deleteAwsCloudWatchLogGroup(clusterName, region);
    // OIDC provider, identified by the issuer URL captured at the top.
    await deleteAwsOidcProvider(issuerUrl);
    // IAM roles named `<clusterName>-<suffix>`.
    for (const suffix of ['ebs-csi', 'external-dns', 'vector']) {
        await deleteAwsIamRole(`${clusterName}-${suffix}`);
    }
    // Customer-managed IAM policy.
    await deleteAwsIamPolicy(`${clusterName}-vector-s3`);
    // KMS: alias and its key first, then enabled keys matched by description.
    await deleteAwsKmsAlias(clusterName, region);
    await scheduleAwsOrphanedKmsKeys(clusterName, region);
    // Launch templates, then unassociated Elastic IPs.
    await deleteAwsLaunchTemplates(clusterName, region);
    await releaseAwsElasticIps(clusterName, region);
}
196
544
  /**
197
- * Destroys Terraform infrastructure with retry logic.
198
- * Runs init first to ensure .terraform folder exists (handles partial deployments).
199
- * After all attempts, sweeps orphaned cloud resources that Terraform doesn't manage.
545
+ * Destroys Terraform infrastructure, then sweeps remaining cloud resources.
546
+ *
547
+ * Flow:
548
+ * 1. terraform destroy (single attempt)
549
+ * 2. Cloud-native cleanup ALWAYS runs (terraform can report success while
550
+ * resources still exist)
551
+ * 3. If terraform reported failure, try once more now that blockers are gone
200
552
  */
201
553
  export async function terraformDestroy(deploymentName, cloudContext) {
202
554
  const workDir = getTerraformDir(deploymentName);
@@ -211,36 +563,41 @@ export async function terraformDestroy(deploymentName, cloudContext) {
211
563
  if (execaInitError.stdout || execaInitError.stderr) {
212
564
  await saveLogFile(workDir, 'destroy-init', execaInitError.stdout || '', execaInitError.stderr || '');
213
565
  }
214
- // Don't throw — continue to try destroy anyway
215
566
  }
216
- let lastError;
217
- for (let attempt = 1; attempt <= DESTROY_MAX_ATTEMPTS; attempt++) {
567
+ // First terraform destroy attempt
568
+ let firstAttemptFailed = false;
569
+ try {
570
+ await execa('terraform', ['destroy', '-auto-approve'], {
571
+ cwd: workDir
572
+ });
573
+ }
574
+ catch (error) {
575
+ firstAttemptFailed = true;
576
+ const execaError = error;
577
+ if (execaError.stdout || execaError.stderr) {
578
+ await saveLogFile(workDir, 'destroy', execaError.stdout || '', execaError.stderr || '');
579
+ }
580
+ }
581
+ // ALWAYS run cloud-native cleanup -- terraform can't be trusted to report
582
+ // accurately whether all resources were actually destroyed
583
+ if (cloudContext?.provider === 'aws') {
584
+ await cleanupAwsResources(cloudContext.clusterName, cloudContext.region);
585
+ }
586
+ // If terraform failed, try once more now that cloud-native cleanup removed blockers
587
+ if (firstAttemptFailed) {
218
588
  try {
219
589
  await execa('terraform', ['destroy', '-auto-approve'], {
220
590
  cwd: workDir
221
591
  });
222
- lastError = undefined;
223
- break;
224
592
  }
225
593
  catch (error) {
226
594
  const execaError = error;
227
595
  if (execaError.stdout || execaError.stderr) {
228
- await saveLogFile(workDir, `destroy-attempt-${attempt}`, execaError.stdout || '', execaError.stderr || '');
229
- }
230
- lastError = new Error(`Terraform destroy failed (attempt ${attempt}/${DESTROY_MAX_ATTEMPTS}):\n` +
231
- `${getErrorMessage(error, 'Unknown error')}\n\nLogs saved to: ${workDir}`);
232
- if (attempt < DESTROY_MAX_ATTEMPTS) {
233
- await new Promise((r) => setTimeout(r, DESTROY_RETRY_DELAY_MS));
596
+ await saveLogFile(workDir, 'destroy-final', execaError.stdout || '', execaError.stderr || '');
234
597
  }
598
+ throw new Error(`Terraform destroy failed:\n${getErrorMessage(error, 'Unknown error')}\n\nLogs saved to: ${workDir}`);
235
599
  }
236
600
  }
237
- // Best-effort cleanup of orphaned cloud resources regardless of destroy outcome
238
- if (cloudContext) {
239
- await cleanupOrphanedResources(cloudContext.provider, cloudContext.clusterName, cloudContext.region);
240
- }
241
- if (lastError) {
242
- throw lastError;
243
- }
244
601
  }
245
602
  /**
246
603
  * Gets Terraform outputs
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rulebricks/cli",
3
- "version": "2.1.3",
3
+ "version": "2.1.5",
4
4
  "description": "CLI for deploying and managing private Rulebricks instances",
5
5
  "type": "module",
6
6
  "bin": {