@rulebricks/cli 2.1.3 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,14 +24,19 @@ export declare function terraformPlan(deploymentName: string): Promise<void>;
24
24
  */
25
25
  export declare function terraformApply(deploymentName: string): Promise<void>;
26
26
  /**
27
- * Cleans up orphaned cloud resources that may linger after a failed deploy or
28
- * incomplete destroy. Best-effort: failures are logged but never thrown.
27
+ * Lightweight pre-deploy cleanup for the CloudWatch log group that the EKS module
28
+ * no longer manages (create_cloudwatch_log_group = false). Safe to call before
29
+ * terraform apply since it targets a resource outside terraform's control.
29
30
  */
30
31
  export declare function cleanupOrphanedResources(provider: CloudProvider, clusterName: string, region: string): Promise<void>;
31
32
  /**
32
- * Destroys Terraform infrastructure with retry logic.
33
- * Runs init first to ensure .terraform folder exists (handles partial deployments).
34
- * After all attempts, sweeps orphaned cloud resources that Terraform doesn't manage.
33
+ * Destroys Terraform infrastructure, then sweeps remaining cloud resources.
34
+ *
35
+ * Flow:
36
+ * 1. terraform destroy (single attempt)
37
+ * 2. Cloud-native cleanup ALWAYS runs (terraform can report success while
38
+ * resources still exist)
39
+ * 3. If terraform reported failure, try once more now that blockers are gone
35
40
  */
36
41
  export declare function terraformDestroy(deploymentName: string, cloudContext?: {
37
42
  provider: CloudProvider;
@@ -169,14 +169,12 @@ export async function terraformApply(deploymentName) {
169
169
  }
170
170
  }
171
171
  /**
172
- * Cleans up orphaned cloud resources that may linger after a failed deploy or
173
- * incomplete destroy. Best-effort: failures are logged but never thrown.
172
+ * Lightweight pre-deploy cleanup for the CloudWatch log group that the EKS module
173
+ * no longer manages (create_cloudwatch_log_group = false). Safe to call before
174
+ * terraform apply since it targets a resource outside terraform's control.
174
175
  */
175
176
  export async function cleanupOrphanedResources(provider, clusterName, region) {
176
177
  if (provider === 'aws') {
177
- // The EKS module (or AWS itself) creates /aws/eks/<cluster>/cluster.
178
- // Since we disabled Terraform management of this log group, we must
179
- // delete it ourselves to ensure a clean slate.
180
178
  const logGroupName = `/aws/eks/${clusterName}/cluster`;
181
179
  try {
182
180
  await execa('aws', [
@@ -189,14 +187,208 @@ export async function cleanupOrphanedResources(provider, clusterName, region) {
189
187
  // Log group may not exist — that's fine
190
188
  }
191
189
  }
192
- // GCP and Azure don't have an equivalent orphan problem today
193
190
  }
194
- const DESTROY_MAX_ATTEMPTS = 3;
195
- const DESTROY_RETRY_DELAY_MS = 5_000;
191
+ // ============================================================================
192
+ // Post-destroy cloud-native cleanup (AWS)
193
+ //
194
+ // Handles every uniquely-named resource that terraform tends to leave behind
195
+ // after a failed destroy or partial apply. Runs unconditionally after every
196
+ // terraform destroy since terraform can report success while resources linger.
197
+ // Every step is best-effort: failures are silently swallowed.
198
+ // ============================================================================
199
/**
 * Deletes every node group of an EKS cluster through the AWS CLI, then waits
 * for each deletion to finish.
 *
 * Best-effort: if the node groups cannot be listed the function returns without
 * doing anything, and failures of individual delete/wait calls are ignored.
 *
 * @param clusterName - Name of the EKS cluster whose node groups are removed.
 * @param region - AWS region passed to every CLI invocation.
 * @returns A promise that resolves once all delete and wait calls have settled.
 */
async function deleteAwsEksNodeGroups(clusterName, region) {
    // Runs one `aws eks <subcommand>` call for a single node group and ignores
    // any failure (group already gone, waiter timed out, ...).
    const runIgnoringFailure = async (subcommand, nodeGroup) => {
        try {
            await execa('aws', [
                'eks', ...subcommand,
                '--cluster-name', clusterName,
                '--nodegroup-name', nodeGroup,
                '--region', region,
            ]);
        }
        catch { /* already gone or timed out */ }
    };
    let names;
    try {
        const listing = await execa('aws', [
            'eks', 'list-nodegroups',
            '--cluster-name', clusterName,
            '--region', region,
            '--output', 'json',
        ]);
        names = JSON.parse(listing.stdout).nodegroups ?? [];
    }
    catch {
        // Listing failed (the cluster may not exist): nothing to clean up.
        return;
    }
    // Issue every delete request first ...
    for (const name of names) {
        await runIgnoringFailure(['delete-nodegroup'], name);
    }
    // ... then block until each node group has actually disappeared.
    for (const name of names) {
        await runIgnoringFailure(['wait', 'nodegroup-deleted'], name);
    }
}
238
/**
 * Deletes an EKS cluster through the AWS CLI and waits for the deletion to
 * complete.
 *
 * Best-effort: if the delete request fails (the cluster may not exist) the wait
 * is skipped, and a failing or timed-out wait is ignored.
 *
 * @param clusterName - Name of the EKS cluster to delete.
 * @param region - AWS region passed to both CLI invocations.
 * @returns A promise that resolves once the delete/wait sequence has settled.
 */
async function deleteAwsEksCluster(clusterName, region) {
    const target = ['--name', clusterName, '--region', region];
    try {
        // A failing delete jumps straight to the catch, so the waiter only
        // runs after a delete request that was accepted.
        await execa('aws', ['eks', 'delete-cluster', ...target]);
        await execa('aws', ['eks', 'wait', 'cluster-deleted', ...target]);
    }
    catch {
        // Cluster may not exist, may already be gone, or the wait timed out.
    }
}
258
/**
 * Deletes the CloudWatch log group `/aws/eks/<clusterName>/cluster` through
 * the AWS CLI.
 *
 * Best-effort: a failing CLI call (for example because the log group does not
 * exist) is ignored.
 *
 * @param clusterName - EKS cluster name used to build the log group name.
 * @param region - AWS region passed to the CLI invocation.
 * @returns A promise that resolves once the delete call has settled.
 */
async function deleteAwsCloudWatchLogGroup(clusterName, region) {
    const logGroupName = `/aws/eks/${clusterName}/cluster`;
    const cliArgs = [
        'logs', 'delete-log-group',
        '--log-group-name', logGroupName,
        '--region', region,
    ];
    try {
        await execa('aws', cliArgs);
    }
    catch {
        // The log group may not exist.
    }
}
268
+ async function deleteAwsOidcProvider(clusterName) {
269
+ let providerArns;
270
+ try {
271
+ const { stdout } = await execa('aws', [
272
+ 'iam', 'list-open-id-connect-providers',
273
+ '--output', 'json',
274
+ ]);
275
+ const parsed = JSON.parse(stdout);
276
+ providerArns = (parsed.OpenIDConnectProviderList ?? []).map((p) => p.Arn);
277
+ }
278
+ catch {
279
+ return;
280
+ }
281
+ for (const arn of providerArns) {
282
+ try {
283
+ const { stdout } = await execa('aws', [
284
+ 'iam', 'get-open-id-connect-provider',
285
+ '--open-id-connect-provider-arn', arn,
286
+ '--output', 'json',
287
+ ]);
288
+ const parsed = JSON.parse(stdout);
289
+ if (parsed.Url && parsed.Url.includes(clusterName)) {
290
+ await execa('aws', [
291
+ 'iam', 'delete-open-id-connect-provider',
292
+ '--open-id-connect-provider-arn', arn,
293
+ ]);
294
+ }
295
+ }
296
+ catch { /* skip */ }
297
+ }
298
+ }
299
/**
 * Deletes an IAM role through the AWS CLI after stripping its policies.
 *
 * Steps, in order:
 *   1. detach every attached managed policy,
 *   2. delete every inline policy,
 *   3. delete the role itself.
 * Best-effort: a failing listing skips that step, a failing removal skips that
 * one policy, and a failing role deletion is ignored.
 *
 * @param roleName - Name of the IAM role to remove.
 * @returns A promise that resolves once all three steps have settled.
 */
async function deleteAwsIamRole(roleName) {
    // Lists one kind of policy on the role and issues one removal call per
    // entry; every failure is swallowed so the remaining entries still run.
    const stripPolicies = async (listCommand, pickEntries, removalArgs) => {
        try {
            const { stdout } = await execa('aws', [
                'iam', listCommand,
                '--role-name', roleName,
                '--output', 'json',
            ]);
            const entries = pickEntries(JSON.parse(stdout)) ?? [];
            for (const entry of entries) {
                try {
                    await execa('aws', removalArgs(entry));
                }
                catch { /* skip */ }
            }
        }
        catch { /* role may not exist */ }
    };
    // Managed policies are detached by ARN.
    await stripPolicies('list-attached-role-policies', (doc) => doc.AttachedPolicies, (attached) => [
        'iam', 'detach-role-policy',
        '--role-name', roleName,
        '--policy-arn', attached.PolicyArn,
    ]);
    // Inline policies are deleted by name.
    await stripPolicies('list-role-policies', (doc) => doc.PolicyNames, (inlineName) => [
        'iam', 'delete-role-policy',
        '--role-name', roleName,
        '--policy-name', inlineName,
    ]);
    // Finally remove the role itself.
    try {
        await execa('aws', ['iam', 'delete-role', '--role-name', roleName]);
    }
    catch { /* may not exist */ }
}
346
+ async function deleteAwsIamPolicy(policyName) {
347
+ try {
348
+ const { stdout } = await execa('aws', [
349
+ 'iam', 'list-policies',
350
+ '--query', `Policies[?PolicyName=='${policyName}']`,
351
+ '--output', 'json',
352
+ ]);
353
+ const policies = JSON.parse(stdout);
354
+ for (const policy of policies) {
355
+ try {
356
+ await execa('aws', ['iam', 'delete-policy', '--policy-arn', policy.Arn]);
357
+ }
358
+ catch { /* may have attachments or not exist */ }
359
+ }
360
+ }
361
+ catch { /* skip */ }
362
+ }
363
/**
 * Post-destroy sweep of the AWS resources tied to one cluster name.
 *
 * Runs the individual cleanup helpers strictly one after another, in
 * dependency order: node groups, cluster, CloudWatch log group, OIDC provider,
 * the `-ebs-csi` / `-external-dns` / `-vector` IAM roles, and finally the
 * `-vector-s3` IAM policy. Every helper is best-effort and swallows its own
 * errors.
 *
 * @param clusterName - Cluster name used to derive all resource names.
 * @param region - AWS region forwarded to the region-scoped helpers.
 * @returns A promise that resolves once every helper has run.
 */
async function cleanupAwsResources(clusterName, region) {
    // Node groups have to go before the cluster that owns them.
    await deleteAwsEksNodeGroups(clusterName, region);
    await deleteAwsEksCluster(clusterName, region);
    // With the cluster gone the log group will not be recreated.
    await deleteAwsCloudWatchLogGroup(clusterName, region);
    // OIDC provider created by the EKS module for IRSA.
    await deleteAwsOidcProvider(clusterName);
    // IAM roles created by the terraform modules, removed in a fixed order.
    const roleSuffixes = ['ebs-csi', 'external-dns', 'vector'];
    for (const suffix of roleSuffixes) {
        await deleteAwsIamRole(`${clusterName}-${suffix}`);
    }
    // Customer-managed IAM policy, removed after the roles that used it.
    await deleteAwsIamPolicy(`${clusterName}-vector-s3`);
}
196
384
  /**
197
- * Destroys Terraform infrastructure with retry logic.
198
- * Runs init first to ensure .terraform folder exists (handles partial deployments).
199
- * After all attempts, sweeps orphaned cloud resources that Terraform doesn't manage.
385
+ * Destroys Terraform infrastructure, then sweeps remaining cloud resources.
386
+ *
387
+ * Flow:
388
+ * 1. terraform destroy (single attempt)
389
+ * 2. Cloud-native cleanup ALWAYS runs (terraform can report success while
390
+ * resources still exist)
391
+ * 3. If terraform reported failure, try once more now that blockers are gone
200
392
  */
201
393
  export async function terraformDestroy(deploymentName, cloudContext) {
202
394
  const workDir = getTerraformDir(deploymentName);
@@ -211,36 +403,41 @@ export async function terraformDestroy(deploymentName, cloudContext) {
211
403
  if (execaInitError.stdout || execaInitError.stderr) {
212
404
  await saveLogFile(workDir, 'destroy-init', execaInitError.stdout || '', execaInitError.stderr || '');
213
405
  }
214
- // Don't throw — continue to try destroy anyway
215
406
  }
216
- let lastError;
217
- for (let attempt = 1; attempt <= DESTROY_MAX_ATTEMPTS; attempt++) {
407
+ // First terraform destroy attempt
408
+ let firstAttemptFailed = false;
409
+ try {
410
+ await execa('terraform', ['destroy', '-auto-approve'], {
411
+ cwd: workDir
412
+ });
413
+ }
414
+ catch (error) {
415
+ firstAttemptFailed = true;
416
+ const execaError = error;
417
+ if (execaError.stdout || execaError.stderr) {
418
+ await saveLogFile(workDir, 'destroy', execaError.stdout || '', execaError.stderr || '');
419
+ }
420
+ }
421
+ // ALWAYS run cloud-native cleanup -- terraform can't be trusted to report
422
+ // accurately whether all resources were actually destroyed
423
+ if (cloudContext?.provider === 'aws') {
424
+ await cleanupAwsResources(cloudContext.clusterName, cloudContext.region);
425
+ }
426
+ // If terraform failed, try once more now that cloud-native cleanup removed blockers
427
+ if (firstAttemptFailed) {
218
428
  try {
219
429
  await execa('terraform', ['destroy', '-auto-approve'], {
220
430
  cwd: workDir
221
431
  });
222
- lastError = undefined;
223
- break;
224
432
  }
225
433
  catch (error) {
226
434
  const execaError = error;
227
435
  if (execaError.stdout || execaError.stderr) {
228
- await saveLogFile(workDir, `destroy-attempt-${attempt}`, execaError.stdout || '', execaError.stderr || '');
229
- }
230
- lastError = new Error(`Terraform destroy failed (attempt ${attempt}/${DESTROY_MAX_ATTEMPTS}):\n` +
231
- `${getErrorMessage(error, 'Unknown error')}\n\nLogs saved to: ${workDir}`);
232
- if (attempt < DESTROY_MAX_ATTEMPTS) {
233
- await new Promise((r) => setTimeout(r, DESTROY_RETRY_DELAY_MS));
436
+ await saveLogFile(workDir, 'destroy-final', execaError.stdout || '', execaError.stderr || '');
234
437
  }
438
+ throw new Error(`Terraform destroy failed:\n${getErrorMessage(error, 'Unknown error')}\n\nLogs saved to: ${workDir}`);
235
439
  }
236
440
  }
237
- // Best-effort cleanup of orphaned cloud resources regardless of destroy outcome
238
- if (cloudContext) {
239
- await cleanupOrphanedResources(cloudContext.provider, cloudContext.clusterName, cloudContext.region);
240
- }
241
- if (lastError) {
242
- throw lastError;
243
- }
244
441
  }
245
442
  /**
246
443
  * Gets Terraform outputs
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rulebricks/cli",
3
- "version": "2.1.3",
3
+ "version": "2.1.4",
4
4
  "description": "CLI for deploying and managing private Rulebricks instances",
5
5
  "type": "module",
6
6
  "bin": {