@rulebricks/cli 2.1.2 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ import { platform } from "os";
5
5
  import { BorderBox, Spinner, StatusLine, ThemeProvider, useTheme, Logo, } from "../components/common/index.js";
6
6
  import { DNSWaitScreen } from "../components/DNSWaitScreen.js";
7
7
  import { loadDeploymentConfig, loadDeploymentState, saveDeploymentState, updateDeploymentStatus, saveTerraformVars, } from "../lib/config.js";
8
- import { setupTerraformWorkspace, terraformInit, terraformPlan, terraformApply, terraformDestroy, updateKubeconfig, hasTerraformState, isTerraformInstalled, generateTerraformVars, } from "../lib/terraform.js";
8
+ import { setupTerraformWorkspace, terraformInit, terraformPlan, terraformApply, terraformDestroy, cleanupOrphanedResources, updateKubeconfig, hasTerraformState, isTerraformInstalled, generateTerraformVars, } from "../lib/terraform.js";
9
9
  import { checkGcpApplicationDefaultCredentials, checkAzureResourceProviders, checkAzureVmQuota, AZURE_TIER_CORES, } from "../lib/cloudCli.js";
10
10
  import { installOrUpgradeChart, upgradeChart, isHelmInstalled, } from "../lib/helm.js";
11
11
  import { isKubectlInstalled, checkClusterAccessible, } from "../lib/kubernetes.js";
@@ -32,7 +32,14 @@ function DeployCommandInner({ name, skipInfra, skipDns, version, }) {
32
32
  const handleCleanup = useCallback(async () => {
33
33
  setStep("cleanup-running");
34
34
  try {
35
- await terraformDestroy(name);
35
+ const cloudContext = config?.infrastructure.provider && config?.infrastructure.region
36
+ ? {
37
+ provider: config.infrastructure.provider,
38
+ clusterName: config.infrastructure.clusterName || `${name}-cluster`,
39
+ region: config.infrastructure.region,
40
+ }
41
+ : undefined;
42
+ await terraformDestroy(name, cloudContext);
36
43
  setStep("cleanup-complete");
37
44
  setTimeout(() => exit(), 3000);
38
45
  }
@@ -41,7 +48,7 @@ function DeployCommandInner({ name, skipInfra, skipDns, version, }) {
41
48
  setStep("cleanup-complete");
42
49
  setTimeout(() => exit(), 5000);
43
50
  }
44
- }, [name, exit]);
51
+ }, [name, config, exit]);
45
52
  const skipCleanup = useCallback(() => {
46
53
  setStep("error");
47
54
  }, []);
@@ -167,6 +174,11 @@ function DeployCommandInner({ name, skipInfra, skipDns, version, }) {
167
174
  await terraformInit(name);
168
175
  setStep("infra-plan");
169
176
  await terraformPlan(name);
177
+ // Clean up orphaned cloud resources from prior failed deployments
178
+ // (e.g. CloudWatch log groups that survived an incomplete destroy)
179
+ if (cfg.infrastructure.provider && cfg.infrastructure.region) {
180
+ await cleanupOrphanedResources(cfg.infrastructure.provider, cfg.infrastructure.clusterName || `${name}-cluster`, cfg.infrastructure.region);
181
+ }
170
182
  setStep("infra-apply");
171
183
  await terraformApply(name);
172
184
  setStatus((s) => ({ ...s, infrastructure: "success" }));
@@ -16,6 +16,7 @@ function DestroyCommandInner({ name, cluster, config, force, }) {
16
16
  const [scope, setScope] = useState(null);
17
17
  const [error, setError] = useState(null);
18
18
  const [confirmText, setConfirmText] = useState("");
19
+ const [infraError, setInfraError] = useState(null);
19
20
  const [status, setStatus] = useState({
20
21
  helm: "pending",
21
22
  pvc: "pending",
@@ -163,10 +164,18 @@ function DestroyCommandInner({ name, cluster, config, force, }) {
163
164
  if (cluster && deploymentScope.hasInfrastructure) {
164
165
  setStatus((s) => ({ ...s, infrastructure: "running" }));
165
166
  try {
166
- await terraformDestroy(name);
167
+ const cloudContext = cfg?.infrastructure.provider && cfg?.infrastructure.region
168
+ ? {
169
+ provider: cfg.infrastructure.provider,
170
+ clusterName: cfg.infrastructure.clusterName || `${name}-cluster`,
171
+ region: cfg.infrastructure.region,
172
+ }
173
+ : undefined;
174
+ await terraformDestroy(name, cloudContext);
167
175
  setStatus((s) => ({ ...s, infrastructure: "success" }));
168
176
  }
169
- catch {
177
+ catch (infraErr) {
178
+ setInfraError(infraErr instanceof Error ? infraErr.message : "Infrastructure destroy failed");
170
179
  setStatus((s) => ({ ...s, infrastructure: "error" }));
171
180
  }
172
181
  }
@@ -220,7 +229,9 @@ function DestroyCommandInner({ name, cluster, config, force, }) {
220
229
  const noClusterCleanup = status.helm === "skipped" &&
221
230
  status.pvc === "skipped" &&
222
231
  status.namespace === "skipped";
223
- return (_jsx(BorderBox, { title: "Destruction Complete", children: _jsxs(Box, { flexDirection: "column", marginY: 1, children: [_jsxs(Text, { color: colors.success, bold: true, children: ["\u2713 Deployment \"", name, "\" has been destroyed"] }), cleanedItems.length > 0 && (_jsxs(Box, { marginTop: 1, flexDirection: "column", children: [_jsx(Text, { color: colors.muted, children: "Cleaned up:" }), cleanedItems.map((item) => (_jsxs(Text, { color: colors.muted, children: [" ", "\u2022 ", item] }, item)))] })), noClusterCleanup && status.cleanup === "success" && (_jsx(Box, { marginTop: 1, children: _jsx(Text, { color: colors.muted, dimColor: true, children: "Note: No cluster resources found, only local files were cleaned up." }) })), status.cleanup === "skipped" && (_jsx(Box, { marginTop: 1, children: _jsxs(Text, { color: colors.muted, dimColor: true, children: ["Local configuration files preserved in ~/.rulebricks/deployments/", name, "/"] }) }))] }) }));
232
+ const hasInfraFailure = status.infrastructure === "error";
233
+ const title = hasInfraFailure ? "Destruction Partially Complete" : "Destruction Complete";
234
+ return (_jsx(BorderBox, { title: title, children: _jsxs(Box, { flexDirection: "column", marginY: 1, children: [hasInfraFailure ? (_jsxs(Text, { color: colors.warning, bold: true, children: ["\u26A0 Deployment \"", name, "\" was partially destroyed"] })) : (_jsxs(Text, { color: colors.success, bold: true, children: ["\u2713 Deployment \"", name, "\" has been destroyed"] })), cleanedItems.length > 0 && (_jsxs(Box, { marginTop: 1, flexDirection: "column", children: [_jsx(Text, { color: colors.muted, children: "Cleaned up:" }), cleanedItems.map((item) => (_jsxs(Text, { color: colors.muted, children: [" ", "\u2022 ", item] }, item)))] })), hasInfraFailure && (_jsxs(Box, { marginTop: 1, flexDirection: "column", children: [_jsx(Text, { color: colors.error, bold: true, children: "\u2717 Infrastructure destroy failed" }), _jsx(Text, { color: colors.error, children: infraError }), _jsx(Box, { marginTop: 1, children: _jsxs(Text, { color: colors.muted, children: ["Cloud resources may still exist. Run `rulebricks destroy ", name, " ", "--cluster` to retry."] }) })] })), noClusterCleanup && status.cleanup === "success" && !hasInfraFailure && (_jsx(Box, { marginTop: 1, children: _jsx(Text, { color: colors.muted, dimColor: true, children: "Note: No cluster resources found, only local files were cleaned up." }) })), status.cleanup === "skipped" && (_jsx(Box, { marginTop: 1, children: _jsxs(Text, { color: colors.muted, dimColor: true, children: ["Local configuration files preserved in ~/.rulebricks/deployments/", name, "/"] }) }))] }) }));
224
235
  }
225
236
  // Destroying screen
226
237
  if (step === "destroying") {
@@ -24,10 +24,25 @@ export declare function terraformPlan(deploymentName: string): Promise<void>;
24
24
  */
25
25
  export declare function terraformApply(deploymentName: string): Promise<void>;
26
26
  /**
27
- * Destroys Terraform infrastructure.
28
- * Runs init first to ensure .terraform folder exists (handles partial deployments).
27
+ * Lightweight pre-deploy cleanup for the CloudWatch log group that the EKS module
28
+ * no longer manages (create_cloudwatch_log_group = false). Safe to call before
29
+ * terraform apply since it targets a resource outside terraform's control.
29
30
  */
30
- export declare function terraformDestroy(deploymentName: string): Promise<void>;
31
+ export declare function cleanupOrphanedResources(provider: CloudProvider, clusterName: string, region: string): Promise<void>;
32
+ /**
33
+ * Destroys Terraform infrastructure, then sweeps remaining cloud resources.
34
+ *
35
+ * Flow:
36
+ * 1. terraform destroy (single attempt)
37
+ * 2. Cloud-native cleanup ALWAYS runs (terraform can report success while
38
+ * resources still exist)
39
+ * 3. If terraform reported failure, try once more now that blockers are gone
40
+ */
41
+ export declare function terraformDestroy(deploymentName: string, cloudContext?: {
42
+ provider: CloudProvider;
43
+ clusterName: string;
44
+ region: string;
45
+ }): Promise<void>;
31
46
  /**
32
47
  * Gets Terraform outputs
33
48
  */
@@ -169,37 +169,274 @@ export async function terraformApply(deploymentName) {
169
169
  }
170
170
  }
171
171
  /**
172
- * Destroys Terraform infrastructure.
173
- * Runs init first to ensure .terraform folder exists (handles partial deployments).
172
+ * Lightweight pre-deploy cleanup for the CloudWatch log group that the EKS module
173
+ * no longer manages (create_cloudwatch_log_group = false). Safe to call before
174
+ * terraform apply since it targets a resource outside terraform's control.
174
175
  */
175
- export async function terraformDestroy(deploymentName) {
176
- const workDir = getTerraformDir(deploymentName);
176
/**
 * Pre-deploy cleanup of cloud resources left over from an earlier, incomplete
 * deployment. Only AWS is handled: the EKS control-plane log group
 * `/aws/eks/<clusterName>/cluster` is deleted. Any other provider is a no-op.
 *
 * Best-effort: the underlying helper ignores CLI failures (for example when
 * the log group does not exist).
 *
 * @param {string} provider - Cloud provider id; only 'aws' triggers any work.
 * @param {string} clusterName - EKS cluster name used to derive the log group.
 * @param {string} region - AWS region handed to the CLI.
 * @returns {Promise<void>}
 */
export async function cleanupOrphanedResources(provider, clusterName, region) {
    if (provider !== 'aws') {
        return;
    }
    // Delegate to the post-destroy helper so the log-group naming convention
    // is defined in exactly one place.
    await deleteAwsCloudWatchLogGroup(clusterName, region);
}
191
+ // ============================================================================
192
+ // Post-destroy cloud-native cleanup (AWS)
193
+ //
194
+ // Handles every uniquely-named resource that terraform tends to leave behind
195
+ // after a failed destroy or partial apply. Runs unconditionally after every
196
+ // terraform destroy since terraform can report success while resources linger.
197
+ // Every step is best-effort: failures are silently swallowed.
198
+ // ============================================================================
199
/**
 * Deletes every managed node group of an EKS cluster and then waits for each
 * deletion to finish. Returns immediately when the node groups cannot be
 * listed; individual delete/wait failures are ignored.
 *
 * @param {string} clusterName - EKS cluster whose node groups are removed.
 * @param {string} region - AWS region handed to the CLI.
 * @returns {Promise<void>}
 */
async function deleteAwsEksNodeGroups(clusterName, region) {
    let names;
    try {
        const listing = await execa('aws', [
            'eks', 'list-nodegroups',
            '--cluster-name', clusterName,
            '--region', region,
            '--output', 'json',
        ]);
        names = JSON.parse(listing.stdout).nodegroups ?? [];
    }
    catch {
        // Listing failed (the cluster may not exist): nothing to clean up.
        return;
    }
    // Two passes over the same list: request every deletion first, then block
    // on each one, so deletions are all issued before any waiting starts.
    const passes = [
        ['delete-nodegroup'],
        ['wait', 'nodegroup-deleted'],
    ];
    for (const subcommand of passes) {
        for (const name of names) {
            try {
                await execa('aws', [
                    'eks', ...subcommand,
                    '--cluster-name', clusterName,
                    '--nodegroup-name', name,
                    '--region', region,
                ]);
            }
            catch {
                // Already gone, or the wait timed out; keep going.
            }
        }
    }
}
238
/**
 * Requests deletion of an EKS cluster and waits until it is gone. If the
 * delete call fails the wait is skipped; a failed wait is ignored.
 *
 * @param {string} clusterName - Name of the EKS cluster to delete.
 * @param {string} region - AWS region handed to the CLI.
 * @returns {Promise<void>}
 */
async function deleteAwsEksCluster(clusterName, region) {
    const target = ['--name', clusterName, '--region', region];
    try {
        await execa('aws', ['eks', 'delete-cluster', ...target]);
    }
    catch {
        // Delete was rejected (the cluster may not exist): nothing to wait for.
        return;
    }
    try {
        await execa('aws', ['eks', 'wait', 'cluster-deleted', ...target]);
    }
    catch {
        // Wait timed out or the cluster is already gone.
    }
}
258
/**
 * Deletes the log group `/aws/eks/<clusterName>/cluster`. Any failure
 * (including a missing log group) is ignored.
 *
 * @param {string} clusterName - EKS cluster name used to derive the log group.
 * @param {string} region - AWS region handed to the CLI.
 * @returns {Promise<void>}
 */
async function deleteAwsCloudWatchLogGroup(clusterName, region) {
    try {
        const logGroup = `/aws/eks/${clusterName}/cluster`;
        const cliArgs = [
            'logs', 'delete-log-group',
            '--log-group-name', logGroup,
            '--region', region,
        ];
        await execa('aws', cliArgs);
    }
    catch {
        // The log group may not exist.
    }
}
268
/**
 * Deletes IAM OIDC identity providers whose issuer URL contains the cluster
 * name. Best-effort: listing, lookup and delete failures are all ignored.
 *
 * NOTE(review): EKS issuer URLs appear to embed an opaque cluster id rather
 * than the cluster name, so this substring match may never hit for providers
 * created by the EKS module — confirm against a real deployment.
 *
 * @param {string} clusterName - Substring looked for in each provider URL.
 * @returns {Promise<void>}
 */
async function deleteAwsOidcProvider(clusterName) {
    // An empty name matches every URL (String.prototype.includes('') is always
    // true) and would delete all OIDC providers in the account.
    if (typeof clusterName !== 'string' || clusterName === '') {
        return;
    }
    let providerArns;
    try {
        const { stdout } = await execa('aws', [
            'iam', 'list-open-id-connect-providers',
            '--output', 'json',
        ]);
        const parsed = JSON.parse(stdout);
        providerArns = (parsed.OpenIDConnectProviderList ?? []).map((p) => p.Arn);
    }
    catch {
        return;
    }
    for (const arn of providerArns) {
        try {
            const { stdout } = await execa('aws', [
                'iam', 'get-open-id-connect-provider',
                '--open-id-connect-provider-arn', arn,
                '--output', 'json',
            ]);
            const { Url: issuerUrl } = JSON.parse(stdout);
            if (typeof issuerUrl === 'string' && issuerUrl.includes(clusterName)) {
                await execa('aws', [
                    'iam', 'delete-open-id-connect-provider',
                    '--open-id-connect-provider-arn', arn,
                ]);
            }
        }
        catch {
            // Lookup or delete failed for this provider; move on to the next.
        }
    }
}
299
/**
 * Removes an IAM role in three independent best-effort stages: detach its
 * managed policies, delete its inline policies, then delete the role. A
 * failure in one stage does not prevent the following stages from running.
 *
 * @param {string} roleName - Name of the IAM role to remove.
 * @returns {Promise<void>}
 */
async function deleteAwsIamRole(roleName) {
    const roleFlag = ['--role-name', roleName];
    // Stage 1: detach managed policies.
    try {
        const listing = await execa('aws', [
            'iam', 'list-attached-role-policies',
            ...roleFlag,
            '--output', 'json',
        ]);
        const attached = JSON.parse(listing.stdout).AttachedPolicies ?? [];
        for (const entry of attached) {
            try {
                await execa('aws', [
                    'iam', 'detach-role-policy',
                    ...roleFlag,
                    '--policy-arn', entry.PolicyArn,
                ]);
            }
            catch {
                // Skip policies that cannot be detached.
            }
        }
    }
    catch {
        // The role may not exist.
    }
    // Stage 2: delete inline policies.
    try {
        const listing = await execa('aws', [
            'iam', 'list-role-policies',
            ...roleFlag,
            '--output', 'json',
        ]);
        const inlineNames = JSON.parse(listing.stdout).PolicyNames ?? [];
        for (const inlineName of inlineNames) {
            try {
                await execa('aws', [
                    'iam', 'delete-role-policy',
                    ...roleFlag,
                    '--policy-name', inlineName,
                ]);
            }
            catch {
                // Skip policies that cannot be deleted.
            }
        }
    }
    catch {
        // The role may not exist.
    }
    // Stage 3: delete the role itself.
    try {
        await execa('aws', ['iam', 'delete-role', ...roleFlag]);
    }
    catch {
        // The role may not exist.
    }
}
346
/**
 * Deletes customer-managed IAM policies with the given name. Best-effort:
 * every CLI failure is ignored.
 *
 * The lookup is limited to `--scope Local` so AWS-managed policies are neither
 * scanned nor ever targeted. Non-default policy versions are removed first,
 * because IAM refuses to delete a policy that still has them.
 *
 * @param {string} policyName - Exact policy name to look up.
 * @returns {Promise<void>}
 */
async function deleteAwsIamPolicy(policyName) {
    let matches;
    try {
        const { stdout } = await execa('aws', [
            'iam', 'list-policies',
            '--scope', 'Local',
            '--query', `Policies[?PolicyName=='${policyName}']`,
            '--output', 'json',
        ]);
        matches = JSON.parse(stdout);
    }
    catch {
        return;
    }
    if (!Array.isArray(matches)) {
        return;
    }
    for (const policy of matches) {
        try {
            await deleteAwsIamPolicyExtraVersions(policy.Arn);
            await execa('aws', ['iam', 'delete-policy', '--policy-arn', policy.Arn]);
        }
        catch {
            // The policy may still be attached somewhere, or no longer exist.
        }
    }
}
/** Deletes every non-default version of a managed policy (best-effort). */
async function deleteAwsIamPolicyExtraVersions(policyArn) {
    let versions;
    try {
        const { stdout } = await execa('aws', [
            'iam', 'list-policy-versions',
            '--policy-arn', policyArn,
            '--output', 'json',
        ]);
        versions = JSON.parse(stdout).Versions ?? [];
    }
    catch {
        return;
    }
    for (const version of versions) {
        // The default version is removed together with the policy itself.
        if (version.IsDefaultVersion) {
            continue;
        }
        try {
            await execa('aws', [
                'iam', 'delete-policy-version',
                '--policy-arn', policyArn,
                '--version-id', version.VersionId,
            ]);
        }
        catch {
            // Skip versions that cannot be deleted.
        }
    }
}
363
/**
 * Sweeps AWS resources that may remain after terraform destroy, walking the
 * dependency chain in order: node groups, cluster, log group, OIDC provider,
 * IAM roles, and finally the customer-managed IAM policy.
 * Each step is delegated to a best-effort helper.
 *
 * @param {string} clusterName - Cluster name; also the prefix of role and policy names.
 * @param {string} region - AWS region for the regional resources.
 * @returns {Promise<void>}
 */
async function cleanupAwsResources(clusterName, region) {
    // Node groups have to go before the cluster that owns them.
    await deleteAwsEksNodeGroups(clusterName, region);
    await deleteAwsEksCluster(clusterName, region);
    // With the cluster gone the log group will not be recreated.
    await deleteAwsCloudWatchLogGroup(clusterName, region);
    // OIDC provider created by the EKS module for IRSA.
    await deleteAwsOidcProvider(clusterName);
    // IAM roles created by the terraform modules, removed one at a time.
    const roleSuffixes = ['ebs-csi', 'external-dns', 'vector'];
    for (const suffix of roleSuffixes) {
        await deleteAwsIamRole(`${clusterName}-${suffix}`);
    }
    // Customer-managed IAM policy.
    await deleteAwsIamPolicy(`${clusterName}-vector-s3`);
}
384
/**
 * Destroys Terraform infrastructure, then sweeps remaining cloud resources.
 *
 * Flow:
 *   1. `terraform init -upgrade` (a failure is logged and otherwise ignored)
 *   2. `terraform destroy` (first attempt; a failure is logged, not thrown)
 *   3. Cloud-native cleanup ALWAYS runs when an AWS cloud context is given,
 *      because terraform can report success while resources still exist
 *   4. If the first attempt failed, one more `terraform destroy` is tried now
 *      that the cleanup may have removed whatever was blocking it
 *
 * @param {string} deploymentName - Deployment whose terraform workspace is destroyed.
 * @param {{provider: string, clusterName: string, region: string}} [cloudContext]
 *   Optional cloud details; cleanup only runs when `provider` is 'aws'.
 * @returns {Promise<void>}
 * @throws {Error} When both destroy attempts fail. The underlying failure is
 *   available as `error.cause`.
 */
export async function terraformDestroy(deploymentName, cloudContext) {
    const workDir = getTerraformDir(deploymentName);
    // Run init first to ensure terraform is ready; destroy is attempted even
    // if init fails.
    try {
        await execa('terraform', ['init', '-upgrade'], {
            cwd: workDir
        });
    }
    catch (initError) {
        await saveTerraformFailureLog(workDir, 'destroy-init', initError);
    }
    const firstAttempt = await attemptTerraformDestroy(workDir, 'destroy');
    // ALWAYS run cloud-native cleanup -- terraform can't be trusted to report
    // accurately whether all resources were actually destroyed.
    if (cloudContext?.provider === 'aws') {
        await cleanupAwsResources(cloudContext.clusterName, cloudContext.region);
    }
    if (!firstAttempt.failed) {
        return;
    }
    // Terraform failed the first time; retry once now that blockers may be gone.
    const finalAttempt = await attemptTerraformDestroy(workDir, 'destroy-final');
    if (finalAttempt.failed) {
        throw new Error(`Terraform destroy failed:\n${getErrorMessage(finalAttempt.error, 'Unknown error')}\n\nLogs saved to: ${workDir}`, { cause: finalAttempt.error });
    }
}
/** Runs one `terraform destroy`; on failure saves a log and reports the error instead of throwing. */
async function attemptTerraformDestroy(workDir, logLabel) {
    try {
        await execa('terraform', ['destroy', '-auto-approve'], {
            cwd: workDir
        });
        return { failed: false, error: undefined };
    }
    catch (error) {
        await saveTerraformFailureLog(workDir, logLabel, error);
        return { failed: true, error };
    }
}
/** Saves the captured stdout/stderr of a failed command, if there is any. */
async function saveTerraformFailureLog(workDir, logLabel, failure) {
    if (failure?.stdout || failure?.stderr) {
        await saveLogFile(workDir, logLabel, failure.stdout || '', failure.stderr || '');
    }
}
205
442
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rulebricks/cli",
3
- "version": "2.1.2",
3
+ "version": "2.1.4",
4
4
  "description": "CLI for deploying and managing private Rulebricks instances",
5
5
  "type": "module",
6
6
  "bin": {
@@ -142,6 +142,12 @@ module "eks" {
142
142
  cluster_endpoint_public_access = true
143
143
  cluster_endpoint_private_access = true
144
144
 
145
+ # Disable Terraform-managed CloudWatch log group to prevent
146
+ # ResourceAlreadyExistsException on re-deploy after partial failures.
147
+ # AWS creates the log group automatically if control-plane logging is enabled.
148
+ create_cloudwatch_log_group = false
149
+ cluster_enabled_log_types = []
150
+
145
151
  vpc_id = module.vpc.vpc_id
146
152
  subnet_ids = module.vpc.private_subnets
147
153