npm - @rulebricks/cli - Versions diffs - 2.1.5 → 2.1.7 - Mend

@rulebricks/cli 2.1.5 → 2.1.7

This diff shows the changes between publicly available versions of this package, as they appear in the supported public registry to which they were released. The information contained in this diff is provided for informational purposes only.

Files changed (23)

package/README.md +26 -0
package/cluster-setup/aws/README.md +74 -0
package/cluster-setup/aws/check-aws-access.sh +78 -0
package/cluster-setup/aws/cluster.yaml +33 -0
package/cluster-setup/azure/README.md +93 -0
package/cluster-setup/azure/check-aks-prereqs.sh +96 -0
package/cluster-setup/azure/main.bicep +282 -0
package/cluster-setup/azure/main.parameters.json +21 -0
package/cluster-setup/gcp/README.md +172 -0
package/cluster-setup/gcp/check-gke-prereqs.sh +98 -0
package/dist/commands/deploy.js +31 -4
package/dist/commands/init.js +9 -2
package/dist/components/Wizard/WizardContext.d.ts +27 -3
package/dist/components/Wizard/WizardContext.js +95 -2
package/dist/components/Wizard/steps/CloudProviderStep.js +7 -2
package/dist/components/Wizard/steps/FeatureConfigStep.js +407 -10
package/dist/components/Wizard/steps/ReviewStep.js +7 -2
package/dist/lib/helmValues.js +227 -22
package/dist/lib/kubernetes.d.ts +31 -1
package/dist/lib/kubernetes.js +157 -5
package/dist/types/index.d.ts +368 -7
package/dist/types/index.js +47 -2
package/package.json +2 -1

package/dist/components/Wizard/steps/ReviewStep.js CHANGED

@@ -43,7 +43,12 @@ export function ReviewStep({ onComplete, onBack }) {
         dispatch({ type: 'SET_NAME', name });
         setEditingName(false);
     };
-    const tierConfig = state.tier ? TIER_CONFIGS[state.tier] : null;
+    const tierConfig = state.infrastructureMode !== 'existing' && state.tier ? TIER_CONFIGS[state.tier] : null;
+    const tierLabel = state.infrastructureMode === 'existing'
+        ? 'Inferred from cluster'
+        : state.tier
+            ? `${state.tier.charAt(0).toUpperCase()}${state.tier.slice(1)}`
+            : 'Not selected';
     const externalDnsEnabled = state.dnsAutoManage && isSupportedDnsProvider(state.dnsProvider);
     if (editingName) {
         return (_jsx(BorderBox, { title: "Deployment Name", children: _jsxs(Box, { flexDirection: "column", marginY: 1, children: [_jsx(Text, { children: "Enter a name for this deployment:" }), _jsx(Text, { color: "gray", dimColor: true, children: "Lowercase letters, numbers, and hyphens only" }), _jsx(Box, { marginTop: 1, children: _jsx(TextInput, { value: name, onChange: setName, onSubmit: handleNameSubmit, placeholder: "my-deployment" }) }), error && (_jsx(Box, { marginTop: 1, children: _jsxs(Text, { color: "red", children: ["\u2717 ", error] }) }))] }) }));
@@ -52,5 +57,5 @@ export function ReviewStep({ onComplete, onBack }) {
     const ConfigRow = ({ label, value, valueColor }) => (_jsxs(Box, { children: [_jsx(Box, { width: 16, children: _jsx(Text, { color: colors.muted, children: label }) }), _jsx(Text, { color: valueColor || colors.accent, children: value })] }));
     // Helper to render a section header
     const SectionHeader = ({ title }) => (_jsx(Box, { marginTop: 1, children: _jsxs(Text, { bold: true, color: colors.accent, children: ["\u2500\u2500 ", title, " \u2500\u2500"] }) }));
-    return (_jsxs(BorderBox, { title: "Review Configuration", children: [_jsxs(Box, { flexDirection: "column", children: [_jsx(SectionHeader, { title: "Deployment" }), _jsx(ConfigRow, { label: "Name", value: state.name }), state.appVersion && (_jsx(ConfigRow, { label: "App Version", value: state.appVersion })), _jsx(SectionHeader, { title: "Infrastructure" }), _jsx(ConfigRow, { label: "Mode", value: state.infrastructureMode === 'provision' ? 'Provision new cluster' : 'Use existing cluster' }), state.provider && (_jsx(ConfigRow, { label: "Provider", value: state.provider.toUpperCase() })), state.region && (_jsx(ConfigRow, { label: "Region", value: state.region })), _jsx(SectionHeader, { title: "Domain & DNS" }), _jsx(ConfigRow, { label: "Domain", value: state.domain }), _jsx(ConfigRow, { label: "Admin Email", value: state.adminEmail }), _jsx(ConfigRow, { label: "TLS Email", value: state.tlsEmail }), _jsxs(Box, { children: [_jsx(Box, { width: 16, children: _jsx(Text, { color: colors.muted, children: "DNS" }) }), _jsx(Text, { color: colors.accent, children: DNS_PROVIDER_NAMES[state.dnsProvider] }), externalDnsEnabled && _jsx(Text, { color: colors.success, children: " (auto)" })] }), _jsx(SectionHeader, { title: "SMTP" }), _jsx(ConfigRow, { label: "Host", value: `${state.smtpHost}:${state.smtpPort}` }), _jsx(ConfigRow, { label: "From", value: `${state.smtpFromName} <${state.smtpFrom}>` }), _jsx(SectionHeader, { title: "Database" }), _jsx(ConfigRow, { label: "Type", value: state.databaseType === 'supabase-cloud' ? 
'Supabase Cloud' : 'Self-hosted' }), _jsx(SectionHeader, { title: "Performance" }), _jsxs(Box, { children: [_jsx(Box, { width: 16, children: _jsx(Text, { color: colors.muted, children: "Tier" }) }), _jsxs(Text, { color: colors.accent, bold: true, children: [state.tier?.charAt(0).toUpperCase(), state.tier?.slice(1)] }), tierConfig && _jsxs(Text, { color: colors.muted, children: [" (", tierConfig.throughput, ")"] })] }), _jsx(SectionHeader, { title: "Features" }), _jsxs(Box, { children: [_jsxs(Text, { color: state.aiEnabled ? colors.success : colors.muted, children: [state.aiEnabled ? '✓' : '○', " AI"] }), _jsx(Text, { children: "  " }), _jsxs(Text, { color: state.ssoEnabled ? colors.success : colors.muted, children: [state.ssoEnabled ? '✓' : '○', " SSO"] }), _jsx(Text, { children: "  " }), _jsxs(Text, { color: state.monitoringEnabled ? colors.success : colors.muted, children: [state.monitoringEnabled ? '✓' : '○', " Monitoring"] }), _jsx(Text, { children: "  " }), _jsxs(Text, { color: state.loggingSink !== 'console' ? colors.success : colors.muted, children: [state.loggingSink !== 'console' ? '✓' : '○', " Logging"] })] }), _jsx(SectionHeader, { title: "License" }), _jsx(ConfigRow, { label: "Key", value: `${state.licenseKey?.substring(0, 12)}...` })] }), _jsxs(Box, { marginTop: 1, flexDirection: "column", children: [_jsx(Text, { color: colors.success, bold: true, children: "Press Enter to save this configuration" }), _jsx(Text, { color: colors.muted, dimColor: true, children: "e to edit name \u2022 Esc to go back" })] })] }));
+    return (_jsxs(BorderBox, { title: "Review Configuration", children: [_jsxs(Box, { flexDirection: "column", children: [_jsx(SectionHeader, { title: "Deployment" }), _jsx(ConfigRow, { label: "Name", value: state.name }), state.appVersion && (_jsx(ConfigRow, { label: "App Version", value: state.appVersion })), _jsx(SectionHeader, { title: "Infrastructure" }), _jsx(ConfigRow, { label: "Mode", value: state.infrastructureMode === 'provision' ? 'Provision new cluster' : 'Use existing cluster' }), state.provider && (_jsx(ConfigRow, { label: "Provider", value: state.provider.toUpperCase() })), state.region && (_jsx(ConfigRow, { label: "Region", value: state.region })), _jsx(SectionHeader, { title: "Domain & DNS" }), _jsx(ConfigRow, { label: "Domain", value: state.domain }), _jsx(ConfigRow, { label: "Admin Email", value: state.adminEmail }), _jsx(ConfigRow, { label: "TLS Email", value: state.tlsEmail }), _jsxs(Box, { children: [_jsx(Box, { width: 16, children: _jsx(Text, { color: colors.muted, children: "DNS" }) }), _jsx(Text, { color: colors.accent, children: DNS_PROVIDER_NAMES[state.dnsProvider] }), externalDnsEnabled && _jsx(Text, { color: colors.success, children: " (auto)" })] }), _jsx(SectionHeader, { title: "SMTP" }), _jsx(ConfigRow, { label: "Host", value: `${state.smtpHost}:${state.smtpPort}` }), _jsx(ConfigRow, { label: "From", value: `${state.smtpFromName} <${state.smtpFrom}>` }), _jsx(SectionHeader, { title: "Database" }), _jsx(ConfigRow, { label: "Type", value: state.databaseType === 'supabase-cloud' ? 
'Supabase Cloud' : 'Self-hosted' }), _jsx(SectionHeader, { title: "Performance" }), _jsxs(Box, { children: [_jsx(Box, { width: 16, children: _jsx(Text, { color: colors.muted, children: "Tier" }) }), _jsx(Text, { color: colors.accent, bold: true, children: tierLabel }), tierConfig && _jsxs(Text, { color: colors.muted, children: [" (", tierConfig.throughput, ")"] }), state.infrastructureMode === 'existing' && (_jsx(Text, { color: colors.muted, children: " (used for app sizing)" }))] }), _jsx(SectionHeader, { title: "Features" }), _jsxs(Box, { children: [_jsxs(Text, { color: state.aiEnabled ? colors.success : colors.muted, children: [state.aiEnabled ? '✓' : '○', " AI"] }), _jsx(Text, { children: "  " }), _jsxs(Text, { color: state.ssoEnabled ? colors.success : colors.muted, children: [state.ssoEnabled ? '✓' : '○', " SSO"] }), _jsx(Text, { children: "  " }), _jsxs(Text, { color: state.monitoringEnabled ? colors.success : colors.muted, children: [state.monitoringEnabled ? '✓' : '○', " Monitoring"] }), _jsx(Text, { children: "  " }), _jsxs(Text, { color: state.loggingSink !== 'console' ? colors.success : colors.muted, children: [state.loggingSink !== 'console' ? '✓' : '○', " Logging"] })] }), _jsx(SectionHeader, { title: "License" }), _jsx(ConfigRow, { label: "Key", value: `${state.licenseKey?.substring(0, 12)}...` })] }), _jsxs(Box, { marginTop: 1, flexDirection: "column", children: [_jsx(Text, { color: colors.success, bold: true, children: "Press Enter to save this configuration" }), _jsx(Text, { color: colors.muted, dimColor: true, children: "e to edit name \u2022 Esc to go back" })] })] }));
 }

package/dist/lib/helmValues.js CHANGED

@@ -36,17 +36,39 @@ function generateVectorSinks(config) {
                 };
                 break;
             case "azure-blob":
-                sinks.azure_blob = {
+                if (!bucket) {
+                    throw new Error("Azure Blob logging requires a storage account.");
+                }
+                const azureBlobSink = {
                     type: "azure_blob",
                     inputs: ["kafka"],
-                    container_name: bucket,
-                    storage_account: "rulebrickslogs", // Will be configured via env var
+                    account_name: bucket,
+                    container_name: config.features.logging.azureBlobContainer || "rulebricks-logs",
                     blob_prefix: "rulebricks/logs/%Y/%m/%d/",
                     compression: "gzip",
                     encoding: {
                         codec: "json",
                     },
                 };
+                if (config.features.logging.cloudAuthMode === "secret") {
+                    if (!config.features.logging.azureBlobConnectionStringSecretRef) {
+                        throw new Error("Azure Blob connection string auth requires a secret ref.");
+                    }
+                    azureBlobSink.connection_string = "${AZURE_STORAGE_CONNECTION_STRING}";
+                }
+                else {
+                    if (!config.features.logging.azureBlobClientId ||
+                        !config.features.logging.azureBlobTenantId) {
+                        throw new Error("Azure Blob workload identity requires client ID and tenant ID.");
+                    }
+                    azureBlobSink.auth = {
+                        azure_credential_kind: "workload_identity",
+                        client_id: config.features.logging.azureBlobClientId,
+                        tenant_id: config.features.logging.azureBlobTenantId,
+                        token_file_path: "/var/run/secrets/azure/tokens/azure-identity-token",
+                    };
+                }
+                sinks.azure_blob = azureBlobSink;
                 break;
             case "gcs":
                 sinks.gcs = {
@@ -163,6 +185,61 @@ function generateVectorSinks(config) {
     }
     return sinks;
 }
+function generateVectorEnv(config) {
+    const env = [
+        {
+            name: "KAFKA_BOOTSTRAP_SERVERS",
+            valueFrom: {
+                configMapKeyRef: {
+                    name: "vector-kafka-env",
+                    key: "KAFKA_BOOTSTRAP_SERVERS",
+                },
+            },
+        },
+    ];
+    const azureBlobSecretRef = config.features.logging.azureBlobConnectionStringSecretRef;
+    if (config.features.logging.sink === "azure-blob" &&
+        config.features.logging.cloudAuthMode === "secret" &&
+        azureBlobSecretRef) {
+        env.push({
+            name: "AZURE_STORAGE_CONNECTION_STRING",
+            valueFrom: {
+                secretKeyRef: secretKeySelector(azureBlobSecretRef),
+            },
+        });
+    }
+    return env;
+}
+function generateVectorServiceAccount(config) {
+    const annotations = {};
+    if (config.features.logging.sink === "s3" && config.features.logging.awsIamRoleArn) {
+        annotations["eks.amazonaws.com/role-arn"] =
+            config.features.logging.awsIamRoleArn;
+    }
+    if (config.features.logging.sink === "azure-blob" &&
+        config.features.logging.cloudAuthMode !== "secret" &&
+        config.features.logging.azureBlobClientId) {
+        annotations["azure.workload.identity/client-id"] =
+            config.features.logging.azureBlobClientId;
+    }
+    if (config.features.logging.sink === "gcs" && config.features.logging.gcpServiceAccountEmail) {
+        annotations["iam.gke.io/gcp-service-account"] =
+            config.features.logging.gcpServiceAccountEmail;
+    }
+    return {
+        create: true,
+        name: "vector",
+        annotations,
+    };
+}
+function generateVectorPodLabels(config) {
+    const labels = {};
+    if (config.features.logging.sink === "azure-blob" &&
+        config.features.logging.cloudAuthMode !== "secret") {
+        labels["azure.workload.identity/use"] = "true";
+    }
+    return labels;
+}
 /**
  * Maps DNS provider to external-dns provider name
  */
@@ -175,6 +252,145 @@ function getExternalDnsProvider(dnsProvider) {
     };
     return mapping[dnsProvider] || "aws";
 }
+function secretKeySelector(ref) {
+    return {
+        name: ref.name,
+        key: ref.key,
+    };
+}
+function generateRemoteWriteSpec(config) {
+    if (config.features.monitoring.destination === "local-grafana") {
+        return [];
+    }
+    const remoteWrite = config.features.monitoring.remoteWrite;
+    if (!remoteWrite) {
+        return config.features.monitoring.remoteWriteUrl
+            ? [{ url: config.features.monitoring.remoteWriteUrl }]
+            : [];
+    }
+    const base = {
+        url: remoteWrite.url,
+    };
+    switch (remoteWrite.destination) {
+        case "aws-amp":
+            if (!remoteWrite.awsRegion) {
+                throw new Error("AWS Managed Prometheus remote_write requires a region.");
+            }
+            return [
+                {
+                    ...base,
+                    sigv4: {
+                        region: remoteWrite.awsRegion,
+                    },
+                },
+            ];
+        case "azure-monitor":
+            return [generateAzureMonitorRemoteWrite(remoteWrite, base)];
+        case "grafana-cloud":
+            return [generateBasicAuthRemoteWrite(remoteWrite, base)];
+        case "generic":
+            return [generateGenericRemoteWrite(remoteWrite, base)];
+        default:
+            return [base];
+    }
+}
+function generatePrometheusServiceAccount(config) {
+    const annotations = {};
+    const remoteWrite = config.features.monitoring.remoteWrite;
+    if (remoteWrite?.destination === "aws-amp" && remoteWrite.awsRoleArn) {
+        annotations["eks.amazonaws.com/role-arn"] = remoteWrite.awsRoleArn;
+    }
+    if (remoteWrite?.destination === "azure-monitor" &&
+        remoteWrite.authType === "workload-identity" &&
+        remoteWrite.clientId) {
+        annotations["azure.workload.identity/client-id"] = remoteWrite.clientId;
+    }
+    return {
+        create: true,
+        name: "prometheus",
+        annotations,
+    };
+}
+function generatePrometheusPodMetadata(config) {
+    const remoteWrite = config.features.monitoring.remoteWrite;
+    if (remoteWrite?.destination === "azure-monitor" &&
+        remoteWrite.authType === "workload-identity") {
+        return {
+            labels: {
+                "azure.workload.identity/use": "true",
+            },
+        };
+    }
+    return {};
+}
+function generateAzureMonitorRemoteWrite(remoteWrite, base) {
+    const azureAd = {
+        cloud: remoteWrite.azureCloud || "AzurePublic",
+    };
+    if (remoteWrite.authType === "oauth") {
+        if (!remoteWrite.clientId ||
+            !remoteWrite.tenantId ||
+            !remoteWrite.clientSecretRef) {
+            throw new Error("Azure Monitor remote_write OAuth requires client ID, tenant ID, and client secret ref.");
+        }
+        azureAd.oauth = {
+            clientId: remoteWrite.clientId,
+            tenantId: remoteWrite.tenantId,
+            clientSecret: secretKeySelector(remoteWrite.clientSecretRef),
+        };
+    }
+    else if (remoteWrite.authType === "workload-identity") {
+        if (!remoteWrite.clientId || !remoteWrite.tenantId) {
+            throw new Error("Azure Monitor remote_write workload identity requires client ID and tenant ID.");
+        }
+        azureAd.workloadIdentity = {
+            clientId: remoteWrite.clientId,
+            tenantId: remoteWrite.tenantId,
+        };
+    }
+    else {
+        if (!remoteWrite.clientId) {
+            throw new Error("Azure Monitor remote_write managed identity requires client ID.");
+        }
+        azureAd.managedIdentity = {
+            clientId: remoteWrite.clientId,
+        };
+    }
+    return {
+        ...base,
+        azureAd,
+    };
+}
+function generateBasicAuthRemoteWrite(remoteWrite, base) {
+    if (!remoteWrite.usernameSecretRef || !remoteWrite.passwordSecretRef) {
+        throw new Error("Basic auth remote_write requires username and password secret refs.");
+    }
+    return {
+        ...base,
+        basicAuth: {
+            username: secretKeySelector(remoteWrite.usernameSecretRef),
+            password: secretKeySelector(remoteWrite.passwordSecretRef),
+        },
+    };
+}
+function generateGenericRemoteWrite(remoteWrite, base) {
+    if (remoteWrite.authType === "basic") {
+        return generateBasicAuthRemoteWrite(remoteWrite, base);
+    }
+    if (remoteWrite.authType === "bearer") {
+        if (!remoteWrite.bearerTokenSecretRef) {
+            throw new Error("Bearer remote_write requires a token secret ref.");
+        }
+        return {
+            ...base,
+            authorization: {
+                type: "Bearer",
+                credentials: secretKeySelector(remoteWrite.bearerTokenSecretRef),
+            },
+        };
+    }
+    return base;
+}
 /**
  * Generates Kafka extra environment variables for tuning
  */
@@ -205,6 +421,7 @@ function generateKafkaExtraEnvVars() {
 export async function generateHelmValues(config, options = {}) {
     const tierConfig = TIER_CONFIGS[config.tier];
     const { tlsEnabled = true } = options;
+    const useLocalGrafana = config.features.monitoring.destination === "local-grafana";
     // Determine if external-dns should be enabled
     const externalDnsEnabled = config.dns.autoManage && isSupportedDnsProvider(config.dns.provider);
     // Determine storage class based on provider
@@ -501,22 +718,14 @@ export async function generateHelmValues(config, options = {}) {
             replicas: tierConfig.vectorReplicas,
             resources: tierConfig.vectorResources,
             tolerations: arm64Tolerations,
+            serviceAccount: generateVectorServiceAccount(config),
+            podLabels: generateVectorPodLabels(config),
             service: {
                 enabled: true,
                 ports: [{ name: "api", port: 8686, protocol: "TCP", targetPort: 8686 }],
             },
             // Load KAFKA_BOOTSTRAP_SERVERS from templated ConfigMap
-            env: [
-                {
-                    name: "KAFKA_BOOTSTRAP_SERVERS",
-                    valueFrom: {
-                        configMapKeyRef: {
-                            name: "vector-kafka-env",
-                            key: "KAFKA_BOOTSTRAP_SERVERS",
-                        },
-                    },
-                },
-            ],
+            env: generateVectorEnv(config),
             customConfig: {
                 sources: {
                     kafka: {
@@ -598,12 +807,14 @@ export async function generateHelmValues(config, options = {}) {
                 enabled: false,
             },
             grafana: {
-                enabled: false,
+                enabled: useLocalGrafana,
             },
             prometheus: {
                 enabled: config.features.monitoring.enabled,
+                serviceAccount: generatePrometheusServiceAccount(config),
                 prometheusSpec: {
                     retention: "30d",
+                    podMetadata: generatePrometheusPodMetadata(config),
                     storageSpec: {
                         volumeClaimTemplate: {
                             spec: {
@@ -617,13 +828,7 @@ export async function generateHelmValues(config, options = {}) {
                             },
                         },
                     },
-                    ...(config.features.monitoring.remoteWriteUrl
-                        ? {
-                            remoteWrite: [
-                                { url: config.features.monitoring.remoteWriteUrl },
-                            ],
-                        }
-                        : { remoteWrite: [] }),
+                    remoteWrite: generateRemoteWriteSpec(config),
                 },
             },
         },

package/dist/lib/kubernetes.d.ts CHANGED

@@ -1,4 +1,4 @@
-import { CloudProvider } from "../types/index.js";
+import { CloudProvider, PerformanceTier } from "../types/index.js";
 /**
  * Checks if kubectl is installed
  */
@@ -20,6 +20,12 @@ export declare function checkClusterAccessible(): Promise<string | null>;
  * Gets the current kubectl context
  */
 export declare function getCurrentContext(): Promise<string | null>;
+/**
+ * Infers the closest internal Rulebricks sizing tier from the current cluster.
+ * This is used only for existing clusters, where the CLI is not responsible for
+ * provisioning node pools but still needs app/Kafka/worker Helm sizing values.
+ */
+export declare function inferClusterTier(): Promise<PerformanceTier | null>;
 /**
  * Gets pod status for the Rulebricks namespace
  */
@@ -58,7 +64,31 @@ export interface CertificateStatus {
     name: string;
     dnsNames: string[];
     ready: boolean;
+    failed: boolean;
+    message?: string;
 }
+/**
+ * Deletes a failed cert-manager Certificate and recreates it from its spec,
+ * bypassing cert-manager's exponential backoff on failed issuance attempts.
+ * The delete cascades to the failed CertificateRequest and ACME Order via
+ * owner references, so the recreated Certificate starts with a clean slate.
+ */
+export declare function recreateFailedCertificate(namespace: string, certName: string): Promise<boolean>;
+/**
+ * Polls cert-manager Certificates until all are Ready, with automatic retry
+ * for transient ACME failures (e.g. order finalization race conditions).
+ *
+ * On failure detection: deletes and recreates the Certificate resource to
+ * bypass cert-manager's 1-hour exponential backoff, then continues polling.
+ *
+ * Throws on timeout with details about which certs are not ready.
+ * Returns silently if no Certificate resources exist in the namespace.
+ */
+export declare function waitForCertificatesReady(namespace: string, options?: {
+    timeoutMs?: number;
+    pollIntervalMs?: number;
+    maxRetries?: number;
+}): Promise<void>;
 /**
  * Streams logs from a pod
  */

package/dist/lib/kubernetes.js CHANGED

@@ -165,6 +165,65 @@ export async function getCurrentContext() {
         return null;
     }
 }
+function parseCpuToCores(cpu) {
+    if (cpu.endsWith("n"))
+        return Number(cpu.slice(0, -1)) / 1_000_000_000;
+    if (cpu.endsWith("u"))
+        return Number(cpu.slice(0, -1)) / 1_000_000;
+    if (cpu.endsWith("m"))
+        return Number(cpu.slice(0, -1)) / 1_000;
+    return Number(cpu);
+}
+function parseMemoryToGi(memory) {
+    const match = memory.match(/^(\d+(?:\.\d+)?)([KMGTP]i?|[kMGTPE])?$/);
+    if (!match)
+        return 0;
+    const value = Number(match[1]);
+    const unit = match[2] || "";
+    const multipliers = {
+        Ki: 1 / 1024 / 1024,
+        Mi: 1 / 1024,
+        Gi: 1,
+        Ti: 1024,
+        Pi: 1024 * 1024,
+        K: 1000 / 1024 / 1024 / 1024,
+        M: 1000 ** 2 / 1024 ** 3,
+        G: 1000 ** 3 / 1024 ** 3,
+        T: 1000 ** 4 / 1024 ** 3,
+        P: 1000 ** 5 / 1024 ** 3,
+    };
+    return value * (multipliers[unit] ?? 1 / 1024 ** 3);
+}
+/**
+ * Infers the closest internal Rulebricks sizing tier from the current cluster.
+ * This is used only for existing clusters, where the CLI is not responsible for
+ * provisioning node pools but still needs app/Kafka/worker Helm sizing values.
+ */
+export async function inferClusterTier() {
+    try {
+        const { stdout } = await execa("kubectl", ["get", "nodes", "-o", "json"], {
+            timeout: 15000,
+        });
+        const data = JSON.parse(stdout);
+        const schedulableNodes = data.items?.filter((node) => !node.spec?.unschedulable) ?? [];
+        let totalCpu = 0;
+        let totalMemoryGi = 0;
+        for (const node of schedulableNodes) {
+            totalCpu += parseCpuToCores(node.status?.allocatable?.cpu || "0");
+            totalMemoryGi += parseMemoryToGi(node.status?.allocatable?.memory || "0");
+        }
+        if (totalCpu >= 40 && totalMemoryGi >= 80)
+            return "large";
+        if (totalCpu >= 16 && totalMemoryGi >= 32)
+            return "medium";
+        if (totalCpu > 0 && totalMemoryGi > 0)
+            return "small";
+        return null;
+    }
+    catch {
+        return null;
+    }
+}
 /**
  * Gets pod status for the Rulebricks namespace
  */
@@ -258,16 +317,109 @@ export async function getCertificateStatus(namespace = DEFAULT_NAMESPACE) {
             "json",
         ]);
         const data = JSON.parse(stdout);
-        return data.items.map((cert) => ({
-            name: cert.metadata.name,
-            dnsNames: cert.spec.dnsNames ?? [],
-            ready: cert.status.conditions?.some((c) => c.type === "Ready" && c.status === "True") ?? false,
-        }));
+        return data.items.map((cert) => {
+            const readyCond = cert.status.conditions?.find((c) => c.type === "Ready");
+            const issuingCond = cert.status.conditions?.find((c) => c.type === "Issuing");
+            const ready = readyCond?.status === "True";
+            const failed = !ready &&
+                issuingCond?.status === "False" &&
+                issuingCond?.reason === "Failed";
+            return {
+                name: cert.metadata.name,
+                dnsNames: cert.spec.dnsNames ?? [],
+                ready,
+                failed: failed ?? false,
+                message: failed ? issuingCond?.message : readyCond?.message,
+            };
+        });
     }
     catch {
         return [];
     }
 }
+/**
+ * Deletes a failed cert-manager Certificate and recreates it from its spec,
+ * bypassing cert-manager's exponential backoff on failed issuance attempts.
+ * The delete cascades to the failed CertificateRequest and ACME Order via
+ * owner references, so the recreated Certificate starts with a clean slate.
+ */
+export async function recreateFailedCertificate(namespace, certName) {
+    try {
+        const { stdout } = await execa("kubectl", [
+            "get",
+            "certificate",
+            certName,
+            "-n",
+            namespace,
+            "-o",
+            "json",
+        ]);
+        const cert = JSON.parse(stdout);
+        const recreated = {
+            apiVersion: "cert-manager.io/v1",
+            kind: "Certificate",
+            metadata: {
+                name: cert.metadata.name,
+                namespace: cert.metadata.namespace,
+                ...(cert.metadata.labels ? { labels: cert.metadata.labels } : {}),
+                ...(cert.metadata.annotations
+                    ? { annotations: cert.metadata.annotations }
+                    : {}),
+            },
+            spec: cert.spec,
+        };
+        await execa("kubectl", ["delete", "certificate", certName, "-n", namespace]);
+        await execa("kubectl", ["apply", "-f", "-"], {
+            input: JSON.stringify(recreated),
+        });
+        return true;
+    }
+    catch {
+        return false;
+    }
+}
+/**
+ * Polls cert-manager Certificates until all are Ready, with automatic retry
+ * for transient ACME failures (e.g. order finalization race conditions).
+ *
+ * On failure detection: deletes and recreates the Certificate resource to
+ * bypass cert-manager's 1-hour exponential backoff, then continues polling.
+ *
+ * Throws on timeout with details about which certs are not ready.
+ * Returns silently if no Certificate resources exist in the namespace.
+ */
+export async function waitForCertificatesReady(namespace, options) {
+    const { timeoutMs = 120_000, pollIntervalMs = 5_000, maxRetries = 1, } = options ?? {};
+    let retriesUsed = 0;
+    const deadline = Date.now() + timeoutMs;
+    while (Date.now() < deadline) {
+        const certs = await getCertificateStatus(namespace);
+        if (certs.length === 0)
+            return;
+        if (certs.every((c) => c.ready))
+            return;
+        const failed = certs.filter((c) => c.failed);
+        if (failed.length > 0 && retriesUsed < maxRetries) {
+            for (const cert of failed) {
+                await recreateFailedCertificate(namespace, cert.name);
+            }
+            retriesUsed++;
+        }
+        await sleep(pollIntervalMs);
+    }
+    // Final check after timeout
+    const certs = await getCertificateStatus(namespace);
+    if (certs.length > 0 && certs.every((c) => c.ready))
+        return;
+    const notReady = certs.filter((c) => !c.ready);
+    if (notReady.length > 0) {
+        const details = notReady
+            .map((c) => `  ${c.name}: ${c.message || "not ready"}`)
+            .join("\n");
+        throw new Error(`TLS certificates not ready after ${timeoutMs / 1000}s:\n${details}\n\n` +
+            `Run 'rulebricks status' to check certificate status.`);
+    }
+}
 /**
  * Streams logs from a pod
  */