@rulebricks/cli 2.1.5 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,7 +43,12 @@ export function ReviewStep({ onComplete, onBack }) {
43
43
  dispatch({ type: 'SET_NAME', name });
44
44
  setEditingName(false);
45
45
  };
46
- const tierConfig = state.tier ? TIER_CONFIGS[state.tier] : null;
46
+ const tierConfig = state.infrastructureMode !== 'existing' && state.tier ? TIER_CONFIGS[state.tier] : null;
47
+ const tierLabel = state.infrastructureMode === 'existing'
48
+ ? 'Inferred from cluster'
49
+ : state.tier
50
+ ? `${state.tier.charAt(0).toUpperCase()}${state.tier.slice(1)}`
51
+ : 'Not selected';
47
52
  const externalDnsEnabled = state.dnsAutoManage && isSupportedDnsProvider(state.dnsProvider);
48
53
  if (editingName) {
49
54
  return (_jsx(BorderBox, { title: "Deployment Name", children: _jsxs(Box, { flexDirection: "column", marginY: 1, children: [_jsx(Text, { children: "Enter a name for this deployment:" }), _jsx(Text, { color: "gray", dimColor: true, children: "Lowercase letters, numbers, and hyphens only" }), _jsx(Box, { marginTop: 1, children: _jsx(TextInput, { value: name, onChange: setName, onSubmit: handleNameSubmit, placeholder: "my-deployment" }) }), error && (_jsx(Box, { marginTop: 1, children: _jsxs(Text, { color: "red", children: ["\u2717 ", error] }) }))] }) }));
@@ -52,5 +57,5 @@ export function ReviewStep({ onComplete, onBack }) {
52
57
  const ConfigRow = ({ label, value, valueColor }) => (_jsxs(Box, { children: [_jsx(Box, { width: 16, children: _jsx(Text, { color: colors.muted, children: label }) }), _jsx(Text, { color: valueColor || colors.accent, children: value })] }));
53
58
  // Helper to render a section header
54
59
  const SectionHeader = ({ title }) => (_jsx(Box, { marginTop: 1, children: _jsxs(Text, { bold: true, color: colors.accent, children: ["\u2500\u2500 ", title, " \u2500\u2500"] }) }));
55
- return (_jsxs(BorderBox, { title: "Review Configuration", children: [_jsxs(Box, { flexDirection: "column", children: [_jsx(SectionHeader, { title: "Deployment" }), _jsx(ConfigRow, { label: "Name", value: state.name }), state.appVersion && (_jsx(ConfigRow, { label: "App Version", value: state.appVersion })), _jsx(SectionHeader, { title: "Infrastructure" }), _jsx(ConfigRow, { label: "Mode", value: state.infrastructureMode === 'provision' ? 'Provision new cluster' : 'Use existing cluster' }), state.provider && (_jsx(ConfigRow, { label: "Provider", value: state.provider.toUpperCase() })), state.region && (_jsx(ConfigRow, { label: "Region", value: state.region })), _jsx(SectionHeader, { title: "Domain & DNS" }), _jsx(ConfigRow, { label: "Domain", value: state.domain }), _jsx(ConfigRow, { label: "Admin Email", value: state.adminEmail }), _jsx(ConfigRow, { label: "TLS Email", value: state.tlsEmail }), _jsxs(Box, { children: [_jsx(Box, { width: 16, children: _jsx(Text, { color: colors.muted, children: "DNS" }) }), _jsx(Text, { color: colors.accent, children: DNS_PROVIDER_NAMES[state.dnsProvider] }), externalDnsEnabled && _jsx(Text, { color: colors.success, children: " (auto)" })] }), _jsx(SectionHeader, { title: "SMTP" }), _jsx(ConfigRow, { label: "Host", value: `${state.smtpHost}:${state.smtpPort}` }), _jsx(ConfigRow, { label: "From", value: `${state.smtpFromName} <${state.smtpFrom}>` }), _jsx(SectionHeader, { title: "Database" }), _jsx(ConfigRow, { label: "Type", value: state.databaseType === 'supabase-cloud' ? 
'Supabase Cloud' : 'Self-hosted' }), _jsx(SectionHeader, { title: "Performance" }), _jsxs(Box, { children: [_jsx(Box, { width: 16, children: _jsx(Text, { color: colors.muted, children: "Tier" }) }), _jsxs(Text, { color: colors.accent, bold: true, children: [state.tier?.charAt(0).toUpperCase(), state.tier?.slice(1)] }), tierConfig && _jsxs(Text, { color: colors.muted, children: [" (", tierConfig.throughput, ")"] })] }), _jsx(SectionHeader, { title: "Features" }), _jsxs(Box, { children: [_jsxs(Text, { color: state.aiEnabled ? colors.success : colors.muted, children: [state.aiEnabled ? '✓' : '○', " AI"] }), _jsx(Text, { children: " " }), _jsxs(Text, { color: state.ssoEnabled ? colors.success : colors.muted, children: [state.ssoEnabled ? '✓' : '○', " SSO"] }), _jsx(Text, { children: " " }), _jsxs(Text, { color: state.monitoringEnabled ? colors.success : colors.muted, children: [state.monitoringEnabled ? '✓' : '○', " Monitoring"] }), _jsx(Text, { children: " " }), _jsxs(Text, { color: state.loggingSink !== 'console' ? colors.success : colors.muted, children: [state.loggingSink !== 'console' ? '✓' : '○', " Logging"] })] }), _jsx(SectionHeader, { title: "License" }), _jsx(ConfigRow, { label: "Key", value: `${state.licenseKey?.substring(0, 12)}...` })] }), _jsxs(Box, { marginTop: 1, flexDirection: "column", children: [_jsx(Text, { color: colors.success, bold: true, children: "Press Enter to save this configuration" }), _jsx(Text, { color: colors.muted, dimColor: true, children: "e to edit name \u2022 Esc to go back" })] })] }));
60
+ return (_jsxs(BorderBox, { title: "Review Configuration", children: [_jsxs(Box, { flexDirection: "column", children: [_jsx(SectionHeader, { title: "Deployment" }), _jsx(ConfigRow, { label: "Name", value: state.name }), state.appVersion && (_jsx(ConfigRow, { label: "App Version", value: state.appVersion })), _jsx(SectionHeader, { title: "Infrastructure" }), _jsx(ConfigRow, { label: "Mode", value: state.infrastructureMode === 'provision' ? 'Provision new cluster' : 'Use existing cluster' }), state.provider && (_jsx(ConfigRow, { label: "Provider", value: state.provider.toUpperCase() })), state.region && (_jsx(ConfigRow, { label: "Region", value: state.region })), _jsx(SectionHeader, { title: "Domain & DNS" }), _jsx(ConfigRow, { label: "Domain", value: state.domain }), _jsx(ConfigRow, { label: "Admin Email", value: state.adminEmail }), _jsx(ConfigRow, { label: "TLS Email", value: state.tlsEmail }), _jsxs(Box, { children: [_jsx(Box, { width: 16, children: _jsx(Text, { color: colors.muted, children: "DNS" }) }), _jsx(Text, { color: colors.accent, children: DNS_PROVIDER_NAMES[state.dnsProvider] }), externalDnsEnabled && _jsx(Text, { color: colors.success, children: " (auto)" })] }), _jsx(SectionHeader, { title: "SMTP" }), _jsx(ConfigRow, { label: "Host", value: `${state.smtpHost}:${state.smtpPort}` }), _jsx(ConfigRow, { label: "From", value: `${state.smtpFromName} <${state.smtpFrom}>` }), _jsx(SectionHeader, { title: "Database" }), _jsx(ConfigRow, { label: "Type", value: state.databaseType === 'supabase-cloud' ? 
'Supabase Cloud' : 'Self-hosted' }), _jsx(SectionHeader, { title: "Performance" }), _jsxs(Box, { children: [_jsx(Box, { width: 16, children: _jsx(Text, { color: colors.muted, children: "Tier" }) }), _jsx(Text, { color: colors.accent, bold: true, children: tierLabel }), tierConfig && _jsxs(Text, { color: colors.muted, children: [" (", tierConfig.throughput, ")"] }), state.infrastructureMode === 'existing' && (_jsx(Text, { color: colors.muted, children: " (used for app sizing)" }))] }), _jsx(SectionHeader, { title: "Features" }), _jsxs(Box, { children: [_jsxs(Text, { color: state.aiEnabled ? colors.success : colors.muted, children: [state.aiEnabled ? '✓' : '○', " AI"] }), _jsx(Text, { children: " " }), _jsxs(Text, { color: state.ssoEnabled ? colors.success : colors.muted, children: [state.ssoEnabled ? '✓' : '○', " SSO"] }), _jsx(Text, { children: " " }), _jsxs(Text, { color: state.monitoringEnabled ? colors.success : colors.muted, children: [state.monitoringEnabled ? '✓' : '○', " Monitoring"] }), _jsx(Text, { children: " " }), _jsxs(Text, { color: state.loggingSink !== 'console' ? colors.success : colors.muted, children: [state.loggingSink !== 'console' ? '✓' : '○', " Logging"] })] }), _jsx(SectionHeader, { title: "License" }), _jsx(ConfigRow, { label: "Key", value: `${state.licenseKey?.substring(0, 12)}...` })] }), _jsxs(Box, { marginTop: 1, flexDirection: "column", children: [_jsx(Text, { color: colors.success, bold: true, children: "Press Enter to save this configuration" }), _jsx(Text, { color: colors.muted, dimColor: true, children: "e to edit name \u2022 Esc to go back" })] })] }));
56
61
  }
@@ -36,17 +36,39 @@ function generateVectorSinks(config) {
36
36
  };
37
37
  break;
38
38
  case "azure-blob":
39
- sinks.azure_blob = {
39
+ if (!bucket) {
40
+ throw new Error("Azure Blob logging requires a storage account.");
41
+ }
42
+ const azureBlobSink = {
40
43
  type: "azure_blob",
41
44
  inputs: ["kafka"],
42
- container_name: bucket,
43
- storage_account: "rulebrickslogs", // Will be configured via env var
45
+ account_name: bucket,
46
+ container_name: config.features.logging.azureBlobContainer || "rulebricks-logs",
44
47
  blob_prefix: "rulebricks/logs/%Y/%m/%d/",
45
48
  compression: "gzip",
46
49
  encoding: {
47
50
  codec: "json",
48
51
  },
49
52
  };
53
+ if (config.features.logging.cloudAuthMode === "secret") {
54
+ if (!config.features.logging.azureBlobConnectionStringSecretRef) {
55
+ throw new Error("Azure Blob connection string auth requires a secret ref.");
56
+ }
57
+ azureBlobSink.connection_string = "${AZURE_STORAGE_CONNECTION_STRING}";
58
+ }
59
+ else {
60
+ if (!config.features.logging.azureBlobClientId ||
61
+ !config.features.logging.azureBlobTenantId) {
62
+ throw new Error("Azure Blob workload identity requires client ID and tenant ID.");
63
+ }
64
+ azureBlobSink.auth = {
65
+ azure_credential_kind: "workload_identity",
66
+ client_id: config.features.logging.azureBlobClientId,
67
+ tenant_id: config.features.logging.azureBlobTenantId,
68
+ token_file_path: "/var/run/secrets/azure/tokens/azure-identity-token",
69
+ };
70
+ }
71
+ sinks.azure_blob = azureBlobSink;
50
72
  break;
51
73
  case "gcs":
52
74
  sinks.gcs = {
@@ -163,6 +185,61 @@ function generateVectorSinks(config) {
163
185
  }
164
186
  return sinks;
165
187
  }
188
/**
 * Builds the environment variable list for the Vector workload.
 *
 * KAFKA_BOOTSTRAP_SERVERS is always read from the "vector-kafka-env"
 * ConfigMap. AZURE_STORAGE_CONNECTION_STRING is appended only when the
 * logging sink is "azure-blob", the cloud auth mode is "secret", and a
 * connection-string secret ref is present.
 *
 * @param {object} config - Deployment config; reads `config.features.logging`.
 * @returns {Array<object>} Env entries, each with `name` and `valueFrom`.
 */
function generateVectorEnv(config) {
    const logging = config.features.logging;
    const kafkaBootstrapServers = {
        name: "KAFKA_BOOTSTRAP_SERVERS",
        valueFrom: {
            configMapKeyRef: {
                name: "vector-kafka-env",
                key: "KAFKA_BOOTSTRAP_SERVERS",
            },
        },
    };
    const connectionStringRef = logging.azureBlobConnectionStringSecretRef;
    // Anything other than "Azure Blob with a secret-backed connection string"
    // needs only the Kafka variable.
    if (logging.sink !== "azure-blob" ||
        logging.cloudAuthMode !== "secret" ||
        !connectionStringRef) {
        return [kafkaBootstrapServers];
    }
    return [
        kafkaBootstrapServers,
        {
            name: "AZURE_STORAGE_CONNECTION_STRING",
            valueFrom: {
                secretKeyRef: secretKeySelector(connectionStringRef),
            },
        },
    ];
}
213
/**
 * Builds the service account values for Vector, adding the cloud identity
 * annotation that matches the configured logging sink:
 *  - "s3": `eks.amazonaws.com/role-arn` when an IAM role ARN is set
 *  - "azure-blob": `azure.workload.identity/client-id` when the auth mode is
 *    not "secret" and a client ID is set
 *  - "gcs": `iam.gke.io/gcp-service-account` when a service account email is set
 *
 * @param {object} config - Deployment config; reads `config.features.logging`.
 * @returns {{create: boolean, name: string, annotations: object}}
 */
function generateVectorServiceAccount(config) {
    const { sink, cloudAuthMode, awsIamRoleArn, azureBlobClientId, gcpServiceAccountEmail, } = config.features.logging;
    const annotations = {};
    switch (sink) {
        case "s3":
            if (awsIamRoleArn) {
                annotations["eks.amazonaws.com/role-arn"] = awsIamRoleArn;
            }
            break;
        case "azure-blob":
            if (cloudAuthMode !== "secret" && azureBlobClientId) {
                annotations["azure.workload.identity/client-id"] = azureBlobClientId;
            }
            break;
        case "gcs":
            if (gcpServiceAccountEmail) {
                annotations["iam.gke.io/gcp-service-account"] = gcpServiceAccountEmail;
            }
            break;
        default:
            break;
    }
    return { create: true, name: "vector", annotations };
}
235
/**
 * Returns the pod labels for Vector. The `azure.workload.identity/use` label
 * is set to "true" when the sink is "azure-blob" and the cloud auth mode is
 * anything other than "secret"; otherwise no labels are added.
 *
 * @param {object} config - Deployment config; reads `config.features.logging`.
 * @returns {object} Label map (possibly empty).
 */
function generateVectorPodLabels(config) {
    const { sink, cloudAuthMode } = config.features.logging;
    const usesWorkloadIdentity = sink === "azure-blob" && cloudAuthMode !== "secret";
    return usesWorkloadIdentity ? { "azure.workload.identity/use": "true" } : {};
}
166
243
  /**
167
244
  * Maps DNS provider to external-dns provider name
168
245
  */
@@ -175,6 +252,145 @@ function getExternalDnsProvider(dnsProvider) {
175
252
  };
176
253
  return mapping[dnsProvider] || "aws";
177
254
  }
255
/**
 * Reduces a secret reference to just its `name` and `key` fields; any other
 * properties on `ref` are dropped.
 *
 * @param {{name: string, key: string}} ref - Secret reference.
 * @returns {{name: string, key: string}}
 */
function secretKeySelector(ref) {
    const { name, key } = ref;
    return { name, key };
}
261
/**
 * Produces the Prometheus `remoteWrite` list from the monitoring config.
 *
 * - Destination "local-grafana": always an empty list.
 * - No structured `remoteWrite` object: falls back to the plain
 *   `remoteWriteUrl` string (one entry) or an empty list.
 * - Otherwise one entry is built according to `remoteWrite.destination`;
 *   unrecognized destinations get a URL-only entry.
 *
 * @param {object} config - Deployment config; reads `config.features.monitoring`.
 * @returns {Array<object>} Remote-write entries.
 * @throws {Error} When "aws-amp" is selected without `awsRegion`, or when a
 *   destination-specific builder rejects the settings.
 */
function generateRemoteWriteSpec(config) {
    const monitoring = config.features.monitoring;
    if (monitoring.destination === "local-grafana") {
        return [];
    }
    const remoteWrite = monitoring.remoteWrite;
    if (!remoteWrite) {
        const legacyUrl = monitoring.remoteWriteUrl;
        return legacyUrl ? [{ url: legacyUrl }] : [];
    }
    const base = { url: remoteWrite.url };
    const destination = remoteWrite.destination;
    if (destination === "aws-amp") {
        const region = remoteWrite.awsRegion;
        if (!region) {
            throw new Error("AWS Managed Prometheus remote_write requires a region.");
        }
        return [{ ...base, sigv4: { region } }];
    }
    if (destination === "azure-monitor") {
        return [generateAzureMonitorRemoteWrite(remoteWrite, base)];
    }
    if (destination === "grafana-cloud") {
        return [generateBasicAuthRemoteWrite(remoteWrite, base)];
    }
    if (destination === "generic") {
        return [generateGenericRemoteWrite(remoteWrite, base)];
    }
    return [base];
}
297
/**
 * Builds the service account values for Prometheus, annotated for the
 * remote-write destination where applicable:
 *  - "aws-amp": `eks.amazonaws.com/role-arn` when `awsRoleArn` is set
 *  - "azure-monitor" with authType "workload-identity":
 *    `azure.workload.identity/client-id` when `clientId` is set
 *
 * @param {object} config - Deployment config; reads
 *   `config.features.monitoring.remoteWrite` (may be absent).
 * @returns {{create: boolean, name: string, annotations: object}}
 */
function generatePrometheusServiceAccount(config) {
    const remoteWrite = config.features.monitoring.remoteWrite;
    const destination = remoteWrite?.destination;
    const annotations = {};
    // A matching destination implies remoteWrite is non-null, so the plain
    // property reads below are safe.
    if (destination === "aws-amp") {
        if (remoteWrite.awsRoleArn) {
            annotations["eks.amazonaws.com/role-arn"] = remoteWrite.awsRoleArn;
        }
    }
    else if (destination === "azure-monitor") {
        if (remoteWrite.authType === "workload-identity" && remoteWrite.clientId) {
            annotations["azure.workload.identity/client-id"] = remoteWrite.clientId;
        }
    }
    return { create: true, name: "prometheus", annotations };
}
314
/**
 * Returns pod metadata for Prometheus. When remote write targets
 * "azure-monitor" with authType "workload-identity", the pod is labelled
 * `azure.workload.identity/use: "true"`; otherwise the metadata is empty.
 *
 * @param {object} config - Deployment config; reads
 *   `config.features.monitoring.remoteWrite` (may be absent).
 * @returns {object} `{ labels: {...} }` or `{}`.
 */
function generatePrometheusPodMetadata(config) {
    const remoteWrite = config.features.monitoring.remoteWrite;
    const usesAzureWorkloadIdentity = remoteWrite?.destination === "azure-monitor" &&
        remoteWrite.authType === "workload-identity";
    if (!usesAzureWorkloadIdentity) {
        return {};
    }
    return {
        labels: { "azure.workload.identity/use": "true" },
    };
}
326
/**
 * Builds a remote-write entry for Azure Monitor by attaching an `azureAd`
 * section to `base`.
 *
 * `azureAd.cloud` defaults to "AzurePublic". The auth sub-section depends on
 * `remoteWrite.authType`:
 *  - "oauth": `oauth` with clientId, tenantId and a client-secret selector
 *  - "workload-identity": `workloadIdentity` with clientId and tenantId
 *  - anything else: `managedIdentity` with clientId
 *
 * @param {object} remoteWrite - Structured remote-write settings.
 * @param {object} base - Entry to extend (carries at least the URL).
 * @returns {object} `base` plus `azureAd`.
 * @throws {Error} When the fields required by the chosen auth type are missing.
 */
function generateAzureMonitorRemoteWrite(remoteWrite, base) {
    const { authType, clientId, tenantId, clientSecretRef } = remoteWrite;
    const cloud = remoteWrite.azureCloud || "AzurePublic";
    let authSection;
    switch (authType) {
        case "oauth":
            if (!clientId || !tenantId || !clientSecretRef) {
                throw new Error("Azure Monitor remote_write OAuth requires client ID, tenant ID, and client secret ref.");
            }
            authSection = {
                oauth: {
                    clientId,
                    tenantId,
                    clientSecret: secretKeySelector(clientSecretRef),
                },
            };
            break;
        case "workload-identity":
            if (!clientId || !tenantId) {
                throw new Error("Azure Monitor remote_write workload identity requires client ID and tenant ID.");
            }
            authSection = { workloadIdentity: { clientId, tenantId } };
            break;
        default:
            // Every other auth type is treated as managed identity.
            if (!clientId) {
                throw new Error("Azure Monitor remote_write managed identity requires client ID.");
            }
            authSection = { managedIdentity: { clientId } };
            break;
    }
    return {
        ...base,
        azureAd: { cloud, ...authSection },
    };
}
364
/**
 * Builds a remote-write entry that authenticates with HTTP basic auth, with
 * username and password each taken from a secret reference.
 *
 * @param {object} remoteWrite - Reads `usernameSecretRef` and `passwordSecretRef`.
 * @param {object} base - Entry to extend (carries at least the URL).
 * @returns {object} `base` plus a `basicAuth` section.
 * @throws {Error} When either secret ref is missing.
 */
function generateBasicAuthRemoteWrite(remoteWrite, base) {
    const { usernameSecretRef, passwordSecretRef } = remoteWrite;
    const hasBothRefs = Boolean(usernameSecretRef) && Boolean(passwordSecretRef);
    if (!hasBothRefs) {
        throw new Error("Basic auth remote_write requires username and password secret refs.");
    }
    const basicAuth = {
        username: secretKeySelector(usernameSecretRef),
        password: secretKeySelector(passwordSecretRef),
    };
    return { ...base, basicAuth };
}
376
/**
 * Builds a remote-write entry for a generic endpoint.
 *
 *  - authType "basic": delegates to the basic-auth builder
 *  - authType "bearer": adds an `authorization` section of type "Bearer"
 *    whose credentials come from `bearerTokenSecretRef`
 *  - any other authType: returns `base` unchanged (the same object)
 *
 * @param {object} remoteWrite - Structured remote-write settings.
 * @param {object} base - Entry to extend (carries at least the URL).
 * @returns {object} The remote-write entry.
 * @throws {Error} When "bearer" is selected without a token secret ref, or
 *   when the basic-auth builder rejects the settings.
 */
function generateGenericRemoteWrite(remoteWrite, base) {
    switch (remoteWrite.authType) {
        case "basic":
            return generateBasicAuthRemoteWrite(remoteWrite, base);
        case "bearer": {
            const tokenRef = remoteWrite.bearerTokenSecretRef;
            if (!tokenRef) {
                throw new Error("Bearer remote_write requires a token secret ref.");
            }
            const authorization = {
                type: "Bearer",
                credentials: secretKeySelector(tokenRef),
            };
            return { ...base, authorization };
        }
        default:
            return base;
    }
}
178
394
  /**
179
395
  * Generates Kafka extra environment variables for tuning
180
396
  */
@@ -205,6 +421,7 @@ function generateKafkaExtraEnvVars() {
205
421
  export async function generateHelmValues(config, options = {}) {
206
422
  const tierConfig = TIER_CONFIGS[config.tier];
207
423
  const { tlsEnabled = true } = options;
424
+ const useLocalGrafana = config.features.monitoring.destination === "local-grafana";
208
425
  // Determine if external-dns should be enabled
209
426
  const externalDnsEnabled = config.dns.autoManage && isSupportedDnsProvider(config.dns.provider);
210
427
  // Determine storage class based on provider
@@ -501,22 +718,14 @@ export async function generateHelmValues(config, options = {}) {
501
718
  replicas: tierConfig.vectorReplicas,
502
719
  resources: tierConfig.vectorResources,
503
720
  tolerations: arm64Tolerations,
721
+ serviceAccount: generateVectorServiceAccount(config),
722
+ podLabels: generateVectorPodLabels(config),
504
723
  service: {
505
724
  enabled: true,
506
725
  ports: [{ name: "api", port: 8686, protocol: "TCP", targetPort: 8686 }],
507
726
  },
508
727
  // Load KAFKA_BOOTSTRAP_SERVERS from templated ConfigMap
509
- env: [
510
- {
511
- name: "KAFKA_BOOTSTRAP_SERVERS",
512
- valueFrom: {
513
- configMapKeyRef: {
514
- name: "vector-kafka-env",
515
- key: "KAFKA_BOOTSTRAP_SERVERS",
516
- },
517
- },
518
- },
519
- ],
728
+ env: generateVectorEnv(config),
520
729
  customConfig: {
521
730
  sources: {
522
731
  kafka: {
@@ -598,12 +807,14 @@ export async function generateHelmValues(config, options = {}) {
598
807
  enabled: false,
599
808
  },
600
809
  grafana: {
601
- enabled: false,
810
+ enabled: useLocalGrafana,
602
811
  },
603
812
  prometheus: {
604
813
  enabled: config.features.monitoring.enabled,
814
+ serviceAccount: generatePrometheusServiceAccount(config),
605
815
  prometheusSpec: {
606
816
  retention: "30d",
817
+ podMetadata: generatePrometheusPodMetadata(config),
607
818
  storageSpec: {
608
819
  volumeClaimTemplate: {
609
820
  spec: {
@@ -617,13 +828,7 @@ export async function generateHelmValues(config, options = {}) {
617
828
  },
618
829
  },
619
830
  },
620
- ...(config.features.monitoring.remoteWriteUrl
621
- ? {
622
- remoteWrite: [
623
- { url: config.features.monitoring.remoteWriteUrl },
624
- ],
625
- }
626
- : { remoteWrite: [] }),
831
+ remoteWrite: generateRemoteWriteSpec(config),
627
832
  },
628
833
  },
629
834
  },
@@ -1,4 +1,4 @@
1
- import { CloudProvider } from "../types/index.js";
1
+ import { CloudProvider, PerformanceTier } from "../types/index.js";
2
2
  /**
3
3
  * Checks if kubectl is installed
4
4
  */
@@ -20,6 +20,12 @@ export declare function checkClusterAccessible(): Promise<string | null>;
20
20
  * Gets the current kubectl context
21
21
  */
22
22
  export declare function getCurrentContext(): Promise<string | null>;
23
+ /**
24
+ * Infers the closest internal Rulebricks sizing tier from the current cluster.
25
+ * This is used only for existing clusters, where the CLI is not responsible for
26
+ * provisioning node pools but still needs app/Kafka/worker Helm sizing values.
27
+ */
28
+ export declare function inferClusterTier(): Promise<PerformanceTier | null>;
23
29
  /**
24
30
  * Gets pod status for the Rulebricks namespace
25
31
  */
@@ -58,7 +64,31 @@ export interface CertificateStatus {
58
64
  name: string;
59
65
  dnsNames: string[];
60
66
  ready: boolean;
67
+ failed: boolean;
68
+ message?: string;
61
69
  }
70
+ /**
71
+ * Deletes a failed cert-manager Certificate and recreates it from its spec,
72
+ * bypassing cert-manager's exponential backoff on failed issuance attempts.
73
+ * The delete cascades to the failed CertificateRequest and ACME Order via
74
+ * owner references, so the recreated Certificate starts with a clean slate.
75
+ */
76
+ export declare function recreateFailedCertificate(namespace: string, certName: string): Promise<boolean>;
77
+ /**
78
+ * Polls cert-manager Certificates until all are Ready, with automatic retry
79
+ * for transient ACME failures (e.g. order finalization race conditions).
80
+ *
81
+ * On failure detection: deletes and recreates the Certificate resource to
82
+ * bypass cert-manager's 1-hour exponential backoff, then continues polling.
83
+ *
84
+ * Throws on timeout with details about which certs are not ready.
85
+ * Returns silently if no Certificate resources exist in the namespace.
86
+ */
87
+ export declare function waitForCertificatesReady(namespace: string, options?: {
88
+ timeoutMs?: number;
89
+ pollIntervalMs?: number;
90
+ maxRetries?: number;
91
+ }): Promise<void>;
62
92
  /**
63
93
  * Streams logs from a pod
64
94
  */
@@ -165,6 +165,65 @@ export async function getCurrentContext() {
165
165
  return null;
166
166
  }
167
167
  }
168
/**
 * Converts a CPU quantity string to a number of cores.
 *
 * A trailing "n", "u" or "m" divides the numeric part by 1e9, 1e6 or 1e3
 * respectively; any other string is converted with `Number()` as-is (which
 * yields NaN for non-numeric text).
 *
 * @param {string} cpu - CPU quantity, e.g. "500m" or "4".
 * @returns {number} Cores.
 */
function parseCpuToCores(cpu) {
    const suffixDivisors = [
        ["n", 1_000_000_000],
        ["u", 1_000_000],
        ["m", 1_000],
    ];
    for (const [suffix, divisor] of suffixDivisors) {
        if (cpu.endsWith(suffix)) {
            return Number(cpu.slice(0, -1)) / divisor;
        }
    }
    return Number(cpu);
}
177
/**
 * Converts a Kubernetes-style memory quantity string to gibibytes (GiB).
 *
 * Accepted forms:
 *  - plain bytes: "1073741824"
 *  - binary suffixes: Ki, Mi, Gi, Ti, Pi, Ei (powers of 1024)
 *  - decimal suffixes: m (milli), k, M, G, T, P, E (powers of 1000);
 *    upper-case "K" is also accepted as kilo for backward compatibility
 *  - decimal exponent: "129e6", "1.5E+9"
 *
 * Previously "k" and "E" matched the pattern but had no multiplier and were
 * silently treated as plain bytes, and "Ei", "m" and exponent forms were
 * rejected outright.
 *
 * @param {string} memory - Quantity string, e.g. "16374624Ki".
 * @returns {number} Size in GiB, or 0 when the string is not a recognized quantity.
 */
function parseMemoryToGi(memory) {
    const text = String(memory ?? "").trim();
    // Exponent form is tried first so that "5e3" is an exponent while a bare
    // trailing "E" (no digits after it) still falls through to the exa suffix.
    const match = text.match(/^(\d+(?:\.\d+)?)(?:[eE]([+-]?\d+)|(Ki|Mi|Gi|Ti|Pi|Ei|[mkKMGTPE]))?$/);
    if (!match) {
        return 0;
    }
    const [, digits, exponent, suffix = ""] = match;
    const bytesPerUnit = {
        "": 1,
        m: 1 / 1000,
        k: 1000,
        K: 1000,
        M: 1000 ** 2,
        G: 1000 ** 3,
        T: 1000 ** 4,
        P: 1000 ** 5,
        E: 1000 ** 6,
        Ki: 1024,
        Mi: 1024 ** 2,
        Gi: 1024 ** 3,
        Ti: 1024 ** 4,
        Pi: 1024 ** 5,
        Ei: 1024 ** 6,
    };
    const scale = exponent !== undefined ? 10 ** Number(exponent) : bytesPerUnit[suffix];
    return (Number(digits) * scale) / 1024 ** 3;
}
197
/**
 * Infers the closest internal Rulebricks sizing tier from the current cluster.
 * This is used only for existing clusters, where the CLI is not responsible for
 * provisioning node pools but still needs app/Kafka/worker Helm sizing values.
 *
 * Sums allocatable CPU and memory over every node that is not marked
 * unschedulable (via `kubectl get nodes -o json`, 15 s timeout) and maps the
 * totals to a tier:
 *   - "large"  : at least 40 cores and 80 GiB
 *   - "medium" : at least 16 cores and 32 GiB
 *   - "small"  : any positive CPU and memory
 *
 * @returns {Promise<string|null>} The tier name, or null when nothing usable
 *   was found or any step (kubectl, JSON parsing, quantity parsing) failed.
 */
export async function inferClusterTier() {
    // Best effort: every failure below is deliberately mapped to null.
    try {
        const { stdout } = await execa("kubectl", ["get", "nodes", "-o", "json"], {
            timeout: 15000,
        });
        const nodes = JSON.parse(stdout).items ?? [];
        const schedulable = nodes.filter((node) => !node.spec?.unschedulable);
        const totals = schedulable.reduce((acc, node) => {
            const allocatable = node.status?.allocatable;
            acc.cpu += parseCpuToCores(allocatable?.cpu || "0");
            acc.memoryGi += parseMemoryToGi(allocatable?.memory || "0");
            return acc;
        }, { cpu: 0, memoryGi: 0 });
        // Ordered from largest to smallest; the first satisfied tier wins.
        const sizedTiers = [
            { tier: "large", minCpu: 40, minMemoryGi: 80 },
            { tier: "medium", minCpu: 16, minMemoryGi: 32 },
        ];
        const matched = sizedTiers.find(({ minCpu, minMemoryGi }) => totals.cpu >= minCpu && totals.memoryGi >= minMemoryGi);
        if (matched) {
            return matched.tier;
        }
        return totals.cpu > 0 && totals.memoryGi > 0 ? "small" : null;
    }
    catch {
        return null;
    }
}
168
227
  /**
169
228
  * Gets pod status for the Rulebricks namespace
170
229
  */
@@ -258,16 +317,109 @@ export async function getCertificateStatus(namespace = DEFAULT_NAMESPACE) {
258
317
  "json",
259
318
  ]);
260
319
  const data = JSON.parse(stdout);
261
- return data.items.map((cert) => ({
262
- name: cert.metadata.name,
263
- dnsNames: cert.spec.dnsNames ?? [],
264
- ready: cert.status.conditions?.some((c) => c.type === "Ready" && c.status === "True") ?? false,
265
- }));
320
+ return data.items.map((cert) => {
321
+ const readyCond = cert.status.conditions?.find((c) => c.type === "Ready");
322
+ const issuingCond = cert.status.conditions?.find((c) => c.type === "Issuing");
323
+ const ready = readyCond?.status === "True";
324
+ const failed = !ready &&
325
+ issuingCond?.status === "False" &&
326
+ issuingCond?.reason === "Failed";
327
+ return {
328
+ name: cert.metadata.name,
329
+ dnsNames: cert.spec.dnsNames ?? [],
330
+ ready,
331
+ failed: failed ?? false,
332
+ message: failed ? issuingCond?.message : readyCond?.message,
333
+ };
334
+ });
266
335
  }
267
336
  catch {
268
337
  return [];
269
338
  }
270
339
  }
340
/**
 * Deletes a failed cert-manager Certificate and recreates it from its spec,
 * bypassing cert-manager's exponential backoff on failed issuance attempts.
 * The delete cascades to the failed CertificateRequest and ACME Order via
 * owner references, so the recreated Certificate starts with a clean slate.
 *
 * Only name, namespace, labels, annotations and spec are carried over to the
 * recreated resource; status and all other metadata are left out.
 *
 * @param {string} namespace - Namespace holding the Certificate.
 * @param {string} certName - Name of the Certificate to recreate.
 * @returns {Promise<boolean>} true when fetch, delete and apply all succeeded;
 *   false if any step threw.
 */
export async function recreateFailedCertificate(namespace, certName) {
    try {
        const fetched = await execa("kubectl", ["get", "certificate", certName, "-n", namespace, "-o", "json"]);
        const existing = JSON.parse(fetched.stdout);
        const { name, namespace: certNamespace, labels, annotations } = existing.metadata;
        // Key order is kept (name, namespace, labels, annotations) so the
        // serialized manifest is the same as before.
        const metadata = { name, namespace: certNamespace };
        if (labels) {
            metadata.labels = labels;
        }
        if (annotations) {
            metadata.annotations = annotations;
        }
        const manifest = {
            apiVersion: "cert-manager.io/v1",
            kind: "Certificate",
            metadata,
            spec: existing.spec,
        };
        // The manifest is captured before the delete, since the live object
        // is gone afterwards.
        await execa("kubectl", ["delete", "certificate", certName, "-n", namespace]);
        await execa("kubectl", ["apply", "-f", "-"], {
            input: JSON.stringify(manifest),
        });
        return true;
    }
    catch {
        return false;
    }
}
381
/**
 * Polls cert-manager Certificates until all are Ready, with automatic retry
 * for transient ACME failures (e.g. order finalization race conditions).
 *
 * On failure detection: deletes and recreates the Certificate resource to
 * bypass cert-manager's 1-hour exponential backoff, then continues polling.
 * At most `maxRetries` recreate rounds are performed; one round recreates
 * every certificate currently reported as failed.
 *
 * Throws on timeout with details about which certs are not ready.
 * Returns silently if no Certificate resources exist in the namespace.
 *
 * @param {string} namespace - Namespace to watch.
 * @param {{timeoutMs?: number, pollIntervalMs?: number, maxRetries?: number}} [options]
 *   Defaults: 120000 ms timeout, 5000 ms poll interval, 1 retry round.
 * @returns {Promise<void>}
 * @throws {Error} When certificates are still not ready after the timeout.
 */
export async function waitForCertificatesReady(namespace, options) {
    const settings = options ?? {};
    const { timeoutMs = 120_000, pollIntervalMs = 5_000, maxRetries = 1 } = settings;
    const deadline = Date.now() + timeoutMs;
    let retriesUsed = 0;
    while (Date.now() < deadline) {
        const statuses = await getCertificateStatus(namespace);
        // every() is true for an empty list, so this also covers the
        // "no certificates found" case.
        if (statuses.every((status) => status.ready)) {
            return;
        }
        if (retriesUsed < maxRetries) {
            const failedCerts = statuses.filter((status) => status.failed);
            if (failedCerts.length > 0) {
                // Recreated one at a time, in listing order.
                for (const failedCert of failedCerts) {
                    await recreateFailedCertificate(namespace, failedCert.name);
                }
                retriesUsed += 1;
            }
        }
        await sleep(pollIntervalMs);
    }
    // Final check after timeout
    const finalStatuses = await getCertificateStatus(namespace);
    const pending = finalStatuses.filter((status) => !status.ready);
    if (pending.length === 0) {
        return;
    }
    const details = pending
        .map((c) => ` ${c.name}: ${c.message || "not ready"}`)
        .join("\n");
    throw new Error(`TLS certificates not ready after ${timeoutMs / 1000}s:\n${details}\n\n` +
        `Run 'rulebricks status' to check certificate status.`);
}
271
423
  /**
272
424
  * Streams logs from a pod
273
425
  */