@go-to-k/cdkd 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -539,9 +539,6 @@ Both flags accept either form on each invocation:
539
539
  `TYPE` must look like `AWS::Service::Resource`; malformed types are rejected at parse time. `warn < timeout` is enforced both globally and per-type — so `--resource-warn-after AWS::X=10m --resource-timeout AWS::X=5m` is a parse-time error.
540
540
 
541
541
  ```bash
542
- # Bump the per-resource budget to one hour (matches the Custom Resource provider's polling cap)
543
- cdkd deploy --resource-timeout 1h
544
-
545
542
  # Surface "still running" warnings sooner on a fast-feedback dev loop
546
543
  cdkd deploy --resource-warn-after 90s --resource-timeout 10m
547
544
 
@@ -550,19 +547,25 @@ cdkd deploy \
550
547
  --resource-timeout 30m \
551
548
  --resource-timeout AWS::CloudFront::Distribution=1h \
552
549
  --resource-timeout AWS::RDS::DBCluster=1h30m
550
+
551
+ # Force Custom Resources to abort earlier than their 1h self-reported polling cap
552
+ cdkd deploy --resource-timeout AWS::CloudFormation::CustomResource=5m
553
553
  ```
554
554
 
555
555
  ### Why the default is 30m, not 1h
556
556
 
557
- cdkd's Custom Resource provider polls async handlers (`isCompleteHandler` pattern) for up to one hour before giving up. Setting the per-resource timeout to 1h by default would make a single hung Custom Resource hold the whole stack for an hour even though no other resource type ever needs more than a few minutes. A shorter default (`30m`) catches stuck operations faster, and stacks that legitimately rely on long-running Custom Resources opt into the higher budget explicitly with `--resource-timeout 1h`.
557
+ cdkd's Custom Resource provider polls async handlers (`isCompleteHandler` pattern) for up to one hour before giving up. Setting the per-resource timeout to 1h by default would make a single hung non-CR resource hold the whole stack for an hour, even though no resource type other than Custom Resources ever needs more than a few minutes. The 30m global default catches stuck operations faster.
558
+
559
+ For Custom Resources specifically, the provider self-reports its 1h polling cap to the engine via the `getMinResourceTimeoutMs()` interface — the deploy engine resolves the per-resource budget as `max(provider self-report, --resource-timeout global)`, so CR resources get their full hour automatically without the user having to remember `--resource-timeout 1h`. To force CR to abort earlier than its self-reported cap, pass an explicit per-type override (`--resource-timeout AWS::CloudFormation::CustomResource=5m`). Per-type overrides always win over the provider's self-report — they're the documented escape hatch.
558
560
 
559
- The error message on timeout names the resource, type, region, elapsed time, and operation, and reminds you to re-run with `--resource-timeout 1h` (or higher) for genuinely-long resources:
561
+ The error message on timeout names the resource, type, region, elapsed time, and operation, and reminds you that long-running resources self-report their needed budget — so when you see a CR time out, the cause is genuinely the handler, not a too-tight default:
560
562
 
561
563
  ```text
562
564
  Resource MyBucket (AWS::S3::Bucket) in us-east-1 timed out after 30m during CREATE (elapsed 30m).
563
565
  This may indicate a stuck Cloud Control polling loop, hung Custom Resource, or
564
- slow ENI provisioning. Re-run with --resource-timeout 1h if the resource genuinely
565
- needs more time, or --verbose to see the underlying provider activity.
566
+ slow ENI provisioning. Re-run with --resource-timeout AWS::S3::Bucket=<DURATION>
567
+ to bump the budget for this resource type only, or --verbose to see the
568
+ underlying provider activity.
566
569
  ```
567
570
 
568
571
  Note: `--resource-warn-after` must be less than `--resource-timeout`. Reversed values are rejected at parse time.
package/dist/cli.js CHANGED
@@ -1133,8 +1133,9 @@ var ResourceTimeoutError = class _ResourceTimeoutError extends CdkdError {
1133
1133
  super(
1134
1134
  `Resource ${logicalId} (${resourceType}) in ${region} timed out after ${timeoutLabel} during ${operation} (elapsed ${elapsedLabel}).
1135
1135
  This may indicate a stuck Cloud Control polling loop, hung Custom Resource, or
1136
- slow ENI provisioning. Re-run with --resource-timeout 1h if the resource genuinely
1137
- needs more time, or --verbose to see the underlying provider activity.`,
1136
+ slow ENI provisioning. Re-run with --resource-timeout ${resourceType}=<DURATION>
1137
+ to bump the budget for this resource type only, or --verbose to see the
1138
+ underlying provider activity.`,
1138
1139
  "RESOURCE_TIMEOUT"
1139
1140
  );
1140
1141
  this.logicalId = logicalId;
@@ -7448,6 +7449,23 @@ var CustomResourceProvider = class _CustomResourceProvider {
7448
7449
  logger = getLogger().child("CustomResourceProvider");
7449
7450
  responseBucket;
7450
7451
  responsePrefix;
7452
+ /**
7453
+ * Opt out of the deploy engine's outer transient-error retry loop.
7454
+ *
7455
+ * The loop re-invokes `provider.create()` from the top on a transient
7456
+ * SDK error (IAM propagation, HTTP 429/503, etc.). Each invocation
7457
+ * generates a brand-new RequestId and a brand-new pre-signed S3
7458
+ * response URL via `prepareInvocation()`. If the underlying Lambda has
7459
+ * already started — e.g. an outer retry fired between the placeholder
7460
+ * `PutObject` and the `Invoke`, or after the `Invoke` returned but a
7461
+ * spurious downstream error fired — the first attempt's Lambda
7462
+ * response lands at an S3 key that nobody polls, hanging the deploy
7463
+ * until the polling timeout. The provider already polls with its own
7464
+ * exponential backoff for async patterns (CDK Provider framework with
7465
+ * isCompleteHandler), so an outer retry adds nothing but the multi-
7466
+ * key bug.
7467
+ */
7468
+ disableOuterRetry = true;
7451
7469
  /** Max time to wait for synchronous S3 response after Lambda invocation (30 seconds) */
7452
7470
  SYNC_RESPONSE_TIMEOUT_MS = 3e4;
7453
7471
  /** Max time to wait for async S3 response (CDK Provider framework with isCompleteHandler) */
@@ -7467,6 +7485,22 @@ var CustomResourceProvider = class _CustomResourceProvider {
7467
7485
  this.responsePrefix = config?.responsePrefix ?? "custom-resource-responses";
7468
7486
  this.asyncResponseTimeoutMs = config?.asyncResponseTimeoutMs ?? _CustomResourceProvider.DEFAULT_ASYNC_RESPONSE_TIMEOUT_MS;
7469
7487
  }
7488
+ /**
7489
+ * Self-reported minimum per-resource timeout.
7490
+ *
7491
+ * Custom Resource async invocations (CDK Provider framework with
7492
+ * `isCompleteHandler`) poll for up to `asyncResponseTimeoutMs`
7493
+ * (default 1 hour, matching CDK's `totalTimeout` default). The deploy
7494
+ * engine's global `--resource-timeout` default is 30 minutes, which
7495
+ * would abort a perfectly healthy CR mid-poll. Because the provider
7496
+ * self-reports the polling cap, the engine lifts the deadline to `max(self-report,
7497
+ * global)` for CR resources only; a user-supplied per-type override
7498
+ * (`--resource-timeout AWS::CloudFormation::CustomResource=5m`) still
7499
+ * wins for explicit escape-hatching.
7500
+ */
7501
+ getMinResourceTimeoutMs() {
7502
+ return this.asyncResponseTimeoutMs;
7503
+ }
7470
7504
  /**
7471
7505
  * Set the S3 bucket for custom resource responses
7472
7506
  * Called by ProviderRegistry when state bucket is configured
@@ -31890,8 +31924,11 @@ var DeployEngine = class {
31890
31924
  const baseLabel = `${verb} ${logicalId} (${resourceType})`;
31891
31925
  renderer.addTask(logicalId, baseLabel);
31892
31926
  const operationKind = change.changeType === "CREATE" ? "CREATE" : change.changeType === "DELETE" ? "DELETE" : "UPDATE";
31927
+ const provider = this.providerRegistry.getProvider(resourceType);
31928
+ const providerMinTimeoutMs = provider.getMinResourceTimeoutMs?.() ?? 0;
31893
31929
  const warnAfterMs = this.options.resourceWarnAfterByType?.[resourceType] ?? this.options.resourceWarnAfterMs ?? DEFAULT_RESOURCE_WARN_AFTER_MS;
31894
- const timeoutMs = this.options.resourceTimeoutByType?.[resourceType] ?? this.options.resourceTimeoutMs ?? DEFAULT_RESOURCE_TIMEOUT_MS;
31930
+ const globalTimeoutMs = this.options.resourceTimeoutMs ?? DEFAULT_RESOURCE_TIMEOUT_MS;
31931
+ const timeoutMs = this.options.resourceTimeoutByType?.[resourceType] ?? Math.max(providerMinTimeoutMs, globalTimeoutMs);
31895
31932
  try {
31896
31933
  await withResourceDeadline(
31897
31934
  async () => {
@@ -31970,7 +32007,10 @@ var DeployEngine = class {
31970
32007
  const { provider: createProvider, properties: createProps } = this.selectProviderWithSafetyNet(provider, resourceType, resolvedProps, logicalId);
31971
32008
  const result = await this.withRetry(
31972
32009
  () => createProvider.create(logicalId, resourceType, createProps),
31973
- logicalId
32010
+ logicalId,
32011
+ void 0,
32012
+ void 0,
32013
+ provider
31974
32014
  );
31975
32015
  const dependencies = this.extractAllDependencies(template, logicalId);
31976
32016
  stateResources[logicalId] = {
@@ -32024,7 +32064,10 @@ var DeployEngine = class {
32024
32064
  const { provider: replaceProvider, properties: replaceProps } = this.selectProviderWithSafetyNet(provider, resourceType, resolvedProps, logicalId);
32025
32065
  const createResult = await this.withRetry(
32026
32066
  () => replaceProvider.create(logicalId, resourceType, replaceProps),
32027
- logicalId
32067
+ logicalId,
32068
+ void 0,
32069
+ void 0,
32070
+ provider
32028
32071
  );
32029
32072
  const updateReplacePolicy = template?.Resources?.[logicalId]?.UpdateReplacePolicy;
32030
32073
  if (updateReplacePolicy === "Retain") {
@@ -32075,7 +32118,10 @@ var DeployEngine = class {
32075
32118
  updateProps,
32076
32119
  currentProps
32077
32120
  ),
32078
- logicalId
32121
+ logicalId,
32122
+ void 0,
32123
+ void 0,
32124
+ provider
32079
32125
  );
32080
32126
  } catch (updateError) {
32081
32127
  const msg = updateError instanceof Error ? updateError.message : String(updateError);
@@ -32104,7 +32150,10 @@ var DeployEngine = class {
32104
32150
  const { provider: replProvider, properties: replProps } = this.selectProviderWithSafetyNet(provider, resourceType, resolvedProps, logicalId);
32105
32151
  const createResult = await this.withRetry(
32106
32152
  () => replProvider.create(logicalId, resourceType, replProps),
32107
- logicalId
32153
+ logicalId,
32154
+ void 0,
32155
+ void 0,
32156
+ provider
32108
32157
  );
32109
32158
  result = {
32110
32159
  physicalId: createResult.physicalId,
@@ -32161,7 +32210,8 @@ var DeployEngine = class {
32161
32210
  logicalId,
32162
32211
  3,
32163
32212
  // fewer retries for DELETE
32164
- 5e3
32213
+ 5e3,
32214
+ provider
32165
32215
  );
32166
32216
  } catch (deleteError) {
32167
32217
  const msg = deleteError instanceof Error ? deleteError.message : String(deleteError);
@@ -32338,8 +32388,18 @@ var DeployEngine = class {
32338
32388
  * Thin wrapper over `withRetry` from ./retry.js that injects this engine's
32339
32389
  * SIGINT-aware interrupt check and logger. The actual backoff schedule
32340
32390
  * lives there.
32391
+ *
32392
+ * When the provider opts out via `disableOuterRetry`, the operation is
32393
+ * invoked exactly once and the retry loop is skipped entirely. The
32394
+ * Custom Resource provider uses this to avoid re-running its `create()`
32395
+ * — each invocation derives a fresh pre-signed S3 URL and RequestId,
32396
+ * so an outer retry leaves the previous attempt's Lambda response
32397
+ * stranded at an S3 key nobody polls.
32341
32398
  */
32342
- async withRetry(operation, logicalId, maxRetries, initialDelayMs) {
32399
+ async withRetry(operation, logicalId, maxRetries, initialDelayMs, provider) {
32400
+ if (provider?.disableOuterRetry) {
32401
+ return operation();
32402
+ }
32343
32403
  return withRetry(operation, logicalId, {
32344
32404
  ...maxRetries !== void 0 && { maxRetries },
32345
32405
  ...initialDelayMs !== void 0 && { initialDelayMs },
@@ -33019,16 +33079,19 @@ Acquiring lock for stack ${stackName}...`);
33019
33079
  logger.warn(`Resource ${logicalId} not found in state, skipping`);
33020
33080
  return;
33021
33081
  }
33022
- const warnAfterMs = ctx.resourceWarnAfterByType?.[resource.resourceType] ?? ctx.resourceWarnAfterMs ?? DEFAULT_RESOURCE_WARN_AFTER_MS;
33023
- const timeoutMs = ctx.resourceTimeoutByType?.[resource.resourceType] ?? ctx.resourceTimeoutMs ?? DEFAULT_RESOURCE_TIMEOUT_MS;
33024
33082
  const baseLabel = `Deleting ${logicalId} (${resource.resourceType})`;
33025
33083
  renderer.addTask(logicalId, baseLabel);
33026
33084
  try {
33027
33085
  const provider = destroyProviderRegistry.getProvider(resource.resourceType);
33086
+ const providerMinTimeoutMs = provider.getMinResourceTimeoutMs?.() ?? 0;
33087
+ const warnAfterMs = ctx.resourceWarnAfterByType?.[resource.resourceType] ?? ctx.resourceWarnAfterMs ?? DEFAULT_RESOURCE_WARN_AFTER_MS;
33088
+ const globalTimeoutMs = ctx.resourceTimeoutMs ?? DEFAULT_RESOURCE_TIMEOUT_MS;
33089
+ const timeoutMs = ctx.resourceTimeoutByType?.[resource.resourceType] ?? Math.max(providerMinTimeoutMs, globalTimeoutMs);
33028
33090
  await withResourceDeadline(
33029
33091
  async () => {
33092
+ const maxAttempts = provider.disableOuterRetry ? 0 : 3;
33030
33093
  let lastDeleteError;
33031
- for (let attempt = 0; attempt <= 3; attempt++) {
33094
+ for (let attempt = 0; attempt <= maxAttempts; attempt++) {
33032
33095
  try {
33033
33096
  await provider.delete(
33034
33097
  logicalId,
@@ -33042,11 +33105,11 @@ Acquiring lock for stack ${stackName}...`);
33042
33105
  lastDeleteError = retryError;
33043
33106
  const msg = retryError instanceof Error ? retryError.message : String(retryError);
33044
33107
  const isRetryable = msg.includes("Too Many Requests") || msg.includes("has dependencies") || msg.includes("can't be deleted since") || msg.includes("DependencyViolation");
33045
- if (!isRetryable || attempt >= 3)
33108
+ if (!isRetryable || attempt >= maxAttempts)
33046
33109
  break;
33047
33110
  const delay = 5e3 * Math.pow(2, attempt);
33048
33111
  logger.debug(
33049
- ` \u23F3 Retrying delete ${logicalId} in ${delay / 1e3}s (attempt ${attempt + 1}/3)`
33112
+ ` \u23F3 Retrying delete ${logicalId} in ${delay / 1e3}s (attempt ${attempt + 1}/${maxAttempts})`
33050
33113
  );
33051
33114
  await new Promise((resolve4) => setTimeout(resolve4, delay));
33052
33115
  }
@@ -35573,7 +35636,7 @@ function reorderArgs(argv) {
35573
35636
  }
35574
35637
  async function main() {
35575
35638
  const program = new Command13();
35576
- program.name("cdkd").description("CDK Direct - Deploy AWS CDK apps directly via SDK/Cloud Control API").version("0.27.0");
35639
+ program.name("cdkd").description("CDK Direct - Deploy AWS CDK apps directly via SDK/Cloud Control API").version("0.28.0");
35577
35640
  program.addCommand(createBootstrapCommand());
35578
35641
  program.addCommand(createSynthCommand());
35579
35642
  program.addCommand(createListCommand());