@cloudsnorkel/cdk-github-runners 0.14.24 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/.jsii +5400 -255
  2. package/API.md +1048 -24
  3. package/README.md +52 -0
  4. package/assets/delete-failed-runner.lambda/index.js +105 -9
  5. package/assets/idle-runner-repear.lambda/index.js +136 -14
  6. package/assets/image-builders/aws-image-builder/delete-resources.lambda/index.js +1 -1
  7. package/assets/image-builders/build-image.lambda/index.js +1 -1
  8. package/assets/providers/ami-root-device.lambda/index.js +1 -1
  9. package/assets/setup.lambda/index.html +7 -7
  10. package/assets/setup.lambda/index.js +101 -8
  11. package/assets/status.lambda/index.js +104 -8
  12. package/assets/token-retriever.lambda/index.js +104 -8
  13. package/assets/warm-runner-manager.lambda/index.js +5892 -0
  14. package/assets/webhook-handler.lambda/index.js +109 -11
  15. package/assets/webhook-redelivery.lambda/index.js +122 -24
  16. package/lib/access.js +1 -1
  17. package/lib/delete-failed-runner.lambda.js +2 -2
  18. package/lib/idle-runner-repear.lambda.js +33 -7
  19. package/lib/image-builders/api.js +1 -1
  20. package/lib/image-builders/aws-image-builder/base-image.d.ts +13 -0
  21. package/lib/image-builders/aws-image-builder/base-image.js +36 -3
  22. package/lib/image-builders/aws-image-builder/builder.js +4 -4
  23. package/lib/image-builders/aws-image-builder/delete-resources.lambda.js +2 -2
  24. package/lib/image-builders/aws-image-builder/deprecated/ami.js +1 -1
  25. package/lib/image-builders/aws-image-builder/deprecated/container.js +1 -1
  26. package/lib/image-builders/aws-image-builder/deprecated/linux-components.js +1 -1
  27. package/lib/image-builders/aws-image-builder/deprecated/windows-components.js +1 -1
  28. package/lib/image-builders/build-image.lambda.js +2 -2
  29. package/lib/image-builders/codebuild-deprecated.js +1 -1
  30. package/lib/image-builders/components.js +3 -3
  31. package/lib/image-builders/static.js +1 -1
  32. package/lib/index.d.ts +1 -0
  33. package/lib/index.js +2 -1
  34. package/lib/lambda-github.d.ts +1 -1
  35. package/lib/lambda-github.js +3 -2
  36. package/lib/lambda-helpers.js +4 -4
  37. package/lib/providers/ami-root-device.lambda.js +2 -2
  38. package/lib/providers/codebuild.d.ts +16 -0
  39. package/lib/providers/codebuild.js +15 -4
  40. package/lib/providers/common.js +3 -3
  41. package/lib/providers/composite.js +1 -1
  42. package/lib/providers/ec2.d.ts +5 -0
  43. package/lib/providers/ec2.js +31 -17
  44. package/lib/providers/ecs.d.ts +17 -0
  45. package/lib/providers/ecs.js +43 -38
  46. package/lib/providers/fargate.js +9 -31
  47. package/lib/providers/lambda.js +2 -2
  48. package/lib/runner.d.ts +25 -2
  49. package/lib/runner.js +119 -17
  50. package/lib/secrets.js +1 -1
  51. package/lib/setup.lambda.js +2 -2
  52. package/lib/utils.d.ts +10 -1
  53. package/lib/utils.js +15 -1
  54. package/lib/warm-runner-manager-function.d.ts +18 -0
  55. package/lib/warm-runner-manager-function.js +24 -0
  56. package/lib/warm-runner-manager.lambda.d.ts +41 -0
  57. package/lib/warm-runner-manager.lambda.js +487 -0
  58. package/lib/warm-runner.d.ts +147 -0
  59. package/lib/warm-runner.js +210 -0
  60. package/lib/webhook-handler.lambda.js +5 -3
  61. package/lib/webhook-redelivery.lambda.js +17 -16
  62. package/lib/webhook.d.ts +4 -0
  63. package/lib/webhook.js +2 -1
  64. package/node_modules/cron-parser/LICENSE +21 -0
  65. package/node_modules/cron-parser/README.md +408 -0
  66. package/node_modules/cron-parser/dist/CronDate.js +518 -0
  67. package/node_modules/cron-parser/dist/CronExpression.js +520 -0
  68. package/node_modules/cron-parser/dist/CronExpressionParser.js +382 -0
  69. package/node_modules/cron-parser/dist/CronFieldCollection.js +371 -0
  70. package/node_modules/cron-parser/dist/CronFileParser.js +109 -0
  71. package/node_modules/cron-parser/dist/fields/CronDayOfMonth.js +44 -0
  72. package/node_modules/cron-parser/dist/fields/CronDayOfWeek.js +51 -0
  73. package/node_modules/cron-parser/dist/fields/CronField.js +214 -0
  74. package/node_modules/cron-parser/dist/fields/CronHour.js +40 -0
  75. package/node_modules/cron-parser/dist/fields/CronMinute.js +40 -0
  76. package/node_modules/cron-parser/dist/fields/CronMonth.js +44 -0
  77. package/node_modules/cron-parser/dist/fields/CronSecond.js +40 -0
  78. package/node_modules/cron-parser/dist/fields/index.js +24 -0
  79. package/node_modules/cron-parser/dist/fields/types.js +2 -0
  80. package/node_modules/cron-parser/dist/index.js +31 -0
  81. package/node_modules/cron-parser/dist/types/CronDate.d.ts +288 -0
  82. package/node_modules/cron-parser/dist/types/CronExpression.d.ts +118 -0
  83. package/node_modules/cron-parser/dist/types/CronExpressionParser.d.ts +70 -0
  84. package/node_modules/cron-parser/dist/types/CronFieldCollection.d.ts +153 -0
  85. package/node_modules/cron-parser/dist/types/CronFileParser.d.ts +30 -0
  86. package/node_modules/cron-parser/dist/types/fields/CronDayOfMonth.d.ts +25 -0
  87. package/node_modules/cron-parser/dist/types/fields/CronDayOfWeek.d.ts +30 -0
  88. package/node_modules/cron-parser/dist/types/fields/CronField.d.ts +130 -0
  89. package/node_modules/cron-parser/dist/types/fields/CronHour.d.ts +23 -0
  90. package/node_modules/cron-parser/dist/types/fields/CronMinute.d.ts +23 -0
  91. package/node_modules/cron-parser/dist/types/fields/CronMonth.d.ts +24 -0
  92. package/node_modules/cron-parser/dist/types/fields/CronSecond.d.ts +23 -0
  93. package/node_modules/cron-parser/dist/types/fields/index.d.ts +8 -0
  94. package/node_modules/cron-parser/dist/types/fields/types.d.ts +18 -0
  95. package/node_modules/cron-parser/dist/types/index.d.ts +8 -0
  96. package/node_modules/cron-parser/dist/types/utils/random.d.ts +10 -0
  97. package/node_modules/cron-parser/dist/utils/random.js +38 -0
  98. package/node_modules/cron-parser/package.json +117 -0
  99. package/node_modules/luxon/LICENSE.md +7 -0
  100. package/node_modules/luxon/README.md +55 -0
  101. package/node_modules/luxon/build/amd/luxon.js +8741 -0
  102. package/node_modules/luxon/build/amd/luxon.js.map +1 -0
  103. package/node_modules/luxon/build/cjs-browser/luxon.js +8739 -0
  104. package/node_modules/luxon/build/cjs-browser/luxon.js.map +1 -0
  105. package/node_modules/luxon/build/es6/luxon.mjs +8133 -0
  106. package/node_modules/luxon/build/es6/luxon.mjs.map +1 -0
  107. package/node_modules/luxon/build/global/luxon.js +8744 -0
  108. package/node_modules/luxon/build/global/luxon.js.map +1 -0
  109. package/node_modules/luxon/build/global/luxon.min.js +1 -0
  110. package/node_modules/luxon/build/global/luxon.min.js.map +1 -0
  111. package/node_modules/luxon/build/node/luxon.js +7792 -0
  112. package/node_modules/luxon/build/node/luxon.js.map +1 -0
  113. package/node_modules/luxon/package.json +87 -0
  114. package/node_modules/luxon/src/datetime.js +2603 -0
  115. package/node_modules/luxon/src/duration.js +1009 -0
  116. package/node_modules/luxon/src/errors.js +61 -0
  117. package/node_modules/luxon/src/impl/conversions.js +206 -0
  118. package/node_modules/luxon/src/impl/diff.js +95 -0
  119. package/node_modules/luxon/src/impl/digits.js +94 -0
  120. package/node_modules/luxon/src/impl/english.js +233 -0
  121. package/node_modules/luxon/src/impl/formats.js +176 -0
  122. package/node_modules/luxon/src/impl/formatter.js +434 -0
  123. package/node_modules/luxon/src/impl/invalid.js +14 -0
  124. package/node_modules/luxon/src/impl/locale.js +569 -0
  125. package/node_modules/luxon/src/impl/regexParser.js +335 -0
  126. package/node_modules/luxon/src/impl/tokenParser.js +505 -0
  127. package/node_modules/luxon/src/impl/util.js +330 -0
  128. package/node_modules/luxon/src/impl/zoneUtil.js +34 -0
  129. package/node_modules/luxon/src/info.js +205 -0
  130. package/node_modules/luxon/src/interval.js +669 -0
  131. package/node_modules/luxon/src/luxon.js +26 -0
  132. package/node_modules/luxon/src/package.json +4 -0
  133. package/node_modules/luxon/src/settings.js +180 -0
  134. package/node_modules/luxon/src/zone.js +97 -0
  135. package/node_modules/luxon/src/zones/IANAZone.js +235 -0
  136. package/node_modules/luxon/src/zones/fixedOffsetZone.js +150 -0
  137. package/node_modules/luxon/src/zones/invalidZone.js +53 -0
  138. package/node_modules/luxon/src/zones/systemZone.js +61 -0
  139. package/package.json +33 -24
@@ -0,0 +1,487 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.handler = handler;
4
+ /**
5
+ * Warm Runner Manager Lambda
6
+ *
7
+ * Maintains a pool of pre-provisioned ("warm") GitHub self-hosted runners so jobs
8
+ * start with near-zero latency instead of waiting for a fresh runner to provision.
9
+ *
10
+ * ## Lifecycle
11
+ *
12
+ * 1. **Fill** — An EventBridge cron rule (midnight UTC for always-on, or window
13
+ * start for scheduled) sends a fill payload to the shared SQS queue. For
14
+ * AlwaysOnWarmRunner only, a CloudFormation custom resource (on deploy)
15
+ * invokes this Lambda directly to fill immediately; the deadline is set to
16
+ * the next midnight UTC (not full 24h) so runners last until the next cron
17
+ * fill. ScheduledWarmRunner has no deployment-fill — first fill is at the
18
+ * next schedule occurrence.
19
+ *
20
+ * 2. **Keeper** — Each keeper message tracks one runner. The SQS queue delivers the
21
+ * message, this Lambda inspects the runner, and one of the following happens:
22
+ * - Runner is past its deadline → the keeper stops the Step Function and deletes
23
+ * the runner. No replacement is created.
24
+ * - Runner is idle and within deadline → message is returned to the queue
25
+ * (via batch item failure) to be checked again after the visibility timeout.
26
+ * - Runner is busy or its Step Function finished → a replacement runner is
27
+ * started with the same deadline, and a new keeper message is enqueued.
28
+ * - Config hash mismatch (config was changed/removed since this runner was
29
+ * created) → the runner is stopped and deleted. No replacement is created.
30
+ * This is how old runners are cleaned up quickly on config changes.
31
+ *
32
+ * 3. **Shutdown mechanisms** — The keeper is the primary mechanism for enforcing the
33
+ * absolute deadline: when a runner is past its deadline, the keeper stops the
34
+ * Step Function and deletes the runner. Fallbacks:
35
+ * - **Idle reaper**: Measures idle time from runner *registration* (cdkghr:started
36
+ * label), not step function start. So it fires at deadline + provisioning delay.
37
+ * It's a fallback if the keeper misses a message; it does not enforce the
38
+ * absolute deadline precisely.
39
+ * - **Step Function idle timeout**: Each runner is started with `maxIdleSeconds`
40
+ * matching the deadline. If both keeper and idle reaper miss it, the Step
41
+ * Function will self-terminate when its idle timeout fires.
42
+ *
43
+ * ## Config hash
44
+ *
45
+ * Each warm runner config (provider, count, labels, owner, repo, duration) is
46
+ * hashed at CDK synth time. All current hashes are stored in the WARM_CONFIG_HASHES
47
+ * environment variable. Fill payloads and keeper messages carry the hash. When the
48
+ * keeper processes a message whose hash is not in the current set, it knows the
49
+ * config was changed or removed and stops the runner immediately. This helps quickly
50
+ * get rid of stale runners while keeping over-provisioning to a minimum.
51
+ *
52
+ * ## Gotchas
53
+ *
54
+ * - Keeper messages rely on SQS redelivery (batch item failure) for periodic
55
+ * checking. The visibility timeout (1 min) determines how often runners are
56
+ * polled. Failed messages are retried until they succeed or the runner
57
+ * self-terminates at its idle timeout.
58
+ * - Each fill unconditionally starts `count` runners — it does not check how many
59
+ * are already running. On cron fire, this creates a brief overlap with the
60
+ * previous cycle's runners (which are near their deadline).
61
+ * - Removing all warm runner configurations may result in warm runners staying
62
+ * around until they expire. To remove all warm runners quickly, set count to 0
63
+ * and deploy. Only once all the warm runners are stopped, you can remove all
64
+ * configurations and deploy again.
65
+ * - **Gaps in coverage**: The Step Function that provisions each warm runner uses
66
+ * increasing timeouts between retries for provider failures (CodeBuild timeout,
67
+ * Lambda timeout, capacity errors, etc.). While a warm runner slot is retrying,
68
+ * that slot has no runner. This may create gaps in coverage. An idle warm runner
69
+ * that fails to provision (or whose replacement fails) will be unavailable until
70
+ * the retry succeeds. Current retry mechanism has built-in back-off rate and can
71
+ * be tweaked using `retryOptions`. This will be improved in the future.
72
+ */
73
+ const crypto = require("crypto");
74
+ const client_sfn_1 = require("@aws-sdk/client-sfn");
75
+ const client_sqs_1 = require("@aws-sdk/client-sqs");
76
+ const lambda_github_1 = require("./lambda-github");
77
+ const lambda_helpers_1 = require("./lambda-helpers");
78
+ const sfn = new client_sfn_1.SFNClient();
79
+ const sqs = new client_sqs_1.SQSClient();
80
+ const SFN_EXECUTION_NAME_MAX_LENGTH = 80;
81
/**
 * Type guard for SQS batch events.
 *
 * Null-safe: `null`, `undefined` and primitives yield `false` instead of
 * throwing, so it can be called on an unvalidated Lambda event.
 *
 * @param {unknown} event the raw Lambda event
 * @returns {boolean} true when `event.Records` is an array
 */
function isSqsEvent(event) {
    return Array.isArray(event?.Records);
}
84
/**
 * Type guard for fill payloads.
 *
 * @param {unknown} event a parsed message body or raw event
 * @returns {boolean} true when `event` is a non-null object whose `action` is `'fill'`
 */
function isFillInput(event) {
    if (event === null || typeof event !== 'object') {
        return false;
    }
    return event.action === 'fill';
}
87
/**
 * Type guard for CloudFormation custom resource events.
 *
 * @param {unknown} event the raw Lambda event
 * @returns {boolean} true when both `RequestType` and `ResponseURL` are strings
 */
function isCustomResourceEvent(event) {
    return ['RequestType', 'ResponseURL'].every((key) => typeof event?.[key] === 'string');
}
91
/**
 * Reads a mandatory environment variable.
 *
 * @param {string} name the environment variable name
 * @returns {string} the variable's value
 * @throws {Error} when the variable is unset or empty
 */
function requireEnv(name) {
    const { [name]: value } = process.env;
    if (value) {
        return value;
    }
    throw new Error(`Missing environment variable ${name}`);
}
98
/**
 * Builds a stable Step Functions execution name from a provider path and a seed.
 *
 * The same (providerPath, seed) pair always yields the same name, so a retried
 * fill (custom resource retry, SQS redelivery) reuses the name instead of
 * starting a duplicate runner.
 *
 * @param {string} providerPath construct path of the provider; the first `/`-separated segment is dropped when more follow
 * @param {string} seed value hashed (SHA-256) into the 16-hex-character suffix
 * @returns {string} `warm-<sanitized path>-<hash>`, with the prefix truncated so the whole name fits in SFN_EXECUTION_NAME_MAX_LENGTH
 */
function deterministicExecutionName(providerPath, seed) {
    const [, ...segmentsAfterFirst] = providerPath.split('/');
    // A path with a single segment has nothing left after dropping the first; fall back to it as-is.
    const relativePath = segmentsAfterFirst.join('/') || providerPath;
    const suffix = crypto.createHash('sha256').update(seed).digest('hex').slice(0, 16);
    // Reserve room for the suffix and the joining dash.
    const prefixBudget = SFN_EXECUTION_NAME_MAX_LENGTH - suffix.length - 1;
    const prefix = `warm-${relativePath.replace(/[^a-zA-Z0-9-]/g, '-')}`.slice(0, prefixBudget);
    return `${prefix}-${suffix}`;
}
109
/**
 * Returns the Unix timestamp (ms) of the first UTC midnight strictly after `now`.
 * Used for the AlwaysOn deployment-fill deadline.
 *
 * @param {Date} [now] reference instant; defaults to the current time
 * @returns {number} milliseconds since the epoch of the next 00:00:00.000 UTC
 */
function getNextMidnightUtcMs(now = new Date()) {
    // Date.UTC normalizes day overflow (e.g. Jan 32 -> Feb 1) and defaults the time fields to 0.
    return Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate() + 1);
}
115
/**
 * Looks up the GitHub App installation id for a repository or organization.
 * The webhook path gets this id from the payload; warm runners are scheduled
 * here, so it has to be queried.
 *
 * @param {string} owner repository owner or organization name
 * @param {string} [repo] repository name; when falsy the organization installation is queried
 * @returns {Promise<number|undefined>} the installation id, or undefined when no app Octokit is available (PAT authentication)
 */
async function resolveInstallationId(owner, repo) {
    const appOctokit = await (0, lambda_github_1.getAppOctokit)();
    if (!appOctokit) {
        return undefined; // PAT authentication
    }
    const response = repo
        ? await appOctokit.rest.apps.getRepoInstallation({ owner, repo })
        : await appOctokit.rest.apps.getOrgInstallation({ org: owner });
    return response.data.id;
}
130
/**
 * Starts one warm runner (a Step Functions execution) and enqueues the keeper
 * message that tracks it on the SQS queue.
 *
 * Does nothing when the absolute deadline has already passed. When an execution
 * with the same name already exists, the call is treated as an idempotent retry
 * and no keeper message is enqueued.
 *
 * @param {object} input provider, owner/repo, installation id, absolute deadline (Unix ms), config hash, execution name and optional slot
 * @returns {Promise<void>}
 * @throws any StartExecution error other than ExecutionAlreadyExists, any SendMessage error, or a missing environment variable
 */
async function startWarmRunnerAndEnqueueKeeper(input) {
    const stateMachineArn = requireEnv('STEP_FUNCTION_ARN');
    const keeperQueueUrl = requireEnv('WARM_RUNNER_QUEUE_URL');
    const { owner, repo, installationId, providerPath, providerLabels, absoluteDeadline, configHash, executionName, slot, } = input;
    const remainingSeconds = Math.floor((absoluteDeadline - Date.now()) / 1000);
    if (remainingSeconds <= 0) {
        console.log({
            notice: 'Absolute deadline already passed; not starting replacement',
            configHash,
            runnerName: executionName,
            input,
        });
        return;
    }
    // The runner idles for at most the time left until the deadline.
    const executionInput = {
        owner,
        repo: repo || '',
        jobId: -1,
        jobUrl: '',
        installationId: installationId ?? -1,
        jobLabels: providerLabels.join(','),
        provider: providerPath,
        labels: [...providerLabels, 'cdkghr:warm'].join(','),
        maxIdleSeconds: remainingSeconds,
    };
    let started;
    try {
        started = await sfn.send(new client_sfn_1.StartExecutionCommand({
            stateMachineArn,
            name: executionName,
            input: JSON.stringify(executionInput),
        }));
    }
    catch (e) {
        if (!(e instanceof client_sfn_1.ExecutionAlreadyExists)) {
            throw e;
        }
        // Idempotent retry: execution was already started (e.g. Lambda timed out mid-fill).
        // Don't enqueue. The first attempt already did, so duplicate keeper messages would cause duplicate replacements.
        console.log({
            notice: 'ExecutionAlreadyExists — idempotent retry, skipping enqueue',
            configHash,
            slot,
            runnerName: executionName,
        });
        return;
    }
    const { executionArn } = started;
    const keeperMessage = {
        executionArn,
        runnerName: executionName,
        owner,
        repo,
        installationId,
        providerPath,
        providerLabels,
        absoluteDeadline,
        configHash,
    };
    await sqs.send(new client_sqs_1.SendMessageCommand({
        QueueUrl: keeperQueueUrl,
        MessageBody: JSON.stringify(keeperMessage),
    }));
    console.log({
        notice: 'Started warm runner and enqueued keeper message',
        configHash,
        slot,
        runnerName: executionName,
        executionArn,
        remainingSeconds,
    });
}
203
/**
 * Starts `count` warm runners for one config, one after another, and enqueues a
 * keeper message for each. It does not check how many runners already exist.
 *
 * @param input the fill payload (provider, labels, owner/repo, count, duration in seconds, config hash)
 * @param getNameForSlot maps a slot index to a unique, stable execution name
 * @param source where the fill came from; only used for logging
 * @param absoluteDeadlineOverride deadline in Unix ms to use instead of now + duration (for AlwaysOn deployment-fill)
 */
async function runFiller(input, getNameForSlot, source, absoluteDeadlineOverride) {
    const { providerPath, providerLabels, owner, repo, configHash } = input;
    const installationId = await resolveInstallationId(owner, repo);
    // All slots of one fill share a single deadline.
    const absoluteDeadline = absoluteDeadlineOverride ?? (Date.now() + input.duration * 1000);
    let slot = 0;
    while (slot < input.count) {
        await startWarmRunnerAndEnqueueKeeper({
            providerPath,
            providerLabels,
            owner,
            repo,
            installationId,
            absoluteDeadline,
            configHash,
            executionName: getNameForSlot(slot),
            slot,
        });
        slot += 1;
    }
    console.log({
        notice: 'Fill complete - started warm runners',
        source,
        configHash,
        providerPath,
        started: input.count,
    });
}
235
/**
 * Stops a warm runner's Step Functions execution and deletes the runner from GitHub.
 *
 * A failure to stop the execution or to delete the runner is logged and
 * swallowed. A failure to look up the runner is not caught here and propagates
 * to the caller.
 *
 * @param input keeper message describing the runner (executionArn, runnerName, owner, repo, configHash)
 * @param octokit authenticated GitHub client
 * @param secrets GitHub secrets; `runnerLevel` selects where the runner is registered
 * @param reason value passed as the `error` of the stopped execution
 */
async function stopAndDeleteRunner(input, octokit, secrets, reason) {
    const { configHash, runnerName, executionArn, owner, repo } = input;
    try {
        const stopCommand = new client_sfn_1.StopExecutionCommand({
            executionArn,
            error: reason,
            cause: 'Warm runner stopped by keeper',
        });
        await sfn.send(stopCommand);
    }
    catch (e) {
        console.error({
            notice: 'Failed to stop step function',
            configHash,
            runnerName,
            executionArn,
            error: e,
            input,
        });
    }
    const runner = await (0, lambda_github_1.getRunner)(octokit, secrets.runnerLevel, owner, repo, runnerName);
    if (!runner) {
        // Nothing registered under this name; nothing to delete.
        return;
    }
    try {
        await (0, lambda_github_1.deleteRunner)(octokit, secrets.runnerLevel, owner, repo, runner.id);
    }
    catch (e) {
        console.error({
            notice: 'Failed to delete runner',
            configHash,
            runnerName,
            runnerId: runner.id,
            error: e,
            input,
        });
    }
}
271
/**
 * Warm runner manager Lambda - handles three invocation modes:
 *
 * 1. CloudFormation custom resource - on Create/Update runs a fill whose deadline is the next midnight UTC,
 *    then responds to CloudFormation. Delete only responds.
 * 2. SQS fill message - starts `count` runners; execution names derive from the SQS messageId so a
 *    redelivered message does not start duplicates.
 * 3. SQS keeper message - tracks one warm runner:
 *    - Config hash not in `WARM_CONFIG_HASHES` - stop and delete the runner, discard the message.
 *    - Past deadline - stop and delete the runner, discard the message.
 *    - Step Function no longer running, or runner busy - start a replacement with the same deadline and config hash.
 *    - Runner not registered yet, or still idle - return the message to the queue to check again later.
 *
 * A message whose body is not a JSON object is logged and dropped. An unexpected error while checking a
 * keeper message marks only that message for retry instead of failing the whole batch.
 *
 * @param event CloudFormation custom resource event or SQS event; anything else is logged and ignored
 * @returns `{ batchItemFailures }` for SQS events (messages to redeliver); undefined otherwise
 */
async function handler(event) {
    if (isCustomResourceEvent(event)) {
        await handleCustomResourceEvent(event);
        return;
    }
    if (!isSqsEvent(event)) {
        console.error({ notice: 'Unknown event type; ignoring', event });
        return;
    }
    const validHashes = new Set((process.env.WARM_CONFIG_HASHES ?? '').split(',').filter(Boolean));
    const result = { batchItemFailures: [] };
    const octokitCache = new Map();
    for (const record of event.Records) {
        let body;
        try {
            body = JSON.parse(record.body);
        }
        catch (e) {
            console.error({
                notice: 'Failed to parse message body',
                requestId: record.messageId,
                error: e,
            });
            continue;
        }
        // Valid JSON that is not an object (null, number, string, array) can never be a fill or keeper
        // message; drop it instead of crashing on property access and poisoning the batch.
        if (typeof body !== 'object' || body === null || Array.isArray(body)) {
            console.error({
                notice: 'Message body is not an object; ignoring',
                messageId: record.messageId,
            });
            continue;
        }
        const isFill = isFillInput(body);
        console.log({
            notice: 'Processing SQS message',
            messageId: record.messageId,
            configHash: body.configHash,
            runnerName: isFill ? undefined : body.runnerName,
        });
        let retry;
        if (isFill) {
            retry = await processFillMessage(record, body);
        }
        else {
            try {
                retry = await processKeeperMessage(record, body, validHashes, octokitCache);
            }
            catch (e) {
                // GitHub auth, DescribeExecution or runner lookup failed: retry this message only.
                console.error({
                    notice: 'Warm runner check failed; will retry later',
                    messageId: record.messageId,
                    configHash: body.configHash,
                    runnerName: body.runnerName,
                    error: e,
                });
                retry = true;
            }
        }
        if (retry) {
            result.batchItemFailures.push({ itemIdentifier: record.messageId });
        }
    }
    return result;
}
/** Runs the deployment fill for a custom resource Create/Update and reports the outcome to CloudFormation. */
async function handleCustomResourceEvent(event) {
    const physicalId = ('PhysicalResourceId' in event ? event.PhysicalResourceId : undefined) ?? event.LogicalResourceId;
    try {
        const props = event.ResourceProperties;
        console.log({
            notice: 'Custom resource fill',
            requestType: event.RequestType,
            logicalResourceId: event.LogicalResourceId,
            configHash: props.configHash,
            providerPath: props.providerPath,
            count: props.count,
        });
        if (event.RequestType === 'Create' || event.RequestType === 'Update') {
            const getNameForSlot = (slot) => deterministicExecutionName(props.providerPath, `${event.LogicalResourceId}:${event.RequestType}:${props.configHash}:${slot}`);
            const deadline = getNextMidnightUtcMs();
            await runFiller(props, getNameForSlot, 'customResource', deadline);
        }
        await (0, lambda_helpers_1.customResourceRespond)(event, 'SUCCESS', 'OK', physicalId, {});
    }
    catch (e) {
        console.error({ notice: 'Custom resource handler failed', error: e });
        await (0, lambda_helpers_1.customResourceRespond)(event, 'FAILED', e?.message || 'Internal Error', physicalId, {});
    }
}
/** Handles a scheduled fill message. Returns true when the message must be retried. */
async function processFillMessage(record, fillPayload) {
    try {
        console.log({
            notice: 'Scheduled fill',
            configHash: fillPayload.configHash,
            providerPath: fillPayload.providerPath,
            count: fillPayload.count,
        });
        const getNameForSlot = (slot) => deterministicExecutionName(fillPayload.providerPath, `${record.messageId}:${slot}`);
        await runFiller(fillPayload, getNameForSlot, 'scheduled', undefined);
        return false;
    }
    catch (e) {
        console.error({
            notice: 'Fill failed',
            messageId: record.messageId,
            configHash: fillPayload.configHash,
            error: e,
        });
        return true;
    }
}
/** Returns GitHub access for an installation, cached per installationId for the duration of one invocation. */
async function getCachedGithubAccess(octokitCache, installationId) {
    let access = octokitCache.get(installationId);
    if (!access) {
        const got = await (0, lambda_github_1.getOctokit)(installationId);
        access = { octokit: got.octokit, secrets: got.githubSecrets };
        octokitCache.set(installationId, access);
    }
    return access;
}
/** Checks one warm runner. Returns true when the keeper message must go back to the queue. */
async function processKeeperMessage(record, input, validHashes, octokitCache) {
    console.log({
        notice: 'Checking warm runner',
        configHash: input.configHash,
        runnerName: input.runnerName,
    });
    const { octokit, secrets } = await getCachedGithubAccess(octokitCache, input.installationId);
    // stale config - best-effort stop, then discard message (runner will self-terminate at its idle timeout)
    if (!validHashes.has(input.configHash)) {
        console.log({
            notice: 'Config hash mismatch (new CDK deployment, old runner) - stopping stale warm runner',
            configHash: input.configHash,
            runnerName: input.runnerName,
            validHashes: validHashes,
        });
        try {
            await stopAndDeleteRunner(input, octokit, secrets, 'StaleWarmRunner');
        }
        catch (e) {
            console.error({
                notice: 'Best-effort cleanup of stale warm runner failed; it will self-terminate at idle timeout',
                configHash: input.configHash,
                runnerName: input.runnerName,
                error: e,
            });
        }
        return false;
    }
    // past deadline - keeper must stop and delete the runner
    if (Date.now() >= input.absoluteDeadline) {
        console.log({
            notice: 'Warm runner past deadline, stopping and deleting',
            configHash: input.configHash,
            runnerName: input.runnerName,
        });
        try {
            await stopAndDeleteRunner(input, octokit, secrets, 'WarmRunnerExpired');
        }
        catch (e) {
            console.error({
                notice: 'Failed to stop expired warm runner',
                configHash: input.configHash,
                runnerName: input.runnerName,
                error: e,
            });
            // don't retry to not accidentally create a new runner
            // idle reaper will take care of it soon enough
        }
        return false;
    }
    // check if step function is still running
    const execution = await sfn.send(new client_sfn_1.DescribeExecutionCommand({ executionArn: input.executionArn }));
    const stillRunning = execution.status === 'RUNNING';
    // find runner
    const runner = await (0, lambda_github_1.getRunner)(octokit, secrets.runnerLevel, input.owner, input.repo, input.runnerName);
    // need replacement: step function finished (not running) or runner took a job (busy)
    if (!stillRunning || runner?.busy) {
        console.log({
            notice: 'Warm runner finished or busy; starting replacement',
            configHash: input.configHash,
            runnerName: input.runnerName,
            stillRunning,
            runnerBusy: runner?.busy ?? false,
        });
        try {
            await startWarmRunnerAndEnqueueKeeper({
                providerPath: input.providerPath,
                providerLabels: input.providerLabels,
                owner: input.owner,
                repo: input.repo,
                installationId: input.installationId,
                absoluteDeadline: input.absoluteDeadline,
                configHash: input.configHash,
                // Named after this message's id so a redelivery maps to the same execution.
                executionName: deterministicExecutionName(input.providerPath, record.messageId),
            });
            return false;
        }
        catch (e) {
            console.error({
                notice: 'Failed to start replacement warm runner',
                configHash: input.configHash,
                runnerName: input.runnerName,
                error: e,
            });
            return true;
        }
    }
    // step function still running but runner not found yet
    if (!runner) {
        console.log({
            notice: 'Runner not running yet',
            configHash: input.configHash,
            runnerName: input.runnerName,
        });
        return true;
    }
    // still idle - check again later
    console.log({
        notice: 'Runner still idle - will check again later',
        configHash: input.configHash,
        runnerName: input.runnerName,
    });
    return true;
}
487
+ //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"warm-runner-manager.lambda.js","sourceRoot":"","sources":["../src/warm-runner-manager.lambda.ts"],"names":[],"mappings":";;AAyVA,0BAoNC;AA7iBD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoEG;AACH,iCAAiC;AACjC,oDAM6B;AAC7B,oDAAoE;AAGpE,mDAAoG;AACpG,qDAAyD;AAEzD,MAAM,GAAG,GAAG,IAAI,sBAAS,EAAE,CAAC;AAC5B,MAAM,GAAG,GAAG,IAAI,sBAAS,EAAE,CAAC;AAE5B,MAAM,6BAA6B,GAAG,EAAE,CAAC;AA4BzC,SAAS,UAAU,CAAC,KAAc;IAChC,OAAO,KAAK,CAAC,OAAO,CAAE,KAA4B,CAAC,OAAO,CAAC,CAAC;AAC9D,CAAC;AAED,SAAS,WAAW,CAAC,KAAc;IACjC,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAK,KAA+B,CAAC,MAAM,KAAK,MAAM,CAAC;AAC3G,CAAC;AAED,SAAS,qBAAqB,CAAC,KAAc;IAC3C,MAAM,CAAC,GAAG,KAAoD,CAAC;IAC/D,OAAO,OAAO,CAAC,EAAE,WAAW,KAAK,QAAQ,IAAI,OAAO,CAAC,EAAE,WAAW,KAAK,QAAQ,CAAC;AAClF,CAAC;AAED,SAAS,UAAU,CAAC,IAAY;IAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAChC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,gCAAgC,IAAI,EAAE,CAAC,CAAC;IAC1D,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAcD;;;GAGG;AACH,SAAS,0BAA0B,CAAC,YAAoB,EAAE,IAAY;IACpE,MAAM,gBAAgB,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,YAAY,CAAC;IACpF,MAAM,SAAS,GAAG,QAAQ,gBAAgB,CAAC,OAAO,CAAC,gBAAgB,EAAE,GAAG,CAAC,EAAE,CAAC;IAC5E,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACjF,MAAM,YAAY,GAAG,6BAA6B,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;IACrE,OAAO,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,YAAY,CAAC,IAAI,IAAI,EAAE,CAAC;AACvD,CAAC;AAED,4FAA4F;AAC5F,SAAS,oBAAoB;IAC3B,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;IACvB,MAAM,YAAY,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,cAAc,EAAE,EAAE,GAAG,CAAC,WAAW,EAAE,EAAE,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACnH,OAAO,YAAY,CAAC,OAAO,EAAE,CAAC;AAChC,CAAC;AAED,8HAA8H;AAC9H,KAAK,UAAU,qBAAqB,CAAC,KAAa,EAAE,IAAY;IAC9D,MAAM,UAAU,GAAG,MAAM,IAAA,6BAAa,GAAE,CAAC;IACzC,
IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,SAAS,CAAC,CAAC,qBAAqB;IACzC,CAAC;IAED,IAAI,IAAI,EAAE,CAAC;QACT,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QACjF,OAAO,IAAI,CAAC,EAAE,CAAC;IACjB,CAAC;SAAM,CAAC;QACN,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,kBAAkB,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;QAC/E,OAAO,IAAI,CAAC,EAAE,CAAC;IACjB,CAAC;AACH,CAAC;AAED,kEAAkE;AAClE,KAAK,UAAU,+BAA+B,CAAC,KAA2B;IACxE,MAAM,eAAe,GAAG,UAAU,CAAC,mBAAmB,CAAC,CAAC;IACxD,MAAM,QAAQ,GAAG,UAAU,CAAC,uBAAuB,CAAC,CAAC;IAErD,MAAM,gBAAgB,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,gBAAgB,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC;IAClF,IAAI,gBAAgB,IAAI,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,GAAG,CAAC;YACV,MAAM,EAAE,4DAA4D;YACpE,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,UAAU,EAAE,KAAK,CAAC,aAAa;YAC/B,KAAK;SACN,CAAC,CAAC;QACH,OAAO;IACT,CAAC;IAED,IAAI,YAAoB,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,kCAAqB,CAAC;YACtD,eAAe,EAAE,eAAe;YAChC,IAAI,EAAE,KAAK,CAAC,aAAa;YACzB,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC;gBACpB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,EAAE;gBACtB,KAAK,EAAE,CAAC,CAAC;gBACT,MAAM,EAAE,EAAE;gBACV,cAAc,EAAE,KAAK,CAAC,cAAc,IAAI,CAAC,CAAC;gBAC1C,SAAS,EAAE,KAAK,CAAC,cAAc,CAAC,IAAI,CAAC,GAAG,CAAC;gBACzC,QAAQ,EAAE,KAAK,CAAC,YAAY;gBAC5B,MAAM,EAAE,CAAC,GAAG,KAAK,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBAC1D,cAAc,EAAE,gBAAgB;aACjC,CAAC;SACH,CAAC,CAAC,CAAC;QACJ,YAAY,GAAG,MAAM,CAAC,YAAa,CAAC;IACtC,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,IAAI,CAAC,YAAY,mCAAsB,EAAE,CAAC;YACxC,oFAAoF;YACpF,iHAAiH;YACjH,OAAO,CAAC,GAAG,CAAC;gBACV,MAAM,EAAE,6DAA6D;gBACrE,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,UAAU,EAAE,KAAK,CAAC,aAAa;aAChC,CAAC,CAAC;YACH,OAAO;QACT,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,CAAC;QACV,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAA4B;QACvC,YAAY;QACZ,UAAU,EAAE,KAAK,CAAC,aAAa;QAC/B,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,cAAc,EAAE,KAAK,CAAC,cAAc;QACpC,YAAY,EAAE,KAAK,CAAC,
YAAY;QAChC,cAAc,EAAE,KAAK,CAAC,cAAc;QACpC,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;QACxC,UAAU,EAAE,KAAK,CAAC,UAAU;KAC7B,CAAC;IAEF,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,+BAAkB,CAAC;QACpC,QAAQ,EAAE,QAAQ;QAClB,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KACrC,CAAC,CAAC,CAAC;IAEJ,OAAO,CAAC,GAAG,CAAC;QACV,MAAM,EAAE,iDAAiD;QACzD,UAAU,EAAE,KAAK,CAAC,UAAU;QAC5B,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,UAAU,EAAE,KAAK,CAAC,aAAa;QAC/B,YAAY;QACZ,gBAAgB;KACjB,CAAC,CAAC;AACL,CAAC;AAED;;;;;;;GAOG;AACH,KAAK,UAAU,SAAS,CAAC,KAA4B,EAAE,cAAwC,EAAE,MAAc,EAAE,wBAAiC;IAChJ,MAAM,cAAc,GAAG,MAAM,qBAAqB,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;IAC5E,MAAM,gBAAgB,GAAG,wBAAwB,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,QAAQ,GAAG,IAAI,CAAC;IAExF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,+BAA+B,CAAC;YACpC,YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,cAAc,EAAE,KAAK,CAAC,cAAc;YACpC,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,cAAc;YACd,gBAAgB;YAChB,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,aAAa,EAAE,cAAc,CAAC,CAAC,CAAC;YAChC,IAAI,EAAE,CAAC;SACR,CAAC,CAAC;IACL,CAAC;IAED,OAAO,CAAC,GAAG,CAAC;QACV,MAAM,EAAE,sCAAsC;QAC9C,MAAM;QACN,UAAU,EAAE,KAAK,CAAC,UAAU;QAC5B,YAAY,EAAE,KAAK,CAAC,YAAY;QAChC,OAAO,EAAE,KAAK,CAAC,KAAK;KACrB,CAAC,CAAC;AACL,CAAC;AAED,gEAAgE;AAChE,KAAK,UAAU,mBAAmB,CAAC,KAA8B,EAAE,OAAgB,EAAE,OAAsB,EAAE,MAAc;IACzH,IAAI,CAAC;QACH,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,iCAAoB,CAAC;YACtC,YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,KAAK,EAAE,MAAM;YACb,KAAK,EAAE,+BAA+B;SACvC,CAAC,CAAC,CAAC;IACN,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,KAAK,EAAE,CAAC;YACR,KAAK;SACN,CAAC,CAAC;IACL,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,IAAA,yBAAS,EAAC,OAAO,EAAE,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;IACxG,IAAI,MAAM,EAAE,CAAC;QACX,IAAI,CAAC;YACH,MAAM,IAAA,4BAAY,EAAC,OAAO,EAAE,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;
QACvF,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC;gBACZ,MAAM,EAAE,yBAAyB;gBACjC,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,QAAQ,EAAE,MAAM,CAAC,EAAE;gBACnB,KAAK,EAAE,CAAC;gBACR,KAAK;aACN,CAAC,CAAC;QACL,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACI,KAAK,UAAU,OAAO,CAAC,KAAuE;IACnG,IAAI,qBAAqB,CAAC,KAAK,CAAC,EAAE,CAAC;QACjC,MAAM,UAAU,GAAG,CAAC,oBAAoB,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC,iBAAiB,CAAC;QACrH,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,KAAK,CAAC,kBAAsD,CAAC;YAC3E,OAAO,CAAC,GAAG,CAAC;gBACV,MAAM,EAAE,sBAAsB;gBAC9B,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,iBAAiB,EAAE,KAAK,CAAC,iBAAiB;gBAC1C,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,YAAY,EAAE,KAAK,CAAC,YAAY;gBAChC,KAAK,EAAE,KAAK,CAAC,KAAK;aACnB,CAAC,CAAC;YACH,IAAI,KAAK,CAAC,WAAW,KAAK,QAAQ,IAAI,KAAK,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;gBACrE,MAAM,cAAc,GAAG,CAAC,IAAY,EAAE,EAAE,CACtC,0BAA0B,CAAC,KAAK,CAAC,YAAY,EAAE,GAAG,KAAK,CAAC,iBAAiB,IAAI,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,UAAU,IAAI,IAAI,EAAE,CAAC,CAAC;gBAChI,MAAM,QAAQ,GAAG,oBAAoB,EAAE,CAAC;gBACxC,MAAM,SAAS,CAAC,KAAK,EAAE,cAAc,EAAE,gBAAgB,EAAE,QAAQ,CAAC,CAAC;YACrE,CAAC;YACD,MAAM,IAAA,sCAAqB,EAAC,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,CAAC,CAAC;QACtE,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,gCAAgC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;YACtE,MAAM,IAAA,sCAAqB,EAAC,KAAK,EAAE,QAAQ,EAAG,CAAW,CAAC,OAAO,IAAI,gBAAgB,EAAE,UAAU,EAAE,EAAE,CAAC,CAAC;QACzG,CAAC;QACD,OAAO;IACT,CAAC;IAED,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,8BAA8B,EAAE,KAAK,EAAE,CAAC,CAAC;QACjE,OAAO;IACT,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,kBAAkB,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;IAC/F,MAAM,MAAM,GAA+B,EAAE,iBAAiB,EAAE,EAAE,EAAE,CAAC;IACrE,MAAM,YAAY,GAAG,IAAI,GAAG,EAAoE,CAAC;IAEjG,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QACnC,IAAI,IAAqD,CAAC;QAC1D,IAAI,CAAC;YACH,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAoD,CAAC;QACpF,C
AAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO,CAAC,KAAK,CAAC;gBACZ,MAAM,EAAE,8BAA8B;gBACtC,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,KAAK,EAAE,CAAC;aACT,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,MAAM,UAAU,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI,CAAC,EAAE,cAAc,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;QAE7F,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;QACjC,MAAM,UAAU,GAAI,IAAwD,CAAC,UAAU,CAAC;QACxF,MAAM,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAE,IAAgC,CAAC,UAAU,CAAC;QACrF,OAAO,CAAC,GAAG,CAAC;YACV,MAAM,EAAE,wBAAwB;YAChC,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,UAAU;YACV,UAAU;SACX,CAAC,CAAC;QAEH,0CAA0C;QAC1C,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,WAAW,GAAG,IAA6B,CAAC;YAClD,IAAI,CAAC;gBACH,OAAO,CAAC,GAAG,CAAC;oBACV,MAAM,EAAE,gBAAgB;oBACxB,UAAU;oBACV,YAAY,EAAE,WAAW,CAAC,YAAY;oBACtC,KAAK,EAAE,WAAW,CAAC,KAAK;iBACzB,CAAC,CAAC;gBACH,MAAM,cAAc,GAAG,CAAC,IAAY,EAAE,EAAE,CACtC,0BAA0B,CAAC,WAAW,CAAC,YAAY,EAAE,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,EAAE,CAAC,CAAC;gBACtF,MAAM,SAAS,CAAC,WAAW,EAAE,cAAc,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;YACvE,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,CAAC,KAAK,CAAC;oBACZ,MAAM,EAAE,aAAa;oBACrB,SAAS,EAAE,MAAM,CAAC,SAAS;oBAC3B,UAAU,EAAE,WAAW,CAAC,UAAU;oBAClC,KAAK,EAAE,CAAC;iBACT,CAAC,CAAC;gBACH,UAAU,EAAE,CAAC;YACf,CAAC;YACD,SAAS;QACX,CAAC;QAED,iBAAiB;QACjB,MAAM,KAAK,GAAG,IAA+B,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC;YACV,MAAM,EAAE,sBAAsB;YAC9B,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;SAC7B,CAAC,CAAC;QAEH,kHAAkH;QAClH,IAAI,OAAgB,CAAC;QACrB,IAAI,OAAsB,CAAC;QAC3B,MAAM,MAAM,GAAG,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QACtD,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;YACzB,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;QAC3B,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,GAAG,MAAM,IAAA,0BAAU,EAAC,KAAK,CAAC,cAAc,CAAC,CAAC;YACnD,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC;YACtB,OAAO,GAAG,GAAG,CAAC,aAAa,CAAC;YAC5B,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,cAAc,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;QAC/D,CAAC;QAED,yGAAyG;QACzG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC,EAAE,CAAC;YACvC,OAAO,CAAC,GAAG,CAAC;gBACV,MAAM,EAAE,oFAAoF;gBAC5F,UAAU,EAAE,
KAAK,CAAC,UAAU;gBAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,WAAW,EAAE,WAAW;aACzB,CAAC,CAAC;YAEH,IAAI,CAAC;gBACH,MAAM,mBAAmB,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,iBAAiB,CAAC,CAAC;YACxE,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,CAAC,KAAK,CAAC;oBACZ,MAAM,EAAE,yFAAyF;oBACjG,UAAU,EAAE,KAAK,CAAC,UAAU;oBAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;oBAC5B,KAAK,EAAE,CAAC;iBACT,CAAC,CAAC;YACL,CAAC;YACD,SAAS;QACX,CAAC;QAED,yDAAyD;QACzD,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,KAAK,CAAC,gBAAgB,EAAE,CAAC;YACzC,OAAO,CAAC,GAAG,CAAC;gBACV,MAAM,EAAE,kDAAkD;gBAC1D,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;aAC7B,CAAC,CAAC;YACH,IAAI,CAAC;gBACH,MAAM,mBAAmB,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,mBAAmB,CAAC,CAAC;YAC1E,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,CAAC,KAAK,CAAC;oBACZ,MAAM,EAAE,oCAAoC;oBAC5C,UAAU,EAAE,KAAK,CAAC,UAAU;oBAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;oBAC5B,KAAK,EAAE,CAAC;iBACT,CAAC,CAAC;gBACH,sDAAsD;gBACtD,+CAA+C;YACjD,CAAC;YACD,SAAS;QACX,CAAC;QAED,0CAA0C;QAC1C,MAAM,SAAS,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,qCAAwB,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QACrG,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,KAAK,SAAS,CAAC;QAEpD,cAAc;QACd,MAAM,MAAM,GAAG,MAAM,IAAA,yBAAS,EAAC,OAAO,EAAE,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;QAExG,qFAAqF;QACrF,IAAI,CAAC,YAAY,IAAI,MAAM,EAAE,IAAI,EAAE,CAAC;YAClC,OAAO,CAAC,GAAG,CAAC;gBACV,MAAM,EAAE,oDAAoD;gBAC5D,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,YAAY;gBACZ,UAAU,EAAE,MAAM,EAAE,IAAI,IAAI,KAAK;aAClC,CAAC,CAAC;YACH,IAAI,CAAC;gBACH,MAAM,+BAA+B,CAAC;oBACpC,YAAY,EAAE,KAAK,CAAC,YAAY;oBAChC,cAAc,EAAE,KAAK,CAAC,cAAc;oBACpC,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,cAAc,EAAE,KAAK,CAAC,cAAc;oBACpC,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;oBACxC,UAAU,EAAE,KAAK,CAAC,UAAU;oBAC5B,aAAa,EAAE,0BAA0B,CAAC,KAAK,CAAC,YAAY,EAAE,MAAM,CAAC,SAAS,CAAC;iBAChF,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,OAAO,CAAC,KAAK,CAAC;oBACZ,MAAM,EAAE,yCAAyC;oBACjD,UAAU,EAAE,KAAK,CAAC,UAAU;oBAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;oBAC5B,KAA
K,EAAE,CAAC;iBACT,CAAC,CAAC;gBACH,UAAU,EAAE,CAAC;YACf,CAAC;YACD,SAAS;QACX,CAAC;QAED,uDAAuD;QACvD,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,OAAO,CAAC,GAAG,CAAC;gBACV,MAAM,EAAE,wBAAwB;gBAChC,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;aAC7B,CAAC,CAAC;YACH,UAAU,EAAE,CAAC;YACb,SAAS;QACX,CAAC;QAED,iCAAiC;QACjC,OAAO,CAAC,GAAG,CAAC;YACV,MAAM,EAAE,4CAA4C;YACpD,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,UAAU,EAAE,KAAK,CAAC,UAAU;SAC7B,CAAC,CAAC;QACH,UAAU,EAAE,CAAC;IACf,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC","sourcesContent":["/**\n * Warm Runner Manager Lambda\n *\n * Maintains a pool of pre-provisioned (\"warm\") GitHub self-hosted runners so jobs\n * start with near-zero latency instead of waiting for a fresh runner to provision.\n *\n * ## Lifecycle\n *\n * 1. **Fill** — An EventBridge cron rule (midnight UTC for always-on, or window\n *    start for scheduled) sends a fill payload to the shared SQS queue. For\n *    AlwaysOnWarmRunner only, a CloudFormation custom resource (on deploy)\n *    invokes this Lambda directly to fill immediately; the deadline is set to\n *    the next midnight UTC (not full 24h) so runners last until the next cron\n *    fill. ScheduledWarmRunner has no deployment-fill — first fill is at the\n *    next schedule occurrence.\n *\n * 2. **Keeper** — Each keeper message tracks one runner. The SQS queue delivers the\n *    message, this Lambda inspects the runner, and one of the following happens:\n *    - Runner is past its deadline → the keeper stops the Step Function and deletes\n *      the runner. 
No replacement is created.\n *    - Runner is idle and within deadline → message is returned to the queue\n *      (via batch item failure) to be checked again after the visibility timeout.\n *    - Runner is busy or its Step Function finished → a replacement runner is\n *      started with the same deadline, and a new keeper message is enqueued.\n *    - Config hash mismatch (config was changed/removed since this runner was\n *      created) → the runner is stopped and deleted. No replacement is created.\n *      This is how old runners are cleaned up quickly on config changes.\n *\n * 3. **Shutdown mechanisms** — The keeper is the primary mechanism for enforcing the\n *    absolute deadline: when a runner is past its deadline, the keeper stops the\n *    Step Function and deletes the runner. Fallbacks:\n *    - **Idle reaper**: Measures idle time from runner *registration* (cdkghr:started\n *      label), not step function start. So it fires at deadline + provisioning delay.\n *      It's a fallback if the keeper misses a message; it does not enforce the\n *      absolute deadline precisely.\n *    - **Step Function idle timeout**: Each runner is started with `maxIdleSeconds`\n *      matching the deadline. If both keeper and idle reaper miss it, the Step\n *      Function will self-terminate when its idle timeout fires.\n *\n * ## Config hash\n *\n * Each warm runner config (provider, count, labels, owner, repo, duration) is\n * hashed at CDK synth time. All current hashes are stored in the WARM_CONFIG_HASHES\n * environment variable. Fill payloads and keeper messages carry the hash. When the\n * keeper processes a message whose hash is not in the current set, it knows the\n * config was changed or removed and stops the runner immediately. This helps quickly\n * get rid of stale runners while keeping over-provisioning to a minimum.\n *\n * ## Gotchas\n *\n * - Keeper messages rely on SQS redelivery (batch item failure) for periodic\n *   checking. 
The visibility timeout (1 min) determines how often runners are\n *   polled. Failed messages are retried until they succeed or the runner\n *   self-terminates at its idle timeout.\n * - Each fill unconditionally starts `count` runners — it does not check how many\n *   are already running. On cron fire, this creates a brief overlap with the\n *   previous cycle's runners (which are near their deadline).\n * - Removing all warm runners configurations may result in warm runners staying\n *   around until they expire. To remove all warm runners quickly, set count to 0\n *   and deploy. Only once all the warm runners are stopped, you can remove all\n *   configurations and deploy again.\n * - **Gaps in coverage**: The Step Function that provisions each warm runner uses\n *   increasing timeouts between retries for provider failures (CodeBuild timeout,\n *   Lambda timeout, capacity errors, etc.). While a warm runner slot is retrying,\n *   that slot has no runner. This may create gaps in coverage. An idle warm runner\n *   that fails to provision (or whose replacement fails) will be unavailable until\n *   the retry succeeds. Current retry mechanism has built-in back-off rate and can\n *   be tweaked using `retryOptions`. 
This will be improved in the future.\n */\nimport * as crypto from 'crypto';\nimport {\n  DescribeExecutionCommand,\n  SFNClient,\n  StartExecutionCommand,\n  StopExecutionCommand,\n  ExecutionAlreadyExists,\n} from '@aws-sdk/client-sfn';\nimport { SQSClient, SendMessageCommand } from '@aws-sdk/client-sqs';\nimport type { Octokit } from '@octokit/rest';\nimport * as AWSLambda from 'aws-lambda';\nimport { deleteRunner, getAppOctokit, getOctokit, getRunner, GitHubSecrets } from './lambda-github';\nimport { customResourceRespond } from './lambda-helpers';\n\nconst sfn = new SFNClient();\nconst sqs = new SQSClient();\n\nconst SFN_EXECUTION_NAME_MAX_LENGTH = 80;\n\nexport interface WarmRunnerKeeperMessage {\n  readonly executionArn: string;\n  readonly runnerName: string;\n  readonly owner: string;\n  readonly repo: string;\n  readonly installationId?: number;\n  readonly providerPath: string;\n  readonly providerLabels: string[];\n  readonly absoluteDeadline: number; // Unix ms — inherited by replacements\n  readonly configHash: string;\n}\n\n/**\n * @internal\n */\nexport interface WarmRunnerFillPayload {\n  readonly action: 'fill';\n  readonly providerPath: string;\n  readonly providerLabels: string[];\n  readonly count: number;\n  readonly duration: number;\n  readonly owner: string;\n  readonly repo: string;\n  readonly configHash: string;\n}\n\nfunction isSqsEvent(event: unknown): event is AWSLambda.SQSEvent {\n  return Array.isArray((event as AWSLambda.SQSEvent).Records);\n}\n\nfunction isFillInput(event: unknown): event is WarmRunnerFillPayload {\n  return typeof event === 'object' && event !== null && (event as WarmRunnerFillPayload).action === 'fill';\n}\n\nfunction isCustomResourceEvent(event: unknown): event is AWSLambda.CloudFormationCustomResourceEvent {\n  const e = event as AWSLambda.CloudFormationCustomResourceEvent;\n  return typeof e?.RequestType === 'string' && typeof e?.ResponseURL === 'string';\n}\n\nfunction requireEnv(name: string) {\n  const 
value = process.env[name];\n  if (!value) {\n    throw new Error(`Missing environment variable ${name}`);\n  }\n  return value;\n}\n\ninterface StartWarmRunnerInput {\n  readonly providerPath: string;\n  readonly providerLabels: string[];\n  readonly owner: string;\n  readonly repo: string;\n  readonly installationId?: number;\n  readonly absoluteDeadline: number; // Unix ms\n  readonly configHash: string;\n  readonly executionName: string;\n  readonly slot?: number; // 0-based index when filling multiple slots; helps correlate logs\n}\n\n/**\n * Deterministic execution name for idempotent fills. Same seed → same name, so retries\n * (custom resource, SQS redelivery) don't create duplicate runners.\n */\nfunction deterministicExecutionName(providerPath: string, seed: string) {\n  const pathWithoutStack = providerPath.split('/').slice(1).join('/') || providerPath;\n  const sanitized = `warm-${pathWithoutStack.replace(/[^a-zA-Z0-9-]/g, '-')}`;\n  const hash = crypto.createHash('sha256').update(seed).digest('hex').slice(0, 16);\n  const maxPrefixLen = SFN_EXECUTION_NAME_MAX_LENGTH - hash.length - 1;\n  return `${sanitized.slice(0, maxPrefixLen)}-${hash}`;\n}\n\n/** Returns Unix ms of the next midnight UTC. Used for AlwaysOn deployment-fill deadline. */\nfunction getNextMidnightUtcMs(): number {\n  const now = new Date();\n  const nextMidnight = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate() + 1, 0, 0, 0, 0));\n  return nextMidnight.getTime();\n}\n\n/** Find installation id for our app. Normal code path gets this from the webhook payload, but we schedule these ourselves. 
*/\nasync function resolveInstallationId(owner: string, repo: string) {\n  const appOctokit = await getAppOctokit();\n  if (!appOctokit) {\n    return undefined; // PAT authentication\n  }\n\n  if (repo) {\n    const { data } = await appOctokit.rest.apps.getRepoInstallation({ owner, repo });\n    return data.id;\n  } else {\n    const { data } = await appOctokit.rest.apps.getOrgInstallation({ org: owner });\n    return data.id;\n  }\n}\n\n/** Start a warm runner and enqueue a keeper message using SQS. */\nasync function startWarmRunnerAndEnqueueKeeper(input: StartWarmRunnerInput) {\n  const stepFunctionArn = requireEnv('STEP_FUNCTION_ARN');\n  const queueUrl = requireEnv('WARM_RUNNER_QUEUE_URL');\n\n  const remainingSeconds = Math.floor((input.absoluteDeadline - Date.now()) / 1000);\n  if (remainingSeconds <= 0) {\n    console.log({\n      notice: 'Absolute deadline already passed; not starting replacement',\n      configHash: input.configHash,\n      runnerName: input.executionName,\n      input,\n    });\n    return;\n  }\n\n  let executionArn: string;\n  try {\n    const result = await sfn.send(new StartExecutionCommand({\n      stateMachineArn: stepFunctionArn,\n      name: input.executionName,\n      input: JSON.stringify({\n        owner: input.owner,\n        repo: input.repo || '',\n        jobId: -1,\n        jobUrl: '',\n        installationId: input.installationId ?? -1,\n        jobLabels: input.providerLabels.join(','),\n        provider: input.providerPath,\n        labels: [...input.providerLabels, 'cdkghr:warm'].join(','),\n        maxIdleSeconds: remainingSeconds,\n      }),\n    }));\n    executionArn = result.executionArn!;\n  } catch (e) {\n    if (e instanceof ExecutionAlreadyExists) {\n      // Idempotent retry: execution was already started (e.g. Lambda timed out mid-fill).\n      // Don't enqueue. 
The first attempt already did, so duplicate keeper messages would cause duplicate replacements.\n      console.log({\n        notice: 'ExecutionAlreadyExists — idempotent retry, skipping enqueue',\n        configHash: input.configHash,\n        slot: input.slot,\n        runnerName: input.executionName,\n      });\n      return;\n    } else {\n      throw e;\n    }\n  }\n\n  const message: WarmRunnerKeeperMessage = {\n    executionArn,\n    runnerName: input.executionName,\n    owner: input.owner,\n    repo: input.repo,\n    installationId: input.installationId,\n    providerPath: input.providerPath,\n    providerLabels: input.providerLabels,\n    absoluteDeadline: input.absoluteDeadline,\n    configHash: input.configHash,\n  };\n\n  await sqs.send(new SendMessageCommand({\n    QueueUrl: queueUrl,\n    MessageBody: JSON.stringify(message),\n  }));\n\n  console.log({\n    notice: 'Started warm runner and enqueued keeper message',\n    configHash: input.configHash,\n    slot: input.slot,\n    runnerName: input.executionName,\n    executionArn,\n    remainingSeconds,\n  });\n}\n\n/**\n * Unconditionally starts `count` warm runners for the given config and enqueues keeper messages.\n *\n * @param input the fill payload\n * @param getNameForSlot a function to generate a unique and stable execution name for each slot\n * @param source the source of the fill for debugging purposes\n * @param absoluteDeadlineOverride if provided, use this instead of now + duration (for AlwaysOn deployment-fill)\n */\nasync function runFiller(input: WarmRunnerFillPayload, getNameForSlot: (slot: number) => string, source: string, absoluteDeadlineOverride?: number) {\n  const installationId = await resolveInstallationId(input.owner, input.repo);\n  const absoluteDeadline = absoluteDeadlineOverride ?? 
Date.now() + input.duration * 1000;\n\n  for (let i = 0; i < input.count; i++) {\n    await startWarmRunnerAndEnqueueKeeper({\n      providerPath: input.providerPath,\n      providerLabels: input.providerLabels,\n      owner: input.owner,\n      repo: input.repo,\n      installationId,\n      absoluteDeadline,\n      configHash: input.configHash,\n      executionName: getNameForSlot(i),\n      slot: i,\n    });\n  }\n\n  console.log({\n    notice: 'Fill complete - started warm runners',\n    source,\n    configHash: input.configHash,\n    providerPath: input.providerPath,\n    started: input.count,\n  });\n}\n\n/** Stop the step function and delete the runner from GitHub. */\nasync function stopAndDeleteRunner(input: WarmRunnerKeeperMessage, octokit: Octokit, secrets: GitHubSecrets, reason: string) {\n  try {\n    await sfn.send(new StopExecutionCommand({\n      executionArn: input.executionArn,\n      error: reason,\n      cause: 'Warm runner stopped by keeper',\n    }));\n  } catch (e) {\n    console.error({\n      notice: 'Failed to stop step function',\n      configHash: input.configHash,\n      runnerName: input.runnerName,\n      executionArn: input.executionArn,\n      error: e,\n      input,\n    });\n  }\n\n  const runner = await getRunner(octokit, secrets.runnerLevel, input.owner, input.repo, input.runnerName);\n  if (runner) {\n    try {\n      await deleteRunner(octokit, secrets.runnerLevel, input.owner, input.repo, runner.id);\n    } catch (e) {\n      console.error({\n        notice: 'Failed to delete runner',\n        configHash: input.configHash,\n        runnerName: input.runnerName,\n        runnerId: runner.id,\n        error: e,\n        input,\n      });\n    }\n  }\n}\n\n/**\n * Warm runner manager Lambda - handles three invocation modes:\n *\n * 1. CloudFormation Custom Resource - triggered on stack deploy (Create/Update) for AlwaysOnWarmRunner only. 
Runs\n *    runFiller with deadline = next midnight UTC so runners last until the next cron fill. Delete is a no-op.\n * 2. SQS messages - fill or keeper\n *    - Fill - from EventBridge cron. Uses messageId for deterministic execution names (idempotent on redelivery).\n *    - Keeper - tracks one runner. Uses SQS message cycling for periodic checks.\n *      Each message tracks one warm runner. The keeper checks:\n *      - Past deadline - stop the Step Function and delete the runner.\n *      - Config hash - if the message's `configHash` doesn't match the current `WARM_CONFIG_HASHES` env var, the runner is from a stale config - stop it and discard the message without replacement.\n *      - Busy/finished - if the Step Function ended or the GitHub runner is busy (took a job), start a replacement runner (inheriting the same deadline and config hash).\n *      - Not found yet (runner/infrastructure still starting) - retry later (message goes back to queue).\n *      - Still idle - retry later to check again.\n */\nexport async function handler(event: AWSLambda.SQSEvent | AWSLambda.CloudFormationCustomResourceEvent) {\n  if (isCustomResourceEvent(event)) {\n    const physicalId = ('PhysicalResourceId' in event ? event.PhysicalResourceId : undefined) ?? 
event.LogicalResourceId;\n    try {\n      const props = event.ResourceProperties as unknown as WarmRunnerFillPayload;\n      console.log({\n        notice: 'Custom resource fill',\n        requestType: event.RequestType,\n        logicalResourceId: event.LogicalResourceId,\n        configHash: props.configHash,\n        providerPath: props.providerPath,\n        count: props.count,\n      });\n      if (event.RequestType === 'Create' || event.RequestType === 'Update') {\n        const getNameForSlot = (slot: number) =>\n          deterministicExecutionName(props.providerPath, `${event.LogicalResourceId}:${event.RequestType}:${props.configHash}:${slot}`);\n        const deadline = getNextMidnightUtcMs();\n        await runFiller(props, getNameForSlot, 'customResource', deadline);\n      }\n      await customResourceRespond(event, 'SUCCESS', 'OK', physicalId, {});\n    } catch (e) {\n      console.error({ notice: 'Custom resource handler failed', error: e });\n      await customResourceRespond(event, 'FAILED', (e as Error).message || 'Internal Error', physicalId, {});\n    }\n    return;\n  }\n\n  if (!isSqsEvent(event)) {\n    console.error({ notice: 'Unknown event type; ignoring', event });\n    return;\n  }\n\n  const validHashes = new Set((process.env.WARM_CONFIG_HASHES ?? 
'').split(',').filter(Boolean));\n  const result: AWSLambda.SQSBatchResponse = { batchItemFailures: [] };\n  const octokitCache = new Map<number | undefined, { octokit: Octokit; secrets: GitHubSecrets }>();\n\n  for (const record of event.Records) {\n    let body: WarmRunnerKeeperMessage | WarmRunnerFillPayload;\n    try {\n      body = JSON.parse(record.body) as WarmRunnerKeeperMessage | WarmRunnerFillPayload;\n    } catch (e) {\n      console.error({\n        notice: 'Failed to parse message body',\n        requestId: record.messageId,\n        error: e,\n      });\n      continue;\n    }\n\n    const retryLater = () => result.batchItemFailures.push({ itemIdentifier: record.messageId });\n\n    const isFill = isFillInput(body);\n    const configHash = (body as WarmRunnerFillPayload & WarmRunnerKeeperMessage).configHash;\n    const runnerName = isFill ? undefined : (body as WarmRunnerKeeperMessage).runnerName;\n    console.log({\n      notice: 'Processing SQS message',\n      messageId: record.messageId,\n      configHash,\n      runnerName,\n    });\n\n    // scheduled fill from EventBridge via SQS\n    if (isFill) {\n      const fillPayload = body as WarmRunnerFillPayload;\n      try {\n        console.log({\n          notice: 'Scheduled fill',\n          configHash,\n          providerPath: fillPayload.providerPath,\n          count: fillPayload.count,\n        });\n        const getNameForSlot = (slot: number) =>\n          deterministicExecutionName(fillPayload.providerPath, `${record.messageId}:${slot}`);\n        await runFiller(fillPayload, getNameForSlot, 'scheduled', undefined);\n      } catch (e) {\n        console.error({\n          notice: 'Fill failed',\n          messageId: record.messageId,\n          configHash: fillPayload.configHash,\n          error: e,\n        });\n        retryLater();\n      }\n      continue;\n    }\n\n    // keeper message\n    const input = body as WarmRunnerKeeperMessage;\n    console.log({\n      notice: 'Checking warm 
runner',\n      configHash: input.configHash,\n      runnerName: input.runnerName,\n    });\n\n    // get github access (cached per installationId to avoid re-reading the secrets manager and Github API every time)\n    let octokit: Octokit;\n    let secrets: GitHubSecrets;\n    const cached = octokitCache.get(input.installationId);\n    if (cached) {\n      octokit = cached.octokit;\n      secrets = cached.secrets;\n    } else {\n      const got = await getOctokit(input.installationId);\n      octokit = got.octokit;\n      secrets = got.githubSecrets;\n      octokitCache.set(input.installationId, { octokit, secrets });\n    }\n\n    // stale config - best-effort stop, then discard message (runner will self-terminate at its idle timeout)\n    if (!validHashes.has(input.configHash)) {\n      console.log({\n        notice: 'Config hash mismatch (new CDK deployment, old runner) - stopping stale warm runner',\n        configHash: input.configHash,\n        runnerName: input.runnerName,\n        validHashes: validHashes,\n      });\n\n      try {\n        await stopAndDeleteRunner(input, octokit, secrets, 'StaleWarmRunner');\n      } catch (e) {\n        console.error({\n          notice: 'Best-effort cleanup of stale warm runner failed; it will self-terminate at idle timeout',\n          configHash: input.configHash,\n          runnerName: input.runnerName,\n          error: e,\n        });\n      }\n      continue;\n    }\n\n    // past deadline - keeper must stop and delete the runner\n    if (Date.now() >= input.absoluteDeadline) {\n      console.log({\n        notice: 'Warm runner past deadline, stopping and deleting',\n        configHash: input.configHash,\n        runnerName: input.runnerName,\n      });\n      try {\n        await stopAndDeleteRunner(input, octokit, secrets, 'WarmRunnerExpired');\n      } catch (e) {\n        console.error({\n          notice: 'Failed to stop expired warm runner',\n          configHash: input.configHash,\n          runnerName: 
input.runnerName,\n          error: e,\n        });\n        // don't retry to not accidentally create a new runner\n        // idle reaper will take care of it soon enough\n      }\n      continue;\n    }\n\n    // check if step function is still running\n    const execution = await sfn.send(new DescribeExecutionCommand({ executionArn: input.executionArn }));\n    const stillRunning = execution.status === 'RUNNING';\n\n    // find runner\n    const runner = await getRunner(octokit, secrets.runnerLevel, input.owner, input.repo, input.runnerName);\n\n    // need replacement: step function finished (not running) or runner took a job (busy)\n    if (!stillRunning || runner?.busy) {\n      console.log({\n        notice: 'Warm runner finished or busy; starting replacement',\n        configHash: input.configHash,\n        runnerName: input.runnerName,\n        stillRunning,\n        runnerBusy: runner?.busy ?? false,\n      });\n      try {\n        await startWarmRunnerAndEnqueueKeeper({\n          providerPath: input.providerPath,\n          providerLabels: input.providerLabels,\n          owner: input.owner,\n          repo: input.repo,\n          installationId: input.installationId,\n          absoluteDeadline: input.absoluteDeadline,\n          configHash: input.configHash,\n          executionName: deterministicExecutionName(input.providerPath, record.messageId),\n        });\n      } catch (e) {\n        console.error({\n          notice: 'Failed to start replacement warm runner',\n          configHash: input.configHash,\n          runnerName: input.runnerName,\n          error: e,\n        });\n        retryLater();\n      }\n      continue;\n    }\n\n    // step function still running but runner not found yet\n    if (!runner) {\n      console.log({\n        notice: 'Runner not running yet',\n        configHash: input.configHash,\n        runnerName: input.runnerName,\n      });\n      retryLater();\n      continue;\n    }\n\n    // still idle - check again 
later\n    console.log({\n      notice: 'Runner still idle - will check again later',\n      configHash: input.configHash,\n      runnerName: input.runnerName,\n    });\n    retryLater();\n  }\n\n  return result;\n}\n"]}