@redwoodjs/agent-ci 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/LICENSE +110 -0
  2. package/README.md +79 -0
  3. package/dist/cli.js +628 -0
  4. package/dist/config.js +63 -0
  5. package/dist/docker/container-config.js +178 -0
  6. package/dist/docker/container-config.test.js +156 -0
  7. package/dist/docker/service-containers.js +205 -0
  8. package/dist/docker/service-containers.test.js +236 -0
  9. package/dist/docker/shutdown.js +120 -0
  10. package/dist/docker/shutdown.test.js +148 -0
  11. package/dist/output/agent-mode.js +7 -0
  12. package/dist/output/agent-mode.test.js +36 -0
  13. package/dist/output/cleanup.js +218 -0
  14. package/dist/output/cleanup.test.js +241 -0
  15. package/dist/output/concurrency.js +57 -0
  16. package/dist/output/concurrency.test.js +88 -0
  17. package/dist/output/debug.js +36 -0
  18. package/dist/output/logger.js +57 -0
  19. package/dist/output/logger.test.js +82 -0
  20. package/dist/output/reporter.js +67 -0
  21. package/dist/output/run-state.js +126 -0
  22. package/dist/output/run-state.test.js +169 -0
  23. package/dist/output/state-renderer.js +149 -0
  24. package/dist/output/state-renderer.test.js +488 -0
  25. package/dist/output/tree-renderer.js +52 -0
  26. package/dist/output/tree-renderer.test.js +105 -0
  27. package/dist/output/working-directory.js +20 -0
  28. package/dist/runner/directory-setup.js +98 -0
  29. package/dist/runner/directory-setup.test.js +31 -0
  30. package/dist/runner/git-shim.js +92 -0
  31. package/dist/runner/git-shim.test.js +57 -0
  32. package/dist/runner/local-job.js +691 -0
  33. package/dist/runner/metadata.js +90 -0
  34. package/dist/runner/metadata.test.js +127 -0
  35. package/dist/runner/result-builder.js +119 -0
  36. package/dist/runner/result-builder.test.js +177 -0
  37. package/dist/runner/step-wrapper.js +82 -0
  38. package/dist/runner/step-wrapper.test.js +77 -0
  39. package/dist/runner/sync.js +80 -0
  40. package/dist/runner/workspace.js +66 -0
  41. package/dist/types.js +1 -0
  42. package/dist/workflow/job-scheduler.js +62 -0
  43. package/dist/workflow/job-scheduler.test.js +130 -0
  44. package/dist/workflow/workflow-parser.js +556 -0
  45. package/dist/workflow/workflow-parser.test.js +642 -0
  46. package/package.json +39 -0
  47. package/shim.sh +11 -0
@@ -0,0 +1,691 @@
1
+ import Docker from "dockerode";
2
+ import path from "path";
3
+ import fs from "fs";
4
+ import { execSync } from "child_process";
5
+ import { createInterface } from "readline";
6
+ import { config } from "../config.js";
7
+ import { createLogContext } from "../output/logger.js";
8
+ import { getWorkingDirectory } from "../output/working-directory.js";
9
+ import { debugRunner, debugBoot } from "../output/debug.js";
10
+ import { startServiceContainers, cleanupServiceContainers, } from "../docker/service-containers.js";
11
+ import { killRunnerContainers } from "../docker/shutdown.js";
12
+ import { startEphemeralDtu } from "dtu-github-actions/ephemeral";
13
+ import { tailLogFile } from "../output/reporter.js";
14
+ import { writeJobMetadata } from "./metadata.js";
15
+ import { computeFakeSha, writeGitShim } from "./git-shim.js";
16
+ import { prepareWorkspace } from "./workspace.js";
17
+ import { createRunDirectories } from "./directory-setup.js";
18
+ import { buildContainerEnv, buildContainerBinds, buildContainerCmd, resolveDtuHost, resolveDockerApiUrl, } from "../docker/container-config.js";
19
+ import { buildJobResult, sanitizeStepName } from "./result-builder.js";
20
+ import { wrapJobSteps } from "./step-wrapper.js";
21
+ import { syncWorkspaceForRetry } from "./sync.js";
22
// ─── Docker setup ─────────────────────────────────────────────────────────────
// Honour DOCKER_HOST when set; otherwise fall back to the default local socket.
const dockerHost = process.env.DOCKER_HOST || "unix:///var/run/docker.sock";
// A unix:// URL maps to a socketPath connection; anything else is treated as a
// remote host reached over SSH.
let dockerConfig;
if (dockerHost.startsWith("unix://")) {
    dockerConfig = { socketPath: dockerHost.replace("unix://", "") };
}
else {
    dockerConfig = { host: dockerHost, protocol: "ssh" };
}
const docker = new Docker(dockerConfig);
// Image used for the GitHub Actions runner container.
const IMAGE = "ghcr.io/actions/actions-runner:latest";
29
// ─── Pre-baked runner credentials ─────────────────────────────────────────────
// Normally the GitHub Actions runner's `config.sh` (a .NET binary) must
// generate .runner, .credentials and .credentials_rsaparams before run.sh will
// start, and every invocation pays a ~3-5s .NET cold start — twice, for the
// remove + register cycle.
//
// The DTU mock accepts arbitrary credential values, so we emit these files
// ourselves with deterministic content and skip config.sh, saving ~5-10s per
// container start.
function writeRunnerCredentials(runnerDir, runnerName, serverUrl) {
    // Small helper: serialize `payload` and write it into runnerDir.
    const emit = (fileName, payload, pretty = true) => {
        const json = pretty ? JSON.stringify(payload, null, 2) : JSON.stringify(payload);
        fs.writeFileSync(path.join(runnerDir, fileName), json);
    };
    // .runner — identity and connection info that run.sh reads on startup.
    emit(".runner", {
        agentId: 1,
        agentName: runnerName,
        poolId: 1,
        poolName: "Default",
        serverUrl: new URL(serverUrl).origin,
        gitHubUrl: serverUrl,
        workFolder: "_work",
        ephemeral: true,
    });
    // .credentials — OAuth scheme run.sh uses to authenticate with the DTU.
    emit(".credentials", {
        scheme: "OAuth",
        data: {
            clientId: "00000000-0000-0000-0000-000000000000",
            authorizationUrl: `${serverUrl}/_apis/oauth2/token`,
            oAuthEndpointUrl: `${serverUrl}/_apis/oauth2/token`,
            requireFipsCryptography: "False",
        },
    });
    // .credentials_rsaparams — RSA key the runner uses for token signing,
    // serialized as RSAParametersSerializable JSON (ISerializable with
    // lowercase keys matching the RSAParametersSerializable constructor).
    // The DTU mock never validates signatures, so a static pre-generated
    // 2048-bit key is sufficient. Written compact (no pretty-printing).
    emit(".credentials_rsaparams", {
        d: "CQpCI+sO2GD1N/JsHHI9zEhMlu5Fcc8mU4O2bO6iscOsagFjvEnTesJgydC/Go1HuOBlx+GT9EG2h7+juS0z2o5n8Mvt5BBxlK+tqoDOs8VfQ9CSUl3hqYRPeNdBfnA1w8ovLW0wqfPO08FWTLI0urYsnwjZ5BQrBM+D7zYeA0aCsKdo75bKmaEKnmqrtIEhb7hE45XQa32Yt0RPCPi8QcQAY2HLHbdWdZYDj6k/UuDvz9H/xlDzwYq6Yikk2RSMArFzaufxCGS9tBZNEACDPYgnZnEMXRcvsnZ9FYbq81KOSifCmq7Yocq+j3rY5zJCD+PIDY9QJwPxB4PGasRKAQ==",
        dp: "A0sY1oOz1+3uUMiy+I5xGuHGHOrEQPYspd1xGClBYYsa/Za0UDWS7V0Tn1cbRWfWtNe5vTpxcvwQd6UZBwrtHF6R2zyXFhE++PLPhCe0tH4C5FY9i9jUw9Vo8t44i/s5JUHU2B1mEptXFUA0GcVrLKS8toZSgqELSS2Q/YLRxoE=",
        dq: "GrLC9dPJ5n3VYw51ghCH7tybUN9/Oe4T8d9v4dLQ34RQEWHwRd4g3U3zkvuhpXFPloUTMmkxS7MF5pS1evrtzkay4QUTDv+28s0xRuAsw5qNTzuFygg8t93MvpvTVZ2TNApW6C7NFvkL9NbxAnU8+I61/3ow7i6a7oYJJ0hWAxE=",
        exponent: "AQAB",
        inverseQ: "8DVz9FSvEdt5W4B9OjgakZHwGfnhn2VLDUxrsR5ilC5tPC/IgA8C2xEfKQM1t+K/N3pAYHBYQ6EPgtW4kquBS/Sy102xbRI7GSCnUbRtTpWYPOaCn6EaxBNzwWzbp5vCbCGvFqlSu4+OBYRVe+iCj+gAnkmT/TKPhHHbTjJHvw==",
        modulus: "x0eoW2DD7xsW5YiorMN8pNHVvZk4ED1SHlA/bmVnRz5FjEDnQloMn0nBgIUHxoNArksknrp/FOVJv5sJHJTiRZkOp+ZmH7d3W3gmw63IxK2C5pV+6xfav9jR2+Wt/6FMYMgG2utBdF95oif1f2XREFovHoXkWms2l0CPLLHVPO44Hh9EEmBmjOeMJEZkulHJ44z9y8e+GZ2nYqO0ZiRWQcRObZ0vlRaGg6PPOl4ltay0BfNksMB3NDtlhkdVkAEFQxEaZZDK9NtkvNljXCioP3TyTAbqNUGsYCA5D+IHGZT9An99J9vUqTFP6TKjqUvy9WNiIzaUksCySA0a4SVBkQ==",
        p: "8fgAdmWy+sTzAN19fYkWMQqeC7t1BCQMo5z5knfVLg8TtwP9ZGqDtoe+r0bGv3UgVsvvDdP/QwRvRVP+5G9l999Y6b4VbSdUbrfPfOgjpPDmRTQzHDve5jh5xBENQoRXYm7PMgHGmjwuFsE/tKtSGTrvt2Z3qcYAo0IOqLLhYmE=",
        q: "0tXx4+P7gUWePf92UJLkzhNBClvdnmDbIt52Lui7YCARczbN/asCDJxcMy6Bh3qmIx/bNuOUrfzHkYZHfnRw8AGEK80qmiLLPI6jrUBOGRajmzemGQx0W8FWalEQfGdNIv9R2nsegDRoMq255Zo/qX60xQ6abpp0c6UNhVYSjTE=",
    }, false);
}
77
// ─── Main ─────────────────────────────────────────────────────────────────────
/**
 * Executes a single workflow job in a Docker container against an ephemeral
 * in-process DTU (mock GitHub Actions server), streaming progress into the
 * optional run-state store.
 *
 * High-level flow:
 *   1. Ping Docker (fail fast with a helpful message if unreachable).
 *   2. Create log/run directories and register the job in the store.
 *   3. Start an ephemeral DTU, register the runner, seed the job.
 *   4. Prepare the workspace in parallel with container creation, then start
 *      the runner container (optionally a user-specified image with the
 *      runner binary injected, plus any service containers).
 *   5. Poll timeline.json into the store until the container exits; support
 *      pause-on-failure with Enter-to-retry on a TTY.
 *   6. Force-stop on exit timeout, then clean up containers and directories.
 *
 * @param {object} job - Job description (steps, githubRepo, headSha, services,
 *   container, workflowPath, runnerName, …). Shape is defined by the workflow
 *   parser elsewhere in this package — not fully visible here.
 * @param {object} [options]
 * @param {boolean} [options.pauseOnFailure=false] - Wrap steps so a failure
 *   pauses the job; Enter on a TTY triggers a retry.
 * @param {object} [options.store] - RunStateStore-like object receiving
 *   addJob/updateJob progress updates for the render loop.
 * @returns {Promise<object>} Job result built by buildJobResult.
 * @throws {Error} If Docker is unreachable or seeding the DTU fails.
 */
export async function executeLocalJob(job, options) {
    const pauseOnFailure = options?.pauseOnFailure ?? false;
    const startTime = Date.now();
    const store = options?.store;
    // ── Pre-flight: verify Docker is reachable ────────────────────────────────
    try {
        await docker.ping();
    }
    catch (err) {
        // ECONNREFUSED/ENOENT indicate the daemon/socket is absent rather than
        // a transient error, so we can give a more specific hint.
        const isSocket = err?.code === "ECONNREFUSED" || err?.code === "ENOENT";
        const hint = isSocket
            ? "Docker does not appear to be running."
            : `Docker is not reachable: ${err?.message || err}`;
        throw new Error(`${hint}\n` +
            "\n" +
            " To fix this:\n" +
            " 1. Start your Docker runtime (OrbStack, Docker Desktop, etc.)\n" +
            " 2. Wait for the engine to be ready\n" +
            " 3. Re-run the workflow\n");
    }
    // ── Prepare directories ───────────────────────────────────────────────────
    // containerName doubles as the job's key in the store and the Docker
    // container name.
    const { name: containerName, runDir, logDir, debugLogPath, } = createLogContext("agent-ci", job.runnerName);
    // Register the job in the store so the render loop can show the boot spinner
    store?.addJob(job.workflowPath ?? "", job.taskId ?? "job", containerName, {
        logDir,
        debugLogPath,
    });
    store?.updateJob(containerName, {
        status: "booting",
        startedAt: new Date().toISOString(),
    });
    const bootStart = Date.now();
    // bt = "boot timing": logs elapsed ms since `since` and returns a fresh
    // timestamp so call sites can chain measurements.
    const bt = (label, since) => {
        debugBoot(`${containerName} ${label}: ${Date.now() - since}ms`);
        return Date.now();
    };
    // Start an ephemeral in-process DTU for this job run so each job gets its
    // own isolated DTU instance on a random port — eliminating port conflicts.
    // On failure we fall back to the configured shared GITHUB_API_URL.
    let t0 = Date.now();
    const dtuCacheDir = path.resolve(getWorkingDirectory(), "cache", "dtu");
    const ephemeralDtu = await startEphemeralDtu(dtuCacheDir).catch(() => null);
    const dtuUrl = ephemeralDtu?.url ?? config.GITHUB_API_URL;
    t0 = bt("dtu-start", t0);
    // Best-effort runner registration with the DTU; errors are swallowed
    // because the DTU endpoint may not exist in fallback mode.
    await fetch(`${dtuUrl}/_dtu/start-runner`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
            runnerName: containerName,
            logDir,
            timelineDir: logDir,
            // Package manager stores are bind-mounted into the container, so there's
            // no need for the runner to tar/gzip them. Tell the DTU to return a
            // synthetic hit for any cache key matching these patterns — skipping the
            // 60s+ tar entirely.
            virtualCachePatterns: ["pnpm", "npm", "yarn", "bun"],
        }),
    }).catch(() => {
        /* non-fatal */
    });
    t0 = bt("dtu-register", t0);
    // Write metadata if available (to help the UI map logs to workflows)
    writeJobMetadata({ logDir, containerName, job });
    // Open debug stream to capture raw container output
    const debugStream = fs.createWriteStream(debugLogPath);
    // ── Create run directories ────────────────────────────────────────────────
    const dirs = createRunDirectories({
        runDir,
        githubRepo: job.githubRepo,
        workflowPath: job.workflowPath,
    });
    // Signal handler: ensure cleanup runs even when killed.
    const signalCleanup = () => {
        killRunnerContainers(containerName);
        for (const d of [dirs.containerWorkDir, dirs.shimsDir, dirs.signalsDir, dirs.diagDir]) {
            try {
                fs.rmSync(d, { recursive: true, force: true });
            }
            catch { }
        }
        process.exit(1);
    };
    process.once("SIGINT", signalCleanup);
    process.once("SIGTERM", signalCleanup);
    try {
        // 1. Seed the job to Local DTU
        // When job.githubRepo ("owner/name") is set, synthesize a repository
        // object; otherwise pass through whatever job.repository already holds.
        const [githubOwner, githubRepoName] = (job.githubRepo || "").split("/");
        const overriddenRepository = job.githubRepo
            ? {
                full_name: job.githubRepo,
                name: githubRepoName,
                owner: { login: githubOwner },
                default_branch: job.repository?.default_branch || "main",
            }
            : job.repository;
        // Pause-on-failure wraps each step so a failing step signals and waits
        // instead of ending the job.
        const seededSteps = pauseOnFailure ? wrapJobSteps(job.steps ?? [], true) : job.steps;
        t0 = Date.now();
        const seedResponse = await fetch(`${dtuUrl}/_dtu/seed`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({
                // Defaults first; the `...job` spread overrides them, and the
                // explicit steps/repository after the spread override job's own.
                id: job.githubJobId || "1",
                name: "job",
                status: "queued",
                localPath: dirs.workspaceDir,
                ...job,
                steps: seededSteps,
                repository: overriddenRepository,
            }),
        });
        if (!seedResponse.ok) {
            throw new Error(`Failed to seed DTU: ${seedResponse.status} ${seedResponse.statusText}`);
        }
        t0 = bt("dtu-seed", t0);
        // 2. Registration token (mock for local)
        const registrationToken = "mock_local_token";
        // 4. Write git shim BEFORE container start so the entrypoint can install it
        // immediately. On Linux, prepareWorkspace (rsync) is slow enough that the
        // container entrypoint would race ahead and find an empty shims dir.
        const fakeSha = computeFakeSha(job.headSha);
        writeGitShim(dirs.shimsDir, fakeSha);
        // Prepare workspace files in parallel with container setup
        const workspacePrepStart = Date.now();
        const workspacePrepPromise = (async () => {
            try {
                prepareWorkspace({
                    workflowPath: job.workflowPath,
                    headSha: job.headSha,
                    githubRepo: job.githubRepo,
                    workspaceDir: dirs.workspaceDir,
                });
            }
            catch (err) {
                debugRunner(`Failed to prepare workspace: ${err}. Using host fallback.`);
            }
            // Open permissions so the container's (possibly different) uid can
            // write into the bind-mounted dirs.
            try {
                execSync(`chmod -R 777 "${dirs.containerWorkDir}" "${dirs.diagDir}"`, { stdio: "pipe" });
            }
            catch {
                // Non-fatal: entrypoint has a fallback
            }
            bt("workspace-prep", workspacePrepStart);
        })();
        // 6. Spawn container
        const dtuPort = new URL(dtuUrl).port || "80";
        const dtuHost = resolveDtuHost();
        const dockerApiUrl = resolveDockerApiUrl(dtuUrl, dtuHost);
        const githubRepo = job.githubRepo || config.GITHUB_REPO;
        const repoUrl = `${dockerApiUrl}/${githubRepo}`;
        debugRunner(`Spawning container ${containerName}...`);
        // Pre-cleanup: remove any stale container with the same name
        try {
            const stale = docker.getContainer(containerName);
            await stale.remove({ force: true });
        }
        catch {
            // Ignore - container doesn't exist
        }
        // ── Service containers ────────────────────────────────────────────────────
        let serviceCtx;
        if (job.services && job.services.length > 0) {
            const svcStart = Date.now();
            debugRunner(`Starting ${job.services.length} service container(s)...`);
            serviceCtx = await startServiceContainers(docker, job.services, containerName, (line) => debugRunner(line));
            bt("service-containers", svcStart);
        }
        // Shell snippet prepended to the container command to set up service
        // port forwards before the runner starts.
        const svcPortForwardSnippet = serviceCtx?.portForwards.length
            ? serviceCtx.portForwards.join(" \n") + " \nsleep 0.3 && "
            : "";
        // ── Direct container injection ─────────────────────────────────────────────
        // "Direct container" mode runs the job in the user-specified image
        // (job.container.image) and bind-mounts a host copy of the runner
        // binary into it, instead of using the stock runner image.
        const hostWorkDir = dirs.containerWorkDir;
        const hostRunnerSeedDir = path.resolve(getWorkingDirectory(), "runner");
        const hostRunnerDir = path.resolve(runDir, "runner");
        const useDirectContainer = !!job.container;
        const containerImage = useDirectContainer ? job.container.image : IMAGE;
        if (useDirectContainer) {
            await fs.promises.mkdir(hostRunnerSeedDir, { recursive: true });
            // Marker file makes the extraction below a one-time cost per host.
            const markerFile = path.join(hostRunnerSeedDir, ".seeded");
            try {
                await fs.promises.access(markerFile);
            }
            catch {
                debugRunner(`Extracting runner binary to host (one-time)...`);
                const tmpName = `agent-ci-seed-runner-${Date.now()}`;
                const seedContainer = await docker.createContainer({
                    Image: IMAGE,
                    name: tmpName,
                    Cmd: ["true"],
                });
                // NOTE(review): this shadows the top-level execSync import with
                // a dynamic re-import — presumably intentional for tree-shaking
                // or bundling; confirm before simplifying.
                const { execSync } = await import("node:child_process");
                execSync(`docker cp ${tmpName}:/home/runner/. "${hostRunnerSeedDir}/"`, { stdio: "pipe" });
                await seedContainer.remove();
                // Strip the .NET dependency check from config.sh so it can run
                // inside arbitrary user images.
                const configShPath = path.join(hostRunnerSeedDir, "config.sh");
                let configSh = await fs.promises.readFile(configShPath, "utf8");
                configSh = configSh.replace(/# Check dotnet Core.*?^fi$/ms, "# Dependency checks removed for container injection");
                await fs.promises.writeFile(configShPath, configSh);
                await fs.promises.writeFile(markerFile, new Date().toISOString());
                debugRunner(`Runner extracted.`);
            }
            // Drop any stale credential files left in the shared seed dir from
            // a previous run before copying it for this job.
            for (const staleFile of [".runner", ".credentials", ".credentials_rsaparams"]) {
                try {
                    fs.rmSync(path.join(hostRunnerSeedDir, staleFile));
                }
                catch {
                    /* not present */
                }
            }
            // Per-run copy of the runner so concurrent jobs don't share state.
            execSync(`cp -a "${hostRunnerSeedDir}" "${hostRunnerDir}"`, { stdio: "pipe" });
            const resolvedUrl = `${dockerApiUrl}/${githubRepo}`;
            writeRunnerCredentials(hostRunnerDir, containerName, resolvedUrl);
        }
        if (useDirectContainer) {
            debugRunner(`Pulling ${containerImage}...`);
            // dockerode's pull is callback-based; followProgress resolves once
            // the pull stream completes.
            await new Promise((resolve, reject) => {
                docker.pull(containerImage, (err, stream) => {
                    if (err) {
                        return reject(err);
                    }
                    docker.modem.followProgress(stream, (err) => {
                        if (err) {
                            return reject(err);
                        }
                        resolve();
                    });
                });
            });
        }
        const containerEnv = buildContainerEnv({
            containerName,
            registrationToken,
            repoUrl,
            dockerApiUrl,
            githubRepo,
            headSha: job.headSha,
            dtuHost,
            useDirectContainer,
        });
        const containerBinds = buildContainerBinds({
            hostWorkDir,
            shimsDir: dirs.shimsDir,
            // The signals dir is only mounted when pause-on-failure is active.
            signalsDir: pauseOnFailure ? dirs.signalsDir : undefined,
            diagDir: dirs.diagDir,
            toolCacheDir: dirs.toolCacheDir,
            pnpmStoreDir: dirs.pnpmStoreDir,
            npmCacheDir: dirs.npmCacheDir,
            bunCacheDir: dirs.bunCacheDir,
            playwrightCacheDir: dirs.playwrightCacheDir,
            warmModulesDir: dirs.warmModulesDir,
            hostRunnerDir,
            useDirectContainer,
        });
        const containerCmd = buildContainerCmd({
            svcPortForwardSnippet,
            dtuPort,
            dtuHost,
            useDirectContainer,
            containerName,
        });
        t0 = Date.now();
        const container = await docker.createContainer({
            Image: containerImage,
            name: containerName,
            Env: containerEnv,
            // Direct containers get bash as entrypoint so the injected runner
            // bootstrap command can run in arbitrary images.
            ...(useDirectContainer ? { Entrypoint: ["bash"] } : {}),
            Cmd: containerCmd,
            HostConfig: {
                Binds: containerBinds,
                AutoRemove: false,
                Ulimits: [{ Name: "nofile", Soft: 65536, Hard: 65536 }],
                ...(serviceCtx ? { NetworkMode: serviceCtx.networkName } : {}),
            },
            Tty: true,
        });
        t0 = bt("container-create", t0);
        // Workspace prep must finish before the container starts using the
        // bind-mounted dirs.
        await workspacePrepPromise;
        t0 = Date.now();
        await container.start();
        bt("container-start", t0);
        // 7. Stream logs ───────────────────────────────────────────────────────────
        const rawStream = (await container.logs({
            follow: true,
            stdout: true,
            stderr: true,
        }));
        let tailDone = false;
        let lastFailedStep = null;
        let isPaused = false;
        let pausedStepName = null;
        let pausedAtMs = null;
        let lastSeenAttempt = 0;
        let isBooting = true;
        let stdinListening = false;
        const timelinePath = path.join(logDir, "timeline.json");
        const pausedSignalPath = path.join(dirs.signalsDir, "paused");
        const signalsRunDir = path.dirname(dirs.signalsDir);
        // Listen for Enter key to trigger retry when paused
        const setupStdinRetry = () => {
            if (stdinListening || !process.stdin.isTTY) {
                return;
            }
            stdinListening = true;
            process.stdin.setRawMode(true);
            process.stdin.resume();
            process.stdin.on("data", (key) => {
                // Ctrl+C (byte 3) in raw mode: restore the terminal and exit 130.
                if (key[0] === 3) {
                    process.stdin.setRawMode(false);
                    process.exit(130);
                }
                // Enter (byte 13) while paused: sync workspace and drop the
                // retry signal file the wrapped step is polling for.
                if (key[0] === 13 && isPaused) {
                    syncWorkspaceForRetry(signalsRunDir);
                    fs.writeFileSync(path.join(dirs.signalsDir, "retry"), "");
                }
            });
        };
        const cleanupStdin = () => {
            if (stdinListening && process.stdin.isTTY) {
                process.stdin.setRawMode(false);
                process.stdin.pause();
                process.stdin.removeAllListeners("data");
                stdinListening = false;
            }
        };
        // ── Timeline → store updater ──────────────────────────────────────────────
        // Reads timeline.json and the paused signal, then updates the RunStateStore.
        // The render loop in cli.ts reads the store and calls renderRunState().
        const updateStoreFromTimeline = () => {
            try {
                // ── Pause-on-failure: check for paused signal ───────────────────────
                // Signal file format: line 1 = failed step name, line 2 = attempt
                // number. A new attempt number marks a fresh pause.
                if (pauseOnFailure && fs.existsSync(pausedSignalPath)) {
                    const content = fs.readFileSync(pausedSignalPath, "utf-8").trim();
                    const lines = content.split("\n");
                    pausedStepName = lines[0] || null;
                    const attempt = parseInt(lines[1] || "1", 10);
                    if (attempt !== lastSeenAttempt) {
                        lastSeenAttempt = attempt;
                        isPaused = true;
                        pausedAtMs = Date.now();
                        setupStdinRetry();
                        // Read last output lines from the failed step's log
                        let tailLines = [];
                        if (pausedStepName) {
                            const stepsDir = path.join(logDir, "steps");
                            const sanitized = sanitizeStepName(pausedStepName);
                            const byName = path.join(stepsDir, `${sanitized}.log`);
                            tailLines = tailLogFile(byName, 20);
                            // Fallback: if no log matched by name, tail the most
                            // recently modified step log instead.
                            if (tailLines.length === 0 && fs.existsSync(stepsDir)) {
                                let newest = "";
                                let newestMtime = 0;
                                for (const f of fs.readdirSync(stepsDir)) {
                                    if (!f.endsWith(".log")) {
                                        continue;
                                    }
                                    const mt = fs.statSync(path.join(stepsDir, f)).mtimeMs;
                                    if (mt > newestMtime) {
                                        newestMtime = mt;
                                        newest = f;
                                    }
                                }
                                if (newest) {
                                    tailLines = tailLogFile(path.join(stepsDir, newest), 20);
                                }
                            }
                        }
                        // NOTE(review): pausedAtMs is a millisecond timestamp but is
                        // stored as an ISO string here — presumably the store's field
                        // contract; confirm against run-state.js before renaming.
                        store?.updateJob(containerName, {
                            status: "paused",
                            pausedAtStep: pausedStepName || undefined,
                            pausedAtMs: new Date(pausedAtMs).toISOString(),
                            attempt: lastSeenAttempt,
                            lastOutputLines: tailLines,
                        });
                    }
                }
                else if (isPaused && !fs.existsSync(pausedSignalPath)) {
                    // Pause signal removed — job is retrying
                    isPaused = false;
                    pausedAtMs = null;
                    store?.updateJob(containerName, { status: "running", pausedAtMs: undefined });
                }
                if (!fs.existsSync(timelinePath)) {
                    return;
                }
                const records = JSON.parse(fs.readFileSync(timelinePath, "utf-8"));
                const steps = records
                    .filter((r) => r.type === "Task" && r.name)
                    .sort((a, b) => (a.order ?? 0) - (b.order ?? 0));
                if (steps.length === 0) {
                    return;
                }
                // ── Transition from booting to running on first timeline entry ────────
                if (isBooting) {
                    isBooting = false;
                    bt("total", bootStart);
                    store?.updateJob(containerName, {
                        status: isPaused ? "paused" : "running",
                        bootDurationMs: Date.now() - bootStart,
                    });
                }
                // ── Build StepState[] from timeline records ───────────────────────────
                // A duplicate step name in the timeline indicates the runner emitted
                // a "post" phase for that step (e.g. post-checkout cleanup).
                const seenNames = new Set();
                let hasPostSteps = false;
                let completeJobRecord = null;
                const preCountNames = new Set();
                for (const r of steps) {
                    if (!preCountNames.has(r.name)) {
                        preCountNames.add(r.name);
                    }
                    else {
                        hasPostSteps = true;
                    }
                }
                const hasCompleteJob = preCountNames.has("Complete job");
                // Total = unique names (minus "Complete job") + "Post Setup" (if any) + "Complete job"
                const totalSteps = preCountNames.size -
                    (hasCompleteJob ? 1 : 0) +
                    (hasPostSteps ? 1 : 0) +
                    (hasCompleteJob ? 1 : 0);
                const padW = String(totalSteps).length;
                let stepIdx = 0;
                const newSteps = [];
                for (const r of steps) {
                    if (seenNames.has(r.name)) {
                        continue;
                    }
                    seenNames.add(r.name);
                    // "Complete job" is appended last, after any post steps.
                    if (r.name === "Complete job") {
                        completeJobRecord = r;
                        continue;
                    }
                    stepIdx++;
                    const durationMs = r.startTime && r.finishTime
                        ? new Date(r.finishTime).getTime() - new Date(r.startTime).getTime()
                        : undefined;
                    let status;
                    // No result yet and not completed → running (or paused) if
                    // started, otherwise pending.
                    if (!r.result && r.state !== "completed") {
                        if (r.startTime) {
                            status = isPaused && pausedStepName === r.name ? "paused" : "running";
                        }
                        else {
                            status = "pending";
                        }
                    }
                    else {
                        const result = (r.result || "").toLowerCase();
                        if (result === "failed") {
                            lastFailedStep = r.name;
                            status = "failed";
                        }
                        else if (result === "skipped") {
                            status = "skipped";
                        }
                        else {
                            status = "completed";
                        }
                    }
                    newSteps.push({
                        name: r.name,
                        index: stepIdx,
                        status,
                        startedAt: r.startTime,
                        completedAt: r.finishTime,
                        durationMs,
                    });
                    // NOTE(review): padW is otherwise unused — the void keeps the
                    // linter quiet; candidate for removal.
                    void padW; // used for totalSteps calculation above
                }
                const jobFinished = !!completeJobRecord?.result;
                if (hasPostSteps && jobFinished) {
                    stepIdx++;
                    newSteps.push({ name: "Post Setup", index: stepIdx, status: "completed" });
                }
                if (completeJobRecord && jobFinished) {
                    stepIdx++;
                    const durationMs = completeJobRecord.startTime && completeJobRecord.finishTime
                        ? new Date(completeJobRecord.finishTime).getTime() -
                            new Date(completeJobRecord.startTime).getTime()
                        : undefined;
                    newSteps.push({
                        name: "Complete job",
                        index: stepIdx,
                        status: "completed",
                        startedAt: completeJobRecord.startTime,
                        completedAt: completeJobRecord.finishTime,
                        durationMs,
                    });
                }
                // Compute total duration from timeline step times
                let totalDurationMs;
                if (jobFinished) {
                    const allTimes = steps
                        .filter((r) => r.startTime && r.finishTime)
                        .map((r) => ({
                            start: new Date(r.startTime).getTime(),
                            end: new Date(r.finishTime).getTime(),
                        }));
                    if (allTimes.length > 0) {
                        const earliest = Math.min(...allTimes.map((t) => t.start));
                        const latest = Math.max(...allTimes.map((t) => t.end));
                        const ms = latest - earliest;
                        if (!isNaN(ms) && ms >= 0) {
                            totalDurationMs = ms;
                        }
                    }
                }
                store?.updateJob(containerName, {
                    steps: newSteps,
                    ...(jobFinished
                        ? {
                            status: lastFailedStep ? "failed" : "completed",
                            failedStep: lastFailedStep || undefined,
                            durationMs: totalDurationMs,
                        }
                        : {}),
                });
            }
            catch {
                // Best-effort
            }
        };
        // Poll the timeline every 100ms until log streaming finishes, then do
        // one final pass to capture the terminal state.
        const pollPromise = (async () => {
            while (!tailDone) {
                updateStoreFromTimeline();
                await new Promise((r) => setTimeout(r, 100));
            }
            // Final update
            updateStoreFromTimeline();
        })();
        // Start waiting for container exit in parallel with log streaming.
        const containerWaitPromise = container.wait();
        await new Promise((resolve) => {
            const rl = createInterface({ input: rawStream, crlfDelay: Infinity });
            rl.on("line", (line) => {
                debugStream.write(line + "\n");
            });
            rl.on("close", () => {
                resolve();
            });
            // Destroy the log stream on container exit so readline's "close"
            // fires even if the stream would otherwise stay open.
            containerWaitPromise
                .then(() => {
                    rawStream.destroy?.();
                })
                .catch(() => { });
        });
        tailDone = true;
        cleanupStdin();
        await pollPromise;
        // 8. Wait for completion
        const CONTAINER_EXIT_TIMEOUT_MS = 30000;
        let waitResult;
        try {
            waitResult = await Promise.race([
                containerWaitPromise,
                new Promise((_, reject) => setTimeout(() => reject(new Error("Container exit timeout")), CONTAINER_EXIT_TIMEOUT_MS)),
            ]);
        }
        catch {
            debugRunner(`Runner did not exit within ${CONTAINER_EXIT_TIMEOUT_MS / 1000}s, force-stopping container…`);
            try {
                await container.stop({ t: 5 });
            }
            catch {
                /* already stopped */
            }
            waitResult = await container.wait();
        }
        const containerExitCode = waitResult.StatusCode;
        const jobSucceeded = lastFailedStep === null && containerExitCode === 0;
        // Update store with final exit code on failure
        if (!jobSucceeded) {
            store?.updateJob(containerName, {
                failedExitCode: containerExitCode !== 0 ? containerExitCode : undefined,
            });
        }
        // Flush the debug log before removing anything.
        await new Promise((resolve) => debugStream.end(resolve));
        // Cleanup
        try {
            await container.remove({ force: true });
        }
        catch {
            /* already removed */
        }
        if (serviceCtx) {
            await cleanupServiceContainers(docker, serviceCtx, (line) => debugRunner(line));
        }
        if (fs.existsSync(dirs.shimsDir)) {
            fs.rmSync(dirs.shimsDir, { recursive: true, force: true });
        }
        // Keep the signals dir around under pause-on-failure so a later retry
        // can still read it.
        if (!pauseOnFailure && fs.existsSync(dirs.signalsDir)) {
            fs.rmSync(dirs.signalsDir, { recursive: true, force: true });
        }
        if (fs.existsSync(dirs.diagDir)) {
            fs.rmSync(dirs.diagDir, { recursive: true, force: true });
        }
        if (fs.existsSync(hostRunnerDir)) {
            fs.rmSync(hostRunnerDir, { recursive: true, force: true });
        }
        // The work dir is preserved on failure for post-mortem inspection.
        if (jobSucceeded && fs.existsSync(dirs.containerWorkDir)) {
            fs.rmSync(dirs.containerWorkDir, { recursive: true, force: true });
        }
        await ephemeralDtu?.close().catch(() => { });
        return buildJobResult({
            containerName,
            job,
            startTime,
            jobSucceeded,
            lastFailedStep,
            containerExitCode,
            timelinePath,
            logDir,
            debugLogPath,
        });
    }
    finally {
        // Always unhook the signal handlers so repeated calls in one process
        // don't accumulate listeners.
        process.removeListener("SIGINT", signalCleanup);
        process.removeListener("SIGTERM", signalCleanup);
    }
}