doc-detective 4.3.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,757 @@
1
+ #!/usr/bin/env node
2
+ // @ts-check
3
+ /**
4
+ * Doc Detective platform runner entrypoint.
5
+ *
6
+ * The doc-detective.com platform launches Fly.io machines that boot
7
+ * this script as PID 1. The script orchestrates a single run end-to-end:
8
+ *
9
+ * 1. read DD_RUN_TOKEN / DD_API_BASE / DD_RUN_ID from env
10
+ * 2. GET {api}/api/runs/{id}/spec → fetch config + source + secrets
11
+ * 3. provision /workspace from the source_snapshot (github clone or
12
+ * inline-spec write-out)
13
+ * 4. spawn the local `doc-detective` CLI with the merged config in
14
+ * DOC_DETECTIVE_CONFIG and project secrets in env
15
+ * 5. tee child stdout/stderr to the platform via batched
16
+ * POST /api/runs/{id}/logs
17
+ * 6. POST /api/runs/{id}/finalize { status, exit_code, summary }
18
+ *
19
+ * Belt-and-suspenders: a process-level setTimeout fires
20
+ * `process.exit(124)` at DD_TIMEOUT_SECONDS so a runner that loses
21
+ * connectivity to the API still self-terminates. The platform watchdog
22
+ * is the authoritative timeout — this is just a local guard.
23
+ *
24
+ * SIGTERM (Fly destroy on user-cancel or watchdog stop): best-effort
25
+ * POST a `canceled` finalize, then exit. The server is the source of
26
+ * truth — finalize on SIGTERM is advisory.
27
+ *
28
+ * Artifact upload (saveScreenshot, recordVideo, html report, …) is
29
+ * deliberately out of scope for this iteration; the entrypoint
30
+ * finalizes with an empty artifacts list. A follow-up will walk
31
+ * config.output and POST signed-upload requests.
32
+ *
33
+ * No third-party deps — only Node built-ins. The image already ships
34
+ * doc-detective globally; we don't add to its install footprint.
35
+ */
36
+
37
+ import { spawn } from 'node:child_process';
38
+ import { mkdir, rm, writeFile } from 'node:fs/promises';
39
+ import path from 'node:path';
40
+ import process from 'node:process';
41
+
42
// Canonical on-disk layout the runner materializes for every run; the
// doc-detective CLI sees these paths via DOC_DETECTIVE_CONFIG.
const WORKSPACE_DIR = '/workspace';
const SPECS_SUBDIR = 'specs';
const OUTPUT_SUBDIR = 'output';

/**
 * Project-secret keys that would clobber container-level env or
 * fundamentally alter how Node / the doc-detective CLI executes if
 * the platform let them through. The platform's secret-key validator
 * only constrains key shape (`/^[A-Za-z_][A-Za-z0-9_]*$/`), so a user
 * could create a secret named `PATH` or `NODE_OPTIONS` and have it
 * override the container's value once we spread `secrets` into
 * `childEnv`. Defense in depth: filter here at injection time and log
 * the rejection so the run's logs make the source of the discrepancy
 * obvious. The platform should also reject these names server-side;
 * this list is the runner-side backstop.
 */
const SECRET_DENYLIST = new Set([
  'PATH',
  'HOME',
  'NODE_OPTIONS',
  'NODE_PATH',
  'LD_PRELOAD',
  'LD_LIBRARY_PATH',
  'DOC_DETECTIVE_CONFIG',
  'DOC_DETECTIVE_API',
  'DOC_DETECTIVE_META'
]);
// Flush a /logs batch once it holds this many lines…
const LOG_BATCH_SIZE = 100;
// …or once this many milliseconds elapse, whichever comes first.
const LOG_FLUSH_INTERVAL_MS = 1000;
// Per-line UTF-8 byte cap aligned with the platform's 64 KB validator.
// Anything longer is sliced into 60 KB chunks (4 KB headroom for
// non-ASCII expansion when the slice falls mid-codepoint).
const LOG_LINE_BYTE_LIMIT = 60 * 1024;
// Cap on concurrently-pending /logs POSTs. With LOG_BATCH_SIZE=100 and
// LOG_LINE_BYTE_LIMIT=60 KB, each pending batch retains ~6 MB. 8
// pending = ~48 MB worst case — bounded enough to stay well under
// any realistic Fly machine memory cap even when the platform API
// stalls. Past the cap we load-shed: drop the new batch with a warn
// log rather than backpressure (which would block the synchronous
// `add()` callers — the child process's stdout/stderr `data`
// handlers — and surface as a runaway memory accumulation in the
// stream's internal buffer instead).
const LOG_MAX_PENDING_BATCHES = 8;
85
+
86
/**
 * Minimal structured logger for the runner itself. Writes one JSON
 * object per line to the *real* stderr — NOT the captured-and-forwarded
 * child streams — so these records land in Fly's machine logs even
 * when the /logs shipper is unavailable.
 *
 * @param {string} level - severity tag ('info' | 'warn' | 'error' | 'fatal')
 * @param {string} msg - human-readable message
 * @param {object} [extra] - additional fields merged into the record
 */
function localLog(level, msg, extra) {
  const record = {
    ts: new Date().toISOString(),
    level,
    msg
  };
  Object.assign(record, extra ?? {});
  process.stderr.write(`${JSON.stringify(record)}\n`);
}
95
+
96
/**
 * Read an environment variable that must be present and non-empty.
 *
 * @param {string} name - env var name
 * @returns {string} the variable's value
 * @throws {Error} when the variable is unset or the empty string
 */
function readRequiredEnv(name) {
  const value = process.env[name];
  if (value === undefined || value === '') {
    throw new Error(`Missing required env var: ${name}`);
  }
  return value;
}
103
+
104
/** Default per-call fetch budget. The platform watchdog is the
 * authoritative timeout for the run as a whole; this just keeps a
 * single hung request from holding everything else hostage. */
const API_CALL_TIMEOUT_MS = 30_000;
/** Slightly longer budget for /finalize so it has a chance to land
 * even when the platform is under load — finalize is the
 * acknowledgment that lets the row leave its terminal-but-stuck
 * state on time, so it gets priority over /logs. */
const API_FINALIZE_TIMEOUT_MS = 60_000;

/**
 * Authenticated fetch against the platform API.
 *
 * Throws on any non-2xx status not listed in `allowedStatuses`;
 * otherwise returns the raw Response so callers can read JSON or
 * treat the call as fire-and-forget. The request is aborted after
 * `timeoutMs` (via AbortSignal.timeout) so a blackholed platform
 * endpoint can't pin the runner open until the global self-kill.
 *
 * @param {string} method - HTTP verb
 * @param {string} url - absolute URL
 * @param {string} token - bearer token for the Authorization header
 * @param {any} [body] - JSON-serialized when provided
 * @param {number[]} [allowedStatuses] - non-2xx statuses to pass through
 * @param {number} [timeoutMs] - per-call abort budget
 * @returns {Promise<Response>}
 * @throws {Error} on timeout or a disallowed non-2xx status
 */
async function apiCall(method, url, token, body, allowedStatuses = [], timeoutMs = API_CALL_TIMEOUT_MS) {
  const hasBody = body !== undefined;
  const headers = hasBody
    ? { authorization: `Bearer ${token}`, 'content-type': 'application/json' }
    : { authorization: `Bearer ${token}` };
  let res;
  try {
    res = await fetch(url, {
      method,
      headers,
      body: hasBody ? JSON.stringify(body) : undefined,
      signal: AbortSignal.timeout(timeoutMs)
    });
  } catch (e) {
    // AbortSignal.timeout rejects with a TimeoutError; re-throw it with
    // the URL + budget so the log line names the cause directly.
    if (e?.name === 'TimeoutError') {
      throw new Error(`${method} ${url} timed out after ${timeoutMs}ms`);
    }
    throw e;
  }
  if (res.ok || allowedStatuses.includes(res.status)) {
    return res;
  }
  const text = await res.text().catch(() => '');
  throw new Error(`${method} ${url} failed: ${res.status} ${res.statusText} ${text}`);
}
149
+
150
/**
 * GET the run spec from the platform.
 *
 * A 410 Gone means the run was canceled before this runner first
 * connected; callers treat that as a clean no-op exit. Any other
 * 4xx/5xx bubbles up from apiCall as a fatal error.
 *
 * @param {string} apiBase - platform API origin, no trailing slash
 * @param {string} runId
 * @param {string} token - run bearer token
 * @returns {Promise<{canceled: true} | {canceled: false, spec: any}>}
 */
async function fetchSpec(apiBase, runId, token) {
  const res = await apiCall(
    'GET',
    `${apiBase}/api/runs/${encodeURIComponent(runId)}/spec`,
    token,
    undefined,
    [410]
  );
  if (res.status === 410) return { canceled: true };
  return { canceled: false, spec: await res.json() };
}
164
+
165
/**
 * Spawn a child process and stream its stdout/stderr line-by-line to
 * `onLine(streamName, line)`, resolving with an exit code only after
 * the process has terminated AND both stdio pipes have closed.
 *
 * While the child runs, SIGTERM on this process is forwarded to the
 * child so a platform cancel reaches it instead of orphaning it.
 *
 * Exit mapping follows Node conventions: a normal exit resolves with
 * the child's code; termination by SIGTERM resolves 143 (128 + 15);
 * any other signal resolves 1.
 *
 * @param {string} cmd - executable to spawn
 * @param {string[]} args - argv for the child
 * @param {{cwd: string, env: object}} opts
 * @param {(stream: 'stdout'|'stderr', line: string) => void} onLine
 * @returns {Promise<number>} exit code
 */
function runChild(cmd, args, opts, onLine) {
  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args, {
      cwd: opts.cwd,
      env: opts.env,
      // stdin is /dev/null — doc-detective never reads it; stdout and
      // stderr are pipes so we can intercept and forward them.
      stdio: ['ignore', 'pipe', 'pipe']
    });

    // Relay cancel signals for the child's lifetime so it is not
    // orphaned when the platform tears the machine down.
    const relaySigterm = () => {
      try {
        child.kill('SIGTERM');
      } catch {
        // child may have already exited — nothing to kill
      }
    };
    process.on('SIGTERM', relaySigterm);

    const wireStream = (stream, name) => {
      let residue = '';
      stream.setEncoding('utf8');
      stream.on('data', (chunk) => {
        residue += chunk;
        let cut = residue.indexOf('\n');
        while (cut !== -1) {
          const line = residue.slice(0, cut);
          residue = residue.slice(cut + 1);
          if (line !== '') onLine(name, line);
          cut = residue.indexOf('\n');
        }
      });
      stream.on('end', () => {
        // Flush a trailing partial line (child exited mid-write).
        if (residue !== '') {
          onLine(name, residue);
          residue = '';
        }
      });
    };
    wireStream(child.stdout, 'stdout');
    wireStream(child.stderr, 'stderr');

    child.on('error', (err) => {
      process.off('SIGTERM', relaySigterm);
      reject(err);
    });

    // Resolve on 'close', not 'exit': 'exit' can fire before the stdio
    // 'end' events, so resolving there could let the caller proceed
    // before the trailing-flush above delivered a partial last line.
    // 'close' waits for both process termination and stream closure.
    child.on('close', (code, signal) => {
      process.off('SIGTERM', relaySigterm);
      if (code !== null) {
        resolve(code);
      } else if (signal === 'SIGTERM') {
        resolve(143);
      } else {
        resolve(1);
      }
    });
  });
}
235
+
236
/**
 * Slice a log line into chunks of at most `byteLimit` UTF-8 bytes so
 * the platform's 64 KB-per-line cap doesn't bounce the whole batch.
 *
 * Generalized over the limit (defaulting to LOG_LINE_BYTE_LIMIT) so
 * the chunking logic is testable and reusable without touching the
 * module constant; existing call sites are unaffected.
 *
 * Slicing happens on raw byte boundaries. TextDecoder's default
 * `stream: false` replaces an incomplete multi-byte sequence at a
 * slice edge with U+FFFD — the fragment is *not* stitched back
 * together from the next slice. Accepted trade-off: the platform
 * treats the payload as opaque text, and a rare mangled codepoint on
 * a 60 KB cliff is noise compared to preserving the rest of the line.
 *
 * @param {string} payload - the log line to (maybe) split
 * @param {number} [byteLimit=LOG_LINE_BYTE_LIMIT] - max UTF-8 bytes per chunk
 * @returns {string[]} one or more chunks; a single-element array
 *   containing `payload` unchanged when it already fits
 */
function sliceLogLine(payload, byteLimit = LOG_LINE_BYTE_LIMIT) {
  const bytes = new TextEncoder().encode(payload);
  if (bytes.byteLength <= byteLimit) return [payload];
  const decoder = new TextDecoder('utf-8');
  const out = [];
  for (let i = 0; i < bytes.byteLength; i += byteLimit) {
    const slice = bytes.subarray(i, Math.min(i + byteLimit, bytes.byteLength));
    out.push(decoder.decode(slice));
  }
  return out;
}
259
+
260
/**
 * Buffered log shipper. Lines accumulate in memory; we flush when the
 * batch hits LOG_BATCH_SIZE or LOG_FLUSH_INTERVAL_MS elapses, whichever
 * comes first.
 *
 * `add()` is synchronous (callers don't await per-line work); when a
 * batch trips LOG_BATCH_SIZE the POST fires-and-forgets but the
 * promise lands in the `pending` set so `flush()` is a true
 * "everything is posted" gate. Without that, callers awaiting
 * `flush()` after an auto-flush could see an empty buffer and resolve
 * before the in-flight POST completed — losing logs at finalize time.
 *
 * @param {string} apiBase - platform API origin, no trailing slash
 * @param {string} runId - run being logged against
 * @param {string} token - run bearer token for the /logs POSTs
 * @returns {{add: (stream: string, payload: string) => void,
 *            flush: () => Promise<void>}} shipper handle; `add` is
 *   synchronous and never throws for log-ship failures, `flush`
 *   resolves once every in-flight POST has settled
 */
function makeLogShipper(apiBase, runId, token) {
  // Lines awaiting the next batch POST.
  const buffer = [];
  // In-flight POST promises; `flush()` awaits everything in here.
  const pending = new Set();
  // Count of lines shed under backpressure (see fireBatch).
  let dropped = 0;
  // Handle for the timed-flush setTimeout, or null when none is armed.
  let timer = null;

  function fireBatch(lines) {
    // Load-shed: if too many POSTs are already in flight (platform
    // API blackholing, slow network), drop the new batch and bump
    // a counter. Without this cap, `add()` continues to enqueue
    // indefinitely and pending grows unbounded — each retained
    // batch holds ~6 MB so a sustained outage OOM-kills the
    // machine. The lost log lines are noted in localLog (Fly
    // machine log) and a single summary stderr breadcrumb so the
    // run's own log stream tells the user something went missing.
    if (pending.size >= LOG_MAX_PENDING_BATCHES) {
      if (dropped === 0) {
        // First-drop breadcrumb. Subsequent drops are silent on
        // stderr (lest we DOS the run's own log stream), but
        // each one bumps the counter for finalize-time visibility.
        try {
          process.stderr.write(
            `runner: /logs backlog at cap (${LOG_MAX_PENDING_BATCHES} pending); dropping batches until pressure clears\n`
          );
        } catch {
          // stderr write can throw if the stream is gone; harmless.
        }
      }
      dropped += lines.length;
      localLog('warn', 'log batch shed under backpressure', {
        pending: pending.size,
        droppedSoFar: dropped
      });
      return Promise.resolve();
    }
    const p = (async () => {
      try {
        await apiCall(
          'POST',
          `${apiBase}/api/runs/${encodeURIComponent(runId)}/logs`,
          token,
          { lines }
        );
      } catch (e) {
        // Don't fail the run for transient log-ship failures. The
        // run remains correct from the user's perspective; logs may
        // be incomplete. Surface to the local stderr so the Fly
        // machine log captures the drop.
        localLog('warn', 'log ship failed', { err: String(e) });
      } finally {
        pending.delete(p);
      }
    })();
    pending.add(p);
    return p;
  }

  // Drain the buffer immediately and wait for every in-flight POST.
  async function flushNow() {
    if (timer) {
      clearTimeout(timer);
      timer = null;
    }
    if (buffer.length > 0) {
      fireBatch(buffer.splice(0, buffer.length));
    }
    // Wait for *all* in-flight POSTs (this batch's plus any that were
    // fired by an earlier auto-flush). allSettled because individual
    // failures are already swallowed inside fireBatch — no rejection
    // can escape — but allSettled is the principled choice if that
    // ever changes.
    if (pending.size > 0) {
      await Promise.allSettled(Array.from(pending));
    }
  }

  // Arm the interval-based flush; idempotent while a timer is pending.
  function schedule() {
    if (timer) return;
    timer = setTimeout(() => {
      timer = null;
      // Errors are swallowed inside fireBatch and flushNow uses
      // allSettled, so no rejection should escape today; the
      // .catch is a belt-and-suspenders against future changes
      // that might let one through and silently terminate the
      // process with an unhandledRejection.
      flushNow().catch((e) =>
        localLog('warn', 'scheduled flush rejected', { err: String(e) })
      );
    }, LOG_FLUSH_INTERVAL_MS);
  }

  return {
    add(stream, payload) {
      // One timestamp per add(): all slices of an oversize line share it.
      const ts = new Date().toISOString();
      for (const slice of sliceLogLine(payload)) {
        buffer.push({ ts, stream, payload: slice });
        if (buffer.length >= LOG_BATCH_SIZE) {
          // Auto-flush — but keep iterating: an oversize line that
          // produced N slices must still get its remaining N-1
          // slices into the next batch. An earlier draft of this
          // function `return`ed here and silently dropped the
          // tail.
          fireBatch(buffer.splice(0, buffer.length));
        }
      }
      // Only schedule a timed flush if there's actually something
      // to flush — guards against a wasted setTimeout cycle when
      // the loop's last slice was the one that auto-flushed.
      if (buffer.length > 0) schedule();
    },
    flush: flushNow
  };
}
384
+
385
/**
 * Build the effective config_v3 that is handed to the CLI via
 * DOC_DETECTIVE_CONFIG: the user's committed config plus
 * platform-controlled overrides (which clobber committed values).
 *
 * Overrides:
 * - `output` always points at <workspaceDir>/output — a predictable
 *   artifact root for the planned upload step that also keeps the
 *   user's relative paths from escaping the workspace.
 * - `input` is forced to <workspaceDir>/specs for inline sources only;
 *   github sources keep their committed `input` so paths resolve from
 *   the cloned repo cwd.
 *
 * posix.join is deliberate: these strings are interpreted by the CLI
 * inside the Linux container, and plain path.join would emit
 * backslashes on Windows test runners, breaking equality assertions
 * even though no Windows runtime ever consumes the value.
 *
 * @param {object|null|undefined} configSnapshot - user config (may be absent)
 * @param {{type: string}} source - source_snapshot from the spec
 * @param {string} [workspaceDir] - workspace root threaded from main()
 *   so the CLI's view can't drift from where specs/output were written
 * @returns {object} effective config for DOC_DETECTIVE_CONFIG
 */
function buildEffectiveConfig(configSnapshot, source, workspaceDir = WORKSPACE_DIR) {
  const merged = { ...(configSnapshot ?? {}) };
  merged.output = path.posix.join(workspaceDir, OUTPUT_SUBDIR);
  const isInline = source.type === 'inline';
  if (isInline) {
    merged.input = path.posix.join(workspaceDir, SPECS_SUBDIR);
  }
  return merged;
}
414
+
415
/**
 * Resolve an optional `pathPrefix` against `workspaceDir`, rejecting
 * any value that escapes the workspace (".." traversal or an absolute
 * path override — path.resolve treats an absolute prefix as a full
 * replacement, which must also be rejected).
 *
 * Pure — no fs, no spawn — so it is unit-testable without invoking
 * git. The platform-side validator already constrains path_prefix at
 * project-create time; this is the runner-side defense in depth.
 *
 * @param {string} workspaceDir - absolute workspace root
 * @param {string|undefined} pathPrefix - optional subdirectory within the repo
 * @returns {string} the resolved path (workspaceDir itself for the
 *   empty/no-prefix base case)
 * @throws {Error} when the prefix resolves outside the workspace
 */
function resolvePathPrefix(workspaceDir, pathPrefix) {
  if (!pathPrefix || pathPrefix.length === 0) return workspaceDir;
  const candidate = path.resolve(workspaceDir, pathPrefix);
  if (candidate === workspaceDir) return candidate;
  const boundary = workspaceDir.endsWith(path.sep)
    ? workspaceDir
    : `${workspaceDir}${path.sep}`;
  if (!candidate.startsWith(boundary)) {
    throw new Error(`path_prefix escapes workspace: ${pathPrefix}`);
  }
  return candidate;
}
440
+
441
/**
 * Materialize the workspace for a run and return the cwd the
 * doc-detective CLI should execute from.
 *
 * github sources: shallow-clone the public repo at the requested ref
 * into `workspaceDir`, create the output subdir post-clone, and
 * return the path_prefix-resolved cwd. Auth-required repos are out of
 * scope — the platform UI requires a public repo (secrets go through
 * the project secrets layer instead).
 *
 * inline sources: write each spec as a numbered JSON file under
 * <workspaceDir>/specs and return workspaceDir itself.
 *
 * Any prior workspace contents are wiped first — the machine is
 * fresh, but a runner-internal restart of this script must not
 * inherit a stale clone.
 *
 * @param {object} source - source_snapshot from the spec
 * @param {string} [workspaceDir] - defaults to /workspace (the
 *   in-container path); tests pass a tmpdir so they don't touch it
 * @returns {Promise<string>} cwd for the doc-detective CLI
 * @throws {Error} on missing github fields, path_prefix traversal,
 *   clone failure, or an unrecognized source type
 */
async function provisionWorkspace(source, workspaceDir = WORKSPACE_DIR) {
  // Fresh slate before either branch runs.
  await rm(workspaceDir, { recursive: true, force: true });

  if (source.type === 'github') {
    // Fail fast on a malformed spec so the run log shows a clear
    // message instead of an inscrutable `git clone --branch undefined`.
    if (typeof source.repo !== 'string' || source.repo.length === 0) {
      throw new Error('github source missing required field: repo');
    }
    if (typeof source.ref !== 'string' || source.ref.length === 0) {
      throw new Error('github source missing required field: ref');
    }

    // Validate path_prefix before the clone — no network round-trip
    // wasted on a value that would be rejected anyway.
    const cwd = resolvePathPrefix(workspaceDir, source.path_prefix);

    // git must create workspaceDir itself: clone requires the
    // destination to be absent or empty, so only the parent is
    // pre-created here and output/ is added after the clone lands.
    const cloneParent = path.dirname(workspaceDir);
    await mkdir(cloneParent, { recursive: true });
    const cloneArgs = [
      'clone',
      '--depth=1',
      '--branch',
      source.ref,
      '--',
      `https://github.com/${source.repo}.git`,
      workspaceDir
    ];
    const exitCode = await new Promise((resolve, reject) => {
      const git = spawn('git', cloneArgs, {
        cwd: cloneParent,
        env: process.env,
        // Clone progress goes straight to the machine log.
        stdio: ['ignore', 'inherit', 'inherit']
      });
      git.on('error', reject);
      git.on('exit', (c) => resolve(c ?? 1));
    });
    if (exitCode !== 0) {
      throw new Error(`git clone failed (exit ${exitCode}) for ${source.repo}@${source.ref}`);
    }
    await mkdir(path.join(workspaceDir, OUTPUT_SUBDIR), { recursive: true });
    return cwd;
  }

  if (source.type === 'inline') {
    const specsDir = path.join(workspaceDir, SPECS_SUBDIR);
    await mkdir(workspaceDir, { recursive: true });
    await mkdir(path.join(workspaceDir, OUTPUT_SUBDIR), { recursive: true });
    await mkdir(specsDir, { recursive: true });
    const specs = Array.isArray(source.specs) ? source.specs : [];
    let index = 0;
    for (const spec of specs) {
      // Zero-padded names keep the CLI's input ordering stable.
      const fileName = `spec-${String(index).padStart(4, '0')}.json`;
      await writeFile(path.join(specsDir, fileName), JSON.stringify(spec, null, 2), 'utf8');
      index += 1;
    }
    return workspaceDir;
  }

  throw new Error(`Unsupported source type: ${String(/** @type {any} */ (source).type)}`);
}
513
+
514
/**
 * Drop project secrets whose key collides with a container-controlled
 * env var. `onReject` is invoked once per dropped key — callers use it
 * to surface the rejection in the run logs so the user can correct
 * their project secrets.
 *
 * @param {Record<string, string>} secrets - project secrets from the spec
 * @param {((key: string) => void)|null} [onReject] - per-dropped-key callback
 * @returns {Record<string, string>} secrets safe to spread into childEnv
 */
function filterSecrets(secrets, onReject) {
  const out = {};
  for (const [k, v] of Object.entries(secrets)) {
    if (SECRET_DENYLIST.has(k)) {
      if (onReject) onReject(k);
      continue;
    }
    // Define an own data property instead of `out[k] = v`: a secret
    // literally named "__proto__" would otherwise hit the prototype
    // setter — silently dropped for string values, or a prototype
    // mutation for object values — rather than surviving as a key.
    Object.defineProperty(out, k, {
      value: v,
      enumerable: true,
      writable: true,
      configurable: true
    });
  }
  return out;
}
531
+
532
/**
 * Best-effort POST to /finalize, with the longer finalize timeout
 * budget. Never throws: returns true when the POST landed, false
 * otherwise (the failure is recorded via localLog). The caller
 * decides whether a false result affects the process exit code.
 *
 * @param {string} apiBase - platform API origin, no trailing slash
 * @param {string} runId
 * @param {string} token - run bearer token
 * @param {{status: string, exit_code: number, summary: object}} body
 * @returns {Promise<boolean>} whether the finalize POST succeeded
 */
async function postFinalize(apiBase, runId, token, body) {
  const url = `${apiBase}/api/runs/${encodeURIComponent(runId)}/finalize`;
  try {
    await apiCall('POST', url, token, body, [], API_FINALIZE_TIMEOUT_MS);
    return true;
  } catch (e) {
    localLog('warn', 'finalize failed', { err: String(e) });
    return false;
  }
}
552
+
553
/**
 * Orchestrate one run end-to-end: read env, fetch the spec, provision
 * the workspace, spawn the doc-detective CLI with merged config and
 * filtered secrets, ship its output to /logs, and POST a finalize
 * verdict. Returns the code the process should exit with: the child's
 * exit code for a completed run, 0 for an already-canceled run (410
 * on /spec), 1 for provision or spawn failures.
 *
 * @returns {Promise<number>} process exit code
 */
async function main() {
  // Trailing slashes stripped so URL templates below don't double up.
  const apiBase = readRequiredEnv('DD_API_BASE').replace(/\/+$/, '');
  const runId = readRequiredEnv('DD_RUN_ID');
  const token = readRequiredEnv('DD_RUN_TOKEN');
  // Reject NaN / non-finite / non-positive values so a bad
  // DD_TIMEOUT_SECONDS env (e.g. an unset-but-quoted-empty-string,
  // or a typo) doesn't fire setTimeout(NaN) — which fires
  // immediately and self-kills the runner before /spec lands.
  const rawTimeout = Number(process.env.DD_TIMEOUT_SECONDS);
  const timeoutSeconds = Number.isFinite(rawTimeout) && rawTimeout > 0 ? rawTimeout : 1800;

  // Belt-and-suspenders self-kill. Authoritative timeout is the
  // platform watchdog; this just guarantees a runner that has lost
  // API connectivity stops burning compute eventually.
  const selfKill = setTimeout(() => {
    localLog('warn', 'self-kill timeout exceeded', { timeoutSeconds });
    process.exit(124);
  }, timeoutSeconds * 1000);
  // .unref() — the timer alone shouldn't keep the event loop alive.
  selfKill.unref();

  let canceledBySignal = false;
  let childRunning = false;
  const onPreSpawnSigterm = async () => {
    canceledBySignal = true;
    localLog('info', 'SIGTERM received before child spawn; posting advisory cancel');
    // Best-effort early-cancel finalize. The platform watchdog +
    // cancel handler are still source of truth — this just helps
    // the row land at `canceled` faster when SIGTERM arrives
    // during fetchSpec / provisionWorkspace, before the child has
    // even started. Once the child is running, runChild's own
    // SIGTERM handler takes over and the post-spawn cleanup path
    // covers finalize.
    if (childRunning) return;
    await postFinalize(apiBase, runId, token, {
      status: 'canceled',
      exit_code: 143,
      summary: { reason: 'sigterm_pre_spawn' }
    });
    // Re-check after the await: main() may have raced past the
    // process.off + childRunning=true sequence while we were on
    // the network. Calling process.exit(143) at that point would
    // orphan the just-spawned child. Yield to runChild's
    // forwardTerm + post-spawn finalize instead.
    if (childRunning) return;
    process.exit(143);
  };
  process.on('SIGTERM', onPreSpawnSigterm);

  // Step 1: fetch spec.
  const { canceled, spec } = await fetchSpec(apiBase, runId, token);
  if (canceled) {
    localLog('info', 'run canceled before spec fetch (410); exiting cleanly');
    // Nothing to finalize — the row is already terminal on the server.
    return 0;
  }

  // `DD_WORKSPACE_DIR` is a test/ops seam — defaults to /workspace
  // (the in-container path). Tests redirect this to a tmpdir so
  // they don't touch the real /workspace; an operator could
  // redirect to a per-machine spool dir. Resolved once and
  // threaded through provisionWorkspace + buildEffectiveConfig so
  // the CLI's view can't drift from where we materialized files.
  const workspaceDir = process.env.DD_WORKSPACE_DIR || WORKSPACE_DIR;

  const source = spec.source_snapshot ?? { type: 'inline', specs: [] };
  const config = buildEffectiveConfig(spec.config_snapshot, source, workspaceDir);
  const secrets = spec.secrets ?? {};

  // The spec is the source of truth for everything else (config,
  // source, secrets) — it should win for timeout too. Prefer the
  // spec value, fall back to the env timeout we already set up
  // (kept as a bootstrap so the pre-spec self-kill is non-NaN).
  // Reset the self-kill timer to the spec's value so a project that
  // asked for 60s isn't held open until the 1800s default fires.
  const specTimeout = Number(spec.timeout_seconds);
  if (Number.isFinite(specTimeout) && specTimeout > 0 && specTimeout !== timeoutSeconds) {
    clearTimeout(selfKill);
    const respec = setTimeout(() => {
      localLog('warn', 'self-kill timeout exceeded', { timeoutSeconds: specTimeout });
      process.exit(124);
    }, specTimeout * 1000);
    respec.unref();
  }

  const shipper = makeLogShipper(apiBase, runId, token);

  let cwd;
  try {
    cwd = await provisionWorkspace(source, workspaceDir);
  } catch (e) {
    // Provision failures (bad repo/ref, traversal, clone error) are
    // the run's fault, not ours — finalize as failed with the reason.
    localLog('error', 'workspace provision failed', { err: String(e) });
    shipper.add('stderr', `workspace provision failed: ${String(e)}`);
    await shipper.flush();
    await postFinalize(apiBase, runId, token, {
      status: 'failed',
      exit_code: 1,
      summary: { reason: 'workspace_provision_failed', error: String(e) }
    });
    return 1;
  }

  // Step 2: spawn doc-detective.
  const safeSecrets = filterSecrets(secrets, (key) =>
    shipper.add('stderr', `runner: dropping reserved env var "${key}" from project secrets`)
  );
  const childEnv = {
    ...process.env,
    ...safeSecrets,
    DOC_DETECTIVE_CONFIG: JSON.stringify(config)
  };
  // Don't leak our bearer token into the child's env — the runner
  // owns the platform conversation, not the test job. DD_RUN_ID and
  // DD_API_BASE stay in the child's env intentionally: they're
  // non-sensitive identifiers and a future doc-detective release
  // may want them for diagnostics or reporter hooks.
  delete childEnv.DD_RUN_TOKEN;

  // `DD_RUNNER_CMD` is a test/ops seam — defaults to the canonical
  // `doc-detective` binary, but tests override it to a fixture
  // script and a future ops scenario could point it at a different
  // CLI installation path. Not advertised; the runner's contract
  // with users is `doc-detective`.
  const runnerCmd = process.env.DD_RUNNER_CMD || 'doc-detective';

  // Hand off SIGTERM ownership to runChild — its forwarder kills the
  // child process; the post-spawn finalize logic below handles the
  // advisory POST. Without removing the pre-spawn handler, both
  // would fire and we'd race two finalize POSTs.
  process.off('SIGTERM', onPreSpawnSigterm);
  childRunning = true;

  let exitCode;
  try {
    exitCode = await runChild(runnerCmd, [], { cwd, env: childEnv }, (stream, line) =>
      shipper.add(stream, line)
    );
  } catch (e) {
    // spawn-level failure (ENOENT, EACCES) — the CLI never started.
    localLog('error', 'child spawn failed', { err: String(e) });
    shipper.add('stderr', `failed to spawn doc-detective: ${String(e)}`);
    await shipper.flush();
    await postFinalize(apiBase, runId, token, {
      status: 'failed',
      exit_code: 1,
      summary: { reason: 'spawn_failed', error: String(e) }
    });
    return 1;
  }

  // Drain any buffered logs before finalizing — the finalize handler
  // deletes the run_token, so a late /logs POST after finalize 401s.
  await shipper.flush();

  // Map exit code → finalize status. SIGTERM (143) signals cancel;
  // the watchdog or cancel handler is the source of truth, but our
  // best-effort POST helps the row land at `canceled` faster.
  let finalizeBody;
  if (canceledBySignal || exitCode === 143) {
    finalizeBody = {
      status: 'canceled',
      exit_code: exitCode,
      summary: { reason: 'sigterm' }
    };
  } else if (exitCode === 0) {
    finalizeBody = { status: 'succeeded', exit_code: 0, summary: {} };
  } else {
    finalizeBody = {
      status: 'failed',
      exit_code: exitCode,
      summary: { reason: 'nonzero_exit' }
    };
  }
  await postFinalize(apiBase, runId, token, finalizeBody);
  return exitCode;
}
728
+
729
// Module-import guard so the test file can `import` from this module
// without triggering main(). Node's `import.meta.url === ...` idiom
// for "am I the entrypoint?" — works under both `node` and `tsx` and
// doesn't require require.main.
const isEntry = import.meta.url === `file://${process.argv[1]}`;
if (isEntry) {
  main()
    .then((code) => process.exit(code))
    .catch((err) => {
      // Crash-out path for anything main() didn't handle: record the
      // stack to the machine log, then exit nonzero so the platform
      // watchdog sees a failed boot rather than a hung runner.
      localLog('fatal', 'entrypoint crashed', { err: String(err?.stack ?? err) });
      process.exit(1);
    });
}

// Named exports exist for unit tests; the runtime contract of this
// module is the entrypoint guard above.
export {
  apiCall,
  buildEffectiveConfig,
  fetchSpec,
  filterSecrets,
  main,
  makeLogShipper,
  postFinalize,
  provisionWorkspace,
  readRequiredEnv,
  resolvePathPrefix,
  runChild,
  SECRET_DENYLIST,
  sliceLogLine
};