@specific.dev/spectest 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,740 @@
1
+ // Interactive terminal handle for tests. Backed by `docker exec -it`
2
+ // inside a `script(1)` PTY wrapper so the container program sees a real
3
+ // TTY (line discipline, ANSI escapes, isatty(1) true) while the daemon
4
+ // drives stdin/stdout through ordinary pipes.
5
+ //
6
+ // We feed every byte the PTY emits into two sinks:
7
+ // 1. A headless `@xterm/headless` Terminal that maintains the rendered
8
+ // screen state (cursor, scrollback, alt-screen, attributes). This is
9
+ // what `waitFor`'s predicate evaluates against (and what the
10
+ // internal `rawScreen()` returns) — the analogue of evaluating
11
+ // against the DOM in a Browser session.
12
+ // 2. The asciicast frame callback the daemon hooks up to a
13
+ // TerminalSessionRecord. Same recording format as before; the web
14
+ // UI replays it with asciinema-player.
15
+ //
16
+ // The one-shot `ctx.terminal(service, command, opts?)` API in `index.ts`
17
+ // is built on top of this factory — it opens a Terminal, doesn't write
18
+ // to stdin, waits for the process to exit, and closes. So everything
19
+ // that flows through `ctx.terminal` flows through this code too.
20
+ //
21
+ // PTY-on-the-host trick: the daemon runs under systemd with no controlling
22
+ // TTY, so `docker exec -it` (the docker CLI rejects -it when its own stdin
23
+ // isn't a TTY) needs a PTY wrapper. `script -qfc 'CMD' /dev/null` allocates
24
+ // a PTY for CMD; we set the slave's size with `stty rows R cols C` inside
25
+ // the wrapped command so the container app sees the cols/rows the caller
26
+ // asked for. `script(1)` ships in util-linux which is essential on Debian,
27
+ // so it's present in the Freestyle base image without extra apt deps.
28
+
29
+ import { Terminal as XtermHeadless } from "@xterm/headless";
30
+
31
+ import { recordTerminalStep, reserveEvent, truncateUtf8 } from "./recorder.js";
32
+ import type { EventReservation } from "./recorder.js";
33
+ import type { TerminalStepAction } from "./recorder.js";
34
+ import { wrap } from "./inspect.js";
35
+ import type { Wrapped } from "./inspect.js";
36
+
37
/** Options accepted when opening a terminal session. Every field is optional. */
export interface TerminalOpts {
  /** Width of the PTY in columns; 80 when omitted. */
  cols?: number;
  /** Height of the PTY in rows; 24 when omitted. */
  rows?: number;
  /**
   * Additional environment variables, layered on top of the container's
   * own. When `TERM` is not supplied here it is set to `xterm-256color`.
   */
  env?: Record<string, string>;
  /**
   * Command to execute inside the container.
   *
   * Omitted: an interactive login shell (`sh -l`) is started, which suits
   * the usual "type commands, look at the output" flow.
   *
   * Provided: the command is run via `sh -lc` and the session finishes
   * when it exits. The one-shot `ctx.terminal(...)` wrapper uses this form.
   */
  command?: string;
  /**
   * Upper bound for the whole session, in milliseconds. Interactive
   * sessions (no `command`) normally leave this undefined — no session
   * timeout — and rely on the daemon's test-level timeout. The one-shot
   * wrapper passes the test's default timeout.
   */
  timeoutMs?: number;
}
63
+
64
/** What the one-shot `ctx.terminal(...)` convenience resolves to. */
export interface TerminalResult {
  /** Decoded PTY output, ANSI escape sequences included. Capped in size. */
  output: string;
  /** Exit code reported for the session's process. */
  exitCode: number;
  /** Elapsed time in milliseconds. */
  durationMs: number;
  /** Opaque identifier of the persisted asciicast session. */
  sessionId: string;
}
73
+
74
/**
 * Handle to an interactive terminal session.
 *
 * Use one handle sequentially: `send` and `waitFor` both act on the same
 * PTY and screen buffer, so issuing them concurrently against a single
 * handle has no well-defined meaning. For parallel sessions, open several
 * terminals.
 */
export interface Terminal {
  /** Number of PTY columns the terminal was opened with. */
  readonly cols: number;
  /** Number of PTY rows the terminal was opened with. */
  readonly rows: number;
  /** Identifier that links this terminal to its asciicast record. */
  readonly sessionId: string;
  /**
   * Settles once the underlying process has ended — because of `close()`,
   * because the inner command exited, or because the session timed out.
   * The value is a `TerminalResult` carrying the full captured `output`
   * and the `exitCode`. This promise never rejects.
   *
   * The result is provenance-tagged against the `exit` step, so
   * `expect((await term.exited).output).toContain(...)` and
   * `expect((await term.exited).exitCode).toBe(0)` nest under the terminal
   * in the timeline. This is the way to assert on the transcript of a
   * long-lived terminal: there is deliberately no `output()`/`screen()`
   * getter (use `waitFor` to observe state mid-session).
   */
  readonly exited: Promise<Wrapped<TerminalResult>>;
  /**
   * Non-blocking peek at the exit code: `undefined` while the process is
   * still running. Unlike `exited`, reading it records no step and the
   * value is not tagged.
   */
  readonly exitCode: number | undefined;

  /** Writes `input` to the PTY exactly as given; no newline is appended. */
  send(input: string): Promise<void>;
  /** Shorthand for `send(line + "\n")`. */
  sendLine(line: string): Promise<void>;
  /**
   * Presses a key identified by name. Recognised names: `Enter`, `Tab`,
   * `Backspace`, `Escape`, `Up`/`Down`/`Left`/`Right`, `Home`, `End`,
   * `PageUp`, `PageDown`, `Delete`, and `Ctrl+<letter>`
   * (case-insensitive). Any other value is sent as the literal string.
   */
  press(key: string): Promise<void>;

  /**
   * Polls the rendered screen until `matcher` is satisfied. This is the
   * primitive for observing terminal state: it is race-free (it keeps
   * polling until the condition holds instead of reading a snapshot that
   * may not have rendered yet) and it is recorded as a tied step.
   *
   * Accepted matchers:
   *  - string — satisfied when the screen contains it; resolves to that
   *    string. The wait is itself the assertion (it throws on timeout),
   *    so "contains X" needs no additional `expect`.
   *  - regex — satisfied when it matches the screen; resolves to the
   *    match array.
   *  - function — called as `fn(screen, output)`; any truthy return ends
   *    the wait and becomes the (provenance-tagged) result, so a value
   *    can be extracted and passed to `expect(...)`.
   *
   * `description` is a short human-readable label shown in the event log
   * so that timelines are not filled with anonymous waits.
   *
   * Defaults: 5 s overall timeout and 100 ms between polls. Throws when
   * the timeout elapses.
   */
  waitFor(
    description: string,
    matcher: string,
    opts?: { timeoutMs?: number; intervalMs?: number },
  ): Promise<Wrapped<string>>;
  waitFor(
    description: string,
    matcher: RegExp,
    opts?: { timeoutMs?: number; intervalMs?: number },
  ): Promise<Wrapped<RegExpMatchArray>>;
  waitFor<T = unknown>(
    description: string,
    matcher: (screen: string, output: string) => T,
    opts?: { timeoutMs?: number; intervalMs?: number },
  ): Promise<Wrapped<NonNullable<Awaited<T>>>>;

  /**
   * Tears the session down: nudges the shell out with Ctrl-D on stdin and
   * SIGKILLs after a grace period. Idempotent. Resolves to a
   * `TerminalResult` (tagged against the `close` step), which allows
   * assertions on the transcript of a session that does not terminate on
   * its own. Calling it is optional — terminals are not auto-closed when
   * a test ends.
   */
  close(): Promise<Wrapped<TerminalResult>>;
}
161
+
162
/**
 * Concrete handle type returned by `openTerminal`.
 *
 * Adds raw accessors on top of the public `Terminal`; the daemon uses them
 * internally to assemble `TerminalResult`s and event previews. They are
 * intentionally kept off the public `Terminal` surface: test code should
 * observe through `waitFor` (race-free and tied to a step) and read the
 * transcript from `exited`/`close`, not through an out-of-band, untracked
 * getter.
 */
export interface InternalTerminal extends Terminal {
  /**
   * Everything the PTY has emitted since the session opened, ANSI escapes
   * included, subject to the output cap.
   */
  rawOutput(): string;
  /**
   * The rendered screen: visible rows joined with newlines, trailing
   * whitespace trimmed, with ANSI sequences already applied (so the text
   * reflects cursor positioning).
   */
  rawScreen(): string;
}
177
+
178
// Upper bound on the cumulative output string held in memory (compared
// against the string's length). The asciicast frames are stored
// separately on the session record and are not limited by this value.
// Matches `TERMINAL_OUTPUT_CAP_BYTES` in daemon.ts.
const OUTPUT_CAP_BYTES = 1 << 20;
182
+
183
// Scrollback depth, in lines, given to the headless emulator. Large enough
// that long output histories are retained between screen reads, small
// enough to keep the per-session memory cost bounded.
const SCROLLBACK = 10000;
187
+
188
// Maximum time, in milliseconds, to wait for the TTY echo when recording a
// send/press step.
//
// Reason for waiting: `castTimeSec` should point at the cast frame a user
// wants to see when they click the step. For send/press that is the frame
// holding the echo of what was typed. The echo is a PTY round-trip (our
// pipe → script → container TTY → kernel echo → script → our pipe) taking
// a few ms, so at the instant `writeStdin` returns no frame containing the
// typed bytes exists yet. We therefore await the next ingested frame,
// which is the echo. It is not a fixed delay: the wait ends as soon as the
// echo arrives, typically in under 30 ms.
//
// The deadline is only a safety hatch. If the program consumes the input
// without ever echoing (rare — raw mode with no app response, or a
// closed-fd condition) the test must not hang. `castTimeSec` then falls
// back to the write moment, which is fine because no later echo frame
// exists for it to be misaligned with.
const ECHO_WAIT_MS = 120;
206
+
207
/** Receiver for the asciicast frames a terminal produces. */
export interface TerminalFrameSink {
  /** Called for each chunk of output, with its offset in seconds from session start. */
  pushFrame(tSec: number, data: string): void;
  /** Called once the session has ended. */
  markClosed(): void;
}
212
+
213
/** Arguments accepted by `openTerminal`. */
export interface OpenTerminalArgs {
  /** Name of the service container to exec into. */
  service: string;
  /** Session options; see `TerminalOpts` for the defaults. */
  opts?: TerminalOpts;
  /** Destination for asciicast frames; the daemon owns the session record. */
  sink: TerminalFrameSink;
  /**
   * Session id, computed up front so that the inline TerminalEvent can
   * refer to it before the session record is finished.
   */
  sessionId: string;
  /**
   * When true, each terminal operation is recorded as a step in the
   * recorder, tagged with `sessionId`. When false or omitted, per-op
   * event recording is disabled (e.g. eval, where there is no active
   * recorder).
   */
  recordEvents?: boolean;
}
229
+
230
/**
 * Open a PTY-backed interactive terminal in a service container.
 *
 * The factory knows nothing about `TerminalSessionRecord`s: it calls
 * `sink.pushFrame` for every chunk of output it sees and `sink.markClosed`
 * once the session has ended. The daemon owns the record and the sink.
 *
 * @param args - Target service, session options, frame sink, session id,
 *   and whether per-op steps are recorded.
 * @returns The terminal handle, including the internal raw accessors.
 */
export async function openTerminal(args: OpenTerminalArgs): Promise<InternalTerminal> {
  const { service, opts, sink, sessionId } = args;
  const cols = opts?.cols ?? 80;
  const rows = opts?.rows ?? 24;
  const userEnv = opts?.env ?? {};
  const term = userEnv.TERM ?? "xterm-256color";

  // Build the inner docker exec invocation. With `-it` the container
  // sees a real TTY and dockerd merges stderr into stdout, so we only
  // need to drain one stream from the PTY.
  const dockerArgs: string[] = ["exec", "-it"];
  dockerArgs.push("-e", `TERM=${term}`);
  dockerArgs.push("-e", `COLUMNS=${cols}`);
  dockerArgs.push("-e", `LINES=${rows}`);
  for (const [k, v] of Object.entries(userEnv)) {
    if (k === "TERM") continue; // already emitted above
    dockerArgs.push("-e", `${k}=${v}`);
  }
  dockerArgs.push(service);
  if (opts?.command !== undefined) {
    dockerArgs.push("sh", "-lc", opts.command);
  } else {
    // Login shell — gives the user a $PS1 prompt to drive interactively.
    dockerArgs.push("sh", "-l");
  }

  // Quote the inner docker command for `script -c`. We pass it as a
  // single shell string. `stty` sets the PTY slave's size to the
  // requested cols/rows before exec'ing docker — without this, the
  // container app would see whatever size script's parent had (often
  // 80x24, but the daemon has none, so it's whatever util-linux picks
  // as the fallback).
  const inner = `stty rows ${rows} cols ${cols} 2>/dev/null; exec ${shellQuote(["docker", ...dockerArgs])}`;
  // `script -q -f -c CMD /dev/null` runs CMD in a PTY (no preamble, flushed
  // after each write, output discarded — we capture via stdout pipe).
  const spawnArgs = ["script", "-qfc", inner, "/dev/null"];

  const start = Date.now();
  const proc = Bun.spawn(spawnArgs, {
    stdin: "pipe",
    stdout: "pipe",
    stderr: "pipe",
  });

  // Headless screen model. `allowProposedApi` lets us call `buffer.active`
  // safely (it's marked EXPERIMENTAL in the public typings).
  const emu = new XtermHeadless({
    cols,
    rows,
    scrollback: SCROLLBACK,
    allowProposedApi: true,
  });

  let output = "";
  let exited = false;
  let exitCode: number | undefined;
  let closed = false;
  let resolveExit!: (code: number) => void;
  const rawExited = new Promise<number>((res) => {
    resolveExit = res;
  });
  // Cast time at the moment the process actually exited. Captured by the
  // exit watcher below so the `exit` step points the player at the very
  // end of the visible session.
  let exitCastTimeSec: number | undefined;

  // Subscribers waiting for the next frame to arrive. Used by send /
  // sendLine / press so the recorded `castTimeSec` lands on the echo's
  // frame instead of microseconds before it.
  const frameWaiters: Array<() => void> = [];

  /** Fan one decoded chunk out to the sink, the emulator and the capped transcript. */
  const ingest = (chunk: string): void => {
    const t = (Date.now() - start) / 1000;
    sink.pushFrame(t, chunk);
    emu.write(chunk);
    // The cap is applied to the string length (UTF-16 code units).
    if (output.length < OUTPUT_CAP_BYTES) {
      const room = OUTPUT_CAP_BYTES - output.length;
      output += chunk.length > room ? chunk.slice(0, room) : chunk;
    }
    if (frameWaiters.length > 0) {
      // Detach the whole list before invoking so each waiter fires once.
      const fns = frameWaiters.splice(0, frameWaiters.length);
      for (const fn of fns) fn();
    }
  };

  /**
   * Resolve when the next PTY frame is ingested, or after `timeoutMs`
   * elapses — whichever comes first. Used to delay a send/press
   * recording just long enough for the TTY echo to land on a frame, so
   * the seek target lines up with the visible state of the screen.
   *
   * Whichever side wins cleans up the other: a frame cancels the timer,
   * and a timeout removes the waiter, so neither outlives the call.
   */
  const waitForNextFrame = (timeoutMs: number): Promise<void> => {
    return new Promise<void>((resolve) => {
      const onFrame = (): void => {
        clearTimeout(timer);
        resolve();
      };
      const timer = setTimeout(() => {
        const idx = frameWaiters.indexOf(onFrame);
        if (idx !== -1) frameWaiters.splice(idx, 1);
        resolve();
      }, timeoutMs);
      frameWaiters.push(onFrame);
    });
  };

  /** Read `stream` to EOF, decoding it as UTF-8 and ingesting each non-empty chunk. */
  const drainStream = async (
    stream: ReadableStream<Uint8Array> | null | undefined,
  ): Promise<void> => {
    if (!stream) return;
    // One decoder per stream: a streaming decoder buffers the bytes of an
    // incomplete multi-byte sequence between calls, so sharing one across
    // two concurrently-drained streams would splice their bytes together.
    const decoder = new TextDecoder("utf-8", { fatal: false });
    const reader = stream.getReader();
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        if (!value || value.byteLength === 0) continue;
        const chunk = decoder.decode(value, { stream: true });
        if (chunk.length === 0) continue;
        ingest(chunk);
      }
      // Flush any bytes still buffered from an incomplete trailing sequence.
      const tail = decoder.decode();
      if (tail.length > 0) ingest(tail);
    } finally {
      reader.releaseLock();
    }
  };

  // Drain stdout + stderr concurrently. The PTY's output arrives on
  // script's stdout; we drain stderr anyway in case script(1) itself
  // emits diagnostics there.
  const stdout = proc.stdout as ReadableStream<Uint8Array> | null | undefined;
  const stderr = proc.stderr as ReadableStream<Uint8Array> | null | undefined;
  const drained = Promise.all([drainStream(stdout), drainStream(stderr)]);

  // Race the optional session-level timeout. For interactive terminals
  // we usually have no timeout; the wrapper for one-shot supplies one.
  let timer: ReturnType<typeof setTimeout> | undefined;
  if (opts?.timeoutMs && opts.timeoutMs > 0) {
    timer = setTimeout(() => {
      if (!exited) {
        try {
          proc.kill();
        } catch {
          /* ignore */
        }
      }
    }, opts.timeoutMs);
  }

  // Exit watcher: when the process exits, let the drains finish, close the
  // sink, and resolve `rawExited`.
  void (async () => {
    const code = await proc.exited;
    exited = true;
    exitCode = code;
    if (timer) clearTimeout(timer);
    // Let `drained` finish so the final asciicast frames land before we
    // mark the sink closed. drainStream is bounded by the OS pipe being
    // closed when proc exits, so this won't hang.
    try {
      await drained;
    } catch {
      /* ignore */
    }
    // Record cast time AFTER drain, so the `exit` step points the player
    // at the very last frame (the program's final output before EOF).
    exitCastTimeSec = (Date.now() - start) / 1000;
    try {
      sink.markClosed();
    } finally {
      // Always resolve, even if the sink throws, so `exited` and `close()`
      // can never be left waiting on a process that has already ended.
      resolveExit(code);
    }
  })();

  // Helpers to write raw bytes to the PTY. We treat stdin as binary
  // pipes (Bun's `proc.stdin` is a Web `WritableStream<Uint8Array>` or
  // a FileSink-like depending on the version — use the FileSink-style
  // `write(string)` which both shapes accept).
  const stdinWriter = proc.stdin as { write(s: string): unknown; flush?(): unknown } | undefined;

  /** Unconditional write + best-effort flush; no lifecycle checks. */
  const writeRaw = (data: string): void => {
    if (!stdinWriter) return;
    stdinWriter.write(data);
    try {
      // FileSink.flush returns void | number; ignore the return.
      void stdinWriter.flush?.();
    } catch {
      /* ignore */
    }
  };

  /** Write for public operations: a no-op once the session has exited or been closed. */
  const writeStdin = async (data: string): Promise<void> => {
    if (exited || closed) return;
    writeRaw(data);
  };

  /** Render the visible viewport as text, one line per row, trailing blank rows dropped. */
  const renderScreen = (): string => {
    const buf = emu.buffer.active;
    const lines: string[] = [];
    // We render only the visible viewport (cursor-position-aware): the
    // typical mental model of "what does the user see right now".
    const viewportTop = buf.viewportY;
    for (let y = 0; y < emu.rows; y++) {
      const line = buf.getLine(viewportTop + y);
      lines.push(line ? line.translateToString(true) : "");
    }
    // Drop trailing empty lines so multi-line predicates aren't padded.
    while (lines.length > 1 && lines[lines.length - 1] === "") lines.pop();
    return lines.join("\n");
  };

  /** Extract a message from a thrown value, falling back to its string form. */
  const errorMessage = (err: unknown): string => {
    const e = err as Error;
    return e?.message ?? String(err);
  };

  /**
   * Record one terminal step with a preview of the current screen.
   * Returns the event seq so callers can `wrap()` their result against it,
   * or `undefined` when per-op recording is disabled.
   */
  const recordOpEvent = (
    action: TerminalStepAction,
    fields: Record<string, unknown>,
    startedAt: number,
    error?: string,
    reservation?: EventReservation,
  ): number | undefined => {
    if (!args.recordEvents) return undefined;
    const preview = truncateUtf8(renderScreen());
    // Use the SAME clock the asciicast frames use (offset from `start`,
    // which is captured at Bun.spawn). That way the UI seeks the player
    // directly to this value with no frame arithmetic.
    const castTimeSec = (Date.now() - start) / 1000;
    // Return the event seq so callers can `wrap()` their result value
    // against it — e.g. `expect(await term.waitFor(...))` then links its
    // assertion to this step (sourceSeq) and the UI nests it here.
    return recordTerminalStep({
      sessionId,
      service,
      action,
      castTimeSec,
      durationMs: Date.now() - startedAt,
      screenPreview: preview.value,
      screenTruncated: preview.truncated,
      error,
      ...fields,
    }, reservation);
  };

  /**
   * Shared body of send / sendLine / press: write `data`, wait for the
   * echo frame, then record the step. On failure the step is recorded
   * with the error message and the error is rethrown.
   *
   * `writeStdin` returns the instant the bytes are in the pipe buffer; the
   * kernel TTY echo takes a few ms to round-trip back through script +
   * docker exec + our stdout pipe. Recording `castTimeSec` before that
   * frame arrives would point the step's seek target at a screen state
   * that doesn't yet show what was typed, hence the wait.
   */
  const writeAndRecord = async (
    action: TerminalStepAction,
    data: string,
    fields: Record<string, unknown>,
    startedAt: number,
    reservation: EventReservation,
  ): Promise<void> => {
    try {
      await writeStdin(data);
      await waitForNextFrame(ECHO_WAIT_MS);
      recordOpEvent(action, fields, startedAt, undefined, reservation);
    } catch (err) {
      recordOpEvent(action, fields, startedAt, errorMessage(err), reservation);
      throw err;
    }
  };

  // Assemble the public result shape (mirrors the one-shot
  // `ctx.terminal(...)` TerminalResult) from current capture state.
  const buildResult = (code: number): TerminalResult => ({
    output,
    exitCode: code,
    durationMs: Date.now() - start,
    sessionId,
  });

  // Memoised promise behind `term.exited`. Lazily constructed on first
  // read so the `exit` step is only recorded if the test actually
  // awaits exit — the one-shot wrapper internally uses `rawExited`
  // directly and so skips the step (which is what we want; the one-shot
  // already emits a `terminal` event covering the whole lifecycle).
  let exitedAccessor: Promise<Wrapped<TerminalResult>> | undefined;
  const getExited = (): Promise<Wrapped<TerminalResult>> => {
    if (exitedAccessor) return exitedAccessor;
    const resv = reserveEvent();
    exitedAccessor = (async () => {
      const code = await rawExited;
      const result = buildResult(code);
      // No recorder (eval/setup): there's no event to nest under, but we
      // still wrap (with an undefined seq) so the accessor's wrapped type is
      // honest at runtime — same as the one-shot `ctx.terminal`/`fetch` paths.
      if (!args.recordEvents) {
        return wrap(result, undefined) as unknown as Wrapped<TerminalResult>;
      }
      const preview = truncateUtf8(renderScreen());
      const seq = recordTerminalStep({
        sessionId,
        service,
        action: "exit",
        castTimeSec: exitCastTimeSec ?? (Date.now() - start) / 1000,
        durationMs: 0,
        screenPreview: preview.value,
        screenTruncated: preview.truncated,
        exitCode: code,
      }, resv);
      // Tag the whole result against the exit step so assertions on
      // `.output` / `.exitCode` nest under the terminal in the timeline.
      return wrap(result, seq) as unknown as Wrapped<TerminalResult>;
    })();
    return exitedAccessor;
  };

  const handle: InternalTerminal = {
    cols,
    rows,
    sessionId,
    get exited() {
      return getExited();
    },
    get exitCode() {
      return exitCode;
    },
    async send(input) {
      const startedAt = Date.now();
      const resv = reserveEvent();
      const trunc = truncateUtf8(input);
      await writeAndRecord(
        "send",
        input,
        { text: trunc.value, textTruncated: trunc.truncated },
        startedAt,
        resv,
      );
    },
    async sendLine(line) {
      const startedAt = Date.now();
      const resv = reserveEvent();
      const trunc = truncateUtf8(line);
      await writeAndRecord(
        "sendLine",
        line + "\n",
        { text: trunc.value, textTruncated: trunc.truncated },
        startedAt,
        resv,
      );
    },
    async press(key) {
      const startedAt = Date.now();
      const resv = reserveEvent();
      const bytes = keyToBytes(key);
      await writeAndRecord("press", bytes, { key }, startedAt, resv);
    },
    rawOutput() {
      return output;
    },
    rawScreen() {
      return renderScreen();
    },
    async waitFor(
      description: string,
      matcher: string | RegExp | ((screen: string, output: string) => unknown),
      waitOpts?: { timeoutMs?: number; intervalMs?: number },
    ) {
      const timeoutMs = waitOpts?.timeoutMs ?? 5_000;
      const intervalMs = waitOpts?.intervalMs ?? 100;
      const t = Date.now();
      const resv = reserveEvent();
      const deadline = t + timeoutMs;
      let attempts = 0;
      const check = (): unknown => {
        attempts++;
        const scr = renderScreen();
        if (typeof matcher === "string") {
          return scr.includes(matcher) ? matcher : null;
        }
        if (matcher instanceof RegExp) {
          const m = scr.match(matcher);
          return m ?? null;
        }
        return matcher(scr, output);
      };
      while (true) {
        // Await the result: a function matcher may be async, and a pending
        // Promise is always truthy, so testing it un-awaited would report a
        // match on the first poll regardless of what it resolves to.
        const v = await check();
        if (v) {
          const seq = recordOpEvent("waitFor", { description, attempts, matched: true }, t, undefined, resv);
          // Tag the matched value so an `expect(...)` on it links its
          // assertion back to this waitFor step (sourceSeq) instead of
          // rendering as an orphan sidebar row. Mirrors how http/db/exec
          // results carry their op's seq. `as never` lets this single
          // body satisfy the three overload return types.
          return wrap(v, seq) as never;
        }
        if (Date.now() >= deadline) {
          const err = `waitFor ${JSON.stringify(description)} timed out after ${timeoutMs}ms (${attempts} attempts)`;
          recordOpEvent(
            "waitFor",
            { description, attempts, matched: false },
            t,
            err,
            resv,
          );
          throw new Error(err);
        }
        await new Promise((r) => setTimeout(r, intervalMs));
      }
    },
    async close() {
      // Use `rawExited` rather than `term.exited` everywhere we need a
      // raw number internally — the public getter resolves to a wrapped
      // result (and records an `exit` step on first read) which we don't
      // want as a side-effect of internal teardown logic.
      if (closed) {
        // Repeat call: no new step, but still wrap (undefined seq) so the
        // runtime value matches the declared type, as `exited` does.
        return wrap(
          buildResult(exitCode ?? (await rawExited)),
          undefined,
        ) as unknown as Wrapped<TerminalResult>;
      }
      closed = true;
      const t = Date.now();
      const resv = reserveEvent();
      // Polite first: send EOF (Ctrl-D). For a login shell this exits
      // cleanly. For a long-running command it's a no-op and we fall
      // through to SIGKILL. This goes through `writeRaw`, not
      // `writeStdin`: `closed` is already set, and `writeStdin` would
      // drop the write.
      if (!exited) {
        try {
          writeRaw("\x04");
        } catch {
          /* ignore */
        }
      }
      // Closing stdin signals EOF to the script wrapper too.
      try {
        (proc.stdin as { end?: () => void } | undefined)?.end?.();
      } catch {
        /* ignore */
      }
      // Grace period before forcibly killing.
      const grace = 1_500;
      let onGraceElapsed!: (v: "timeout") => void;
      const graceElapsed = new Promise<"timeout">((r) => {
        onGraceElapsed = r;
      });
      const graceTimer = setTimeout(() => onGraceElapsed("timeout"), grace);
      const raced = await Promise.race([rawExited, graceElapsed]);
      // Don't leave the grace timer pending once the race is decided.
      clearTimeout(graceTimer);
      if (raced === "timeout") {
        try {
          proc.kill();
        } catch {
          /* ignore */
        }
      }
      const code = await rawExited;
      const result = buildResult(code);
      const seq = recordOpEvent("close", { exitCode: code }, t, undefined, resv);
      // Tag the result against the close step so assertions on the
      // transcript of a non-self-terminating session nest under it.
      return wrap(result, seq) as unknown as Wrapped<TerminalResult>;
    },
  };

  return handle;
}
689
+
690
+ // ────────────────────────────────────────────────────────────────────────
691
+ // Key name → byte sequence
692
+ // ────────────────────────────────────────────────────────────────────────
693
+
694
// Named keys and the byte sequence each one sends. Keys are stored in
// normalised form: lower-case, with whitespace, `_` and `-` removed.
const KEY_MAP: Record<string, string> = {
  enter: "\r",
  return: "\r",
  tab: "\t",
  backspace: "\x7f",
  delete: "\x1b[3~",
  escape: "\x1b",
  esc: "\x1b",
  up: "\x1b[A",
  down: "\x1b[B",
  right: "\x1b[C",
  left: "\x1b[D",
  home: "\x1b[H",
  end: "\x1b[F",
  pageup: "\x1b[5~",
  pagedown: "\x1b[6~",
  space: " ",
};

/**
 * Translate a key name into the bytes to write to the PTY.
 *
 * Resolution order:
 *  1. A named key from `KEY_MAP`, matched case-insensitively and ignoring
 *     whitespace, `_` and `-` (so `Page_Up`, `page-up` and `PageUp` agree).
 *  2. `Ctrl` followed by one or more of whitespace, `_`, `+` or `-`, then
 *     a single letter → the ASCII control character 1-26.
 *  3. Anything else is returned unchanged.
 *
 * @param key - Key name, or literal text to send.
 * @returns The byte sequence for the key; always a string.
 */
function keyToBytes(key: string): string {
  const norm = key.replace(/[\s_-]/g, "").toLowerCase();
  // Own-property lookup only: plain indexing would also find members
  // inherited from Object.prototype (e.g. "constructor") and return a
  // non-string value.
  const named = Object.prototype.hasOwnProperty.call(KEY_MAP, norm) ? KEY_MAP[norm] : undefined;
  if (named !== undefined) return named;
  // Ctrl+<letter>: ASCII control char 1-26.
  const letter = key.match(/^ctrl[\s_+-]+([a-z])$/i)?.[1];
  if (letter !== undefined) {
    const code = letter.toLowerCase().charCodeAt(0) - "a".charCodeAt(0) + 1;
    return String.fromCharCode(code);
  }
  // Fall back to the literal string — lets callers type a single
  // character via `press("X")` even if it's not in the table.
  return key;
}
727
+
728
+ // ────────────────────────────────────────────────────────────────────────
729
+ // Shell quoting for `script -c CMD`
730
+ // ────────────────────────────────────────────────────────────────────────
731
+
732
/**
 * Join `argv` into one POSIX-shell command string.
 *
 * An empty argument becomes `''`. An argument made only of characters in
 * the safe set is emitted as-is. Anything else is wrapped in single
 * quotes, with each embedded single quote written as `'\''`.
 *
 * @param argv - Arguments to quote, in order.
 * @returns The arguments, individually quoted, separated by single spaces.
 */
function shellQuote(argv: string[]): string {
  const safeWord = /^[A-Za-z0-9_@%+=:,./-]+$/;
  const words: string[] = [];
  for (const arg of argv) {
    if (arg === "") {
      words.push("''");
    } else if (safeWord.test(arg)) {
      words.push(arg);
    } else {
      // Close the quote, emit an escaped quote, reopen: ' -> '\''
      words.push("'" + arg.split("'").join("'\\''") + "'");
    }
  }
  return words.join(" ");
}