ptywright 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -0
- package/dist/agent.mjs +1 -1
- package/dist/bin/ptywright.mjs +1 -1
- package/dist/{cli-CfvlbRoZ.mjs → cli-C40H_ElC.mjs} +55 -21
- package/dist/cli.mjs +1 -1
- package/dist/config-B0r-JCFI.mjs +52 -0
- package/dist/config.mjs +2 -0
- package/dist/index.mjs +1 -1
- package/dist/mcp.mjs +1 -1
- package/dist/pty-cassette.mjs +1 -1
- package/dist/{runner-zi0nItvB.mjs → runner-CembqDgJ.mjs} +59 -10
- package/dist/{server-BC3yo-dq.mjs → server-h--2U0Ic.mjs} +1 -1
- package/package.json +2 -1
- package/skills/ptywright-testing/SKILL.md +113 -79
- package/skills/ptywright-testing/agents/openai.yaml +4 -0
- package/skills/ptywright-testing/references/agent-regression.md +132 -0
- package/skills/ptywright-testing/references/ci-and-debugging.md +95 -0
- package/skills/ptywright-testing/references/mcp-tools.md +91 -0
- package/skills/ptywright-testing/references/raw-pty-cassettes.md +82 -0
- package/skills/ptywright-testing/references/script-runner.md +80 -0
- /package/dist/{pty_like-Cpkh_O9B.mjs → pty_like-DqCo7XdB.mjs} +0 -0
package/README.md
CHANGED
|
@@ -380,6 +380,44 @@ Artifacts are split intentionally:
|
|
|
380
380
|
- `tests/agent-snapshots/<name>/` contains stable terminal/DOM baselines.
|
|
381
381
|
- `--update-snapshots` is the explicit update path for intentional UI changes.
|
|
382
382
|
|
|
383
|
+
### Project Config
|
|
384
|
+
|
|
385
|
+
For repeated agent regression work, put project-level defaults in
|
|
386
|
+
`ptywright.config.ts` instead of repeating paths and browser defaults in every
|
|
387
|
+
flow file. The CLI discovers `ptywright.config.ts|mts|cts|js|mjs|cjs` from the
|
|
388
|
+
current directory upward, and `--config <file>` selects one explicitly.
|
|
389
|
+
|
|
390
|
+
```ts
|
|
391
|
+
import { defineConfig } from "ptywright/config";
|
|
392
|
+
|
|
393
|
+
export default defineConfig({
|
|
394
|
+
agent: {
|
|
395
|
+
artifactsRoot: ".tmp/agent",
|
|
396
|
+
cassetteDir: "tests/agent-cassettes",
|
|
397
|
+
snapshotDir: "tests/agent-snapshots",
|
|
398
|
+
defaults: {
|
|
399
|
+
headless: true,
|
|
400
|
+
timeoutMs: 45_000,
|
|
401
|
+
screenshot: false,
|
|
402
|
+
viewports: [{ name: "desktop", width: 1280, height: 820 }],
|
|
403
|
+
mask: [{ regex: "session_[a-z0-9]+", replacement: "<session>" }],
|
|
404
|
+
},
|
|
405
|
+
},
|
|
406
|
+
});
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
```bash
|
|
410
|
+
ptywright agent run tests/agents/codex.flow.json --update-snapshots
|
|
411
|
+
ptywright agent check
|
|
412
|
+
ptywright agent replay-all --update-snapshots
|
|
413
|
+
ptywright agent promote .tmp/agent/codex/codex.cassette.json --update-snapshots
|
|
414
|
+
```
|
|
415
|
+
|
|
416
|
+
Config paths are resolved relative to the config file directory. CLI arguments
|
|
417
|
+
override config defaults, and fields written in a flow file override config
|
|
418
|
+
defaults for that flow. The flow file remains the test case; the config file is
|
|
419
|
+
only for shared project defaults and common artifact locations.
|
|
420
|
+
|
|
383
421
|
`launch.mode=command` is the recommended integration contract. `command` and
|
|
384
422
|
`args` are spawned directly, and ptywright reads the first URL printed to stdout
|
|
385
423
|
or stderr. Use `waitForUrlMs` to tune startup timeouts and `urlRegex` when the
|
package/dist/agent.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { a as runAgentSpecPath, i as runAgentSpec, n as printAgentLaunchPlan, r as replayAgentRecordPath, t as defaultSpecNameForPath } from "./runner-zi0nItvB.mjs";
|
|
1
|
+
import { a as runAgentSpecPath, i as runAgentSpec, n as printAgentLaunchPlan, r as replayAgentRecordPath, t as defaultSpecNameForPath } from "./runner-CembqDgJ.mjs";
|
|
2
2
|
export { defaultSpecNameForPath, printAgentLaunchPlan, replayAgentRecordPath, runAgentSpec, runAgentSpecPath };
|
package/dist/bin/ptywright.mjs
CHANGED
|
package/dist/{cli-CfvlbRoZ.mjs → cli-C40H_ElC.mjs}
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { c as createDefaultPtyAdapter, l as resolvePtyBackend } from "./runner-zApMYWZx.mjs";
|
|
2
|
-
import { a as readScriptManifestPath, c as resolveScriptManifestPath, d as resolveScriptRunSummaryPath, f as runScriptPath, i as findScriptSummaryManifest, l as validateScriptManifest, n as runAllScripts, o as relocateScriptManifestCommands, s as resolveManifestPrimaryPath$1, t as createPtywrightServer, u as readScriptRunSummaryPath } from "./server-BC3yo-dq.mjs";
|
|
3
|
-
import { C as
|
|
4
|
-
import {
|
|
2
|
+
import { a as readScriptManifestPath, c as resolveScriptManifestPath, d as resolveScriptRunSummaryPath, f as runScriptPath, i as findScriptSummaryManifest, l as validateScriptManifest, n as runAllScripts, o as relocateScriptManifestCommands, s as resolveManifestPrimaryPath$1, t as createPtywrightServer, u as readScriptRunSummaryPath } from "./server-h--2U0Ic.mjs";
|
|
3
|
+
import { C as agentManifestPath, D as writeAgentManifestPath, E as validateAgentManifestFiles, S as AGENT_MANIFEST_FILE_NAME, T as readAgentManifestPath, _ as isAgentCassetteLike, a as runAgentSpecPath, b as sanitizeArtifactName, c as agentRunModeSchema, d as readAgentRunRecordPath, f as writeAgentRunRecordPath, g as normalizeAgentFlowSpecWithConfig, h as resolveAgentLaunchTarget, l as formatAgentArgv, m as createAgentTemplateSpec, o as loadAgentSpec, p as formatArgv, r as replayAgentRecordPath, s as AGENT_RUN_RECORD_SCHEMA_URL, u as isAgentRunRecordLike, v as readAgentCassettePath, w as isAgentManifestLike, x as launchAgentBrowser, y as normalizeAgentFlowSpec } from "./runner-CembqDgJ.mjs";
|
|
4
|
+
import { n as loadPtywrightConfig } from "./config-B0r-JCFI.mjs";
|
|
5
|
+
import { c as createPtyCassetteReplay, i as formatPtyCassetteInspectLines, l as readPtyCassettePath, o as inspectPtyCassettePath, r as createPtyCassetteRecorder, t as wrapPtyLike, v as validatePtyCassette } from "./pty_like-DqCo7XdB.mjs";
|
|
5
6
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
6
7
|
import { z } from "zod";
|
|
7
8
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve } from "node:path";
|
|
@@ -1901,7 +1902,7 @@ async function promoteAgentCassette(options) {
|
|
|
1901
1902
|
const sourceCassette = readAgentCassettePath(resolveSourceCassettePath(resolve(process.cwd(), options.sourcePath)));
|
|
1902
1903
|
const name = sanitizeArtifactName(sourceCassette.name);
|
|
1903
1904
|
const cassetteDir = options.cassetteDir ?? "tests/agent-cassettes";
|
|
1904
|
-
const snapshotDir = options.snapshotDir ?? join("tests
|
|
1905
|
+
const snapshotDir = options.snapshotDir ?? join(options.snapshotRoot ?? "tests/agent-snapshots", name);
|
|
1905
1906
|
const artifactsRoot = options.artifactsRoot ?? join(".tmp", "agent-promote", name);
|
|
1906
1907
|
const targetDir = join(cassetteDir, name);
|
|
1907
1908
|
const targetCassettePath = join(targetDir, `${name}.cassette.json`);
|
|
@@ -2130,10 +2131,10 @@ function emptyReplayResult(dir, suiteDir, updateSnapshots) {
|
|
|
2130
2131
|
//#endregion
|
|
2131
2132
|
//#region src/agent/recorder.ts
|
|
2132
2133
|
async function recordAgentSpecPath(specPath, options) {
|
|
2133
|
-
return recordAgentSpec((await loadAgentSpec(specPath)).spec, options);
|
|
2134
|
+
return recordAgentSpec((await loadAgentSpec(specPath)).raw, options);
|
|
2134
2135
|
}
|
|
2135
2136
|
async function recordAgentSpec(input, options) {
|
|
2136
|
-
const spec =
|
|
2137
|
+
const spec = normalizeAgentFlowSpecWithConfig(input, options.config);
|
|
2137
2138
|
const rootDir = options.rootDir ? resolve(process.cwd(), options.rootDir) : process.cwd();
|
|
2138
2139
|
const outPath = isAbsolute(options.outPath) ? options.outPath : resolve(process.cwd(), options.outPath);
|
|
2139
2140
|
const durationMs = options.durationMs ?? 3e4;
|
|
@@ -2923,6 +2924,7 @@ function usage() {
|
|
|
2923
2924
|
" --json Print machine-readable script artifact output",
|
|
2924
2925
|
"",
|
|
2925
2926
|
"Agent options:",
|
|
2927
|
+
" --config <file> Use a ptywright.config.* file",
|
|
2926
2928
|
" --artifacts-dir <dir> Override agent run artifact directory",
|
|
2927
2929
|
" --cassette-dir <dir> Committed cassette directory for promote/check",
|
|
2928
2930
|
" --snapshot-dir <dir> Snapshot directory for promoted cassettes",
|
|
@@ -3264,6 +3266,12 @@ function parseAgentArgs(argv) {
|
|
|
3264
3266
|
out.flavor = parseAgentFlavor(arg);
|
|
3265
3267
|
continue;
|
|
3266
3268
|
}
|
|
3269
|
+
if (arg === "--config") {
|
|
3270
|
+
if (!next) throw new Error(`missing <file> for --config`);
|
|
3271
|
+
out.configPath = next;
|
|
3272
|
+
i += 1;
|
|
3273
|
+
continue;
|
|
3274
|
+
}
|
|
3267
3275
|
if (arg === "--artifacts-root" && next) {
|
|
3268
3276
|
out.artifactsRoot = next;
|
|
3269
3277
|
i += 1;
|
|
@@ -3334,13 +3342,32 @@ function parseAgentArgs(argv) {
|
|
|
3334
3342
|
outPath: out.outPath,
|
|
3335
3343
|
durationMs: out.durationMs,
|
|
3336
3344
|
commandName: out.commandName,
|
|
3345
|
+
configPath: out.configPath,
|
|
3337
3346
|
updateSnapshots: out.updateSnapshots,
|
|
3338
3347
|
headed: out.headed,
|
|
3339
3348
|
json: out.json
|
|
3340
3349
|
};
|
|
3341
3350
|
}
|
|
3351
|
+
function shouldLoadAgentConfig(mode) {
|
|
3352
|
+
return mode === "run" || mode === "record" || mode === "replay" || mode === "promote" || mode === "replay-all" || mode === "rerun" || mode === "check";
|
|
3353
|
+
}
|
|
3354
|
+
function resolveAgentHeadless(args, config) {
|
|
3355
|
+
if (args.headed) return false;
|
|
3356
|
+
return config?.agent?.defaults?.headless ?? true;
|
|
3357
|
+
}
|
|
3358
|
+
function resolveAgentConfigPath(config, path) {
|
|
3359
|
+
if (!path) return void 0;
|
|
3360
|
+
if (isAbsolute(path)) return path;
|
|
3361
|
+
return resolve(config?.rootDir ?? process.cwd(), path);
|
|
3362
|
+
}
|
|
3363
|
+
function resolveCliPath(path) {
|
|
3364
|
+
if (!path) return void 0;
|
|
3365
|
+
return isAbsolute(path) ? path : resolve(process.cwd(), path);
|
|
3366
|
+
}
|
|
3342
3367
|
async function cmdAgent(argv) {
|
|
3343
3368
|
const args = parseAgentArgs(argv);
|
|
3369
|
+
const config = shouldLoadAgentConfig(args.mode) ? await loadPtywrightConfig({ configPath: args.configPath }) : void 0;
|
|
3370
|
+
const headless = resolveAgentHeadless(args, config);
|
|
3344
3371
|
if (args.mode === "init") {
|
|
3345
3372
|
const spec = createAgentTemplateSpec(args.flavor ?? "generic");
|
|
3346
3373
|
const path = args.path;
|
|
@@ -3356,7 +3383,8 @@ async function cmdAgent(argv) {
|
|
|
3356
3383
|
const result = await recordAgentSpecPath(args.path, {
|
|
3357
3384
|
outPath: args.outPath,
|
|
3358
3385
|
durationMs: args.durationMs,
|
|
3359
|
-
headless
|
|
3386
|
+
headless,
|
|
3387
|
+
config
|
|
3360
3388
|
});
|
|
3361
3389
|
logLines([
|
|
3362
3390
|
`${result.ok ? "ok" : "failed"} record=${result.outPath}`,
|
|
@@ -3420,13 +3448,17 @@ async function cmdAgent(argv) {
|
|
|
3420
3448
|
const argv = selected.command.argv;
|
|
3421
3449
|
validateAgentCommandArgv(argv, selected.name);
|
|
3422
3450
|
const [, , subcommand, ...rest] = argv;
|
|
3423
|
-
return cmdAgent([
|
|
3451
|
+
return cmdAgent([
|
|
3452
|
+
subcommand ?? "",
|
|
3453
|
+
...rest,
|
|
3454
|
+
...args.configPath ? ["--config", args.configPath] : []
|
|
3455
|
+
]);
|
|
3424
3456
|
}
|
|
3425
3457
|
if (args.mode === "check") {
|
|
3426
3458
|
const result = await checkAgentRegression({
|
|
3427
|
-
cassetteDir: args.path ?? args.cassetteDir,
|
|
3428
|
-
artifactsRoot: args.artifactsRoot,
|
|
3429
|
-
headless
|
|
3459
|
+
cassetteDir: args.path ?? args.cassetteDir ?? resolveAgentConfigPath(config, config?.agent?.cassetteDir),
|
|
3460
|
+
artifactsRoot: args.artifactsRoot ?? resolveAgentConfigPath(config, config?.agent?.artifactsRoot),
|
|
3461
|
+
headless,
|
|
3430
3462
|
updateSnapshots: args.updateSnapshots
|
|
3431
3463
|
});
|
|
3432
3464
|
if (args.json) logLines([JSON.stringify(formatAgentCheckJson(result), null, 2)], false);
|
|
@@ -3436,10 +3468,11 @@ async function cmdAgent(argv) {
|
|
|
3436
3468
|
if (args.mode === "promote") {
|
|
3437
3469
|
const result = await promoteAgentCassette({
|
|
3438
3470
|
sourcePath: args.path,
|
|
3439
|
-
cassetteDir: args.cassetteDir,
|
|
3471
|
+
cassetteDir: args.cassetteDir ?? resolveAgentConfigPath(config, config?.agent?.cassetteDir),
|
|
3440
3472
|
snapshotDir: args.snapshotDir,
|
|
3441
|
-
|
|
3442
|
-
|
|
3473
|
+
snapshotRoot: args.snapshotDir ? void 0 : resolveAgentConfigPath(config, config?.agent?.snapshotDir),
|
|
3474
|
+
artifactsRoot: args.artifactsRoot ?? resolveAgentConfigPath(config, config?.agent?.artifactsRoot),
|
|
3475
|
+
headless,
|
|
3443
3476
|
updateSnapshots: args.updateSnapshots
|
|
3444
3477
|
});
|
|
3445
3478
|
if (args.json) logLines([JSON.stringify(formatAgentPromoteSummary(result), null, 2)], false);
|
|
@@ -3449,8 +3482,8 @@ async function cmdAgent(argv) {
|
|
|
3449
3482
|
if (args.mode === "rerun") {
|
|
3450
3483
|
const rerun = await rerunAgentSummary({
|
|
3451
3484
|
path: args.path,
|
|
3452
|
-
artifactsRoot: args.artifactsRoot,
|
|
3453
|
-
headless
|
|
3485
|
+
artifactsRoot: args.artifactsRoot ?? resolveAgentConfigPath(config, config?.agent?.artifactsRoot),
|
|
3486
|
+
headless,
|
|
3454
3487
|
updateSnapshots: args.updateSnapshots
|
|
3455
3488
|
});
|
|
3456
3489
|
if (rerun.kind === "check-summary") {
|
|
@@ -3480,9 +3513,9 @@ async function cmdAgent(argv) {
|
|
|
3480
3513
|
}
|
|
3481
3514
|
if (args.mode === "replay-all") {
|
|
3482
3515
|
const result = await replayAllAgentRecords({
|
|
3483
|
-
dir: args.path,
|
|
3484
|
-
artifactsRoot: args.artifactsRoot,
|
|
3485
|
-
headless
|
|
3516
|
+
dir: args.path ?? resolveAgentConfigPath(config, config?.agent?.cassetteDir),
|
|
3517
|
+
artifactsRoot: args.artifactsRoot ?? resolveAgentConfigPath(config, config?.agent?.artifactsRoot),
|
|
3518
|
+
headless,
|
|
3486
3519
|
updateSnapshots: args.updateSnapshots
|
|
3487
3520
|
});
|
|
3488
3521
|
const failures = result.entries.filter((entry) => !entry.result.ok);
|
|
@@ -3507,9 +3540,10 @@ async function cmdAgent(argv) {
|
|
|
3507
3540
|
return 1;
|
|
3508
3541
|
}
|
|
3509
3542
|
const options = {
|
|
3510
|
-
artifactsDir: args.artifactsDir,
|
|
3543
|
+
artifactsDir: resolveCliPath(args.artifactsDir),
|
|
3511
3544
|
updateSnapshots: args.updateSnapshots,
|
|
3512
|
-
headless
|
|
3545
|
+
headless,
|
|
3546
|
+
config
|
|
3513
3547
|
};
|
|
3514
3548
|
const result = args.mode === "run" ? await runAgentSpecPath(args.path, options) : await replayAgentRecordPath(args.path, options);
|
|
3515
3549
|
if (args.json) {
|
package/dist/cli.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { t as main } from "./cli-CfvlbRoZ.mjs";
|
|
1
|
+
import { t as main } from "./cli-C40H_ElC.mjs";
|
|
2
2
|
export { main };
|
package/dist/config-B0r-JCFI.mjs
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { dirname, isAbsolute, resolve } from "node:path";
|
|
2
|
+
import { pathToFileURL } from "node:url";
|
|
3
|
+
import { existsSync } from "node:fs";
|
|
4
|
+
//#region src/config.ts
|
|
5
|
+
const CONFIG_FILE_NAMES = [
|
|
6
|
+
"ptywright.config.ts",
|
|
7
|
+
"ptywright.config.mts",
|
|
8
|
+
"ptywright.config.cts",
|
|
9
|
+
"ptywright.config.js",
|
|
10
|
+
"ptywright.config.mjs",
|
|
11
|
+
"ptywright.config.cjs"
|
|
12
|
+
];
|
|
13
|
+
function defineConfig(config) {
|
|
14
|
+
return config;
|
|
15
|
+
}
|
|
16
|
+
async function loadPtywrightConfig(options = {}) {
|
|
17
|
+
const cwd = resolve(options.cwd ?? process.cwd());
|
|
18
|
+
const configPath = resolveConfigPath({
|
|
19
|
+
cwd,
|
|
20
|
+
configPath: options.configPath
|
|
21
|
+
});
|
|
22
|
+
if (!configPath) return { rootDir: cwd };
|
|
23
|
+
const mod = await import(`${pathToFileURL(configPath).href}?t=${Date.now()}`);
|
|
24
|
+
return {
|
|
25
|
+
...normalizePtywrightConfig(mod.default ?? mod.config, configPath),
|
|
26
|
+
configPath,
|
|
27
|
+
rootDir: dirname(configPath)
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
function resolveConfigPath(options) {
|
|
31
|
+
if (options.configPath) {
|
|
32
|
+
const explicitPath = isAbsolute(options.configPath) ? options.configPath : resolve(options.cwd, options.configPath);
|
|
33
|
+
if (!existsSync(explicitPath)) throw new Error(`ptywright config not found: ${options.configPath}`);
|
|
34
|
+
return explicitPath;
|
|
35
|
+
}
|
|
36
|
+
let current = options.cwd;
|
|
37
|
+
while (true) {
|
|
38
|
+
for (const fileName of CONFIG_FILE_NAMES) {
|
|
39
|
+
const candidate = resolve(current, fileName);
|
|
40
|
+
if (existsSync(candidate)) return candidate;
|
|
41
|
+
}
|
|
42
|
+
const parent = dirname(current);
|
|
43
|
+
if (parent === current) return;
|
|
44
|
+
current = parent;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
function normalizePtywrightConfig(input, configPath) {
|
|
48
|
+
if (!input || typeof input !== "object" || Array.isArray(input)) throw new Error(`invalid ptywright config: expected object in ${configPath}`);
|
|
49
|
+
return input;
|
|
50
|
+
}
|
|
51
|
+
//#endregion
|
|
52
|
+
export { loadPtywrightConfig as n, defineConfig as t };
|
package/dist/config.mjs
ADDED
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { t as createPtywrightServer } from "./server-BC3yo-dq.mjs";
|
|
1
|
+
import { t as createPtywrightServer } from "./server-h--2U0Ic.mjs";
|
|
2
2
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
const { server, sessions } = createPtywrightServer();
|
package/dist/mcp.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { t as createPtywrightServer } from "./server-BC3yo-dq.mjs";
|
|
1
|
+
import { t as createPtywrightServer } from "./server-h--2U0Ic.mjs";
|
|
2
2
|
export { createPtywrightServer };
|
package/dist/pty-cassette.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { S as dataToBytes, _ as ptyCassetteSchema, a as inspectPtyCassette, b as byteLength, c as createPtyCassetteReplay, d as PTY_CASSETTE_SCHEMA_URL, f as normalizePtyCassette, g as ptyCassetteResizeEventSchema, h as ptyCassetteExitEventSchema, i as formatPtyCassetteInspectLines, l as readPtyCassettePath, m as ptyCassetteEventSchema, n as PtyCassetteRecorder, o as inspectPtyCassettePath, p as ptyCassetteDataEventSchema, r as createPtyCassetteRecorder, s as PtyCassetteReplay, t as wrapPtyLike, u as writePtyCassettePath, v as validatePtyCassette, x as dataToBase64, y as base64ToBytes } from "./pty_like-Cpkh_O9B.mjs";
|
|
1
|
+
import { S as dataToBytes, _ as ptyCassetteSchema, a as inspectPtyCassette, b as byteLength, c as createPtyCassetteReplay, d as PTY_CASSETTE_SCHEMA_URL, f as normalizePtyCassette, g as ptyCassetteResizeEventSchema, h as ptyCassetteExitEventSchema, i as formatPtyCassetteInspectLines, l as readPtyCassettePath, m as ptyCassetteEventSchema, n as PtyCassetteRecorder, o as inspectPtyCassettePath, p as ptyCassetteDataEventSchema, r as createPtyCassetteRecorder, s as PtyCassetteReplay, t as wrapPtyLike, u as writePtyCassettePath, v as validatePtyCassette, x as dataToBase64, y as base64ToBytes } from "./pty_like-DqCo7XdB.mjs";
|
|
2
2
|
//#region src/pty-cassette/bun_terminal.ts
|
|
3
3
|
function wrapBunTerminalOptions(options, recorder) {
|
|
4
4
|
const onData = options.data;
|
package/dist/{runner-zi0nItvB.mjs → runner-CembqDgJ.mjs}
CHANGED
|
@@ -624,6 +624,43 @@ function escapeHtml$1(input) {
|
|
|
624
624
|
return input.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
|
|
625
625
|
}
|
|
626
626
|
//#endregion
|
|
627
|
+
//#region src/agent/config_defaults.ts
|
|
628
|
+
function normalizeAgentFlowSpecWithConfig(input, config) {
|
|
629
|
+
return normalizeAgentFlowSpec(applyAgentConfigDefaults(agentFlowSpecSchema.parse(input), config));
|
|
630
|
+
}
|
|
631
|
+
function applyAgentConfigDefaults(input, config) {
|
|
632
|
+
const agent = config?.agent;
|
|
633
|
+
if (!agent) return input;
|
|
634
|
+
const name = sanitizeArtifactName(input.name ?? "agent-flow");
|
|
635
|
+
const configDefaults = agent.defaults ?? {};
|
|
636
|
+
const specDefaults = input.defaults ?? {};
|
|
637
|
+
const viewports = input.viewports ? void 0 : cloneViewports(configDefaults.viewports);
|
|
638
|
+
return {
|
|
639
|
+
...input,
|
|
640
|
+
artifactsDir: input.artifactsDir ?? resolveNamedDir(agent.artifactsRoot, name, config.rootDir),
|
|
641
|
+
snapshotDir: input.snapshotDir ?? resolveNamedDir(agent.snapshotDir, name, config.rootDir),
|
|
642
|
+
viewports: viewports ?? input.viewports,
|
|
643
|
+
defaults: {
|
|
644
|
+
...specDefaults,
|
|
645
|
+
timeoutMs: specDefaults.timeoutMs ?? configDefaults.timeoutMs,
|
|
646
|
+
screenshot: specDefaults.screenshot ?? configDefaults.screenshot,
|
|
647
|
+
mask: mergeMaskRules(configDefaults.mask, specDefaults.mask)
|
|
648
|
+
}
|
|
649
|
+
};
|
|
650
|
+
}
|
|
651
|
+
function resolveNamedDir(root, name, configRoot) {
|
|
652
|
+
if (!root) return void 0;
|
|
653
|
+
const namedDir = join(root, name);
|
|
654
|
+
return isAbsolute(namedDir) ? namedDir : resolve(configRoot, namedDir);
|
|
655
|
+
}
|
|
656
|
+
function cloneViewports(viewports) {
|
|
657
|
+
return Array.isArray(viewports) && viewports.length > 0 ? viewports.map((viewport) => ({ ...viewport })) : void 0;
|
|
658
|
+
}
|
|
659
|
+
function mergeMaskRules(configMask, specMask) {
|
|
660
|
+
const merged = [...configMask ?? [], ...specMask ?? []];
|
|
661
|
+
return merged.length > 0 ? merged : void 0;
|
|
662
|
+
}
|
|
663
|
+
//#endregion
|
|
627
664
|
//#region src/agent/command_launch.ts
|
|
628
665
|
const DEFAULT_URL_REGEX = /https?:\/\/[^\s"'<>]+/;
|
|
629
666
|
function buildCommandLaunchCommand(launch, options = {}) {
|
|
@@ -1285,20 +1322,26 @@ function escapeAttribute(input) {
|
|
|
1285
1322
|
//#region src/agent/spec_loader.ts
|
|
1286
1323
|
async function loadAgentSpec(specPath) {
|
|
1287
1324
|
const resolved = resolve(process.cwd(), specPath);
|
|
1288
|
-
if (resolved.endsWith(".json"))
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1325
|
+
if (resolved.endsWith(".json")) {
|
|
1326
|
+
const raw = JSON.parse(readFileSync(resolved, "utf8"));
|
|
1327
|
+
return {
|
|
1328
|
+
spec: normalizeAgentFlowSpec(raw),
|
|
1329
|
+
raw,
|
|
1330
|
+
path: resolved
|
|
1331
|
+
};
|
|
1332
|
+
}
|
|
1292
1333
|
const mod = await import(`${pathToFileURL(resolved).href}?t=${Date.now()}`);
|
|
1334
|
+
const raw = mod.default ?? mod.spec;
|
|
1293
1335
|
return {
|
|
1294
|
-
spec: normalizeAgentFlowSpec(
|
|
1336
|
+
spec: normalizeAgentFlowSpec(raw),
|
|
1337
|
+
raw,
|
|
1295
1338
|
path: resolved
|
|
1296
1339
|
};
|
|
1297
1340
|
}
|
|
1298
1341
|
//#endregion
|
|
1299
1342
|
//#region src/agent/runner.ts
|
|
1300
1343
|
async function runAgentSpecPath(specPath, options = {}) {
|
|
1301
|
-
return runAgentSpec((await loadAgentSpec(specPath)).spec, options);
|
|
1344
|
+
return runAgentSpec((await loadAgentSpec(specPath)).raw, options);
|
|
1302
1345
|
}
|
|
1303
1346
|
async function replayAgentRecordPath(recordPath, options = {}) {
|
|
1304
1347
|
const raw = JSON.parse(readFileSync(recordPath, "utf8"));
|
|
@@ -1311,14 +1354,20 @@ async function replayAgentRecordPath(recordPath, options = {}) {
|
|
|
1311
1354
|
artifactsDir: options.artifactsDir ?? join(dirname(recordPath), "replay")
|
|
1312
1355
|
});
|
|
1313
1356
|
}
|
|
1314
|
-
if (record.spec) return runAgentSpec(record.spec, options);
|
|
1357
|
+
if (record.spec) return runAgentSpec(record.spec, {
|
|
1358
|
+
...options,
|
|
1359
|
+
config: void 0
|
|
1360
|
+
});
|
|
1315
1361
|
if (!record.flowPath) throw new Error(`invalid agent run record: missing replay source in ${recordPath}`);
|
|
1316
|
-
return runAgentSpecPath(isAbsolute(record.flowPath) ? record.flowPath : resolve(dirname(recordPath), record.flowPath), options);
|
|
1362
|
+
return runAgentSpecPath(isAbsolute(record.flowPath) ? record.flowPath : resolve(dirname(recordPath), record.flowPath), {
|
|
1363
|
+
...options,
|
|
1364
|
+
config: void 0
|
|
1365
|
+
});
|
|
1317
1366
|
}
|
|
1318
1367
|
async function runAgentSpec(input, options = {}) {
|
|
1319
1368
|
const startedAt = Date.now();
|
|
1320
1369
|
const rootDir = options.rootDir ? resolve(process.cwd(), options.rootDir) : process.cwd();
|
|
1321
|
-
const spec =
|
|
1370
|
+
const spec = normalizeAgentFlowSpecWithConfig(input, options.replayCassette ? void 0 : options.config);
|
|
1322
1371
|
const name = sanitizeArtifactName(spec.name ?? "agent-flow");
|
|
1323
1372
|
const artifactsDir = resolve(rootDir, options.artifactsDir ?? spec.artifactsDir ?? join(".tmp", "agent", name));
|
|
1324
1373
|
const snapshotDir = resolve(rootDir, spec.snapshotDir ?? join("snapshots", name));
|
|
@@ -1871,4 +1920,4 @@ function defaultSpecNameForPath(path) {
|
|
|
1871
1920
|
return sanitizeArtifactName(basename(path, extname(path)));
|
|
1872
1921
|
}
|
|
1873
1922
|
//#endregion
|
|
1874
|
-
export {
|
|
1923
|
+
export { agentManifestPath as C, writeAgentManifestPath as D, validateAgentManifestFiles as E, AGENT_MANIFEST_FILE_NAME as S, readAgentManifestPath as T, isAgentCassetteLike as _, runAgentSpecPath as a, sanitizeArtifactName as b, agentRunModeSchema as c, readAgentRunRecordPath as d, writeAgentRunRecordPath as f, normalizeAgentFlowSpecWithConfig as g, resolveAgentLaunchTarget as h, runAgentSpec as i, formatAgentArgv as l, createAgentTemplateSpec as m, printAgentLaunchPlan as n, loadAgentSpec as o, formatArgv as p, replayAgentRecordPath as r, AGENT_RUN_RECORD_SCHEMA_URL as s, defaultSpecNameForPath as t, isAgentRunRecordLike as u, readAgentCassettePath as v, isAgentManifestLike as w, launchAgentBrowser as x, normalizeAgentFlowSpec as y };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ptywright",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "Terminal/TUI automation driver over PTY + xterm, exposed as MCP tools",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"agent",
|
|
@@ -37,6 +37,7 @@
|
|
|
37
37
|
"exports": {
|
|
38
38
|
".": "./dist/cli.mjs",
|
|
39
39
|
"./agent": "./dist/agent.mjs",
|
|
40
|
+
"./config": "./dist/config.mjs",
|
|
40
41
|
"./mcp": "./dist/mcp.mjs",
|
|
41
42
|
"./pty-cassette": "./dist/pty-cassette.mjs",
|
|
42
43
|
"./session": "./dist/session.mjs",
|
package/skills/ptywright-testing/SKILL.md
CHANGED
|
@@ -1,122 +1,156 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: ptywright-testing
|
|
3
|
-
description:
|
|
3
|
+
description: Build, run, record, replay, debug, and maintain deterministic terminal, TUI, PTY cassette, and browser-terminal agent regression tests with ptywright. Use when an agent needs to drive CLI/TUI apps, create ptywright scripts, configure ptywright.config.*, record or replay PTY output, solidify browser terminal agent flows into non-AI snapshot tests, inspect generated artifacts, or diagnose ptywright CI failures.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# Ptywright Testing
|
|
7
7
|
|
|
8
|
-
Use ptywright
|
|
8
|
+
Use ptywright when the task involves terminal or browser-terminal behavior that should be repeatable without manual inspection. Prefer stable text, DOM, and terminal snapshots over screenshots unless the user explicitly needs visual media.
|
|
9
9
|
|
|
10
|
-
##
|
|
10
|
+
## First Decision
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
Choose one workflow before editing:
|
|
13
|
+
|
|
14
|
+
- **Browser terminal agent regression**: Use when a web app renders a terminal and exposes `[data-terminal-root]`, or when testing integrations such as Codex/Claude/Droid wrappers. Read `references/agent-regression.md`.
|
|
15
|
+
- **Raw PTY recording and replay**: Use when the user wants to capture terminal bytes from `node-pty`, Bun Terminal, `bun-pty`, or an arbitrary command, then replay them into another renderer. Read `references/raw-pty-cassettes.md`.
|
|
16
|
+
- **Scripted TUI tests**: Use when testing a CLI/TUI directly through ptywright scripts, golden snapshots, and HTML reports. Read `references/script-runner.md`.
|
|
17
|
+
- **MCP interactive driving or recording**: Use when an agent should interact through ptywright MCP tools or record an MCP-driven session into a script. Read `references/mcp-tools.md`.
|
|
18
|
+
- **CI/debugging/artifact triage**: Use when a ptywright run failed, snapshots mismatch, a manifest is stale, or reusable commands need to be executed. Read `references/ci-and-debugging.md`.
|
|
19
|
+
|
|
20
|
+
If more than one workflow applies, start with the highest-level workflow that preserves determinism. For example, for an evolving browser terminal renderer, record a raw PTY cassette first, then create a browser agent regression that replays the cassette into the renderer.
|
|
15
21
|
|
|
16
|
-
|
|
17
|
-
bun add -g ptywright
|
|
18
|
-
ptywright <command>
|
|
22
|
+
## Installation And Entry Points
|
|
19
23
|
|
|
20
|
-
|
|
24
|
+
Prefer the local project command when working inside a ptywright checkout:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
21
27
|
bun run bin/ptywright <command>
|
|
22
28
|
```
|
|
23
29
|
|
|
24
|
-
|
|
30
|
+
Prefer published package commands in downstream projects:
|
|
25
31
|
|
|
26
|
-
|
|
27
|
-
|
|
32
|
+
```bash
|
|
33
|
+
bunx ptywright@latest <command>
|
|
34
|
+
# or
|
|
35
|
+
npx ptywright@latest <command>
|
|
36
|
+
```
|
|
28
37
|
|
|
29
|
-
|
|
38
|
+
Common commands:
|
|
30
39
|
|
|
31
40
|
```bash
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
41
|
+
ptywright mcp
|
|
42
|
+
ptywright mcp --caps core
|
|
43
|
+
ptywright run <file.json|file.ts>
|
|
44
|
+
ptywright run-all --dir scripts
|
|
45
|
+
ptywright agent run <flow.json> --update-snapshots
|
|
46
|
+
ptywright agent check
|
|
47
|
+
ptywright pty record --out tests/cassettes/session.pty.json -- <command> [args...]
|
|
48
|
+
```
|
|
37
49
|
|
|
38
|
-
|
|
39
|
-
|
|
50
|
+
## Project Config
|
|
51
|
+
|
|
52
|
+
Use `ptywright.config.ts` for project defaults, not as a second test DSL. The flow file remains the test case.
|
|
53
|
+
|
|
54
|
+
```ts
|
|
55
|
+
import { defineConfig } from "ptywright/config";
|
|
56
|
+
|
|
57
|
+
export default defineConfig({
|
|
58
|
+
agent: {
|
|
59
|
+
artifactsRoot: ".tmp/agent",
|
|
60
|
+
cassetteDir: "tests/agent-cassettes",
|
|
61
|
+
snapshotDir: "tests/agent-snapshots",
|
|
62
|
+
defaults: {
|
|
63
|
+
headless: true,
|
|
64
|
+
timeoutMs: 45_000,
|
|
65
|
+
screenshot: false,
|
|
66
|
+
viewports: [{ name: "desktop", width: 1280, height: 820 }],
|
|
67
|
+
mask: [{ regex: "session_[a-z0-9]+", replacement: "<session>" }],
|
|
68
|
+
},
|
|
69
|
+
},
|
|
70
|
+
});
|
|
40
71
|
```
|
|
41
72
|
|
|
42
|
-
|
|
73
|
+
Priority rule: explicit CLI args override flow fields, and flow fields override config defaults. Relative paths in the config file resolve from the config file's directory.
|
|
74
|
+
|
|
75
|
+
## Core Invariants
|
|
43
76
|
|
|
44
|
-
|
|
77
|
+
- Keep tests deterministic: fixed terminal size, explicit waits, stable snapshots, masks for random text.
|
|
78
|
+
- Prefer structured APIs and generated reusable commands over shell string reconstruction.
|
|
79
|
+
- Treat `--update-snapshots` as the only intentional baseline update path.
|
|
80
|
+
- Use generated manifests and summaries as durable reproduction bundles.
|
|
81
|
+
- Do not hand-edit cassette, run-record, summary, or manifest command metadata unless a test explicitly asks for malformed fixture data.
|
|
82
|
+
- Avoid app-specific assumptions. ptywright should integrate with any renderer through commands, URLs, DOM roots, and cassette data.
|
|
45
83
|
|
|
46
|
-
|
|
84
|
+
## Minimal Examples
|
|
85
|
+
|
|
86
|
+
Browser agent flow:
|
|
47
87
|
|
|
48
88
|
```json
|
|
49
89
|
{
|
|
50
|
-
"
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
90
|
+
"name": "browser_terminal_smoke",
|
|
91
|
+
"launch": {
|
|
92
|
+
"mode": "command",
|
|
93
|
+
"agentFlavor": "generic",
|
|
94
|
+
"command": "node",
|
|
95
|
+
"args": ["scripts/start-browser-terminal.js", "--print-url"],
|
|
96
|
+
"waitForUrlMs": 15000
|
|
97
|
+
},
|
|
98
|
+
"steps": [
|
|
99
|
+
{ "type": "waitForStableDom" },
|
|
100
|
+
{ "type": "snapshot", "name": "ready", "targets": ["terminal", "dom"] }
|
|
101
|
+
]
|
|
56
102
|
}
|
|
57
103
|
```
|
|
58
104
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
### Run the whole suite (preferred)
|
|
105
|
+
Raw PTY cassette:
|
|
62
106
|
|
|
63
107
|
```bash
|
|
64
|
-
|
|
108
|
+
ptywright pty record --out tests/cassettes/codex.pty.json -- codex --yolo
|
|
109
|
+
ptywright pty replay tests/cassettes/codex.pty.json --speed 0
|
|
110
|
+
ptywright pty validate tests/cassettes/codex.pty.json
|
|
65
111
|
```
|
|
66
112
|
|
|
67
|
-
|
|
68
|
-
- `reportPath` (open in a browser)
|
|
69
|
-
- `summaryPath` (`run.summary.json` for agents/CI)
|
|
70
|
-
|
|
71
|
-
MCP equivalent:
|
|
72
|
-
- `run_all_scripts` (defaults: `dir="scripts"`, suite report in `.tmp/run-all/`)
|
|
73
|
-
- Keep MCP output small: `run_all_scripts(includeEntries="failures", maxEntries=20)`
|
|
113
|
+
Script runner:
|
|
74
114
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
115
|
+
```json
|
|
116
|
+
{
|
|
117
|
+
"name": "tui_smoke",
|
|
118
|
+
"command": ["bun", "tests/fixtures/tui_demo.ts"],
|
|
119
|
+
"cols": 80,
|
|
120
|
+
"rows": 24,
|
|
121
|
+
"steps": [
|
|
122
|
+
{ "type": "waitForText", "text": "Ready" },
|
|
123
|
+
{ "type": "snapshot", "kind": "text", "saveAs": "ready" }
|
|
124
|
+
]
|
|
125
|
+
}
|
|
79
126
|
```
|
|
80
127
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
## Debug a failure
|
|
84
|
-
|
|
85
|
-
Script runner artifacts to check (paths are returned by CLI/MCP):
|
|
86
|
-
|
|
87
|
-
- `*.report.html` (timeline + snapshots)
|
|
88
|
-
- `*.cast` (full playback)
|
|
89
|
-
- `failure.last.view.txt` / `failure.last.txt` (last screen)
|
|
90
|
-
- `failure.error.txt` (stack trace)
|
|
128
|
+
## Verification Commands
|
|
91
129
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
## Record an interactive flow (MCP)
|
|
95
|
-
|
|
96
|
-
1) `start_script_recording(name=...)`
|
|
97
|
-
2) Drive the app with normal tools:
|
|
98
|
-
- `launch_session` → `send_text` / `press_key` / `wait_for_text` / `snapshot_*`
|
|
99
|
-
3) Add golden checkpoints: `mark(label=...)`
|
|
100
|
-
4) Export: `stop_script_recording(recordingId=..., writeFiles=true)`
|
|
101
|
-
|
|
102
|
-
## All-tools smoke (recommended)
|
|
103
|
-
|
|
104
|
-
To verify ptywright MCP tool coverage without relying on external apps/network, run:
|
|
130
|
+
Use the narrowest useful verification first, then broaden when editing shared behavior:
|
|
105
131
|
|
|
106
132
|
```bash
|
|
107
|
-
bun
|
|
133
|
+
bun run format:check
|
|
134
|
+
bun run lint
|
|
135
|
+
bun test tests/agent_config.test.ts
|
|
136
|
+
bun test tests/agent_rerun.test.ts
|
|
137
|
+
bun run build
|
|
138
|
+
bun run check
|
|
108
139
|
```
|
|
109
140
|
|
|
110
|
-
|
|
141
|
+
For downstream projects:
|
|
111
142
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
143
|
+
```bash
|
|
144
|
+
ptywright agent validate <artifact-or-dir>
|
|
145
|
+
ptywright agent inspect <artifact-or-dir>
|
|
146
|
+
ptywright agent commands <artifact-or-dir> --json
|
|
147
|
+
ptywright agent exec <artifact-or-dir> --command rerun
|
|
148
|
+
```
|
|
118
149
|
|
|
119
|
-
##
|
|
150
|
+
## Resource Map
|
|
120
151
|
|
|
121
|
-
- `
|
|
122
|
-
|
|
152
|
+
- `references/agent-regression.md`: Browser terminal agent flows, cassettes, snapshots, promote/check/rerun, and renderer integration.
|
|
153
|
+
- `references/raw-pty-cassettes.md`: Raw PTY cassette recording, replay, wrapper integration, and renderer handoff.
|
|
154
|
+
- `references/script-runner.md`: JSON/TS script runner, MCP script recording, goldens, masks, and reports.
|
|
155
|
+
- `references/mcp-tools.md`: MCP setup and tool selection.
|
|
156
|
+
- `references/ci-and-debugging.md`: Failure triage, manifests, reusable commands, snapshot updates, and CI gates.
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# Browser Agent Regression
|
|
2
|
+
|
|
3
|
+
Use this workflow when ptywright drives a browser-hosted terminal renderer. The renderer must expose a terminal root as `[data-terminal-root]`.
|
|
4
|
+
|
|
5
|
+
## Contract
|
|
6
|
+
|
|
7
|
+
`launch.mode=command` is the preferred integration:
|
|
8
|
+
|
|
9
|
+
- `command` and `args` start a wrapper or app process.
|
|
10
|
+
- The process prints a browser URL to stdout or stderr.
|
|
11
|
+
- ptywright opens that URL with Playwright.
|
|
12
|
+
- The page renders the terminal under `[data-terminal-root]`.
|
|
13
|
+
- Steps drive browser input and compare terminal/DOM snapshots.
|
|
14
|
+
|
|
15
|
+
Use `launch.mode=url` only when the page is already running.
|
|
16
|
+
|
|
17
|
+
## Flow Lifecycle
|
|
18
|
+
|
|
19
|
+
1. Create a flow JSON or TS file.
|
|
20
|
+
2. Run live once and write baselines:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
ptywright agent run tests/agents/name.flow.json --update-snapshots
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
3. Compare later without updating:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
ptywright agent run tests/agents/name.flow.json
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
4. Replay a run record or cassette without the live agent:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
ptywright agent replay .tmp/agent/name/name.agent-run.json
|
|
36
|
+
ptywright agent replay .tmp/agent/name/name.cassette.json
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
5. Promote a good live run into a committed non-AI regression:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
ptywright agent promote .tmp/agent/name/name.cassette.json --update-snapshots
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
6. Run the committed suite:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
ptywright agent check
|
|
49
|
+
ptywright agent replay-all tests/agent-cassettes --update-snapshots
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Recommended Flow Shape
|
|
53
|
+
|
|
54
|
+
```json
|
|
55
|
+
{
|
|
56
|
+
"name": "agent_renderer_smoke",
|
|
57
|
+
"launch": {
|
|
58
|
+
"mode": "command",
|
|
59
|
+
"agentFlavor": "codex",
|
|
60
|
+
"command": "node",
|
|
61
|
+
"args": [
|
|
62
|
+
"tests/harness/browser-terminal.js",
|
|
63
|
+
"--",
|
|
64
|
+
"codex",
|
|
65
|
+
"--yolo",
|
|
66
|
+
"--print-url"
|
|
67
|
+
],
|
|
68
|
+
"waitForUrlMs": 20000,
|
|
69
|
+
"urlRegex": "(https?://\\S+)"
|
|
70
|
+
},
|
|
71
|
+
"defaults": {
|
|
72
|
+
"timeoutMs": 45000,
|
|
73
|
+
"screenshot": false,
|
|
74
|
+
"mask": [{ "regex": "req_[a-zA-Z0-9]+", "replacement": "<request-id>" }]
|
|
75
|
+
},
|
|
76
|
+
"viewports": [{ "name": "desktop", "width": 1280, "height": 820 }],
|
|
77
|
+
"steps": [
|
|
78
|
+
{ "type": "waitForStableDom", "quietMs": 600 },
|
|
79
|
+
{ "type": "snapshot", "name": "launch", "targets": ["terminal", "dom"] }
|
|
80
|
+
]
|
|
81
|
+
}
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Keep the flow generic. ptywright should not import app internals. The downstream app should provide a command or test harness that prints a browser URL and can consume replay data if needed.
|
|
85
|
+
|
|
86
|
+
## Recording Browser Interactions
|
|
87
|
+
|
|
88
|
+
Use `agent record` when manually exploring a browser-terminal flow:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
ptywright agent record tests/agents/base.flow.json \
|
|
92
|
+
--out tests/agents/recorded.flow.json \
|
|
93
|
+
--duration-ms 60000 \
|
|
94
|
+
--headed
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
End recording by waiting for `--duration-ms` to elapse or by stopping the process. The output is a normal flow JSON containing keyboard/click steps plus a final checkpoint.
|
|
98
|
+
|
|
99
|
+
## Non-AI Regression Strategy
|
|
100
|
+
|
|
101
|
+
For evolving agent UIs:
|
|
102
|
+
|
|
103
|
+
1. Capture or create a stable PTY or browser-agent cassette.
|
|
104
|
+
2. Replay that cassette into the renderer.
|
|
105
|
+
3. Snapshot terminal text and DOM.
|
|
106
|
+
4. Commit cassette and snapshots.
|
|
107
|
+
5. Use `agent check` in CI.
|
|
108
|
+
|
|
109
|
+
This lets renderer changes be verified without asking the live AI to reproduce the same answer.
|
|
110
|
+
|
|
111
|
+
## Artifact Meanings
|
|
112
|
+
|
|
113
|
+
- `.agent-run.json`: Per-run record with `commands.replay.argv` and `commands.updateSnapshots.argv`.
|
|
114
|
+
- `.cassette.json`: Normalized flow spec plus captured terminal/DOM frames and hashes.
|
|
115
|
+
- `agent-replay.summary.json`: Replay-all suite summary.
|
|
116
|
+
- `agent-check.summary.json`: Committed cassette check summary.
|
|
117
|
+
- `agent-promote.summary.json`: Promote operation summary.
|
|
118
|
+
- `ptywright-agent.manifest.json`: Hash-indexed portable artifact bundle.
|
|
119
|
+
- `index.html`: Human-readable report with snapshots and reusable commands.
|
|
120
|
+
|
|
121
|
+
## Common Commands
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
ptywright agent inspect .tmp/agent-check
|
|
125
|
+
ptywright agent validate .tmp/agent-check
|
|
126
|
+
ptywright agent commands .tmp/agent-check --json
|
|
127
|
+
ptywright agent exec .tmp/agent-check --command rerun
|
|
128
|
+
ptywright agent exec .tmp/agent-check --command updateSnapshots
|
|
129
|
+
ptywright agent rerun .tmp/agent-check/agent-check.summary.json
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Prefer `agent exec` when an artifact already contains a reusable command. It avoids shell parsing and relocates copied manifest bundles safely.
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# CI And Debugging
|
|
2
|
+
|
|
3
|
+
Use this guide when a ptywright command fails, CI times out, snapshots mismatch, or generated artifact commands need to be reused.
|
|
4
|
+
|
|
5
|
+
## First Triage
|
|
6
|
+
|
|
7
|
+
1. Read the failing command and exact artifact paths from the log.
|
|
8
|
+
2. Open the HTML report if available.
|
|
9
|
+
3. Inspect the generated summary JSON.
|
|
10
|
+
4. Run validation on the artifact or directory.
|
|
11
|
+
5. Use generated commands instead of reconstructing shell strings manually.
|
|
12
|
+
|
|
13
|
+
Commands:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
ptywright agent inspect <artifact-or-dir>
|
|
17
|
+
ptywright agent validate <artifact-or-dir>
|
|
18
|
+
ptywright agent commands <artifact-or-dir> --json
|
|
19
|
+
ptywright agent commands <artifact-or-dir> --command rerun
|
|
20
|
+
ptywright agent exec <artifact-or-dir> --command rerun
|
|
21
|
+
ptywright agent exec <artifact-or-dir> --command updateSnapshots
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Snapshot Mismatches
|
|
25
|
+
|
|
26
|
+
Default replay/check mode compares snapshots. Only update baselines intentionally:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
ptywright agent replay-all tests/agent-cassettes --update-snapshots
|
|
30
|
+
ptywright agent exec <artifact-or-dir> --command updateSnapshots
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
For script runner:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
ptywright run-all --dir scripts --update-goldens
|
|
37
|
+
ptywright script exec <summary-or-dir> --command updateGoldens
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Always inspect diffs before committing updated baselines.
|
|
41
|
+
|
|
42
|
+
## Portable Bundles
|
|
43
|
+
|
|
44
|
+
Agent run/check/promote/replay-all outputs include `ptywright-agent.manifest.json`. A manifest bundle can be copied and still supports:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
ptywright agent inspect <copied-dir>
|
|
48
|
+
ptywright agent commands <copied-dir> --json
|
|
49
|
+
ptywright agent exec <copied-dir> --command rerun
|
|
50
|
+
ptywright agent validate <copied-dir>
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
If a directory has artifacts but no top-level manifest, use `agent validate <dir>` for recursive validation. `agent commands` and `agent exec` expect a manifest-backed command bundle for directory arguments.
|
|
54
|
+
|
|
55
|
+
## Common Failure Causes
|
|
56
|
+
|
|
57
|
+
- Missing `[data-terminal-root]` in browser terminal pages.
|
|
58
|
+
- Flow waits on unstable AI prose instead of stable markers.
|
|
59
|
+
- Snapshot baseline was not updated after an intentional UI change.
|
|
60
|
+
- Random text was not masked.
|
|
61
|
+
- Relative cassette or snapshot paths were moved without a manifest bundle.
|
|
62
|
+
- Stored command metadata in summaries was hand-edited and no longer matches schema expectations.
|
|
63
|
+
- CI is too slow for tests that run multiple full browser replays in a single test case.
|
|
64
|
+
|
|
65
|
+
## Timeout Reduction
|
|
66
|
+
|
|
67
|
+
When a test times out:
|
|
68
|
+
|
|
69
|
+
- Avoid having both the setup path and the rerun path perform a full browser replay in the same test.
|
|
70
|
+
- Use summary fixtures to test command metadata or override behavior.
|
|
71
|
+
- Keep one full end-to-end test per workflow and make surrounding tests narrower.
|
|
72
|
+
- Use committed deterministic cassettes instead of live agents.
|
|
73
|
+
- Keep test timeouts realistic but do not hide structural slowness by only increasing timeouts.
|
|
74
|
+
|
|
75
|
+
## Repository Gates
|
|
76
|
+
|
|
77
|
+
For ptywright itself:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
bun run format:check
|
|
81
|
+
bun run lint
|
|
82
|
+
bun test tests/agent_rerun.test.ts
|
|
83
|
+
bun test tests/agent_promote.test.ts tests/agent_commands.test.ts
|
|
84
|
+
bun run build
|
|
85
|
+
bun run check
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
For downstream projects:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
ptywright agent check
|
|
92
|
+
ptywright agent validate .tmp/agent-check
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Use the narrowest failing test while iterating, then broaden before finalizing shared behavior.
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# MCP Tools
|
|
2
|
+
|
|
3
|
+
Use MCP when an agent should interact with a live terminal session, inspect terminal state, or record an exploratory flow into a script.
|
|
4
|
+
|
|
5
|
+
## Start Server
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
ptywright mcp
|
|
9
|
+
ptywright mcp --caps core
|
|
10
|
+
ptywright mcp --caps core,script,recording
|
|
11
|
+
ptywright mcp-http --port 3000
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Capabilities:
|
|
15
|
+
|
|
16
|
+
- `core`: Launch sessions, send input, wait, snapshot.
|
|
17
|
+
- `debug`: Extra inspection and traces.
|
|
18
|
+
- `script`: Run script files and suites.
|
|
19
|
+
- `recording`: Record MCP tool calls into scripts.
|
|
20
|
+
- `all`: Everything.
|
|
21
|
+
|
|
22
|
+
Use smaller capability sets to reduce agent context pressure.
|
|
23
|
+
|
|
24
|
+
## Client Config
|
|
25
|
+
|
|
26
|
+
Example for clients that use a JSON MCP server config:
|
|
27
|
+
|
|
28
|
+
```json
|
|
29
|
+
{
|
|
30
|
+
"mcpServers": {
|
|
31
|
+
"ptywright": {
|
|
32
|
+
"command": "bunx",
|
|
33
|
+
"args": ["ptywright@latest", "mcp", "--caps", "core,script,recording"]
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Inside this repository, use:
|
|
40
|
+
|
|
41
|
+
```json
|
|
42
|
+
{
|
|
43
|
+
"mcpServers": {
|
|
44
|
+
"ptywright": {
|
|
45
|
+
"command": "bun",
|
|
46
|
+
"args": ["run", "src/cli.ts", "mcp"]
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Tool Selection
|
|
53
|
+
|
|
54
|
+
Typical interactive sequence:
|
|
55
|
+
|
|
56
|
+
1. `launch_session` with fixed `cols`, `rows`, and `env.TERM`.
|
|
57
|
+
2. `wait_for_text` for stable startup markers.
|
|
58
|
+
3. `send_text`, `press_key`, or mouse tools.
|
|
59
|
+
4. `wait_for_stable_screen` before snapshots.
|
|
60
|
+
5. `snapshot_text`, `snapshot_view`, or `snapshot_grid`.
|
|
61
|
+
6. `close_session` when done.
|
|
62
|
+
|
|
63
|
+
Prefer semantic terminal snapshots over screenshots. Use screenshots only if the task explicitly needs visual proof.
|
|
64
|
+
|
|
65
|
+
## Recording
|
|
66
|
+
|
|
67
|
+
Use recording when an exploratory interaction should become a repeatable test:
|
|
68
|
+
|
|
69
|
+
```text
|
|
70
|
+
start_script_recording
|
|
71
|
+
launch_session
|
|
72
|
+
send_text / press_key / wait_for_text / snapshot_text
|
|
73
|
+
mark
|
|
74
|
+
stop_script_recording(writeFiles=true)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
After export, run the generated script from the CLI to ensure it is deterministic:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
ptywright run <exported-script.json>
|
|
81
|
+
ptywright run <exported-script.json> --update-goldens
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Context Control
|
|
85
|
+
|
|
86
|
+
When using MCP from an LLM agent:
|
|
87
|
+
|
|
88
|
+
- Avoid returning huge terminal text unless needed.
|
|
89
|
+
- Prefer `includeText=false` or failure-only entries for suite tools when available.
|
|
90
|
+
- Use report and summary paths for detailed inspection.
|
|
91
|
+
- Use masks early if non-deterministic output appears.
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Raw PTY Cassettes
|
|
2
|
+
|
|
3
|
+
Use raw PTY cassettes when the goal is to capture terminal output once and replay it later without relaunching the original CLI, AI agent, or TUI.
|
|
4
|
+
|
|
5
|
+
## CLI Recording
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
ptywright pty record --out tests/cassettes/session.pty.json -- <command> [args...]
|
|
9
|
+
ptywright pty validate tests/cassettes/session.pty.json
|
|
10
|
+
ptywright pty inspect tests/cassettes/session.pty.json
|
|
11
|
+
ptywright pty replay tests/cassettes/session.pty.json --speed 0
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Examples:
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
ptywright pty record --out tests/cassettes/codex-yolo.pty.json -- codex --yolo
|
|
18
|
+
ptywright pty record --out tests/cassettes/browser-terminal-codex.pty.json -- \
|
|
19
|
+
node tests/harness/browser-terminal.js -- codex --yolo
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Use `--cols`, `--rows`, `--term`, and `--backend` to stabilize output:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
ptywright pty record \
|
|
26
|
+
--out tests/cassettes/session.pty.json \
|
|
27
|
+
--cols 120 \
|
|
28
|
+
--rows 32 \
|
|
29
|
+
--term xterm-256color \
|
|
30
|
+
--backend auto \
|
|
31
|
+
-- <command>
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Programmatic Integration
|
|
35
|
+
|
|
36
|
+
Use `ptywright/pty-cassette` in projects that already control a PTY-like object.
|
|
37
|
+
|
|
38
|
+
```ts
|
|
39
|
+
import { wrapPtyLike } from "ptywright/pty-cassette";
|
|
40
|
+
|
|
41
|
+
const recorder = wrapPtyLike(ptyProcess, {
|
|
42
|
+
path: "tests/cassettes/session.pty.json",
|
|
43
|
+
command: ["codex", "--yolo"],
|
|
44
|
+
cols: 120,
|
|
45
|
+
rows: 32,
|
|
46
|
+
term: "xterm-256color",
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
// Use recorder.process like the original ptyProcess.
|
|
50
|
+
// Close/finalize according to the package API.
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Prefer wrapper integration when a downstream project wants to keep using native `node-pty`, Bun Terminal, or `bun-pty` while still producing ptywright-compatible data.
|
|
54
|
+
|
|
55
|
+
## Renderer Handoff Pattern
|
|
56
|
+
|
|
57
|
+
For browser terminal renderers:
|
|
58
|
+
|
|
59
|
+
1. Record raw PTY output as `*.pty.json`.
|
|
60
|
+
2. Add a small local harness in the renderer project that loads this cassette and renders it into the browser terminal.
|
|
61
|
+
3. Print the browser URL from that harness.
|
|
62
|
+
4. Use a ptywright agent flow to open the URL and snapshot `[data-terminal-root]`.
|
|
63
|
+
|
|
64
|
+
This separates byte-level reproduction from renderer-level DOM regression.
|
|
65
|
+
|
|
66
|
+
## Updating Scenarios Without Duplicating Huge Sessions
|
|
67
|
+
|
|
68
|
+
Avoid repeatedly recording long sessions just to test one rendering edge.
|
|
69
|
+
|
|
70
|
+
Recommended patterns:
|
|
71
|
+
|
|
72
|
+
- Keep small, named cassettes for specific UI states: `code-block.pty.json`, `spinner.pty.json`, `long-line.pty.json`.
|
|
73
|
+
- Prefer fixture commands that emit deterministic terminal sequences for a targeted state.
|
|
74
|
+
- Trim at the source by recording a shorter command or a purpose-built harness.
|
|
75
|
+
- Use masks to normalize timestamps, ids, spinner ticks, and model names.
|
|
76
|
+
- Store cassettes under `tests/cassettes/` and keep renderer snapshots under `tests/agent-snapshots/`.
|
|
77
|
+
|
|
78
|
+
If an existing long cassette is useful but contains irrelevant frames, use a supported cassette transform if the project has one; otherwise create a derived fixture in the app's harness rather than hand-editing the cassette and its hashes.
|
|
79
|
+
|
|
80
|
+
## When To Use Browser Agent Cassettes Instead
|
|
81
|
+
|
|
82
|
+
Use browser agent cassettes when you need DOM snapshots, viewport coverage, or Playwright interactions. Use raw PTY cassettes when you only need terminal bytes and want broad compatibility with any PTY provider.
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Script Runner
|
|
2
|
+
|
|
3
|
+
Use scripts for deterministic CLI/TUI tests that do not need a browser terminal renderer.
|
|
4
|
+
|
|
5
|
+
## JSON Script
|
|
6
|
+
|
|
7
|
+
```json
|
|
8
|
+
{
|
|
9
|
+
"$schema": "../schemas/ptywright-script.schema.json",
|
|
10
|
+
"name": "tui_smoke",
|
|
11
|
+
"command": ["bun", "tests/fixtures/tui_demo.ts"],
|
|
12
|
+
"cols": 80,
|
|
13
|
+
"rows": 24,
|
|
14
|
+
"env": { "TERM": "xterm-256color" },
|
|
15
|
+
"steps": [
|
|
16
|
+
{ "type": "waitForText", "text": "Ready", "scope": "buffer" },
|
|
17
|
+
{ "type": "snapshot", "kind": "text", "saveAs": "ready" },
|
|
18
|
+
{ "type": "expectGolden", "name": "ready" }
|
|
19
|
+
]
|
|
20
|
+
}
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Run it:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
ptywright run scripts/tui_smoke.json
|
|
27
|
+
ptywright run scripts/tui_smoke.json --update-goldens
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Run a suite:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
ptywright run-all --dir scripts
|
|
34
|
+
ptywright run-all --dir scripts --update-goldens
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## TypeScript Scripts
|
|
38
|
+
|
|
39
|
+
Use TS scripts when the test needs custom data, helper functions, or custom steps. Keep business logic small. If the script gets complex, move deterministic behavior into a fixture program and keep the ptywright script declarative.
|
|
40
|
+
|
|
41
|
+
## MCP Recording To Script
|
|
42
|
+
|
|
43
|
+
When driving a TUI through MCP tools:
|
|
44
|
+
|
|
45
|
+
1. `start_script_recording(name=...)`
|
|
46
|
+
2. Use normal tools such as `launch_session`, `send_text`, `press_key`, `wait_for_text`, and `snapshot_text`.
|
|
47
|
+
3. Add checkpoints with `mark(label=...)`.
|
|
48
|
+
4. `stop_script_recording(recordingId=..., writeFiles=true)`.
|
|
49
|
+
|
|
50
|
+
The exported script can be committed and replayed without the original agent interaction.
|
|
51
|
+
|
|
52
|
+
## Reports And Artifacts
|
|
53
|
+
|
|
54
|
+
Look for:
|
|
55
|
+
|
|
56
|
+
- `index.html` or `*.report.html`: Timeline report.
|
|
57
|
+
- `*.cast`: Playback stream.
|
|
58
|
+
- `run.summary.json`: Suite/run summary.
|
|
59
|
+
- `failure.last.view.txt`: Last visible terminal state.
|
|
60
|
+
- `failure.last.txt`: Plain last screen.
|
|
61
|
+
- `failure.error.txt`: Error details.
|
|
62
|
+
|
|
63
|
+
## Snapshot Rules
|
|
64
|
+
|
|
65
|
+
- Use `snapshot_text` or text snapshots for stable regression checks.
|
|
66
|
+
- Use ANSI snapshots only when style information matters.
|
|
67
|
+
- Use masks for random tokens, timestamps, ids, progress counters, and spinner glyphs.
|
|
68
|
+
- Use `scope="buffer"` when content may scroll out of the viewport.
|
|
69
|
+
- Use explicit waits before snapshots. Prefer `waitForText` or stable-screen waits over fixed sleeps.
|
|
70
|
+
|
|
71
|
+
## CI Pattern
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
ptywright run-all --dir scripts
|
|
75
|
+
ptywright script validate .tmp/run-all
|
|
76
|
+
ptywright script commands .tmp/run-all --json
|
|
77
|
+
ptywright script exec .tmp/run-all --command updateGoldens
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Use update commands only for intentional baseline changes.
|
|
File without changes
|