@delt/tester-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +36 -0
  2. package/bin/tester-mcp.js +2 -0
  3. package/dist/cli.d.ts +1 -0
  4. package/dist/cli.js +106 -0
  5. package/dist/config/loadConfig.d.ts +14 -0
  6. package/dist/config/loadConfig.js +20 -0
  7. package/dist/env/captureEnv.d.ts +8 -0
  8. package/dist/env/captureEnv.js +20 -0
  9. package/dist/guide/loadGuide.d.ts +6 -0
  10. package/dist/guide/loadGuide.js +23 -0
  11. package/dist/init.d.ts +17 -0
  12. package/dist/init.js +127 -0
  13. package/dist/result/parseExecutorResult.d.ts +11 -0
  14. package/dist/result/parseExecutorResult.js +29 -0
  15. package/dist/result/types.d.ts +44 -0
  16. package/dist/result/types.js +1 -0
  17. package/dist/result/writeResult.d.ts +3 -0
  18. package/dist/result/writeResult.js +22 -0
  19. package/dist/run/buildExecutorArgs.d.ts +6 -0
  20. package/dist/run/buildExecutorArgs.js +39 -0
  21. package/dist/run/buildPrompt.d.ts +7 -0
  22. package/dist/run/buildPrompt.js +84 -0
  23. package/dist/run/runScenario.d.ts +18 -0
  24. package/dist/run/runScenario.js +26 -0
  25. package/dist/run/runScenarios.d.ts +11 -0
  26. package/dist/run/runScenarios.js +47 -0
  27. package/dist/run/spawnExecutor.d.ts +34 -0
  28. package/dist/run/spawnExecutor.js +93 -0
  29. package/dist/run/streamParser.d.ts +17 -0
  30. package/dist/run/streamParser.js +51 -0
  31. package/dist/run/summarizeLine.d.ts +1 -0
  32. package/dist/run/summarizeLine.js +29 -0
  33. package/dist/scenario/actions.d.ts +5 -0
  34. package/dist/scenario/actions.js +33 -0
  35. package/dist/scenario/expandScenarioPaths.d.ts +1 -0
  36. package/dist/scenario/expandScenarioPaths.js +31 -0
  37. package/dist/scenario/parseScenario.d.ts +2 -0
  38. package/dist/scenario/parseScenario.js +37 -0
  39. package/dist/scenario/types.d.ts +47 -0
  40. package/dist/scenario/types.js +1 -0
  41. package/dist/secrets/loadSecretsFile.d.ts +1 -0
  42. package/dist/secrets/loadSecretsFile.js +10 -0
  43. package/dist/secrets/redactSecrets.d.ts +6 -0
  44. package/dist/secrets/redactSecrets.js +46 -0
  45. package/dist/secrets/resolveSecrets.d.ts +5 -0
  46. package/dist/secrets/resolveSecrets.js +16 -0
  47. package/dist/util/runId.d.ts +1 -0
  48. package/dist/util/runId.js +3 -0
  49. package/package.json +19 -0
  50. package/skills/tester-mcp/SKILL.md +29 -0
  51. package/skills/tester-mcp/document-guide.md +146 -0
package/README.md ADDED
@@ -0,0 +1,36 @@
1
+ # @delt/tester-mcp
2
+
3
+ Opus (planner) + Haiku (executor) + Chrome screen E2E test orchestrator.
4
+ The CLI spawns `claude -p --chrome` per scenario and reports one of four labels:
5
+ PASS / PARTIAL / FAIL / NOT_TESTED.
6
+
7
+ ## Install
8
+
9
+ ```
10
+ npm i -g @delt/tester-mcp
11
+ tester-mcp init # installs the skill, scaffolds config + secrets example
12
+ ```
13
+
14
+ ## Use
15
+
16
+ ```
17
+ tester-mcp run scenarios/<area>/<id>.yaml -c tester-mcp.config.yaml
18
+ ```
19
+
20
+ ## Authoring & prerequisites
21
+
22
+ This tool is designed to be driven by an AI agent. The authoritative,
23
+ version-pinned reference for prerequisites and the scenario DSL is the CLI itself:
24
+
25
+ ```
26
+ tester-mcp document-guide
27
+ ```
28
+
29
+ Run that before writing scenarios. (This README intentionally does not duplicate
30
+ those facts — the command is the single source of truth.)
31
+
32
+ ## Scenarios live in your project, not here
33
+
34
+ Scenario YAML files belong to the project under test — commit them to that repo.
35
+ For a minimal example, run `tester-mcp document-guide`; it includes a sample
36
+ scenario.
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ import "../dist/cli.js";
package/dist/cli.d.ts ADDED
@@ -0,0 +1 @@
1
+ export {};
package/dist/cli.js ADDED
@@ -0,0 +1,106 @@
1
+ import { Command } from "commander";
2
+ import { resolve, join, dirname } from "node:path";
3
+ import { readFileSync, existsSync } from "node:fs";
4
+ import { fileURLToPath } from "node:url";
5
+ import { loadConfig } from "./config/loadConfig.js";
6
+ import { parseScenario } from "./scenario/parseScenario.js";
7
+ import { resolveSecrets } from "./secrets/resolveSecrets.js";
8
+ import { loadSecretsFile } from "./secrets/loadSecretsFile.js";
9
+ import { collectSecretValues, redactSecrets } from "./secrets/redactSecrets.js";
10
+ import { runInit } from "./init.js";
11
+ import { runScenarios, clampConcurrency, MAX_CONCURRENCY } from "./run/runScenarios.js";
12
+ import { expandScenarioPaths } from "./scenario/expandScenarioPaths.js";
13
+ import { writeScenarioResult, writeSummary } from "./result/writeResult.js";
14
+ import { captureEnv } from "./env/captureEnv.js";
15
+ import { makeRunId } from "./util/runId.js";
16
+ import { loadGuide } from "./guide/loadGuide.js";
17
const program = new Command();
program.name("tester-mcp").description("Opus+Haiku+Chrome 통합 테스트 (Phase 1: 화면검증)");

/**
 * Parse an optional numeric CLI flag value.
 *
 * @param {string} flag - Flag name, used only in the error message.
 * @param {string | undefined} raw - Raw value as received from commander.
 * @returns {number | undefined} The parsed number, or undefined when the flag
 *   was not given (or given as an empty string) so the caller can fall back.
 * @throws {Error} When the value does not parse to a finite number; without
 *   this check a typo such as `--timeout abc` would propagate as NaN.
 */
function parseNumericOption(flag, raw) {
    if (!raw)
        return undefined;
    const value = Number(raw);
    if (!Number.isFinite(value))
        throw new Error(`${flag} 옵션 값이 올바른 숫자가 아닙니다: ${raw}`);
    return value;
}

// [Extension 5] validate/report/diff would be added here as further .command() registrations.
program
    .command("run")
    .argument("<scenarios...>", "시나리오 YAML 경로(파일/디렉토리, 여러 개 가능)")
    .option("-c, --config <path>", "설정 파일", "tester-mcp.config.yaml")
    .option("--secrets <path>", "시크릿 파일", "tester-mcp.secrets.yaml")
    .option("--front-dir <path>", "frontend git 디렉토리(commit 캡처)")
    .option("--timeout <ms>", "executor 하드 타임아웃(ms, config runner.timeout_ms 오버라이드)")
    .option("--concurrency <n>", `병렬 executor 수(1~${MAX_CONCURRENCY}, 기본 min(시나리오 수, ${MAX_CONCURRENCY}))`)
    .option("--verbose", "executor 이벤트를 콘솔에 실시간 출력")
    .option("--out-dir <path>", "결과/로그 출력 베이스 디렉토리(기본 runs)", "runs")
    .action(async (scenarioPaths, opts) => {
        try {
            const config = loadConfig(resolve(opts.config));
            const secrets = loadSecretsFile(resolve(opts.secrets));
            const files = expandScenarioPaths(scenarioPaths);
            const scenarios = files.map((f) => parseScenario(readFileSync(f, "utf8")));
            const runId = makeRunId();
            const outDir = opts.outDir ?? "runs";
            const runDir = join(outDir, runId);
            const env = captureEnv({ model: config.runner.model, frontendDir: opts.frontDir });
            // The CLI flag overrides the config value; invalid numbers are rejected up front.
            const timeoutMs = parseNumericOption("--timeout", opts.timeout) ?? config.runner.timeout_ms;
            const concurrency = clampConcurrency(parseNumericOption("--concurrency", opts.concurrency), scenarios.length);
            if (scenarios.length > 1)
                console.log(`시나리오 ${scenarios.length}개 · 병렬 ${concurrency}`);
            // Compute secret values before the run so they can be redacted in streaming logs.
            const secretValues = collectSecretValues({ secrets, env: process.env });
            const results = await runScenarios(scenarios, {
                runId,
                targets: { frontend: config.targets.frontend },
                model: config.runner.model,
                env,
                resolveValue: (v) => resolveSecrets(v, { secrets }),
                timeoutMs,
            }, concurrency, { verbose: opts.verbose, secretValues, outDir });
            // Redact again on the final results before anything is written or printed.
            const safe = results.map((r) => redactSecrets(r, secretValues));
            const startedAt = safe[0]?.started_at ?? new Date().toISOString();
            for (const s of safe) {
                writeScenarioResult(runDir, s);
                console.log(`[${s.status}] ${s.scenario_id} → ${join(runDir, s.scenario_id + ".json")}`);
                if (s.evidence?.length)
                    console.log(" evidence:", s.evidence.join(" | "));
            }
            writeSummary(runDir, runId, startedAt, safe);
            // Exit 0 only if every scenario passed or partially passed.
            const ok = safe.every((s) => s.status === "PASS" || s.status === "PARTIAL");
            process.exit(ok ? 0 : 1);
        }
        catch (err) {
            // Exit code 2 marks a tool/setup error, as opposed to 1 for failing scenarios.
            console.error("실행 오류:", err instanceof Error ? err.message : err);
            process.exit(2);
        }
    });
71
/**
 * Action handler for `tester-mcp init`: runs the setup wizard and turns any
 * failure into a console message plus exit code 2.
 */
async function handleInit(opts) {
    try {
        await runInit(opts);
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : err;
        console.error("init 오류:", detail);
        process.exit(2);
    }
}

const initCommand = program.command("init");
initCommand.description("스킬·설정·시크릿 예시를 설치하는 셋업 마법사");
initCommand
    .option("--global", "전역 스킬 설치(~/.claude/skills)")
    .option("--project [path]", "프로젝트 스킬 설치(<path>/.claude/skills, 기본 cwd)")
    .option("--frontend <url>", "frontend URL")
    .option("--backend <url>", "backend URL(선택)")
    .option("--model <m>", "executor 모델")
    .option("--yes", "비대화형(기본값/플래그 사용)");
initCommand.action(handleInit);
89
/**
 * Locate the bundled authoring guide relative to this module and write it to
 * stdout, making sure the output ends with a newline.
 */
function printDocumentGuide() {
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    const guideFs = {
        exists: existsSync,
        read: (p) => readFileSync(p, "utf8"),
    };
    const guide = loadGuide(moduleDir, guideFs);
    const output = guide.endsWith("\n") ? guide : guide + "\n";
    process.stdout.write(output);
}

program
    .command("document-guide")
    .description("Print the authoring guide (prerequisites + scenario DSL) to stdout")
    .action(() => {
        try {
            printDocumentGuide();
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : err;
            console.error("document-guide error:", detail);
            process.exit(2);
        }
    });

// Kick off argument parsing; each command handles its own errors.
program.parseAsync();
@@ -0,0 +1,14 @@
1
/** Fallback executor hard timeout in milliseconds (5 minutes). */
export declare const DEFAULT_TIMEOUT_MS = 300000;

/** Normalized shape of the tester-mcp configuration file. */
export interface Config {
    /** Project name taken from the config file. */
    project: string;
    /** URLs of the systems under test. */
    targets: {
        /** Frontend URL; the only required config field. */
        frontend: string;
        /** Optional backend URL. */
        backend?: string;
    };
    /** Executor settings. */
    runner: {
        /** Model name handed to the executor. */
        model: string;
        /** Executor hard timeout in milliseconds. */
        timeout_ms: number;
    };
}

/**
 * Parse configuration from YAML text.
 * @param yamlText Raw YAML document.
 * @returns The normalized configuration.
 */
export declare function parseConfig(yamlText: string): Config;

/**
 * Read a configuration file from disk and parse it.
 * @param path Path of the YAML config file.
 * @returns The normalized configuration.
 */
export declare function loadConfig(path: string): Config;
@@ -0,0 +1,20 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { parse as parseYaml } from "yaml";
3
/** Default executor hard timeout: 5 minutes, in milliseconds. */
export const DEFAULT_TIMEOUT_MS = 300_000;

/**
 * Parse and normalize the tester-mcp configuration from YAML text.
 *
 * Only `targets.frontend` is mandatory. Every other field falls back to a
 * default when it is missing or has the wrong type, so downstream code always
 * receives strings/numbers of the declared shape.
 *
 * @param {string} yamlText - Raw YAML document.
 * @returns {{project: string, targets: {frontend: string, backend?: string},
 *   runner: {model: string, timeout_ms: number}}} Normalized configuration.
 * @throws {Error} When `targets.frontend` is missing, not a string, or blank.
 */
export function parseConfig(yamlText) {
    const raw = (parseYaml(yamlText) ?? {});
    const frontend = raw?.targets?.frontend;
    // A blank URL is as unusable as a missing one, so reject both the same way.
    if (typeof frontend !== "string" || frontend.trim() === "")
        throw new Error("설정 필수 필드 누락: targets.frontend");
    const backend = raw?.targets?.backend;
    const model = raw?.runner?.model;
    const timeout = raw?.runner?.timeout_ms;
    // YAML can produce NaN/Infinity (`.nan`, `.inf`) and negatives; none is a usable timeout.
    const timeoutUsable = typeof timeout === "number" && Number.isFinite(timeout) && timeout >= 0;
    return {
        project: typeof raw.project === "string" ? raw.project : "unknown",
        targets: { frontend, backend: typeof backend === "string" ? backend : undefined },
        runner: {
            // A non-string model (e.g. a bare number) cannot be passed as a CLI argument.
            model: typeof model === "string" && model !== "" ? model : "haiku",
            timeout_ms: timeoutUsable ? timeout : DEFAULT_TIMEOUT_MS,
        },
    };
}
18
/**
 * Read the config file at `path` as UTF-8 and hand its text to parseConfig.
 *
 * @param {string} path - Location of the YAML config file.
 * @returns The normalized configuration produced by parseConfig.
 */
export function loadConfig(path) {
    const yamlText = readFileSync(path, "utf8");
    return parseConfig(yamlText);
}
@@ -0,0 +1,8 @@
1
+ import type { Environment } from "../result/types.js";
2
/** Inputs for {@link captureEnv}. */
export interface CaptureEnvOptions {
    /** Runner model name, recorded as-is in the environment snapshot. */
    model: string;
    /** Directory of the frontend git checkout; when set, its commit is captured. */
    frontendDir?: string;
    /** Directory of the backend git checkout; when set, its commit is captured. */
    backendDir?: string;
    /** Optional replacement for the git lookup; returns the commit for a directory, or undefined. */
    gitSha?: (dir: string) => string | undefined;
}

/**
 * Build the environment record stored with run results.
 * @param opts Model name plus optional git directories and lookup override.
 */
export declare function captureEnv(opts: CaptureEnvOptions): Environment;
@@ -0,0 +1,20 @@
1
+ import { execFileSync } from "node:child_process";
2
/**
 * Best-effort lookup of the HEAD commit of the git checkout at `dir`.
 *
 * @param {string} dir - Directory passed to `git -C`.
 * @returns {string | undefined} The commit SHA, or undefined when git is
 *   unavailable, `dir` is not a repository, or the command fails otherwise.
 */
function realGitSha(dir) {
    try {
        return execFileSync("git", ["-C", dir, "rev-parse", "HEAD"], {
            encoding: "utf8",
            // Failure is an expected outcome here, so keep git's "fatal: ..." stderr
            // output off the console instead of letting it inherit the parent's stderr.
            stdio: ["ignore", "pipe", "ignore"],
        }).trim();
    }
    catch {
        return undefined;
    }
}

/**
 * Capture a snapshot of the environment a run executes in.
 *
 * @param opts - `model` is recorded verbatim; `frontendDir` / `backendDir`
 *   are optional git directories whose HEAD commit is recorded; `gitSha`
 *   optionally replaces the real git lookup.
 * @returns An object with the commits (undefined when no directory was given
 *   or the lookup failed), a fixed browser label, the runner model, the Node
 *   version and the OS platform.
 */
export function captureEnv(opts) {
    const gitSha = opts.gitSha ?? realGitSha;
    return {
        frontend_commit: opts.frontendDir ? gitSha(opts.frontendDir) : undefined,
        backend_commit: opts.backendDir ? gitSha(opts.backendDir) : undefined,
        browser: "chrome (claude-in-chrome)",
        runner_model: opts.model,
        node_version: process.version,
        os: process.platform,
    };
}
@@ -0,0 +1,6 @@
1
/** Minimal filesystem surface needed to load the guide; injectable for tests. */
export interface GuideFs {
    /** Reports whether a file exists at `p`. */
    exists(p: string): boolean;
    /** Returns the text content of the file at `p`. */
    read(p: string): string;
}

/**
 * Search `startDir` and its ancestors for the bundled document guide.
 * @param startDir Directory to start from.
 * @param exists File-existence probe.
 * @returns Path of the guide, or undefined when it was not found.
 */
export declare function findGuidePath(startDir: string, exists: (p: string) => boolean): string | undefined;

/**
 * Locate and read the bundled document guide.
 * @param startDir Directory to start searching from.
 * @param fs Filesystem accessors.
 * @returns The guide text.
 */
export declare function loadGuide(startDir: string, fs: GuideFs): string;
@@ -0,0 +1,23 @@
1
+ import { join, dirname } from "node:path";
2
/**
 * Look for skills/tester-mcp/document-guide.md in `startDir` and then in each
 * parent directory in turn. At most six directories are inspected, and the
 * search also stops once the filesystem root has been checked.
 *
 * @param {string} startDir - Directory to begin the search in.
 * @param {(p: string) => boolean} exists - File-existence probe.
 * @returns {string | undefined} Path of the first match, or undefined.
 */
export function findGuidePath(startDir, exists) {
    let current = startDir;
    let remaining = 6;
    while (remaining > 0) {
        remaining -= 1;
        const guide = join(current, "skills", "tester-mcp", "document-guide.md");
        if (exists(guide)) {
            return guide;
        }
        const up = dirname(current);
        if (up === current) {
            // dirname() no longer changes the path: the root has been reached.
            return undefined;
        }
        current = up;
    }
    return undefined;
}

/**
 * Find the bundled guide starting from `startDir` and return its contents.
 *
 * @param {string} startDir - Directory to begin the search in.
 * @param {{exists: (p: string) => boolean, read: (p: string) => string}} fs -
 *   Filesystem accessors.
 * @returns {string} Whatever `fs.read` returns for the located guide.
 * @throws {Error} When no guide file is found.
 */
export function loadGuide(startDir, fs) {
    const located = findGuidePath(startDir, fs.exists);
    if (located) {
        return fs.read(located);
    }
    throw new Error("document-guide.md not found. Reinstall the package or run from the project root.");
}
package/dist/init.d.ts ADDED
@@ -0,0 +1,17 @@
1
/**
 * Render the contents of tester-mcp.config.yaml.
 * @param o Frontend URL, optional backend URL and executor model.
 * @returns YAML text.
 */
export declare function renderConfigYaml(o: {
    frontend: string;
    backend?: string;
    model: string;
}): string;

/**
 * Compute the skills directory for an install scope.
 * @param scope "global" resolves under `home`, "project" under `projectPath`.
 * @param projectPath Project directory.
 * @param home User home directory.
 */
export declare function skillsDirFor(scope: "global" | "project", projectPath: string, home: string): string;

/** Returns the YAML text of the secrets example file. */
export declare function secretsExampleYaml(): string;

/**
 * Search `startDir` and its ancestors for the bundled SKILL.md.
 * @returns Path of the skill file, or undefined when not found.
 */
export declare function findBundledSkill(startDir: string): string | undefined;

/** Flags accepted by `tester-mcp init`. */
export interface InitOptions {
    /** Install the skill globally. */
    global?: boolean;
    /** Install into a project: a path, or `true` when the flag was given without a value. */
    project?: string | boolean;
    /** Frontend URL. */
    frontend?: string;
    /** Backend URL. */
    backend?: string;
    /** Executor model. */
    model?: string;
    /** Non-interactive mode: use flags/defaults instead of prompting. */
    yes?: boolean;
}

/** Run the setup wizard. */
export declare function runInit(opts: InitOptions): Promise<void>;
package/dist/init.js ADDED
@@ -0,0 +1,127 @@
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
2
+ import { dirname, join } from "node:path";
3
+ import { fileURLToPath } from "node:url";
4
+ import { homedir } from "node:os";
5
+ import { createInterface } from "node:readline/promises";
6
+ // ── PURE testable helpers ────────────────────────────────────────────────
7
/**
 * Render the contents of tester-mcp.config.yaml.
 *
 * Values made only of URL/identifier characters are written verbatim, so
 * typical input renders exactly as before. Any other value — one containing
 * whitespace, `#`, quotes, a trailing colon, or one that YAML would read as a
 * boolean, null or number — is written as a double-quoted scalar so it is
 * read back as the same string.
 *
 * @param {{frontend: string, backend?: string, model: string}} o - Values to write.
 * @returns {string} YAML text ending with a newline.
 */
export function renderConfigYaml(o) {
    const scalar = (value) => {
        const text = String(value);
        // Starts with a letter and contains no whitespace or YAML indicator
        // characters, and does not end in ":" (which would start a mapping).
        const plainSafe = /^[A-Za-z][A-Za-z0-9_.\/:@-]*[A-Za-z0-9_\/]$/.test(text);
        const reservedWord = /^(?:true|false|null|yes|no|on|off)$/i.test(text);
        // JSON string syntax is valid YAML double-quoted syntax.
        return plainSafe && !reservedWord ? text : JSON.stringify(text);
    };
    const lines = ["targets:", ` frontend: ${scalar(o.frontend)}`];
    if (o.backend)
        lines.push(` backend: ${scalar(o.backend)}`);
    lines.push("runner:", ` model: ${scalar(o.model)}`);
    return lines.join("\n") + "\n";
}
14
/**
 * Resolve the `.claude/skills` directory for an install scope.
 *
 * @param {"global" | "project"} scope - "global" anchors at `home`; any other
 *   value anchors at `projectPath`.
 * @param {string} projectPath - Project directory.
 * @param {string} home - User home directory.
 * @returns {string} The skills directory path.
 */
export function skillsDirFor(scope, projectPath, home) {
    const base = scope === "global" ? home : projectPath;
    return join(base, ".claude", "skills");
}
19
/**
 * Build the text of the secrets example file: a `tester` entry with
 * placeholder username and password.
 *
 * @returns {string} YAML text ending with a newline.
 */
export function secretsExampleYaml() {
    const rows = [
        "tester:",
        ' username: "YOUR_ID"',
        ' password: "YOUR_PASSWORD"',
    ];
    // The trailing empty element yields the final newline after join().
    rows.push("");
    return rows.join("\n");
}
22
+ // ── thin glue (not unit-tested) ──────────────────────────────────────────
23
/**
 * Locate the bundled skill file by checking `startDir` and then each parent
 * directory for skills/tester-mcp/SKILL.md. At most six directories are
 * inspected, and the search also ends once the filesystem root was checked.
 *
 * @param {string} startDir - Directory to begin the search in.
 * @returns {string | undefined} Path of the skill file, or undefined.
 */
export function findBundledSkill(startDir) {
    let current = startDir;
    for (let remaining = 6; remaining > 0; remaining -= 1) {
        const skillFile = join(current, "skills", "tester-mcp", "SKILL.md");
        if (existsSync(skillFile)) {
            return skillFile;
        }
        const up = dirname(current);
        if (up === current) {
            // Reached the root; nothing further to inspect.
            return undefined;
        }
        current = up;
    }
    return undefined;
}
39
/**
 * Setup wizard behind `tester-mcp init`.
 *
 * Unless `opts.yes` is set, any value not supplied as a flag is asked for on
 * the terminal. The function then copies the bundled skill, writes the config
 * file, writes the secrets example, makes sure the real secrets file is listed
 * in .gitignore, and prints the next steps.
 *
 * @param opts - Parsed `init` flags (global, project, frontend, backend, model, yes).
 * @returns {Promise<void>}
 */
export async function runInit(opts) {
    const DEFAULT_FRONTEND = "http://localhost:5173";
    const DEFAULT_MODEL = "haiku";
    const interactive = !opts.yes;
    const projectGiven = typeof opts.project === "string";
    // --global wins; every other combination starts out as a project install
    // (which the interactive scope question below may still change).
    let scope = opts.global ? "global" : "project";
    let projectPath = projectGiven ? opts.project : process.cwd();
    let { frontend, backend, model } = opts;

    if (interactive) {
        const rl = createInterface({ input: process.stdin, output: process.stdout });
        const ask = async (question) => (await rl.question(question)).trim();
        try {
            // Scope is only asked when neither --global nor --project was passed.
            if (!opts.global && opts.project === undefined) {
                const chosen = await ask("스킬 설치 범위 [project/global] (project): ");
                if (chosen === "global") {
                    scope = "global";
                }
            }
            if (scope === "project" && !projectGiven) {
                const typedPath = await ask(`프로젝트 경로 (${projectPath}): `);
                if (typedPath) {
                    projectPath = typedPath;
                }
            }
            if (frontend === undefined) {
                frontend = (await ask("frontend URL (http://localhost:5173): ")) || DEFAULT_FRONTEND;
            }
            if (backend === undefined) {
                backend = (await ask("backend URL (선택, 비우면 생략): ")) || undefined;
            }
            if (model === undefined) {
                model = (await ask("model (haiku): ")) || DEFAULT_MODEL;
            }
        }
        finally {
            rl.close();
        }
    }

    // Defaults for non-interactive runs and for values given as empty strings.
    frontend = frontend || DEFAULT_FRONTEND;
    model = model || DEFAULT_MODEL;

    const cwd = process.cwd();
    // The config goes next to the project for a project install, otherwise into cwd.
    const configDir = scope === "project" ? projectPath : cwd;

    // 1) Copy the bundled skill.
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    const bundledSkill = findBundledSkill(moduleDir);
    const destSkill = join(skillsDirFor(scope, projectPath, homedir()), "tester-mcp", "SKILL.md");
    if (!bundledSkill) {
        console.warn("경고: 번들 스킬(skills/tester-mcp/SKILL.md)을 찾지 못했습니다. 스킬 복사를 건너뜁니다.");
    }
    else {
        mkdirSync(dirname(destSkill), { recursive: true });
        writeFileSync(destSkill, readFileSync(bundledSkill, "utf8"), "utf8");
        console.log(`스킬 복사: ${destSkill}`);
    }

    // 2) Write the config file.
    const configPath = join(configDir, "tester-mcp.config.yaml");
    mkdirSync(dirname(configPath), { recursive: true });
    writeFileSync(configPath, renderConfigYaml({ frontend, backend, model }), "utf8");
    console.log(`설정 작성: ${configPath}`);

    // 3) Scaffold the secrets example in cwd and make sure the real secrets file is git-ignored.
    const examplePath = join(cwd, "tester-mcp.secrets.example.yaml");
    writeFileSync(examplePath, secretsExampleYaml(), "utf8");
    console.log(`시크릿 예시 작성: ${examplePath}`);
    const gitignorePath = join(cwd, ".gitignore");
    const entry = "tester-mcp.secrets.yaml";
    const existing = existsSync(gitignorePath) ? readFileSync(gitignorePath, "utf8") : "";
    const alreadyListed = existing.split(/\r?\n/).some((l) => l.trim() === entry);
    if (!alreadyListed) {
        // Start the entry on its own line when the file lacks a trailing newline.
        const separator = existing.length && !existing.endsWith("\n") ? "\n" : "";
        writeFileSync(gitignorePath, existing + separator + entry + "\n", "utf8");
        console.log(`.gitignore 갱신: ${entry} 추가`);
    }

    // 4) Next steps.
    const nextSteps = [
        "\n다음 단계:",
        ` 1) cp tester-mcp.secrets.example.yaml tester-mcp.secrets.yaml`,
        ` → 테스트 계정(username/password)을 채우세요. (이 파일은 gitignore됨)`,
        ` 2) 시나리오 작성 후 실행:`,
        ` tester-mcp run scenarios/<area>/<id>.yaml -c ${join(configDir, "tester-mcp.config.yaml")}`,
    ];
    for (const line of nextSteps) {
        console.log(line);
    }
}
@@ -0,0 +1,11 @@
1
+ import { type Status } from "./types.js";
2
/** Fields recovered from the executor's final message. */
export interface PartialResult {
    /** One of the four status labels. */
    status: Status;
    /** Evidence strings reported by the executor. */
    evidence?: string[];
    /** Per-step entries as reported by the executor (not validated further). */
    steps?: any[];
    /** Notes for the scenario author about what failed and how to fix it. */
    handoff_notes?: string;
    /** Why the scenario could not be tested. */
    not_tested_reason?: string;
    /** Inference marker reported by the executor. */
    pattern_inference?: "assumed_ok" | "unknown";
    /** Original executor text, kept when the result could not be parsed or validated. */
    raw_executor_text?: string;
}

/**
 * Turn the executor's final text into a result.
 * @param resultText Final message text of the executor.
 */
export declare function parseExecutorResult(resultText: string): PartialResult;
@@ -0,0 +1,29 @@
1
+ import { STATUSES } from "./types.js";
2
/**
 * Extract the JSON object embedded in executor output.
 *
 * Candidates are tried in this order, and the first one that parses wins:
 *   1. the content of each ```json fence, first fence first;
 *   2. the whole text.
 * Within a candidate, the span from the first "{" to the last "}" is tried
 * first. If that fails (for example because prose before the object contains
 * braces), later "{" positions are tried against the same closing "}".
 *
 * @param {string} text - Raw executor output.
 * @returns {any | null} The parsed object, or null when nothing parses.
 */
function extractJson(text) {
    const parseOrNull = (chunk) => {
        try {
            return JSON.parse(chunk);
        }
        catch {
            return null;
        }
    };
    const parseCandidate = (candidate) => {
        const start = candidate.indexOf("{"), end = candidate.lastIndexOf("}");
        if (start === -1 || end <= start)
            return null;
        const whole = parseOrNull(candidate.slice(start, end + 1));
        if (whole !== null)
            return whole;
        // Walk the remaining "{" positions from right to left; nested objects fail
        // to parse against the final "}" and are skipped until an enclosing one fits.
        for (let i = candidate.lastIndexOf("{", end); i > start; i = candidate.lastIndexOf("{", i - 1)) {
            const parsed = parseOrNull(candidate.slice(i, end + 1));
            if (parsed !== null)
                return parsed;
        }
        return null;
    };
    const candidates = [];
    for (const fence of text.matchAll(/```json\s*([\s\S]*?)```/gi))
        candidates.push(fence[1]);
    candidates.push(text);
    for (const candidate of candidates) {
        const parsed = parseCandidate(candidate);
        if (parsed !== null)
            return parsed;
    }
    return null;
}
15
/**
 * Convert the executor's final text into a result.
 *
 * When no JSON object can be extracted, or its `status` is not one of the
 * known labels, the result is NOT_TESTED with a reason and the raw text
 * attached. Otherwise each optional field is copied only if it has the
 * expected type.
 *
 * @param {string} resultText - Final message text of the executor.
 * @returns An object with `status` plus whichever optional fields were valid.
 */
export function parseExecutorResult(resultText) {
    const obj = extractJson(resultText);
    if (!obj || typeof obj !== "object")
        return { status: "NOT_TESTED", not_tested_reason: "executor 출력 JSON 파싱 실패", raw_executor_text: resultText };
    if (!STATUSES.includes(obj.status))
        return { status: "NOT_TESTED", not_tested_reason: `status 라벨이 4종 아님: ${String(obj.status)}`, raw_executor_text: resultText };
    const inference = obj.pattern_inference;
    return {
        status: obj.status,
        // Evidence is declared as string[]; serialize any non-string entry so it
        // stays readable instead of printing as "[object Object]".
        evidence: Array.isArray(obj.evidence)
            ? obj.evidence.map((item) => (typeof item === "string" ? item : JSON.stringify(item)))
            : undefined,
        steps: Array.isArray(obj.steps) ? obj.steps : undefined,
        handoff_notes: typeof obj.handoff_notes === "string" ? obj.handoff_notes : undefined,
        not_tested_reason: typeof obj.not_tested_reason === "string" ? obj.not_tested_reason : undefined,
        // Only the two declared values are accepted; anything else is dropped.
        pattern_inference: inference === "assumed_ok" || inference === "unknown" ? inference : undefined,
    };
}
@@ -0,0 +1,44 @@
1
/** The four outcome labels a scenario or step can carry. */
export type Status = "PASS" | "PARTIAL" | "FAIL" | "NOT_TESTED";

/** Every {@link Status} value as a runtime list. */
export declare const STATUSES: Status[];

/** Outcome of a single scenario step. */
export interface StepResult {
    /** Step number within the scenario. */
    index: number;
    /** Name of the action performed. */
    action: string;
    /** Outcome of this step. */
    status: Status;
    /** Error text, when the step reported one. */
    error?: string;
    /** Screenshot reference, when one was taken. */
    screenshot?: string;
}

/** Snapshot of the environment a run executed in. */
export interface Environment {
    /** Commit of the frontend checkout, when captured. */
    frontend_commit?: string;
    /** Commit of the backend checkout, when captured. */
    backend_commit?: string;
    /** Browser label. */
    browser?: string;
    /** Model used by the runner. */
    runner_model?: string;
    /** Node.js version string. */
    node_version: string;
    /** Operating-system platform identifier. */
    os: string;
}

/** Full result record of one scenario. */
export interface ScenarioResult {
    /** Identifier of the run this result belongs to. */
    run_id: string;
    /** Identifier of the scenario. */
    scenario_id: string;
    /** Overall outcome. */
    status: Status;
    /** Why the scenario could not be tested. */
    not_tested_reason?: string;
    /** Inference marker reported by the executor. */
    pattern_inference?: "assumed_ok" | "unknown";
    /** Evidence strings. */
    evidence?: string[];
    /** Start timestamp. */
    started_at: string;
    /** Duration in milliseconds. */
    duration_ms: number;
    /** Per-step outcomes. */
    steps: StepResult[];
    /** Environment snapshot. */
    environment: Environment;
    /** Notes for the scenario author. */
    handoff_notes?: string;
    /** Raw executor text, when kept. */
    raw_executor_text?: string;
    /** Name of the last tool. */
    last_tool?: string;
    /** Number of tools counted. */
    tool_count?: number;
    /** Executor log reference. */
    executor_log?: string;
}

/** Aggregate written once per run. */
export interface RunSummary {
    /** Identifier of the run. */
    run_id: string;
    /** Start timestamp. */
    started_at: string;
    /** Number of scenarios in the run. */
    total: number;
    /** Scenario count per status label. */
    by_status: Record<Status, number>;
    /** Status of each scenario. */
    scenarios: {
        scenario_id: string;
        status: Status;
    }[];
}
@@ -0,0 +1 @@
1
/** The four status labels, in the order used for summary output. */
export const STATUSES = [
    "PASS",
    "PARTIAL",
    "FAIL",
    "NOT_TESTED",
];
@@ -0,0 +1,3 @@
1
+ import { type ScenarioResult } from "./types.js";
2
/**
 * Write one scenario result as JSON under `runDir`.
 * @param runDir Directory of the current run.
 * @param result Result to serialize.
 * @returns Path of the written file.
 */
export declare function writeScenarioResult(runDir: string, result: ScenarioResult): string;

/**
 * Write the run summary as JSON under `runDir`.
 * @param runDir Directory of the current run.
 * @param runId Identifier of the run.
 * @param startedAt Start timestamp recorded in the summary.
 * @param results Results to summarize.
 * @returns Path of the written file.
 */
export declare function writeSummary(runDir: string, runId: string, startedAt: string, results: ScenarioResult[]): string;
@@ -0,0 +1,22 @@
1
+ import { mkdirSync, writeFileSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import { STATUSES } from "./types.js";
4
/**
 * Write one scenario result to `<runDir>/<scenario_id>.json` (pretty-printed).
 *
 * The directory that will contain the file is created first, so a
 * `scenario_id` with a path separator (e.g. "auth/login") lands in a
 * sub-directory instead of failing with ENOENT.
 *
 * @param {string} runDir - Directory of the current run.
 * @param result - Result object; `scenario_id` determines the file name.
 * @returns {string} Path of the written file.
 */
export function writeScenarioResult(runDir, result) {
    const p = join(runDir, `${result.scenario_id}.json`);
    // join(p, "..") is the file's containing directory; for plain ids it equals runDir.
    mkdirSync(join(p, ".."), { recursive: true });
    writeFileSync(p, JSON.stringify(result, null, 2), "utf8");
    return p;
}
10
/**
 * Write `<runDir>/summary.json`: run id, start time, total count, a count per
 * status label, and the status of every scenario.
 *
 * @param {string} runDir - Directory of the current run (created if missing).
 * @param {string} runId - Identifier of the run.
 * @param {string} startedAt - Start timestamp recorded in the summary.
 * @param results - Scenario results to summarize.
 * @returns {string} Path of the written summary file.
 */
export function writeSummary(runDir, runId, startedAt, results) {
    mkdirSync(runDir, { recursive: true });
    // Seed every label with zero so each one appears in the output.
    const counts = {};
    for (const label of STATUSES) {
        counts[label] = 0;
    }
    results.forEach((r) => {
        counts[r.status]++;
    });
    const scenarios = results.map((r) => ({ scenario_id: r.scenario_id, status: r.status }));
    const summary = {
        run_id: runId,
        started_at: startedAt,
        total: results.length,
        by_status: counts,
        scenarios,
    };
    const target = join(runDir, "summary.json");
    writeFileSync(target, JSON.stringify(summary, null, 2), "utf8");
    return target;
}
@@ -0,0 +1,6 @@
1
/** Inputs for {@link buildExecutorArgs}. */
export interface ExecutorArgsOptions {
    /** User prompt passed with `-p`. */
    prompt: string;
    /** Text passed with `--append-system-prompt`. */
    systemPrompt: string;
    /** Model name passed with `--model`. */
    model: string;
}

/**
 * Build the argument list for the executor process.
 * @param o Prompt, system prompt and model.
 * @returns Arguments in the order they are passed on the command line.
 */
export declare function buildExecutorArgs(o: ExecutorArgsOptions): string[];
@@ -0,0 +1,39 @@
1
/**
 * Build the argument list for the `claude -p --chrome` executor process.
 *
 * The flag set is recorded as verified by live smoke tests; the notes on each
 * group below carry over the reasons given for every flag.
 *
 * @param {{prompt: string, systemPrompt: string, model: string}} o - Inputs.
 * @returns {string[]} Arguments in command-line order.
 */
export function buildExecutorArgs(o) {
    // Core invocation.
    // - stream-json output requires --verbose and emits one JSON line per event.
    // - --dangerously-skip-permissions is needed so chrome tools (navigate/click)
    //   run automatically; --permission-mode dontAsk blocks them and the executor
    //   stalls on a permission prompt.
    // - --bare is deliberately absent: it skips the auth context too ("Not logged
    //   in") and would only work with ANTHROPIC_API_KEY in the environment.
    const core = [
        "-p", o.prompt,
        "--chrome",
        "--model", o.model,
        "--append-system-prompt", o.systemPrompt,
        "--output-format", "stream-json",
        "--verbose",
        "--dangerously-skip-permissions",
        "--no-session-persistence",
    ];
    // Context trimming.
    // - --strict-mcp-config with an empty --mcp-config loads no ambient MCP
    //   servers; chrome tools still come from --chrome, so only unused tool
    //   schemas and instructions are dropped from the executor's context.
    // - --exclude-dynamic-system-prompt-sections moves per-machine details
    //   (cwd/env/git) out of the system prompt for better prompt-cache reuse
    //   across parallel and repeated runs.
    const contextTrim = [
        "--strict-mcp-config",
        "--mcp-config", JSON.stringify({ mcpServers: {} }),
        "--exclude-dynamic-system-prompt-sections",
    ];
    // Isolation (recorded as verified live 2026-05-26).
    // Without these the executor inherits the host environment — project/global
    // CLAUDE.md, the SessionStart hook and all skills — and gets pulled into the
    // doc/skill workflow (Skill, Task subagents, Bash/Write/Edit/Read) without
    // ever touching the browser until the timeout. --bare would strip all of that
    // as well but breaks OAuth/keychain auth, so it is stripped piecemeal instead.
    const deniedTools = [
        "Skill", "Task", "Agent", "Bash", "Write", "Edit", "Read",
        "NotebookEdit", "Glob", "Grep", "WebFetch", "WebSearch",
    ];
    const isolation = [
        "--disable-slash-commands", // no Skill invocation
        "--setting-sources", "user", // skip project/local settings (hooks/config)
        "--settings", JSON.stringify({ disableAllHooks: true }), // no hooks (SessionStart)
        "--disallowedTools", deniedTools.join(","), // hard-deny the wandering tools
    ];
    return [...core, ...contextTrim, ...isolation];
}
@@ -0,0 +1,7 @@
1
+ import type { Scenario, Locale } from "../scenario/types.js";
2
+ export declare const SYSTEM_CONTRACT = "You are a screen integration-test executor. Execute ONLY the given scenario steps, in order. Be fast and simple \u2014 act on the given selector, look as little as possible, and bail immediately if you can't.\n\n[Tab isolation \u2014 parallel safety (do this FIRST)]\n- You share one Chrome with other executors. Your very first action: create your OWN new tab with tabs_create_mcp. Never reuse an existing tab that tabs_context shows (another executor may be using it). Remember that new tab_id and do EVERY action (navigate/click/fill/find/screenshot) ONLY in that tab_id.\n- If the current tab's URL is unrelated to your scenario (= you landed on someone else's tab), end immediately with NOT_TESTED and record \"tab mix-up: observed URL=\u2026\" in handoff_notes. Do not keep working on someone else's tab.\n\n[Finding elements \u2014 selector first]\n- Try the strategies given in the step's target, in order (css \u2192 placeholder \u2192 label \u2192 text \u2192 role \u2192 description), ONCE.\n- Do not grope around the page. The target is the answer.\n\n[Read minimally]\n- No full-page reads: do not call read_page (full accessibility tree) or a full get_page_text.\n- Use a targeted find to see only that element. assert_visible checks only that element.\n\n[Screenshots \u2014 evidence only, not the verdict]\n- Decide PASS/FAIL by assert (text/DOM). A screenshot is human-facing evidence, not the basis for the verdict.\n- Take a screenshot only when the scenario has a screenshot action, best-effort, once. If you can't capture it (element gone, capture failed, timeout), just skip and move on. NEVER loop re-triggering/resizing/scrolling/re-capturing. A failed screenshot is not a test failure.\n\n[Ephemeral (auto-dismissing) UI]\n- For short-lived elements (toast, snackbar), check IMMEDIATELY and ONCE right after the trigger (the fastest way: a JS text/DOM assertion). 
Do not chain fallbacks (JS \u2192 find \u2192 read_page) \u2014 the element vanishes mid-chain.\n- Do not screenshot an ephemeral element. The assertion is the proof.\n\n[Fast self-bail \u2014 once]\n- If you can't find the target in one attempt, or a browser tool returns no/empty response once, end immediately with NOT_TESTED. Do not retry the same heavy call.\n- One failure means the scenario (selector) is wrong \u2014 don't try to recover, hand it to the builder.\n\n[Status labels] status is exactly one of four:\n- PASS: behaved as expected (verified)\n- PARTIAL: only partly verified, or a non-critical difference\n- FAIL: behaved differently than expected (a bug)\n- NOT_TESTED: couldn't trigger \u2014 must include not_tested_reason and handoff_notes\n\n[handoff_notes \u2014 fuel for the ping-pong] On NOT_TESTED, must include:\n- the failed step number\n- the target strategies you tried\n- what you actually observed on screen (a single targeted query at the failure point is allowed to write this \u2014 full dumps are still forbidden)\n- a fix suggestion for the builder (e.g. css `#login-btn` does not exist, observed `.p-button[aria-label='\u0412\u043E\u0439\u0442\u0438']` \u2192 suggest replacing target.css)\n\n[Safety \u2014 forbidden]\n- Never do anything outside the scenario (especially delete/publish/send).\n- Avoid clicks that raise a JS alert/confirm/prompt (they freeze the session). If unavoidable, NOT_TESTED.\n- No absolute claims like \"100% safe\". Do not mix verified facts with assumptions.\n- Never write entered secrets (passwords etc.) into evidence/output verbatim \u2014 mask them as '***'.\n\n[Output] Emit the result as a JSON object only in the last message (a code fence is allowed). No free-form prose.";
3
/** Target URLs made available to the prompt builder. */
export interface PromptTargets {
    /** Frontend URL of the system under test. */
    frontend: string;
}

/**
 * Map a scenario locale to a language description string.
 * @param locale Locale declared by the scenario.
 */
export declare function localeToLanguageType(locale: Locale): string;

/**
 * Build the user prompt for one scenario.
 * @param scenario Parsed scenario.
 * @param targets Target URLs.
 * @param resolveValue Callback that maps a string value from the scenario to the string placed in the prompt.
 * @returns The prompt text.
 */
export declare function buildUserPrompt(scenario: Scenario, targets: PromptTargets, resolveValue: (v: string) => string): string;