@delt/tester-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -0
- package/bin/tester-mcp.js +2 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +106 -0
- package/dist/config/loadConfig.d.ts +14 -0
- package/dist/config/loadConfig.js +20 -0
- package/dist/env/captureEnv.d.ts +8 -0
- package/dist/env/captureEnv.js +20 -0
- package/dist/guide/loadGuide.d.ts +6 -0
- package/dist/guide/loadGuide.js +23 -0
- package/dist/init.d.ts +17 -0
- package/dist/init.js +127 -0
- package/dist/result/parseExecutorResult.d.ts +11 -0
- package/dist/result/parseExecutorResult.js +29 -0
- package/dist/result/types.d.ts +44 -0
- package/dist/result/types.js +1 -0
- package/dist/result/writeResult.d.ts +3 -0
- package/dist/result/writeResult.js +22 -0
- package/dist/run/buildExecutorArgs.d.ts +6 -0
- package/dist/run/buildExecutorArgs.js +39 -0
- package/dist/run/buildPrompt.d.ts +7 -0
- package/dist/run/buildPrompt.js +84 -0
- package/dist/run/runScenario.d.ts +18 -0
- package/dist/run/runScenario.js +26 -0
- package/dist/run/runScenarios.d.ts +11 -0
- package/dist/run/runScenarios.js +47 -0
- package/dist/run/spawnExecutor.d.ts +34 -0
- package/dist/run/spawnExecutor.js +93 -0
- package/dist/run/streamParser.d.ts +17 -0
- package/dist/run/streamParser.js +51 -0
- package/dist/run/summarizeLine.d.ts +1 -0
- package/dist/run/summarizeLine.js +29 -0
- package/dist/scenario/actions.d.ts +5 -0
- package/dist/scenario/actions.js +33 -0
- package/dist/scenario/expandScenarioPaths.d.ts +1 -0
- package/dist/scenario/expandScenarioPaths.js +31 -0
- package/dist/scenario/parseScenario.d.ts +2 -0
- package/dist/scenario/parseScenario.js +37 -0
- package/dist/scenario/types.d.ts +47 -0
- package/dist/scenario/types.js +1 -0
- package/dist/secrets/loadSecretsFile.d.ts +1 -0
- package/dist/secrets/loadSecretsFile.js +10 -0
- package/dist/secrets/redactSecrets.d.ts +6 -0
- package/dist/secrets/redactSecrets.js +46 -0
- package/dist/secrets/resolveSecrets.d.ts +5 -0
- package/dist/secrets/resolveSecrets.js +16 -0
- package/dist/util/runId.d.ts +1 -0
- package/dist/util/runId.js +3 -0
- package/package.json +19 -0
- package/skills/tester-mcp/SKILL.md +29 -0
- package/skills/tester-mcp/document-guide.md +146 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { renderStep } from "../scenario/actions.js";
|
|
2
|
+
export const SYSTEM_CONTRACT = `You are a screen integration-test executor. Execute ONLY the given scenario steps, in order. Be fast and simple — act on the given selector, look as little as possible, and bail immediately if you can't.
|
|
3
|
+
|
|
4
|
+
[Tab isolation — parallel safety (do this FIRST)]
|
|
5
|
+
- You share one Chrome with other executors. Your very first action: create your OWN new tab with tabs_create_mcp. Never reuse an existing tab that tabs_context shows (another executor may be using it). Remember that new tab_id and do EVERY action (navigate/click/fill/find/screenshot) ONLY in that tab_id.
|
|
6
|
+
- If the current tab's URL is unrelated to your scenario (= you landed on someone else's tab), end immediately with NOT_TESTED and record "tab mix-up: observed URL=…" in handoff_notes. Do not keep working on someone else's tab.
|
|
7
|
+
|
|
8
|
+
[Finding elements — selector first]
|
|
9
|
+
- Try the strategies given in the step's target, in order (css → placeholder → label → text → role → description), ONCE.
|
|
10
|
+
- Do not grope around the page. The target is the answer.
|
|
11
|
+
|
|
12
|
+
[Read minimally]
|
|
13
|
+
- No full-page reads: do not call read_page (full accessibility tree) or a full get_page_text.
|
|
14
|
+
- Use a targeted find to see only that element. assert_visible checks only that element.
|
|
15
|
+
|
|
16
|
+
[Screenshots — evidence only, not the verdict]
|
|
17
|
+
- Decide PASS/FAIL by assert (text/DOM). A screenshot is human-facing evidence, not the basis for the verdict.
|
|
18
|
+
- Take a screenshot only when the scenario has a screenshot action, best-effort, once. If you can't capture it (element gone, capture failed, timeout), just skip and move on. NEVER loop re-triggering/resizing/scrolling/re-capturing. A failed screenshot is not a test failure.
|
|
19
|
+
|
|
20
|
+
[Ephemeral (auto-dismissing) UI]
|
|
21
|
+
- For short-lived elements (toast, snackbar), check IMMEDIATELY and ONCE right after the trigger (the fastest way: a JS text/DOM assertion). Do not chain fallbacks (JS → find → read_page) — the element vanishes mid-chain.
|
|
22
|
+
- Do not screenshot an ephemeral element. The assertion is the proof.
|
|
23
|
+
|
|
24
|
+
[Fast self-bail — once]
|
|
25
|
+
- If you can't find the target in one attempt, or a browser tool returns no/empty response once, end immediately with NOT_TESTED. Do not retry the same heavy call.
|
|
26
|
+
- One failure means the scenario (selector) is wrong — don't try to recover, hand it to the builder.
|
|
27
|
+
|
|
28
|
+
[Status labels] status is exactly one of four:
|
|
29
|
+
- PASS: behaved as expected (verified)
|
|
30
|
+
- PARTIAL: only partly verified, or a non-critical difference
|
|
31
|
+
- FAIL: behaved differently than expected (a bug)
|
|
32
|
+
- NOT_TESTED: couldn't trigger — must include not_tested_reason and handoff_notes
|
|
33
|
+
|
|
34
|
+
[handoff_notes — fuel for the ping-pong] On NOT_TESTED, must include:
|
|
35
|
+
- the failed step number
|
|
36
|
+
- the target strategies you tried
|
|
37
|
+
- what you actually observed on screen (a single targeted query at the failure point is allowed to write this — full dumps are still forbidden)
|
|
38
|
+
- a fix suggestion for the builder (e.g. css \`#login-btn\` does not exist, observed \`.p-button[aria-label='Войти']\` → suggest replacing target.css)
|
|
39
|
+
|
|
40
|
+
[Safety — forbidden]
|
|
41
|
+
- Never do anything outside the scenario (especially delete/publish/send).
|
|
42
|
+
- Avoid clicks that raise a JS alert/confirm/prompt (they freeze the session). If unavoidable, NOT_TESTED.
|
|
43
|
+
- No absolute claims like "100% safe". Do not mix verified facts with assumptions.
|
|
44
|
+
- Never write entered secrets (passwords etc.) into evidence/output verbatim — mask them as '***'.
|
|
45
|
+
|
|
46
|
+
[Output] Emit the result as a JSON object only in the last message (a code fence is allowed). No free-form prose.`;
|
|
47
|
+
// Locale code → value the prompt tells the executor to store under localStorage 'languageType'.
const LANG_MAP = { kg: "lng_type_1", ru: "lng_type_2", kr: "lng_type_3" };
/**
 * Translate a scenario locale code into the app's `languageType` value.
 *
 * @param locale Locale code ("kg", "ru" or "kr").
 * @returns The mapped value, or `undefined` when the locale is not a key of LANG_MAP.
 */
export function localeToLanguageType(locale) {
    // Own-property check: a bare `LANG_MAP[locale]` also resolves inherited keys such as
    // "constructor" or "toString", returning a function instead of undefined.
    return Object.prototype.hasOwnProperty.call(LANG_MAP, locale) ? LANG_MAP[locale] : undefined;
}
|
|
51
|
+
/**
 * Build the per-scenario user prompt: project context, locale pin, numbered step
 * checklist and the expected JSON output shape.
 *
 * @param scenario     Parsed scenario (id, title, steps, optional locale / ephemeral flag).
 * @param targets      Object whose `frontend` value is printed as the app under test.
 * @param resolveValue Injected by the caller (the secrets resolver; mocked in tests). It is
 *                     applied to the `value` of every `fill` step before rendering.
 * @returns The prompt text.
 */
export function buildUserPrompt(scenario, targets, resolveValue) {
    const locale = scenario.locale ?? "ru";
    const langType = localeToLanguageType(locale);
    // Resolve + render + number each step in a single pass (1-based numbering).
    const numberedSteps = scenario.steps.map((step, idx) => {
        let toRender = step;
        if (step.action === "fill") {
            toRender = { ...step, value: resolveValue(step.value) };
        }
        return `${idx + 1}. ${renderStep(toRender)}`;
    });
    const checklist = numberedSteps.join("\n");
    let ephemeralNote = "";
    if (scenario.ephemeral) {
        ephemeralNote = "\n- ⚠ ephemeral check: this screen vanishes quickly (toast etc.). Assert ONCE immediately after the trigger; no fallback chain, no screenshot.";
    }
    // The template body is runtime text: keep it at column 0, byte-for-byte.
    return `# Project context
- App (frontend): ${targets.frontend}
- Stack: Vue 3 + PrimeVue. Targets are pre-resolved from source by the author — use them as-is. If a target is wrong or missing, don't grope; record the actual element you observed in handoff_notes and end with NOT_TESTED.${ephemeralNote}

# Locale pin (deterministic test)
Before starting, run localStorage.setItem('languageType', '${langType}') in the browser console, then reload the page. (locale=${locale})

# Scenario: ${scenario.title} (id: ${scenario.id})
Run the steps below in order. URLs are relative to the frontend base:
${checklist}

# Output format (JSON only in the last message)
{
"status": "PASS | PARTIAL | FAIL | NOT_TESTED",
"evidence": ["basis — the text/structure/screenshot you saw"],
"steps": [{ "index": 1, "action": "navigate", "status": "PASS" }],
"not_tested_reason": "only when NOT_TESTED",
"handoff_notes": "where you got stuck / next start point"
}`;
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { Scenario } from "../scenario/types.js";
|
|
2
|
+
import type { Environment, ScenarioResult } from "../result/types.js";
|
|
3
|
+
import { type StreamSpawner } from "./spawnExecutor.js";
|
|
4
|
+
/** Options for running a single scenario through one executor process. */
export interface RunScenarioOptions {
    /** Identifier of the whole run; copied into the result as `run_id`. */
    runId: string;
    /** Targets rendered into the user prompt. */
    targets: {
        /** Frontend app location printed in the prompt's project context. */
        frontend: string;
    };
    /** Model name forwarded to the executor. */
    model: string;
    /** Environment description copied into the result as `environment`. */
    env: Environment;
    /** Applied to the `value` of each `fill` step before it is rendered into the prompt (secrets resolver). */
    resolveValue: (v: string) => string;
    /** Clock used for `started_at` / `duration_ms`; defaults to `() => new Date()`. */
    now?: () => Date;
    /** Hard timeout in milliseconds, forwarded to spawnExecutor. */
    timeoutMs?: number;
    /** Process spawner override, forwarded to spawnExecutor. */
    spawner?: StreamSpawner;
    /** Receives executor output lines, forwarded to spawnExecutor. */
    logLine?: (line: string) => void;
    /** Copied into the result as `executor_log`. */
    executorLog?: string;
}
/**
 * Runs one scenario: builds the prompt, spawns the executor and converts its outcome
 * into a ScenarioResult. When the executor produced no result envelope the result has
 * status "NOT_TESTED" with a `not_tested_reason` and an empty `steps` array.
 */
export declare function runScenario(scenario: Scenario, opts: RunScenarioOptions): Promise<ScenarioResult>;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { buildUserPrompt, SYSTEM_CONTRACT } from "./buildPrompt.js";
|
|
2
|
+
import { spawnExecutor } from "./spawnExecutor.js";
|
|
3
|
+
import { parseExecutorResult } from "../result/parseExecutorResult.js";
|
|
4
|
+
/**
 * Execute one scenario with a fresh executor process and shape the outcome into a result record.
 *
 * @param scenario Parsed scenario to run.
 * @param opts     Run options (run id, targets, model, environment, secrets resolver, optional
 *                 clock / timeout / spawner / log sink / log path).
 * @returns The scenario result. If the executor never emitted a result envelope, the status is
 *          "NOT_TESTED" and `not_tested_reason` names the cause plus the last tool seen.
 */
export async function runScenario(scenario, opts) {
    const clock = opts.now ?? (() => new Date());
    const startedAt = clock();
    const prompt = buildUserPrompt(scenario, opts.targets, opts.resolveValue);
    const outcome = await spawnExecutor(
        { prompt, systemPrompt: SYSTEM_CONTRACT, model: opts.model },
        { spawner: opts.spawner, logLine: opts.logLine, timeoutMs: opts.timeoutMs },
    );
    const { envelope, state, killedReason } = outcome;
    // Fields shared by both outcomes; duration is measured right after the executor returns.
    const common = {
        run_id: opts.runId,
        scenario_id: scenario.id,
        started_at: startedAt.toISOString(),
        duration_ms: clock().getTime() - startedAt.getTime(),
        environment: opts.env,
        last_tool: state.lastTool,
        tool_count: state.toolCount,
        executor_log: opts.executorLog,
    };
    if (envelope) {
        const parsed = parseExecutorResult(envelope.result);
        return {
            ...common,
            status: parsed.status,
            not_tested_reason: parsed.not_tested_reason,
            pattern_inference: parsed.pattern_inference,
            evidence: parsed.evidence,
            steps: parsed.steps ?? [],
            handoff_notes: parsed.handoff_notes,
            raw_executor_text: parsed.raw_executor_text,
        };
    }
    // No envelope: explain why (stall kill, hard timeout, or the executor simply emitted nothing).
    let why;
    if (killedReason === "stall") {
        why = "무응답(스톨)";
    }
    else if (killedReason === "timeout") {
        why = "하드 타임아웃";
    }
    else {
        why = "executor가 결과를 방출하지 않음";
    }
    let lastBit = " — 도구 호출 0회";
    if (state.lastTool) {
        lastBit = ` — 마지막 도구 '${state.lastTool}' (호출 ${state.toolCount}회)`;
    }
    return { ...common, status: "NOT_TESTED", not_tested_reason: `${why}${lastBit}`, steps: [] };
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { Scenario } from "../scenario/types.js";
|
|
2
|
+
import type { ScenarioResult } from "../result/types.js";
|
|
3
|
+
import { type RunScenarioOptions } from "./runScenario.js";
|
|
4
|
+
/** Upper bound on the number of executor processes that clampConcurrency will allow. */
export declare const MAX_CONCURRENCY = 10;
/**
 * Resolves the effective concurrency. A finite `requested` value is floored and clamped into
 * [1, MAX_CONCURRENCY]; otherwise the result is `scenarioCount` clamped into the same range.
 */
export declare function clampConcurrency(requested: number | undefined, scenarioCount: number): number;
/** Logging options for runScenarios. */
export interface RunScenariosLogging {
    /** When true, a condensed summary of each tool event is printed to the console. */
    verbose?: boolean;
    /** Values replaced with `***` in every log line before it is written. */
    secretValues?: string[];
    /** Base directory for per-run log folders; defaults to "runs". */
    outDir?: string;
}
/**
 * Runs the scenarios with a bounded pool of workers, writing one `<scenario id>.log` per
 * scenario under `<outDir>/<runId>`. Results are returned in input order.
 */
export declare function runScenarios(scenarios: Scenario[], opts: RunScenarioOptions, concurrency: number, logging?: RunScenariosLogging): Promise<ScenarioResult[]>;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { appendFileSync, mkdirSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { runScenario } from "./runScenario.js";
|
|
4
|
+
import { redactString } from "../secrets/redactSecrets.js";
|
|
5
|
+
import { summarizeLine } from "./summarizeLine.js";
|
|
6
|
+
// Ceiling on parallel executors. The orchestrating AI may ask for fewer, but
// clampConcurrency never yields more than this many `claude` processes at once,
// which bounds API rate-limit pressure and browser-tab-group contention.
// (Verified: 10 parallel claude --chrome runs completed with 0 tab collisions.)
export const MAX_CONCURRENCY = 10;
/**
 * Work out how many executors to run in parallel.
 *
 * @param requested     Caller's wish; used (floored) only when it is a finite number.
 * @param scenarioCount Number of scenarios; used when `requested` is absent or not finite.
 * @returns A value clamped into [1, MAX_CONCURRENCY].
 */
export function clampConcurrency(requested, scenarioCount) {
    const usable = requested !== undefined && Number.isFinite(requested);
    const wanted = usable ? Math.floor(requested) : scenarioCount;
    return Math.max(1, Math.min(wanted, MAX_CONCURRENCY));
}
|
|
19
|
+
/**
 * Run scenarios with a bounded worker pool. Each scenario spawns its own executor
 * process (via runScenario → spawnExecutor); `concurrency` caps how many run at once.
 * Results keep input order regardless of completion order.
 *
 * Side effects: creates `<outDir>/<runId>` (outDir defaults to "runs") and appends every
 * executor line, with secret values redacted, to `<scenario id>.log` inside it.
 *
 * @param scenarios   Scenarios to run.
 * @param opts        Options passed to every runScenario call (executorLog / logLine are overridden).
 * @param concurrency Requested number of parallel workers.
 * @param logging     Optional verbose flag, secret values to redact, and output directory.
 * @returns Results in the same order as `scenarios`.
 */
export async function runScenarios(scenarios, opts, concurrency, logging) {
    const results = new Array(scenarios.length);
    // A missing / NaN concurrency would make the worker count NaN, start zero workers and
    // return an array of holes without running anything; fall back to one worker instead.
    const requested = Number(concurrency);
    const workers = Math.max(1, Math.min(Number.isNaN(requested) ? 1 : requested, scenarios.length || 1));
    let next = 0;
    const base = logging?.outDir ?? "runs";
    const dir = join(base, opts.runId);
    mkdirSync(dir, { recursive: true });
    const worker = async () => {
        // Each worker claims the next unclaimed index until the list is exhausted.
        for (let i = next++; i < scenarios.length; i = next++) {
            const id = scenarios[i].id;
            const logPath = join(dir, `${id}.log`);
            const logLine = (line) => {
                // Redact before anything is persisted or printed.
                const safe = redactString(line, logging?.secretValues ?? []);
                appendFileSync(logPath, safe + "\n");
                if (logging?.verbose) {
                    const sum = summarizeLine(safe);
                    if (sum)
                        console.log(`[${id}] ${sum}`);
                }
            };
            results[i] = await runScenario(scenarios[i], { ...opts, executorLog: logPath, logLine });
        }
    };
    await Promise.all(Array.from({ length: workers }, () => worker()));
    return results;
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { type ExecutorArgsOptions } from "./buildExecutorArgs.js";
|
|
2
|
+
import { type StreamState } from "./streamParser.js";
|
|
3
|
+
/** Final result event of an executor run. */
export interface Envelope {
    /** The executor's final text. */
    result: string;
    /** Session id reported with the result, when present. */
    session_id?: string;
    /** Cost reported with the result, when present. */
    total_cost_usd?: number;
}
/**
 * Parses `stdout` as JSON and returns it when it is an object with a string `result`;
 * otherwise returns `{ result: stdout }`.
 */
export declare function parseEnvelope(stdout: string): Envelope;
/** Handle to a spawned process. */
export interface SpawnHandle {
    /** Sends `signal` to the process. */
    kill(signal: NodeJS.Signals): void;
}
/** Starts a process and streams its output line by line. */
export interface StreamSpawner {
    (cmd: string, args: string[], handlers: {
        /** Called for each stdout line. */
        onLine: (line: string) => void;
        /** Called for each stderr line. */
        onStderrLine?: (line: string) => void;
        /** Called when the process has closed. */
        onClose: (code: number | null, signal: string | null) => void;
    }): SpawnHandle;
}
/** Outcome of spawnExecutor. */
export interface SpawnExecutorResult {
    /** Result envelope, absent when the executor never emitted a result event. */
    envelope?: Envelope;
    /** Diagnostic snapshot of the stream (tool trail, last tool, tool count). */
    state: StreamState;
    /** Set when spawnExecutor killed the process: no output for too long ("stall") or hard timeout ("timeout"). */
    killedReason?: "stall" | "timeout";
}
/** Injectable dependencies and timing knobs for spawnExecutor. */
export interface SpawnExecutorDeps {
    /** Process spawner; defaults to a child_process-based spawner. */
    spawner?: StreamSpawner;
    /** Receives every stdout line, and every stderr line prefixed with "[stderr] ". */
    logLine?: (line: string) => void;
    /** Millisecond clock; defaults to Date.now. */
    now?: () => number;
    /** Hard timeout in ms; no hard timeout is armed when omitted or 0. */
    timeoutMs?: number;
    /** Maximum silence on stdout before the process is killed; defaults to 60000 ms. */
    stallMs?: number;
    /** Interval of the stall check; defaults to 5000 ms. */
    tickMs?: number;
    /** Delay between SIGTERM and SIGKILL; defaults to 5000 ms. */
    killGraceMs?: number;
    /** Extra delay after the SIGKILL point before the promise is resolved anyway; defaults to 5000 ms. */
    forceResolveMs?: number;
}
/** Spawns the `claude` executor and resolves once it has closed or been killed. */
export declare function spawnExecutor(opts: ExecutorArgsOptions, deps?: SpawnExecutorDeps): Promise<SpawnExecutorResult>;
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { buildExecutorArgs } from "./buildExecutorArgs.js";
|
|
3
|
+
import { makeStreamAccumulator } from "./streamParser.js";
|
|
4
|
+
/**
 * Interpret executor stdout as a result envelope.
 *
 * @param stdout Raw stdout text.
 * @returns The parsed JSON value when it is an object carrying a string `result`;
 *          otherwise a minimal envelope whose `result` is the raw text.
 */
export function parseEnvelope(stdout) {
    let parsed;
    try {
        parsed = JSON.parse(stdout);
    }
    catch {
        // Not JSON at all: treat the whole output as the result text.
        return { result: stdout };
    }
    const looksLikeEnvelope = Boolean(parsed) && typeof parsed.result === "string";
    return looksLikeEnvelope ? parsed : { result: stdout };
}
|
|
13
|
+
// Default streaming spawner: spawn + newline buffering (stdout → onLine, stderr → onStderrLine), \r-stripped.
|
|
14
|
+
const defaultSpawner = (cmd, args, h) => {
|
|
15
|
+
const child = spawn(cmd, args, { stdio: ["ignore", "pipe", "pipe"] });
|
|
16
|
+
const lineBuf = (emit) => {
|
|
17
|
+
let buf = "";
|
|
18
|
+
return {
|
|
19
|
+
push(d) {
|
|
20
|
+
buf += d.toString();
|
|
21
|
+
let nl;
|
|
22
|
+
while ((nl = buf.indexOf("\n")) >= 0) {
|
|
23
|
+
emit(buf.slice(0, nl).replace(/\r$/, ""));
|
|
24
|
+
buf = buf.slice(nl + 1);
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
flush() { if (buf.trim())
|
|
28
|
+
emit(buf.replace(/\r$/, "")); },
|
|
29
|
+
};
|
|
30
|
+
};
|
|
31
|
+
const out = lineBuf(h.onLine);
|
|
32
|
+
const err = lineBuf((l) => h.onStderrLine?.(l));
|
|
33
|
+
child.stdout.on("data", (d) => out.push(d));
|
|
34
|
+
child.stderr.on("data", (d) => err.push(d));
|
|
35
|
+
child.on("close", (code, signal) => { out.flush(); err.flush(); h.onClose(code, signal); });
|
|
36
|
+
child.on("error", () => h.onClose(null, null));
|
|
37
|
+
return { kill: (sig) => { try {
|
|
38
|
+
child.kill(sig);
|
|
39
|
+
}
|
|
40
|
+
catch { /* already dead */ } } };
|
|
41
|
+
};
|
|
42
|
+
/**
 * Spawn the `claude` executor and resolve once it has closed or has been killed.
 *
 * The returned promise never rejects from the timers below; it resolves with the stream
 * snapshot (`state`), the result envelope if one was seen, and `killedReason` when this
 * function killed the process.
 *
 * @param opts Executor options, turned into CLI arguments by buildExecutorArgs.
 * @param deps Optional spawner, log sink, clock and timing knobs
 *             (defaults: stallMs 60000, tickMs 5000, killGraceMs 5000, forceResolveMs 5000).
 */
export function spawnExecutor(opts, deps = {}) {
    const spawner = deps.spawner ?? defaultSpawner;
    const logLine = deps.logLine ?? (() => { });
    const now = deps.now ?? (() => Date.now());
    const stallMs = deps.stallMs ?? 60_000;
    const tickMs = deps.tickMs ?? 5_000;
    const killGraceMs = deps.killGraceMs ?? 5_000;
    const forceResolveMs = deps.forceResolveMs ?? 5_000;
    const acc = makeStreamAccumulator(now);
    return new Promise((resolve) => {
        // Time of the last stdout line; stderr lines do not reset it.
        let lastEvent = now();
        let killedReason;
        let done = false; // promise already resolved
        let killing = false; // kill escalation already started
        const intervals = [];
        const timeouts = [];
        const clearAll = () => { intervals.forEach(clearInterval); timeouts.forEach(clearTimeout); };
        // Resolve exactly once and cancel every pending timer.
        const settle = () => {
            if (done)
                return;
            done = true;
            clearAll();
            const snap = acc.snapshot();
            resolve({ envelope: snap.envelope, state: snap, killedReason });
        };
        let handle;
        // SIGTERM, then escalate to SIGKILL, then force-resolve if close never fires (hung child can't hang the run).
        const killEscalate = (reason) => {
            if (killing || done)
                return;
            killing = true;
            killedReason = reason;
            handle.kill("SIGTERM");
            timeouts.push(setTimeout(() => { if (!done)
                handle.kill("SIGKILL"); }, killGraceMs));
            timeouts.push(setTimeout(settle, killGraceMs + forceResolveMs));
        };
        handle = spawner("claude", buildExecutorArgs(opts), {
            onLine: (line) => { lastEvent = now(); acc.push(line); logLine(line); },
            onStderrLine: (line) => { logLine("[stderr] " + line); },
            onClose: () => settle(),
        });
        // Stall watchdog: checked every tickMs, fires when stdout has been silent for more than stallMs.
        intervals.push(setInterval(() => {
            if (!done && !killing && now() - lastEvent > stallMs)
                killEscalate("stall");
        }, tickMs));
        // Hard timeout: armed only when timeoutMs is truthy (omitted or 0 means none).
        if (deps.timeoutMs) {
            timeouts.push(setTimeout(() => { if (!done && !killing)
                killEscalate("timeout"); }, deps.timeoutMs));
        }
    });
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { Envelope } from "./spawnExecutor.js";
|
|
2
|
+
/** One tool event observed in the executor stream. */
export interface TrailItem {
    /** Milliseconds between creation of the accumulator and this event. */
    t_ms: number;
    /** "use" for a tool_use content item, "result" for a tool_result content item. */
    phase: "use" | "result";
    /** Tool name; set on "use" items when the event carried a string name. */
    tool?: string;
    /** Error flag; set on "result" items. */
    is_error?: boolean;
}
/** Diagnostic snapshot of an executor stream. */
export interface StreamState {
    /** Taken from the stream's `result` event; absent until one is seen. */
    envelope?: Envelope;
    /** Tool events in arrival order. */
    trail: TrailItem[];
    /** Name of the most recent tool_use item. */
    lastTool?: string;
    /** Number of tool_use items seen. */
    toolCount: number;
}
/**
 * Creates an accumulator for stream-json lines. `push` feeds one line (blank or
 * non-JSON lines are ignored); `snapshot` returns the current state with a copy of the trail.
 * `now` is a millisecond clock and defaults to Date.now.
 */
export declare function makeStreamAccumulator(now?: () => number): {
    push: (line: string) => void;
    snapshot: () => StreamState;
};
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// Parses stream-json lines into a diagnostic snapshot. Stores ONLY event
|
|
2
|
+
// types / tool names / timing — never tool inputs or assistant text (which
|
|
3
|
+
// may contain secrets). `now` is injected for deterministic timing in tests.
|
|
4
|
+
export function makeStreamAccumulator(now = () => Date.now()) {
|
|
5
|
+
const t0 = now();
|
|
6
|
+
const state = { trail: [], toolCount: 0 };
|
|
7
|
+
function handleContent(content) {
|
|
8
|
+
if (!Array.isArray(content))
|
|
9
|
+
return;
|
|
10
|
+
for (const c of content) {
|
|
11
|
+
if (c?.type === "tool_use") {
|
|
12
|
+
const tool = typeof c.name === "string" ? c.name : undefined;
|
|
13
|
+
state.trail.push({ t_ms: now() - t0, phase: "use", tool });
|
|
14
|
+
state.lastTool = tool;
|
|
15
|
+
state.toolCount++;
|
|
16
|
+
}
|
|
17
|
+
else if (c?.type === "tool_result") {
|
|
18
|
+
state.trail.push({ t_ms: now() - t0, phase: "result", is_error: !!c.is_error });
|
|
19
|
+
}
|
|
20
|
+
// text / input deltas are intentionally ignored (secret-safe).
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
function push(line) {
|
|
24
|
+
const s = line.trim();
|
|
25
|
+
if (!s)
|
|
26
|
+
return;
|
|
27
|
+
let ev;
|
|
28
|
+
try {
|
|
29
|
+
ev = JSON.parse(s);
|
|
30
|
+
}
|
|
31
|
+
catch {
|
|
32
|
+
return;
|
|
33
|
+
}
|
|
34
|
+
const t = ev.type;
|
|
35
|
+
if (t === "assistant" || t === "user") {
|
|
36
|
+
handleContent(ev.message?.content);
|
|
37
|
+
}
|
|
38
|
+
else if (t === "result") {
|
|
39
|
+
// NOTE: result is the model's final text, stored RAW. It can contain secrets; redaction is the downstream layer's job (see redactSecrets). The trail/lastTool above never store raw text/inputs — only result does.
|
|
40
|
+
state.envelope = {
|
|
41
|
+
result: typeof ev.result === "string" ? ev.result : "",
|
|
42
|
+
session_id: typeof ev.session_id === "string" ? ev.session_id : undefined,
|
|
43
|
+
total_cost_usd: typeof ev.total_cost_usd === "number" ? ev.total_cost_usd : undefined,
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
function snapshot() {
|
|
48
|
+
return { envelope: state.envelope, trail: [...state.trail], lastTool: state.lastTool, toolCount: state.toolCount };
|
|
49
|
+
}
|
|
50
|
+
return { push, snapshot };
|
|
51
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Condenses one stream-json line into a short console summary.
 * Returns undefined for blank or non-JSON lines and for events that carry no tool activity.
 */
export declare function summarizeLine(line: string): string | undefined;
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
// Condense one stream-json line into a terse console summary for --verbose.
// Returns undefined for non-tool events (narration/system) so the console
// shows only tool activity. Never includes inputs/text (secret-safe).
//
// Output forms: "■ RESULT" for the result event, "→ <tool>" for the first tool_use of a
// message (with the "mcp__claude-in-chrome__" prefix removed), "← ok" / "✗ error" for the
// first tool_result of a message.
export function summarizeLine(line) {
    const s = line.trim();
    if (!s)
        return undefined;
    let ev;
    try {
        ev = JSON.parse(s);
    }
    catch {
        return undefined;
    }
    // `null` and scalars are valid JSON but not events; reading `.type` on null would throw.
    if (ev === null || typeof ev !== "object")
        return undefined;
    if (ev.type === "result")
        return "■ RESULT";
    if (ev.type === "assistant" || ev.type === "user") {
        const content = ev.message?.content;
        if (!Array.isArray(content))
            return undefined;
        // Only the first tool item of the message is summarized.
        for (const c of content) {
            if (c?.type === "tool_use")
                return `→ ${String(c.name ?? "").replace("mcp__claude-in-chrome__", "")}`;
            if (c?.type === "tool_result")
                return c.is_error ? "✗ error" : "← ok";
        }
    }
    return undefined;
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { Step, ActionName, Target } from "./types.js";
|
|
2
|
+
/**
 * Renders the strategies set on a target as fragments joined by " / "
 * (css, placeholder, label, text, role, description — in that order),
 * or "(no target)" when none is set.
 */
export declare function describeTarget(t: Target): string;
/** Names of all actions that have a prompt renderer. */
export declare const KNOWN_ACTIONS: ActionName[];
/** Type guard: true when `name` is one of KNOWN_ACTIONS. */
export declare function isKnownAction(name: string): name is ActionName;
/** Renders one step as a single line of prompt text. */
export declare function renderStep(step: Step): string;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
// Priority order matches design §7: css/id > placeholder/label > text > role > description.
/**
 * Render a step target as prompt text.
 *
 * @param t Target with any of css / placeholder / label / text / role / description.
 *          Scenario steps come from unvalidated YAML, so a missing target is tolerated.
 * @returns The set strategies joined by " / ", or "(no target)" when none is set
 *          (including when `t` itself is null or undefined).
 */
export function describeTarget(t) {
    // A step written without `target` would otherwise crash prompt building with a bare TypeError.
    const target = t ?? {};
    const parts = [];
    if (target.css)
        parts.push(`css ${target.css}`);
    if (target.placeholder)
        parts.push(`placeholder "${target.placeholder}"`);
    if (target.label)
        parts.push(`label "${target.label}"`);
    if (target.text)
        parts.push(`text "${target.text}"`);
    if (target.role)
        parts.push(`role ${target.role}`);
    if (target.description)
        parts.push(`description "${target.description}"`);
    return parts.join(" / ") || "(no target)";
}
|
|
18
|
+
// [Extension 1] Single source of truth for actions + prompt rendering:
// the keys of RENDERERS define which actions exist, the values define how each is printed.
const RENDERERS = {
    navigate(s) {
        return `Navigate: ${s.url}`;
    },
    fill(s) {
        const where = describeTarget(s.target);
        return `Fill: [${where}] ← "${s.value}"`;
    },
    click(s) {
        const where = describeTarget(s.target);
        const flag = s.destructive ? " (destructive)" : "";
        return `Click: [${where}]${flag}`;
    },
    wait_for(s) {
        const where = describeTarget(s.target);
        const limit = s.timeout_ms ? ` (${s.timeout_ms}ms)` : "";
        return `Wait for: [${where}] to appear${limit}`;
    },
    assert_visible(s) {
        const where = describeTarget(s.target);
        return `Assert visible: [${where}]`;
    },
    screenshot(s) {
        const label = s.name ? `: ${s.name}` : "";
        return `Screenshot${label}`;
    },
};
// Action names, in declaration order of the renderer table.
export const KNOWN_ACTIONS = Object.keys(RENDERERS);
// True when `name` has a renderer.
export function isKnownAction(name) {
    return KNOWN_ACTIONS.includes(name);
}
// Render one step as a single prompt line using the renderer registered for its action.
export function renderStep(step) {
    const { action } = step;
    return RENDERERS[action](step);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Expands CLI scenario inputs into a de-duplicated list of resolved file paths:
 * a directory contributes the *.yaml / *.yml files directly inside it, a file contributes itself.
 * Throws when an input path does not exist or a directory contains no scenario files.
 */
export declare function expandScenarioPaths(inputs: string[]): string[];
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { readdirSync, statSync } from "node:fs";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
// Matches scenario file names: .yaml / .yml, case-insensitive.
const YAML_RE = /\.ya?ml$/i;
/**
 * Expand CLI scenario inputs into a de-duplicated list of resolved file paths.
 *  - a directory → every *.yaml / *.yml entry directly inside it, sorted by name
 *  - anything else → the resolved path itself
 * Inputs are processed in the order given; a path seen twice keeps its first position.
 * Shells typically pre-expand globs, so only files and directories are handled.
 *
 * @param inputs Paths as given on the command line.
 * @returns Resolved paths without duplicates.
 * @throws Error when an input does not exist or a directory holds no scenario file.
 */
export function expandScenarioPaths(inputs) {
    const collected = new Set();
    for (const input of inputs) {
        const abs = resolve(input);
        let info;
        try {
            info = statSync(abs);
        }
        catch {
            throw new Error(`시나리오 경로 없음: ${input}`);
        }
        if (!info.isDirectory()) {
            collected.add(abs);
            continue;
        }
        const names = readdirSync(abs).filter((name) => YAML_RE.test(name));
        names.sort();
        if (names.length === 0) {
            throw new Error(`디렉토리에 시나리오(.yaml/.yml) 없음: ${input}`);
        }
        names.forEach((name) => collected.add(join(abs, name)));
    }
    return Array.from(collected);
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { parse as parseYaml } from "yaml";
|
|
2
|
+
import { isKnownAction } from "./actions.js";
|
|
3
|
+
// Locale codes accepted in a scenario; any other value is dropped.
const LOCALES = ["kg", "ru", "kr"];
/**
 * Parse and validate one scenario YAML document.
 *
 * Required: `id` (letters, digits, '.', '_', '-' only), `title`, and a non-empty `steps`
 * array whose entries each carry a known `action`. Optional fields are normalised:
 * unknown locale → undefined, on_failure → "stop" unless "continue", optional → true only
 * when literally true, ephemeral → false unless a boolean.
 *
 * @param yamlText YAML source of the scenario.
 * @returns The normalised scenario object.
 * @throws Error describing the first validation failure.
 */
export function parseScenario(yamlText) {
    const raw = parseYaml(yamlText);
    if (!raw || typeof raw !== "object") {
        throw new Error("시나리오 YAML 파싱 실패: 빈 문서");
    }
    if (typeof raw.id !== "string") {
        throw new Error("시나리오 필수 필드 누락: id");
    }
    // The id is restricted to a safe character set (the error text names path-injection prevention).
    const idIsSafe = /^[A-Za-z0-9._-]+$/.test(raw.id);
    if (!idIsSafe) {
        throw new Error(`잘못된 시나리오 id '${raw.id}' — 영문/숫자/.-_ 만 허용 (경로 주입 방지)`);
    }
    if (typeof raw.title !== "string") {
        throw new Error("시나리오 필수 필드 누락: title");
    }
    if (!Array.isArray(raw.steps) || raw.steps.length === 0) {
        throw new Error("시나리오 필수 필드 누락: steps");
    }
    // Only the action name is validated here; step payloads are passed through unchanged.
    const checkStep = (step, index) => {
        if (!step || typeof step.action !== "string") {
            throw new Error(`step[${index}]: action 누락`);
        }
        if (!isKnownAction(step.action)) {
            throw new Error(`step[${index}]: 알 수 없는 액션 "${step.action}" (이 슬라이스는 화면 액션만)`);
        }
        return step;
    };
    const steps = raw.steps.map(checkStep);
    const stringOrUndefined = (value) => (typeof value === "string" ? value : undefined);
    let locale;
    if (LOCALES.includes(raw.locale)) {
        locale = raw.locale;
    }
    return {
        id: raw.id,
        title: raw.title,
        steps,
        locale,
        login_as: stringOrUndefined(raw.login_as),
        on_failure: raw.on_failure === "continue" ? "continue" : "stop",
        optional: raw.optional === true,
        defaults: raw.defaults ?? undefined,
        precondition: stringOrUndefined(raw.precondition),
        ephemeral: typeof raw.ephemeral === "boolean" ? raw.ephemeral : false,
    };
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * How to locate an element. Any subset of strategies may be set; they are rendered
 * into the prompt in the order css, placeholder, label, text, role, description.
 */
export interface Target {
    /** CSS selector. */
    css?: string;
    /** Placeholder text. */
    placeholder?: string;
    /** Label text. */
    label?: string;
    /** Visible text. */
    text?: string;
    /** Role name. */
    role?: string;
    /** Free-form description. */
    description?: string;
}
/** Locale codes accepted in a scenario. */
export type Locale = "kg" | "ru" | "kr";
/** One scenario step, discriminated by `action`. */
export type Step = {
    /** Open a URL. */
    action: "navigate";
    url: string;
} | {
    /** Type `value` into the target; the value is passed through the secrets resolver before rendering. */
    action: "fill";
    target: Target;
    value: string;
} | {
    /** Click the target; `destructive` adds a "(destructive)" marker to the rendered step. */
    action: "click";
    target: Target;
    destructive?: boolean;
} | {
    /** Wait for the target to appear; `timeout_ms` is rendered when set. */
    action: "wait_for";
    target: Target;
    timeout_ms?: number;
} | {
    /** Assert that the target is visible. */
    action: "assert_visible";
    target: Target;
} | {
    /** Take a screenshot; `name` is rendered when set. */
    action: "screenshot";
    name?: string;
    // NOTE(review): `save` is not read by the renderer; its consumer is not visible here — confirm.
    save?: boolean;
};
/** Union of all step action names. */
export type ActionName = Step["action"];
/** A parsed scenario. */
export interface Scenario {
    /** Scenario id; restricted to letters, digits, '.', '_' and '-' by the parser. */
    id: string;
    /** Human-readable title, printed in the prompt. */
    title: string;
    /** Steps to execute, in order. */
    steps: Step[];
    /** Locale to pin; the prompt builder falls back to "ru" when unset. */
    locale?: Locale;
    // NOTE(review): login_as is parsed as an optional string; its consumer is not visible here.
    login_as?: string;
    /** The parser yields "continue" only when explicitly requested, otherwise "stop". */
    on_failure?: "stop" | "continue";
    /** The parser yields true only for a literal `true`. */
    optional?: boolean;
    // NOTE(review): defaults is passed through by the parser unvalidated; its consumer is not visible here.
    defaults?: {
        timeout_ms?: number;
    };
    // NOTE(review): precondition is parsed as an optional string; its consumer is not visible here.
    precondition?: string;
    /** When true, the prompt adds a note to assert once immediately, with no fallback chain or screenshot. */
    ephemeral?: boolean;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Intentionally empty module: it has no runtime exports (the matching .d.ts carries only types).
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Loads the secrets file at `path` as YAML.
 * Returns an empty object when the file does not exist or the document is empty.
 */
export declare function loadSecretsFile(path: string): Record<string, unknown>;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { parse as parseYaml } from "yaml";
|
|
3
|
+
/**
 * Read the gitignored secrets file (tester-mcp.secrets.yaml). The file comes first:
 * when it exists its YAML content is returned; when it is missing the result is {}
 * (env SECRET_* is the fallback).
 *
 * @param path Location of the secrets file.
 * @returns The parsed YAML document, or {} when the file is absent or the document is empty.
 */
export function loadSecretsFile(path) {
    if (existsSync(path)) {
        const document = parseYaml(readFileSync(path, "utf8"));
        return (document ?? {});
    }
    return {};
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
 * Collects the values to scrub: every string leaf of `secrets` plus every `env` value
 * whose key starts with "SECRET_". Values shorter than 4 characters are dropped and
 * duplicates removed.
 */
export declare function collectSecretValues(opts?: {
    secrets?: Record<string, unknown>;
    env?: Record<string, string | undefined>;
}): string[];
/** Replaces every occurrence of each non-empty value in `text` with "***". */
export declare function redactString(text: string, values: string[]): string;
/**
 * Returns a copy of `obj` (via a JSON round trip) with secret values replaced by "***".
 * Returns `obj` itself when `secretValues` is empty.
 */
export declare function redactSecrets<T>(obj: T, secretValues: string[]): T;
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// Deterministic redaction of secret values from a result object before it is
|
|
2
|
+
// written/printed. The CLI knows the resolved secret values (from the secrets file
|
|
3
|
+
// and/or env SECRET_*), so we scrub them regardless of what the executor echoed.
|
|
4
|
+
// Defense-in-depth alongside the system-prompt instruction not to echo secrets.
|
|
5
|
+
function leafStrings(obj, out) {
|
|
6
|
+
if (typeof obj === "string") {
|
|
7
|
+
out.push(obj);
|
|
8
|
+
}
|
|
9
|
+
else if (Array.isArray(obj)) {
|
|
10
|
+
for (const v of obj)
|
|
11
|
+
leafStrings(v, out);
|
|
12
|
+
}
|
|
13
|
+
else if (obj && typeof obj === "object") {
|
|
14
|
+
for (const v of Object.values(obj))
|
|
15
|
+
leafStrings(v, out);
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
export function collectSecretValues(opts = {}) {
|
|
19
|
+
const env = opts.env ?? {};
|
|
20
|
+
const values = [];
|
|
21
|
+
// All string leaf values from the secrets object (recurse).
|
|
22
|
+
leafStrings(opts.secrets, values);
|
|
23
|
+
// Plus all env values whose key starts with SECRET_.
|
|
24
|
+
for (const [k, v] of Object.entries(env)) {
|
|
25
|
+
if (k.startsWith("SECRET_") && typeof v === "string")
|
|
26
|
+
values.push(v);
|
|
27
|
+
}
|
|
28
|
+
// Guard against over-redaction: only scrub values long enough to be real secrets; dedupe.
|
|
29
|
+
return [...new Set(values.filter((v) => v.length >= 4))];
|
|
30
|
+
}
|
|
31
|
+
/**
 * Replace every occurrence of each secret value in `text` with "***".
 *
 * @param text   Text to scrub.
 * @param values Secret values; empty entries are ignored. The array is not modified.
 * @returns The scrubbed text.
 */
export function redactString(text, values) {
    // Longest first: if one secret contains another ("password123" vs "pass"), replacing
    // the shorter one first would leave the remainder of the longer secret in the output.
    const ordered = values
        .filter(Boolean)
        .sort((a, b) => String(b).length - String(a).length);
    let out = text;
    for (const v of ordered)
        out = out.split(v).join("***");
    return out;
}
|
|
38
|
+
/**
 * Return a copy of `obj` with every secret value replaced by "***".
 *
 * The copy is produced by a JSON round trip (so undefined properties are dropped), then
 * every string value and every object key is scrubbed. Scrubbing the parsed strings rather
 * than the serialized JSON text means (a) secrets containing characters that JSON escapes
 * (quotes, backslashes) are still matched, and (b) numbers and the literals true / false /
 * null are never touched, so the result is always valid.
 *
 * @param obj          JSON-serializable value to scrub.
 * @param secretValues Secret values; empty entries are ignored. The array is not modified.
 * @returns `obj` itself when `secretValues` is empty, otherwise the scrubbed copy.
 */
export function redactSecrets(obj, secretValues) {
    if (secretValues.length === 0)
        return obj;
    // Longest first so a secret that contains another one is removed whole.
    const ordered = secretValues
        .filter(Boolean)
        .sort((a, b) => String(b).length - String(a).length);
    const scrub = (text) => ordered.reduce((acc, secret) => acc.split(secret).join("***"), text);
    const walk = (value) => {
        if (typeof value === "string")
            return scrub(value);
        if (Array.isArray(value))
            return value.map(walk);
        if (value && typeof value === "object") {
            return Object.fromEntries(Object.entries(value).map(([key, child]) => [scrub(key), walk(child)]));
        }
        return value;
    };
    return walk(JSON.parse(JSON.stringify(obj)));
}
|