ccqa 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -1
- package/dist/bin/ccqa.mjs +1130 -180
- package/dist/package.json +1 -1
- package/dist/runtime/test-helpers.mjs +29 -7
- package/package.json +1 -1
package/dist/bin/ccqa.mjs
CHANGED
|
@@ -8,6 +8,7 @@ import { delimiter, dirname, join, resolve } from "node:path";
|
|
|
8
8
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
9
9
|
import matter from "gray-matter";
|
|
10
10
|
import { spawn } from "node:child_process";
|
|
11
|
+
import { createInterface } from "node:readline";
|
|
11
12
|
import { tmpdir } from "node:os";
|
|
12
13
|
//#region src/prompts/trace.ts
|
|
13
14
|
function generateSessionName() {
|
|
@@ -285,14 +286,17 @@ const STEP_ICONS = {
|
|
|
285
286
|
/**
 * Print the command banner to stdout: an empty line, `ccqa <command>`
 * followed by the target when one is given, then an empty line.
 *
 * @param {string} command - Sub-command name shown after `ccqa`.
 * @param {string} [target] - Optional target appended after a space.
 */
function header(command, target) {
	const suffix = target ? ` ${target}` : "";
	const banner = `\nccqa ${command}${suffix}\n\n`;
	process.stdout.write(banner);
}
|
|
289
|
+
/**
 * Emit one log line of the form `[scope] message` followed by a newline.
 *
 * @param {string} scope - Tag placed between square brackets.
 * @param {string} message - Text written after the tag.
 * @param {{ write(chunk: string): unknown }} [sink=process.stdout] - Destination stream.
 */
function write(scope, message, sink = process.stdout) {
	const line = `[${scope}] ${message}\n`;
	sink.write(line);
}
|
|
288
292
|
/**
 * Log a `key: value` pair under the `[meta]` tag.
 *
 * @param {string} key - Label shown before the colon.
 * @param {unknown} value - Value interpolated after the colon.
 */
function meta(key, value) {
	const entry = `${key}: ${value}`;
	write("meta", entry);
}
|
|
291
295
|
/** Write a single empty line to stdout. */
function blank() {
	const { stdout } = process;
	stdout.write("\n");
}
|
|
294
298
|
/**
 * Log an informational message under the `[info]` tag.
 *
 * @param {string} message - Text to log.
 */
function info(message) {
	const scope = "info";
	write(scope, message);
}
|
|
297
301
|
function step(type, stepId, detail) {
|
|
298
302
|
process.stdout.write(` ${STEP_ICONS[type]} [${stepId}] ${detail}\n`);
|
|
@@ -301,13 +305,37 @@ function bash(command) {
|
|
|
301
305
|
process.stdout.write(` $ ${command.slice(0, 120)}\n`);
|
|
302
306
|
}
|
|
303
307
|
/**
 * Log an error message under the `[error]` tag on stderr.
 *
 * @param {string} message - Text to log.
 */
function error(message) {
	const { stderr } = process;
	write("error", message, stderr);
}
|
|
306
310
|
/**
 * Log a warning under the `[warn]` tag on stderr.
 *
 * @param {string} message - Text to log.
 */
function warn(message) {
	const { stderr } = process;
	write("warn", message, stderr);
}
|
|
309
313
|
/**
 * Log a hint under the `[hint]` tag, preceded by an empty line on stdout.
 *
 * @param {string} message - Text to log.
 */
function hint(message) {
	const { stdout } = process;
	stdout.write("\n");
	write("hint", message);
}
|
|
317
|
+
/**
 * Log a message under the `[fix]` tag.
 *
 * @param {string} message - Text to log.
 */
function fix(message) {
	const scope = "fix";
	write(scope, message);
}
|
|
320
|
+
/**
 * Log a message under the `[run]` tag.
 *
 * @param {string} message - Text to log.
 */
function run(message) {
	const scope = "run";
	write(scope, message);
}
|
|
323
|
+
/**
 * Run `fn` and bracket it with log markers under `scope`: `<label> started`
 * before the call, then either `<label> finished in N.Ns` on success or
 * `<label> threw after N.Ns` on failure. The scope should be a tag the user
 * would grep for.
 *
 * @param {string} label - Phase name used in the markers.
 * @param {() => unknown} fn - Work to run; its result is awaited.
 * @param {string} [scope="fix"] - Log tag passed to `write`.
 * @returns {Promise<unknown>} Whatever `fn` resolves to.
 * @throws Re-throws whatever `fn` throws, after logging the failure marker.
 */
async function timedPhase(label, fn, scope = "fix") {
	const startedAt = Date.now();
	// Elapsed wall-clock seconds since the phase began, one decimal place.
	const elapsedSeconds = () => ((Date.now() - startedAt) / 1e3).toFixed(1);
	write(scope, `${label} started`);
	try {
		const outcome = await fn();
		write(scope, `${label} finished in ${elapsedSeconds()}s`);
		return outcome;
	} catch (failure) {
		write(scope, `${label} threw after ${elapsedSeconds()}s`);
		throw failure;
	}
}
|
|
312
340
|
//#endregion
|
|
313
341
|
//#region src/claude/invoke.ts
|
|
@@ -488,13 +516,27 @@ const CCQA_DIR = ".ccqa";
|
|
|
488
516
|
/**
 * Build the path of the ccqa directory (`CCQA_DIR`) inside a working directory.
 *
 * @param {string} [cwd=process.cwd()] - Directory the ccqa folder lives in.
 * @returns {string} `cwd` joined with `CCQA_DIR`.
 */
function getCcqaDir(cwd = process.cwd()) {
	const ccqaRoot = join(cwd, CCQA_DIR);
	return ccqaRoot;
}
|
|
519
|
+
/**
 * Split a spec identifier into its feature and spec names.
 *
 * Three spellings are accepted, so that paths produced by shell
 * tab-completion work as well as the short alias:
 *   - "tasks/create-and-complete"
 *   - "features/tasks/test-cases/create-and-complete"
 *   - ".ccqa/features/tasks/test-cases/create-and-complete"
 * Each of them yields { featureName: "tasks", specName: "create-and-complete" }.
 * A leading "./", trailing slashes and empty segments are ignored.
 *
 * @param {string} specPath - Spec identifier in one of the forms above.
 * @returns {{ featureName: string, specName: string }}
 * @throws {Error} When the path matches none of the accepted forms.
 */
function parseSpecPath(specPath) {
	const segments = specPath
		.replace(/^\.\/+/, "")
		.replace(/\/+$/, "")
		.split("/")
		.filter((segment) => segment.length > 0);
	if (segments[0] === ".ccqa") segments.shift();

	const isOnDiskForm = segments.length === 4 && segments[0] === "features" && segments[2] === "test-cases";
	if (isOnDiskForm) {
		const [, featureName, , specName] = segments;
		return { featureName, specName };
	}

	const isAliasForm = segments.length === 2 && segments[0] && segments[1];
	if (isAliasForm) {
		const [featureName, specName] = segments;
		return { featureName, specName };
	}

	throw new Error(`Invalid spec path: "${specPath}". Expected "<feature>/<spec>" or "features/<feature>/test-cases/<spec>".`);
}
|
|
499
541
|
function getFeatureDir(featureName, cwd) {
|
|
500
542
|
return join(getCcqaDir(cwd), "features", featureName);
|
|
@@ -706,10 +748,10 @@ function bundledVitestConfigPath() {
|
|
|
706
748
|
}
|
|
707
749
|
//#endregion
|
|
708
750
|
//#region src/runtime/spawn-vitest.ts
|
|
709
|
-
const require$
|
|
751
|
+
const require$2 = createRequire(import.meta.url);
|
|
710
752
|
function resolveVitestBin() {
|
|
711
|
-
const pkgPath = require$
|
|
712
|
-
const pkg = require$
|
|
753
|
+
const pkgPath = require$2.resolve("vitest/package.json");
|
|
754
|
+
const pkg = require$2(pkgPath);
|
|
713
755
|
const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.vitest;
|
|
714
756
|
if (!binRel) throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
|
|
715
757
|
return resolve(dirname(pkgPath), binRel);
|
|
@@ -727,6 +769,19 @@ async function spawnVitestCaptured(args, opts = {}) {
|
|
|
727
769
|
stderr
|
|
728
770
|
};
|
|
729
771
|
}
|
|
772
|
+
/**
 * Spawn vitest with piped stdio, mirroring the child's stdout/stderr onto
 * this process's stdout/stderr while also collecting them.
 *
 * @param {string[]} args - Arguments forwarded to `spawnVitestChild`.
 * @param {object} [opts={}] - Options forwarded to `spawnVitestChild`.
 * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
 *   The value `waitExit` resolves to plus the collected output text.
 */
async function spawnVitestTeed(args, opts = {}) {
	const child = spawnVitestChild(args, opts, "pipe");
	const pendingStdout = teeDrain(child.stdout, process.stdout);
	const pendingStderr = teeDrain(child.stderr, process.stderr);
	const pendingExit = waitExit(child);
	// Drain both pipes and wait for exit concurrently.
	const [stdout, stderr, exitCode] = await Promise.all([pendingStdout, pendingStderr, pendingExit]);
	return { exitCode, stdout, stderr };
}
|
|
730
785
|
function spawnVitestStreaming(args, opts = {}) {
|
|
731
786
|
const child = spawnVitestChild(args, opts, "pipe");
|
|
732
787
|
return {
|
|
@@ -754,6 +809,15 @@ async function drain(stream) {
|
|
|
754
809
|
for await (const chunk of stream) buf += chunk;
|
|
755
810
|
return buf;
|
|
756
811
|
}
|
|
812
|
+
/**
 * Read `stream` to the end as UTF-8 text, forwarding every chunk to `sink`
 * as it arrives and returning the concatenation of all chunks.
 *
 * @param {import("node:stream").Readable} stream - Source to consume.
 * @param {{ write(chunk: string): unknown }} sink - Receives each chunk.
 * @returns {Promise<string>} Everything that was read.
 */
async function teeDrain(stream, sink) {
	stream.setEncoding("utf8");
	const pieces = [];
	for await (const piece of stream) {
		pieces.push(piece);
		sink.write(piece);
	}
	return pieces.join("");
}
|
|
757
821
|
function waitExit(child) {
|
|
758
822
|
return new Promise((resolvePromise, rejectPromise) => {
|
|
759
823
|
child.once("exit", (code) => resolvePromise(code ?? 0));
|
|
@@ -762,7 +826,7 @@ function waitExit(child) {
|
|
|
762
826
|
}
|
|
763
827
|
//#endregion
|
|
764
828
|
//#region src/runtime/agent-browser-bin.ts
|
|
765
|
-
const require = createRequire(import.meta.url);
|
|
829
|
+
const require$1 = createRequire(import.meta.url);
|
|
766
830
|
/**
|
|
767
831
|
* Resolves the directory containing the `agent-browser` shim that npm/pnpm
|
|
768
832
|
* exposes on PATH for the peer-installed package. Used by `ccqa trace` to
|
|
@@ -774,7 +838,7 @@ const require = createRequire(import.meta.url);
|
|
|
774
838
|
function resolveAgentBrowserBinDir() {
|
|
775
839
|
let pkgJsonPath;
|
|
776
840
|
try {
|
|
777
|
-
pkgJsonPath = require.resolve("agent-browser/package.json");
|
|
841
|
+
pkgJsonPath = require$1.resolve("agent-browser/package.json");
|
|
778
842
|
} catch {
|
|
779
843
|
return null;
|
|
780
844
|
}
|
|
@@ -793,8 +857,58 @@ function pathWithAgentBrowserShim(currentPath) {
|
|
|
793
857
|
return dir + delimiter + path;
|
|
794
858
|
}
|
|
795
859
|
//#endregion
|
|
860
|
+
//#region src/runtime/env-vars.ts
|
|
861
|
+
/**
 * Matches `${VAR}` (capture 1) or `$VAR` (capture 2), where VAR is an
 * upper-case identifier. The regex is global and therefore stateful: every
 * user below resets `lastIndex` before relying on it.
 */
const ENV_VAR_RE = /\$\{([A-Z_][A-Z0-9_]*)\}|\$([A-Z_][A-Z0-9_]*)/g;
/**
 * Returns true if the value contains at least one `$VAR` or `${VAR}` reference.
 *
 * @param {string} value - Text to inspect.
 * @returns {boolean}
 */
function hasEnvRef(value) {
	ENV_VAR_RE.lastIndex = 0;
	const found = ENV_VAR_RE.test(value);
	// A successful /g test advances lastIndex; leave the shared regex clean.
	ENV_VAR_RE.lastIndex = 0;
	return found;
}
/**
 * Resolve every `$VAR` / `${VAR}` reference against the current process env.
 *
 * Missing variables expand to the empty string, mirroring `sh` behaviour.
 * Throwing would force ccqa to be invoked with every var set even for
 * unused setups, which is more user-hostile than letting the test fail
 * downstream with a clearer message ("login form rejected: empty password").
 *
 * Non-string values (e.g. numeric or boolean params) are converted with
 * `String()` first instead of throwing, so they substitute as their text form.
 *
 * @param {unknown} value - Text (or value coerced to text) to expand.
 * @returns {string} The text with all references replaced.
 */
function resolveEnvRefs(value) {
	return String(value).replace(ENV_VAR_RE, (_, braced, plain) => {
		const name = braced ?? plain ?? "";
		return process.env[name] ?? "";
	});
}
/**
 * Embed `$VAR` / `${VAR}` as a JS template-literal expression that reads
 * `process.env.VAR ?? ""` at runtime. Used by `ccqa generate` so the test
 * script never bakes in the secret value.
 *
 * Returns a JavaScript string-literal expression (template literal when env
 * refs are present, plain string literal otherwise).
 *
 * Examples:
 *   "${PASSWORD}"          -> '`${process.env.PASSWORD ?? ""}`'
 *   "user-${SUFFIX}@x.com" -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
 *   "literal value"        -> '"literal value"'
 *
 * @param {string} value - Raw value, possibly containing env references.
 * @returns {string} JavaScript source for an expression producing the value.
 */
function envRefsToJsExpression(value) {
	if (!hasEnvRef(value)) return JSON.stringify(value);
	// Make backslashes and backticks literal inside the template literal.
	const text = String(value).replace(/\\/g, "\\\\").replace(/`/g, "\\`");
	// Find where real env references start, in one pass, so each `${` can be
	// classified without rescanning the whole string.
	ENV_VAR_RE.lastIndex = 0;
	const refStarts = new Set();
	for (const match of text.matchAll(ENV_VAR_RE)) refStarts.add(match.index);
	// A `${` that is not an env reference must not become a live substitution.
	const escaped = text.replace(/\$\{/g, (match, offset) => refStarts.has(offset) ? "${" : "\\${");
	const body = escaped.replace(ENV_VAR_RE, (_, braced, plain) => {
		return `\${process.env.${braced ?? plain ?? ""} ?? ""}`;
	});
	return `\`${body}\``;
}
|
|
909
|
+
//#endregion
|
|
796
910
|
//#region src/cli/trace.ts
|
|
797
|
-
const traceCommand = new Command("trace").argument("<feature/spec>", "Spec
|
|
911
|
+
/**
 * `ccqa trace <feature/spec>`: parses the spec identifier and hands the
 * resulting feature and spec names to `runTrace`.
 */
const traceCommand = new Command("trace")
	.argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)")
	.description("Run agent-browser, verify assertions, and record structured actions")
	.action(async (specPath) => {
		const spec = parseSpecPath(specPath);
		await runTrace(spec.featureName, spec.specName);
	});
|
|
@@ -892,8 +1006,8 @@ async function runSetups(setups, sessionName) {
|
|
|
892
1006
|
let script = await readFile(scriptPath, "utf-8").catch(() => {
|
|
893
1007
|
throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
|
|
894
1008
|
});
|
|
895
|
-
for (const [key, value] of Object.entries(ref.params ?? {})) script = script.replaceAll(`{{${key}}}`, value);
|
|
896
|
-
script = script.replace(/process\.env\.AGENT_BROWSER_SESSION\s
|
|
1009
|
+
for (const [key, value] of Object.entries(ref.params ?? {})) script = script.replaceAll(`{{${key}}}`, resolveEnvRefs(value));
|
|
1010
|
+
script = script.replace(/process\.env\.AGENT_BROWSER_SESSION\s*\|?\|?=\s*`.+`;/, `process.env.AGENT_BROWSER_SESSION = ${JSON.stringify(sessionName)};`);
|
|
897
1011
|
const tmpPath = join(getSetupDir(ref.name), `_run.spec.ts`);
|
|
898
1012
|
await writeFile(tmpPath, script, "utf-8");
|
|
899
1013
|
try {
|
|
@@ -1012,8 +1126,10 @@ function actionsToScript(actions, title, setupScripts) {
|
|
|
1012
1126
|
`import { spawnSync } from "node:child_process";`,
|
|
1013
1127
|
`import { ab, abWait, abAssertTextVisible, abAssertVisible, abAssertNotVisible, abAssertUrl, abAssertEnabled, abAssertDisabled, abAssertChecked, abAssertUnchecked } from "ccqa/test-helpers";`,
|
|
1014
1128
|
"",
|
|
1015
|
-
`// Single session shared across all tests — reset per run via cookies clear in first test
|
|
1016
|
-
|
|
1129
|
+
`// Single session shared across all tests — reset per run via cookies clear in first test.`,
|
|
1130
|
+
`// Use ||= so an outer harness (e.g. ccqa generate's auto-fix loop) can pre-set the session`,
|
|
1131
|
+
`// name and inspect the same session after the run finishes.`,
|
|
1132
|
+
`process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
|
|
1017
1133
|
""
|
|
1018
1134
|
]];
|
|
1019
1135
|
if (setupScripts?.length) for (const setup of setupScripts) parts.push(`test("setup: ${setup.name}", () => {`, setup.body, "}, 3 * 60 * 1000);", "");
|
|
@@ -1118,43 +1234,6 @@ function actionToLine(action) {
|
|
|
1118
1234
|
const j = (s) => JSON.stringify(s);
|
|
1119
1235
|
//#endregion
|
|
1120
1236
|
//#region src/prompts/codegen.ts
|
|
1121
|
-
function buildAutoFixPrompt(script, failureLog) {
|
|
1122
|
-
return `You are analyzing a failing E2E test script. The test fails because some browser actions execute before the page has finished loading or navigating.
|
|
1123
|
-
|
|
1124
|
-
Your task: identify which line numbers need a sleep/wait inserted BEFORE them to fix timing issues.
|
|
1125
|
-
|
|
1126
|
-
## Rules
|
|
1127
|
-
- ONLY identify lines where a sleep is needed — do NOT suggest any other changes
|
|
1128
|
-
- Common patterns that need a sleep:
|
|
1129
|
-
- After \`ab("open", ...)\` when the next line interacts with elements (fill, click, etc.)
|
|
1130
|
-
- After \`ab("press", "Enter")\` or \`ab("click", ...)\` when a page navigation occurs before the next action
|
|
1131
|
-
- After any action that triggers a redirect or page reload
|
|
1132
|
-
- Look at the error log to identify WHICH lines failed, then determine if a sleep before that line would fix it
|
|
1133
|
-
- If a \`spawnSync("sleep", ...)\` already exists before a failing line, suggest increasing its duration instead
|
|
1134
|
-
- Output ONLY a JSON array of objects, no explanation, no markdown code fences
|
|
1135
|
-
|
|
1136
|
-
## Output format
|
|
1137
|
-
Each object has:
|
|
1138
|
-
- "line": the 1-based line number to insert a sleep BEFORE
|
|
1139
|
-
- "seconds": recommended sleep duration (typically 3-5)
|
|
1140
|
-
- "reason": very short explanation (e.g., "page navigation after form submit")
|
|
1141
|
-
|
|
1142
|
-
If a sleep already exists and needs to be increased:
|
|
1143
|
-
- "line": the line number of the existing sleep
|
|
1144
|
-
- "increase_to": the new duration in seconds
|
|
1145
|
-
- "reason": explanation
|
|
1146
|
-
|
|
1147
|
-
Example output:
|
|
1148
|
-
[{"line": 15, "seconds": 3, "reason": "page navigation after press Enter"}, {"line": 22, "increase_to": 5, "reason": "slow page load"}]
|
|
1149
|
-
|
|
1150
|
-
If no fixes are needed, return: []
|
|
1151
|
-
|
|
1152
|
-
## Test Script (with line numbers)
|
|
1153
|
-
${script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n")}
|
|
1154
|
-
|
|
1155
|
-
## Failure Log
|
|
1156
|
-
${failureLog.slice(0, 3e3)}`;
|
|
1157
|
-
}
|
|
1158
1237
|
function buildCleanupPrompt(actions) {
|
|
1159
1238
|
return `You are given a list of browser actions recorded during an E2E test trace.
|
|
1160
1239
|
The trace contains noise: failed attempts, redundant retries, and duplicate operations recorded because the agent explored multiple strategies.
|
|
@@ -1185,54 +1264,890 @@ ${actions.map((a, i) => {
|
|
|
1185
1264
|
}).join("\n")}`;
|
|
1186
1265
|
}
|
|
1187
1266
|
//#endregion
|
|
1267
|
+
//#region src/diagnose/apply.ts
|
|
1268
|
+
/**
 * Apply a diagnosis to a test script by dispatching on `diagnosis.type`.
 *
 * TIMING_ISSUE, OVER_ASSERTION and SELECTOR_DRIFT are delegated to their
 * dedicated appliers; DATA_MISSING and UNKNOWN are never applied and only
 * report why. Any other type falls through and yields `undefined`.
 *
 * @param {string} script - Current test script source.
 * @param {{ type: string }} diagnosis - Diagnosis with type-specific fields.
 * @returns {{ applied: boolean, reason?: string, script?: string, summary?: string } | undefined}
 */
function applyDiagnosis(script, diagnosis) {
	const kind = diagnosis.type;
	if (kind === "TIMING_ISSUE") return applyTiming(script, diagnosis.fixes);
	if (kind === "OVER_ASSERTION") return applyOverAssertion(script, diagnosis.lines);
	if (kind === "SELECTOR_DRIFT") {
		return applySelectorDrift(script, diagnosis.line, diagnosis.oldSelector, diagnosis.newSelector);
	}
	if (kind === "DATA_MISSING") {
		return { applied: false, reason: `data missing — ${diagnosis.reason}` };
	}
	if (kind === "UNKNOWN") {
		return { applied: false, reason: `unknown failure — ${diagnosis.reason}` };
	}
}
|
|
1283
|
+
/**
 * Apply timing fixes to a test script.
 *
 * "increase" fixes rewrite the duration of an existing
 * `spawnSync("sleep", ["N"]` call on the given line; "insert" fixes add a new
 * sleep statement before the given line. Increases are applied first (they do
 * not shift line numbers); inserts are then applied bottom-up so earlier
 * insertions do not invalidate later line numbers.
 *
 * The fixes originate from model output and end up in executable test code,
 * so every fix is validated: `line` must be a whole 1-based line number and
 * the duration must be a plain non-negative decimal number. Invalid fixes are
 * skipped rather than crashing or being written into the script.
 *
 * @param {string} script - Current test script source.
 * @param {Array<{ kind: string, line: number, seconds?: number, increase_to?: number }>} fixes
 * @returns {{ applied: false, reason: string } | { applied: true, script: string, summary: string }}
 */
function applyTiming(script, fixes) {
	if (fixes.length === 0) return {
		applied: false,
		reason: "no timing fixes proposed"
	};
	// Duration as it will appear in the script, or null when it is not a plain number.
	const toSleepArg = (value) => {
		const text = String(value);
		return /^\d+(?:\.\d+)?$/.test(text) ? text : null;
	};
	// Zero-based index for a 1-based line, or -1 when the line is not a positive integer.
	const toIndex = (line) => {
		const idx = Number(line) - 1;
		return Number.isInteger(idx) && idx >= 0 ? idx : -1;
	};
	const lines = script.split("\n");
	const summary = [];
	for (const fix of fixes) {
		if (fix.kind !== "increase") continue;
		const idx = toIndex(fix.line);
		const seconds = toSleepArg(fix.increase_to);
		if (idx < 0 || idx >= lines.length || seconds === null) continue;
		const original = lines[idx];
		const replaced = original.replace(/spawnSync\("sleep",\s*\["\d+(?:\.\d+)?"\]/, () => `spawnSync("sleep", ["${seconds}"]`);
		if (replaced !== original) {
			lines[idx] = replaced;
			summary.push(`line ${fix.line}: sleep → ${seconds}s`);
		}
	}
	const inserts = fixes
		.filter((f) => f.kind === "insert")
		.map((f) => ({ fix: f, idx: toIndex(f.line), seconds: toSleepArg(f.seconds) }))
		// idx === lines.length is allowed: it appends after the last line.
		.filter(({ idx, seconds }) => idx >= 0 && idx <= lines.length && seconds !== null)
		.sort((a, b) => b.idx - a.idx);
	for (const { fix, idx, seconds } of inserts) {
		lines.splice(idx, 0, ` spawnSync("sleep", ["${seconds}"], { stdio: "inherit" });`);
		summary.push(`line ${fix.line}: insert sleep ${seconds}s`);
	}
	// Also reached when every fix was rejected as invalid or matched no sleep call.
	if (summary.length === 0) return {
		applied: false,
		reason: "timing fixes pointed at out-of-range lines"
	};
	return {
		applied: true,
		script: lines.join("\n"),
		summary: summary.join("; ")
	};
}
|
|
1317
|
+
/**
 * Remove over-asserting lines from a test script.
 *
 * Only lines that mention an `abAssert*` helper are removed. Import/export
 * statements are never removed even though they name those helpers, because
 * deleting the helper import would break every remaining assertion.
 * Duplicate, out-of-range and non-integer line numbers are ignored. Lines are
 * removed bottom-up so earlier removals do not shift later targets.
 *
 * @param {string} script - Current test script source.
 * @param {number[]} lineNumbers - 1-based line numbers proposed for removal.
 * @returns {{ applied: false, reason: string } | { applied: true, script: string, summary: string }}
 */
function applyOverAssertion(script, lineNumbers) {
	if (lineNumbers.length === 0) return {
		applied: false,
		reason: "no lines to remove"
	};
	const lines = script.split("\n");
	const targets = [...new Set(lineNumbers)].sort((a, b) => b - a);
	let removedCount = 0;
	for (const line of targets) {
		const idx = line - 1;
		if (!Number.isInteger(idx) || idx < 0 || idx >= lines.length) continue;
		const content = lines[idx];
		if (!/abAssert/.test(content)) continue;
		// The helper import mentions abAssert* too; it is not an assertion.
		if (/^\s*(?:import|export)\b/.test(content)) continue;
		lines.splice(idx, 1);
		removedCount += 1;
	}
	if (removedCount === 0) return {
		applied: false,
		reason: "no abAssert lines matched the proposed line numbers"
	};
	return {
		applied: true,
		script: lines.join("\n"),
		summary: `removed ${removedCount} assertion(s)`
	};
}
|
|
1343
|
+
/**
 * Replace a drifted selector on one line of a test script.
 *
 * Every occurrence of `oldSelector` on the target line is replaced with
 * `newSelector`, inserted literally: a function replacer is used so that
 * `$$`, `$&` and similar sequences inside the new selector are not
 * interpreted as replacement patterns.
 *
 * @param {string} script - Current test script source.
 * @param {number} line - 1-based line number containing the selector.
 * @param {string} oldSelector - Exact, non-empty text to replace.
 * @param {string} newSelector - Replacement text.
 * @returns {{ applied: false, reason: string } | { applied: true, script: string, summary: string }}
 */
function applySelectorDrift(script, line, oldSelector, newSelector) {
	const lines = script.split("\n");
	const idx = line - 1;
	// Fractional or NaN line numbers index nothing; treat them as out of range.
	if (!Number.isInteger(idx) || idx < 0 || idx >= lines.length) return {
		applied: false,
		reason: `line ${line} out of range`
	};
	const content = lines[idx];
	// An empty needle "matches" everywhere and would splice newSelector between every character.
	const usable = typeof oldSelector === "string" && oldSelector.length > 0;
	if (!usable || !content.includes(oldSelector)) return {
		applied: false,
		reason: `oldSelector not found on line ${line}`
	};
	lines[idx] = content.replaceAll(oldSelector, () => newSelector);
	return {
		applied: true,
		script: lines.join("\n"),
		summary: `line ${line}: "${oldSelector}" → "${newSelector}"`
	};
}
|
|
1362
|
+
/**
 * Produce a rough, human-readable preview of what changed between two texts.
 *
 * Lines are compared position by position; for each position that differs,
 * the old line is emitted with a "- " prefix and the new line with a "+ "
 * prefix (either is omitted when that side has no line there). This is a
 * display aid, not an applicable patch.
 *
 * @param {string} before - Original text.
 * @param {string} after - Updated text.
 * @returns {string} The changed lines joined by newlines ("" when identical).
 */
function previewDiff(before, after) {
	const oldLines = before.split("\n");
	const newLines = after.split("\n");
	const rowCount = Math.max(oldLines.length, newLines.length);
	const changes = Array.from({ length: rowCount }, (_, row) => {
		const was = oldLines[row];
		const now = newLines[row];
		if (was === now) return [];
		const pair = [];
		if (was !== undefined) pair.push(`- ${was}`);
		if (now !== undefined) pair.push(`+ ${now}`);
		return pair;
	});
	return changes.flat().join("\n");
}
|
|
1378
|
+
//#endregion
|
|
1379
|
+
//#region src/diagnose/prompt.ts
|
|
1380
|
+
/**
 * Build the prompt sent to the model to diagnose a failing generated test.
 *
 * The prompt describes the five diagnosis categories and their JSON shapes,
 * then appends the test spec, a one-line-per-action summary of the recorded
 * actions, the test script with 1-based line numbers, the first 4000
 * characters of the failure log and, when provided, the page snapshot section
 * produced by `formatPageSnapshot`.
 *
 * @param {object} input
 * @param {string} input.script - Test script source; numbered line by line.
 * @param {string} input.specMarkdown - Spec text embedded verbatim.
 * @param {Array<object>} input.actions - Recorded actions; `command`, `assertType`,
 *   `selector`, `value` and `observation` are read from each.
 * @param {string} input.failureLog - Failure output; truncated to 4000 characters.
 * @param {string} [input.pageSnapshot] - Optional snapshot appended after the log.
 * @param {string} [input.outputLanguage="en"] - Language tag for human-readable fields.
 * @returns {string} The complete prompt text.
 */
function buildDiagnosePrompt(input) {
	const { script, specMarkdown, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
	// Prefix each script line with its 1-based number so the model can cite lines.
	const numbered = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
	return `You are diagnosing a failing E2E test. The test was generated from a recorded trace of the original interaction. Compare the failing run against the original spec and recorded actions to determine WHY the test failed and what the right fix is.

## Output language

Write all human-readable fields (\`reasoning\`, \`reason\`) in **${outputLanguage}** (BCP-47 tag).
Selectors, file paths, identifiers, code, type names (TIMING_ISSUE, etc.), JSON keys, and quoted strings stay verbatim regardless of language.

## You have read-only filesystem tools

You can call \`Grep\`, \`Glob\`, and \`Read\` against the current repository before producing the JSON.

For SELECTOR_DRIFT specifically the failure log is usually NOT enough on its own — the runner only reports "selector X not visible". To confirm a rename, search the application source for the *type* of selector that's failing:

- For \`[aria-label='OLD']\` failures: \`Grep\` for \`aria-label=\` (or i18n key \`OLD\`) in the app source. If you find a near-miss like \`aria-label="NEW"\` whose text is a superset/rephrase of the failing label, that is your evidence.
- For \`[placeholder='OLD']\` failures: \`Grep\` for \`placeholder=\`.
- For \`[role='OLD']\` or \`[data-testid='OLD']\`: same pattern.
- For \`text=OLD\` failures: \`Grep\` the source / i18n bundles for \`OLD\`. Locale files (\`*.json\`, \`*.yml\`, \`messages.ts\`, etc.) often hold the canonical strings.

You have **up to 10 tool turns**. Spend them on grep/read; do not loop. Only when you have concrete file:line evidence should you emit SELECTOR_DRIFT — otherwise prefer UNKNOWN with confidence < 0.4 and let the human decide.

Do NOT attempt to write, edit, run shell commands, or hit the network. Only Grep/Glob/Read.

## Diagnosis categories

Pick exactly ONE category. The output JSON must follow the shape for that category.

1. TIMING_ISSUE — element not yet present because the page hasn't loaded / navigated. Fix by inserting or extending sleeps.
{
"diagnosis": {
"type": "TIMING_ISSUE",
"fixes": [
{ "kind": "insert", "line": <1-based>, "seconds": <int>, "reason": "<short>" },
{ "kind": "increase", "line": <1-based of existing sleep>, "increase_to": <int>, "reason": "<short>" }
]
},
"confidence": <0.0-1.0>,
"reasoning": "<why timing is the cause>"
}

2. OVER_ASSERTION — the test is asserting something the spec never required, OR a recorded assertion that is environment-dependent (e.g. a placeholder text that varies). The right fix is to remove those lines from the test.
{
"diagnosis": {
"type": "OVER_ASSERTION",
"lines": [<1-based line numbers to remove>],
"reason": "<short>"
},
"confidence": <0.0-1.0>,
"reasoning": "<why this assertion isn't required by the spec>"
}

3. SELECTOR_DRIFT — the page is healthy but a selector has been renamed/refined since the trace was recorded. The failure log will typically contain a snapshot showing the new selector. ONLY use this when you can name the exact replacement selector.
{
"diagnosis": {
"type": "SELECTOR_DRIFT",
"line": <1-based>,
"oldSelector": "<exact string in current line>",
"newSelector": "<exact replacement>",
"reason": "<short>"
},
"confidence": <0.0-1.0>,
"reasoning": "<evidence from failure log>"
}

4. DATA_MISSING — the test depends on data (a record, a setup, a logged-in state) that no longer exists. Not auto-fixable; the human must reseed or update the spec.
{
"diagnosis": { "type": "DATA_MISSING", "reason": "<what is missing>" },
"confidence": <0.0-1.0>,
"reasoning": "<evidence>"
}

5. UNKNOWN — none of the above fit, or evidence is too weak to choose.
{
"diagnosis": { "type": "UNKNOWN", "reason": "<short>" },
"confidence": <0.0-1.0>,
"reasoning": "<what you saw and why you can't classify>"
}

## Confidence guidance

- 0.9-1.0: failure log directly shows the cause (e.g. "selector X not found, snapshot lists Y" → SELECTOR_DRIFT)
- 0.7-0.9: strong indirect evidence (e.g. timing pattern after navigation, or assertion text that doesn't appear in spec)
- 0.4-0.7: plausible classification but multiple categories could explain it
- < 0.4: prefer UNKNOWN over guessing

## Rules

- Your **final** assistant message must start with \`{\` and end with \`}\` — a single JSON object, nothing before or after. No prose preamble like "Confirmed: ...", no markdown fences, no commentary, no tool calls in the same turn. If you have an analysis sentence, put it in the \`reasoning\` field.
- Line numbers refer to the numbered test script below (1-based).
- For SELECTOR_DRIFT, \`oldSelector\` must match a substring of the script at that line; \`newSelector\` must be backed by a concrete file:line you read with Grep/Read (do not invent). Cite the evidence in \`reasoning\`.
- For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`).
- Cross-check assertions against the spec markdown. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.

## Test Spec (test-spec.md)
${specMarkdown}

## Recorded Actions (actions.json summary)
${actions.map((a, i) => {
		const parts = [`${i + 1}. ${a.command}`];
		if (a.assertType) parts.push(`assertType="${a.assertType}"`);
		if (a.selector) parts.push(`selector="${a.selector}"`);
		if (a.value) parts.push(`value="${a.value}"`);
		if (a.observation) parts.push(`→ ${a.observation}`);
		return parts.join(" ");
	}).join("\n")}

## Test Script (with line numbers)
${numbered}

## Failure Log
${failureLog.slice(0, 4e3)}${pageSnapshot ? formatPageSnapshot(pageSnapshot) : ""}`;
}
|
|
1494
|
+
/**
 * Render the "Page Snapshot" section that is appended to the diagnose prompt.
 *
 * Page snapshot captured by ccqa right after the failure (agent-browser
 * accessibility tree). When present, it usually decides SELECTOR_DRIFT vs
 * TIMING_ISSUE: a near-miss aria-label / role / placeholder in the
 * snapshot is direct evidence of a rename, while a tree that doesn't
 * contain the failing locator at all (without a near-miss) points to a
 * still-loading page or genuinely missing element.
 *
 * The returned text starts with two newlines so it can be concatenated
 * directly after the preceding section, and embeds the snapshot verbatim
 * inside a fenced block.
 *
 * @param {string} snapshot - Snapshot text to embed.
 * @returns {string} The formatted section.
 */
function formatPageSnapshot(snapshot) {
	return `

## Page Snapshot (accessibility tree captured right after the failure)

This is the live state of the page when the test failed. Prefer this over your own assumptions:

- If a near-miss of the failing selector appears here (e.g. failing \`[aria-label='A']\` and snapshot contains \`aria-label="A-prime"\`), that is direct evidence of SELECTOR_DRIFT — propose the snapshot's value as \`newSelector\`.
- If the failing locator is genuinely absent and no near-miss exists, the page may be still loading (TIMING_ISSUE) or the spec is asserting something not on this page (OVER_ASSERTION / DATA_MISSING).
- If the snapshot looks unrelated to the spec (e.g. error page, login wall), DATA_MISSING is likely.

\`\`\`
${snapshot}
\`\`\``;
}
|
|
1517
|
+
//#endregion
|
|
1518
|
+
//#region src/diagnose/diagnose.ts
|
|
1519
|
+
/**
 * Ask the model to diagnose a failing test and parse its answer.
 *
 * The model is invoked with the Read, Grep and Glob tools and at most
 * 10 turns. Outcomes:
 *   - invocation reported an error → `{ result: null, raw, sdkError: true }`
 *   - empty answer                 → `{ result: null, raw: "", sdkError: false }`
 *   - a JSON candidate normalises  → that result
 *   - nothing parses               → an UNKNOWN diagnosis with confidence 0
 *     whose reasoning carries the answer truncated to 1000 characters
 *
 * @param {object} input - Passed through to `buildDiagnosePrompt`.
 * @returns {Promise<{ result: object | null, raw: string, sdkError: boolean }>}
 */
async function diagnose(input) {
	const request = {
		prompt: buildDiagnosePrompt(input),
		allowedTools: ["Read", "Grep", "Glob"],
		maxTurns: 10
	};
	const { result: raw, isError } = await invokeClaudeStreaming(request, () => {});
	if (isError) {
		return { result: null, raw: raw ?? "", sdkError: true };
	}
	if (!raw) {
		return { result: null, raw: "", sdkError: false };
	}
	// Candidates are tried in order; the first one that normalises wins.
	for (const candidate of extractJsonCandidates(raw)) {
		let parsed;
		try {
			parsed = JSON.parse(candidate);
		} catch {
			continue;
		}
		const normalised = normaliseResult(parsed);
		if (normalised) {
			return { result: normalised, raw, sdkError: false };
		}
	}
	const fallback = {
		diagnosis: {
			type: "UNKNOWN",
			reason: "diagnose returned no parseable diagnosis JSON"
		},
		confidence: 0,
		reasoning: truncate$1(raw, 1e3)
	};
	return { result: fallback, raw, sdkError: false };
}
|
|
1567
|
+
/**
 * Pull every plausible JSON object out of `raw`. We try, in order:
 *   1. The whole string with code fences stripped (the prompt asks for
 *      JSON-only, so this is the happy path).
 *   2. Each balanced `{...}` block found by scanning the text. The model
 *      sometimes prefixes the JSON with a "Confirmed: ..." sentence or
 *      mentions partial JSON in its tool-using reasoning; we want to
 *      try the *last* well-formed object first because it's most likely
 *      the final answer, then earlier ones as a fallback.
 *
 * Scanning is anchored on each `{`: quotes and closing braces in the
 * surrounding prose are ignored, and an opening brace that never balances
 * is skipped so that a later, well-formed object is still found.
 *
 * The caller `JSON.parse`s each candidate and stops at the first match
 * that normalises to a known DiagnosisResult.
 *
 * @param {string} raw - Model answer text.
 * @returns {string[]} Candidate JSON texts, without duplicates.
 */
function extractJsonCandidates(raw) {
	const out = [];
	const stripped = stripFence(raw);
	if (stripped) out.push(stripped);
	const blocks = [];
	let cursor = raw.indexOf("{");
	while (cursor !== -1) {
		const end = findBalancedEnd(raw, cursor);
		if (end === -1) {
			// This brace never closes; try the next opening brace instead.
			cursor = raw.indexOf("{", cursor + 1);
			continue;
		}
		blocks.push(raw.slice(cursor, end + 1));
		cursor = raw.indexOf("{", end + 1);
	}
	for (let i = blocks.length - 1; i >= 0; i--) {
		const block = blocks[i];
		if (!out.includes(block)) out.push(block);
	}
	return out;
}
/**
 * Find the index of the `}` that balances the `{` at `start`, treating
 * braces inside double-quoted strings (with backslash escapes) as text.
 *
 * @param {string} text - Text to scan.
 * @param {number} start - Index of an opening brace.
 * @returns {number} Index of the matching closing brace, or -1 if none.
 */
function findBalancedEnd(text, start) {
	let depth = 0;
	let inString = false;
	let escaped = false;
	for (let i = start; i < text.length; i++) {
		const ch = text[i];
		if (inString) {
			if (escaped) escaped = false;
			else if (ch === "\\") escaped = true;
			else if (ch === "\"") inString = false;
			continue;
		}
		if (ch === "\"") inString = true;
		else if (ch === "{") depth++;
		else if (ch === "}") {
			depth--;
			if (depth === 0) return i;
		}
	}
	return -1;
}
/**
 * Shorten `s` to at most `max` characters, appending a marker that states
 * how many characters were dropped. Strings within the limit are returned
 * unchanged.
 *
 * @param {string} s - Text to shorten.
 * @param {number} max - Maximum number of characters to keep.
 * @returns {string}
 */
function truncate$1(s, max) {
	return s.length <= max ? s : `${s.slice(0, max)}... [truncated, ${s.length - max} more chars]`;
}
/**
 * Trim `raw` and remove one leading code fence (optionally tagged `json`)
 * and one trailing code fence, if present.
 *
 * @param {string} raw - Text that may be wrapped in a fenced block.
 * @returns {string} The unwrapped, trimmed text.
 */
function stripFence(raw) {
	return raw.trim().replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, "").trim();
}
|
|
1624
|
+
/**
 * Turn a parsed JSON value into a diagnosis result, or return null when
 * it is not an object or its `diagnosis` field does not normalise.
 * `confidence` is clamped to [0, 1] (0 when not a number); `reasoning`
 * falls back to the empty string when not a string.
 */
function normaliseResult(parsed) {
	const diagnosis = isObject(parsed) ? normaliseDiagnosis(parsed["diagnosis"]) : null;
	if (!diagnosis) return null;
	const { confidence, reasoning } = parsed;
	return {
		diagnosis,
		confidence: typeof confidence === "number" ? clamp(confidence, 0, 1) : 0,
		reasoning: typeof reasoning === "string" ? reasoning : ""
	};
}
|
|
1634
|
+
/**
 * Validate the `diagnosis` payload and rebuild it with only the fields
 * each diagnosis type carries. Returns null for a non-object, an
 * unrecognised `type`, or a type whose required fields are missing:
 *   - TIMING_ISSUE needs at least one usable sleep fix,
 *   - OVER_ASSERTION needs at least one finite line number,
 *   - SELECTOR_DRIFT needs a numeric line and two non-empty selectors.
 * A non-string `reason` becomes the empty string.
 */
function normaliseDiagnosis(raw) {
	if (!isObject(raw)) return null;
	const type = raw["type"];
	const reason = typeof raw["reason"] === "string" ? raw["reason"] : "";
	if (type === "TIMING_ISSUE") {
		const fixes = normaliseSleepFixes(raw["fixes"]);
		return fixes.length > 0 ? { type: "TIMING_ISSUE", fixes } : null;
	}
	if (type === "OVER_ASSERTION") {
		const candidates = Array.isArray(raw["lines"]) ? raw["lines"] : [];
		const lines = candidates.filter((n) => typeof n === "number" && Number.isFinite(n));
		return lines.length > 0 ? { type: "OVER_ASSERTION", lines, reason } : null;
	}
	if (type === "SELECTOR_DRIFT") {
		const line = raw["line"];
		const oldSelector = raw["oldSelector"];
		const newSelector = raw["newSelector"];
		if (typeof line !== "number") return null;
		// Empty selectors are rejected just like missing ones.
		if (typeof oldSelector !== "string" || oldSelector === "") return null;
		if (typeof newSelector !== "string" || newSelector === "") return null;
		return { type: "SELECTOR_DRIFT", line, oldSelector, newSelector, reason };
	}
	if (type === "DATA_MISSING") return { type: "DATA_MISSING", reason };
	if (type === "UNKNOWN") return { type: "UNKNOWN", reason };
	return null;
}
|
|
1678
|
+
/**
 * Normalise the `fixes` array of a TIMING_ISSUE diagnosis.
 *
 * Each usable entry becomes either
 *   { kind: "insert", line, seconds, reason } or
 *   { kind: "increase", line, increase_to, reason }.
 * Entries that are not plain objects, lack a numeric `line`, or lack the
 * number their kind needs are dropped. When `kind` is absent the kind is
 * inferred: `seconds` without `increase_to` means insert, a numeric
 * `increase_to` means increase.
 *
 * @returns {object[]} Normalised fixes (empty when `raw` is not an array).
 */
function normaliseSleepFixes(raw) {
	if (!Array.isArray(raw)) return [];
	return raw.flatMap((item) => {
		if (typeof item !== "object" || item === null || Array.isArray(item)) return [];
		const line = item["line"];
		if (typeof line !== "number") return [];
		const reason = typeof item["reason"] === "string" ? item["reason"] : "";
		const kind = item["kind"];
		const seconds = item["seconds"];
		const increaseTo = item["increase_to"];
		const hasSeconds = typeof seconds === "number";
		const hasIncrease = typeof increaseTo === "number";
		if (kind === "insert" || hasSeconds && increaseTo === void 0) {
			return hasSeconds ? [{ kind: "insert", line, seconds, reason }] : [];
		}
		if (kind === "increase" || hasIncrease) {
			return hasIncrease ? [{ kind: "increase", line, increase_to: increaseTo, reason }] : [];
		}
		return [];
	});
}
|
|
1712
|
+
/** True for non-null, non-array values whose `typeof` is "object". */
function isObject(v) {
	if (v === null || Array.isArray(v)) return false;
	return typeof v === "object";
}
|
|
1715
|
+
/** Restrict `n` to the inclusive range [lo, hi]. */
function clamp(n, lo, hi) {
	return n < lo ? lo : n > hi ? hi : n;
}
|
|
1720
|
+
//#endregion
|
|
1721
|
+
//#region src/diagnose/interactive.ts
|
|
1722
|
+
/**
 * Show the diagnosis context and ask the user what to do with the
 * proposed fix. Keeps asking until a recognised answer is given.
 *
 * If stdin is closed before an answer arrives (EOF, piped input that ran
 * out) nobody can answer any more; in that case the fix is skipped rather
 * than waiting forever on a prompt that can never complete.
 *
 * @param {object} input - Forwarded to printContext ({ result, diff, failureExcerpt }).
 * @returns {Promise<"apply"|"skip"|"manual"|"quit">}
 */
async function promptForChoice(input) {
	printContext(input);
	const rl = createInterface({
		input: process.stdin,
		output: process.stdout
	});
	try {
		while (true) {
			const answer = await question(rl, "[a]pply / [s]kip / [m]anual / [q]uit > ");
			if (answer === null) {
				process.stdout.write("\n  input closed; skipping\n");
				return "skip";
			}
			switch (answer.trim().toLowerCase()) {
				case "a":
				case "apply": return "apply";
				case "s":
				case "skip": return "skip";
				case "m":
				case "manual": return "manual";
				case "q":
				case "quit": return "quit";
				default: process.stdout.write("  please answer a/s/m/q\n");
			}
		}
	} finally {
		rl.close();
	}
}
/**
 * Promise wrapper around `rl.question`.
 *
 * Resolves with the typed line, or with `null` when the interface closes
 * before an answer is received (readline does not invoke the question
 * callback in that case, so without the `close` listener the promise
 * would stay pending forever).
 */
function question(rl, prompt) {
	return new Promise((resolve) => {
		const onClose = () => resolve(null);
		rl.once("close", onClose);
		rl.question(prompt, (answer) => {
			rl.off("close", onClose);
			resolve(answer);
		});
	});
}
|
|
1747
|
+
/**
 * Print everything the user needs to judge a proposed fix: diagnosis
 * type and confidence, optional reasoning, per-type detail lines, and —
 * when provided — the failure excerpt and the proposed diff. Every line
 * goes to stdout with a `[fix]` prefix.
 */
function printContext({ result, diff, failureExcerpt }) {
	const out = process.stdout;
	const { diagnosis, confidence, reasoning } = result;
	out.write("\n");
	out.write(`[fix] diagnosis: ${diagnosis.type} (confidence ${confidence.toFixed(2)})\n`);
	if (reasoning) out.write(`[fix] reasoning: ${reasoning}\n`);
	for (const detail of formatDiagnosisDetail(diagnosis)) out.write(`[fix] ${detail}\n`);
	// Optional multi-line sections share the same layout: heading, prefixed body, newline.
	if (failureExcerpt) {
		out.write("\n[fix] failure excerpt:\n");
		out.write(prefixLines(failureExcerpt, "[fix]   "));
		out.write("\n");
	}
	if (diff) {
		out.write("\n[fix] proposed fix:\n");
		out.write(prefixLines(diff, "[fix]   "));
		out.write("\n");
	}
	out.write("\n");
}
|
|
1765
|
+
/**
 * Build the human-readable detail lines for a diagnosis.
 * Returns an array of strings for the five known types and `undefined`
 * for anything else.
 */
function formatDiagnosisDetail(diagnosis) {
	const { type } = diagnosis;
	if (type === "TIMING_ISSUE") {
		const parts = [];
		for (const f of diagnosis.fixes) {
			if (f.kind === "insert") parts.push(`insert ${f.seconds}s @ line ${f.line}`);
			else parts.push(`increase to ${f.increase_to}s @ line ${f.line}`);
		}
		return [`fixes: ${parts.join(", ")}`];
	}
	if (type === "OVER_ASSERTION") {
		return [`lines: ${diagnosis.lines.join(", ")}`, `reason: ${diagnosis.reason}`];
	}
	if (type === "SELECTOR_DRIFT") {
		return [`line ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}"`, `reason: ${diagnosis.reason}`];
	}
	if (type === "DATA_MISSING" || type === "UNKNOWN") {
		return [`reason: ${diagnosis.reason}`];
	}
}
|
|
1774
|
+
/** Prepend `prefix` to every `\n`-separated line of `text`. */
function prefixLines(text, prefix) {
	const rows = text.split("\n");
	for (let i = 0; i < rows.length; i++) rows[i] = `${prefix}${rows[i]}`;
	return rows.join("\n");
}
|
|
1777
|
+
//#endregion
|
|
1778
|
+
//#region src/diagnose/snapshot.ts
|
|
1779
|
+
// CommonJS-style resolver anchored at this module, used to locate agent-browser.
const require = createRequire(import.meta.url);
// How long `agent-browser snapshot` / `close` may run before being killed.
const SNAPSHOT_TIMEOUT_MS = 10_000;
const CLOSE_TIMEOUT_MS = 10_000;
// Upper bound on captured snapshot text (compared against string length).
const MAX_OUTPUT_BYTES = 60_000;
/**
 * Resolve the path of the agent-browser CLI entry script.
 * @returns {string|null} The resolved path, or null when resolution fails.
 */
function resolveAgentBrowserBin() {
	let binPath = null;
	try {
		binPath = require.resolve("agent-browser/bin/agent-browser.js");
	} catch {
		// Resolution failed; callers treat null as "agent-browser unavailable".
	}
	return binPath;
}
|
|
1790
|
+
/**
 * Run `agent-browser snapshot` against the session that the failed vitest
 * run just used, and return its accessibility-tree dump.
 *
 * Returns null when agent-browser is missing, the command fails to start
 * or exits non-zero, it prints nothing, or the call exceeds
 * {@link SNAPSHOT_TIMEOUT_MS}. We never throw — a missing snapshot just
 * means diagnose has less context.
 *
 * The output is truncated to {@link MAX_OUTPUT_BYTES} so the prompt stays
 * within budget on large pages.
 *
 * @param {string} sessionName - Value passed as AGENT_BROWSER_SESSION.
 * @returns {Promise<string|null>}
 */
async function captureSnapshot(sessionName) {
	const abBin = resolveAgentBrowserBin();
	if (!abBin) return null;
	return new Promise((resolve) => {
		const child = spawn(process.execPath, [abBin, "snapshot"], {
			env: {
				...process.env,
				AGENT_BROWSER_SESSION: sessionName
			},
			// stderr is not used for anything, so discard it instead of buffering it.
			stdio: [
				"ignore",
				"pipe",
				"ignore"
			]
		});
		let stdout = "";
		let settled = false;
		let timer = null;
		// Resolve at most once, whichever of timeout / error / close comes first.
		const settle = (value) => {
			if (settled) return;
			settled = true;
			clearTimeout(timer);
			resolve(value);
		};
		timer = setTimeout(() => {
			child.kill("SIGTERM");
			// Resolve right away: a child that ignores SIGTERM must not hang the caller.
			settle(null);
		}, SNAPSHOT_TIMEOUT_MS);
		child.stdout.setEncoding("utf8");
		child.stdout.on("data", (chunk) => {
			stdout += chunk;
		});
		child.on("error", () => settle(null));
		// "close" (not "exit") fires only after the stdio streams have ended,
		// so all of stdout has been received by the time we read it.
		child.on("close", (code) => {
			if (code !== 0) {
				settle(null);
				return;
			}
			const trimmed = stdout.trim();
			settle(trimmed ? truncate(trimmed, MAX_OUTPUT_BYTES) : null);
		});
	});
}
|
|
1850
|
+
/**
 * Keep the first `maxBytes` characters of `s` (the limit is applied to
 * string length) and append a line saying how many characters were
 * dropped. Strings within the limit are returned unchanged.
 */
function truncate(s, maxBytes) {
	const overflow = s.length - maxBytes;
	return overflow <= 0 ? s : `${s.slice(0, maxBytes)}\n... [truncated, ${overflow} more chars]`;
}
|
|
1854
|
+
/**
 * Close an agent-browser session by name. Used before/after a `ccqa generate`
 * run so a wedged daemon from a previous attempt can't hang the next one.
 *
 * Always resolves; never throws. If the binary is missing, the session
 * doesn't exist, or the call exceeds {@link CLOSE_TIMEOUT_MS}, we silently
 * return — close is best-effort cleanup, not a precondition.
 *
 * @param {string} sessionName - Value passed as AGENT_BROWSER_SESSION.
 * @returns {Promise<void>}
 */
async function closeSession(sessionName) {
	const abBin = resolveAgentBrowserBin();
	if (!abBin) return;
	await new Promise((resolve) => {
		const child = spawn(process.execPath, [abBin, "close"], {
			env: {
				...process.env,
				AGENT_BROWSER_SESSION: sessionName
			},
			stdio: "ignore"
		});
		let timer = null;
		// Safe to call more than once: resolving a settled promise is a no-op.
		const finish = () => {
			clearTimeout(timer);
			resolve();
		};
		timer = setTimeout(() => {
			child.kill("SIGTERM");
			// Don't wait for the exit event: a child that ignores SIGTERM
			// would otherwise break the "always resolves" guarantee.
			finish();
		}, CLOSE_TIMEOUT_MS);
		child.on("error", finish);
		child.on("exit", finish);
	});
}
|
|
1884
|
+
//#endregion
|
|
1885
|
+
//#region src/diagnose/loop.ts
|
|
1886
|
+
// Confidence at or above this value counts as "high" when deciding whether to auto-apply.
const DEFAULT_CONFIDENCE_THRESHOLD = 0.8;
/**
 * Diagnose → fix → re-run loop for a failing generated test.
 *
 * Resolves to true as soon as a vitest run exits with code 0, and to
 * false when `maxRetries` attempts are used up or a diagnose step yields
 * no fixed script.
 */
async function runAutoFixLoop(input) {
	const { scriptPath, initialRun, specMarkdown, actions, maxRetries, mode, runVitest, agentBrowserSession, outputLanguage } = input;
	if (initialRun.exitCode === 0) return true;
	let lastRun = initialRun;
	for (let attempt = 1; attempt <= maxRetries; attempt++) {
		fix(`attempt ${attempt}/${maxRetries}`);
		blank();
		// A snapshot is only attempted when a session was pinned for this run.
		let pageSnapshot = null;
		if (agentBrowserSession) {
			pageSnapshot = await timedPhase("page snapshot", () => captureSnapshot(agentBrowserSession), "fix");
			if (pageSnapshot) {
				fix(`page snapshot: ${pageSnapshot.length} chars captured`);
			} else {
				fix("page snapshot unavailable; continuing without it");
			}
		}
		const fixed = await diagnoseAndFix({
			script: lastRun.currentScript,
			specMarkdown,
			actions,
			failureLog: lastRun.output,
			pageSnapshot: pageSnapshot ?? void 0,
			mode,
			outputLanguage
		});
		if (!fixed) {
			fix("bailed out; see diagnosis above");
			return false;
		}
		await writeFile(scriptPath, fixed, "utf-8");
		fix(`saved: ${scriptPath}`);
		blank();
		const { exitCode, output, currentScript } = await timedPhase(`vitest run #${attempt + 1}`, () => runVitest(scriptPath), "run");
		if (exitCode === 0) return true;
		lastRun = { exitCode, output, currentScript };
	}
	return false;
}
|
|
1922
|
+
/**
 * Run one diagnose pass and turn its outcome into either a fixed script
 * (string) or null when nothing should be written.
 *
 * Order of checks: SDK error → empty response → diagnosis types that are
 * never auto-fixed (DATA_MISSING, UNKNOWN) → fix not applicable to the
 * script → decision by mode and confidence → interactive prompt.
 * Choosing "quit" at the prompt terminates the process with exit code 1.
 */
async function diagnoseAndFix(input) {
	const { script, specMarkdown, actions, failureLog, pageSnapshot, mode, outputLanguage } = input;
	const outcome = await timedPhase("diagnose", () => diagnose({
		script,
		specMarkdown,
		actions,
		failureLog,
		pageSnapshot,
		outputLanguage
	}), "fix");
	if (outcome.sdkError) {
		fix("diagnose: SDK error talking to Claude");
		if (outcome.raw) fix(`diagnose raw: ${truncateForLog(outcome.raw)}`);
		hint("re-run later, or check ANTHROPIC_API_KEY / network connectivity");
		return null;
	}
	const result = outcome.result;
	if (!result) {
		fix("diagnose: empty response from LLM");
		hint("re-run; if this keeps happening the failure log may be too short to diagnose");
		return null;
	}
	reportDiagnosis(result);
	// Shared exit for every "give the problem back to the user" path.
	const giveUp = () => {
		handoffToUser(result, outcome.raw, outputLanguage);
		return null;
	};
	const diagnosisType = result.diagnosis.type;
	if (diagnosisType === "DATA_MISSING" || diagnosisType === "UNKNOWN") return giveUp();
	const apply = applyDiagnosis(script, result.diagnosis);
	if (!apply.applied) {
		fix(`cannot apply: ${apply.reason}`);
		return giveUp();
	}
	const decision = decide(result, mode);
	if (decision === "apply-auto") {
		fix(`applying automatically: ${apply.summary}`);
		return apply.script;
	}
	if (decision === "skip-low-confidence") {
		fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (--no-interactive)`);
		return giveUp();
	}
	const choice = await promptForChoice({
		result,
		diff: previewDiff(script, apply.script),
		failureExcerpt: failureLog.slice(0, 800)
	});
	if (choice === "apply") {
		fix(`applied: ${apply.summary}`);
		return apply.script;
	}
	if (choice === "skip") {
		fix("skipped; leaving script untouched");
		return null;
	}
	if (choice === "manual") {
		fix("paused for manual edit");
		return giveUp();
	}
	if (choice === "quit") {
		fix("user quit");
		process.exit(1);
	}
}
|
|
1985
|
+
/**
 * Decide how to treat an applicable fix.
 *   - "auto" mode always applies.
 *   - Otherwise a confidence at or above DEFAULT_CONFIDENCE_THRESHOLD applies.
 *   - Below the threshold, "non-interactive" skips and any other mode
 *     falls through to the interactive prompt.
 */
function decide(result, mode) {
	if (mode === "auto") return "apply-auto";
	if (result.confidence >= DEFAULT_CONFIDENCE_THRESHOLD) return "apply-auto";
	return mode === "non-interactive" ? "skip-low-confidence" : "interactive";
}
|
|
1991
|
+
/** Log the diagnosis type, the confidence (two decimals) and, if present, the reasoning. */
function reportDiagnosis(result) {
	const { diagnosis, confidence, reasoning } = result;
	fix(`diagnosis: ${diagnosis.type}`);
	fix(`confidence: ${confidence.toFixed(2)}`);
	if (reasoning) {
		fix(`reasoning: ${reasoning}`);
	}
}
|
|
1996
|
+
/**
 * Emit a category-specific [hint] block that tells the user what to do next.
 * Called whenever the loop has decided it cannot proceed on its own —
 * because the diagnosis is intrinsically not auto-fixable, because the
 * proposed fix wasn't applicable to the current script, or because the
 * confidence was too low under --no-interactive.
 *
 * The goal is to never leave the user with just "auto-fix exhausted" —
 * always state which side (test artifacts vs. application) likely needs
 * the next action. When raw diagnose output is available, a shortened
 * copy is logged after the hints.
 */
function handoffToUser(result, raw, language) {
	const messageLines = handoffMessage(result.diagnosis, normLang(language));
	messageLines.forEach((line) => hint(line));
	if (raw) {
		fix(`diagnose raw: ${truncateForLog(raw)}`);
	}
}
|
|
2012
|
+
/**
 * Map a language tag to one of the two supported hint languages:
 * anything starting with "ja" (case-insensitive) is "ja", everything
 * else — including a missing value — is "en".
 */
function normLang(language) {
	const isJapanese = Boolean(language) && language.toLowerCase().startsWith("ja");
	return isJapanese ? "ja" : "en";
}
|
|
2016
|
+
// Hint builders keyed by normalised language code.
const HANDOFF = {
	en: handoffEn,
	ja: handoffJa
};
/** Build the hint lines for `diagnosis` in the given language ("en" or "ja"). */
function handoffMessage(diagnosis, language) {
	const build = HANDOFF[language];
	return build(diagnosis);
}
|
|
2023
|
+
/**
 * English hint lines for a diagnosis that could not be auto-fixed.
 * Returns an array of lines for the five known types, `undefined` otherwise.
 */
function handoffEn(diagnosis) {
	const { type } = diagnosis;
	if (type === "DATA_MISSING") {
		return [
			`application-side issue: required data is missing. ${diagnosis.reason}`,
			"next step: seed the data (or update test-spec.md prerequisites), then re-run trace + generate."
		];
	}
	if (type === "UNKNOWN") {
		return [
			`could not classify the failure. ${diagnosis.reason}`,
			"next step: read the failure log above, decide whether the test or the app is wrong, and fix manually. consider re-running ccqa trace if the recorded flow no longer matches the live app."
		];
	}
	if (type === "SELECTOR_DRIFT") {
		return [
			`selector likely drifted but auto-apply was not safe.`,
			`proposed: line ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason}).`,
			"next step: confirm in the live app and either accept the proposal manually, or re-run ccqa trace to recapture the new selector."
		];
	}
	if (type === "OVER_ASSERTION") {
		return [
			`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`,
			"next step: cross-check test-spec.md. either delete the assertion from the test, or tighten the spec to require it."
		];
	}
	if (type === "TIMING_ISSUE") {
		return [
			`timing fix proposed but couldn't be applied automatically.`,
			"next step: insert a sleep manually before the failing line, or re-run with a higher confidence trace."
		];
	}
}
|
|
2036
|
+
/**
 * Japanese hint lines for a diagnosis that could not be auto-fixed.
 * Returns an array of lines for the five known types, `undefined` otherwise.
 */
function handoffJa(diagnosis) {
	const { type } = diagnosis;
	if (type === "DATA_MISSING") {
		return [
			`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`,
			"次のステップ: データを seed する(または test-spec.md の prerequisites を更新)してから ccqa trace + generate をやり直してください。"
		];
	}
	if (type === "UNKNOWN") {
		return [
			`失敗を分類できませんでした。${diagnosis.reason}`,
			"次のステップ: 上の失敗ログを確認し、テストとアプリのどちらが原因か判断して手動で修正してください。記録した手順がアプリの現状と合わない場合は ccqa trace の再実行を検討してください。"
		];
	}
	if (type === "SELECTOR_DRIFT") {
		return [
			"selector が変わった可能性が高いですが、自動適用は安全でないと判断しました。",
			`提案: 行 ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason})`,
			"次のステップ: アプリで新 selector を確認し、手動で適用するか ccqa trace をやり直して新しい selector を取り直してください。"
		];
	}
	if (type === "OVER_ASSERTION") {
		return [
			`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`,
			"次のステップ: test-spec.md と照合して、テスト側の assertion を削るか、spec 側を更新してください。"
		];
	}
	if (type === "TIMING_ISSUE") {
		return [
			"timing 関連の修正案は出ましたが、自動適用できませんでした。",
			"次のステップ: 失敗行の前に手動で sleep を入れるか、より信頼度の高い trace を取り直してください。"
		];
	}
}
|
|
2049
|
+
/**
 * Flatten `s` onto one line (each run of newlines becomes " ⏎ ") and cap
 * it at 400 characters, appending how many characters were cut.
 */
function truncateForLog(s) {
	const LIMIT = 400;
	const flat = s.replace(/\n+/g, " ⏎ ");
	if (flat.length <= LIMIT) return flat;
	const head = flat.slice(0, LIMIT);
	return `${head}... [+${flat.length - LIMIT} chars]`;
}
|
|
2053
|
+
/**
 * Derive the auto-fix mode from CLI options: `auto` wins; otherwise a
 * disabled interactive flag (`interactive === false` or a truthy
 * `noInteractive`) gives "non-interactive"; the default is "interactive".
 */
function resolveMode(opts) {
	if (opts.auto) return "auto";
	const promptingDisabled = opts.interactive === false || Boolean(opts.noInteractive);
	return promptingDisabled ? "non-interactive" : "interactive";
}
|
|
2058
|
+
//#endregion
|
|
1188
2059
|
//#region src/cli/generate.ts
|
|
1189
|
-
const generateCommand = new Command("generate").argument("<feature/spec>", "Spec
|
|
2060
|
+
const generateCommand = new Command("generate").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Generate agent-browser test script from recorded trace actions. test.spec.ts is regenerated from actions.json on every run; pass --force to overwrite manual edits.").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--force", "Overwrite an existing test.spec.ts without warning").option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)").option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en").action(async (specPath, opts) => {
|
|
1190
2061
|
const { featureName, specName } = parseSpecPath(specPath);
|
|
1191
|
-
|
|
2062
|
+
const mode = resolveMode(opts);
|
|
2063
|
+
const useSnapshot = opts.snapshot !== false;
|
|
2064
|
+
await runGenerate(featureName, specName, parseInt(opts.maxRetries, 10), mode, opts.force ?? false, useSnapshot, opts.language ?? "en");
|
|
1192
2065
|
});
|
|
1193
|
-
async function runGenerate(featureName, specName, maxRetries) {
|
|
2066
|
+
async function runGenerate(featureName, specName, maxRetries, mode, force, useSnapshot, outputLanguage) {
|
|
1194
2067
|
header("generate", `${featureName}/${specName}`);
|
|
1195
2068
|
await ensureCcqaDir();
|
|
2069
|
+
const existingScriptPath = await getTestScript(featureName, specName);
|
|
2070
|
+
if (existingScriptPath && !force) {
|
|
2071
|
+
if (!await confirmOverwrite(existingScriptPath)) {
|
|
2072
|
+
info("aborted; pass --force to overwrite without prompting");
|
|
2073
|
+
return;
|
|
2074
|
+
}
|
|
2075
|
+
}
|
|
1196
2076
|
const { path: actionsPath, actions } = await getTraceActions(featureName, specName);
|
|
1197
2077
|
meta("trace", actionsPath);
|
|
1198
2078
|
meta("actions", actions.length);
|
|
1199
|
-
const
|
|
2079
|
+
const specContent = await readSpecFile(featureName, specName);
|
|
2080
|
+
const spec = parseTestSpec(specContent);
|
|
1200
2081
|
const setupScripts = await loadSetupScripts(spec.setups);
|
|
1201
2082
|
if (setupScripts.length > 0) meta("setups", setupScripts.map((s) => s.name).join(", "));
|
|
2083
|
+
meta("fix-mode", mode);
|
|
2084
|
+
meta("language", outputLanguage);
|
|
1202
2085
|
blank();
|
|
1203
2086
|
const cleanedActions = await cleanupActions$1(actions);
|
|
1204
2087
|
if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
|
|
1205
2088
|
const scriptPath = await saveTestScript(featureName, specName, actionsToScript(cleanedActions, spec.title, setupScripts.length > 0 ? setupScripts : void 0));
|
|
1206
2089
|
meta("saved", scriptPath);
|
|
1207
2090
|
blank();
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
2091
|
+
const agentBrowserSession = useSnapshot ? `ccqa-generate-${Date.now()}` : void 0;
|
|
2092
|
+
const runVitestForSession = (path) => runVitest$1(path, agentBrowserSession);
|
|
2093
|
+
let signalHandler = null;
|
|
2094
|
+
if (agentBrowserSession) {
|
|
2095
|
+
await closeSession(agentBrowserSession);
|
|
2096
|
+
signalHandler = () => {
|
|
2097
|
+
closeSession(agentBrowserSession).finally(() => process.exit(130));
|
|
2098
|
+
};
|
|
2099
|
+
process.once("SIGINT", signalHandler);
|
|
2100
|
+
process.once("SIGTERM", signalHandler);
|
|
1212
2101
|
}
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
warn("could not determine fix from failure log");
|
|
1219
|
-
break;
|
|
2102
|
+
try {
|
|
2103
|
+
const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(scriptPath), "run");
|
|
2104
|
+
if (initialRun.exitCode === 0) {
|
|
2105
|
+
hint(`run 'ccqa run ${featureName}/${specName}' to execute the test`);
|
|
2106
|
+
return;
|
|
1220
2107
|
}
|
|
1221
|
-
await
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
2108
|
+
if (await runAutoFixLoop({
|
|
2109
|
+
scriptPath,
|
|
2110
|
+
initialRun,
|
|
2111
|
+
specMarkdown: specContent,
|
|
2112
|
+
actions: cleanedActions,
|
|
2113
|
+
maxRetries,
|
|
2114
|
+
mode,
|
|
2115
|
+
runVitest: runVitestForSession,
|
|
2116
|
+
agentBrowserSession,
|
|
2117
|
+
outputLanguage
|
|
2118
|
+
})) {
|
|
1226
2119
|
hint(`run 'ccqa run ${featureName}/${specName}' to execute the test`);
|
|
1227
2120
|
return;
|
|
1228
2121
|
}
|
|
2122
|
+
warn("auto-fix exhausted; test still failing");
|
|
2123
|
+
process.exit(1);
|
|
2124
|
+
} finally {
|
|
2125
|
+
if (signalHandler) {
|
|
2126
|
+
process.off("SIGINT", signalHandler);
|
|
2127
|
+
process.off("SIGTERM", signalHandler);
|
|
2128
|
+
}
|
|
2129
|
+
if (agentBrowserSession) await closeSession(agentBrowserSession);
|
|
2130
|
+
}
|
|
2131
|
+
}
|
|
2132
|
+
async function confirmOverwrite(path) {
|
|
2133
|
+
if (!process.stdin.isTTY) {
|
|
2134
|
+
warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
|
|
2135
|
+
return false;
|
|
2136
|
+
}
|
|
2137
|
+
const rl = createInterface({
|
|
2138
|
+
input: process.stdin,
|
|
2139
|
+
output: process.stdout
|
|
2140
|
+
});
|
|
2141
|
+
try {
|
|
2142
|
+
process.stdout.write("\n");
|
|
2143
|
+
process.stdout.write(`[warn] ${path} already exists.\n`);
|
|
2144
|
+
process.stdout.write(`[warn] generate will regenerate it from actions.json and any manual edits will be lost.\n`);
|
|
2145
|
+
const norm = (await new Promise((res) => rl.question("Overwrite? [y/N] ", res))).trim().toLowerCase();
|
|
2146
|
+
return norm === "y" || norm === "yes";
|
|
2147
|
+
} finally {
|
|
2148
|
+
rl.close();
|
|
1229
2149
|
}
|
|
1230
|
-
warn("auto-fix exhausted — test still failing");
|
|
1231
|
-
process.exit(1);
|
|
1232
2150
|
}
|
|
1233
|
-
/**
|
|
1234
|
-
* Load setup test scripts, extract test body, and replace {{placeholders}} with params values.
|
|
1235
|
-
*/
|
|
1236
2151
|
async function loadSetupScripts(setups) {
|
|
1237
2152
|
if (!setups?.length) return [];
|
|
1238
2153
|
const result = [];
|
|
@@ -1282,48 +2197,28 @@ function extractTestBody(script) {
|
|
|
1282
2197
|
}
|
|
1283
2198
|
function replacePlaceholders(body, params) {
|
|
1284
2199
|
let result = body;
|
|
1285
|
-
for (const [key, value] of Object.entries(params))
|
|
2200
|
+
for (const [key, value] of Object.entries(params)) if (hasEnvRef(value)) {
|
|
2201
|
+
const expr = envRefsToJsExpression(value);
|
|
2202
|
+
const re = new RegExp(`(["'])\\{\\{${escapeRegExp(key)}\\}\\}\\1`, "g");
|
|
2203
|
+
result = result.replace(re, expr);
|
|
2204
|
+
result = result.replaceAll(`{{${key}}}`, value);
|
|
2205
|
+
} else result = result.replaceAll(`{{${key}}}`, value);
|
|
1286
2206
|
return result;
|
|
1287
2207
|
}
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
const { result, isError } = await invokeClaudeStreaming({
|
|
1291
|
-
prompt: buildAutoFixPrompt(script, failureLog),
|
|
1292
|
-
disableBuiltinTools: true,
|
|
1293
|
-
maxTurns: 1
|
|
1294
|
-
}, () => {});
|
|
1295
|
-
if (isError || !result) return null;
|
|
1296
|
-
const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
|
|
1297
|
-
const fixes = JSON.parse(json);
|
|
1298
|
-
if (!Array.isArray(fixes) || fixes.length === 0) return null;
|
|
1299
|
-
return applySleepFixes$1(script, fixes);
|
|
1300
|
-
} catch {
|
|
1301
|
-
return null;
|
|
1302
|
-
}
|
|
1303
|
-
}
|
|
1304
|
-
function applySleepFixes$1(script, fixes) {
|
|
1305
|
-
const lines = script.split("\n");
|
|
1306
|
-
for (const fix of fixes) if ("increase_to" in fix) {
|
|
1307
|
-
const idx = fix.line - 1;
|
|
1308
|
-
if (idx >= 0 && idx < lines.length) lines[idx] = lines[idx].replace(/spawnSync\("sleep",\s*\["\d+"\]/, `spawnSync("sleep", ["${fix.increase_to}"]`);
|
|
1309
|
-
}
|
|
1310
|
-
const inserts = fixes.filter((f) => "seconds" in f && !("increase_to" in f)).sort((a, b) => b.line - a.line);
|
|
1311
|
-
for (const fix of inserts) {
|
|
1312
|
-
const idx = fix.line - 1;
|
|
1313
|
-
if (idx >= 0 && idx <= lines.length) lines.splice(idx, 0, ` spawnSync("sleep", ["${fix.seconds}"], { stdio: "inherit" });`);
|
|
1314
|
-
}
|
|
1315
|
-
return lines.join("\n");
|
|
2208
|
+
function escapeRegExp(s) {
|
|
2209
|
+
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1316
2210
|
}
|
|
1317
|
-
async function runVitest$1(scriptPath) {
|
|
1318
|
-
const { exitCode, stdout, stderr } = await
|
|
2211
|
+
async function runVitest$1(scriptPath, agentBrowserSession) {
|
|
2212
|
+
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
1319
2213
|
"run",
|
|
1320
2214
|
"--config",
|
|
1321
2215
|
bundledVitestConfigPath(),
|
|
1322
2216
|
scriptPath
|
|
1323
|
-
]
|
|
2217
|
+
], agentBrowserSession ? { env: {
|
|
2218
|
+
...process.env,
|
|
2219
|
+
AGENT_BROWSER_SESSION: agentBrowserSession
|
|
2220
|
+
} } : {});
|
|
1324
2221
|
const currentScript = await readFile(scriptPath, "utf8");
|
|
1325
|
-
process.stdout.write(stdout);
|
|
1326
|
-
if (stderr) process.stderr.write(stderr);
|
|
1327
2222
|
return {
|
|
1328
2223
|
exitCode,
|
|
1329
2224
|
output: stdout + stderr,
|
|
@@ -1378,7 +2273,7 @@ async function runTests(target) {
|
|
|
1378
2273
|
warn(`${featureName}/${specName}: no test.spec.ts found`);
|
|
1379
2274
|
continue;
|
|
1380
2275
|
}
|
|
1381
|
-
|
|
2276
|
+
run(`${featureName}/${specName}`);
|
|
1382
2277
|
meta("test", scriptFile);
|
|
1383
2278
|
blank();
|
|
1384
2279
|
const reportFile = join(tmpDir, `report-${i}.json`);
|
|
@@ -1522,6 +2417,7 @@ async function runTraceSetup(name) {
|
|
|
1522
2417
|
await ensureCcqaDir();
|
|
1523
2418
|
const spec = parseSetupSpec(await readSetupSpecFile(name));
|
|
1524
2419
|
const resolvedSpec = replacePlaceholdersWithDummies(spec);
|
|
2420
|
+
const secretsToScrub = buildSecretsToScrub(spec);
|
|
1525
2421
|
meta("setup", spec.title);
|
|
1526
2422
|
meta("steps", spec.steps.length);
|
|
1527
2423
|
if (spec.placeholders) meta("placeholders", Object.keys(spec.placeholders).join(", "));
|
|
@@ -1542,8 +2438,12 @@ async function runTraceSetup(name) {
|
|
|
1542
2438
|
"Grep",
|
|
1543
2439
|
"Glob"
|
|
1544
2440
|
],
|
|
2441
|
+
env: {
|
|
2442
|
+
PATH: pathWithAgentBrowserShim(process.env["PATH"]),
|
|
2443
|
+
ANTHROPIC_API_KEY: ""
|
|
2444
|
+
},
|
|
1545
2445
|
onAbAction: (abAction) => {
|
|
1546
|
-
const action = parseAbAction(abAction);
|
|
2446
|
+
const action = parseAbAction(scrubSecrets(abAction, secretsToScrub));
|
|
1547
2447
|
if (action) traceActions.push(action);
|
|
1548
2448
|
},
|
|
1549
2449
|
onAbActionFailed: () => {
|
|
@@ -1565,7 +2465,7 @@ async function runTraceSetup(name) {
|
|
|
1565
2465
|
if (routeStep.status === "FAILED") overallStatus = "failed";
|
|
1566
2466
|
}
|
|
1567
2467
|
} else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
|
|
1568
|
-
const action = parseAbAction(trimmed);
|
|
2468
|
+
const action = parseAbAction(scrubSecrets(trimmed, secretsToScrub));
|
|
1569
2469
|
if (action) traceActions.push(action);
|
|
1570
2470
|
}
|
|
1571
2471
|
}
|
|
@@ -1591,7 +2491,7 @@ function replacePlaceholdersWithDummies(spec) {
|
|
|
1591
2491
|
const dummies = spec.placeholders;
|
|
1592
2492
|
const resolve = (text) => {
|
|
1593
2493
|
let result = text;
|
|
1594
|
-
for (const [key, def] of Object.entries(dummies)) result = result.replaceAll(`{{${key}}}`, def.dummy);
|
|
2494
|
+
for (const [key, def] of Object.entries(dummies)) result = result.replaceAll(`{{${key}}}`, resolveEnvRefs(def.dummy));
|
|
1595
2495
|
return result;
|
|
1596
2496
|
};
|
|
1597
2497
|
return {
|
|
@@ -1603,17 +2503,52 @@ function replacePlaceholdersWithDummies(spec) {
|
|
|
1603
2503
|
}))
|
|
1604
2504
|
};
|
|
1605
2505
|
}
|
|
2506
|
+
/**
 * Build the lookup used to scrub real secret values out of recorded actions
 * before they are written to actions.json.
 *
 * Only placeholders whose dummy contains env refs contribute an entry:
 *   <resolved-value> -> <original ${VAR} string>
 * so that an `ab fill ... <secret>` line records the placeholder string
 * instead of the secret. Dummies that resolve to an empty string are left
 * out, because an empty key would match incidental empty strings in the
 * recorded actions.
 *
 * @param {{placeholders?: Object}} spec - Parsed setup spec; each placeholder
 *   definition is read for its `dummy` string.
 * @returns {Map<string, string>} Resolved secret value -> original dummy text.
 */
function buildSecretsToScrub(spec) {
	const scrubMap = /* @__PURE__ */ new Map();
	const placeholders = spec.placeholders;
	if (!placeholders) return scrubMap;
	for (const definition of Object.values(placeholders)) {
		const template = definition.dummy;
		if (!hasEnvRef(template)) continue;
		const secretValue = resolveEnvRefs(template);
		if (secretValue) scrubMap.set(secretValue, template);
	}
	return scrubMap;
}
|
|
2528
|
+
/**
 * Replace every occurrence of a recorded secret with its `${VAR}` placeholder.
 *
 * Secrets are applied longest-first. If one secret is a prefix or substring
 * of another (e.g. `hunter2` and `hunter2-admin`), replacing the shorter one
 * first would leave the tail of the longer secret in the recorded line.
 *
 * @param {string} line - Recorded action line to sanitize.
 * @param {Map<string, string>} secrets - Resolved secret value -> placeholder text.
 * @returns {string} `line` with all known secret values replaced.
 */
function scrubSecrets(line, secrets) {
	if (secrets.size === 0) return line;
	const longestFirst = [...secrets].sort(([a], [b]) => b.length - a.length);
	let result = line;
	for (const [secret, placeholder] of longestFirst) {
		// An empty key would match between every character and shred the line.
		if (!secret || !result.includes(secret)) continue;
		// split/join instead of replaceAll: the placeholder is inserted verbatim,
		// with no special handling of `$` sequences in the replacement text.
		result = result.split(secret).join(placeholder);
	}
	return result;
}
|
|
1606
2538
|
//#endregion
|
|
1607
2539
|
//#region src/cli/generate-setup.ts
|
|
1608
|
-
const generateSetupCommand = new Command("generate-setup").argument("<name>", "Setup name to generate (e.g. login)").description("Clean up, validate, and templatize setup actions").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)").action(async (name, opts) => {
|
|
1609
|
-
|
|
2540
|
+
const generateSetupCommand = new Command("generate-setup").argument("<name>", "Setup name to generate (e.g. login)").description("Clean up, validate, and templatize setup actions").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en").action(async (name, opts) => {
|
|
2541
|
+
const mode = resolveMode(opts);
|
|
2542
|
+
await runGenerateSetup(name, parseInt(opts.maxRetries, 10), opts.fromDummy ?? false, mode, opts.language ?? "en");
|
|
1610
2543
|
});
|
|
1611
|
-
async function runGenerateSetup(name, maxRetries, fromDummy) {
|
|
2544
|
+
async function runGenerateSetup(name, maxRetries, fromDummy, mode, outputLanguage) {
|
|
1612
2545
|
header("generate-setup", name);
|
|
1613
2546
|
await ensureCcqaDir();
|
|
1614
|
-
const
|
|
2547
|
+
const specContent = await readSetupSpecFile(name);
|
|
2548
|
+
const spec = parseSetupSpec(specContent);
|
|
1615
2549
|
const dummyPath = join(getSetupDir(name), "test.dummy.spec.ts");
|
|
1616
2550
|
const finalPath = join(getSetupDir(name), "test.spec.ts");
|
|
2551
|
+
let cleanedActions = [];
|
|
1617
2552
|
if (fromDummy) {
|
|
1618
2553
|
if (!await stat(dummyPath).then(() => true).catch(() => false)) {
|
|
1619
2554
|
warn(`test.dummy.spec.ts not found. Run without --from-dummy first.`);
|
|
@@ -1624,40 +2559,52 @@ async function runGenerateSetup(name, maxRetries, fromDummy) {
|
|
|
1624
2559
|
const { actions } = await getSetupActions(name);
|
|
1625
2560
|
meta("setup", spec.title);
|
|
1626
2561
|
meta("actions", actions.length);
|
|
2562
|
+
meta("fix-mode", mode);
|
|
2563
|
+
meta("language", outputLanguage);
|
|
1627
2564
|
blank();
|
|
1628
|
-
|
|
2565
|
+
cleanedActions = await cleanupActions(actions);
|
|
1629
2566
|
if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
|
|
1630
2567
|
await writeFile(dummyPath, actionsToScript(cleanedActions, spec.title), "utf-8");
|
|
1631
2568
|
meta("saved", dummyPath);
|
|
1632
2569
|
}
|
|
1633
2570
|
blank();
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
2571
|
+
const agentBrowserSession = `ccqa-generate-setup-${name}-${Date.now()}`;
|
|
2572
|
+
const runVitestForSession = (path) => runVitestResolved(path, agentBrowserSession);
|
|
2573
|
+
await closeSession(agentBrowserSession);
|
|
2574
|
+
const signalHandler = () => {
|
|
2575
|
+
closeSession(agentBrowserSession).finally(() => process.exit(130));
|
|
2576
|
+
};
|
|
2577
|
+
process.once("SIGINT", signalHandler);
|
|
2578
|
+
process.once("SIGTERM", signalHandler);
|
|
2579
|
+
try {
|
|
2580
|
+
const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(dummyPath), "run");
|
|
2581
|
+
let passed = initialRun.exitCode === 0;
|
|
2582
|
+
if (!passed) passed = await runAutoFixLoop({
|
|
2583
|
+
scriptPath: dummyPath,
|
|
2584
|
+
initialRun,
|
|
2585
|
+
specMarkdown: specContent,
|
|
2586
|
+
actions: cleanedActions,
|
|
2587
|
+
maxRetries,
|
|
2588
|
+
mode,
|
|
2589
|
+
runVitest: runVitestForSession,
|
|
2590
|
+
agentBrowserSession,
|
|
2591
|
+
outputLanguage
|
|
2592
|
+
});
|
|
2593
|
+
if (!passed) {
|
|
2594
|
+
warn("auto-fix exhausted; setup test still failing");
|
|
1652
2595
|
hint(`edit ${dummyPath} manually, then run: ccqa generate-setup ${name} --from-dummy`);
|
|
1653
2596
|
process.exit(1);
|
|
1654
2597
|
}
|
|
2598
|
+
await writeFile(finalPath, reversePlaceholdersInScript(await readFile(dummyPath, "utf8"), spec.placeholders), "utf-8");
|
|
2599
|
+
await unlink(dummyPath).catch(() => {});
|
|
2600
|
+
blank();
|
|
2601
|
+
meta("saved", finalPath);
|
|
2602
|
+
hint(`setup '${name}' is ready; reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
|
|
2603
|
+
} finally {
|
|
2604
|
+
process.off("SIGINT", signalHandler);
|
|
2605
|
+
process.off("SIGTERM", signalHandler);
|
|
2606
|
+
await closeSession(agentBrowserSession);
|
|
1655
2607
|
}
|
|
1656
|
-
await writeFile(finalPath, reversePlaceholdersInScript(currentScript, spec.placeholders), "utf-8");
|
|
1657
|
-
await unlink(dummyPath).catch(() => {});
|
|
1658
|
-
blank();
|
|
1659
|
-
meta("saved", finalPath);
|
|
1660
|
-
hint(`setup '${name}' is ready — reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
|
|
1661
2608
|
}
|
|
1662
2609
|
/**
|
|
1663
2610
|
* Replace dummy values with {{placeholder}} directly in the test script text.
|
|
@@ -1670,51 +2617,54 @@ function reversePlaceholdersInScript(script, placeholders) {
|
|
|
1670
2617
|
for (const [key, def] of entries) result = result.replaceAll(def.dummy, `{{${key}}}`);
|
|
1671
2618
|
return result;
|
|
1672
2619
|
}
|
|
1673
|
-
async function
|
|
1674
|
-
|
|
1675
|
-
const { result, isError } = await invokeClaudeStreaming({
|
|
1676
|
-
prompt: buildAutoFixPrompt(script, failureLog),
|
|
1677
|
-
disableBuiltinTools: true,
|
|
1678
|
-
maxTurns: 1
|
|
1679
|
-
}, () => {});
|
|
1680
|
-
if (isError || !result) return null;
|
|
1681
|
-
const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
|
|
1682
|
-
const fixes = JSON.parse(json);
|
|
1683
|
-
if (!Array.isArray(fixes) || fixes.length === 0) return null;
|
|
1684
|
-
return applySleepFixes(script, fixes);
|
|
1685
|
-
} catch {
|
|
1686
|
-
return null;
|
|
1687
|
-
}
|
|
1688
|
-
}
|
|
1689
|
-
function applySleepFixes(script, fixes) {
|
|
1690
|
-
const lines = script.split("\n");
|
|
1691
|
-
for (const fix of fixes) if ("increase_to" in fix) {
|
|
1692
|
-
const idx = fix.line - 1;
|
|
1693
|
-
if (idx >= 0 && idx < lines.length) lines[idx] = lines[idx].replace(/spawnSync\("sleep",\s*\["\d+"\]/, `spawnSync("sleep", ["${fix.increase_to}"]`);
|
|
1694
|
-
}
|
|
1695
|
-
const inserts = fixes.filter((f) => "seconds" in f && !("increase_to" in f)).sort((a, b) => b.line - a.line);
|
|
1696
|
-
for (const fix of inserts) {
|
|
1697
|
-
const idx = fix.line - 1;
|
|
1698
|
-
if (idx >= 0 && idx <= lines.length) lines.splice(idx, 0, ` spawnSync("sleep", ["${fix.seconds}"], { stdio: "inherit" });`);
|
|
1699
|
-
}
|
|
1700
|
-
return lines.join("\n");
|
|
1701
|
-
}
|
|
1702
|
-
async function runVitest(scriptPath) {
|
|
1703
|
-
const { exitCode, stdout, stderr } = await spawnVitestCaptured([
|
|
2620
|
+
async function runVitest(scriptPath, agentBrowserSession) {
|
|
2621
|
+
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
1704
2622
|
"run",
|
|
1705
2623
|
"--config",
|
|
1706
2624
|
bundledVitestConfigPath(),
|
|
1707
2625
|
scriptPath
|
|
1708
|
-
]
|
|
2626
|
+
], agentBrowserSession ? { env: {
|
|
2627
|
+
...process.env,
|
|
2628
|
+
AGENT_BROWSER_SESSION: agentBrowserSession
|
|
2629
|
+
} } : {});
|
|
1709
2630
|
const currentScript = await readFile(scriptPath, "utf8");
|
|
1710
|
-
process.stdout.write(stdout);
|
|
1711
|
-
if (stderr) process.stderr.write(stderr);
|
|
1712
2631
|
return {
|
|
1713
2632
|
exitCode,
|
|
1714
2633
|
output: stdout + stderr,
|
|
1715
2634
|
currentScript
|
|
1716
2635
|
};
|
|
1717
2636
|
}
|
|
2637
|
+
/**
 * Run vitest on a script whose text may contain `${VAR}` env refs, expanding
 * them to real values only for the duration of the run.
 *
 * The expanded text is written to a sibling temp file and that file is what
 * vitest executes; the file at `scriptPath` is never modified, so a later
 * reverse-replace still sees the env-ref literals. The script is re-read on
 * every call because callers may rewrite it between runs (auto-fix).
 *
 * @param {string} scriptPath - Path of the spec file to run.
 * @param {string} [agentBrowserSession] - When set, passed to the vitest
 *   process as the `AGENT_BROWSER_SESSION` environment variable.
 * @returns {Promise<{exitCode: *, output: string, currentScript: string}>}
 *   `output` is stdout followed by stderr; `currentScript` is the unresolved
 *   text of `scriptPath`.
 */
async function runVitestResolved(scriptPath, agentBrowserSession) {
	const original = await readFile(scriptPath, "utf8");
	if (!hasEnvRef(original)) return runVitest(scriptPath, agentBrowserSession);
	const resolvedSuffix = ".__resolved.spec.ts";
	// The temp path must never equal scriptPath: otherwise the resolved secrets
	// would overwrite the original and the cleanup below would delete it. A
	// path without a ".ts" suffix therefore gets the suffix appended.
	const tmpPath = scriptPath.endsWith(".ts") ? `${scriptPath.slice(0, -3)}${resolvedSuffix}` : `${scriptPath}${resolvedSuffix}`;
	// The temp file holds real secret values, so create it owner-only.
	await writeFile(tmpPath, resolveEnvRefs(original), {
		encoding: "utf-8",
		mode: 384
	});
	try {
		const { exitCode, stdout, stderr } = await spawnVitestTeed([
			"run",
			"--config",
			bundledVitestConfigPath(),
			tmpPath
		], agentBrowserSession ? { env: {
			...process.env,
			AGENT_BROWSER_SESSION: agentBrowserSession
		} } : {});
		return {
			exitCode,
			output: stdout + stderr,
			currentScript: original
		};
	} finally {
		// Best-effort removal so resolved secrets do not linger on disk.
		await unlink(tmpPath).catch(() => {});
	}
}
|
|
1718
2668
|
async function cleanupActions(actions) {
|
|
1719
2669
|
try {
|
|
1720
2670
|
const { result, isError } = await invokeClaudeStreaming({
|