ccqa 0.3.5 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -1
- package/dist/bin/ccqa.mjs +1007 -182
- package/dist/package.json +1 -1
- package/dist/runtime/test-helpers.mjs +6 -2
- package/package.json +1 -1
package/dist/bin/ccqa.mjs
CHANGED
|
@@ -8,6 +8,7 @@ import { delimiter, dirname, join, resolve } from "node:path";
|
|
|
8
8
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
9
9
|
import matter from "gray-matter";
|
|
10
10
|
import { spawn } from "node:child_process";
|
|
11
|
+
import { createInterface } from "node:readline";
|
|
11
12
|
import { tmpdir } from "node:os";
|
|
12
13
|
//#region src/prompts/trace.ts
|
|
13
14
|
function generateSessionName() {
|
|
@@ -285,14 +286,17 @@ const STEP_ICONS = {
|
|
|
285
286
|
/**
 * Print the banner for a CLI invocation: `ccqa <command>` followed by the
 * optional target, framed by blank lines.
 *
 * @param {string} command - Sub-command name.
 * @param {string} [target] - Optional target appended after the command.
 */
function header(command, target) {
	const suffix = target ? ` ${target}` : "";
	process.stdout.write(`\nccqa ${command}${suffix}\n\n`);
}
/**
 * Emit a single `[scope] message` line to the given sink.
 *
 * @param {string} scope - Tag placed between the square brackets.
 * @param {string} message - Text following the tag.
 * @param {{ write(chunk: string): unknown }} [sink] - Destination; defaults to stdout.
 */
function write(scope, message, sink = process.stdout) {
	const line = `[${scope}] ${message}\n`;
	sink.write(line);
}
/** Log a `key: value` pair under the "meta" scope. */
function meta(key, value) {
	const pair = `${key}: ${value}`;
	write("meta", pair);
}
/** Write an empty line to stdout. */
function blank() {
	const newline = "\n";
	process.stdout.write(newline);
}
/** Log a message under the "info" scope. */
function info(message) {
	const scope = "info";
	write(scope, message);
}
|
|
297
301
|
function step(type, stepId, detail) {
|
|
298
302
|
process.stdout.write(` ${STEP_ICONS[type]} [${stepId}] ${detail}\n`);
|
|
@@ -301,13 +305,37 @@ function bash(command) {
|
|
|
301
305
|
process.stdout.write(` $ ${command.slice(0, 120)}\n`);
|
|
302
306
|
}
|
|
303
307
|
/** Log a message under the "error" scope on stderr. */
function error(message) {
	const sink = process.stderr;
	write("error", message, sink);
}
/** Log a message under the "warn" scope on stderr. */
function warn(message) {
	const sink = process.stderr;
	write("warn", message, sink);
}
/** Print a blank line on stdout, then log the message under the "hint" scope. */
function hint(message) {
	process.stdout.write("\n");
	const scope = "hint";
	write(scope, message);
}
/** Log a message under the "fix" scope. */
function fix(message) {
	const scope = "fix";
	write(scope, message);
}
/** Log a message under the "run" scope. */
function run(message) {
	const scope = "run";
	write(scope, message);
}
/**
 * Run `fn` and bracket it with `<label> started` / `<label> finished in N.Ns`
 * log lines under `scope`. If `fn` (or the "finished" log call) throws, a
 * `<label> threw after N.Ns` line is logged and the error is re-thrown.
 *
 * @param {string} label - Human-readable name of the phase.
 * @param {() => unknown} fn - Work to time; its result is awaited.
 * @param {string} [scope="fix"] - Log tag used for the markers.
 * @returns {Promise<unknown>} Whatever `fn` resolves to.
 */
async function timedPhase(label, fn, scope = "fix") {
	const startedAt = Date.now();
	// Seconds elapsed since the phase started, with one decimal place.
	const elapsed = () => ((Date.now() - startedAt) / 1e3).toFixed(1);
	write(scope, `${label} started`);
	try {
		const outcome = await fn();
		write(scope, `${label} finished in ${elapsed()}s`);
		return outcome;
	} catch (failure) {
		write(scope, `${label} threw after ${elapsed()}s`);
		throw failure;
	}
}
|
|
312
340
|
//#endregion
|
|
313
341
|
//#region src/claude/invoke.ts
|
|
@@ -488,13 +516,27 @@ const CCQA_DIR = ".ccqa";
|
|
|
488
516
|
/**
 * Path of the ccqa working directory (`CCQA_DIR`) inside `cwd`.
 *
 * @param {string} [cwd] - Base directory; defaults to the process's current directory.
 * @returns {string} `cwd` joined with `CCQA_DIR`.
 */
function getCcqaDir(cwd = process.cwd()) {
	const base = cwd;
	return join(base, CCQA_DIR);
}
|
|
519
|
+
/**
 * Accepts both the canonical 2-segment alias and the on-disk 4-segment path
 * (which is what shell tab-completion produces):
 *   - "tasks/create-and-complete"
 *   - "features/tasks/test-cases/create-and-complete"
 *   - ".ccqa/features/tasks/test-cases/create-and-complete"
 * All forms resolve to { featureName: "tasks", specName: "create-and-complete" }.
 * Trailing slashes, repeated slashes and "." segments are tolerated.
 *
 * ".." segments are rejected: the resulting names are joined into filesystem
 * paths, so accepting them would let a spec id point outside the ccqa
 * directory.
 *
 * @param {string} specPath - Spec id as typed on the command line.
 * @returns {{ featureName: string, specName: string }}
 * @throws {Error} When the path matches neither accepted shape.
 */
function parseSpecPath(specPath) {
	// Empty segments come from leading/trailing/double slashes; "." is a no-op.
	const parts = specPath.split("/").filter((p) => p.length > 0 && p !== ".");
	const invalid = () => new Error(`Invalid spec path: "${specPath}". Expected "<feature>/<spec>" or "features/<feature>/test-cases/<spec>".`);
	if (parts.includes("..")) throw invalid();
	if (parts[0] === ".ccqa") parts.shift();
	if (parts.length === 4 && parts[0] === "features" && parts[2] === "test-cases") return {
		featureName: parts[1],
		specName: parts[3]
	};
	if (parts.length === 2) return {
		featureName: parts[0],
		specName: parts[1]
	};
	throw invalid();
}
|
|
499
541
|
function getFeatureDir(featureName, cwd) {
|
|
500
542
|
return join(getCcqaDir(cwd), "features", featureName);
|
|
@@ -706,10 +748,10 @@ function bundledVitestConfigPath() {
|
|
|
706
748
|
}
|
|
707
749
|
//#endregion
|
|
708
750
|
//#region src/runtime/spawn-vitest.ts
|
|
709
|
-
const require$
|
|
751
|
+
const require$2 = createRequire(import.meta.url);
|
|
710
752
|
function resolveVitestBin() {
|
|
711
|
-
const pkgPath = require$
|
|
712
|
-
const pkg = require$
|
|
753
|
+
const pkgPath = require$2.resolve("vitest/package.json");
|
|
754
|
+
const pkg = require$2(pkgPath);
|
|
713
755
|
const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.vitest;
|
|
714
756
|
if (!binRel) throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
|
|
715
757
|
return resolve(dirname(pkgPath), binRel);
|
|
@@ -727,6 +769,19 @@ async function spawnVitestCaptured(args, opts = {}) {
|
|
|
727
769
|
stderr
|
|
728
770
|
};
|
|
729
771
|
}
|
|
772
|
+
/**
 * Spawn vitest with piped stdio, mirroring the child's stdout/stderr onto
 * this process's stdout/stderr while also capturing them.
 *
 * @param {string[]} args - Arguments forwarded to `spawnVitestChild`.
 * @param {object} [opts] - Options forwarded to `spawnVitestChild`.
 * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
 */
async function spawnVitestTeed(args, opts = {}) {
	const child = spawnVitestChild(args, opts, "pipe");
	// Drain both pipes and wait for exit concurrently.
	const pending = [
		teeDrain(child.stdout, process.stdout),
		teeDrain(child.stderr, process.stderr),
		waitExit(child)
	];
	const settled = await Promise.all(pending);
	const capturedOut = settled[0];
	const capturedErr = settled[1];
	const code = settled[2];
	return {
		exitCode: code,
		stdout: capturedOut,
		stderr: capturedErr
	};
}
|
|
730
785
|
function spawnVitestStreaming(args, opts = {}) {
|
|
731
786
|
const child = spawnVitestChild(args, opts, "pipe");
|
|
732
787
|
return {
|
|
@@ -754,6 +809,15 @@ async function drain(stream) {
|
|
|
754
809
|
for await (const chunk of stream) buf += chunk;
|
|
755
810
|
return buf;
|
|
756
811
|
}
|
|
812
|
+
/**
 * Read `stream` to the end as UTF-8, forwarding every chunk to `sink` as it
 * arrives and returning the full concatenated text.
 *
 * @param {import("node:stream").Readable} stream - Source stream.
 * @param {{ write(chunk: string): unknown }} sink - Receives each chunk.
 * @returns {Promise<string>} Everything read from `stream`.
 */
async function teeDrain(stream, sink) {
	stream.setEncoding("utf8");
	const pieces = [];
	for await (const piece of stream) {
		pieces.push(piece);
		sink.write(piece);
	}
	return pieces.join("");
}
|
|
757
821
|
function waitExit(child) {
|
|
758
822
|
return new Promise((resolvePromise, rejectPromise) => {
|
|
759
823
|
child.once("exit", (code) => resolvePromise(code ?? 0));
|
|
@@ -762,7 +826,7 @@ function waitExit(child) {
|
|
|
762
826
|
}
|
|
763
827
|
//#endregion
|
|
764
828
|
//#region src/runtime/agent-browser-bin.ts
|
|
765
|
-
const require = createRequire(import.meta.url);
|
|
829
|
+
const require$1 = createRequire(import.meta.url);
|
|
766
830
|
/**
|
|
767
831
|
* Resolves the directory containing the `agent-browser` shim that npm/pnpm
|
|
768
832
|
* exposes on PATH for the peer-installed package. Used by `ccqa trace` to
|
|
@@ -774,7 +838,7 @@ const require = createRequire(import.meta.url);
|
|
|
774
838
|
function resolveAgentBrowserBinDir() {
|
|
775
839
|
let pkgJsonPath;
|
|
776
840
|
try {
|
|
777
|
-
pkgJsonPath = require.resolve("agent-browser/package.json");
|
|
841
|
+
pkgJsonPath = require$1.resolve("agent-browser/package.json");
|
|
778
842
|
} catch {
|
|
779
843
|
return null;
|
|
780
844
|
}
|
|
@@ -844,7 +908,7 @@ function envRefsToJsExpression(value) {
|
|
|
844
908
|
}
|
|
845
909
|
//#endregion
|
|
846
910
|
//#region src/cli/trace.ts
|
|
847
|
-
const traceCommand = new Command("trace").argument("<feature/spec>", "Spec
|
|
911
|
+
/**
 * `ccqa trace <feature/spec>`: parses the spec id and hands the feature and
 * spec names to `runTrace`.
 */
const traceCommand = new Command("trace")
	.argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)")
	.description("Run agent-browser, verify assertions, and record structured actions")
	.action(async (specPath) => {
		const spec = parseSpecPath(specPath);
		await runTrace(spec.featureName, spec.specName);
	});
|
|
@@ -943,7 +1007,7 @@ async function runSetups(setups, sessionName) {
|
|
|
943
1007
|
throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
|
|
944
1008
|
});
|
|
945
1009
|
for (const [key, value] of Object.entries(ref.params ?? {})) script = script.replaceAll(`{{${key}}}`, resolveEnvRefs(value));
|
|
946
|
-
script = script.replace(/process\.env\.AGENT_BROWSER_SESSION\s
|
|
1010
|
+
script = script.replace(/process\.env\.AGENT_BROWSER_SESSION\s*\|?\|?=\s*`.+`;/, `process.env.AGENT_BROWSER_SESSION = ${JSON.stringify(sessionName)};`);
|
|
947
1011
|
const tmpPath = join(getSetupDir(ref.name), `_run.spec.ts`);
|
|
948
1012
|
await writeFile(tmpPath, script, "utf-8");
|
|
949
1013
|
try {
|
|
@@ -1062,8 +1126,10 @@ function actionsToScript(actions, title, setupScripts) {
|
|
|
1062
1126
|
`import { spawnSync } from "node:child_process";`,
|
|
1063
1127
|
`import { ab, abWait, abAssertTextVisible, abAssertVisible, abAssertNotVisible, abAssertUrl, abAssertEnabled, abAssertDisabled, abAssertChecked, abAssertUnchecked } from "ccqa/test-helpers";`,
|
|
1064
1128
|
"",
|
|
1065
|
-
`// Single session shared across all tests — reset per run via cookies clear in first test
|
|
1066
|
-
|
|
1129
|
+
`// Single session shared across all tests — reset per run via cookies clear in first test.`,
|
|
1130
|
+
`// Use ||= so an outer harness (e.g. ccqa generate's auto-fix loop) can pre-set the session`,
|
|
1131
|
+
`// name and inspect the same session after the run finishes.`,
|
|
1132
|
+
`process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
|
|
1067
1133
|
""
|
|
1068
1134
|
]];
|
|
1069
1135
|
if (setupScripts?.length) for (const setup of setupScripts) parts.push(`test("setup: ${setup.name}", () => {`, setup.body, "}, 3 * 60 * 1000);", "");
|
|
@@ -1168,43 +1234,6 @@ function actionToLine(action) {
|
|
|
1168
1234
|
const j = (s) => JSON.stringify(s);
|
|
1169
1235
|
//#endregion
|
|
1170
1236
|
//#region src/prompts/codegen.ts
|
|
1171
|
-
function buildAutoFixPrompt(script, failureLog) {
|
|
1172
|
-
return `You are analyzing a failing E2E test script. The test fails because some browser actions execute before the page has finished loading or navigating.
|
|
1173
|
-
|
|
1174
|
-
Your task: identify which line numbers need a sleep/wait inserted BEFORE them to fix timing issues.
|
|
1175
|
-
|
|
1176
|
-
## Rules
|
|
1177
|
-
- ONLY identify lines where a sleep is needed — do NOT suggest any other changes
|
|
1178
|
-
- Common patterns that need a sleep:
|
|
1179
|
-
- After \`ab("open", ...)\` when the next line interacts with elements (fill, click, etc.)
|
|
1180
|
-
- After \`ab("press", "Enter")\` or \`ab("click", ...)\` when a page navigation occurs before the next action
|
|
1181
|
-
- After any action that triggers a redirect or page reload
|
|
1182
|
-
- Look at the error log to identify WHICH lines failed, then determine if a sleep before that line would fix it
|
|
1183
|
-
- If a \`spawnSync("sleep", ...)\` already exists before a failing line, suggest increasing its duration instead
|
|
1184
|
-
- Output ONLY a JSON array of objects, no explanation, no markdown code fences
|
|
1185
|
-
|
|
1186
|
-
## Output format
|
|
1187
|
-
Each object has:
|
|
1188
|
-
- "line": the 1-based line number to insert a sleep BEFORE
|
|
1189
|
-
- "seconds": recommended sleep duration (typically 3-5)
|
|
1190
|
-
- "reason": very short explanation (e.g., "page navigation after form submit")
|
|
1191
|
-
|
|
1192
|
-
If a sleep already exists and needs to be increased:
|
|
1193
|
-
- "line": the line number of the existing sleep
|
|
1194
|
-
- "increase_to": the new duration in seconds
|
|
1195
|
-
- "reason": explanation
|
|
1196
|
-
|
|
1197
|
-
Example output:
|
|
1198
|
-
[{"line": 15, "seconds": 3, "reason": "page navigation after press Enter"}, {"line": 22, "increase_to": 5, "reason": "slow page load"}]
|
|
1199
|
-
|
|
1200
|
-
If no fixes are needed, return: []
|
|
1201
|
-
|
|
1202
|
-
## Test Script (with line numbers)
|
|
1203
|
-
${script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n")}
|
|
1204
|
-
|
|
1205
|
-
## Failure Log
|
|
1206
|
-
${failureLog.slice(0, 3e3)}`;
|
|
1207
|
-
}
|
|
1208
1237
|
function buildCleanupPrompt(actions) {
|
|
1209
1238
|
return `You are given a list of browser actions recorded during an E2E test trace.
|
|
1210
1239
|
The trace contains noise: failed attempts, redundant retries, and duplicate operations recorded because the agent explored multiple strategies.
|
|
@@ -1235,54 +1264,890 @@ ${actions.map((a, i) => {
|
|
|
1235
1264
|
}).join("\n")}`;
|
|
1236
1265
|
}
|
|
1237
1266
|
//#endregion
|
|
1267
|
+
//#region src/diagnose/apply.ts
|
|
1268
|
+
/**
 * Apply a diagnosis to a test script by dispatching on `diagnosis.type`.
 *
 * TIMING_ISSUE, OVER_ASSERTION and SELECTOR_DRIFT are delegated to their
 * dedicated appliers. DATA_MISSING and UNKNOWN are never auto-applied. Any
 * other type is reported as not applied instead of returning `undefined`, so
 * callers can always read `.applied` on the result.
 *
 * @param {string} script - Current test script source.
 * @param {{ type: string }} diagnosis - Normalised diagnosis object.
 * @returns {{ applied: true, script: string, summary: string } | { applied: false, reason: string }}
 */
function applyDiagnosis(script, diagnosis) {
	switch (diagnosis?.type) {
		case "TIMING_ISSUE": return applyTiming(script, diagnosis.fixes);
		case "OVER_ASSERTION": return applyOverAssertion(script, diagnosis.lines);
		case "SELECTOR_DRIFT": return applySelectorDrift(script, diagnosis.line, diagnosis.oldSelector, diagnosis.newSelector);
		case "DATA_MISSING": return {
			applied: false,
			reason: `data missing — ${diagnosis.reason}`
		};
		case "UNKNOWN": return {
			applied: false,
			reason: `unknown failure — ${diagnosis.reason}`
		};
		default: return {
			applied: false,
			reason: `unsupported diagnosis type — ${String(diagnosis?.type)}`
		};
	}
}
|
|
1283
|
+
/**
 * Apply TIMING_ISSUE fixes to a test script.
 *
 * "increase" fixes rewrite the duration of an existing
 * `spawnSync("sleep", ["N"]` call on the referenced line. "insert" fixes add
 * a new sleep line BEFORE the referenced line. Increases run first, while
 * line numbers still match the script as numbered; inserts are then applied
 * bottom-up so earlier insertions do not shift later targets.
 *
 * Fixes whose line is not an in-range integer, or whose duration is not a
 * finite non-negative number, are skipped rather than crashing or emitting
 * an unusable sleep. Inserted lines copy the indentation of the line they
 * precede (or of the last line when appending at the end).
 *
 * @param {string} script - Test script source.
 * @param {Array<{ kind: "insert" | "increase", line: number, seconds?: number, increase_to?: number }>} fixes
 * @returns {{ applied: true, script: string, summary: string } | { applied: false, reason: string }}
 */
function applyTiming(script, fixes) {
	if (fixes.length === 0) return {
		applied: false,
		reason: "no timing fixes proposed"
	};
	const lines = script.split("\n");
	const summary = [];
	const isDuration = (n) => typeof n === "number" && Number.isFinite(n) && n >= 0;
	// `max` is the highest valid 0-based index for the operation.
	const indexOf = (fix, max) => {
		const idx = fix.line - 1;
		return Number.isInteger(idx) && idx >= 0 && idx <= max ? idx : -1;
	};
	for (const fix of fixes) {
		if (fix.kind !== "increase" || !isDuration(fix.increase_to)) continue;
		const idx = indexOf(fix, lines.length - 1);
		if (idx < 0) continue;
		const original = lines[idx];
		const replaced = original.replace(/spawnSync\("sleep",\s*\["\d+"\]/, () => `spawnSync("sleep", ["${fix.increase_to}"]`);
		if (replaced !== original) {
			lines[idx] = replaced;
			summary.push(`line ${fix.line}: sleep → ${fix.increase_to}s`);
		}
	}
	// An insert may target one past the last line (append), hence `lines.length`.
	const inserts = fixes
		.filter((f) => f.kind === "insert" && isDuration(f.seconds) && indexOf(f, lines.length) >= 0)
		.sort((a, b) => b.line - a.line);
	for (const fix of inserts) {
		const idx = fix.line - 1;
		const indent = /^[ \t]*/.exec(lines[idx] ?? lines[idx - 1] ?? "")[0];
		lines.splice(idx, 0, `${indent}spawnSync("sleep", ["${fix.seconds}"], { stdio: "inherit" });`);
		summary.push(`line ${fix.line}: insert sleep ${fix.seconds}s`);
	}
	if (summary.length === 0) return {
		applied: false,
		reason: "timing fixes pointed at out-of-range lines"
	};
	return {
		applied: true,
		script: lines.join("\n"),
		summary: summary.join("; ")
	};
}
|
|
1317
|
+
/**
 * Remove over-eager assertion lines from a test script.
 *
 * Only lines that contain an `abAssert*(` call are removed. A bare mention of
 * an `abAssert…` identifier is not enough, because the generated script's
 * import line lists those helpers and must never be deleted. Line numbers are
 * de-duplicated and processed bottom-up so removals do not shift the
 * remaining targets. Non-integer or out-of-range numbers are ignored.
 *
 * @param {string} script - Test script source.
 * @param {number[]} lineNumbers - 1-based line numbers proposed for removal.
 * @returns {{ applied: true, script: string, summary: string } | { applied: false, reason: string }}
 */
function applyOverAssertion(script, lineNumbers) {
	if (lineNumbers.length === 0) return {
		applied: false,
		reason: "no lines to remove"
	};
	const lines = script.split("\n");
	const assertCall = /\babAssert\w*\s*\(/;
	const targets = [...new Set(lineNumbers)]
		.filter((n) => Number.isInteger(n) && n >= 1 && n <= lines.length)
		.sort((a, b) => b - a);
	let removedCount = 0;
	for (const line of targets) {
		const idx = line - 1;
		if (!assertCall.test(lines[idx])) continue;
		lines.splice(idx, 1);
		removedCount++;
	}
	if (removedCount === 0) return {
		applied: false,
		reason: "no abAssert lines matched the proposed line numbers"
	};
	return {
		applied: true,
		script: lines.join("\n"),
		summary: `removed ${removedCount} assertion(s)`
	};
}
|
|
1343
|
+
/**
 * Replace a drifted selector on one line of a test script.
 *
 * Every occurrence of `oldSelector` on the target line is replaced with
 * `newSelector`. The replacement goes through a replacer function so `$`
 * sequences in `newSelector` (`$$`, `$&`, …) are inserted literally rather
 * than being read as replacement patterns.
 *
 * @param {string} script - Test script source.
 * @param {number} line - 1-based line number holding the selector.
 * @param {string} oldSelector - Exact text expected on that line.
 * @param {string} newSelector - Text to put in its place.
 * @returns {{ applied: true, script: string, summary: string } | { applied: false, reason: string }}
 */
function applySelectorDrift(script, line, oldSelector, newSelector) {
	const lines = script.split("\n");
	const idx = line - 1;
	// A fractional or NaN index would read `undefined` from the array below.
	if (!Number.isInteger(idx) || idx < 0 || idx >= lines.length) return {
		applied: false,
		reason: `line ${line} out of range`
	};
	// An empty needle "matches" everywhere and would splice the new selector
	// between every character of the line.
	if (oldSelector === "") return {
		applied: false,
		reason: "oldSelector is empty"
	};
	const content = lines[idx];
	if (!content.includes(oldSelector)) return {
		applied: false,
		reason: `oldSelector not found on line ${line}`
	};
	lines[idx] = content.replaceAll(oldSelector, () => newSelector);
	return {
		applied: true,
		script: lines.join("\n"),
		summary: `line ${line}: "${oldSelector}" → "${newSelector}"`
	};
}
|
|
1362
|
+
/**
 * Build a unified-style diff snippet for showing the user what would change.
 * Just the changed lines with -/+ prefixes; not a real patch.
 *
 * Lines are aligned with a longest-common-subsequence pass (after trimming
 * the shared prefix and suffix). An inserted or removed line is therefore
 * reported on its own and does not mark every following line as changed.
 *
 * @param {string} before - Original text.
 * @param {string} after - Modified text.
 * @returns {string} Newline-joined `- old` / `+ new` lines; "" when identical.
 */
function previewDiff(before, after) {
	const a = before.split("\n");
	const b = after.split("\n");
	// Trim what both sides share at the top and bottom; edits are usually
	// local, which keeps the LCS table tiny.
	let head = 0;
	while (head < a.length && head < b.length && a[head] === b[head]) head++;
	let endA = a.length;
	let endB = b.length;
	while (endA > head && endB > head && a[endA - 1] === b[endB - 1]) {
		endA--;
		endB--;
	}
	const left = a.slice(head, endA);
	const right = b.slice(head, endB);
	// lcs[i][j] = length of the longest common subsequence of left[i..] and right[j..].
	const lcs = Array.from({ length: left.length + 1 }, () => new Array(right.length + 1).fill(0));
	for (let i = left.length - 1; i >= 0; i--) {
		for (let j = right.length - 1; j >= 0; j--) {
			lcs[i][j] = left[i] === right[j] ? lcs[i + 1][j + 1] + 1 : Math.max(lcs[i + 1][j], lcs[i][j + 1]);
		}
	}
	const out = [];
	let i = 0;
	let j = 0;
	while (i < left.length && j < right.length) {
		if (left[i] === right[j]) {
			i++;
			j++;
		} else if (lcs[i + 1][j] >= lcs[i][j + 1]) {
			out.push(`- ${left[i++]}`);
		} else {
			out.push(`+ ${right[j++]}`);
		}
	}
	while (i < left.length) out.push(`- ${left[i++]}`);
	while (j < right.length) out.push(`+ ${right[j++]}`);
	return out.join("\n");
}
|
|
1378
|
+
//#endregion
|
|
1379
|
+
//#region src/diagnose/prompt.ts
|
|
1380
|
+
/**
 * Build the prompt sent to the model to classify a failing E2E test.
 *
 * The prompt contains the fixed instructions (categories, confidence guidance,
 * output rules), the spec markdown, a one-line-per-action summary of the
 * recorded actions, the test script with 1-based line numbers, the first
 * 4000 characters of the failure log and, when provided, the page snapshot
 * section produced by `formatPageSnapshot`.
 *
 * Missing `specMarkdown`, `actions` and `failureLog` default to empty values,
 * so an incomplete input yields an empty section instead of a thrown
 * TypeError or a literal "undefined" in the prompt.
 *
 * @param {object} input
 * @param {string} input.script - Test script source.
 * @param {string} [input.specMarkdown] - Contents of test-spec.md.
 * @param {Array<object>} [input.actions] - Recorded actions (command, assertType, selector, value, observation).
 * @param {string} [input.failureLog] - Runner output of the failing run.
 * @param {string} [input.pageSnapshot] - Accessibility tree captured after the failure.
 * @param {string} [input.outputLanguage="en"] - BCP-47 tag for human-readable fields.
 * @returns {string} The complete prompt text.
 */
function buildDiagnosePrompt(input) {
	const { script, specMarkdown = "", actions = [], failureLog = "", pageSnapshot, outputLanguage = "en" } = input;
	const numbered = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
	const actionSummary = actions.map((a, i) => {
		const parts = [`${i + 1}. ${a.command}`];
		if (a.assertType) parts.push(`assertType="${a.assertType}"`);
		if (a.selector) parts.push(`selector="${a.selector}"`);
		if (a.value) parts.push(`value="${a.value}"`);
		if (a.observation) parts.push(`→ ${a.observation}`);
		return parts.join(" ");
	}).join("\n");
	return `You are diagnosing a failing E2E test. The test was generated from a recorded trace of the original interaction. Compare the failing run against the original spec and recorded actions to determine WHY the test failed and what the right fix is.

## Output language

Write all human-readable fields (\`reasoning\`, \`reason\`) in **${outputLanguage}** (BCP-47 tag).
Selectors, file paths, identifiers, code, type names (TIMING_ISSUE, etc.), JSON keys, and quoted strings stay verbatim regardless of language.

## You have read-only filesystem tools

You can call \`Grep\`, \`Glob\`, and \`Read\` against the current repository before producing the JSON.

For SELECTOR_DRIFT specifically the failure log is usually NOT enough on its own — the runner only reports "selector X not visible". To confirm a rename, search the application source for the *type* of selector that's failing:

- For \`[aria-label='OLD']\` failures: \`Grep\` for \`aria-label=\` (or i18n key \`OLD\`) in the app source. If you find a near-miss like \`aria-label="NEW"\` whose text is a superset/rephrase of the failing label, that is your evidence.
- For \`[placeholder='OLD']\` failures: \`Grep\` for \`placeholder=\`.
- For \`[role='OLD']\` or \`[data-testid='OLD']\`: same pattern.
- For \`text=OLD\` failures: \`Grep\` the source / i18n bundles for \`OLD\`. Locale files (\`*.json\`, \`*.yml\`, \`messages.ts\`, etc.) often hold the canonical strings.

You have **up to 10 tool turns**. Spend them on grep/read; do not loop. Only when you have concrete file:line evidence should you emit SELECTOR_DRIFT — otherwise prefer UNKNOWN with confidence < 0.4 and let the human decide.

Do NOT attempt to write, edit, run shell commands, or hit the network. Only Grep/Glob/Read.

## Diagnosis categories

Pick exactly ONE category. The output JSON must follow the shape for that category.

1. TIMING_ISSUE — element not yet present because the page hasn't loaded / navigated. Fix by inserting or extending sleeps.
{
"diagnosis": {
"type": "TIMING_ISSUE",
"fixes": [
{ "kind": "insert", "line": <1-based>, "seconds": <int>, "reason": "<short>" },
{ "kind": "increase", "line": <1-based of existing sleep>, "increase_to": <int>, "reason": "<short>" }
]
},
"confidence": <0.0-1.0>,
"reasoning": "<why timing is the cause>"
}

2. OVER_ASSERTION — the test is asserting something the spec never required, OR a recorded assertion that is environment-dependent (e.g. a placeholder text that varies). The right fix is to remove those lines from the test.
{
"diagnosis": {
"type": "OVER_ASSERTION",
"lines": [<1-based line numbers to remove>],
"reason": "<short>"
},
"confidence": <0.0-1.0>,
"reasoning": "<why this assertion isn't required by the spec>"
}

3. SELECTOR_DRIFT — the page is healthy but a selector has been renamed/refined since the trace was recorded. The failure log will typically contain a snapshot showing the new selector. ONLY use this when you can name the exact replacement selector.
{
"diagnosis": {
"type": "SELECTOR_DRIFT",
"line": <1-based>,
"oldSelector": "<exact string in current line>",
"newSelector": "<exact replacement>",
"reason": "<short>"
},
"confidence": <0.0-1.0>,
"reasoning": "<evidence from failure log>"
}

4. DATA_MISSING — the test depends on data (a record, a setup, a logged-in state) that no longer exists. Not auto-fixable; the human must reseed or update the spec.
{
"diagnosis": { "type": "DATA_MISSING", "reason": "<what is missing>" },
"confidence": <0.0-1.0>,
"reasoning": "<evidence>"
}

5. UNKNOWN — none of the above fit, or evidence is too weak to choose.
{
"diagnosis": { "type": "UNKNOWN", "reason": "<short>" },
"confidence": <0.0-1.0>,
"reasoning": "<what you saw and why you can't classify>"
}

## Confidence guidance

- 0.9-1.0: failure log directly shows the cause (e.g. "selector X not found, snapshot lists Y" → SELECTOR_DRIFT)
- 0.7-0.9: strong indirect evidence (e.g. timing pattern after navigation, or assertion text that doesn't appear in spec)
- 0.4-0.7: plausible classification but multiple categories could explain it
- < 0.4: prefer UNKNOWN over guessing

## Rules

- Your **final** assistant message must start with \`{\` and end with \`}\` — a single JSON object, nothing before or after. No prose preamble like "Confirmed: ...", no markdown fences, no commentary, no tool calls in the same turn. If you have an analysis sentence, put it in the \`reasoning\` field.
- Line numbers refer to the numbered test script below (1-based).
- For SELECTOR_DRIFT, \`oldSelector\` must match a substring of the script at that line; \`newSelector\` must be backed by a concrete file:line you read with Grep/Read (do not invent). Cite the evidence in \`reasoning\`.
- For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`).
- Cross-check assertions against the spec markdown. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.

## Test Spec (test-spec.md)
${specMarkdown}

## Recorded Actions (actions.json summary)
${actionSummary}

## Test Script (with line numbers)
${numbered}

## Failure Log
${failureLog.slice(0, 4e3)}${pageSnapshot ? formatPageSnapshot(pageSnapshot) : ""}`;
}
|
|
1494
|
+
/**
 * Render the "Page Snapshot" section appended to the diagnose prompt.
 *
 * The section explains how to weigh the snapshot (near-miss selector →
 * SELECTOR_DRIFT, absent locator → TIMING_ISSUE or OVER_ASSERTION /
 * DATA_MISSING, unrelated page → DATA_MISSING) and then embeds the snapshot
 * text inside a fenced block. The output starts with two newlines so it can
 * be concatenated directly after the failure log.
 *
 * @param {string} snapshot - Snapshot text to embed.
 * @returns {string} The formatted section.
 */
function formatPageSnapshot(snapshot) {
	const fence = "```";
	const section = [
		"",
		"",
		"## Page Snapshot (accessibility tree captured right after the failure)",
		"",
		"This is the live state of the page when the test failed. Prefer this over your own assumptions:",
		"",
		"- If a near-miss of the failing selector appears here (e.g. failing `[aria-label='A']` and snapshot contains `aria-label=\"A-prime\"`), that is direct evidence of SELECTOR_DRIFT — propose the snapshot's value as `newSelector`.",
		"- If the failing locator is genuinely absent and no near-miss exists, the page may be still loading (TIMING_ISSUE) or the spec is asserting something not on this page (OVER_ASSERTION / DATA_MISSING).",
		"- If the snapshot looks unrelated to the spec (e.g. error page, login wall), DATA_MISSING is likely.",
		"",
		fence,
		`${snapshot}`,
		fence
	];
	return section.join("\n");
}
|
|
1517
|
+
//#endregion
|
|
1518
|
+
//#region src/diagnose/diagnose.ts
|
|
1519
|
+
/**
 * Ask the model to diagnose a failing test and parse its answer.
 *
 * The model is invoked with the diagnose prompt, the Read/Grep/Glob tools and
 * at most 10 turns. Outcomes:
 *   - SDK error → `{ result: null, raw, sdkError: true }`
 *   - empty response → `{ result: null, raw: "", sdkError: false }`
 *   - first JSON candidate that parses and normalises → that result
 *   - nothing usable → an UNKNOWN diagnosis with confidence 0 whose
 *     `reasoning` carries the (truncated) raw response.
 *
 * @param {object} input - Passed through to `buildDiagnosePrompt`.
 * @returns {Promise<{ result: object | null, raw: string, sdkError: boolean }>}
 */
async function diagnose(input) {
	const request = {
		prompt: buildDiagnosePrompt(input),
		allowedTools: ["Read", "Grep", "Glob"],
		maxTurns: 10
	};
	const ignoreEvents = () => {};
	const { result: raw, isError } = await invokeClaudeStreaming(request, ignoreEvents);
	if (isError) {
		return { result: null, raw: raw ?? "", sdkError: true };
	}
	if (!raw) {
		return { result: null, raw: "", sdkError: false };
	}
	for (const candidate of extractJsonCandidates(raw)) {
		let parsed;
		try {
			parsed = JSON.parse(candidate);
		} catch {
			// Not valid JSON — move on to the next candidate.
			continue;
		}
		const normalised = normaliseResult(parsed);
		if (normalised) {
			return { result: normalised, raw, sdkError: false };
		}
	}
	const fallback = {
		diagnosis: {
			type: "UNKNOWN",
			reason: "diagnose returned no parseable diagnosis JSON"
		},
		confidence: 0,
		reasoning: truncate$1(raw, 1e3)
	};
	return { result: fallback, raw, sdkError: false };
}
|
|
1567
|
+
/**
 * Pull every plausible JSON object out of `raw`. We try, in order:
 *   1. The whole string with code fences stripped (the prompt asks for
 *      JSON-only, so this is the happy path).
 *   2. Each balanced `{...}` block found by scanning the text. The model
 *      sometimes prefixes the JSON with a "Confirmed: ..." sentence or
 *      mentions partial JSON in its tool-using reasoning; we want to
 *      try the *last* well-formed object first because it's most likely
 *      the final answer, then earlier ones as a fallback.
 *
 * The caller `JSON.parse`s each candidate and stops at the first match
 * that normalises to a known DiagnosisResult.
 *
 * A `}` seen while no object is open is ignored. Without that, a stray
 * closing brace in the preamble would push the nesting depth below zero and
 * no later object could ever be recognised.
 *
 * @param {string} raw - Raw model output.
 * @returns {string[]} Candidate JSON strings, most likely first, without duplicates.
 */
function extractJsonCandidates(raw) {
	const out = [];
	const stripped = stripFence(raw);
	if (stripped) out.push(stripped);
	const blocks = [];
	let depth = 0;
	let start = -1;
	let inString = false;
	let escaped = false;
	for (let i = 0; i < raw.length; i++) {
		const ch = raw[i];
		if (inString) {
			// Braces inside a string literal do not affect nesting.
			if (escaped) escaped = false;
			else if (ch === "\\") escaped = true;
			else if (ch === "\"") inString = false;
			continue;
		}
		if (ch === "\"") {
			inString = true;
			continue;
		}
		if (ch === "{") {
			if (depth === 0) start = i;
			depth++;
		} else if (ch === "}") {
			if (depth === 0) continue;
			depth--;
			if (depth === 0) {
				blocks.push(raw.slice(start, i + 1));
				start = -1;
			}
		}
	}
	for (let i = blocks.length - 1; i >= 0; i--) {
		const block = blocks[i];
		if (!out.includes(block)) out.push(block);
	}
	return out;
}
/**
 * Shorten `s` to at most `max` characters, appending a marker that states
 * how many characters were dropped. Strings within the limit are returned
 * unchanged.
 *
 * @param {string} s
 * @param {number} max
 * @returns {string}
 */
function truncate$1(s, max) {
	return s.length <= max ? s : `${s.slice(0, max)}... [truncated, ${s.length - max} more chars]`;
}
/**
 * Trim `raw` and remove one leading ``` / ```json fence line and one
 * trailing ``` fence, if present.
 *
 * @param {string} raw
 * @returns {string}
 */
function stripFence(raw) {
	return raw.trim().replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, "").trim();
}
|
|
1624
|
+
/**
 * Validate a parsed model response and coerce it into a diagnosis result.
 *
 * Returns `null` when `parsed` is not an object or its `diagnosis` field
 * does not normalise. A numeric `confidence` is clamped to [0, 1], otherwise
 * it becomes 0; a non-string `reasoning` becomes "".
 *
 * @param {unknown} parsed - Value produced by `JSON.parse`.
 * @returns {{ diagnosis: object, confidence: number, reasoning: string } | null}
 */
function normaliseResult(parsed) {
	if (!isObject(parsed)) return null;
	const diagnosis = normaliseDiagnosis(parsed["diagnosis"]);
	if (!diagnosis) return null;
	const rawConfidence = parsed["confidence"];
	const rawReasoning = parsed["reasoning"];
	let confidence = 0;
	if (typeof rawConfidence === "number") confidence = clamp(rawConfidence, 0, 1);
	let reasoning = "";
	if (typeof rawReasoning === "string") reasoning = rawReasoning;
	return {
		diagnosis,
		confidence,
		reasoning
	};
}
|
|
1634
|
+
/**
 * Validate the `diagnosis` object of a model response and return a clean
 * copy containing only the fields of its category, or `null` when the
 * object is malformed or its `type` is not recognised.
 *
 * Line numbers must be positive integers. JSON can carry values such as
 * `3.5` or `0`, which cannot address a line of the script and would
 * misbehave when used as array indices by the appliers.
 *
 * @param {unknown} raw - The `diagnosis` field of the parsed response.
 * @returns {object | null} Normalised diagnosis, or `null` if unusable.
 */
function normaliseDiagnosis(raw) {
	if (!isObject(raw)) return null;
	const reason = typeof raw["reason"] === "string" ? raw["reason"] : "";
	const isLineNumber = (n) => Number.isInteger(n) && n >= 1;
	switch (raw["type"]) {
		case "TIMING_ISSUE": {
			const fixes = normaliseSleepFixes(raw["fixes"]);
			if (fixes.length === 0) return null;
			return {
				type: "TIMING_ISSUE",
				fixes
			};
		}
		case "OVER_ASSERTION": {
			const lines = Array.isArray(raw["lines"]) ? raw["lines"].filter(isLineNumber) : [];
			if (lines.length === 0) return null;
			return {
				type: "OVER_ASSERTION",
				lines,
				reason
			};
		}
		case "SELECTOR_DRIFT": {
			const line = raw["line"];
			const oldSelector = raw["oldSelector"];
			const newSelector = raw["newSelector"];
			if (!isLineNumber(line)) return null;
			// Both selectors must be non-empty strings.
			if (typeof oldSelector !== "string" || oldSelector === "") return null;
			if (typeof newSelector !== "string" || newSelector === "") return null;
			return {
				type: "SELECTOR_DRIFT",
				line,
				oldSelector,
				newSelector,
				reason
			};
		}
		case "DATA_MISSING": return {
			type: "DATA_MISSING",
			reason
		};
		case "UNKNOWN": return {
			type: "UNKNOWN",
			reason
		};
		default: return null;
	}
}
|
|
1678
|
+
/**
 * Filter and normalise the `fixes` array of a TIMING_ISSUE diagnosis.
 *
 * Each surviving entry becomes either
 *   `{ kind: "insert", line, seconds, reason }` or
 *   `{ kind: "increase", line, increase_to, reason }`.
 * An entry is read as an insert when `kind` is "insert", or when it has a
 * numeric `seconds` and no `increase_to` at all; otherwise it is an increase
 * when `increase_to` is numeric. Entries that are not objects, lack a numeric
 * `line`, or lack the numeric value their shape requires are dropped.
 *
 * @param {unknown} raw - candidate array of fixes.
 * @returns {object[]} normalised fixes in input order ([] when `raw` is not an array).
 */
function normaliseSleepFixes(raw) {
	if (!Array.isArray(raw)) return [];

	const fixes = [];
	for (const entry of raw) {
		if (!isObject(entry)) continue;

		const line = entry["line"];
		if (typeof line !== "number") continue;

		const reason = typeof entry["reason"] === "string" ? entry["reason"] : "";
		const kind = entry["kind"];
		const seconds = entry["seconds"];
		const increaseTo = entry["increase_to"];
		const hasSeconds = typeof seconds === "number";

		const treatAsInsert = kind === "insert" || (hasSeconds && increaseTo === void 0);
		if (treatAsInsert) {
			// An insert without a numeric duration is unusable and is skipped.
			if (hasSeconds) {
				fixes.push({
					kind: "insert",
					line,
					seconds,
					reason
				});
			}
			continue;
		}

		if (typeof increaseTo === "number") {
			fixes.push({
				kind: "increase",
				line,
				increase_to: increaseTo,
				reason
			});
		}
	}
	return fixes;
}
|
|
1712
|
+
/**
 * Tell whether a value is a non-null, non-array object.
 *
 * @param {unknown} v - value to test.
 * @returns {boolean} true for objects other than null and arrays.
 */
function isObject(v) {
	if (v === null || Array.isArray(v)) return false;
	return typeof v === "object";
}
|
|
1715
|
+
/**
 * Restrict a number to the inclusive range [lo, hi].
 *
 * @param {number} n - value to restrict.
 * @param {number} lo - lower bound.
 * @param {number} hi - upper bound.
 * @returns {number} `lo` when `n < lo`, `hi` when `n > hi`, otherwise `n`
 *   itself (so NaN, which compares false both ways, is returned unchanged).
 */
function clamp(n, lo, hi) {
	return n < lo ? lo : n > hi ? hi : n;
}
|
|
1720
|
+
//#endregion
|
|
1721
|
+
//#region src/diagnose/interactive.ts
|
|
1722
|
+
async function promptForChoice(input) {
|
|
1723
|
+
printContext(input);
|
|
1724
|
+
const rl = createInterface({
|
|
1725
|
+
input: process.stdin,
|
|
1726
|
+
output: process.stdout
|
|
1727
|
+
});
|
|
1728
|
+
try {
|
|
1729
|
+
while (true) switch ((await question(rl, "[a]pply / [s]kip / [m]anual / [q]uit > ")).trim().toLowerCase()) {
|
|
1730
|
+
case "a":
|
|
1731
|
+
case "apply": return "apply";
|
|
1732
|
+
case "s":
|
|
1733
|
+
case "skip": return "skip";
|
|
1734
|
+
case "m":
|
|
1735
|
+
case "manual": return "manual";
|
|
1736
|
+
case "q":
|
|
1737
|
+
case "quit": return "quit";
|
|
1738
|
+
default: process.stdout.write(" please answer a/s/m/q\n");
|
|
1739
|
+
}
|
|
1740
|
+
} finally {
|
|
1741
|
+
rl.close();
|
|
1742
|
+
}
|
|
1743
|
+
}
|
|
1744
|
+
/**
 * Promise wrapper around the callback-style `rl.question`.
 *
 * @param {{question: Function}} rl - readline interface (anything exposing `question(prompt, cb)`).
 * @param {string} prompt - text shown to the user.
 * @returns {Promise<string>} resolves with whatever `rl.question` passes to its callback.
 */
function question(rl, prompt) {
	return new Promise((done) => {
		rl.question(prompt, done);
	});
}
|
|
1747
|
+
/**
 * Write the interactive-prompt context to stdout, each line tagged `[fix]`:
 * the diagnosis type and confidence, the reasoning (when non-empty), the
 * per-diagnosis detail lines, then the failure excerpt and the proposed diff
 * when each is non-empty.
 *
 * @param {{result: object, diff: string, failureExcerpt: string}} context
 */
function printContext({ result, diff, failureExcerpt }) {
	const { diagnosis, confidence, reasoning } = result;
	const emit = (text) => process.stdout.write(text);
	// A titled, `[fix] `-prefixed multi-line section.
	const section = (title, body) => {
		emit(`\n[fix] ${title}:\n`);
		emit(prefixLines(body, "[fix] "));
		emit("\n");
	};

	emit("\n");
	emit(`[fix] diagnosis: ${diagnosis.type} (confidence ${confidence.toFixed(2)})\n`);
	if (reasoning) emit(`[fix] reasoning: ${reasoning}\n`);
	formatDiagnosisDetail(diagnosis).forEach((detail) => {
		emit(`[fix] ${detail}\n`);
	});
	if (failureExcerpt) section("failure excerpt", failureExcerpt);
	if (diff) section("proposed fix", diff);
	emit("\n");
}
|
|
1765
|
+
/**
 * Render the type-specific details of a diagnosis as display lines.
 *
 * @param {object} diagnosis - normalised diagnosis (discriminated by `type`).
 * @returns {string[] | undefined} one or two lines describing the diagnosis;
 *   undefined for a type this function does not know.
 */
function formatDiagnosisDetail(diagnosis) {
	const { type } = diagnosis;

	if (type === "TIMING_ISSUE") {
		const described = diagnosis.fixes.map((f) => {
			if (f.kind === "insert") return `insert ${f.seconds}s @ line ${f.line}`;
			return `increase to ${f.increase_to}s @ line ${f.line}`;
		});
		return [`fixes: ${described.join(", ")}`];
	}

	if (type === "OVER_ASSERTION") {
		return [`lines: ${diagnosis.lines.join(", ")}`, `reason: ${diagnosis.reason}`];
	}

	if (type === "SELECTOR_DRIFT") {
		return [`line ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}"`, `reason: ${diagnosis.reason}`];
	}

	if (type === "DATA_MISSING" || type === "UNKNOWN") {
		return [`reason: ${diagnosis.reason}`];
	}

	return undefined;
}
|
|
1774
|
+
/**
 * Prepend `prefix` to every line of `text`.
 *
 * @param {string} text - text split on "\n" (an empty string counts as one empty line).
 * @param {string} prefix - string placed in front of each line.
 * @returns {string} the prefixed lines joined back with "\n".
 */
function prefixLines(text, prefix) {
	const prefixed = [];
	for (const row of text.split("\n")) {
		prefixed.push(`${prefix}${row}`);
	}
	return prefixed.join("\n");
}
|
|
1777
|
+
//#endregion
|
|
1778
|
+
//#region src/diagnose/snapshot.ts
|
|
1779
|
+
// CommonJS-style resolver anchored at this module, used to locate packages on disk.
const require = createRequire(import.meta.url);
// Upper bound, in milliseconds, for one `agent-browser snapshot` call.
const SNAPSHOT_TIMEOUT_MS = 1e4;
// Upper bound, in milliseconds, for one `agent-browser close` call.
const CLOSE_TIMEOUT_MS = 1e4;
// Cap applied to the captured snapshot text before it is returned.
const MAX_OUTPUT_BYTES = 6e4;
/**
 * Locate the agent-browser CLI entry script.
 *
 * @returns {string | null} resolved path of `agent-browser/bin/agent-browser.js`,
 *   or null when resolution fails (e.g. the package is not installed).
 */
function resolveAgentBrowserBin() {
	let binPath = null;
	try {
		binPath = require.resolve("agent-browser/bin/agent-browser.js");
	} catch {
		// Resolution failure is an expected outcome; callers treat null as "unavailable".
	}
	return binPath;
}
|
|
1790
|
+
/**
 * Run `agent-browser snapshot` against the session that the failed vitest
 * run just used, and return its accessibility-tree dump.
 *
 * Returns null when agent-browser is missing, the child fails to spawn or
 * exits non-zero, it prints nothing, or the call exceeds
 * {@link SNAPSHOT_TIMEOUT_MS}. We never throw — a missing snapshot just means
 * diagnose has less context.
 *
 * The output is truncated to {@link MAX_OUTPUT_BYTES} (counted in string
 * characters) so the prompt stays within budget on large pages.
 *
 * @param {string} sessionName - value exported to the child as AGENT_BROWSER_SESSION.
 * @returns {Promise<string | null>} trimmed (possibly truncated) stdout, or null.
 */
async function captureSnapshot(sessionName) {
	const abBin = resolveAgentBrowserBin();
	if (!abBin) return null;
	return new Promise((resolve) => {
		const child = spawn(process.execPath, [abBin, "snapshot"], {
			env: {
				...process.env,
				AGENT_BROWSER_SESSION: sessionName
			},
			stdio: [
				"ignore",
				"pipe",
				"pipe"
			]
		});
		let stdout = "";
		let timedOut = false;
		const timer = setTimeout(() => {
			timedOut = true;
			child.kill("SIGTERM");
			// Settle immediately: a child that ignores SIGTERM never emits
			// "exit", and waiting for it would hang the caller past the timeout.
			resolve(null);
		}, SNAPSHOT_TIMEOUT_MS);
		child.stdout.setEncoding("utf8");
		child.stdout.on("data", (chunk) => {
			stdout += chunk;
		});
		// stderr is never reported; drain it so the child cannot block on a full pipe.
		child.stderr.resume();
		child.on("error", () => {
			clearTimeout(timer);
			resolve(null);
		});
		child.on("exit", (code) => {
			clearTimeout(timer);
			if (timedOut || code !== 0) {
				resolve(null);
				return;
			}
			const trimmed = stdout.trim();
			if (!trimmed) {
				resolve(null);
				return;
			}
			resolve(truncate(trimmed, MAX_OUTPUT_BYTES));
		});
	});
}
|
|
1850
|
+
/**
 * Cap a string at `maxBytes` characters (measured with `.length`, i.e. UTF-16
 * code units rather than encoded bytes).
 *
 * @param {string} s - text to shorten.
 * @param {number} maxBytes - maximum number of characters to keep.
 * @returns {string} `s` unchanged when it fits; otherwise its first `maxBytes`
 *   characters, a newline, and a marker with the number of dropped characters.
 */
function truncate(s, maxBytes) {
	const dropped = s.length - maxBytes;
	if (dropped <= 0) return s;
	const kept = s.slice(0, maxBytes);
	return `${kept}\n... [truncated, ${dropped} more chars]`;
}
|
|
1854
|
+
/**
 * Close an agent-browser session by name. Used before/after a `ccqa generate`
 * run so a wedged daemon from a previous attempt can't hang the next one.
 *
 * Always resolves; never throws. If the binary is missing, the session
 * doesn't exist, or the call exceeds {@link CLOSE_TIMEOUT_MS}, we silently
 * return — close is best-effort cleanup, not a precondition.
 *
 * @param {string} sessionName - value exported to the child as AGENT_BROWSER_SESSION.
 * @returns {Promise<void>}
 */
async function closeSession(sessionName) {
	const abBin = resolveAgentBrowserBin();
	if (!abBin) return;
	await new Promise((resolve) => {
		const child = spawn(process.execPath, [abBin, "close"], {
			env: {
				...process.env,
				AGENT_BROWSER_SESSION: sessionName
			},
			stdio: "ignore"
		});
		const timer = setTimeout(() => {
			child.kill("SIGTERM");
			// Resolve now rather than waiting for "exit": a child that ignores
			// SIGTERM would otherwise keep this promise pending forever.
			finish();
		}, CLOSE_TIMEOUT_MS);
		const finish = () => {
			clearTimeout(timer);
			resolve();
		};
		child.on("error", finish);
		child.on("exit", finish);
	});
}
|
|
1884
|
+
//#endregion
|
|
1885
|
+
//#region src/diagnose/loop.ts
|
|
1886
|
+
// Confidence at or above this value counts as "high" when deciding whether to auto-apply.
const DEFAULT_CONFIDENCE_THRESHOLD = .8;
/**
 * Diagnose-and-retry loop around a failing vitest run.
 *
 * Each attempt optionally captures a page snapshot, asks `diagnoseAndFix` for
 * a patched script, writes it to `scriptPath`, and re-runs vitest.
 *
 * @param {object} input - `{ scriptPath, initialRun, specMarkdown, actions,
 *   maxRetries, mode, runVitest, agentBrowserSession, outputLanguage }`.
 * @returns {Promise<boolean>} true when vitest finally passed (or had already
 *   passed); false when retries were exhausted or the diagnose step bailed out.
 */
async function runAutoFixLoop(input) {
	const { scriptPath, specMarkdown, actions, maxRetries, mode, runVitest, agentBrowserSession, outputLanguage } = input;
	let lastRun = input.initialRun;
	if (lastRun.exitCode === 0) return true;

	for (let attempt = 1; attempt <= maxRetries; attempt += 1) {
		fix(`attempt ${attempt}/${maxRetries}`);
		blank();

		// Snapshots are only attempted when a pinned browser session exists.
		let pageSnapshot = null;
		if (agentBrowserSession) {
			pageSnapshot = await timedPhase("page snapshot", () => captureSnapshot(agentBrowserSession), "fix");
			if (pageSnapshot) {
				fix(`page snapshot: ${pageSnapshot.length} chars captured`);
			} else {
				fix("page snapshot unavailable; continuing without it");
			}
		}

		const fixed = await diagnoseAndFix({
			script: lastRun.currentScript,
			specMarkdown,
			actions,
			failureLog: lastRun.output,
			pageSnapshot: pageSnapshot ?? void 0,
			mode,
			outputLanguage
		});
		if (!fixed) {
			fix("bailed out; see diagnosis above");
			return false;
		}

		await writeFile(scriptPath, fixed, "utf-8");
		fix(`saved: ${scriptPath}`);
		blank();

		lastRun = await timedPhase(`vitest run #${attempt + 1}`, () => runVitest(scriptPath), "run");
		if (lastRun.exitCode === 0) return true;
	}
	return false;
}
|
|
1922
|
+
/**
 * Run one diagnose pass and turn its outcome into a patched script.
 *
 * Returns null (after logging why, and usually a hand-off hint) when:
 * the SDK call failed, the response was empty, the diagnosis is DATA_MISSING
 * or UNKNOWN, the fix could not be applied to the script, confidence was too
 * low in non-interactive mode, or the user chose skip/manual. Choosing "quit"
 * terminates the process with exit code 1.
 *
 * @param {object} input - `{ script, specMarkdown, actions, failureLog,
 *   pageSnapshot, mode, outputLanguage }`.
 * @returns {Promise<string | null>} the patched script text, or null.
 */
async function diagnoseAndFix(input) {
	const { script, specMarkdown, actions, failureLog, pageSnapshot, mode, outputLanguage } = input;
	const outcome = await timedPhase("diagnose", () => diagnose({
		script,
		specMarkdown,
		actions,
		failureLog,
		pageSnapshot,
		outputLanguage
	}), "fix");

	if (outcome.sdkError) {
		fix("diagnose: SDK error talking to Claude");
		if (outcome.raw) fix(`diagnose raw: ${truncateForLog(outcome.raw)}`);
		hint("re-run later, or check ANTHROPIC_API_KEY / network connectivity");
		return null;
	}
	if (!outcome.result) {
		fix("diagnose: empty response from LLM");
		hint("re-run; if this keeps happening the failure log may be too short to diagnose");
		return null;
	}

	const { result } = outcome;
	// Shared exit for every "cannot proceed automatically" path.
	const handOff = () => {
		handoffToUser(result, outcome.raw, outputLanguage);
		return null;
	};

	reportDiagnosis(result);
	const diagnosisType = result.diagnosis.type;
	if (diagnosisType === "DATA_MISSING" || diagnosisType === "UNKNOWN") return handOff();

	const apply = applyDiagnosis(script, result.diagnosis);
	if (!apply.applied) {
		fix(`cannot apply: ${apply.reason}`);
		return handOff();
	}

	const decision = decide(result, mode);
	if (decision === "apply-auto") {
		fix(`applying automatically: ${apply.summary}`);
		return apply.script;
	}
	if (decision === "skip-low-confidence") {
		fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (--no-interactive)`);
		return handOff();
	}

	const choice = await promptForChoice({
		result,
		diff: previewDiff(script, apply.script),
		failureExcerpt: failureLog.slice(0, 800)
	});
	if (choice === "apply") {
		fix(`applied: ${apply.summary}`);
		return apply.script;
	}
	if (choice === "skip") {
		fix("skipped; leaving script untouched");
		return null;
	}
	if (choice === "manual") {
		fix("paused for manual edit");
		return handOff();
	}
	if (choice === "quit") {
		fix("user quit");
		process.exit(1);
	}
}
|
|
1985
|
+
/**
 * Choose how to act on an applicable fix.
 *
 * @param {{confidence: number}} result - diagnose result.
 * @param {string} mode - "auto", "non-interactive", or anything else (treated as interactive).
 * @returns {"apply-auto" | "skip-low-confidence" | "interactive"}
 *   "apply-auto" in auto mode or when confidence reaches
 *   DEFAULT_CONFIDENCE_THRESHOLD; otherwise "skip-low-confidence" in
 *   non-interactive mode and "interactive" in every other mode.
 */
function decide(result, mode) {
	if (mode === "auto") return "apply-auto";
	if (result.confidence >= DEFAULT_CONFIDENCE_THRESHOLD) return "apply-auto";
	return mode === "non-interactive" ? "skip-low-confidence" : "interactive";
}
|
|
1991
|
+
/**
 * Log a diagnose result through `fix`: its type, its confidence to two
 * decimals, and its reasoning when non-empty.
 *
 * @param {{diagnosis: {type: string}, confidence: number, reasoning: string}} result
 */
function reportDiagnosis(result) {
	const { diagnosis, confidence, reasoning } = result;
	fix(`diagnosis: ${diagnosis.type}`);
	fix(`confidence: ${confidence.toFixed(2)}`);
	if (reasoning) {
		fix(`reasoning: ${reasoning}`);
	}
}
|
|
1996
|
+
/**
 * Emit a category-specific [hint] block that tells the user what to do next.
 * Called whenever the loop has decided it cannot proceed on its own —
 * because the diagnosis is intrinsically not auto-fixable, because the
 * proposed fix wasn't applicable to the current script, or because the
 * confidence was too low under --no-interactive.
 *
 * The goal is to never leave the user with just "auto-fix exhausted" —
 * always state which side (test artifacts vs. application) likely needs
 * the next action.
 *
 * @param {{diagnosis: object}} result - diagnose result whose diagnosis selects the message.
 * @param {string | undefined} raw - raw diagnose response; logged (shortened) when non-empty.
 * @param {string | undefined} language - requested output language, normalised via `normLang`.
 */
function handoffToUser(result, raw, language) {
	handoffMessage(result.diagnosis, normLang(language)).forEach((line) => {
		hint(line);
	});
	if (raw) {
		fix(`diagnose raw: ${truncateForLog(raw)}`);
	}
}
|
|
2012
|
+
/**
 * Map a language tag onto one of the two supported hand-off languages.
 *
 * @param {string | undefined} language - requested language tag (e.g. "ja", "JA-jp", "en").
 * @returns {"ja" | "en"} "ja" when the tag starts with "ja" (case-insensitive);
 *   "en" for everything else, including a missing or empty tag.
 */
function normLang(language) {
	const isJapanese = Boolean(language) && language.toLowerCase().startsWith("ja");
	return isJapanese ? "ja" : "en";
}
|
|
2016
|
+
// Hand-off message builders keyed by normalised language code.
const HANDOFF = {
	en: handoffEn,
	ja: handoffJa
};
/**
 * Build the hand-off hint lines for a diagnosis in the given language.
 *
 * @param {object} diagnosis - normalised diagnosis.
 * @param {"en" | "ja"} language - key into HANDOFF.
 * @returns {string[]} whatever the selected language builder returns.
 */
function handoffMessage(diagnosis, language) {
	const build = HANDOFF[language];
	return build(diagnosis);
}
|
|
2023
|
+
/**
 * English hand-off hints: what went wrong and what the user should do next.
 *
 * @param {object} diagnosis - normalised diagnosis (discriminated by `type`).
 * @returns {string[] | undefined} hint lines for the diagnosis type; undefined
 *   for a type this function does not know.
 */
function handoffEn(diagnosis) {
	const { type, reason } = diagnosis;

	if (type === "DATA_MISSING") {
		return [
			`application-side issue: required data is missing. ${reason}`,
			"next step: seed the data (or update test-spec.md prerequisites), then re-run trace + generate."
		];
	}
	if (type === "UNKNOWN") {
		return [
			`could not classify the failure. ${reason}`,
			"next step: read the failure log above, decide whether the test or the app is wrong, and fix manually. consider re-running ccqa trace if the recorded flow no longer matches the live app."
		];
	}
	if (type === "SELECTOR_DRIFT") {
		const { line, oldSelector, newSelector } = diagnosis;
		return [
			"selector likely drifted but auto-apply was not safe.",
			`proposed: line ${line}: "${oldSelector}" → "${newSelector}" (${reason}).`,
			"next step: confirm in the live app and either accept the proposal manually, or re-run ccqa trace to recapture the new selector."
		];
	}
	if (type === "OVER_ASSERTION") {
		return [
			`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${reason}).`,
			"next step: cross-check test-spec.md. either delete the assertion from the test, or tighten the spec to require it."
		];
	}
	if (type === "TIMING_ISSUE") {
		return [
			"timing fix proposed but couldn't be applied automatically.",
			"next step: insert a sleep manually before the failing line, or re-run with a higher confidence trace."
		];
	}
	return undefined;
}
|
|
2036
|
+
/**
 * Japanese hand-off hints: what went wrong and what the user should do next.
 *
 * @param {object} diagnosis - normalised diagnosis (discriminated by `type`).
 * @returns {string[] | undefined} hint lines for the diagnosis type; undefined
 *   for a type this function does not know.
 */
function handoffJa(diagnosis) {
	const { type, reason } = diagnosis;

	if (type === "DATA_MISSING") {
		return [
			`アプリ側の問題: 必要なデータが不足しています。${reason}`,
			"次のステップ: データを seed する(または test-spec.md の prerequisites を更新)してから ccqa trace + generate をやり直してください。"
		];
	}
	if (type === "UNKNOWN") {
		return [
			`失敗を分類できませんでした。${reason}`,
			"次のステップ: 上の失敗ログを確認し、テストとアプリのどちらが原因か判断して手動で修正してください。記録した手順がアプリの現状と合わない場合は ccqa trace の再実行を検討してください。"
		];
	}
	if (type === "SELECTOR_DRIFT") {
		const { line, oldSelector, newSelector } = diagnosis;
		return [
			"selector が変わった可能性が高いですが、自動適用は安全でないと判断しました。",
			`提案: 行 ${line}: "${oldSelector}" → "${newSelector}" (${reason})`,
			"次のステップ: アプリで新 selector を確認し、手動で適用するか ccqa trace をやり直して新しい selector を取り直してください。"
		];
	}
	if (type === "OVER_ASSERTION") {
		return [
			`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${reason})`,
			"次のステップ: test-spec.md と照合して、テスト側の assertion を削るか、spec 側を更新してください。"
		];
	}
	if (type === "TIMING_ISSUE") {
		return [
			"timing 関連の修正案は出ましたが、自動適用できませんでした。",
			"次のステップ: 失敗行の前に手動で sleep を入れるか、より信頼度の高い trace を取り直してください。"
		];
	}
	return undefined;
}
|
|
2049
|
+
/**
 * Flatten text to one line and cap it for log output.
 *
 * Every run of newlines becomes " ⏎ "; the flattened text is then limited to
 * 400 characters, with a suffix reporting how many were cut.
 *
 * @param {string} s - text to shorten.
 * @returns {string} the single-line, possibly shortened text.
 */
function truncateForLog(s) {
	const limit = 400;
	const oneLine = s.replace(/\n+/g, " ⏎ ");
	const excess = oneLine.length - limit;
	if (excess <= 0) return oneLine;
	return `${oneLine.slice(0, limit)}... [+${excess} chars]`;
}
|
|
2053
|
+
/**
 * Derive the auto-fix mode from parsed CLI options.
 *
 * @param {{auto?: boolean, interactive?: boolean, noInteractive?: boolean}} opts
 * @returns {"auto" | "non-interactive" | "interactive"} "auto" when `opts.auto`
 *   is set (it wins over the other flags); "non-interactive" when
 *   `opts.interactive` is exactly false or `opts.noInteractive` is truthy;
 *   otherwise "interactive".
 */
function resolveMode(opts) {
	if (opts.auto) return "auto";
	const promptingDisabled = opts.interactive === false || Boolean(opts.noInteractive);
	return promptingDisabled ? "non-interactive" : "interactive";
}
|
|
2058
|
+
//#endregion
|
|
1238
2059
|
//#region src/cli/generate.ts
|
|
1239
|
-
const generateCommand = new Command("generate").argument("<feature/spec>", "Spec
|
|
2060
|
+
const generateCommand = new Command("generate").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Generate agent-browser test script from recorded trace actions. test.spec.ts is regenerated from actions.json on every run; pass --force to overwrite manual edits.").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--force", "Overwrite an existing test.spec.ts without warning").option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)").option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en").action(async (specPath, opts) => {
|
|
1240
2061
|
const { featureName, specName } = parseSpecPath(specPath);
|
|
1241
|
-
|
|
2062
|
+
const mode = resolveMode(opts);
|
|
2063
|
+
const useSnapshot = opts.snapshot !== false;
|
|
2064
|
+
await runGenerate(featureName, specName, parseInt(opts.maxRetries, 10), mode, opts.force ?? false, useSnapshot, opts.language ?? "en");
|
|
1242
2065
|
});
|
|
1243
|
-
async function runGenerate(featureName, specName, maxRetries) {
|
|
2066
|
+
async function runGenerate(featureName, specName, maxRetries, mode, force, useSnapshot, outputLanguage) {
|
|
1244
2067
|
header("generate", `${featureName}/${specName}`);
|
|
1245
2068
|
await ensureCcqaDir();
|
|
2069
|
+
const existingScriptPath = await getTestScript(featureName, specName);
|
|
2070
|
+
if (existingScriptPath && !force) {
|
|
2071
|
+
if (!await confirmOverwrite(existingScriptPath)) {
|
|
2072
|
+
info("aborted; pass --force to overwrite without prompting");
|
|
2073
|
+
return;
|
|
2074
|
+
}
|
|
2075
|
+
}
|
|
1246
2076
|
const { path: actionsPath, actions } = await getTraceActions(featureName, specName);
|
|
1247
2077
|
meta("trace", actionsPath);
|
|
1248
2078
|
meta("actions", actions.length);
|
|
1249
|
-
const
|
|
2079
|
+
const specContent = await readSpecFile(featureName, specName);
|
|
2080
|
+
const spec = parseTestSpec(specContent);
|
|
1250
2081
|
const setupScripts = await loadSetupScripts(spec.setups);
|
|
1251
2082
|
if (setupScripts.length > 0) meta("setups", setupScripts.map((s) => s.name).join(", "));
|
|
2083
|
+
meta("fix-mode", mode);
|
|
2084
|
+
meta("language", outputLanguage);
|
|
1252
2085
|
blank();
|
|
1253
2086
|
const cleanedActions = await cleanupActions$1(actions);
|
|
1254
2087
|
if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
|
|
1255
2088
|
const scriptPath = await saveTestScript(featureName, specName, actionsToScript(cleanedActions, spec.title, setupScripts.length > 0 ? setupScripts : void 0));
|
|
1256
2089
|
meta("saved", scriptPath);
|
|
1257
2090
|
blank();
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
2091
|
+
const agentBrowserSession = useSnapshot ? `ccqa-generate-${Date.now()}` : void 0;
|
|
2092
|
+
const runVitestForSession = (path) => runVitest$1(path, agentBrowserSession);
|
|
2093
|
+
let signalHandler = null;
|
|
2094
|
+
if (agentBrowserSession) {
|
|
2095
|
+
await closeSession(agentBrowserSession);
|
|
2096
|
+
signalHandler = () => {
|
|
2097
|
+
closeSession(agentBrowserSession).finally(() => process.exit(130));
|
|
2098
|
+
};
|
|
2099
|
+
process.once("SIGINT", signalHandler);
|
|
2100
|
+
process.once("SIGTERM", signalHandler);
|
|
1262
2101
|
}
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
warn("could not determine fix from failure log");
|
|
1269
|
-
break;
|
|
2102
|
+
try {
|
|
2103
|
+
const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(scriptPath), "run");
|
|
2104
|
+
if (initialRun.exitCode === 0) {
|
|
2105
|
+
hint(`run 'ccqa run ${featureName}/${specName}' to execute the test`);
|
|
2106
|
+
return;
|
|
1270
2107
|
}
|
|
1271
|
-
await
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
2108
|
+
if (await runAutoFixLoop({
|
|
2109
|
+
scriptPath,
|
|
2110
|
+
initialRun,
|
|
2111
|
+
specMarkdown: specContent,
|
|
2112
|
+
actions: cleanedActions,
|
|
2113
|
+
maxRetries,
|
|
2114
|
+
mode,
|
|
2115
|
+
runVitest: runVitestForSession,
|
|
2116
|
+
agentBrowserSession,
|
|
2117
|
+
outputLanguage
|
|
2118
|
+
})) {
|
|
1276
2119
|
hint(`run 'ccqa run ${featureName}/${specName}' to execute the test`);
|
|
1277
2120
|
return;
|
|
1278
2121
|
}
|
|
2122
|
+
warn("auto-fix exhausted; test still failing");
|
|
2123
|
+
process.exit(1);
|
|
2124
|
+
} finally {
|
|
2125
|
+
if (signalHandler) {
|
|
2126
|
+
process.off("SIGINT", signalHandler);
|
|
2127
|
+
process.off("SIGTERM", signalHandler);
|
|
2128
|
+
}
|
|
2129
|
+
if (agentBrowserSession) await closeSession(agentBrowserSession);
|
|
2130
|
+
}
|
|
2131
|
+
}
|
|
2132
|
+
async function confirmOverwrite(path) {
|
|
2133
|
+
if (!process.stdin.isTTY) {
|
|
2134
|
+
warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
|
|
2135
|
+
return false;
|
|
2136
|
+
}
|
|
2137
|
+
const rl = createInterface({
|
|
2138
|
+
input: process.stdin,
|
|
2139
|
+
output: process.stdout
|
|
2140
|
+
});
|
|
2141
|
+
try {
|
|
2142
|
+
process.stdout.write("\n");
|
|
2143
|
+
process.stdout.write(`[warn] ${path} already exists.\n`);
|
|
2144
|
+
process.stdout.write(`[warn] generate will regenerate it from actions.json and any manual edits will be lost.\n`);
|
|
2145
|
+
const norm = (await new Promise((res) => rl.question("Overwrite? [y/N] ", res))).trim().toLowerCase();
|
|
2146
|
+
return norm === "y" || norm === "yes";
|
|
2147
|
+
} finally {
|
|
2148
|
+
rl.close();
|
|
1279
2149
|
}
|
|
1280
|
-
warn("auto-fix exhausted — test still failing");
|
|
1281
|
-
process.exit(1);
|
|
1282
2150
|
}
|
|
1283
|
-
/**
|
|
1284
|
-
* Load setup test scripts, extract test body, and replace {{placeholders}} with params values.
|
|
1285
|
-
*/
|
|
1286
2151
|
async function loadSetupScripts(setups) {
|
|
1287
2152
|
if (!setups?.length) return [];
|
|
1288
2153
|
const result = [];
|
|
@@ -1343,45 +2208,17 @@ function replacePlaceholders(body, params) {
|
|
|
1343
2208
|
function escapeRegExp(s) {
|
|
1344
2209
|
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1345
2210
|
}
|
|
1346
|
-
async function
|
|
1347
|
-
|
|
1348
|
-
const { result, isError } = await invokeClaudeStreaming({
|
|
1349
|
-
prompt: buildAutoFixPrompt(script, failureLog),
|
|
1350
|
-
disableBuiltinTools: true,
|
|
1351
|
-
maxTurns: 1
|
|
1352
|
-
}, () => {});
|
|
1353
|
-
if (isError || !result) return null;
|
|
1354
|
-
const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
|
|
1355
|
-
const fixes = JSON.parse(json);
|
|
1356
|
-
if (!Array.isArray(fixes) || fixes.length === 0) return null;
|
|
1357
|
-
return applySleepFixes$1(script, fixes);
|
|
1358
|
-
} catch {
|
|
1359
|
-
return null;
|
|
1360
|
-
}
|
|
1361
|
-
}
|
|
1362
|
-
function applySleepFixes$1(script, fixes) {
|
|
1363
|
-
const lines = script.split("\n");
|
|
1364
|
-
for (const fix of fixes) if ("increase_to" in fix) {
|
|
1365
|
-
const idx = fix.line - 1;
|
|
1366
|
-
if (idx >= 0 && idx < lines.length) lines[idx] = lines[idx].replace(/spawnSync\("sleep",\s*\["\d+"\]/, `spawnSync("sleep", ["${fix.increase_to}"]`);
|
|
1367
|
-
}
|
|
1368
|
-
const inserts = fixes.filter((f) => "seconds" in f && !("increase_to" in f)).sort((a, b) => b.line - a.line);
|
|
1369
|
-
for (const fix of inserts) {
|
|
1370
|
-
const idx = fix.line - 1;
|
|
1371
|
-
if (idx >= 0 && idx <= lines.length) lines.splice(idx, 0, ` spawnSync("sleep", ["${fix.seconds}"], { stdio: "inherit" });`);
|
|
1372
|
-
}
|
|
1373
|
-
return lines.join("\n");
|
|
1374
|
-
}
|
|
1375
|
-
async function runVitest$1(scriptPath) {
|
|
1376
|
-
const { exitCode, stdout, stderr } = await spawnVitestCaptured([
|
|
2211
|
+
async function runVitest$1(scriptPath, agentBrowserSession) {
|
|
2212
|
+
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
1377
2213
|
"run",
|
|
1378
2214
|
"--config",
|
|
1379
2215
|
bundledVitestConfigPath(),
|
|
1380
2216
|
scriptPath
|
|
1381
|
-
]
|
|
2217
|
+
], agentBrowserSession ? { env: {
|
|
2218
|
+
...process.env,
|
|
2219
|
+
AGENT_BROWSER_SESSION: agentBrowserSession
|
|
2220
|
+
} } : {});
|
|
1382
2221
|
const currentScript = await readFile(scriptPath, "utf8");
|
|
1383
|
-
process.stdout.write(stdout);
|
|
1384
|
-
if (stderr) process.stderr.write(stderr);
|
|
1385
2222
|
return {
|
|
1386
2223
|
exitCode,
|
|
1387
2224
|
output: stdout + stderr,
|
|
@@ -1436,7 +2273,7 @@ async function runTests(target) {
|
|
|
1436
2273
|
warn(`${featureName}/${specName}: no test.spec.ts found`);
|
|
1437
2274
|
continue;
|
|
1438
2275
|
}
|
|
1439
|
-
|
|
2276
|
+
run(`${featureName}/${specName}`);
|
|
1440
2277
|
meta("test", scriptFile);
|
|
1441
2278
|
blank();
|
|
1442
2279
|
const reportFile = join(tmpDir, `report-${i}.json`);
|
|
@@ -1700,15 +2537,18 @@ function scrubSecrets(line, secrets) {
|
|
|
1700
2537
|
}
|
|
1701
2538
|
//#endregion
|
|
1702
2539
|
//#region src/cli/generate-setup.ts
|
|
1703
|
-
const generateSetupCommand = new Command("generate-setup").argument("<name>", "Setup name to generate (e.g. login)").description("Clean up, validate, and templatize setup actions").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)").action(async (name, opts) => {
|
|
1704
|
-
|
|
2540
|
+
const generateSetupCommand = new Command("generate-setup").argument("<name>", "Setup name to generate (e.g. login)").description("Clean up, validate, and templatize setup actions").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en").action(async (name, opts) => {
|
|
2541
|
+
const mode = resolveMode(opts);
|
|
2542
|
+
await runGenerateSetup(name, parseInt(opts.maxRetries, 10), opts.fromDummy ?? false, mode, opts.language ?? "en");
|
|
1705
2543
|
});
|
|
1706
|
-
async function runGenerateSetup(name, maxRetries, fromDummy) {
|
|
2544
|
+
async function runGenerateSetup(name, maxRetries, fromDummy, mode, outputLanguage) {
|
|
1707
2545
|
header("generate-setup", name);
|
|
1708
2546
|
await ensureCcqaDir();
|
|
1709
|
-
const
|
|
2547
|
+
const specContent = await readSetupSpecFile(name);
|
|
2548
|
+
const spec = parseSetupSpec(specContent);
|
|
1710
2549
|
const dummyPath = join(getSetupDir(name), "test.dummy.spec.ts");
|
|
1711
2550
|
const finalPath = join(getSetupDir(name), "test.spec.ts");
|
|
2551
|
+
let cleanedActions = [];
|
|
1712
2552
|
if (fromDummy) {
|
|
1713
2553
|
if (!await stat(dummyPath).then(() => true).catch(() => false)) {
|
|
1714
2554
|
warn(`test.dummy.spec.ts not found. Run without --from-dummy first.`);
|
|
@@ -1719,40 +2559,52 @@ async function runGenerateSetup(name, maxRetries, fromDummy) {
|
|
|
1719
2559
|
const { actions } = await getSetupActions(name);
|
|
1720
2560
|
meta("setup", spec.title);
|
|
1721
2561
|
meta("actions", actions.length);
|
|
2562
|
+
meta("fix-mode", mode);
|
|
2563
|
+
meta("language", outputLanguage);
|
|
1722
2564
|
blank();
|
|
1723
|
-
|
|
2565
|
+
cleanedActions = await cleanupActions(actions);
|
|
1724
2566
|
if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
|
|
1725
2567
|
await writeFile(dummyPath, actionsToScript(cleanedActions, spec.title), "utf-8");
|
|
1726
2568
|
meta("saved", dummyPath);
|
|
1727
2569
|
}
|
|
1728
2570
|
blank();
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
2571
|
+
const agentBrowserSession = `ccqa-generate-setup-${name}-${Date.now()}`;
|
|
2572
|
+
const runVitestForSession = (path) => runVitestResolved(path, agentBrowserSession);
|
|
2573
|
+
await closeSession(agentBrowserSession);
|
|
2574
|
+
const signalHandler = () => {
|
|
2575
|
+
closeSession(agentBrowserSession).finally(() => process.exit(130));
|
|
2576
|
+
};
|
|
2577
|
+
process.once("SIGINT", signalHandler);
|
|
2578
|
+
process.once("SIGTERM", signalHandler);
|
|
2579
|
+
try {
|
|
2580
|
+
const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(dummyPath), "run");
|
|
2581
|
+
let passed = initialRun.exitCode === 0;
|
|
2582
|
+
if (!passed) passed = await runAutoFixLoop({
|
|
2583
|
+
scriptPath: dummyPath,
|
|
2584
|
+
initialRun,
|
|
2585
|
+
specMarkdown: specContent,
|
|
2586
|
+
actions: cleanedActions,
|
|
2587
|
+
maxRetries,
|
|
2588
|
+
mode,
|
|
2589
|
+
runVitest: runVitestForSession,
|
|
2590
|
+
agentBrowserSession,
|
|
2591
|
+
outputLanguage
|
|
2592
|
+
});
|
|
2593
|
+
if (!passed) {
|
|
2594
|
+
warn("auto-fix exhausted; setup test still failing");
|
|
1747
2595
|
hint(`edit ${dummyPath} manually, then run: ccqa generate-setup ${name} --from-dummy`);
|
|
1748
2596
|
process.exit(1);
|
|
1749
2597
|
}
|
|
2598
|
+
await writeFile(finalPath, reversePlaceholdersInScript(await readFile(dummyPath, "utf8"), spec.placeholders), "utf-8");
|
|
2599
|
+
await unlink(dummyPath).catch(() => {});
|
|
2600
|
+
blank();
|
|
2601
|
+
meta("saved", finalPath);
|
|
2602
|
+
hint(`setup '${name}' is ready; reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
|
|
2603
|
+
} finally {
|
|
2604
|
+
process.off("SIGINT", signalHandler);
|
|
2605
|
+
process.off("SIGTERM", signalHandler);
|
|
2606
|
+
await closeSession(agentBrowserSession);
|
|
1750
2607
|
}
|
|
1751
|
-
await writeFile(finalPath, reversePlaceholdersInScript(currentScript, spec.placeholders), "utf-8");
|
|
1752
|
-
await unlink(dummyPath).catch(() => {});
|
|
1753
|
-
blank();
|
|
1754
|
-
meta("saved", finalPath);
|
|
1755
|
-
hint(`setup '${name}' is ready — reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
|
|
1756
2608
|
}
|
|
1757
2609
|
/**
|
|
1758
2610
|
* Replace dummy values with {{placeholder}} directly in the test script text.
|
|
@@ -1765,45 +2617,17 @@ function reversePlaceholdersInScript(script, placeholders) {
|
|
|
1765
2617
|
for (const [key, def] of entries) result = result.replaceAll(def.dummy, `{{${key}}}`);
|
|
1766
2618
|
return result;
|
|
1767
2619
|
}
|
|
1768
|
-
async function
|
|
1769
|
-
|
|
1770
|
-
const { result, isError } = await invokeClaudeStreaming({
|
|
1771
|
-
prompt: buildAutoFixPrompt(script, failureLog),
|
|
1772
|
-
disableBuiltinTools: true,
|
|
1773
|
-
maxTurns: 1
|
|
1774
|
-
}, () => {});
|
|
1775
|
-
if (isError || !result) return null;
|
|
1776
|
-
const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
|
|
1777
|
-
const fixes = JSON.parse(json);
|
|
1778
|
-
if (!Array.isArray(fixes) || fixes.length === 0) return null;
|
|
1779
|
-
return applySleepFixes(script, fixes);
|
|
1780
|
-
} catch {
|
|
1781
|
-
return null;
|
|
1782
|
-
}
|
|
1783
|
-
}
|
|
1784
|
-
function applySleepFixes(script, fixes) {
|
|
1785
|
-
const lines = script.split("\n");
|
|
1786
|
-
for (const fix of fixes) if ("increase_to" in fix) {
|
|
1787
|
-
const idx = fix.line - 1;
|
|
1788
|
-
if (idx >= 0 && idx < lines.length) lines[idx] = lines[idx].replace(/spawnSync\("sleep",\s*\["\d+"\]/, `spawnSync("sleep", ["${fix.increase_to}"]`);
|
|
1789
|
-
}
|
|
1790
|
-
const inserts = fixes.filter((f) => "seconds" in f && !("increase_to" in f)).sort((a, b) => b.line - a.line);
|
|
1791
|
-
for (const fix of inserts) {
|
|
1792
|
-
const idx = fix.line - 1;
|
|
1793
|
-
if (idx >= 0 && idx <= lines.length) lines.splice(idx, 0, ` spawnSync("sleep", ["${fix.seconds}"], { stdio: "inherit" });`);
|
|
1794
|
-
}
|
|
1795
|
-
return lines.join("\n");
|
|
1796
|
-
}
|
|
1797
|
-
async function runVitest(scriptPath) {
|
|
1798
|
-
const { exitCode, stdout, stderr } = await spawnVitestCaptured([
|
|
2620
|
+
async function runVitest(scriptPath, agentBrowserSession) {
|
|
2621
|
+
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
1799
2622
|
"run",
|
|
1800
2623
|
"--config",
|
|
1801
2624
|
bundledVitestConfigPath(),
|
|
1802
2625
|
scriptPath
|
|
1803
|
-
]
|
|
2626
|
+
], agentBrowserSession ? { env: {
|
|
2627
|
+
...process.env,
|
|
2628
|
+
AGENT_BROWSER_SESSION: agentBrowserSession
|
|
2629
|
+
} } : {});
|
|
1804
2630
|
const currentScript = await readFile(scriptPath, "utf8");
|
|
1805
|
-
process.stdout.write(stdout);
|
|
1806
|
-
if (stderr) process.stderr.write(stderr);
|
|
1807
2631
|
return {
|
|
1808
2632
|
exitCode,
|
|
1809
2633
|
output: stdout + stderr,
|
|
@@ -1817,20 +2641,21 @@ async function runVitest(scriptPath) {
|
|
|
1817
2641
|
* literals. Auto-fix edits the original file (via writeFile in callers), so
|
|
1818
2642
|
* we always re-read it before each invocation.
|
|
1819
2643
|
*/
|
|
1820
|
-
async function runVitestResolved(scriptPath) {
|
|
2644
|
+
async function runVitestResolved(scriptPath, agentBrowserSession) {
|
|
1821
2645
|
const original = await readFile(scriptPath, "utf8");
|
|
1822
|
-
if (!hasEnvRef(original)) return runVitest(scriptPath);
|
|
2646
|
+
if (!hasEnvRef(original)) return runVitest(scriptPath, agentBrowserSession);
|
|
1823
2647
|
const tmpPath = scriptPath.replace(/\.ts$/, ".__resolved.spec.ts");
|
|
1824
2648
|
await writeFile(tmpPath, resolveEnvRefs(original), "utf-8");
|
|
1825
2649
|
try {
|
|
1826
|
-
const { exitCode, stdout, stderr } = await
|
|
2650
|
+
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
1827
2651
|
"run",
|
|
1828
2652
|
"--config",
|
|
1829
2653
|
bundledVitestConfigPath(),
|
|
1830
2654
|
tmpPath
|
|
1831
|
-
]
|
|
1832
|
-
|
|
1833
|
-
|
|
2655
|
+
], agentBrowserSession ? { env: {
|
|
2656
|
+
...process.env,
|
|
2657
|
+
AGENT_BROWSER_SESSION: agentBrowserSession
|
|
2658
|
+
} } : {});
|
|
1834
2659
|
return {
|
|
1835
2660
|
exitCode,
|
|
1836
2661
|
output: stdout + stderr,
|