ccqa 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/ccqa.mjs CHANGED
@@ -8,6 +8,7 @@ import { delimiter, dirname, join, resolve } from "node:path";
8
8
  import { query } from "@anthropic-ai/claude-agent-sdk";
9
9
  import matter from "gray-matter";
10
10
  import { spawn } from "node:child_process";
11
+ import { createInterface } from "node:readline";
11
12
  import { tmpdir } from "node:os";
12
13
  //#region src/prompts/trace.ts
13
14
  function generateSessionName() {
@@ -285,14 +286,17 @@ const STEP_ICONS = {
285
286
  function header(command, target) {
286
287
  process.stdout.write(`\nccqa ${command}${target ? ` ${target}` : ""}\n\n`);
287
288
  }
289
/**
 * Emit one tagged log line of the form `[scope] message` followed by a newline.
 *
 * @param scope - Tag placed inside the square brackets.
 * @param message - Text written after the tag.
 * @param sink - Object with a `write` method; defaults to `process.stdout`.
 */
function write(scope, message, sink = process.stdout) {
  const line = `[${scope}] ${message}\n`;
  sink.write(line);
}
288
292
  function meta(key, value) {
289
- process.stdout.write(` ${key}: ${value}\n`);
293
+ write("meta", `${key}: ${value}`);
290
294
  }
291
295
  function blank() {
292
296
  process.stdout.write("\n");
293
297
  }
294
298
  function info(message) {
295
- process.stdout.write(`${message}\n`);
299
+ write("info", message);
296
300
  }
297
301
  function step(type, stepId, detail) {
298
302
  process.stdout.write(` ${STEP_ICONS[type]} [${stepId}] ${detail}\n`);
@@ -301,13 +305,37 @@ function bash(command) {
301
305
  process.stdout.write(` $ ${command.slice(0, 120)}\n`);
302
306
  }
303
307
  function error(message) {
304
- process.stderr.write(`error: ${message}\n`);
308
+ write("error", message, process.stderr);
305
309
  }
306
310
  function warn(message) {
307
- process.stderr.write(`warn: ${message}\n`);
311
+ write("warn", message, process.stderr);
308
312
  }
309
313
  function hint(message) {
310
- process.stdout.write(`\nhint: ${message}\n`);
314
+ process.stdout.write("\n");
315
+ write("hint", message);
316
+ }
317
/**
 * Log a message under the "fix" tag.
 *
 * @param message - Text to log.
 */
function fix(message) {
  const scope = "fix";
  write(scope, message);
}
320
/**
 * Log a message under the "run" tag.
 *
 * @param message - Text to log.
 */
function run(message) {
  const scope = "run";
  write(scope, message);
}
323
/**
 * Run `fn` and bracket it with log lines under `scope`: `<label> started`
 * before, then either `<label> finished in N.Ns` on success or
 * `<label> threw after N.Ns` when `fn` rejects or throws.
 *
 * @param label - Human-readable name of the phase.
 * @param fn - Function to await; its resolved value is returned unchanged.
 * @param scope - Log tag for the markers; defaults to "fix".
 * @returns Whatever `fn` resolves to.
 * @throws Re-throws the original error from `fn` after logging it.
 */
async function timedPhase(label, fn, scope = "fix") {
  const startedAt = Date.now();
  // Seconds elapsed since the phase began, with one decimal place.
  const elapsed = () => ((Date.now() - startedAt) / 1e3).toFixed(1);
  write(scope, `${label} started`);
  try {
    const value = await fn();
    write(scope, `${label} finished in ${elapsed()}s`);
    return value;
  } catch (failure) {
    write(scope, `${label} threw after ${elapsed()}s`);
    throw failure;
  }
}
312
340
  //#endregion
313
341
  //#region src/claude/invoke.ts
@@ -488,13 +516,27 @@ const CCQA_DIR = ".ccqa";
488
516
  function getCcqaDir(cwd = process.cwd()) {
489
517
  return join(cwd, CCQA_DIR);
490
518
  }
519
+ /**
520
+ * Accepts both the canonical 2-segment alias and the on-disk 4-segment path
521
+ * (which is what shell tab-completion produces):
522
+ * - "tasks/create-and-complete"
523
+ * - "features/tasks/test-cases/create-and-complete"
524
+ * - ".ccqa/features/tasks/test-cases/create-and-complete"
525
+ * All forms resolve to { featureName: "tasks", specName: "create-and-complete" }.
526
+ * Trailing slashes are tolerated.
527
+ */
491
528
  function parseSpecPath(specPath) {
492
- const parts = specPath.split("/");
493
- if (parts.length !== 2 || !parts[0] || !parts[1]) throw new Error(`Invalid spec path: "${specPath}". Expected format: "<feature>/<spec>"`);
494
- return {
529
+ const parts = specPath.replace(/^\.\/+/, "").replace(/\/+$/, "").split("/").filter((p) => p.length > 0);
530
+ if (parts[0] === ".ccqa") parts.shift();
531
+ if (parts.length === 4 && parts[0] === "features" && parts[2] === "test-cases") return {
532
+ featureName: parts[1],
533
+ specName: parts[3]
534
+ };
535
+ if (parts.length === 2 && parts[0] && parts[1]) return {
495
536
  featureName: parts[0],
496
537
  specName: parts[1]
497
538
  };
539
+ throw new Error(`Invalid spec path: "${specPath}". Expected "<feature>/<spec>" or "features/<feature>/test-cases/<spec>".`);
498
540
  }
499
541
  function getFeatureDir(featureName, cwd) {
500
542
  return join(getCcqaDir(cwd), "features", featureName);
@@ -706,10 +748,10 @@ function bundledVitestConfigPath() {
706
748
  }
707
749
  //#endregion
708
750
  //#region src/runtime/spawn-vitest.ts
709
- const require$1 = createRequire(import.meta.url);
751
+ const require$2 = createRequire(import.meta.url);
710
752
  function resolveVitestBin() {
711
- const pkgPath = require$1.resolve("vitest/package.json");
712
- const pkg = require$1(pkgPath);
753
+ const pkgPath = require$2.resolve("vitest/package.json");
754
+ const pkg = require$2(pkgPath);
713
755
  const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.vitest;
714
756
  if (!binRel) throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
715
757
  return resolve(dirname(pkgPath), binRel);
@@ -727,6 +769,19 @@ async function spawnVitestCaptured(args, opts = {}) {
727
769
  stderr
728
770
  };
729
771
  }
772
/**
 * Spawn vitest with piped stdio, mirroring its stdout/stderr to this
 * process's stdout/stderr while also capturing both as text.
 *
 * @param args - Arguments forwarded to `spawnVitestChild`.
 * @param opts - Options forwarded to `spawnVitestChild`.
 * @returns Resolves to `{ exitCode, stdout, stderr }` once both streams have
 *   been drained and `waitExit` has settled.
 */
async function spawnVitestTeed(args, opts = {}) {
  const child = spawnVitestChild(args, opts, "pipe");
  const mirroredStdout = teeDrain(child.stdout, process.stdout);
  const mirroredStderr = teeDrain(child.stderr, process.stderr);
  const exited = waitExit(child);
  const [stdout, stderr, exitCode] = await Promise.all([mirroredStdout, mirroredStderr, exited]);
  return { exitCode, stdout, stderr };
}
730
785
  function spawnVitestStreaming(args, opts = {}) {
731
786
  const child = spawnVitestChild(args, opts, "pipe");
732
787
  return {
@@ -754,6 +809,15 @@ async function drain(stream) {
754
809
  for await (const chunk of stream) buf += chunk;
755
810
  return buf;
756
811
  }
812
/**
 * Read `stream` to the end as UTF-8, forwarding every chunk to `sink` as it
 * arrives and returning the full text once the stream ends.
 *
 * @param stream - Readable stream; its encoding is switched to "utf8".
 * @param sink - Object with a `write` method that receives each chunk.
 * @returns The concatenation of all chunks.
 */
async function teeDrain(stream, sink) {
  stream.setEncoding("utf8");
  const pieces = [];
  for await (const piece of stream) {
    pieces.push(piece);
    sink.write(piece);
  }
  return pieces.join("");
}
757
821
  function waitExit(child) {
758
822
  return new Promise((resolvePromise, rejectPromise) => {
759
823
  child.once("exit", (code) => resolvePromise(code ?? 0));
@@ -762,7 +826,7 @@ function waitExit(child) {
762
826
  }
763
827
  //#endregion
764
828
  //#region src/runtime/agent-browser-bin.ts
765
- const require = createRequire(import.meta.url);
829
+ const require$1 = createRequire(import.meta.url);
766
830
  /**
767
831
  * Resolves the directory containing the `agent-browser` shim that npm/pnpm
768
832
  * exposes on PATH for the peer-installed package. Used by `ccqa trace` to
@@ -774,7 +838,7 @@ const require = createRequire(import.meta.url);
774
838
  function resolveAgentBrowserBinDir() {
775
839
  let pkgJsonPath;
776
840
  try {
777
- pkgJsonPath = require.resolve("agent-browser/package.json");
841
+ pkgJsonPath = require$1.resolve("agent-browser/package.json");
778
842
  } catch {
779
843
  return null;
780
844
  }
@@ -844,7 +908,7 @@ function envRefsToJsExpression(value) {
844
908
  }
845
909
  //#endregion
846
910
  //#region src/cli/trace.ts
847
/**
 * `ccqa trace <feature/spec>`: parses the spec reference and hands the
 * resulting feature and spec names to `runTrace`.
 */
const traceCommand = new Command("trace")
  .argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)")
  .description("Run agent-browser, verify assertions, and record structured actions")
  .action(async (specPath) => {
    const spec = parseSpecPath(specPath);
    await runTrace(spec.featureName, spec.specName);
  });
@@ -943,7 +1007,7 @@ async function runSetups(setups, sessionName) {
943
1007
  throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
944
1008
  });
945
1009
  for (const [key, value] of Object.entries(ref.params ?? {})) script = script.replaceAll(`{{${key}}}`, resolveEnvRefs(value));
946
- script = script.replace(/process\.env\.AGENT_BROWSER_SESSION\s*=\s*`.+`;/, `process.env.AGENT_BROWSER_SESSION = ${JSON.stringify(sessionName)};`);
1010
+ script = script.replace(/process\.env\.AGENT_BROWSER_SESSION\s*\|?\|?=\s*`.+`;/, `process.env.AGENT_BROWSER_SESSION = ${JSON.stringify(sessionName)};`);
947
1011
  const tmpPath = join(getSetupDir(ref.name), `_run.spec.ts`);
948
1012
  await writeFile(tmpPath, script, "utf-8");
949
1013
  try {
@@ -1062,8 +1126,10 @@ function actionsToScript(actions, title, setupScripts) {
1062
1126
  `import { spawnSync } from "node:child_process";`,
1063
1127
  `import { ab, abWait, abAssertTextVisible, abAssertVisible, abAssertNotVisible, abAssertUrl, abAssertEnabled, abAssertDisabled, abAssertChecked, abAssertUnchecked } from "ccqa/test-helpers";`,
1064
1128
  "",
1065
- `// Single session shared across all tests — reset per run via cookies clear in first test`,
1066
- `process.env.AGENT_BROWSER_SESSION = \`ccqa-run-\${Date.now()}\`;`,
1129
+ `// Single session shared across all tests — reset per run via cookies clear in first test.`,
1130
+ `// Use ||= so an outer harness (e.g. ccqa generate's auto-fix loop) can pre-set the session`,
1131
+ `// name and inspect the same session after the run finishes.`,
1132
+ `process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
1067
1133
  ""
1068
1134
  ]];
1069
1135
  if (setupScripts?.length) for (const setup of setupScripts) parts.push(`test("setup: ${setup.name}", () => {`, setup.body, "}, 3 * 60 * 1000);", "");
@@ -1168,43 +1234,6 @@ function actionToLine(action) {
1168
1234
  const j = (s) => JSON.stringify(s);
1169
1235
  //#endregion
1170
1236
  //#region src/prompts/codegen.ts
1171
- function buildAutoFixPrompt(script, failureLog) {
1172
- return `You are analyzing a failing E2E test script. The test fails because some browser actions execute before the page has finished loading or navigating.
1173
-
1174
- Your task: identify which line numbers need a sleep/wait inserted BEFORE them to fix timing issues.
1175
-
1176
- ## Rules
1177
- - ONLY identify lines where a sleep is needed — do NOT suggest any other changes
1178
- - Common patterns that need a sleep:
1179
- - After \`ab("open", ...)\` when the next line interacts with elements (fill, click, etc.)
1180
- - After \`ab("press", "Enter")\` or \`ab("click", ...)\` when a page navigation occurs before the next action
1181
- - After any action that triggers a redirect or page reload
1182
- - Look at the error log to identify WHICH lines failed, then determine if a sleep before that line would fix it
1183
- - If a \`spawnSync("sleep", ...)\` already exists before a failing line, suggest increasing its duration instead
1184
- - Output ONLY a JSON array of objects, no explanation, no markdown code fences
1185
-
1186
- ## Output format
1187
- Each object has:
1188
- - "line": the 1-based line number to insert a sleep BEFORE
1189
- - "seconds": recommended sleep duration (typically 3-5)
1190
- - "reason": very short explanation (e.g., "page navigation after form submit")
1191
-
1192
- If a sleep already exists and needs to be increased:
1193
- - "line": the line number of the existing sleep
1194
- - "increase_to": the new duration in seconds
1195
- - "reason": explanation
1196
-
1197
- Example output:
1198
- [{"line": 15, "seconds": 3, "reason": "page navigation after press Enter"}, {"line": 22, "increase_to": 5, "reason": "slow page load"}]
1199
-
1200
- If no fixes are needed, return: []
1201
-
1202
- ## Test Script (with line numbers)
1203
- ${script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n")}
1204
-
1205
- ## Failure Log
1206
- ${failureLog.slice(0, 3e3)}`;
1207
- }
1208
1237
  function buildCleanupPrompt(actions) {
1209
1238
  return `You are given a list of browser actions recorded during an E2E test trace.
1210
1239
  The trace contains noise: failed attempts, redundant retries, and duplicate operations recorded because the agent explored multiple strategies.
@@ -1235,54 +1264,890 @@ ${actions.map((a, i) => {
1235
1264
  }).join("\n")}`;
1236
1265
  }
1237
1266
  //#endregion
1267
+ //#region src/diagnose/apply.ts
1268
/**
 * Dispatch a diagnosis to the function that knows how to patch the script
 * for that category.
 *
 * @param script - Current test script text.
 * @param diagnosis - Object whose `type` selects the handler.
 * @returns The handler's result for TIMING_ISSUE / OVER_ASSERTION /
 *   SELECTOR_DRIFT; `{ applied: false, reason }` for DATA_MISSING and
 *   UNKNOWN; `undefined` for any other type.
 */
function applyDiagnosis(script, diagnosis) {
  const { type } = diagnosis;
  if (type === "TIMING_ISSUE") return applyTiming(script, diagnosis.fixes);
  if (type === "OVER_ASSERTION") return applyOverAssertion(script, diagnosis.lines);
  if (type === "SELECTOR_DRIFT") {
    return applySelectorDrift(script, diagnosis.line, diagnosis.oldSelector, diagnosis.newSelector);
  }
  if (type === "DATA_MISSING") {
    return { applied: false, reason: `data missing — ${diagnosis.reason}` };
  }
  if (type === "UNKNOWN") {
    return { applied: false, reason: `unknown failure — ${diagnosis.reason}` };
  }
  return undefined;
}
1283
/**
 * Apply TIMING_ISSUE fixes to a test script.
 *
 * "increase" fixes rewrite the duration of an existing
 * `spawnSync("sleep", ["N"]` call on the given line; "insert" fixes add a new
 * sleep statement before the given line. Line numbers are 1-based.
 *
 * The fixes originate from model output, so entries whose line is not an
 * integer or whose duration is not a finite, non-negative number are ignored
 * instead of crashing or corrupting the script.
 *
 * @param {string} script - Test script text.
 * @param {Array<object>} fixes - Entries shaped `{ kind: "insert", line, seconds }`
 *   or `{ kind: "increase", line, increase_to }`.
 * @returns {{ applied: true, script: string, summary: string } | { applied: false, reason: string }}
 */
function applyTiming(script, fixes) {
  if (fixes.length === 0) return {
    applied: false,
    reason: "no timing fixes proposed"
  };
  const lines = script.split("\n");
  const summary = [];
  const isDuration = (n) => Number.isFinite(n) && n >= 0;
  // Also matches fractional durations so a previously written "2.5" can be raised.
  const existingSleep = /spawnSync\("sleep",\s*\["\d+(?:\.\d+)?"\]/;

  // Increases run first: they never shift lines, so every index still refers
  // to the script exactly as it was numbered.
  for (const entry of fixes) {
    if (entry.kind !== "increase") continue;
    const idx = entry.line - 1;
    if (!Number.isInteger(idx) || idx < 0 || idx >= lines.length) continue;
    if (!isDuration(entry.increase_to)) continue;
    const original = lines[idx];
    const replaced = original.replace(existingSleep, `spawnSync("sleep", ["${entry.increase_to}"]`);
    if (replaced !== original) {
      lines[idx] = replaced;
      summary.push(`line ${entry.line}: sleep → ${entry.increase_to}s`);
    }
  }

  // Inserts run bottom-up so an insertion never moves a target that is still pending.
  const inserts = fixes
    .filter((f) => f.kind === "insert" && Number.isInteger(f.line) && isDuration(f.seconds))
    .sort((a, b) => b.line - a.line);
  for (const entry of inserts) {
    const idx = entry.line - 1;
    // idx === lines.length is allowed: it appends after the last line.
    if (idx < 0 || idx > lines.length) continue;
    lines.splice(idx, 0, ` spawnSync("sleep", ["${entry.seconds}"], { stdio: "inherit" });`);
    summary.push(`line ${entry.line}: insert sleep ${entry.seconds}s`);
  }

  if (summary.length === 0) return {
    applied: false,
    reason: "timing fixes pointed at out-of-range lines"
  };
  return {
    applied: true,
    script: lines.join("\n"),
    summary: summary.join("; ")
  };
}
1317
/**
 * Remove the proposed assertion lines from a test script.
 *
 * A line is removed only when its 1-based number is in range and its text
 * contains `abAssert`; other proposals are ignored.
 *
 * @param script - Test script text.
 * @param lineNumbers - 1-based line numbers proposed for removal.
 * @returns `{ applied: true, script, summary }` when at least one line was
 *   removed, otherwise `{ applied: false, reason }`.
 */
function applyOverAssertion(script, lineNumbers) {
  if (lineNumbers.length === 0) {
    return { applied: false, reason: "no lines to remove" };
  }
  const lines = script.split("\n");
  // Indices (0-based) of lines that are both in range and look like assertions.
  const doomed = new Set();
  for (const lineNumber of lineNumbers) {
    const idx = lineNumber - 1;
    if (idx < 0 || idx >= lines.length) continue;
    if (/abAssert/.test(lines[idx])) doomed.add(idx);
  }
  if (doomed.size === 0) {
    return { applied: false, reason: "no abAssert lines matched the proposed line numbers" };
  }
  const kept = lines.filter((_, idx) => !doomed.has(idx));
  return {
    applied: true,
    script: kept.join("\n"),
    summary: `removed ${doomed.size} assertion(s)`
  };
}
1343
/**
 * Replace a drifted selector on one line of a test script.
 *
 * @param {string} script - Test script text.
 * @param {number} line - 1-based line number holding the old selector.
 * @param {string} oldSelector - Exact substring expected on that line.
 * @param {string} newSelector - Literal replacement text.
 * @returns {{ applied: true, script: string, summary: string } | { applied: false, reason: string }}
 */
function applySelectorDrift(script, line, oldSelector, newSelector) {
  const lines = script.split("\n");
  const idx = line - 1;
  // The line number comes from model output; a fractional or NaN value would
  // otherwise index `undefined` and throw below.
  if (!Number.isInteger(idx) || idx < 0 || idx >= lines.length) return {
    applied: false,
    reason: `line ${line} out of range`
  };
  const content = lines[idx];
  // An empty needle "matches" everywhere and would splice the replacement
  // between every character, so treat it as not found.
  if (!oldSelector || !content.includes(oldSelector)) return {
    applied: false,
    reason: `oldSelector not found on line ${line}`
  };
  // Use a replacer function: a string replacement would expand `$&`, `$'`,
  // `$1`, … and CSS selectors such as [href$='.pdf'] contain those sequences.
  lines[idx] = content.replaceAll(oldSelector, () => newSelector);
  return {
    applied: true,
    script: lines.join("\n"),
    summary: `line ${line}: "${oldSelector}" → "${newSelector}"`
  };
}
1362
/**
 * Build a unified-style diff snippet for showing the user what would change.
 * Just the changed lines with -/+ prefixes; not a real patch.
 *
 * Lines are aligned by content (shared prefix and suffix are trimmed, then a
 * longest-common-subsequence walk covers the middle), so inserting or
 * deleting a line reports only that line rather than every line after it.
 *
 * @param {string} before - Original text.
 * @param {string} after - Modified text.
 * @returns {string} Removed lines prefixed "- " and added lines prefixed
 *   "+ ", newline-joined; empty when both texts are equal.
 */
function previewDiff(before, after) {
  const a = before.split("\n");
  const b = after.split("\n");

  // Trim the unchanged head and tail so the table below only spans the edit.
  let head = 0;
  while (head < a.length && head < b.length && a[head] === b[head]) head++;
  let endA = a.length;
  let endB = b.length;
  while (endA > head && endB > head && a[endA - 1] === b[endB - 1]) {
    endA--;
    endB--;
  }
  const left = a.slice(head, endA);
  const right = b.slice(head, endB);

  // lcs[i][j] = length of the longest common subsequence of left[i..] and right[j..].
  const lcs = Array.from({ length: left.length + 1 }, () => new Array(right.length + 1).fill(0));
  for (let i = left.length - 1; i >= 0; i--) {
    for (let j = right.length - 1; j >= 0; j--) {
      lcs[i][j] = left[i] === right[j] ? lcs[i + 1][j + 1] + 1 : Math.max(lcs[i + 1][j], lcs[i][j + 1]);
    }
  }

  const out = [];
  let i = 0;
  let j = 0;
  while (i < left.length && j < right.length) {
    if (left[i] === right[j]) {
      i++;
      j++;
    } else if (lcs[i + 1][j] >= lcs[i][j + 1]) {
      // Ties emit the removal first, so a replaced line reads "- old" then "+ new".
      out.push(`- ${left[i++]}`);
    } else {
      out.push(`+ ${right[j++]}`);
    }
  }
  while (i < left.length) out.push(`- ${left[i++]}`);
  while (j < right.length) out.push(`+ ${right[j++]}`);
  return out.join("\n");
}
1378
+ //#endregion
1379
+ //#region src/diagnose/prompt.ts
1380
/**
 * Build the prompt text sent to the model when diagnosing a failing test.
 *
 * Reads `script`, `specMarkdown`, `actions`, `failureLog`, `pageSnapshot` and
 * `outputLanguage` (default "en") from `input`. The script is embedded with
 * 1-based line numbers, each recorded action is summarised on one line, only
 * the first 4000 characters of the failure log are included, and the
 * formatted page snapshot section is appended when `pageSnapshot` is truthy.
 *
 * @param input - Object carrying the fields listed above.
 * @returns The complete prompt as a single string.
 */
+ function buildDiagnosePrompt(input) {
1381
+ const { script, specMarkdown, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
1382
+ const numbered = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
1383
+ return `You are diagnosing a failing E2E test. The test was generated from a recorded trace of the original interaction. Compare the failing run against the original spec and recorded actions to determine WHY the test failed and what the right fix is.
1384
+
1385
+ ## Output language
1386
+
1387
+ Write all human-readable fields (\`reasoning\`, \`reason\`) in **${outputLanguage}** (BCP-47 tag).
1388
+ Selectors, file paths, identifiers, code, type names (TIMING_ISSUE, etc.), JSON keys, and quoted strings stay verbatim regardless of language.
1389
+
1390
+ ## You have read-only filesystem tools
1391
+
1392
+ You can call \`Grep\`, \`Glob\`, and \`Read\` against the current repository before producing the JSON.
1393
+
1394
+ For SELECTOR_DRIFT specifically the failure log is usually NOT enough on its own — the runner only reports "selector X not visible". To confirm a rename, search the application source for the *type* of selector that's failing:
1395
+
1396
+ - For \`[aria-label='OLD']\` failures: \`Grep\` for \`aria-label=\` (or i18n key \`OLD\`) in the app source. If you find a near-miss like \`aria-label="NEW"\` whose text is a superset/rephrase of the failing label, that is your evidence.
1397
+ - For \`[placeholder='OLD']\` failures: \`Grep\` for \`placeholder=\`.
1398
+ - For \`[role='OLD']\` or \`[data-testid='OLD']\`: same pattern.
1399
+ - For \`text=OLD\` failures: \`Grep\` the source / i18n bundles for \`OLD\`. Locale files (\`*.json\`, \`*.yml\`, \`messages.ts\`, etc.) often hold the canonical strings.
1400
+
1401
+ You have **up to 10 tool turns**. Spend them on grep/read; do not loop. Only when you have concrete file:line evidence should you emit SELECTOR_DRIFT — otherwise prefer UNKNOWN with confidence < 0.4 and let the human decide.
1402
+
1403
+ Do NOT attempt to write, edit, run shell commands, or hit the network. Only Grep/Glob/Read.
1404
+
1405
+ ## Diagnosis categories
1406
+
1407
+ Pick exactly ONE category. The output JSON must follow the shape for that category.
1408
+
1409
+ 1. TIMING_ISSUE — element not yet present because the page hasn't loaded / navigated. Fix by inserting or extending sleeps.
1410
+ {
1411
+ "diagnosis": {
1412
+ "type": "TIMING_ISSUE",
1413
+ "fixes": [
1414
+ { "kind": "insert", "line": <1-based>, "seconds": <int>, "reason": "<short>" },
1415
+ { "kind": "increase", "line": <1-based of existing sleep>, "increase_to": <int>, "reason": "<short>" }
1416
+ ]
1417
+ },
1418
+ "confidence": <0.0-1.0>,
1419
+ "reasoning": "<why timing is the cause>"
1420
+ }
1421
+
1422
+ 2. OVER_ASSERTION — the test is asserting something the spec never required, OR a recorded assertion that is environment-dependent (e.g. a placeholder text that varies). The right fix is to remove those lines from the test.
1423
+ {
1424
+ "diagnosis": {
1425
+ "type": "OVER_ASSERTION",
1426
+ "lines": [<1-based line numbers to remove>],
1427
+ "reason": "<short>"
1428
+ },
1429
+ "confidence": <0.0-1.0>,
1430
+ "reasoning": "<why this assertion isn't required by the spec>"
1431
+ }
1432
+
1433
+ 3. SELECTOR_DRIFT — the page is healthy but a selector has been renamed/refined since the trace was recorded. The failure log will typically contain a snapshot showing the new selector. ONLY use this when you can name the exact replacement selector.
1434
+ {
1435
+ "diagnosis": {
1436
+ "type": "SELECTOR_DRIFT",
1437
+ "line": <1-based>,
1438
+ "oldSelector": "<exact string in current line>",
1439
+ "newSelector": "<exact replacement>",
1440
+ "reason": "<short>"
1441
+ },
1442
+ "confidence": <0.0-1.0>,
1443
+ "reasoning": "<evidence from failure log>"
1444
+ }
1445
+
1446
+ 4. DATA_MISSING — the test depends on data (a record, a setup, a logged-in state) that no longer exists. Not auto-fixable; the human must reseed or update the spec.
1447
+ {
1448
+ "diagnosis": { "type": "DATA_MISSING", "reason": "<what is missing>" },
1449
+ "confidence": <0.0-1.0>,
1450
+ "reasoning": "<evidence>"
1451
+ }
1452
+
1453
+ 5. UNKNOWN — none of the above fit, or evidence is too weak to choose.
1454
+ {
1455
+ "diagnosis": { "type": "UNKNOWN", "reason": "<short>" },
1456
+ "confidence": <0.0-1.0>,
1457
+ "reasoning": "<what you saw and why you can't classify>"
1458
+ }
1459
+
1460
+ ## Confidence guidance
1461
+
1462
+ - 0.9-1.0: failure log directly shows the cause (e.g. "selector X not found, snapshot lists Y" → SELECTOR_DRIFT)
1463
+ - 0.7-0.9: strong indirect evidence (e.g. timing pattern after navigation, or assertion text that doesn't appear in spec)
1464
+ - 0.4-0.7: plausible classification but multiple categories could explain it
1465
+ - < 0.4: prefer UNKNOWN over guessing
1466
+
1467
+ ## Rules
1468
+
1469
+ - Your **final** assistant message must start with \`{\` and end with \`}\` — a single JSON object, nothing before or after. No prose preamble like "Confirmed: ...", no markdown fences, no commentary, no tool calls in the same turn. If you have an analysis sentence, put it in the \`reasoning\` field.
1470
+ - Line numbers refer to the numbered test script below (1-based).
1471
+ - For SELECTOR_DRIFT, \`oldSelector\` must match a substring of the script at that line; \`newSelector\` must be backed by a concrete file:line you read with Grep/Read (do not invent). Cite the evidence in \`reasoning\`.
1472
+ - For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`).
1473
+ - Cross-check assertions against the spec markdown. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.
1474
+
1475
+ ## Test Spec (test-spec.md)
1476
+ ${specMarkdown}
1477
+
1478
+ ## Recorded Actions (actions.json summary)
1479
+ ${actions.map((a, i) => {
1480
+ const parts = [`${i + 1}. ${a.command}`];
1481
+ if (a.assertType) parts.push(`assertType="${a.assertType}"`);
1482
+ if (a.selector) parts.push(`selector="${a.selector}"`);
1483
+ if (a.value) parts.push(`value="${a.value}"`);
1484
+ if (a.observation) parts.push(`→ ${a.observation}`);
1485
+ return parts.join(" ");
1486
+ }).join("\n")}
1487
+
1488
+ ## Test Script (with line numbers)
1489
+ ${numbered}
1490
+
1491
+ ## Failure Log
1492
+ ${failureLog.slice(0, 4e3)}${pageSnapshot ? formatPageSnapshot(pageSnapshot) : ""}`;
1493
+ }
1494
+ /**
1495
+ * Page snapshot captured by ccqa right after the failure (agent-browser
1496
+ * accessibility tree). When present, it usually decides SELECTOR_DRIFT vs
1497
+ * TIMING_ISSUE: a near-miss aria-label / role / placeholder in the
1498
+ * snapshot is direct evidence of a rename, while a tree that doesn't
1499
+ * contain the failing locator at all (without a near-miss) points to a
1500
+ * still-loading page or genuinely missing element.
+ *
+ * Wraps the snapshot in a "## Page Snapshot" prompt section: a heading,
+ * three interpretation hints, and the snapshot itself inside a fenced block.
+ *
+ * @param snapshot - Snapshot text, embedded verbatim inside the fence.
+ * @returns The section text, beginning with a blank line so it can be
+ *   appended directly after the failure log.
1501
+ */
1502
+ function formatPageSnapshot(snapshot) {
1503
+ return `
1504
+
1505
+ ## Page Snapshot (accessibility tree captured right after the failure)
1506
+
1507
+ This is the live state of the page when the test failed. Prefer this over your own assumptions:
1508
+
1509
+ - If a near-miss of the failing selector appears here (e.g. failing \`[aria-label='A']\` and snapshot contains \`aria-label="A-prime"\`), that is direct evidence of SELECTOR_DRIFT — propose the snapshot's value as \`newSelector\`.
1510
+ - If the failing locator is genuinely absent and no near-miss exists, the page may be still loading (TIMING_ISSUE) or the spec is asserting something not on this page (OVER_ASSERTION / DATA_MISSING).
1511
+ - If the snapshot looks unrelated to the spec (e.g. error page, login wall), DATA_MISSING is likely.
1512
+
1513
+ \`\`\`
1514
+ ${snapshot}
1515
+ \`\`\``;
1516
+ }
1517
+ //#endregion
1518
+ //#region src/diagnose/diagnose.ts
1519
/**
 * Ask the model to diagnose a failing test and parse its answer.
 *
 * Invokes `invokeClaudeStreaming` with the diagnose prompt, the Read / Grep /
 * Glob tools and a 10-turn limit, then tries each JSON candidate extracted
 * from the reply until one normalises to a diagnosis result.
 *
 * @param input - Passed through to `buildDiagnosePrompt`.
 * @returns `{ result, raw, sdkError }`. `result` is null when the call
 *   reported an error or produced no text; when text came back but no
 *   candidate normalised, `result` is an UNKNOWN diagnosis with confidence 0
 *   whose reasoning holds the truncated raw reply.
 */
async function diagnose(input) {
  const request = {
    prompt: buildDiagnosePrompt(input),
    allowedTools: ["Read", "Grep", "Glob"],
    maxTurns: 10
  };
  const response = await invokeClaudeStreaming(request, () => {});
  const raw = response.result;
  if (response.isError) {
    return { result: null, raw: raw ?? "", sdkError: true };
  }
  if (!raw) {
    return { result: null, raw: "", sdkError: false };
  }

  for (const candidate of extractJsonCandidates(raw)) {
    let parsed;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not valid JSON; try the next candidate.
      continue;
    }
    const normalised = normaliseResult(parsed);
    if (normalised) {
      return { result: normalised, raw, sdkError: false };
    }
  }

  const fallback = {
    diagnosis: {
      type: "UNKNOWN",
      reason: "diagnose returned no parseable diagnosis JSON"
    },
    confidence: 0,
    reasoning: truncate$1(raw, 1e3)
  };
  return { result: fallback, raw, sdkError: false };
}
1567
/**
 * Pull every plausible JSON object out of `raw`. We try, in order:
 *   1. The whole string with code fences stripped (the prompt asks for
 *      JSON-only, so this is the happy path).
 *   2. Each balanced top-level `{...}` block found by scanning the text. The
 *      model sometimes prefixes the JSON with a "Confirmed: ..." sentence or
 *      mentions partial JSON in its tool-using reasoning; the *last*
 *      well-formed object comes first because it is most likely the final
 *      answer, then earlier ones as a fallback.
 *
 * The caller `JSON.parse`s each candidate and stops at the first match
 * that normalises to a known DiagnosisResult.
 *
 * @param {string} raw - Full text of the model's reply.
 * @returns {string[]} De-duplicated candidate strings in the order to try.
 */
function extractJsonCandidates(raw) {
  const out = [];
  const stripped = stripFence(raw);
  if (stripped) out.push(stripped);
  const blocks = [];
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;
  for (let i = 0; i < raw.length; i++) {
    const ch = raw[i];
    if (inString) {
      // Inside a double-quoted string braces are data; only track escapes
      // and the closing quote.
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === "\"") inString = false;
      continue;
    }
    if (ch === "\"") {
      inString = true;
      continue;
    }
    if (ch === "{") {
      if (depth === 0) start = i;
      depth++;
    } else if (ch === "}") {
      // A stray closer outside any object (e.g. a "}" mentioned in prose)
      // must not push depth negative: that would stop the next top-level
      // "{" from being recognised as the start of a block.
      if (depth === 0) continue;
      depth--;
      if (depth === 0) {
        blocks.push(raw.slice(start, i + 1));
        start = -1;
      }
    }
  }
  // Latest block first; skip anything already queued (e.g. the stripped whole).
  for (let i = blocks.length - 1; i >= 0; i--) {
    const block = blocks[i];
    if (!out.includes(block)) out.push(block);
  }
  return out;
}
1618
/**
 * Cap `s` at `max` characters, appending a marker that states how many
 * characters were dropped. Strings already within the limit are returned as is.
 *
 * @param s - Text to cap.
 * @param max - Maximum number of characters kept from `s`.
 */
function truncate$1(s, max) {
  if (s.length <= max) return s;
  const omitted = s.length - max;
  return `${s.slice(0, max)}... [truncated, ${omitted} more chars]`;
}
1621
/**
 * Remove one leading and one trailing Markdown code fence (optionally tagged
 * `json`, case-insensitive) plus surrounding whitespace.
 *
 * @param raw - Text that may be wrapped in a fence.
 * @returns The unwrapped, trimmed text.
 */
function stripFence(raw) {
  const withoutOpener = raw.trim().replace(/^```(?:json)?\s*\n?/i, "");
  const withoutCloser = withoutOpener.replace(/\n?```\s*$/i, "");
  return withoutCloser.trim();
}
1624
/**
 * Validate a parsed reply and coerce it into `{ diagnosis, confidence, reasoning }`.
 *
 * @param parsed - Value produced by `JSON.parse`.
 * @returns The normalised result, or null when `parsed` is not a plain object
 *   or its `diagnosis` does not normalise. A non-numeric `confidence` becomes
 *   0 (numeric values are clamped to 0..1) and a non-string `reasoning`
 *   becomes "".
 */
function normaliseResult(parsed) {
  if (!isObject(parsed)) return null;
  const diagnosis = normaliseDiagnosis(parsed["diagnosis"]);
  if (!diagnosis) return null;

  const rawConfidence = parsed["confidence"];
  const rawReasoning = parsed["reasoning"];
  const confidence = typeof rawConfidence === "number" ? clamp(rawConfidence, 0, 1) : 0;
  const reasoning = typeof rawReasoning === "string" ? rawReasoning : "";
  return { diagnosis, confidence, reasoning };
}
1634
/**
 * Validate the `diagnosis` object of a reply and rebuild it with only the
 * fields that belong to its category.
 *
 * @param raw - Candidate diagnosis value.
 * @returns The cleaned diagnosis, or null when `raw` is not a plain object,
 *   its `type` is not a known category, or a required field is missing
 *   (TIMING_ISSUE without usable fixes, OVER_ASSERTION without numeric
 *   lines, SELECTOR_DRIFT without a numeric line and two non-empty selectors).
 */
function normaliseDiagnosis(raw) {
  if (!isObject(raw)) return null;
  const type = raw["type"];
  // A missing or non-string reason is normalised to the empty string.
  const reason = typeof raw["reason"] === "string" ? raw["reason"] : "";

  if (type === "TIMING_ISSUE") {
    const fixes = normaliseSleepFixes(raw["fixes"]);
    return fixes.length === 0 ? null : { type: "TIMING_ISSUE", fixes };
  }

  if (type === "OVER_ASSERTION") {
    const candidates = Array.isArray(raw["lines"]) ? raw["lines"] : [];
    const lines = candidates.filter((n) => typeof n === "number" && Number.isFinite(n));
    return lines.length === 0 ? null : { type: "OVER_ASSERTION", lines, reason };
  }

  if (type === "SELECTOR_DRIFT") {
    const line = raw["line"];
    const oldSelector = raw["oldSelector"];
    const newSelector = raw["newSelector"];
    const hasLine = typeof line === "number";
    const hasOld = typeof oldSelector === "string" && oldSelector !== "";
    const hasNew = typeof newSelector === "string" && newSelector !== "";
    if (!hasLine || !hasOld || !hasNew) return null;
    return { type: "SELECTOR_DRIFT", line, oldSelector, newSelector, reason };
  }

  if (type === "DATA_MISSING") return { type: "DATA_MISSING", reason };
  if (type === "UNKNOWN") return { type: "UNKNOWN", reason };
  return null;
}
1678
/**
 * Validate the `fixes` array of a TIMING_ISSUE diagnosis.
 *
 * An entry is read as an insert when `kind` is "insert", or when it has a
 * numeric `seconds` and no `increase_to`; otherwise it is read as an increase
 * when it has a numeric `increase_to`. Entries that are not plain objects,
 * lack a numeric `line`, or lack the duration their kind needs are dropped.
 *
 * @param raw - Candidate fixes value.
 * @returns Cleaned fixes in input order; empty when `raw` is not an array.
 */
function normaliseSleepFixes(raw) {
  if (!Array.isArray(raw)) return [];
  const fixes = [];
  for (const item of raw) {
    if (!isObject(item) || typeof item["line"] !== "number") continue;
    const line = item["line"];
    const kind = item["kind"];
    const seconds = item["seconds"];
    const increaseTo = item["increase_to"];
    const reason = typeof item["reason"] === "string" ? item["reason"] : "";
    const hasSeconds = typeof seconds === "number";
    const hasIncrease = typeof increaseTo === "number";

    const readsAsInsert = kind === "insert" || (hasSeconds && increaseTo === void 0);
    if (readsAsInsert) {
      // An explicit "insert" without a numeric duration is dropped, not
      // reinterpreted as an increase.
      if (hasSeconds) fixes.push({ kind: "insert", line, seconds, reason });
    } else if (hasIncrease) {
      fixes.push({ kind: "increase", line, increase_to: increaseTo, reason });
    }
  }
  return fixes;
}
1712
/**
 * Tell whether `v` is a non-null object that is not an array.
 *
 * @param v - Any value.
 * @returns {boolean}
 */
function isObject(v) {
  if (v === null) return false;
  if (Array.isArray(v)) return false;
  return typeof v === "object";
}
1715
/**
 * Restrict `n` to the closed interval [lo, hi].
 *
 * @param n - Value to restrict.
 * @param lo - Lower bound.
 * @param hi - Upper bound.
 * @returns `lo` when `n < lo`, `hi` when `n > hi`, otherwise `n` itself.
 */
function clamp(n, lo, hi) {
  return n < lo ? lo : n > hi ? hi : n;
}
1720
+ //#endregion
1721
+ //#region src/diagnose/interactive.ts
1722
/**
 * Show the diagnosis context and ask the user what to do with the proposed fix.
 *
 * Keeps asking until the answer is one of a/apply, s/skip, m/manual or
 * q/quit (case-insensitive, surrounding whitespace ignored).
 *
 * If stdin reaches end-of-input while a question is pending (piped or closed
 * stdin, e.g. in CI), readline never invokes the question callback, so the
 * await would never settle. That case is reported as "quit".
 *
 * @param input - Context forwarded to `printContext`.
 * @returns {Promise<"apply" | "skip" | "manual" | "quit">}
 */
async function promptForChoice(input) {
  printContext(input);
  const rl = createInterface({
    input: process.stdin,
    output: process.stdout
  });
  // Settles with null once the interface closes, whether from EOF on stdin
  // or from our own `rl.close()` in the `finally` below.
  const closed = new Promise((resolveClosed) => rl.once("close", () => resolveClosed(null)));
  try {
    while (true) {
      // `closed` is listed first so that, when the interface is already
      // closed, it wins over a question that can no longer be answered.
      const answer = await Promise.race([closed, question(rl, "[a]pply / [s]kip / [m]anual / [q]uit > ")]);
      if (answer === null) return "quit";
      switch (answer.trim().toLowerCase()) {
        case "a":
        case "apply": return "apply";
        case "s":
        case "skip": return "skip";
        case "m":
        case "manual": return "manual";
        case "q":
        case "quit": return "quit";
        default: process.stdout.write(" please answer a/s/m/q\n");
      }
    }
  } finally {
    rl.close();
  }
}
1744
/**
 * Promise wrapper around the callback form of `rl.question`.
 *
 * @param rl - Object exposing `question(prompt, callback)`.
 * @param prompt - Text shown to the user.
 * @returns Promise resolving to whatever `rl.question` passes to its callback.
 */
function question(rl, prompt) {
  return new Promise((settle) => {
    rl.question(prompt, settle);
  });
}
1747
/**
 * Write the diagnosis summary, and optionally the failure excerpt and the
 * proposed diff, to stdout with every line tagged "[fix] ".
 *
 * @param {object} ctx
 * @param {object} ctx.result - Diagnose result; reads `diagnosis`, `confidence`, `reasoning`.
 * @param {string} [ctx.diff] - Preview of the proposed change; section omitted when falsy.
 * @param {string} [ctx.failureExcerpt] - Leading part of the failure log; section omitted when falsy.
 */
function printContext({ result, diff, failureExcerpt }) {
  const emit = (text) => process.stdout.write(text);
  // Shared shape of the two optional sections: heading, prefixed body, newline.
  const section = (heading, body) => {
    if (!body) return;
    emit(heading);
    emit(prefixLines(body, "[fix]   "));
    emit("\n");
  };

  emit("\n");
  emit(`[fix] diagnosis: ${result.diagnosis.type} (confidence ${result.confidence.toFixed(2)})\n`);
  if (result.reasoning) emit(`[fix] reasoning: ${result.reasoning}\n`);
  for (const detail of formatDiagnosisDetail(result.diagnosis)) {
    emit(`[fix] ${detail}\n`);
  }
  section("\n[fix] failure excerpt:\n", failureExcerpt);
  section("\n[fix] proposed fix:\n", diff);
  emit("\n");
}
1765
/**
 * Turn a diagnosis into the human-readable detail lines shown in the
 * interactive prompt.
 *
 * @param {object} diagnosis - Object with a `type` discriminator plus the
 *   fields that type carries (`fixes`, `lines`, `line`, `oldSelector`,
 *   `newSelector`, `reason`).
 * @returns {string[]} Detail lines; empty for an unrecognised type.
 */
function formatDiagnosisDetail(diagnosis) {
  switch (diagnosis.type) {
    case "TIMING_ISSUE": return [`fixes: ${diagnosis.fixes.map((f) => f.kind === "insert" ? `insert ${f.seconds}s @ line ${f.line}` : `increase to ${f.increase_to}s @ line ${f.line}`).join(", ")}`];
    case "OVER_ASSERTION": return [`lines: ${diagnosis.lines.join(", ")}`, `reason: ${diagnosis.reason}`];
    case "SELECTOR_DRIFT": return [`line ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}"`, `reason: ${diagnosis.reason}`];
    case "DATA_MISSING":
    case "UNKNOWN": return [`reason: ${diagnosis.reason}`];
    // The caller iterates the result with for...of; returning undefined here
    // would throw a TypeError in the middle of printing the prompt.
    default: return [];
  }
}
1774
/**
 * Prepend `prefix` to every "\n"-separated line of `text`.
 *
 * @param {string} text - Possibly multi-line text.
 * @param {string} prefix - String placed at the start of each line.
 * @returns {string} The prefixed lines joined with "\n" (an empty input yields just the prefix).
 */
function prefixLines(text, prefix) {
  const tagged = [];
  for (const row of text.split("\n")) {
    tagged.push(`${prefix}${row}`);
  }
  return tagged.join("\n");
}
1777
+ //#endregion
1778
+ //#region src/diagnose/snapshot.ts
1779
// CommonJS-style resolver anchored at this module's URL; used below to locate
// the agent-browser entry script via require.resolve.
const require = createRequire(import.meta.url);
// Milliseconds `agent-browser snapshot` may run before the child is signalled (10 s).
const SNAPSHOT_TIMEOUT_MS = 1e4;
// Milliseconds `agent-browser close` may run before the child is signalled (10 s).
const CLOSE_TIMEOUT_MS = 1e4;
// Cap applied to snapshot output by truncate(). Despite the name, truncate()
// compares it against the string's length, not its encoded byte size.
const MAX_OUTPUT_BYTES = 6e4;
1783
/**
 * Locate the agent-browser entry script.
 *
 * @returns {string | null} The path returned by `require.resolve`, or null
 *   when resolution throws.
 */
function resolveAgentBrowserBin() {
  let binPath = null;
  try {
    binPath = require.resolve("agent-browser/bin/agent-browser.js");
  } catch {
    // Resolution failed; callers treat null as "feature unavailable".
  }
  return binPath;
}
1790
/**
 * Run `agent-browser snapshot` against the session that the failed vitest
 * run just used, and return its accessibility-tree dump.
 *
 * Returns null when agent-browser is missing, the child fails to spawn or
 * exits non-zero, it prints nothing, or the call exceeds
 * {@link SNAPSHOT_TIMEOUT_MS}. We never throw — a missing snapshot just
 * means diagnose has less context.
 *
 * The output is truncated to {@link MAX_OUTPUT_BYTES} so the prompt stays
 * within budget on large pages.
 *
 * @param {string} sessionName - Value exported to the child as AGENT_BROWSER_SESSION.
 * @returns {Promise<string | null>} Trimmed (and possibly truncated) stdout, or null.
 */
async function captureSnapshot(sessionName) {
  const abBin = resolveAgentBrowserBin();
  if (!abBin) return null;
  return new Promise((resolve) => {
    const child = spawn(process.execPath, [abBin, "snapshot"], {
      env: {
        ...process.env,
        AGENT_BROWSER_SESSION: sessionName
      },
      // stderr is never inspected, so don't pipe it: an unread pipe only
      // buffers data for nothing.
      stdio: [
        "ignore",
        "pipe",
        "ignore"
      ]
    });
    let stdout = "";
    let settled = false;
    // Several events (timeout, error, close) can race; only the first wins.
    const settle = (value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve(value);
    };
    const timer = setTimeout(() => {
      child.kill("SIGTERM");
      // Resolve right away instead of waiting for the child to die, and
      // escalate if it ignores SIGTERM so it cannot linger forever.
      setTimeout(() => child.kill("SIGKILL"), 2e3).unref();
      settle(null);
    }, SNAPSHOT_TIMEOUT_MS);
    child.stdout.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.on("error", () => settle(null));
    // "close" fires only after the stdio streams have ended, whereas "exit"
    // can fire while stdout still has unread data.
    child.on("close", (code) => {
      if (code !== 0) {
        settle(null);
        return;
      }
      const trimmed = stdout.trim();
      settle(trimmed ? truncate(trimmed, MAX_OUTPUT_BYTES) : null);
    });
  });
}
1850
/**
 * Shorten `s` to at most `maxBytes` UTF-16 code units and append a note
 * saying how many were dropped.
 *
 * Note: despite the parameter name, the limit is measured with `s.length`
 * (UTF-16 code units), not encoded bytes.
 *
 * @param {string} s - Text to shorten.
 * @param {number} maxBytes - Maximum number of code units to keep.
 * @returns {string} `s` unchanged when short enough, otherwise the kept
 *   prefix followed by "\n... [truncated, N more chars]".
 */
function truncate(s, maxBytes) {
  if (s.length <= maxBytes) return s;
  let cut = maxBytes;
  if (cut > 0) {
    const before = s.charCodeAt(cut - 1);
    const after = s.charCodeAt(cut);
    // Don't cut between the two halves of a surrogate pair: the dangling
    // high surrogate would be emitted as an invalid character.
    if (before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff) cut -= 1;
  }
  return `${s.slice(0, cut)}\n... [truncated, ${s.length - cut} more chars]`;
}
1854
/**
 * Close an agent-browser session by name. Used before/after a `ccqa generate`
 * run so a wedged daemon from a previous attempt can't hang the next one.
 *
 * Always resolves; never throws. If the binary is missing, the session
 * doesn't exist, or the call exceeds {@link CLOSE_TIMEOUT_MS}, we silently
 * return — close is best-effort cleanup, not a precondition.
 *
 * @param {string} sessionName - Value exported to the child as AGENT_BROWSER_SESSION.
 * @returns {Promise<void>}
 */
async function closeSession(sessionName) {
  const abBin = resolveAgentBrowserBin();
  if (!abBin) return;
  await new Promise((resolve) => {
    const child = spawn(process.execPath, [abBin, "close"], {
      env: {
        ...process.env,
        AGENT_BROWSER_SESSION: sessionName
      },
      stdio: "ignore"
    });
    let timer;
    const finish = () => {
      clearTimeout(timer);
      resolve();
    };
    timer = setTimeout(() => {
      child.kill("SIGTERM");
      // Honour "always resolves": don't wait for a child that may ignore
      // SIGTERM. Escalate in the background so it can't linger indefinitely.
      setTimeout(() => child.kill("SIGKILL"), 2e3).unref();
      finish();
    }, CLOSE_TIMEOUT_MS);
    child.on("error", finish);
    child.on("exit", finish);
  });
}
1884
+ //#endregion
1885
+ //#region src/diagnose/loop.ts
1886
// Confidence at or above which decide() applies a fix without prompting
// (outside "auto" mode, which always applies).
const DEFAULT_CONFIDENCE_THRESHOLD = .8;
1887
/**
 * Diagnose-and-retry loop around a failing vitest run.
 *
 * Each attempt optionally captures a page snapshot, asks diagnoseAndFix for a
 * patched script, writes it to `scriptPath`, and re-runs vitest.
 *
 * @param {object} input - scriptPath, initialRun ({ exitCode, output,
 *   currentScript }), specMarkdown, actions, maxRetries, mode, runVitest,
 *   agentBrowserSession (optional) and outputLanguage.
 * @returns {Promise<boolean>} true when vitest finally passed; false when
 *   retries were exhausted or diagnoseAndFix produced no script.
 */
async function runAutoFixLoop(input) {
  const {
    scriptPath,
    initialRun,
    specMarkdown,
    actions,
    maxRetries,
    mode,
    runVitest,
    agentBrowserSession,
    outputLanguage
  } = input;
  let latest = initialRun;
  if (latest.exitCode === 0) return true;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    fix(`attempt ${attempt}/${maxRetries}`);
    blank();

    // Snapshotting only makes sense when the run was pinned to a session.
    let pageSnapshot = null;
    if (agentBrowserSession) {
      pageSnapshot = await timedPhase("page snapshot", () => captureSnapshot(agentBrowserSession), "fix");
      if (pageSnapshot) {
        fix(`page snapshot: ${pageSnapshot.length} chars captured`);
      } else {
        fix("page snapshot unavailable; continuing without it");
      }
    }

    const patched = await diagnoseAndFix({
      script: latest.currentScript,
      specMarkdown,
      actions,
      failureLog: latest.output,
      pageSnapshot: pageSnapshot ?? void 0,
      mode,
      outputLanguage
    });
    if (!patched) {
      fix("bailed out; see diagnosis above");
      return false;
    }

    await writeFile(scriptPath, patched, "utf-8");
    fix(`saved: ${scriptPath}`);
    blank();

    latest = await timedPhase(`vitest run #${attempt + 1}`, () => runVitest(scriptPath), "run");
    if (latest.exitCode === 0) return true;
  }
  return false;
}
1922
/**
 * Run one diagnose pass and, if allowed, produce the patched script.
 *
 * Flow: call diagnose → report it → hand off for non-fixable categories →
 * build the patch → let decide() pick auto-apply, low-confidence skip, or an
 * interactive prompt.
 *
 * @param {object} input - script, specMarkdown, actions, failureLog,
 *   pageSnapshot, mode and outputLanguage.
 * @returns {Promise<string | null>} The patched script text, or null when
 *   nothing should be written. Choosing "quit" at the prompt exits the
 *   process with status 1 from inside this function.
 */
async function diagnoseAndFix(input) {
  const { script, specMarkdown, actions, failureLog, pageSnapshot, mode, outputLanguage } = input;
  const request = {
    script,
    specMarkdown,
    actions,
    failureLog,
    pageSnapshot,
    outputLanguage
  };
  const outcome = await timedPhase("diagnose", () => diagnose(request), "fix");

  if (outcome.sdkError) {
    fix("diagnose: SDK error talking to Claude");
    if (outcome.raw) fix(`diagnose raw: ${truncateForLog(outcome.raw)}`);
    hint("re-run later, or check ANTHROPIC_API_KEY / network connectivity");
    return null;
  }
  if (!outcome.result) {
    fix("diagnose: empty response from LLM");
    hint("re-run; if this keeps happening the failure log may be too short to diagnose");
    return null;
  }

  const result = outcome.result;
  // Every "give up" path ends the same way: print the hand-off hints, return null.
  const giveUp = () => {
    handoffToUser(result, outcome.raw, outputLanguage);
    return null;
  };

  reportDiagnosis(result);
  const kind = result.diagnosis.type;
  if (kind === "DATA_MISSING" || kind === "UNKNOWN") return giveUp();

  const patch = applyDiagnosis(script, result.diagnosis);
  if (!patch.applied) {
    fix(`cannot apply: ${patch.reason}`);
    return giveUp();
  }

  const verdict = decide(result, mode);
  if (verdict === "apply-auto") {
    fix(`applying automatically: ${patch.summary}`);
    return patch.script;
  }
  if (verdict === "skip-low-confidence") {
    fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (--no-interactive)`);
    return giveUp();
  }

  const choice = await promptForChoice({
    result,
    diff: previewDiff(script, patch.script),
    failureExcerpt: failureLog.slice(0, 800)
  });
  if (choice === "apply") {
    fix(`applied: ${patch.summary}`);
    return patch.script;
  }
  if (choice === "skip") {
    fix("skipped; leaving script untouched");
    return null;
  }
  if (choice === "manual") {
    fix("paused for manual edit");
    return giveUp();
  }
  if (choice === "quit") {
    fix("user quit");
    process.exit(1);
  }
}
1985
/**
 * Choose how a proposed fix should be handled.
 *
 * @param {{ confidence: number }} result - Diagnose result.
 * @param {"auto" | "non-interactive" | "interactive"} mode - Fix mode.
 * @returns {"apply-auto" | "skip-low-confidence" | "interactive"}
 *   "apply-auto" in auto mode or when confidence reaches the threshold;
 *   otherwise "skip-low-confidence" for non-interactive mode and
 *   "interactive" for anything else.
 */
function decide(result, mode) {
  if (mode === "auto") return "apply-auto";
  if (result.confidence >= DEFAULT_CONFIDENCE_THRESHOLD) return "apply-auto";
  return mode === "non-interactive" ? "skip-low-confidence" : "interactive";
}
1991
/**
 * Log the diagnosis type, its confidence (two decimals) and, when present,
 * the reasoning through the `fix` logger.
 *
 * @param {{ diagnosis: { type: string }, confidence: number, reasoning?: string }} result
 */
function reportDiagnosis(result) {
  const { diagnosis, confidence, reasoning } = result;
  fix(`diagnosis: ${diagnosis.type}`);
  fix(`confidence: ${confidence.toFixed(2)}`);
  if (reasoning) {
    fix(`reasoning: ${reasoning}`);
  }
}
1996
/**
 * Print the category-specific hint lines that tell the user what to do next,
 * followed by the truncated raw diagnose output when there is any.
 *
 * Used on every path where the loop stops on its own: the diagnosis is not
 * auto-fixable, the proposed fix could not be applied to the current script,
 * confidence was too low under --no-interactive, or the user asked for a
 * manual edit. The point is that the user always learns which side (test
 * artifacts vs. application) probably needs the next action.
 *
 * @param {{ diagnosis: object }} result - Diagnose result.
 * @param {string | undefined} raw - Raw diagnose output, if any.
 * @param {string | undefined} language - Requested output language tag.
 */
function handoffToUser(result, raw, language) {
  const hintLines = handoffMessage(result.diagnosis, normLang(language));
  for (const text of hintLines) {
    hint(text);
  }
  if (!raw) return;
  fix(`diagnose raw: ${truncateForLog(raw)}`);
}
2012
/**
 * Map a language tag onto one of the two supported hint languages.
 *
 * @param {string | undefined} language - Tag such as "en", "ja" or "ja-JP".
 * @returns {"en" | "ja"} "ja" when the tag starts with "ja" (case-insensitive);
 *   "en" for everything else, including a missing or empty tag.
 */
function normLang(language) {
  const isJapanese = Boolean(language) && language.toLowerCase().startsWith("ja");
  return isJapanese ? "ja" : "en";
}
2016
// Hint renderers keyed by the normalised language code produced by normLang().
const HANDOFF = {
  en: handoffEn,
  ja: handoffJa
};
2020
/**
 * Render the hand-off hint lines for a diagnosis in the given language.
 *
 * @param {object} diagnosis - Diagnosis with a `type` discriminator.
 * @param {"en" | "ja"} language - Key into the HANDOFF table.
 * @returns {string[]} Whatever the selected renderer returns.
 */
function handoffMessage(diagnosis, language) {
  const render = HANDOFF[language];
  return render(diagnosis);
}
2023
/**
 * English hand-off hints for a diagnosis.
 *
 * @param {object} diagnosis - Diagnosis with a `type` discriminator and the
 *   fields that type carries.
 * @returns {string[] | undefined} Hint lines for the five known types;
 *   undefined for any other type.
 */
function handoffEn(diagnosis) {
  const { type } = diagnosis;
  if (type === "DATA_MISSING") {
    return [
      `application-side issue: required data is missing. ${diagnosis.reason}`,
      "next step: seed the data (or update test-spec.md prerequisites), then re-run trace + generate."
    ];
  }
  if (type === "UNKNOWN") {
    return [
      `could not classify the failure. ${diagnosis.reason}`,
      "next step: read the failure log above, decide whether the test or the app is wrong, and fix manually. consider re-running ccqa trace if the recorded flow no longer matches the live app."
    ];
  }
  if (type === "SELECTOR_DRIFT") {
    return [
      "selector likely drifted but auto-apply was not safe.",
      `proposed: line ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason}).`,
      "next step: confirm in the live app and either accept the proposal manually, or re-run ccqa trace to recapture the new selector."
    ];
  }
  if (type === "OVER_ASSERTION") {
    return [
      `assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`,
      "next step: cross-check test-spec.md. either delete the assertion from the test, or tighten the spec to require it."
    ];
  }
  if (type === "TIMING_ISSUE") {
    return [
      "timing fix proposed but couldn't be applied automatically.",
      "next step: insert a sleep manually before the failing line, or re-run with a higher confidence trace."
    ];
  }
  return undefined;
}
2036
/**
 * Japanese hand-off hints for a diagnosis (same structure as the English
 * renderer, with translated text).
 *
 * @param {object} diagnosis - Diagnosis with a `type` discriminator and the
 *   fields that type carries.
 * @returns {string[] | undefined} Hint lines for the five known types;
 *   undefined for any other type.
 */
function handoffJa(diagnosis) {
  const { type } = diagnosis;
  if (type === "DATA_MISSING") {
    return [
      `アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`,
      "次のステップ: データを seed する(または test-spec.md の prerequisites を更新)してから ccqa trace + generate をやり直してください。"
    ];
  }
  if (type === "UNKNOWN") {
    return [
      `失敗を分類できませんでした。${diagnosis.reason}`,
      "次のステップ: 上の失敗ログを確認し、テストとアプリのどちらが原因か判断して手動で修正してください。記録した手順がアプリの現状と合わない場合は ccqa trace の再実行を検討してください。"
    ];
  }
  if (type === "SELECTOR_DRIFT") {
    return [
      "selector が変わった可能性が高いですが、自動適用は安全でないと判断しました。",
      `提案: 行 ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason})`,
      "次のステップ: アプリで新 selector を確認し、手動で適用するか ccqa trace をやり直して新しい selector を取り直してください。"
    ];
  }
  if (type === "OVER_ASSERTION") {
    return [
      `spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`,
      "次のステップ: test-spec.md と照合して、テスト側の assertion を削るか、spec 側を更新してください。"
    ];
  }
  if (type === "TIMING_ISSUE") {
    return [
      "timing 関連の修正案は出ましたが、自動適用できませんでした。",
      "次のステップ: 失敗行の前に手動で sleep を入れるか、より信頼度の高い trace を取り直してください。"
    ];
  }
  return undefined;
}
2049
/**
 * Flatten text onto one line and cap it at 400 characters for log output.
 *
 * @param {string} s - Raw text, possibly multi-line.
 * @returns {string} `s` with each run of newlines replaced by " ⏎ "; when the
 *   flattened text exceeds 400 characters, the first 400 followed by
 *   "... [+N chars]".
 */
function truncateForLog(s) {
  const LIMIT = 400;
  const flat = s.replace(/\n+/g, " ⏎ ");
  if (flat.length <= LIMIT) return flat;
  const overflow = flat.length - LIMIT;
  return `${flat.slice(0, LIMIT)}... [+${overflow} chars]`;
}
2053
/**
 * Derive the fix mode from parsed CLI options.
 *
 * @param {{ auto?: boolean, interactive?: boolean, noInteractive?: boolean }} opts
 * @returns {"auto" | "non-interactive" | "interactive"} "auto" wins when
 *   set; otherwise "non-interactive" when prompting was disabled, else
 *   "interactive".
 */
function resolveMode(opts) {
  if (opts.auto) return "auto";
  const promptingDisabled = opts.interactive === false || opts.noInteractive;
  return promptingDisabled ? "non-interactive" : "interactive";
}
2058
+ //#endregion
1238
2059
  //#region src/cli/generate.ts
1239
/**
 * `ccqa generate <feature/spec>`: regenerate test.spec.ts from the recorded
 * trace actions, run it, and enter the auto-fix loop on failure.
 */
const generateCommand = new Command("generate")
  .argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)")
  .description("Generate agent-browser test script from recorded trace actions. test.spec.ts is regenerated from actions.json on every run; pass --force to overwrite manual edits.")
  .option("--max-retries <n>", "Maximum number of auto-fix retries", "3")
  .option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)")
  .option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up")
  .option("--force", "Overwrite an existing test.spec.ts without warning")
  .option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)")
  .option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en")
  .action(async (specPath, opts) => {
    const { featureName, specName } = parseSpecPath(specPath);
    const maxRetries = Number.parseInt(opts.maxRetries, 10);
    // parseInt yields NaN for non-numeric input; the retry loop's
    // `attempt <= maxRetries` check would then never pass, and the command
    // would report "auto-fix exhausted" without having tried anything.
    if (!Number.isInteger(maxRetries) || maxRetries < 0) {
      error(`--max-retries must be a non-negative integer (got '${opts.maxRetries}')`);
      process.exit(1);
    }
    const mode = resolveMode(opts);
    // `--no-snapshot` sets opts.snapshot to false; it is not false otherwise.
    const useSnapshot = opts.snapshot !== false;
    await runGenerate(featureName, specName, maxRetries, mode, opts.force ?? false, useSnapshot, opts.language ?? "en");
  });
1243
/**
 * Generate test.spec.ts for one spec from its recorded trace, run it under
 * vitest, and try to auto-fix it when it fails.
 *
 * @param {string} featureName - Feature part of the spec id.
 * @param {string} specName - Spec part of the spec id.
 * @param {number} maxRetries - Maximum number of auto-fix attempts.
 * @param {"auto" | "non-interactive" | "interactive"} mode - Fix mode.
 * @param {boolean} force - Overwrite an existing script without asking.
 * @param {boolean} useSnapshot - Pin an agent-browser session so page
 *   snapshots can be captured after a failure.
 * @param {string} outputLanguage - Language for diagnose reasoning / hints.
 * @returns {Promise<void>} Resolves on success or user abort; exits the
 *   process with status 1 when the test still fails after auto-fix.
 */
async function runGenerate(featureName, specName, maxRetries, mode, force, useSnapshot, outputLanguage) {
  header("generate", `${featureName}/${specName}`);
  await ensureCcqaDir();
  const existingScriptPath = await getTestScript(featureName, specName);
  if (existingScriptPath && !force) {
    if (!await confirmOverwrite(existingScriptPath)) {
      info("aborted; pass --force to overwrite without prompting");
      return;
    }
  }
  const { path: actionsPath, actions } = await getTraceActions(featureName, specName);
  meta("trace", actionsPath);
  meta("actions", actions.length);
  const specContent = await readSpecFile(featureName, specName);
  const spec = parseTestSpec(specContent);
  const setupScripts = await loadSetupScripts(spec.setups);
  if (setupScripts.length > 0) meta("setups", setupScripts.map((s) => s.name).join(", "));
  meta("fix-mode", mode);
  meta("language", outputLanguage);
  blank();
  const cleanedActions = await cleanupActions$1(actions);
  if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
  const scriptPath = await saveTestScript(featureName, specName, actionsToScript(cleanedActions, spec.title, setupScripts.length > 0 ? setupScripts : void 0));
  meta("saved", scriptPath);
  blank();
  const agentBrowserSession = useSnapshot ? `ccqa-generate-${Date.now()}` : void 0;
  const runVitestForSession = (path) => runVitest$1(path, agentBrowserSession);
  let signalHandler = null;
  if (agentBrowserSession) {
    // Clear any session left under this name before starting.
    await closeSession(agentBrowserSession);
    signalHandler = () => {
      closeSession(agentBrowserSession).finally(() => process.exit(130));
    };
    process.once("SIGINT", signalHandler);
    process.once("SIGTERM", signalHandler);
  }
  let failed = false;
  try {
    const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(scriptPath), "run");
    if (initialRun.exitCode === 0) {
      hint(`run 'ccqa run ${featureName}/${specName}' to execute the test`);
      return;
    }
    if (await runAutoFixLoop({
      scriptPath,
      initialRun,
      specMarkdown: specContent,
      actions: cleanedActions,
      maxRetries,
      mode,
      runVitest: runVitestForSession,
      agentBrowserSession,
      outputLanguage
    })) {
      hint(`run 'ccqa run ${featureName}/${specName}' to execute the test`);
      return;
    }
    warn("auto-fix exhausted; test still failing");
    // Do not call process.exit() here: it terminates immediately, so the
    // finally block (and its session cleanup) would never run.
    failed = true;
  } finally {
    if (signalHandler) {
      process.off("SIGINT", signalHandler);
      process.off("SIGTERM", signalHandler);
    }
    if (agentBrowserSession) await closeSession(agentBrowserSession);
  }
  if (failed) process.exit(1);
}
2132
/**
 * Ask the user whether an existing generated script may be overwritten.
 *
 * @param {string} path - Path of the existing script, shown in the warning.
 * @returns {Promise<boolean>} true only for an explicit "y"/"yes". Returns
 *   false without prompting when stdin is not a TTY, and false when stdin
 *   closes (EOF / Ctrl-D) before an answer is given.
 */
async function confirmOverwrite(path) {
  if (!process.stdin.isTTY) {
    warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
    return false;
  }
  const rl = createInterface({
    input: process.stdin,
    output: process.stdout
  });
  try {
    process.stdout.write("\n");
    process.stdout.write(`[warn] ${path} already exists.\n`);
    process.stdout.write(`[warn] generate will regenerate it from actions.json and any manual edits will be lost.\n`);
    const answer = await new Promise((res) => {
      // rl.question() never calls back once the interface closes, so treat
      // a close as an empty answer (the default "N") instead of waiting forever.
      rl.once("close", () => res(""));
      rl.question("Overwrite? [y/N] ", res);
    });
    const norm = answer.trim().toLowerCase();
    return norm === "y" || norm === "yes";
  } finally {
    rl.close();
  }
}
1283
- /**
1284
- * Load setup test scripts, extract test body, and replace {{placeholders}} with params values.
1285
- */
1286
2151
  async function loadSetupScripts(setups) {
1287
2152
  if (!setups?.length) return [];
1288
2153
  const result = [];
@@ -1343,45 +2208,17 @@ function replacePlaceholders(body, params) {
1343
2208
  function escapeRegExp(s) {
1344
2209
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1345
2210
  }
1346
- async function autoFixWithLLM$1(script, failureLog) {
1347
- try {
1348
- const { result, isError } = await invokeClaudeStreaming({
1349
- prompt: buildAutoFixPrompt(script, failureLog),
1350
- disableBuiltinTools: true,
1351
- maxTurns: 1
1352
- }, () => {});
1353
- if (isError || !result) return null;
1354
- const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
1355
- const fixes = JSON.parse(json);
1356
- if (!Array.isArray(fixes) || fixes.length === 0) return null;
1357
- return applySleepFixes$1(script, fixes);
1358
- } catch {
1359
- return null;
1360
- }
1361
- }
1362
- function applySleepFixes$1(script, fixes) {
1363
- const lines = script.split("\n");
1364
- for (const fix of fixes) if ("increase_to" in fix) {
1365
- const idx = fix.line - 1;
1366
- if (idx >= 0 && idx < lines.length) lines[idx] = lines[idx].replace(/spawnSync\("sleep",\s*\["\d+"\]/, `spawnSync("sleep", ["${fix.increase_to}"]`);
1367
- }
1368
- const inserts = fixes.filter((f) => "seconds" in f && !("increase_to" in f)).sort((a, b) => b.line - a.line);
1369
- for (const fix of inserts) {
1370
- const idx = fix.line - 1;
1371
- if (idx >= 0 && idx <= lines.length) lines.splice(idx, 0, ` spawnSync("sleep", ["${fix.seconds}"], { stdio: "inherit" });`);
1372
- }
1373
- return lines.join("\n");
1374
- }
1375
- async function runVitest$1(scriptPath) {
1376
- const { exitCode, stdout, stderr } = await spawnVitestCaptured([
2211
+ async function runVitest$1(scriptPath, agentBrowserSession) {
2212
+ const { exitCode, stdout, stderr } = await spawnVitestTeed([
1377
2213
  "run",
1378
2214
  "--config",
1379
2215
  bundledVitestConfigPath(),
1380
2216
  scriptPath
1381
- ]);
2217
+ ], agentBrowserSession ? { env: {
2218
+ ...process.env,
2219
+ AGENT_BROWSER_SESSION: agentBrowserSession
2220
+ } } : {});
1382
2221
  const currentScript = await readFile(scriptPath, "utf8");
1383
- process.stdout.write(stdout);
1384
- if (stderr) process.stderr.write(stderr);
1385
2222
  return {
1386
2223
  exitCode,
1387
2224
  output: stdout + stderr,
@@ -1436,7 +2273,7 @@ async function runTests(target) {
1436
2273
  warn(`${featureName}/${specName}: no test.spec.ts found`);
1437
2274
  continue;
1438
2275
  }
1439
- info(`▶ ${featureName}/${specName}`);
2276
+ run(`${featureName}/${specName}`);
1440
2277
  meta("test", scriptFile);
1441
2278
  blank();
1442
2279
  const reportFile = join(tmpDir, `report-${i}.json`);
@@ -1700,15 +2537,18 @@ function scrubSecrets(line, secrets) {
1700
2537
  }
1701
2538
  //#endregion
1702
2539
  //#region src/cli/generate-setup.ts
1703
/**
 * `ccqa generate-setup <name>`: clean up, validate and templatize the
 * recorded setup actions, with the same auto-fix options as `generate`.
 */
const generateSetupCommand = new Command("generate-setup")
  .argument("<name>", "Setup name to generate (e.g. login)")
  .description("Clean up, validate, and templatize setup actions")
  .option("--max-retries <n>", "Maximum number of auto-fix retries", "3")
  .option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)")
  .option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)")
  .option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up")
  .option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en")
  .action(async (name, opts) => {
    const maxRetries = Number.parseInt(opts.maxRetries, 10);
    // parseInt yields NaN for non-numeric input; reject it up front rather
    // than handing a meaningless retry count to the fix loop.
    if (!Number.isInteger(maxRetries) || maxRetries < 0) {
      error(`--max-retries must be a non-negative integer (got '${opts.maxRetries}')`);
      process.exit(1);
    }
    const mode = resolveMode(opts);
    await runGenerateSetup(name, maxRetries, opts.fromDummy ?? false, mode, opts.language ?? "en");
  });
1706
- async function runGenerateSetup(name, maxRetries, fromDummy) {
2544
+ async function runGenerateSetup(name, maxRetries, fromDummy, mode, outputLanguage) {
1707
2545
  header("generate-setup", name);
1708
2546
  await ensureCcqaDir();
1709
- const spec = parseSetupSpec(await readSetupSpecFile(name));
2547
+ const specContent = await readSetupSpecFile(name);
2548
+ const spec = parseSetupSpec(specContent);
1710
2549
  const dummyPath = join(getSetupDir(name), "test.dummy.spec.ts");
1711
2550
  const finalPath = join(getSetupDir(name), "test.spec.ts");
2551
+ let cleanedActions = [];
1712
2552
  if (fromDummy) {
1713
2553
  if (!await stat(dummyPath).then(() => true).catch(() => false)) {
1714
2554
  warn(`test.dummy.spec.ts not found. Run without --from-dummy first.`);
@@ -1719,40 +2559,52 @@ async function runGenerateSetup(name, maxRetries, fromDummy) {
1719
2559
  const { actions } = await getSetupActions(name);
1720
2560
  meta("setup", spec.title);
1721
2561
  meta("actions", actions.length);
2562
+ meta("fix-mode", mode);
2563
+ meta("language", outputLanguage);
1722
2564
  blank();
1723
- const cleanedActions = await cleanupActions(actions);
2565
+ cleanedActions = await cleanupActions(actions);
1724
2566
  if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
1725
2567
  await writeFile(dummyPath, actionsToScript(cleanedActions, spec.title), "utf-8");
1726
2568
  meta("saved", dummyPath);
1727
2569
  }
1728
2570
  blank();
1729
- let { exitCode, output, currentScript } = await runVitestResolved(dummyPath);
1730
- if (exitCode !== 0) {
1731
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
1732
- info(`auto-fix attempt ${attempt}/${maxRetries}...`);
1733
- blank();
1734
- const fixed = await autoFixWithLLM(currentScript, output);
1735
- if (!fixed) {
1736
- warn("could not determine fix from failure log");
1737
- break;
1738
- }
1739
- await writeFile(dummyPath, fixed, "utf-8");
1740
- meta("saved", dummyPath);
1741
- blank();
1742
- ({exitCode, output, currentScript} = await runVitestResolved(dummyPath));
1743
- if (exitCode === 0) break;
1744
- }
1745
- if (exitCode !== 0) {
1746
- warn("auto-fix exhausted — setup test still failing");
2571
+ const agentBrowserSession = `ccqa-generate-setup-${name}-${Date.now()}`;
2572
+ const runVitestForSession = (path) => runVitestResolved(path, agentBrowserSession);
2573
+ await closeSession(agentBrowserSession);
2574
+ const signalHandler = () => {
2575
+ closeSession(agentBrowserSession).finally(() => process.exit(130));
2576
+ };
2577
+ process.once("SIGINT", signalHandler);
2578
+ process.once("SIGTERM", signalHandler);
2579
+ try {
2580
+ const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(dummyPath), "run");
2581
+ let passed = initialRun.exitCode === 0;
2582
+ if (!passed) passed = await runAutoFixLoop({
2583
+ scriptPath: dummyPath,
2584
+ initialRun,
2585
+ specMarkdown: specContent,
2586
+ actions: cleanedActions,
2587
+ maxRetries,
2588
+ mode,
2589
+ runVitest: runVitestForSession,
2590
+ agentBrowserSession,
2591
+ outputLanguage
2592
+ });
2593
+ if (!passed) {
2594
+ warn("auto-fix exhausted; setup test still failing");
1747
2595
  hint(`edit ${dummyPath} manually, then run: ccqa generate-setup ${name} --from-dummy`);
1748
2596
  process.exit(1);
1749
2597
  }
2598
+ await writeFile(finalPath, reversePlaceholdersInScript(await readFile(dummyPath, "utf8"), spec.placeholders), "utf-8");
2599
+ await unlink(dummyPath).catch(() => {});
2600
+ blank();
2601
+ meta("saved", finalPath);
2602
+ hint(`setup '${name}' is ready; reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
2603
+ } finally {
2604
+ process.off("SIGINT", signalHandler);
2605
+ process.off("SIGTERM", signalHandler);
2606
+ await closeSession(agentBrowserSession);
1750
2607
  }
1751
- await writeFile(finalPath, reversePlaceholdersInScript(currentScript, spec.placeholders), "utf-8");
1752
- await unlink(dummyPath).catch(() => {});
1753
- blank();
1754
- meta("saved", finalPath);
1755
- hint(`setup '${name}' is ready — reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
1756
2608
  }
1757
2609
  /**
1758
2610
  * Replace dummy values with {{placeholder}} directly in the test script text.
@@ -1765,45 +2617,17 @@ function reversePlaceholdersInScript(script, placeholders) {
1765
2617
  for (const [key, def] of entries) result = result.replaceAll(def.dummy, `{{${key}}}`);
1766
2618
  return result;
1767
2619
  }
1768
- async function autoFixWithLLM(script, failureLog) {
1769
- try {
1770
- const { result, isError } = await invokeClaudeStreaming({
1771
- prompt: buildAutoFixPrompt(script, failureLog),
1772
- disableBuiltinTools: true,
1773
- maxTurns: 1
1774
- }, () => {});
1775
- if (isError || !result) return null;
1776
- const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
1777
- const fixes = JSON.parse(json);
1778
- if (!Array.isArray(fixes) || fixes.length === 0) return null;
1779
- return applySleepFixes(script, fixes);
1780
- } catch {
1781
- return null;
1782
- }
1783
- }
1784
- function applySleepFixes(script, fixes) {
1785
- const lines = script.split("\n");
1786
- for (const fix of fixes) if ("increase_to" in fix) {
1787
- const idx = fix.line - 1;
1788
- if (idx >= 0 && idx < lines.length) lines[idx] = lines[idx].replace(/spawnSync\("sleep",\s*\["\d+"\]/, `spawnSync("sleep", ["${fix.increase_to}"]`);
1789
- }
1790
- const inserts = fixes.filter((f) => "seconds" in f && !("increase_to" in f)).sort((a, b) => b.line - a.line);
1791
- for (const fix of inserts) {
1792
- const idx = fix.line - 1;
1793
- if (idx >= 0 && idx <= lines.length) lines.splice(idx, 0, ` spawnSync("sleep", ["${fix.seconds}"], { stdio: "inherit" });`);
1794
- }
1795
- return lines.join("\n");
1796
- }
1797
- async function runVitest(scriptPath) {
1798
- const { exitCode, stdout, stderr } = await spawnVitestCaptured([
2620
+ async function runVitest(scriptPath, agentBrowserSession) {
2621
+ const { exitCode, stdout, stderr } = await spawnVitestTeed([
1799
2622
  "run",
1800
2623
  "--config",
1801
2624
  bundledVitestConfigPath(),
1802
2625
  scriptPath
1803
- ]);
2626
+ ], agentBrowserSession ? { env: {
2627
+ ...process.env,
2628
+ AGENT_BROWSER_SESSION: agentBrowserSession
2629
+ } } : {});
1804
2630
  const currentScript = await readFile(scriptPath, "utf8");
1805
- process.stdout.write(stdout);
1806
- if (stderr) process.stderr.write(stderr);
1807
2631
  return {
1808
2632
  exitCode,
1809
2633
  output: stdout + stderr,
@@ -1817,20 +2641,21 @@ async function runVitest(scriptPath) {
1817
2641
  * literals. Auto-fix edits the original file (via writeFile in callers), so
1818
2642
  * we always re-read it before each invocation.
1819
2643
  */
1820
- async function runVitestResolved(scriptPath) {
2644
+ async function runVitestResolved(scriptPath, agentBrowserSession) {
1821
2645
  const original = await readFile(scriptPath, "utf8");
1822
- if (!hasEnvRef(original)) return runVitest(scriptPath);
2646
+ if (!hasEnvRef(original)) return runVitest(scriptPath, agentBrowserSession);
1823
2647
  const tmpPath = scriptPath.replace(/\.ts$/, ".__resolved.spec.ts");
1824
2648
  await writeFile(tmpPath, resolveEnvRefs(original), "utf-8");
1825
2649
  try {
1826
- const { exitCode, stdout, stderr } = await spawnVitestCaptured([
2650
+ const { exitCode, stdout, stderr } = await spawnVitestTeed([
1827
2651
  "run",
1828
2652
  "--config",
1829
2653
  bundledVitestConfigPath(),
1830
2654
  tmpPath
1831
- ]);
1832
- process.stdout.write(stdout);
1833
- if (stderr) process.stderr.write(stderr);
2655
+ ], agentBrowserSession ? { env: {
2656
+ ...process.env,
2657
+ AGENT_BROWSER_SESSION: agentBrowserSession
2658
+ } } : {});
1834
2659
  return {
1835
2660
  exitCode,
1836
2661
  output: stdout + stderr,