ccqa 0.8.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +115 -12
- package/dist/bin/ccqa.mjs +869 -303
- package/dist/package.json +1 -1
- package/dist/runtime/test-helpers.d.mts +8 -1
- package/dist/runtime/test-helpers.mjs +28 -3
- package/package.json +1 -1
package/dist/bin/ccqa.mjs
CHANGED
|
@@ -6,12 +6,14 @@ import { accessSync, existsSync, readFileSync, statSync } from "node:fs";
|
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
7
|
import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
|
|
8
8
|
import { homedir, tmpdir } from "node:os";
|
|
9
|
-
import { delimiter, dirname, join, posix, relative, resolve } from "node:path";
|
|
9
|
+
import { delimiter, dirname, isAbsolute, join, posix, relative, resolve } from "node:path";
|
|
10
10
|
import { parse, stringify } from "yaml";
|
|
11
11
|
import { ZodError, z } from "zod";
|
|
12
12
|
import { execFile, spawn, spawnSync } from "node:child_process";
|
|
13
13
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
14
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
14
15
|
import { promisify } from "node:util";
|
|
16
|
+
import { randomUUID } from "node:crypto";
|
|
15
17
|
import { createInterface } from "node:readline";
|
|
16
18
|
import { createInterface as createInterface$1 } from "node:readline/promises";
|
|
17
19
|
//#region src/runtime/env-vars.ts
|
|
@@ -139,6 +141,7 @@ const TestSpecSchema = z.object({
|
|
|
139
141
|
title: z.string().min(1),
|
|
140
142
|
relatedPaths: z.array(z.string().min(1)).optional(),
|
|
141
143
|
mode: SpecModeSchema.optional(),
|
|
144
|
+
statePath: z.string().min(1).optional(),
|
|
142
145
|
steps: z.array(StepSchema).min(1)
|
|
143
146
|
}).strict();
|
|
144
147
|
/** Default mode when `mode:` is absent. */
|
|
@@ -480,50 +483,62 @@ async function loadAvailableBlocks(cwd) {
|
|
|
480
483
|
}))
|
|
481
484
|
}));
|
|
482
485
|
}
|
|
483
|
-
const
|
|
484
|
-
const
|
|
486
|
+
const RECORD_USER_PROMPT_PATH = ".ccqa/prompts/record.user.md";
|
|
487
|
+
const RECORD_AGENT_PROMPT_PATH = ".ccqa/prompts/record.agent.md";
|
|
488
|
+
const LIVE_USER_PROMPT_PATH = ".ccqa/prompts/live.user.md";
|
|
489
|
+
const LIVE_AGENT_PROMPT_PATH = ".ccqa/prompts/live.agent.md";
|
|
485
490
|
const USER_PROMPT_MAX_BYTES = 32768;
|
|
486
491
|
/**
 * Load the prompt bundle appended to the `ccqa record` (trace) system prompt.
 *
 * Two project files feed the bundle:
 *   - `.ccqa/prompts/record.user.md`  — human-maintained, stable project rules
 *   - `.ccqa/prompts/record.agent.md` — auto-rewritten by
 *     `ccqa record --update-agent-prompt`
 *
 * Resolves to null when both files are missing / empty; otherwise resolves to
 * the bundle produced by `loadPromptBundle` (combined text is capped at
 * 32 KiB after concatenation).
 *
 * Use `ccqa init` to scaffold both files.
 */
async function loadRecordPromptBundle(cwd) {
	const bundle = await loadPromptBundle(RECORD_USER_PROMPT_PATH, RECORD_AGENT_PROMPT_PATH, cwd);
	return bundle;
}
|
|
505
504
|
/**
 * Load the prompt bundle appended to the `ccqa run` (live mode) system prompt.
 *
 * Two project files feed the bundle:
 *   - `.ccqa/prompts/live.user.md`  — human-maintained, stable project rules
 *   - `.ccqa/prompts/live.agent.md` — auto-rewritten by
 *     `ccqa run --update-agent-prompt`
 *
 * Null / cap semantics are the same as `loadRecordPromptBundle`. Keeping
 * product-specific context in the consuming repo (not the ccqa OSS prompt)
 * is the explicit non-contamination boundary.
 */
async function loadLivePromptBundle(cwd) {
	const bundle = await loadPromptBundle(LIVE_USER_PROMPT_PATH, LIVE_AGENT_PROMPT_PATH, cwd);
	return bundle;
}
|
|
520
|
-
async function
|
|
517
|
+
/**
 * Cut `text` down so that its UTF-8 encoding occupies at most `maxBytes`
 * bytes, without ever splitting a multi-byte character. Returns `text`
 * unchanged when it already fits.
 */
function truncateUtf8Bytes(text, maxBytes) {
	const encoded = Buffer.from(text, "utf-8");
	if (encoded.byteLength <= maxBytes) return text;
	let end = Math.max(0, maxBytes);
	// UTF-8 continuation bytes look like 0b10xxxxxx; step back until `end`
	// lands on the first byte of a character so the cut is on a boundary.
	while (end > 0 && (encoded[end] & 192) === 128) end--;
	return encoded.subarray(0, end).toString("utf-8");
}
/**
 * Read the user and agent prompt files and merge them into one bundle.
 *
 * @param userRelPath  path (relative to `cwd`) of the human-maintained prompt
 * @param agentRelPath path (relative to `cwd`) of the auto-updated prompt
 * @param cwd          base directory; forwarded as-is to `readPromptFile`
 * @returns null when neither file yields text, otherwise
 *          `{ text, loaded }` where `text` is the sections joined by a blank
 *          line and `loaded` lists the relative paths that contributed.
 *
 * The combined text is capped at `USER_PROMPT_MAX_BYTES` measured in UTF-8
 * bytes (not UTF-16 code units), so non-ASCII prompts cannot exceed the
 * advertised byte budget; a marker line is appended when truncation happens.
 */
async function loadPromptBundle(userRelPath, agentRelPath, cwd) {
	const [userText, agentText] = await Promise.all([readPromptFile(userRelPath, cwd), readPromptFile(agentRelPath, cwd)]);
	if (userText === null && agentText === null) return null;
	const sections = [];
	const loaded = [];
	if (userText !== null) {
		sections.push(`### Project guidance (human-maintained)\n\n${userText}`);
		loaded.push(userRelPath);
	}
	if (agentText !== null) {
		sections.push(`### Agent learnings (auto-updated by ccqa --update-agent-prompt)\n\n${agentText}`);
		loaded.push(agentRelPath);
	}
	const joined = sections.join("\n\n");
	const capped = truncateUtf8Bytes(joined, USER_PROMPT_MAX_BYTES);
	// `capped` is only a different string when something was actually cut off.
	const text = capped === joined ? joined : capped + `\n\n[ccqa] (prompt bundle truncated at ${USER_PROMPT_MAX_BYTES} bytes)`;
	return {
		text,
		loaded
	};
}
|
|
537
|
+
/**
 * Read one optional prompt file.
 *
 * @param relPath path of the file relative to `cwd`
 * @param cwd     base directory; falls back to `process.cwd()` when nullish
 * @returns the file's text with surrounding whitespace trimmed, or null when
 *          the file cannot be read or contains only whitespace.
 */
async function readPromptFile(relPath, cwd) {
	const absPath = join(cwd ?? process.cwd(), relPath);
	let raw;
	try {
		raw = await readFile(absPath, "utf-8");
	} catch {
		// Prompt files are optional: any read failure is treated as "absent".
		return null;
	}
	const body = raw.trim();
	if (body.length === 0) return null;
	return body;
}
|
|
528
543
|
/**
|
|
529
544
|
* Probe for orphaned files left over from earlier ccqa versions inside
|
|
@@ -567,9 +582,9 @@ async function getTestScript(featureName, specName, cwd) {
|
|
|
567
582
|
}
|
|
568
583
|
/**
|
|
569
584
|
* Variant of `listAllSpecs` for callers that care about the spec definition
|
|
570
|
-
* itself (spec.yaml) rather than its compiled vitest script. `ccqa run
|
|
571
|
-
*
|
|
572
|
-
* no `test.spec.ts` is still a valid target.
|
|
585
|
+
* itself (spec.yaml) rather than its compiled vitest script. `ccqa run` uses
|
|
586
|
+
* this for live-mode specs because they skip codegen entirely — a freshly
|
|
587
|
+
* drafted spec with no `test.spec.ts` is still a valid target.
|
|
573
588
|
*/
|
|
574
589
|
async function listAllSpecsWithSpecFile(cwd) {
|
|
575
590
|
return listAllSpecsFilteredBy(SPEC_FILE, cwd);
|
|
@@ -589,10 +604,10 @@ async function listAllSpecsFilteredBy(requiredFilename, cwd) {
|
|
|
589
604
|
}))).flat();
|
|
590
605
|
}
|
|
591
606
|
/**
|
|
592
|
-
* Resolve a CLI `<target>` argument into a list of spec refs.
|
|
593
|
-
* `ccqa run
|
|
594
|
-
*
|
|
595
|
-
*
|
|
607
|
+
* Resolve a CLI `<target>` argument into a list of spec refs. Used by
|
|
608
|
+
* `ccqa run`. Callers pass the right enumerator for "no target" (deterministic
|
|
609
|
+
* specs want `test.spec.ts`-having specs; live specs want `spec.yaml`-having
|
|
610
|
+
* specs).
|
|
596
611
|
*/
|
|
597
612
|
async function resolveSpecTargets(target, enumerateAll, cwd) {
|
|
598
613
|
if (!target) return enumerateAll();
|
|
@@ -745,6 +760,27 @@ function waitExit(child) {
|
|
|
745
760
|
});
|
|
746
761
|
}
|
|
747
762
|
//#endregion
|
|
763
|
+
//#region src/runtime/pool.ts
|
|
764
|
+
/**
 * Run each item through `fn` with at most `concurrency` calls in flight.
 *
 * @param items       array of inputs; `fn` is called as `fn(item, index)`
 * @param concurrency maximum number of simultaneous `fn` calls. Values below
 *                    1 and values that are not a number (NaN / undefined)
 *                    fall back to 1 so every item is still processed.
 * @param fn          async (or sync) mapper
 * @returns results in input order.
 *
 * A throwing `fn` rejects the whole pool with that error, and no further
 * items are started afterwards (calls already in flight are left to finish).
 * Callers that want per-item isolation should catch inside `fn`.
 */
async function runPool(items, concurrency, fn) {
	const results = new Array(items.length);
	let cursor = 0;
	// Flipped by the first failing worker so its siblings stop picking up work
	// once the pool is already going to reject.
	let aborted = false;
	const worker = async () => {
		while (!aborted) {
			const idx = cursor++;
			if (idx >= items.length) return;
			try {
				results[idx] = await fn(items[idx], idx);
			} catch (err) {
				aborted = true;
				throw err;
			}
		}
	};
	const requested = Number(concurrency);
	// NaN would propagate through Math.min/Math.max and produce zero workers,
	// silently returning an array of holes — treat it as sequential instead.
	const width = Math.max(1, Math.min(Number.isNaN(requested) ? 1 : Math.floor(requested), items.length));
	await Promise.all(Array.from({ length: width }, () => worker()));
	return results;
}
|
|
783
|
+
//#endregion
|
|
748
784
|
//#region src/claude/extract-json.ts
|
|
749
785
|
/**
|
|
750
786
|
* Pulls a JSON object out of a Claude completion. Accepts either a fenced
|
|
@@ -767,26 +803,70 @@ const STEP_ICONS = {
|
|
|
767
803
|
STEP_SKIPPED: "⊘",
|
|
768
804
|
RUN_COMPLETED: "■"
|
|
769
805
|
};
|
|
806
|
+
/**
|
|
807
|
+
* When a `withBuffer` scope is active, every log line (stdout and stderr) is
|
|
808
|
+
* appended to its buffer instead of being written immediately. Parallel spec
|
|
809
|
+
* runs use this so each spec's narration — including logs emitted deep inside
|
|
810
|
+
* the live executor — flushes as one contiguous block, not interleaved.
|
|
811
|
+
*/
|
|
812
|
+
const bufferStore = new AsyncLocalStorage();
|
|
813
|
+
/**
 * Reports whether the current async context is inside a `withBuffer` scope,
 * i.e. whether `bufferStore` currently holds a store. Progress lines use this
 * to avoid TTY cursor tricks while output is being captured.
 */
function isBuffered() {
	const active = bufferStore.getStore();
	return active !== undefined;
}
|
|
817
|
+
/**
 * Lowest-level output primitive for the logger.
 *
 * Inside a `withBuffer` scope the text is appended to that scope's buffer and
 * `sink` is ignored; otherwise the text is written straight to `sink`
 * (stdout by default).
 */
function emit(text, sink = process.stdout) {
	const active = bufferStore.getStore();
	if (!active) {
		sink.write(text);
		return;
	}
	active.out.push(text);
}
|
|
825
|
+
/**
 * Write raw text to the active `withBuffer` scope, or straight to stdout when
 * no scope is active. This lets a runner route sub-process output (e.g. a
 * child's stdout) into the same buffer as its `log.*` lines so both flush
 * together.
 */
function emitRaw(text) {
	emit(text, process.stdout);
}
|
|
833
|
+
/**
 * Run `fn` with its log output captured, then flush everything in one write
 * to stdout under a `──── label ────` banner. Parallel runners use this to
 * keep each spec's output contiguous. The flush happens in a `finally`, so
 * captured output is printed even when `fn` throws.
 *
 * When `buffered` is false, `fn` runs without a buffer and its output streams
 * live — the sequential (concurrency 1) path.
 *
 * @returns whatever `fn` returns / resolves to.
 */
async function withBuffer(label, buffered, fn) {
	if (buffered) {
		const captured = { out: [] };
		try {
			const result = await bufferStore.run(captured, fn);
			return result;
		} finally {
			const body = captured.out.join("");
			process.stdout.write(`\n──── ${label} ────\n${body}`);
		}
	}
	return fn();
}
|
|
770
850
|
/** Print the `ccqa <command> [target]` banner, framed by blank lines. */
function header(command, target) {
	const suffix = target ? ` ${target}` : "";
	emit(`\nccqa ${command}${suffix}\n\n`);
}
|
|
773
853
|
/** Emit one `[scope] message` line to `sink` (stdout unless overridden). */
function write(scope, message, sink = process.stdout) {
	const line = `[${scope}] ${message}\n`;
	emit(line, sink);
}
|
|
776
856
|
/** Log a `key: value` pair under the `meta` scope. */
function meta(key, value) {
	const entry = `${key}: ${value}`;
	write("meta", entry);
}
|
|
779
859
|
/** Emit a single empty line. */
function blank() {
	emit("\n", process.stdout);
}
|
|
782
862
|
/** Log `message` under the `info` scope on stdout. */
function info(message) {
	write("info", message, process.stdout);
}
|
|
785
865
|
/** Emit one step line: the icon mapped from `type`, the step id, then `detail`. */
function step(type, stepId, detail) {
	const icon = STEP_ICONS[type];
	emit(` ${icon} [${stepId}] ${detail}\n`);
}
|
|
788
868
|
/** Echo a shell command, showing only its first 120 characters. */
function bash(command) {
	const shown = command.slice(0, 120);
	emit(` $ ${shown}\n`);
}
|
|
791
871
|
function error(message) {
|
|
792
872
|
write("error", message, process.stderr);
|
|
@@ -795,7 +875,7 @@ function warn(message) {
|
|
|
795
875
|
write("warn", message, process.stderr);
|
|
796
876
|
}
|
|
797
877
|
/** Log `message` under the `hint` scope, preceded by an empty line. */
function hint(message) {
	emit("\n", process.stdout);
	write("hint", message, process.stdout);
}
|
|
801
881
|
function fix(message) {
|
|
@@ -820,17 +900,17 @@ const PROGRESS_NONTTY_STRIDE = 5;
|
|
|
820
900
|
let lastProgressNonTtyEmit = -1;
|
|
821
901
|
/**
 * Report `current + 1`/`total` progress for `label`.
 *
 * On a TTY outside any `withBuffer` scope the line is redrawn in place
 * (carriage return + erase-to-end-of-line). Otherwise a plain line is
 * emitted, but only for the first item and then once the index has advanced
 * by at least `PROGRESS_NONTTY_STRIDE` since the last emitted line.
 */
function progress(current, total, label) {
	const line = `[info] ${current + 1}/${total} ${label}`;
	const interactive = process.stdout.isTTY && !isBuffered();
	if (interactive) {
		process.stdout.write(`\r${line}\x1b[K`);
		return;
	}
	const due = current === 0 || current - lastProgressNonTtyEmit >= PROGRESS_NONTTY_STRIDE;
	if (!due) return;
	emit(`${line}\n`);
	lastProgressNonTtyEmit = current;
}
|
|
832
912
|
/**
 * Finish a progress sequence: clear the in-place progress line when drawing
 * on a TTY outside a `withBuffer` scope, and reset the non-TTY stride tracker.
 */
function progressEnd() {
	const interactive = process.stdout.isTTY && !isBuffered();
	if (interactive) {
		process.stdout.write(`\r\x1b[K`);
	}
	lastProgressNonTtyEmit = -1;
}
|
|
836
916
|
/**
|
|
@@ -1351,6 +1431,12 @@ function extractAbActionFromBashCommand(cmd) {
|
|
|
1351
1431
|
case "type":
|
|
1352
1432
|
case "select": return `AB_ACTION|${subCmd}|${args[0] ?? ""}|${args[1] ?? ""}|${args[2] ?? ""}`;
|
|
1353
1433
|
case "drag": return `AB_ACTION|drag|${args[0] ?? ""}|${args[1] ?? ""}|${args[2] ?? ""}`;
|
|
1434
|
+
case "upload": {
|
|
1435
|
+
const sel = args[0] ?? "";
|
|
1436
|
+
const files = args.slice(1);
|
|
1437
|
+
if (!sel || files.length === 0) return null;
|
|
1438
|
+
return `AB_ACTION|upload|${sel}|${files.join("|")}`;
|
|
1439
|
+
}
|
|
1354
1440
|
case "snapshot": return null;
|
|
1355
1441
|
case "find": return extractFindAbAction(args);
|
|
1356
1442
|
default: return null;
|
|
@@ -1688,25 +1774,15 @@ const DEFAULT_CONCURRENCY$1 = 3;
|
|
|
1688
1774
|
*/
|
|
1689
1775
|
async function analyzeDrift(input) {
	const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, language, onSpecStart } = input;
	// Per-target task: notify the optional observer, then run the check.
	const checkOne = async (target) => {
		if (onSpecStart != null) onSpecStart(target);
		const opts = {
			cwd,
			blocks,
			model,
			language
		};
		return checkSpec(target, opts);
	};
	// runPool keeps results in the same order as `targets`.
	return runPool(targets, concurrency, checkOne);
}
|
|
1711
1787
|
async function checkSpec(target, opts) {
|
|
1712
1788
|
const { featureName, specName } = target;
|
|
@@ -2324,7 +2400,7 @@ function clamp(n, lo, hi) {
|
|
|
2324
2400
|
//#endregion
|
|
2325
2401
|
//#region src/report/prompt.ts
|
|
2326
2402
|
function buildFailureAnalysisPrompt(input) {
|
|
2327
|
-
const { script, specYaml, failureLog,
|
|
2403
|
+
const { script, specYaml, failureLog, liveTranscriptExcerpt, diffPatch, changedFiles, baseRef, driftIssues, outputLanguage = "auto" } = input;
|
|
2328
2404
|
return `You are analyzing a failing E2E regression test right after a source change landed. Your job is a root-cause CALL, not a fix: decide which of three categories explains the failure, using the source diff as your primary context.
|
|
2329
2405
|
|
|
2330
2406
|
${outputLanguageBlock(outputLanguage, "`reasoning`, `detail`", "label names (TEST_DRIFT, etc.)")}## The three categories
|
|
@@ -2396,7 +2472,7 @@ Evidence rules: TEST_DRIFT and SPEC_CHANGE require at least one concrete \`file\
|
|
|
2396
2472
|
## Test Spec (spec.yaml)
|
|
2397
2473
|
${specYaml}
|
|
2398
2474
|
|
|
2399
|
-
${buildExecutionEvidenceBlock(script, failureLog,
|
|
2475
|
+
${buildExecutionEvidenceBlock(script, failureLog, liveTranscriptExcerpt)}
|
|
2400
2476
|
|
|
2401
2477
|
${diffPatch ? `## Source changes since ${baseRef ?? "base"} (git diff, may be truncated)
|
|
2402
2478
|
|
|
@@ -2432,14 +2508,14 @@ ${driftIssues.map((i) => `- [${i.severity}] (${DRAFT_CATEGORY_LABEL[i.category]}
|
|
|
2432
2508
|
* never has to branch on mode — it just sees "here's what was executed
|
|
2433
2509
|
* and here's how it failed".
|
|
2434
2510
|
*/
|
|
2435
|
-
function buildExecutionEvidenceBlock(script, failureLog,
|
|
2511
|
+
function buildExecutionEvidenceBlock(script, failureLog, liveTranscriptExcerpt) {
|
|
2436
2512
|
const sections = [];
|
|
2437
2513
|
if (script && script.length > 0) sections.push(`## Test Script (with line numbers)
|
|
2438
2514
|
${numberLines(script)}`);
|
|
2439
2515
|
if (failureLog && failureLog.length > 0) sections.push(`## Failure Log
|
|
2440
2516
|
${failureLog.slice(0, 8e3)}`);
|
|
2441
|
-
if (
|
|
2442
|
-
${
|
|
2517
|
+
if (liveTranscriptExcerpt && liveTranscriptExcerpt.length > 0) sections.push(`## Live Run Transcript (summary of Claude's per-step execution)
|
|
2518
|
+
${liveTranscriptExcerpt}`);
|
|
2443
2519
|
if (sections.length === 0) return `## Execution evidence
|
|
2444
2520
|
|
|
2445
2521
|
(No script, failure log, or live transcript was captured for this run. Classify from spec.yaml + diff only, and be correspondingly more conservative — prefer UNKNOWN over a confident call.)`;
|
|
@@ -2535,11 +2611,11 @@ const ReportEvidenceSchema = z.object({
|
|
|
2535
2611
|
failureSummary: z.string().nullable().default(null)
|
|
2536
2612
|
});
|
|
2537
2613
|
/**
|
|
2538
|
-
* Per-step row for a
|
|
2539
|
-
* structure produced by `src/runtime/
|
|
2614
|
+
* Per-step row for a live-mode run (spec.yaml `mode: live`). Mirrors the
|
|
2615
|
+
* structure produced by `src/runtime/live-executor.ts:LiveStepResult` but
|
|
2540
2616
|
* encoded against the report schema so the HTML renderer can carry both
|
|
2541
|
-
* deterministic (`evidence`) and
|
|
2542
|
-
*
|
|
2617
|
+
* deterministic (`evidence`) and live (`liveRun`) sources of step-boundary
|
|
2618
|
+
* screenshots.
|
|
2543
2619
|
*
|
|
2544
2620
|
* `beforePng` / `afterPng` are RELATIVE to the HTML report directory — the
|
|
2545
2621
|
* caller computes the relative path with `node:path`'s `relative()` so the
|
|
@@ -2554,7 +2630,7 @@ const ReportEvidenceSchema = z.object({
|
|
|
2554
2630
|
* `models` is the union of model ids the SDK reported using; usually a
|
|
2555
2631
|
* single element, but the SDK can fan out across models in some modes.
|
|
2556
2632
|
*/
|
|
2557
|
-
const
|
|
2633
|
+
const LiveReportCostSchema = z.object({
|
|
2558
2634
|
totalCostUsd: z.number().nullable(),
|
|
2559
2635
|
durationApiMs: z.number().nullable(),
|
|
2560
2636
|
numTurns: z.number().nullable(),
|
|
@@ -2564,7 +2640,7 @@ const NdReportCostSchema = z.object({
|
|
|
2564
2640
|
outputTokens: z.number().nullable(),
|
|
2565
2641
|
models: z.array(z.string())
|
|
2566
2642
|
});
|
|
2567
|
-
const
|
|
2643
|
+
const LiveReportStepSchema = z.object({
|
|
2568
2644
|
stepId: z.string(),
|
|
2569
2645
|
source: z.string(),
|
|
2570
2646
|
instruction: z.string(),
|
|
@@ -2578,15 +2654,15 @@ const NdReportStepSchema = z.object({
|
|
|
2578
2654
|
beforePng: z.string().nullable(),
|
|
2579
2655
|
afterPng: z.string().nullable(),
|
|
2580
2656
|
durationMs: z.number(),
|
|
2581
|
-
cost:
|
|
2657
|
+
cost: LiveReportCostSchema
|
|
2582
2658
|
});
|
|
2583
|
-
const
|
|
2659
|
+
const LiveReportRunSchema = z.object({
|
|
2584
2660
|
runId: z.string(),
|
|
2585
2661
|
sessionName: z.string(),
|
|
2586
2662
|
startedAt: z.string(),
|
|
2587
2663
|
durationMs: z.number(),
|
|
2588
|
-
steps: z.array(
|
|
2589
|
-
cost:
|
|
2664
|
+
steps: z.array(LiveReportStepSchema),
|
|
2665
|
+
cost: LiveReportCostSchema
|
|
2590
2666
|
});
|
|
2591
2667
|
const ReportSpecResultSchema = z.object({
|
|
2592
2668
|
feature: z.string(),
|
|
@@ -2607,7 +2683,7 @@ const ReportSpecResultSchema = z.object({
|
|
|
2607
2683
|
diffExcerpt: z.string().nullable(),
|
|
2608
2684
|
specYaml: z.string().nullable(),
|
|
2609
2685
|
evidence: z.array(ReportEvidenceSchema).nullable(),
|
|
2610
|
-
|
|
2686
|
+
liveRun: LiveReportRunSchema.nullable()
|
|
2611
2687
|
});
|
|
2612
2688
|
z.object({
|
|
2613
2689
|
schemaVersion: z.literal(1),
|
|
@@ -2846,7 +2922,7 @@ function scopePatchForSpec(patch, relatedPaths, caps = {}) {
|
|
|
2846
2922
|
return parts.join("\n");
|
|
2847
2923
|
}
|
|
2848
2924
|
//#endregion
|
|
2849
|
-
//#region src/runtime/
|
|
2925
|
+
//#region src/runtime/live-cost-format.ts
|
|
2850
2926
|
/**
|
|
2851
2927
|
* Compact one-line cost summary. Format:
|
|
2852
2928
|
* "$0.1234 · 4 turns · 42 in / 6,511 out · 2.0M cached · sonnet"
|
|
@@ -2856,7 +2932,7 @@ function scopePatchForSpec(patch, relatedPaths, caps = {}) {
|
|
|
2856
2932
|
* `model=...` segment. `compact: true` (HTML chip) thousand-separates fresh
|
|
2857
2933
|
* tokens, abbreviates cache-read with K/M, drops the `model=` prefix.
|
|
2858
2934
|
*/
|
|
2859
|
-
function
|
|
2935
|
+
function formatLiveCost(cost, options) {
|
|
2860
2936
|
if (cost.totalCostUsd === null) return null;
|
|
2861
2937
|
const compact = options.compact;
|
|
2862
2938
|
const sep = compact ? " · " : " / ";
|
|
@@ -2875,7 +2951,7 @@ function formatNdCost(cost, options) {
|
|
|
2875
2951
|
* Sum of per-spec costs for a batch. Used only by the CLI batch summary.
|
|
2876
2952
|
* Returns null when no spec has cost data.
|
|
2877
2953
|
*/
|
|
2878
|
-
function
|
|
2954
|
+
function formatLiveBatchCost(costs) {
|
|
2879
2955
|
let totalUsd = 0;
|
|
2880
2956
|
let seen = false;
|
|
2881
2957
|
let totalIn = 0;
|
|
@@ -3176,7 +3252,7 @@ function renderResult(r, index, s) {
|
|
|
3176
3252
|
const heading = r.title ? `<span class="spec-title">${esc(r.title)}</span><span class="spec-slug">(${esc(id)})</span>` : `<span class="spec-title">${esc(id)}</span>`;
|
|
3177
3253
|
const predictionLine = r.status === "failed" && r.analysis ? `<span class="label-text label-${r.analysis.label}">${esc(displayLabel(r.analysis.label, s))} · ${Math.round(r.analysis.confidence * 100)}%</span>` : "";
|
|
3178
3254
|
const needsGradingDot = r.status === "failed" && r.analysis ? `<span class="needs-grading-dot" data-case-id="${esc(id)}" title="${esc(s.needsGrading)}"></span>` : "";
|
|
3179
|
-
const modeTag = r.
|
|
3255
|
+
const modeTag = r.liveRun ? `<span class="mode-tag" title="executed in live mode (Claude drove the browser per step)">LIVE</span>` : `<span class="mode-tag" title="executed in deterministic mode (vitest replayed test.spec.ts)">DETERMINISTIC</span>`;
|
|
3180
3256
|
return `<details class="spec ${r.status}" data-status="${r.status}" data-case-id="${esc(id)}"${r.status === "failed" ? " open" : ""}>
|
|
3181
3257
|
<summary>
|
|
3182
3258
|
${statusIcon(r.status)}
|
|
@@ -3189,7 +3265,7 @@ function renderResult(r, index, s) {
|
|
|
3189
3265
|
</summary>
|
|
3190
3266
|
<div class="spec-body">
|
|
3191
3267
|
${renderEvidence(r, s)}
|
|
3192
|
-
${r.
|
|
3268
|
+
${r.liveRun ? renderLiveRun(r.liveRun, s) : ""}
|
|
3193
3269
|
${renderSpecBody(r, index, s)}
|
|
3194
3270
|
${collapsible(s.collSpecYaml, s.collSpecYamlHelp, r.specYaml)}
|
|
3195
3271
|
</div>
|
|
@@ -3200,16 +3276,16 @@ function renderSpecBody(r, index, s) {
|
|
|
3200
3276
|
if (r.analysis) return renderAnalysis(r, index, s);
|
|
3201
3277
|
return renderSkippedWithSupporting(r, s);
|
|
3202
3278
|
}
|
|
3203
|
-
function
|
|
3204
|
-
const stepItems =
|
|
3279
|
+
function renderLiveRun(live, strings) {
|
|
3280
|
+
const stepItems = live.steps.map((s) => {
|
|
3205
3281
|
const before = s.beforePng ? `<a class="shot" href="${esc(s.beforePng)}" target="_blank" rel="noopener"><img src="${esc(s.beforePng)}" alt="before ${esc(s.stepId)}" loading="lazy"><span>before</span></a>` : "";
|
|
3206
3282
|
const after = s.afterPng ? `<a class="shot" href="${esc(s.afterPng)}" target="_blank" rel="noopener"><img src="${esc(s.afterPng)}" alt="after ${esc(s.stepId)}" loading="lazy"><span>after</span></a>` : "";
|
|
3207
3283
|
const dur = s.durationMs > 0 ? `<span class="duration">${formatDuration$1(s.durationMs)}</span>` : "";
|
|
3208
|
-
const stepCost =
|
|
3284
|
+
const stepCost = formatLiveCostChip(s.cost);
|
|
3209
3285
|
const stepModel = formatModelChip(s.cost.models);
|
|
3210
|
-
const sourceBadge = s.source && s.source !== "spec" ? `<span class="
|
|
3211
|
-
return `<li class="
|
|
3212
|
-
<div class="
|
|
3286
|
+
const sourceBadge = s.source && s.source !== "spec" ? `<span class="live-source">[${esc(s.source)}]</span>` : "";
|
|
3287
|
+
return `<li class="live-step ${s.status}">
|
|
3288
|
+
<div class="live-step-head">
|
|
3213
3289
|
${statusIcon(s.status)}
|
|
3214
3290
|
<span class="step-name">${esc(s.stepId)}</span>
|
|
3215
3291
|
${sourceBadge}
|
|
@@ -3218,44 +3294,44 @@ function renderNdRun(nd, strings) {
|
|
|
3218
3294
|
${stepCost}
|
|
3219
3295
|
${dur}
|
|
3220
3296
|
</div>
|
|
3221
|
-
<div class="
|
|
3222
|
-
<p class="
|
|
3223
|
-
<p class="
|
|
3224
|
-
${s.reasoning ? `<p class="
|
|
3225
|
-
${before || after ? `<div class="
|
|
3297
|
+
<div class="live-step-body">
|
|
3298
|
+
<p class="live-instr"><strong>${esc(strings.stepDoLabel)}:</strong> ${esc(s.instruction)}</p>
|
|
3299
|
+
<p class="live-instr"><strong>${esc(strings.stepExpectLabel)}:</strong> ${esc(s.expected)}</p>
|
|
3300
|
+
${s.reasoning ? `<p class="live-reasoning">${esc(s.reasoning)}</p>` : ""}
|
|
3301
|
+
${before || after ? `<div class="live-shots">${before}${after}</div>` : ""}
|
|
3226
3302
|
</div>
|
|
3227
3303
|
</li>`;
|
|
3228
3304
|
}).join("\n");
|
|
3229
|
-
const runCost =
|
|
3230
|
-
const runModel = formatModelChip(
|
|
3231
|
-
return `<section class="
|
|
3232
|
-
<details class="
|
|
3305
|
+
const runCost = formatLiveCostChip(live.cost);
|
|
3306
|
+
const runModel = formatModelChip(live.cost.models);
|
|
3307
|
+
return `<section class="live-run">
|
|
3308
|
+
<details class="live-run-meta">
|
|
3233
3309
|
<summary>${labelWithHelp(esc(strings.collLiveRunMeta), strings.collLiveRunMetaHelp)}</summary>
|
|
3234
|
-
<div class="
|
|
3310
|
+
<div class="live-run-meta-body">
|
|
3235
3311
|
<span class="dim">${esc(strings.liveRunIdLabel)}</span>
|
|
3236
|
-
<code>${esc(
|
|
3312
|
+
<code>${esc(live.runId)}</code>
|
|
3237
3313
|
<span class="dim">${esc(strings.liveSessionLabel)}</span>
|
|
3238
|
-
<code>${esc(
|
|
3314
|
+
<code>${esc(live.sessionName)}</code>
|
|
3239
3315
|
${runModel}
|
|
3240
3316
|
${runCost}
|
|
3241
|
-
<span class="duration">${formatDuration$1(
|
|
3317
|
+
<span class="duration">${formatDuration$1(live.durationMs)}</span>
|
|
3242
3318
|
</div>
|
|
3243
3319
|
</details>
|
|
3244
|
-
<ol class="
|
|
3320
|
+
<ol class="live-steps">${stepItems}</ol>
|
|
3245
3321
|
</section>`;
|
|
3246
3322
|
}
|
|
3247
3323
|
/**
 * Compact dot-separated cost chip, e.g.
 * "$0.1234 · 4 turns · 42 in / 6,511 out · 2.0M cached".
 * Returns an empty string when `formatLiveCost` has nothing to show (null).
 */
function formatLiveCostChip(cost) {
	const summary = formatLiveCost(cost, { compact: true });
	return summary === null ? "" : `<span class="live-cost" title="cost · turns · fresh-input/output tokens · cache-read input">${esc(summary)}</span>`;
}
|
|
3253
3329
|
/**
 * Render the model id(s) as a chip, comma-separated and HTML-escaped.
 * Returns an empty string when `models` is missing or empty.
 */
function formatModelChip(models) {
	const hasModels = Boolean(models) && models.length !== 0;
	if (!hasModels) return "";
	const joined = models.join(", ");
	return `<span class="live-model" title="Claude model id(s) reported by the SDK">${esc(joined)}</span>`;
}
|
|
3257
3333
|
/**
|
|
3258
|
-
* Per-step UI for deterministic runs. Adopts the same `
|
|
3334
|
+
* Per-step UI for deterministic runs. Adopts the same `live-step` card layout
|
|
3259
3335
|
* used by live runs so reviewers don't have to context-switch between two
|
|
3260
3336
|
* visual idioms. We map the evidence entries (which are already keyed by
|
|
3261
3337
|
* stepId) onto the same shape, leaving live-only fields (before png, cost,
|
|
@@ -3263,14 +3339,14 @@ function formatModelChip(models) {
|
|
|
3263
3339
|
*/
|
|
3264
3340
|
function renderEvidence(r, s) {
	const entries = r.evidence;
	// No evidence captured: contribute nothing to the spec body.
	if (!entries || entries.length === 0) return "";
	// One card per evidence entry, wrapped in the `live-run` / `live-steps` markup.
	const cards = entries.map((e) => renderDetStepCard(e, s)).join("\n");
	return `<section class="live-run">
<ol class="live-steps">${cards}</ol>
</section>`;
}
|
|
3270
3346
|
function renderDetStepCard(e, s) {
|
|
3271
3347
|
const status = e.status === "failed" ? "failed" : "passed";
|
|
3272
|
-
const description = e.description ? `<p class="
|
|
3273
|
-
const failureBlock = e.status === "failed" && e.failureSummary ? `<p class="
|
|
3348
|
+
const description = e.description ? `<p class="live-instr"><strong>${esc(s.stepExpectLabel)}:</strong> ${esc(e.description)}</p>` : "";
|
|
3349
|
+
const failureBlock = e.status === "failed" && e.failureSummary ? `<p class="live-reasoning">${esc(e.failureSummary)}</p>` : "";
|
|
3274
3350
|
const metaRows = [];
|
|
3275
3351
|
if (e.url) {
|
|
3276
3352
|
const shortUrl = shortenUrl(e.url);
|
|
@@ -3279,16 +3355,16 @@ function renderDetStepCard(e, s) {
|
|
|
3279
3355
|
if (e.title) metaRows.push(`<div class="evidence-meta-row"><span class="evidence-meta-label">${esc(s.metaPage)}</span><span class="evidence-meta-value">${esc(e.title)}</span></div>`);
|
|
3280
3356
|
const meta = metaRows.length > 0 ? `<div class="evidence-meta">${metaRows.join("")}</div>` : "";
|
|
3281
3357
|
const after = `<a class="shot" href="${esc(e.pngPath)}" target="_blank" rel="noopener"><img src="${esc(e.pngPath)}" alt="${esc(e.stepId)}" loading="lazy"><span>after</span></a>`;
|
|
3282
|
-
return `<li class="
|
|
3283
|
-
<div class="
|
|
3358
|
+
return `<li class="live-step ${status}">
|
|
3359
|
+
<div class="live-step-head">
|
|
3284
3360
|
${statusIcon(status)}
|
|
3285
3361
|
<span class="step-name">${esc(e.stepId)}</span>
|
|
3286
3362
|
<span class="spacer"></span>
|
|
3287
3363
|
</div>
|
|
3288
|
-
<div class="
|
|
3364
|
+
<div class="live-step-body">
|
|
3289
3365
|
${description}
|
|
3290
3366
|
${failureBlock}
|
|
3291
|
-
<div class="
|
|
3367
|
+
<div class="live-shots">${after}</div>
|
|
3292
3368
|
${meta}
|
|
3293
3369
|
</div>
|
|
3294
3370
|
</li>`;
|
|
@@ -3726,54 +3802,54 @@ table.matrix td.miss-nonzero { background: var(--fail-bg); }
|
|
|
3726
3802
|
|
|
3727
3803
|
/* Per-step block: indented + a thin rail under the test title so the
|
|
3728
3804
|
hierarchy spec → test → step is visible. */
|
|
3729
|
-
.
|
|
3805
|
+
.live-run {
|
|
3730
3806
|
padding: 0 0 0 14px;
|
|
3731
3807
|
margin-left: 6px;
|
|
3732
3808
|
border-left: 1px solid var(--border-soft);
|
|
3733
3809
|
}
|
|
3734
|
-
.
|
|
3735
|
-
.
|
|
3810
|
+
.live-run-meta { margin: 0 0 8px; font-size: 11.5px; }
|
|
3811
|
+
.live-run-meta > summary {
|
|
3736
3812
|
cursor: pointer; color: var(--text-mute); list-style: none;
|
|
3737
3813
|
padding: 4px 0;
|
|
3738
3814
|
}
|
|
3739
|
-
.
|
|
3740
|
-
.
|
|
3815
|
+
.live-run-meta > summary::-webkit-details-marker { display: none; }
|
|
3816
|
+
.live-run-meta > summary::before {
|
|
3741
3817
|
content: "▸"; color: var(--text-dim); font-size: 10px;
|
|
3742
3818
|
margin-right: 6px; transition: transform 0.12s ease;
|
|
3743
3819
|
display: inline-block;
|
|
3744
3820
|
}
|
|
3745
|
-
.
|
|
3746
|
-
.
|
|
3821
|
+
.live-run-meta[open] > summary::before { transform: rotate(90deg); }
|
|
3822
|
+
.live-run-meta-body {
|
|
3747
3823
|
display: flex; gap: 12px; align-items: baseline; flex-wrap: wrap;
|
|
3748
3824
|
color: var(--text-mute); padding: 6px 0 8px 16px;
|
|
3749
3825
|
}
|
|
3750
|
-
.
|
|
3751
|
-
.
|
|
3826
|
+
.live-run-meta-body code { background: transparent; padding: 0; font-size: 11.5px; color: var(--text-dim); }
|
|
3827
|
+
.live-run-meta-body .dim { color: var(--text-mute); }
|
|
3752
3828
|
|
|
3753
3829
|
/* Steps: flat list. The separator between steps has to outweigh anything
|
|
3754
3830
|
*inside* a step (e.g. evidence-meta footer) so the eye finds the
|
|
3755
3831
|
step boundary at a glance — hence a solid var(--border), not the
|
|
3756
3832
|
softer hairline used inside the step body. */
|
|
3757
|
-
.
|
|
3758
|
-
.
|
|
3759
|
-
.
|
|
3760
|
-
.
|
|
3761
|
-
.
|
|
3762
|
-
.
|
|
3763
|
-
.
|
|
3764
|
-
.
|
|
3833
|
+
.live-steps { list-style: none; padding: 0; margin: 0; display: flex; flex-direction: column; gap: 0; }
|
|
3834
|
+
.live-step { border-top: 1px solid var(--border); padding: 16px 0; background: transparent; }
|
|
3835
|
+
.live-step:first-child { border-top: 0; padding-top: 0; }
|
|
3836
|
+
.live-step.skipped { opacity: 0.55; }
|
|
3837
|
+
.live-step-head { display: flex; align-items: baseline; gap: 8px; padding: 0; background: transparent; border-bottom: 0; font-size: 13px; margin-bottom: 6px; }
|
|
3838
|
+
.live-step-body { padding: 0; font-size: 12.5px; line-height: 1.55; }
|
|
3839
|
+
.live-step-body p { margin: 4px 0; }
|
|
3840
|
+
.live-instr strong { color: var(--text-mute); font-weight: 600; margin-right: 4px; font-size: 11px; letter-spacing: 0.04em; text-transform: uppercase; }
|
|
3765
3841
|
|
|
3766
3842
|
/* Reasoning: left rail, no fill. */
|
|
3767
|
-
.
|
|
3768
|
-
.
|
|
3843
|
+
.live-reasoning { color: var(--text-dim); font-style: italic; background: transparent; padding: 4px 0 4px 12px; border-left: 2px solid var(--fail); border-radius: 0; margin: 6px 0; }
|
|
3844
|
+
.live-step.passed .live-reasoning { border-left-color: var(--border); color: var(--text-mute); font-style: normal; }
|
|
3769
3845
|
|
|
3770
|
-
.
|
|
3771
|
-
.
|
|
3772
|
-
.
|
|
3773
|
-
.
|
|
3846
|
+
.live-source { font-size: 11px; color: var(--text-mute); }
|
|
3847
|
+
.live-shots { display: flex; gap: 12px; margin-top: 10px; flex-wrap: wrap; }
|
|
3848
|
+
.live-shots .shot { display: flex; flex-direction: column; align-items: center; gap: 4px; text-decoration: none; color: var(--text-mute); font-size: 10px; letter-spacing: 0.08em; }
|
|
3849
|
+
.live-shots .shot img { max-width: 280px; max-height: 180px; border: 1px solid var(--border-soft); border-radius: 3px; object-fit: contain; background: #000; }
|
|
3774
3850
|
|
|
3775
3851
|
/* Cost / model chips: muted text, no fill. */
|
|
3776
|
-
.
|
|
3852
|
+
.live-cost, .live-model {
|
|
3777
3853
|
font-size: 11px; padding: 0;
|
|
3778
3854
|
background: transparent;
|
|
3779
3855
|
color: var(--text-mute);
|
|
@@ -4083,6 +4159,123 @@ const CLIENT_JS = `
|
|
|
4083
4159
|
})();
|
|
4084
4160
|
`;
|
|
4085
4161
|
//#endregion
|
|
4162
|
+
//#region src/runtime/profile-env.ts
|
|
4163
|
+
/**
|
|
4164
|
+
* Profile env (Issue #37). A profile is a named `.env` under
|
|
4165
|
+
* `.ccqa/profiles/<name>.env`; its contents merge into `process.env` before any
|
|
4166
|
+
* spec work, so one spec targets dev/stg/prd without per-environment copies.
|
|
4167
|
+
* Spec `${VAR}` references all resolve against `process.env` downstream.
|
|
4168
|
+
*
|
|
4169
|
+
* The `.env` parser is a small hand-rolled subset (no dotenv dependency).
|
|
4170
|
+
*/
|
|
4171
|
+
/**
 * Parse a `.env` body into a `name → value` map.
 *
 * Supported subset: blank lines and `#` comment lines are skipped, an optional
 * leading `export` is ignored, each line is split on its first `=`, one pair
 * of surrounding quotes is stripped and an inline `# comment` is dropped.
 * Multi-line values and variable interpolation are not supported.
 *
 * @param {string} content Raw text of the `.env` file.
 * @returns {Record<string, string>} Parsed variables; when a name repeats the
 *   last assignment wins. Lines without `=` or with an empty name are ignored.
 */
function parseDotenv(content) {
	const out = {};
	for (const rawLine of content.split(/\r?\n/)) {
		const line = rawLine.trim();
		if (line === "" || line.startsWith("#")) continue;
		const withoutExport = line.replace(/^export\s+/, "");
		const eq = withoutExport.indexOf("=");
		if (eq === -1) continue;
		const key = withoutExport.slice(0, eq).trim();
		if (key === "") continue;
		const rest = withoutExport.slice(eq + 1);
		// `KEY= # note` is an empty value followed by a comment. This has to be
		// detected before trimming: once the leading whitespace is gone the
		// comment is indistinguishable from a value that starts with `#`
		// (e.g. `COLOR=#fff`, which must keep its value).
		out[key] = /^\s+#/.test(rest) ? "" : parseValue(rest.trim());
	}
	return out;
}
/**
 * Decode the already-trimmed right-hand side of a `KEY=value` line.
 *
 * A value that starts with `"` or `'` and whose matching close quote is
 * followed only by whitespace and/or a `# comment` yields the text between the
 * quotes. Anything else is treated as unquoted: it is cut at the first
 * whitespace-then-`#` sequence and right-trimmed.
 *
 * @param {string} raw Trimmed text after the first `=`.
 * @returns {string} The decoded value.
 */
function parseValue(raw) {
	const quote = raw[0];
	if (quote === "\"" || quote === "'") {
		const close = raw.indexOf(quote, 1);
		// Only accept the quoted form when nothing but a comment trails it;
		// otherwise fall through and keep the text as an unquoted value.
		if (close !== -1 && /^\s*(#.*)?$/.test(raw.slice(close + 1))) return raw.slice(1, close);
	}
	const hash = raw.search(/\s#/);
	return hash === -1 ? raw : raw.slice(0, hash).trimEnd();
}
|
|
4199
|
+
/**
 * Thrown when the `.env` file backing a requested profile does not exist.
 * Carries the profile name and the path that was looked up.
 */
var ProfileNotFoundError = class extends Error {
	profile;
	path;
	constructor(profile, path) {
		const message = `profile "${profile}" not found: ${path}`;
		super(message);
		Object.assign(this, {
			name: "ProfileNotFoundError",
			profile,
			path
		});
	}
};
|
|
4209
|
+
/**
 * Thrown when a profile name is not a bare, single path segment.
 * Carries the rejected name.
 */
var InvalidProfileNameError = class extends Error {
	profile;
	constructor(profile) {
		const message = `invalid profile name "${profile}": expected a bare name like "stg" (no path separators, no leading dot)`;
		super(message);
		Object.assign(this, {
			name: "InvalidProfileNameError",
			profile
		});
	}
};
|
|
4217
|
+
/**
 * Guard for `--profile <name>`: the name must be one path segment that does
 * not begin with a dot, so it can never address a file outside the profiles
 * directory (e.g. `--profile ../../etc/hosts`). Because separators and a
 * leading dot are both refused, `..` traversal is already impossible and a
 * `..` inside the name (like `v1..2`) is still accepted.
 *
 * Throws `InvalidProfileNameError` for an empty name, a name containing `/`
 * or `\`, or a name starting with `.`.
 */
function assertValidProfileName(profile) {
	const separators = ["/", "\\"];
	const rejected = profile === "" || separators.some((sep) => profile.includes(sep)) || profile.startsWith(".");
	if (rejected) throw new InvalidProfileNameError(profile);
}
|
|
4226
|
+
/**
 * Location of the `.env` file for `<profile>`:
 * `<cwd>/.ccqa/profiles/<profile>.env`. The name is validated first, so an
 * invalid name throws before any path is built.
 */
function profilePath(profile, cwd) {
	assertValidProfileName(profile);
	const fileName = `${profile}.env`;
	return join(cwd, ".ccqa", "profiles", fileName);
}
|
|
4231
|
+
/**
 * Read the `.env` file at `path` and parse it. Resolves to `null` when the
 * file does not exist (`ENOENT`); every other read error is rethrown as-is.
 */
async function readDotenv(path) {
	const content = await readFile(path, "utf8").catch((err) => {
		if (err.code !== "ENOENT") throw err;
		return null;
	});
	if (content === null) return null;
	return parseDotenv(content);
}
|
|
4242
|
+
/**
 * Load the variables of `.ccqa/profiles/<profile>.env`. An absent file is an
 * error (`ProfileNotFoundError`) rather than an empty result: a mistyped
 * profile name has to fail loudly instead of quietly resolving every
 * credential to empty.
 */
async function loadProfileEnv(profile, cwd) {
	const envFile = profilePath(profile, cwd);
	const parsed = await readDotenv(envFile);
	if (parsed !== null) return parsed;
	throw new ProfileNotFoundError(profile, envFile);
}
|
|
4252
|
+
/** Path of the fallback `.env` file, `<cwd>/.env`, used when no `--profile` is given. */
function defaultEnvPath(cwd) {
	const segments = [cwd, ".env"];
	return join(...segments);
}
|
|
4256
|
+
/**
 * Load `<cwd>/.env`, the default source when `--profile` is not given.
 * Resolves to `null` when the file is missing, which is not an error: the run
 * simply keeps the existing `process.env`.
 */
async function loadDefaultEnv(cwd) {
	const envFile = defaultEnvPath(cwd);
	return readDotenv(envFile);
}
|
|
4263
|
+
/**
 * Copy `vars` into `process.env`.
 *
 * `opts.override` defaults to `true`, meaning the given values replace any
 * inherited ones. With `override: false`, names that are already set in
 * `process.env` are left untouched.
 *
 * Returns the list of names that were written, never their values, so callers
 * can log what was applied without leaking secrets.
 */
function applyProfileEnv(vars, opts = {}) {
	const override = opts.override ?? true;
	const applied = [];
	for (const [key, val] of Object.entries(vars)) {
		const alreadySet = process.env[key] !== undefined;
		if (override || !alreadySet) {
			process.env[key] = val;
			applied.push(key);
		}
	}
	return applied;
}
|
|
4278
|
+
//#endregion
|
|
4086
4279
|
//#region src/cli/options.ts
|
|
4087
4280
|
/**
|
|
4088
4281
|
* Shared `--language` flag. Every Claude-driven command writes some
|
|
@@ -4093,6 +4286,53 @@ const CLIENT_JS = `
|
|
|
4093
4286
|
function addLanguageOption(command) {
	// Registers `--language <bcp47>` on the given command, defaulting to
	// DEFAULT_LANGUAGE, and returns whatever `command.option` returns.
	const flags = "--language <bcp47>";
	const description = "Language for human-readable output (e.g. 'en', 'ja'). Default 'auto' follows the language of the spec/codebase.";
	return command.option(flags, description, DEFAULT_LANGUAGE);
}
|
|
4289
|
+
/**
 * Registers the shared `--profile <name>` flag. The browser-driving commands
 * (`run`, `record`) both go through this helper so their help text and
 * behaviour cannot drift apart. Returns whatever `command.option` returns.
 */
function addProfileOption(command) {
	const flags = "--profile <name>";
	const description = "Load .ccqa/profiles/<name>.env into the environment before resolving spec ${VAR} references (URLs, credentials), so one spec can target dev/stg/prd without per-environment copies. Profile values override the inherited environment.";
	return command.option(flags, description);
}
|
|
4296
|
+
/**
 * Prepare `process.env` for a `run` / `record` invocation before any spec
 * work happens.
 *
 * - `--profile <name>` given: that profile is loaded (a missing or invalid
 *   profile exits with code 2).
 * - no `--profile`: `<cwd>/.env` is loaded when it exists; its absence is fine.
 *
 * The test is against `undefined` on purpose, so `--profile ""` is routed to
 * the named-profile path and rejected there instead of being skipped.
 */
async function applyProfileFromOption(profile, cwd) {
	if (profile === undefined) {
		await applyDefaultEnv(cwd);
		return;
	}
	await applyNamedProfile(profile, cwd);
}
|
|
4307
|
+
/** Count summary for the meta line of both load paths: "1 var", otherwise "<n> vars". */
function varCount(n) {
	const plural = n === 1 ? "" : "s";
	return `${n} var${plural}`;
}
|
|
4311
|
+
/**
 * Load the named profile and merge it into `process.env`, then print a
 * `profile` meta line with the number of applied variables (plus a warning
 * when the profile defined none).
 *
 * Any failure is reported through `error` (with a creation hint when the
 * profile file is missing) and ends the process with exit code 2.
 */
async function applyNamedProfile(profile, cwd) {
	try {
		const vars = await loadProfileEnv(profile, cwd);
		const appliedNames = applyProfileEnv(vars);
		const total = appliedNames.length;
		meta("profile", `${profile} (${varCount(total)})`);
		if (total === 0) {
			warn(`profile "${profile}" defined no variables — spec $\{VAR} references will resolve to empty`);
		}
	} catch (err) {
		if (err instanceof ProfileNotFoundError) {
			error(err.message);
			hint(`create ${err.path} with the environment's $\{VAR} values`);
		} else if (err instanceof InvalidProfileNameError) {
			error(err.message);
		} else {
			const detail = err instanceof Error ? err.message : String(err);
			error(`failed to load profile "${profile}": ${detail}`);
		}
		process.exit(2);
	}
}
|
|
4325
|
+
/**
 * Load `<cwd>/.env` when present and merge it into `process.env` without
 * overriding values that are already set, then print an `env` meta line with
 * the number of applied variables. A missing file is a silent no-op; a read
 * failure is reported through `error` and exits with code 2.
 */
async function applyDefaultEnv(cwd) {
	let vars;
	try {
		vars = await loadDefaultEnv(cwd);
	} catch (err) {
		const detail = err instanceof Error ? err.message : String(err);
		error(`failed to load ${defaultEnvPath(cwd)}: ${detail}`);
		process.exit(2);
	}
	if (vars === null) return;
	const appliedNames = applyProfileEnv(vars, { override: false });
	meta("env", `.env (${varCount(appliedNames.length)})`);
}
|
|
4096
4336
|
//#endregion
|
|
4097
4337
|
//#region src/cli/resolve-cwd.ts
|
|
4098
4338
|
/**
|
|
@@ -4104,7 +4344,7 @@ function addLanguageOption(command) {
|
|
|
4104
4344
|
*
|
|
4105
4345
|
* It's mostly useful in monorepos where you want to invoke ccqa from the
|
|
4106
4346
|
* repo root but target a subpackage (e.g.
|
|
4107
|
-
* `ccqa run --cwd
|
|
4347
|
+
* `ccqa run --cwd apps/web-app`).
|
|
4108
4348
|
*
|
|
4109
4349
|
* Falls back to `process.cwd()` when the option is not given.
|
|
4110
4350
|
*/
|
|
@@ -4249,7 +4489,7 @@ function formatAgentBrowserUnavailableMessage() {
|
|
|
4249
4489
|
//#region src/cli/preflight.ts
|
|
4250
4490
|
/**
|
|
4251
4491
|
* Shared startup steps for every command that drives a real `agent-browser`
|
|
4252
|
-
* (currently `ccqa
|
|
4492
|
+
* (currently `ccqa record` (trace) and `ccqa run` (live mode)):
|
|
4253
4493
|
*
|
|
4254
4494
|
* 1. Verify the peer-installed agent-browser binary is reachable. On
|
|
4255
4495
|
* failure print the standard guidance and `process.exit(1)`; on
|
|
@@ -4276,14 +4516,14 @@ async function preflightAgentBrowserCommand() {
|
|
|
4276
4516
|
await warnStaleBlockArtifacts();
|
|
4277
4517
|
}
|
|
4278
4518
|
//#endregion
|
|
4279
|
-
//#region src/report/
|
|
4519
|
+
//#region src/report/live-transcript-excerpt.ts
|
|
4280
4520
|
/**
|
|
4281
4521
|
* Build a compact transcript summary for the failure classifier.
|
|
4282
4522
|
*
|
|
4283
4523
|
* Returns `null` when the run has no failed step (every step passed/skipped),
|
|
4284
4524
|
* since the failure analyzer has nothing to explain in that case.
|
|
4285
4525
|
*/
|
|
4286
|
-
async function
|
|
4526
|
+
async function buildLiveTranscriptExcerpt(result, options = {}) {
|
|
4287
4527
|
const failingIndex = result.steps.findIndex((s) => s.status === "failed");
|
|
4288
4528
|
if (failingIndex === -1) return null;
|
|
4289
4529
|
const failingStep = result.steps[failingIndex];
|
|
@@ -4316,7 +4556,7 @@ function oneLine$1(s) {
|
|
|
4316
4556
|
return s.replace(/\s+/g, " ").trim();
|
|
4317
4557
|
}
|
|
4318
4558
|
//#endregion
|
|
4319
|
-
//#region src/runtime/
|
|
4559
|
+
//#region src/runtime/live-artifacts.ts
|
|
4320
4560
|
/**
|
|
4321
4561
|
* Build a sortable run id from the current wall-clock time. ISO8601 with
|
|
4322
4562
|
* `:` / `.` replaced so it's filename-safe. Caller is expected to mkdir the
|
|
@@ -4346,6 +4586,12 @@ function stepArtifactPaths(runDir, stepId) {
|
|
|
4346
4586
|
//#endregion
|
|
4347
4587
|
//#region src/claude/agent-browser-invoke.ts
|
|
4348
4588
|
function agentBrowserInvokeBase(input) {
|
|
4589
|
+
const env = {
|
|
4590
|
+
AGENT_BROWSER_SESSION: input.sessionName,
|
|
4591
|
+
CCQA_RUN_ID: input.runId,
|
|
4592
|
+
PATH: pathWithAgentBrowserShim(process.env["PATH"])
|
|
4593
|
+
};
|
|
4594
|
+
if (input.statePath) env["CCQA_AB_STATE"] = input.statePath;
|
|
4349
4595
|
return {
|
|
4350
4596
|
allowedTools: [
|
|
4351
4597
|
"Bash(*)",
|
|
@@ -4353,23 +4599,25 @@ function agentBrowserInvokeBase(input) {
|
|
|
4353
4599
|
"Grep",
|
|
4354
4600
|
"Glob"
|
|
4355
4601
|
],
|
|
4356
|
-
env
|
|
4357
|
-
AGENT_BROWSER_SESSION: input.sessionName,
|
|
4358
|
-
CCQA_RUN_ID: input.runId,
|
|
4359
|
-
PATH: pathWithAgentBrowserShim(process.env["PATH"])
|
|
4360
|
-
}
|
|
4602
|
+
env
|
|
4361
4603
|
};
|
|
4362
4604
|
}
|
|
4363
4605
|
//#endregion
|
|
4364
|
-
//#region src/prompts/
|
|
4365
|
-
|
|
4366
|
-
|
|
4606
|
+
//#region src/prompts/live.ts
|
|
4607
|
+
/**
 * Produce a unique agent-browser session name of the form
 * `ccqa-live-<runId>-<8 chars of a random UUID>`.
 *
 * The run id is derived from the wall clock at millisecond precision, so with
 * `--concurrency > 1` two specs may start within the same millisecond and get
 * the same id. The random suffix keeps their Chrome sessions separate so
 * state never bleeds across parallel runs.
 */
function generateLiveSessionName() {
	const runId = buildRunId();
	const suffix = randomUUID().slice(0, 8);
	return `ccqa-live-${runId}-${suffix}`;
}
|
|
4368
4616
|
/**
|
|
4369
|
-
* Static prefix of the `ccqa run
|
|
4370
|
-
* reused across every step's invocation — the only piece that
|
|
4371
|
-
* step is the trailing "Your Task: <stepId>" section produced by
|
|
4372
|
-
* `
|
|
4617
|
+
* Static prefix of the `ccqa run` (live spec) system prompt. Built once per
|
|
4618
|
+
* run and reused across every step's invocation — the only piece that
|
|
4619
|
+
* changes per step is the trailing "Your Task: <stepId>" section produced by
|
|
4620
|
+
* `buildLiveSystemPromptStepSection`. Keeping the split here lets the prompt
|
|
4373
4621
|
* cache absorb the shared bulk and keeps each turn's prompt construction down
|
|
4374
4622
|
* to a small string concat.
|
|
4375
4623
|
*
|
|
@@ -4378,32 +4626,35 @@ function generateRunNdSessionName() {
|
|
|
4378
4626
|
* but never names a specific product, URL, account, role, or UI element.
|
|
4379
4627
|
* Project-specific guidance ("the admin tenant is foo.example", "session
|
|
4380
4628
|
* times out at X minutes", …) is appended from
|
|
4381
|
-
* `.ccqa/prompts/
|
|
4382
|
-
*
|
|
4629
|
+
* `.ccqa/prompts/live.user.md` (human-maintained) and
|
|
4630
|
+
* `.ccqa/prompts/live.agent.md` (updated by `ccqa run --update-agent-prompt`)
|
|
4631
|
+
* by the caller, so ccqa stays clean of downstream-product context.
|
|
4383
4632
|
*
|
|
4384
|
-
* Constraint posture: `ccqa
|
|
4385
|
-
* blocks `eval` / `@ref` / chained agent-browser invocations
|
|
4386
|
-
* trace outputs need to replay deterministically.
|
|
4387
|
-
* the model judges the step live — so those guards are off
|
|
4388
|
-
* told it may use any agent-browser subcommand and any
|
|
4389
|
-
|
|
4390
|
-
|
|
4633
|
+
* Constraint posture: `ccqa record` (trace) enforces a strict selector
|
|
4634
|
+
* whitelist and blocks `eval` / `@ref` / chained agent-browser invocations
|
|
4635
|
+
* because those trace outputs need to replay deterministically. Live specs
|
|
4636
|
+
* have no replay — the model judges the step live — so those guards are off
|
|
4637
|
+
* and the model is told it may use any agent-browser subcommand and any
|
|
4638
|
+
* selector strategy.
|
|
4639
|
+
*/
|
|
4640
|
+
function buildLiveSystemPromptPrefix(input) {
|
|
4391
4641
|
const stepsText = input.allSteps.map((s) => `### ${s.id} [${s.source}]
|
|
4392
4642
|
- **Instruction**: ${s.instruction}
|
|
4393
4643
|
- **Expected**: ${s.expected}`).join("\n\n");
|
|
4644
|
+
const stateLine = input.statePath ? `\n\nA pre-recorded auth-state file is provided at \`${input.statePath}\` (also in the env var \`CCQA_AB_STATE\`). **Always also pass \`--state "$CCQA_AB_STATE"\`** to every \`agent-browser\` command — this restores cookies and localStorage from a prior interactive login, so the user is already signed in to the application under test from step 1. The file is loaded read-only; do not run \`agent-browser state save\`.` : "";
|
|
4394
4645
|
return `You are a QA execution agent. You are executing ONE step of a browser-based end-to-end test and judging whether the step's expected outcome was achieved. You are NOT recording a replayable test script — be flexible, explore the DOM as needed, and make a clear pass / fail call at the end.
|
|
4395
4646
|
|
|
4396
4647
|
## Session
|
|
4397
4648
|
|
|
4398
4649
|
SESSION NAME: \`${input.sessionName}\`
|
|
4399
4650
|
|
|
4400
|
-
Always pass \`--session ${input.sessionName}\` to every \`agent-browser\` command. The session persists across steps within this test run, so the browser state from previous steps is already loaded when this turn starts
|
|
4651
|
+
Always pass \`--session ${input.sessionName}\` to every \`agent-browser\` command. The session persists across steps within this test run, so the browser state from previous steps is already loaded when this turn starts.${stateLine}
|
|
4401
4652
|
|
|
4402
4653
|
## Tools
|
|
4403
4654
|
|
|
4404
4655
|
You have:
|
|
4405
4656
|
|
|
4406
|
-
- **Bash** to run \`agent-browser\` (the full surface — \`open\`, \`snapshot\`, \`click\`, \`fill\`, \`press\`, \`wait\`, \`find\`, \`screenshot\`, \`eval\`, \`js\`, \`get\`, etc.). Any selector form is allowed: \`@ref\` (e.g. \`@e14\`), CSS selectors, \`text=...\`, \`[aria-label='...']\`, \`[data-testid='...']\`, bare tags inside \`find first/last/nth\` — whatever works for this single run. There is no replay contract to honour.
|
|
4657
|
+
- **Bash** to run \`agent-browser\` (the full surface — \`open\`, \`snapshot\`, \`click\`, \`fill\`, \`upload\`, \`press\`, \`wait\`, \`find\`, \`screenshot\`, \`eval\`, \`js\`, \`get\`, etc.). Any selector form is allowed: \`@ref\` (e.g. \`@e14\`), CSS selectors, \`text=...\`, \`[aria-label='...']\`, \`[data-testid='...']\`, bare tags inside \`find first/last/nth\` — whatever works for this single run. There is no replay contract to honour. For file inputs (\`<input type="file">\`) do NOT \`click\` the input — use \`agent-browser upload "<selector>" <path>\` so no OS file-picker dialog opens. Fixtures conventionally live under \`.ccqa/fixtures/\`; reference them via \`\${CCQA_FIXTURES_DIR}/<name>\`.
|
|
4407
4658
|
- **Read / Grep / Glob** for inspecting the application source code when you need to find a selector or understand routing. Read-only — do not modify source files.
|
|
4408
4659
|
|
|
4409
4660
|
## Test Specification
|
|
@@ -4456,7 +4707,7 @@ Everything else you write (narrative, tool output summaries, etc.) is fine — o
|
|
|
4456
4707
|
`;
|
|
4457
4708
|
}
|
|
4458
4709
|
/** Per-step trailer with the current step's instruction / expected. */
|
|
4459
|
-
function
|
|
4710
|
+
function buildLiveSystemPromptStepSection(step) {
|
|
4460
4711
|
return `
|
|
4461
4712
|
## Your Task: ${step.id}
|
|
4462
4713
|
|
|
@@ -4467,11 +4718,11 @@ Execute the instruction in the running browser session, then judge whether the e
|
|
|
4467
4718
|
`;
|
|
4468
4719
|
}
|
|
4469
4720
|
/** Per-turn user message — the system prompt already carries all spec context. */
|
|
4470
|
-
function
|
|
4721
|
+
function buildLiveUserPrompt(step) {
	// Only the step id is interpolated here; the message defers to the system
	// prompt for the verdict format.
	const stepId = step.id;
	return `Execute step ${stepId} and emit your STEP_RESULT verdict as instructed in the system prompt.`;
}
|
|
4473
4724
|
//#endregion
|
|
4474
|
-
//#region src/runtime/
|
|
4725
|
+
//#region src/runtime/live-result-parse.ts
|
|
4475
4726
|
const MAX_REASON_LEN = 2e3;
|
|
4476
4727
|
/** Parse a single STEP_RESULT line. Returns null on malformed input. */
|
|
4477
4728
|
function parseStepResultLine(line) {
|
|
@@ -4501,7 +4752,7 @@ function findLastStepResult(text) {
|
|
|
4501
4752
|
//#region src/runtime/screenshot.ts
|
|
4502
4753
|
/**
|
|
4503
4754
|
* Take a PNG screenshot of the current page in the given agent-browser session
|
|
4504
|
-
* and write it to `outPath`. Used by `ccqa run
|
|
4755
|
+
* and write it to `outPath`. Used by `ccqa run` (live mode) to capture per-step
|
|
4505
4756
|
* artifacts (before / after the step's actions) so the human-readable run
|
|
4506
4757
|
* report has a visual trail even though no AB_ACTION stream is recorded.
|
|
4507
4758
|
*
|
|
@@ -4511,11 +4762,9 @@ function findLastStepResult(text) {
|
|
|
4511
4762
|
* artifact, not a reason to abort the test step.
|
|
4512
4763
|
*/
|
|
4513
4764
|
function takeScreenshot(sessionName, outPath, options) {
|
|
4514
|
-
const args = [
|
|
4515
|
-
|
|
4516
|
-
|
|
4517
|
-
"screenshot"
|
|
4518
|
-
];
|
|
4765
|
+
const args = ["--session", sessionName];
|
|
4766
|
+
if (options?.statePath) args.push("--state", options.statePath);
|
|
4767
|
+
args.push("screenshot");
|
|
4519
4768
|
if (options?.fullPage) args.push("--full");
|
|
4520
4769
|
args.push(outPath);
|
|
4521
4770
|
const res = spawnAB(args);
|
|
@@ -4530,10 +4779,10 @@ function takeScreenshot(sessionName, outPath, options) {
|
|
|
4530
4779
|
};
|
|
4531
4780
|
}
|
|
4532
4781
|
//#endregion
|
|
4533
|
-
//#region src/runtime/
|
|
4782
|
+
//#region src/runtime/live-executor.ts
|
|
4534
4783
|
/**
|
|
4535
|
-
* Run all spec steps once through Claude (
|
|
4536
|
-
*
|
|
4784
|
+
* Run all spec steps once through Claude (live mode). Each step is one Claude
|
|
4785
|
+
* invocation that:
|
|
4537
4786
|
* 1. takes a "before" screenshot of the live session
|
|
4538
4787
|
* 2. lets Claude execute the step's instruction via agent-browser (full
|
|
4539
4788
|
* surface, no replay-time selector constraints)
|
|
@@ -4544,20 +4793,23 @@ function takeScreenshot(sessionName, outPath, options) {
|
|
|
4544
4793
|
* the overall run status flips to `failed`. The Chrome session persists
|
|
4545
4794
|
* across steps so step N+1 starts on whatever page step N left the browser on.
|
|
4546
4795
|
*/
|
|
4547
|
-
async function
|
|
4796
|
+
async function runLiveExecutor(input) {
|
|
4548
4797
|
const startedAt = /* @__PURE__ */ new Date();
|
|
4549
4798
|
const stepResults = [];
|
|
4550
4799
|
let overallFailed = false;
|
|
4551
|
-
const
|
|
4800
|
+
const statePath = input.statePath ?? null;
|
|
4801
|
+
const promptPrefix = buildLiveSystemPromptPrefix({
|
|
4552
4802
|
title: input.spec.title,
|
|
4553
4803
|
allSteps: input.steps,
|
|
4554
|
-
sessionName: input.sessionName
|
|
4804
|
+
sessionName: input.sessionName,
|
|
4805
|
+
statePath
|
|
4555
4806
|
});
|
|
4556
4807
|
const suffixBlock = input.systemPromptSuffix ? `\n## Project-specific guidance\n\n${input.systemPromptSuffix}\n` : "";
|
|
4557
4808
|
const langDirective = languageDirective(input.language);
|
|
4558
4809
|
const invokeBase = agentBrowserInvokeBase({
|
|
4559
4810
|
sessionName: input.sessionName,
|
|
4560
|
-
runId: input.runId
|
|
4811
|
+
runId: input.runId,
|
|
4812
|
+
statePath
|
|
4561
4813
|
});
|
|
4562
4814
|
const retries = Math.max(0, input.retries ?? 0);
|
|
4563
4815
|
for (let i = 0; i < input.steps.length; i++) {
|
|
@@ -4571,8 +4823,8 @@ async function runNdExecutor(input) {
|
|
|
4571
4823
|
const paths = stepArtifactPaths(input.runDir, step$1.id);
|
|
4572
4824
|
await ensureDir(paths.beforePng);
|
|
4573
4825
|
const stepStartedAt = Date.now();
|
|
4574
|
-
const systemPrompt = promptPrefix +
|
|
4575
|
-
const userPrompt =
|
|
4826
|
+
const systemPrompt = promptPrefix + buildLiveSystemPromptStepSection(step$1) + suffixBlock + langDirective;
|
|
4827
|
+
const userPrompt = buildLiveUserPrompt(step$1);
|
|
4576
4828
|
let attempt = 0;
|
|
4577
4829
|
let lastOutcome = null;
|
|
4578
4830
|
while (attempt <= retries) {
|
|
@@ -4602,7 +4854,7 @@ async function runNdExecutor(input) {
|
|
|
4602
4854
|
}
|
|
4603
4855
|
}
|
|
4604
4856
|
async function executeStepAttempt(step, paths, systemPrompt, userPrompt) {
|
|
4605
|
-
const before = takeScreenshot(input.sessionName, paths.beforePng);
|
|
4857
|
+
const before = takeScreenshot(input.sessionName, paths.beforePng, { statePath });
|
|
4606
4858
|
if (!before.ok) warn(`screenshot (before, ${step.id}) failed: ${before.error}`);
|
|
4607
4859
|
const transcriptParts = [];
|
|
4608
4860
|
let isError = false;
|
|
@@ -4634,7 +4886,10 @@ async function runNdExecutor(input) {
|
|
|
4634
4886
|
transcriptParts.push(`[ccqa] invokeClaudeStreaming threw: ${err instanceof Error ? err.message : String(err)}`);
|
|
4635
4887
|
}
|
|
4636
4888
|
const transcript = transcriptParts.join("\n");
|
|
4637
|
-
const after = takeScreenshot(input.sessionName, paths.afterPng, {
|
|
4889
|
+
const after = takeScreenshot(input.sessionName, paths.afterPng, {
|
|
4890
|
+
fullPage: true,
|
|
4891
|
+
statePath
|
|
4892
|
+
});
|
|
4638
4893
|
if (!after.ok) warn(`screenshot (after, ${step.id}) failed: ${after.error}`);
|
|
4639
4894
|
await writeFile(paths.logTxt, transcript || "(no assistant text captured)", "utf-8");
|
|
4640
4895
|
const { status, reasoning } = judgeStepOutcome({
|
|
@@ -4750,24 +5005,24 @@ function truncateForLog$1(s) {
|
|
|
4750
5005
|
return oneLine.length > 100 ? oneLine.slice(0, 100) + "…" : oneLine;
|
|
4751
5006
|
}
|
|
4752
5007
|
//#endregion
|
|
4753
|
-
//#region src/report/
|
|
5008
|
+
//#region src/report/live-adapter.ts
|
|
4754
5009
|
/**
|
|
4755
|
-
* Convert one
|
|
4756
|
-
* `ReportSpecResult` shape consumed by `renderRunReport`.
|
|
4757
|
-
* does two non-trivial things:
|
|
5010
|
+
* Convert one live-mode (`mode: live`) execution result into the
|
|
5011
|
+
* persistence-layer `ReportSpecResult` shape consumed by `renderRunReport`.
|
|
5012
|
+
* The conversion does two non-trivial things:
|
|
4758
5013
|
*
|
|
4759
5014
|
* - rewrites the executor's absolute `beforePng`/`afterPng` paths as
|
|
4760
5015
|
* `reportDir`-relative hrefs so the rendered HTML opens its PNGs
|
|
4761
5016
|
* directly when the report dir + the run dir are downloaded together
|
|
4762
5017
|
* as a CI artifact bundle
|
|
4763
5018
|
* - nulls out every vitest-only field so the report renderer falls
|
|
4764
|
-
* through to its `
|
|
5019
|
+
* through to its `liveRun` branch
|
|
4765
5020
|
*
|
|
4766
5021
|
* Lives in `src/report/` (not the CLI) because the relative-path contract
|
|
4767
|
-
* on `
|
|
5022
|
+
* on `LiveReportStep.beforePng`/`afterPng` is a report-layer invariant,
|
|
4768
5023
|
* documented next to the schema, and the CLI should not own it.
|
|
4769
5024
|
*/
|
|
4770
|
-
function
|
|
5025
|
+
function liveRunToReportResult(args) {
|
|
4771
5026
|
const { featureName, specName, specYaml, result, reportDir } = args;
|
|
4772
5027
|
const steps = result.steps.map((s) => ({
|
|
4773
5028
|
stepId: s.stepId,
|
|
@@ -4781,7 +5036,7 @@ function ndRunToReportResult(args) {
|
|
|
4781
5036
|
durationMs: s.durationMs,
|
|
4782
5037
|
cost: { ...s.cost }
|
|
4783
5038
|
}));
|
|
4784
|
-
const
|
|
5039
|
+
const liveRun = {
|
|
4785
5040
|
runId: result.runId,
|
|
4786
5041
|
sessionName: result.sessionName,
|
|
4787
5042
|
startedAt: result.startedAt,
|
|
@@ -4804,16 +5059,16 @@ function ndRunToReportResult(args) {
|
|
|
4804
5059
|
diffExcerpt: null,
|
|
4805
5060
|
specYaml,
|
|
4806
5061
|
evidence: null,
|
|
4807
|
-
|
|
5062
|
+
liveRun
|
|
4808
5063
|
};
|
|
4809
5064
|
}
|
|
4810
5065
|
function relativeIfPresent(absPath, reportDir) {
	// A `null` path stays `null`; any other value is rewritten relative to
	// `reportDir`.
	if (absPath === null) return null;
	return relative(reportDir, absPath);
}
|
|
4813
5068
|
//#endregion
|
|
4814
|
-
//#region src/cli/run-
|
|
5069
|
+
//#region src/cli/run-live.ts
|
|
4815
5070
|
/**
|
|
4816
|
-
* Run pre-filtered `mode: live` specs through `
|
|
5071
|
+
* Run pre-filtered `mode: live` specs through `runLiveExecutor` (Claude +
|
|
4817
5072
|
* agent-browser) and, when `reportDir` is set, run drift audit + failure
|
|
4818
5073
|
* analysis to produce report rows. Sibling of `runDeterministicSpecs`.
|
|
4819
5074
|
*/
|
|
@@ -4825,24 +5080,25 @@ async function runLiveSpecs(specs, opts) {
|
|
|
4825
5080
|
const cwd = opts.cwd ?? process.cwd();
|
|
4826
5081
|
await preflightAgentBrowserCommand();
|
|
4827
5082
|
meta("live-specs", specs.length);
|
|
4828
|
-
const
|
|
4829
|
-
if (
|
|
4830
|
-
const
|
|
4831
|
-
|
|
4832
|
-
|
|
4833
|
-
const label = `${featureName}/${specName}`;
|
|
4834
|
-
|
|
4835
|
-
|
|
4836
|
-
|
|
4837
|
-
|
|
4838
|
-
|
|
4839
|
-
|
|
4840
|
-
|
|
4841
|
-
|
|
4842
|
-
|
|
4843
|
-
|
|
4844
|
-
|
|
4845
|
-
|
|
5083
|
+
const userPromptBundle = await loadLivePromptBundle(cwd);
|
|
5084
|
+
if (userPromptBundle !== null) meta("prompt", userPromptBundle.loaded.join(" + "));
|
|
5085
|
+
const userPromptSuffix = userPromptBundle?.text ?? null;
|
|
5086
|
+
const concurrency = Math.max(1, opts.concurrency ?? 1);
|
|
5087
|
+
const runs = await runPool(specs, concurrency, (spec, i) => {
|
|
5088
|
+
const label = `${spec.featureName}/${spec.specName}`;
|
|
5089
|
+
return withBuffer(label, concurrency > 1, () => {
|
|
5090
|
+
if (concurrency === 1 && specs.length > 1) {
|
|
5091
|
+
blank();
|
|
5092
|
+
info(`[${i + 1}/${specs.length}] ${label}`);
|
|
5093
|
+
}
|
|
5094
|
+
return runOneSpec({
|
|
5095
|
+
...spec,
|
|
5096
|
+
opts,
|
|
5097
|
+
userPromptSuffix,
|
|
5098
|
+
cwd
|
|
5099
|
+
});
|
|
5100
|
+
});
|
|
5101
|
+
});
|
|
4846
5102
|
const failedCount = runs.filter((r) => r.kind === "error" || r.kind === "run" && r.result.status === "failed").length;
|
|
4847
5103
|
blank();
|
|
4848
5104
|
meta("live-summary", `${runs.length - failedCount} passed / ${failedCount} failed`);
|
|
@@ -4859,7 +5115,7 @@ function buildLiveReportResults(runs, driftBySpec, analysisBySpec, reportDir, fa
|
|
|
4859
5115
|
if (r.kind !== "run") return [];
|
|
4860
5116
|
const key = `${r.featureName}/${r.specName}`;
|
|
4861
5117
|
return [{
|
|
4862
|
-
...
|
|
5118
|
+
...liveRunToReportResult({
|
|
4863
5119
|
featureName: r.featureName,
|
|
4864
5120
|
specName: r.specName,
|
|
4865
5121
|
specYaml: r.specYaml,
|
|
@@ -4889,7 +5145,7 @@ function analysisFieldsFor(a, status, failureAnalysisEnabled) {
|
|
|
4889
5145
|
/**
|
|
4890
5146
|
* Run `analyzeDrift` against every successfully-loaded spec and return a
|
|
4891
5147
|
* `featureName/specName → driftIssues` map. Drift findings are advisory —
|
|
4892
|
-
* they show in the HTML report but do not change the run
|
|
5148
|
+
* they show in the HTML report but do not change the live-run exit code.
|
|
4893
5149
|
*/
|
|
4894
5150
|
async function runDriftAudit(runs, opts, cwd) {
|
|
4895
5151
|
const targets = runs.filter((r) => r.kind === "run").map((r) => ({
|
|
@@ -4939,18 +5195,36 @@ async function runOneSpec(args) {
|
|
|
4939
5195
|
meta("steps", expanded.length);
|
|
4940
5196
|
const includes = collectIncludedBlockNames(spec);
|
|
4941
5197
|
if (includes.length > 0) meta("blocks", includes.join(", "));
|
|
4942
|
-
const sessionName =
|
|
5198
|
+
const sessionName = generateLiveSessionName();
|
|
4943
5199
|
meta("session", sessionName);
|
|
5200
|
+
let statePath = null;
|
|
5201
|
+
if (spec.statePath) {
|
|
5202
|
+
statePath = isAbsolute(spec.statePath) ? spec.statePath : resolve(cwd, spec.statePath);
|
|
5203
|
+
try {
|
|
5204
|
+
await access(statePath);
|
|
5205
|
+
} catch {
|
|
5206
|
+
const msg = `spec.statePath points to a missing file: ${statePath}`;
|
|
5207
|
+
error(msg);
|
|
5208
|
+
return {
|
|
5209
|
+
kind: "error",
|
|
5210
|
+
featureName,
|
|
5211
|
+
specName,
|
|
5212
|
+
error: msg
|
|
5213
|
+
};
|
|
5214
|
+
}
|
|
5215
|
+
meta("state", statePath);
|
|
5216
|
+
}
|
|
4944
5217
|
const runId = buildRunId();
|
|
4945
5218
|
const runDir = opts.out ?? join(specDir, "runs", runId);
|
|
4946
5219
|
await mkdir(runDir, { recursive: true });
|
|
4947
5220
|
meta("runDir", runDir);
|
|
4948
|
-
const result = await
|
|
5221
|
+
const result = await runLiveExecutor({
|
|
4949
5222
|
spec: { title: spec.title },
|
|
4950
5223
|
steps: expanded,
|
|
4951
5224
|
runId,
|
|
4952
5225
|
runDir,
|
|
4953
5226
|
sessionName,
|
|
5227
|
+
statePath,
|
|
4954
5228
|
systemPromptSuffix: userPromptSuffix,
|
|
4955
5229
|
model: opts.model,
|
|
4956
5230
|
language: opts.language,
|
|
@@ -4963,7 +5237,7 @@ async function runOneSpec(args) {
|
|
|
4963
5237
|
meta("saved", runJsonPath);
|
|
4964
5238
|
meta("status", result.status.toUpperCase());
|
|
4965
5239
|
meta("step-summary", `${count(result.steps, "passed")} passed / ${count(result.steps, "failed")} failed / ${count(result.steps, "skipped")} skipped`);
|
|
4966
|
-
const costLine =
|
|
5240
|
+
const costLine = formatLiveCost(result.cost, { compact: false });
|
|
4967
5241
|
if (costLine) meta("cost", costLine);
|
|
4968
5242
|
return {
|
|
4969
5243
|
kind: "run",
|
|
@@ -4975,7 +5249,7 @@ async function runOneSpec(args) {
|
|
|
4975
5249
|
};
|
|
4976
5250
|
}
|
|
4977
5251
|
function logBatchCost(runs) {
|
|
4978
|
-
const line =
|
|
5252
|
+
const line = formatLiveBatchCost(runs.flatMap((r) => r.kind === "run" ? [r.result.cost] : []));
|
|
4979
5253
|
if (line) meta("total-cost", line);
|
|
4980
5254
|
}
|
|
4981
5255
|
/**
|
|
@@ -5005,7 +5279,7 @@ async function runFailureAnalysisForLiveRuns(runs, driftBySpec, opts, cwd) {
|
|
|
5005
5279
|
for (const r of failed) {
|
|
5006
5280
|
const key = `${r.featureName}/${r.specName}`;
|
|
5007
5281
|
info(`failure analysis: ${key}`);
|
|
5008
|
-
const excerpt = await
|
|
5282
|
+
const excerpt = await buildLiveTranscriptExcerpt(r.result);
|
|
5009
5283
|
if (excerpt === null) {
|
|
5010
5284
|
out.set(key, {
|
|
5011
5285
|
analysis: null,
|
|
@@ -5016,7 +5290,7 @@ async function runFailureAnalysisForLiveRuns(runs, driftBySpec, opts, cwd) {
|
|
|
5016
5290
|
continue;
|
|
5017
5291
|
}
|
|
5018
5292
|
const outcome = await analyzeFailure({
|
|
5019
|
-
|
|
5293
|
+
liveTranscriptExcerpt: excerpt,
|
|
5020
5294
|
specYaml: r.specYaml,
|
|
5021
5295
|
diffPatch: diff.ok ? diff.diff.patch : null,
|
|
5022
5296
|
changedFiles: diff.ok ? diff.diff.nameStatus : null,
|
|
@@ -5067,6 +5341,100 @@ function oneLine(s) {
|
|
|
5067
5341
|
return s.replace(/\s+/g, " ").trim();
|
|
5068
5342
|
}
|
|
5069
5343
|
//#endregion
|
|
5344
|
+
//#region src/prompts/agent-update.ts
|
|
5345
|
+
function buildAgentUpdateSystemPrompt(input) {
|
|
5346
|
+
const modeLabel = input.mode === "live" ? "live (Claude drives every step at run time)" : "record (Claude records browser actions for vitest replay)";
|
|
5347
|
+
const userMdLabel = `${input.mode}.user.md`;
|
|
5348
|
+
const agentMdLabel = `${input.mode}.agent.md`;
|
|
5349
|
+
return `You maintain the auto-learned half of ccqa's prompt bundle for ${modeLabel}.
|
|
5350
|
+
|
|
5351
|
+
${outputLanguageBlock(input.language ?? "auto", "the bullet text", "headings, agent-browser subcommand names, selector tokens")}## What you are updating
|
|
5352
|
+
|
|
5353
|
+
\`.ccqa/prompts/${agentMdLabel}\` is appended to ccqa's system prompt for every ${input.mode === "live" ? "step of every `mode: live` spec" : "trace run of `ccqa record`"}. It is meant to capture **stable lessons learned from past runs** — concrete selectors that worked, login flow quirks the agent kept tripping on, common "this is fine" warnings to ignore.
|
|
5354
|
+
|
|
5355
|
+
The sibling file \`${userMdLabel}\` carries human-maintained project guidance (URLs, naming conventions). Rules already well-covered by \`${userMdLabel}\` should NOT be repeated here.
|
|
5356
|
+
|
|
5357
|
+
## Output rules
|
|
5358
|
+
|
|
5359
|
+
- Emit the COMPLETE replacement contents of \`${agentMdLabel}\`.
|
|
5360
|
+
- Concise bullet points. No narrative paragraphs. No preamble. No closing summary.
|
|
5361
|
+
- Each bullet is a single declarative sentence (or one bullet → one short selector / command).
|
|
5362
|
+
- Group related bullets under \`### …\` subheaders.
|
|
5363
|
+
- Skip everything that was already true and well-covered by the previous file or \`${userMdLabel}\`. Only persist new lessons.
|
|
5364
|
+
- Keep the whole file under ~3 KB.
|
|
5365
|
+
- Output ONLY the new file contents. NO code fences. NO surrounding prose. NO markdown frontmatter.
|
|
5366
|
+
- If the run summary contains nothing worth learning from, output the previous file unchanged.
|
|
5367
|
+
`;
|
|
5368
|
+
}
|
|
5369
|
+
function buildAgentUpdateUserPrompt(input) {
|
|
5370
|
+
const agentMdLabel = `${input.mode}.agent.md`;
|
|
5371
|
+
return `## Previous \`${agentMdLabel}\`
|
|
5372
|
+
|
|
5373
|
+
${input.currentAgentMd && input.currentAgentMd.trim().length > 0 ? input.currentAgentMd : "(no existing file — this will create one)"}
|
|
5374
|
+
|
|
5375
|
+
## Run summary
|
|
5376
|
+
|
|
5377
|
+
${input.runSummary}
|
|
5378
|
+
|
|
5379
|
+
## Your task
|
|
5380
|
+
|
|
5381
|
+
Write the new contents of \`${agentMdLabel}\`. Output ONLY the file contents — no preamble, no fences, no closing note.`;
|
|
5382
|
+
}
|
|
5383
|
+
//#endregion
|
|
5384
|
+
//#region src/cli/update-agent-prompt.ts
|
|
5385
|
+
/**
|
|
5386
|
+
* Refresh `.ccqa/prompts/<mode>.agent.md` from the latest run.
|
|
5387
|
+
*
|
|
5388
|
+
* Reads the existing file (if any) and a caller-supplied run summary, sends
|
|
5389
|
+
* both to Claude, and writes the response back over the agent prompt file.
|
|
5390
|
+
* Degrades gracefully when auth is missing — logs and returns — so the run
|
|
5391
|
+
* exit code is unaffected by this opt-in side step.
|
|
5392
|
+
*/
|
|
5393
|
+
async function updateAgentPrompt(args) {
|
|
5394
|
+
const { mode, runSummary, cwd, model, language } = args;
|
|
5395
|
+
const agentMdPath = join(cwd, ".ccqa", "prompts", `${mode}.agent.md`);
|
|
5396
|
+
const relPath = relative(cwd, agentMdPath);
|
|
5397
|
+
const auth = driftAuthAvailable();
|
|
5398
|
+
if (!auth.ok) {
|
|
5399
|
+
warn(`--update-agent-prompt skipped (${auth.reason})`);
|
|
5400
|
+
return;
|
|
5401
|
+
}
|
|
5402
|
+
const promptInput = {
|
|
5403
|
+
mode,
|
|
5404
|
+
currentAgentMd: await readFile(agentMdPath, "utf-8").catch(() => null),
|
|
5405
|
+
runSummary,
|
|
5406
|
+
...language ? { language } : {}
|
|
5407
|
+
};
|
|
5408
|
+
const systemPrompt = buildAgentUpdateSystemPrompt(promptInput);
|
|
5409
|
+
const userPrompt = buildAgentUpdateUserPrompt(promptInput);
|
|
5410
|
+
info(`--update-agent-prompt: refreshing ${relPath}`);
|
|
5411
|
+
const { result, isError } = await invokeClaudeStreaming({
|
|
5412
|
+
prompt: userPrompt,
|
|
5413
|
+
systemPrompt,
|
|
5414
|
+
allowedTools: [],
|
|
5415
|
+
disableBuiltinTools: true,
|
|
5416
|
+
...model ? { model } : {}
|
|
5417
|
+
}, () => {});
|
|
5418
|
+
if (isError || !result || result.trim().length === 0) {
|
|
5419
|
+
warn(`--update-agent-prompt: Claude returned no usable output${isError ? " (SDK error)" : ""}; leaving ${relPath} unchanged`);
|
|
5420
|
+
return;
|
|
5421
|
+
}
|
|
5422
|
+
const newText = stripCodeFences(result.trim()) + "\n";
|
|
5423
|
+
await mkdir(dirname(agentMdPath), { recursive: true });
|
|
5424
|
+
await writeFile(agentMdPath, newText, "utf-8");
|
|
5425
|
+
info(`--update-agent-prompt: wrote ${relPath} (${newText.length} bytes)`);
|
|
5426
|
+
info(`--update-agent-prompt: review the diff with: git diff -- "${relPath}"`);
|
|
5427
|
+
}
|
|
5428
|
+
/**
|
|
5429
|
+
* Some models still wrap the answer in a ```markdown fence despite the
|
|
5430
|
+
* system prompt asking otherwise. Strip a single outer fence when present so
|
|
5431
|
+
* the saved file is clean.
|
|
5432
|
+
*/
|
|
5433
|
+
function stripCodeFences(text) {
|
|
5434
|
+
const m = text.match(/^```[a-zA-Z]*\n([\s\S]*?)\n```\s*$/);
|
|
5435
|
+
return m && m[1] !== void 0 ? m[1] : text;
|
|
5436
|
+
}
|
|
5437
|
+
//#endregion
|
|
5070
5438
|
//#region src/cli/changed-specs.ts
|
|
5071
5439
|
/**
|
|
5072
5440
|
* Filter specs to those affected by the git diff against the resolved base
|
|
@@ -5122,28 +5490,57 @@ async function resolveVitestConfig(cwd) {
|
|
|
5122
5490
|
return bundledVitestConfigPath();
|
|
5123
5491
|
}
|
|
5124
5492
|
}
|
|
5125
|
-
const runCommand = addLanguageOption(new Command("run").argument("[
|
|
5493
|
+
const runCommand = addProfileOption(addLanguageOption(new Command("run").argument("[targets...]", "Specs to run, space-separated: each '<feature>/<spec>', '<feature>', or omit for all. Duplicates are de-duped.").description("Run specs. Each spec's execution mode comes from its spec.yaml `mode:` field (default deterministic; set `mode: live` to have Claude drive agent-browser live per step). Deterministic specs replay the recorded test.spec.ts under vitest. Pass --report to write one unified HTML report covering both modes.").option("--report [dir]", `Write a self-contained HTML run report (failure analysis + drift audit by default). Default dir: ${DEFAULT_REPORT_DIR}/`).option("--changed", "Restrict execution to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). Cannot be combined with an explicit spec id.").option("--no-failure-analysis", "Skip the per-failure root-cause classification (TEST_DRIFT / SPEC_CHANGE / PRODUCT_BUG). --report only.").option("--no-drift-audit", "Skip the spec↔code drift audit shown in the report. --report only.").option("--base <ref>", "Base ref the source diff is taken against for failure analysis (default: GITHUB_BASE_REF, then origin/main).").option("--cwd <path>", "Working directory containing the .ccqa/ tree (monorepo support). Defaults to the current directory.").option("--format <fmt>", "Additional output format alongside HTML when --report is set: 'text' (default), 'json' (writes report.json), 'github' (GitHub Actions annotations on stdout).", (raw) => {
|
|
5126
5494
|
if (REPORT_FORMATS.includes(raw)) return raw;
|
|
5127
5495
|
throw new Error(`--format must be one of ${REPORT_FORMATS.join(" | ")}`);
|
|
5128
5496
|
}, "text").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--no-evidence", `(deterministic only) Skip step-boundary evidence capture (PNG + meta JSON written to ${DEFAULT_REPORT_DIR}/${EVIDENCE_SUBDIR}/ by default).`).option("--retry <n>", "(live only) Retry each failed step up to N more times before recording failure. Default 0.", (raw) => {
|
|
5129
5497
|
const n = Number(raw);
|
|
5130
5498
|
if (!Number.isFinite(n) || n < 0 || Math.floor(n) !== n) throw new Error(`--retry must be a non-negative integer, got "${raw}"`);
|
|
5131
5499
|
return n;
|
|
5132
|
-
}, 0).option("--out <dir>", "(live only) Override the per-spec artifact directory. Default: <specDir>/runs/<runId>. Ignored when running multiple specs.")).action(async (
|
|
5133
|
-
await runDispatcher(
|
|
5500
|
+
}, 0).option("--out <dir>", "(live only) Override the per-spec artifact directory. Default: <specDir>/runs/<runId>. Ignored when running multiple specs.").option("--update-agent-prompt", "(live only) After the run finishes, ask Claude to refresh .ccqa/prompts/live.agent.md from a summary of the run.").option("--concurrency <n>", "Run up to N specs in parallel within each mode (deterministic / live). Default 1 (sequential). Live specs each get an isolated agent-browser session; high values spawn many headed Chrome instances.", parseConcurrency$1, 1))).action(async (targets, opts) => {
|
|
5501
|
+
await runDispatcher(targets, opts);
|
|
5134
5502
|
});
|
|
5503
|
+
/** Parse --concurrency: a positive integer. Rejects 0, negatives, non-integers. */
|
|
5504
|
+
function parseConcurrency$1(raw) {
|
|
5505
|
+
const n = Number(raw);
|
|
5506
|
+
if (!Number.isInteger(n) || n < 1) {
|
|
5507
|
+
error(`invalid --concurrency: ${raw} (expected positive integer)`);
|
|
5508
|
+
process.exit(2);
|
|
5509
|
+
}
|
|
5510
|
+
return n;
|
|
5511
|
+
}
|
|
5135
5512
|
function resolveReportDir(report, cwd) {
|
|
5136
5513
|
if (report === void 0 || report === false) return void 0;
|
|
5137
5514
|
return resolve(cwd, typeof report === "string" ? report : DEFAULT_REPORT_DIR);
|
|
5138
5515
|
}
|
|
5139
|
-
|
|
5140
|
-
|
|
5141
|
-
if (
|
|
5516
|
+
/** Header label shown after `ccqa run`: the lone target, a count, or a mode marker. */
|
|
5517
|
+
function headerTarget(targets, opts) {
|
|
5518
|
+
if (targets.length === 1) return targets[0];
|
|
5519
|
+
if (targets.length > 1) return `${targets.length} targets`;
|
|
5520
|
+
return opts.changed ? "(changed)" : "(all specs)";
|
|
5521
|
+
}
|
|
5522
|
+
/** De-dupe by `featureName/specName`, keeping first-seen order. */
|
|
5523
|
+
function dedupeSpecs(specs) {
|
|
5524
|
+
const seen = /* @__PURE__ */ new Set();
|
|
5525
|
+
const out = [];
|
|
5526
|
+
for (const s of specs) {
|
|
5527
|
+
const key = `${s.featureName}/${s.specName}`;
|
|
5528
|
+
if (seen.has(key)) continue;
|
|
5529
|
+
seen.add(key);
|
|
5530
|
+
out.push(s);
|
|
5531
|
+
}
|
|
5532
|
+
return out;
|
|
5533
|
+
}
|
|
5534
|
+
async function runDispatcher(targets, opts) {
|
|
5535
|
+
header("run", headerTarget(targets, opts));
|
|
5536
|
+
if (opts.changed && targets.length > 0) {
|
|
5142
5537
|
error("--changed and an explicit spec target cannot be combined");
|
|
5143
5538
|
process.exit(2);
|
|
5144
5539
|
}
|
|
5145
5540
|
const cwd = resolveCwd(opts.cwd);
|
|
5146
|
-
|
|
5541
|
+
await applyProfileFromOption(opts.profile, cwd);
|
|
5542
|
+
const enumerateAll = () => listAllSpecsWithSpecFile(cwd);
|
|
5543
|
+
let specs = dedupeSpecs((await Promise.all((targets.length ? targets : [void 0]).map((t) => resolveSpecTargets(t, enumerateAll, cwd)))).flat());
|
|
5147
5544
|
if (opts.changed) {
|
|
5148
5545
|
const before = specs.length;
|
|
5149
5546
|
specs = await collectChangedSpecs(specs, {
|
|
@@ -5163,7 +5560,8 @@ async function runDispatcher(target, opts) {
|
|
|
5163
5560
|
if (liveSpecs.length === 0) {
|
|
5164
5561
|
if (typeof opts.retry === "number" && opts.retry > 0) warn("--retry is ignored without any 'mode: live' spec");
|
|
5165
5562
|
if (opts.out) warn("--out is ignored without any 'mode: live' spec");
|
|
5166
|
-
|
|
5563
|
+
if (opts.updateAgentPrompt) warn("--update-agent-prompt is ignored without any 'mode: live' spec");
|
|
5564
|
+
} else if (opts.out && liveSpecs.length > 1) warn("--out is ignored when running multiple live specs");
|
|
5167
5565
|
if (detSpecs.length === 0 && opts.evidence === false) warn("--no-evidence is ignored without any 'mode: deterministic' spec");
|
|
5168
5566
|
blank();
|
|
5169
5567
|
const reportDir = resolveReportDir(opts.report, cwd);
|
|
@@ -5172,11 +5570,12 @@ async function runDispatcher(target, opts) {
|
|
|
5172
5570
|
const live = await runLiveSpecs(liveSpecs, {
|
|
5173
5571
|
...opts.model ? { model: opts.model } : {},
|
|
5174
5572
|
...opts.language ? { language: opts.language } : {},
|
|
5175
|
-
...opts.out ? { out: opts.out } : {},
|
|
5573
|
+
...opts.out && liveSpecs.length === 1 ? { out: opts.out } : {},
|
|
5176
5574
|
cwd,
|
|
5177
5575
|
...opts.base ? { base: opts.base } : {},
|
|
5178
5576
|
...reportDir ? { reportDir } : {},
|
|
5179
5577
|
...typeof opts.retry === "number" ? { retry: opts.retry } : {},
|
|
5578
|
+
concurrency: opts.concurrency ?? 1,
|
|
5180
5579
|
...reportDir && opts.driftAudit !== false ? { driftAudit: true } : {},
|
|
5181
5580
|
...reportDir && opts.failureAnalysis === false ? { failureAnalysis: false } : {}
|
|
5182
5581
|
});
|
|
@@ -5192,9 +5591,39 @@ async function runDispatcher(target, opts) {
|
|
|
5192
5591
|
opts
|
|
5193
5592
|
});
|
|
5194
5593
|
}
|
|
5594
|
+
if (opts.updateAgentPrompt && liveSpecs.length > 0) {
|
|
5595
|
+
blank();
|
|
5596
|
+
await updateAgentPrompt({
|
|
5597
|
+
mode: "live",
|
|
5598
|
+
runSummary: buildLiveRunSummary(live.reportResults),
|
|
5599
|
+
cwd,
|
|
5600
|
+
...opts.model ? { model: opts.model } : {},
|
|
5601
|
+
...opts.language ? { language: opts.language } : {}
|
|
5602
|
+
});
|
|
5603
|
+
}
|
|
5195
5604
|
process.exit(overallExitCode);
|
|
5196
5605
|
}
|
|
5197
5606
|
/**
|
|
5607
|
+
* Compact, prompt-friendly summary of one ccqa run for the live agent-prompt
|
|
5608
|
+
* update step. One section per spec: header line + per-step verdicts.
|
|
5609
|
+
* Kept to a few KB even with many specs/steps so the prompt cache can absorb
|
|
5610
|
+
* the bulk.
|
|
5611
|
+
*/
|
|
5612
|
+
function buildLiveRunSummary(results) {
|
|
5613
|
+
const sections = [];
|
|
5614
|
+
for (const r of results) {
|
|
5615
|
+
if (!r.liveRun) continue;
|
|
5616
|
+
const head = `## ${r.feature}/${r.spec} — ${r.status}`;
|
|
5617
|
+
const steps = r.liveRun.steps.map((s) => `- [${s.status}] ${s.stepId}: ${oneLineSummary$1(s.reasoning)}`).join("\n");
|
|
5618
|
+
sections.push(`${head}\n${steps}`);
|
|
5619
|
+
}
|
|
5620
|
+
return sections.length === 0 ? "(no live runs executed)" : sections.join("\n\n");
|
|
5621
|
+
}
|
|
5622
|
+
function oneLineSummary$1(s) {
|
|
5623
|
+
const flat = s.replace(/\s+/g, " ").trim();
|
|
5624
|
+
return flat.length > 240 ? flat.slice(0, 240) + "…" : flat || "(no reason given)";
|
|
5625
|
+
}
|
|
5626
|
+
/**
|
|
5198
5627
|
* Run pre-filtered deterministic specs under vitest. Empty input is a no-op.
|
|
5199
5628
|
* Captures step-boundary evidence under `<reportDir>/evidence/<feature>/<spec>/`
|
|
5200
5629
|
* when enabled.
|
|
@@ -5205,72 +5634,83 @@ async function runDeterministicSpecs(specs, opts, cwd, reportDirAbs) {
|
|
|
5205
5634
|
exitCode: 0
|
|
5206
5635
|
};
|
|
5207
5636
|
const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
|
|
5208
|
-
const summaries = [];
|
|
5209
|
-
let exitCode = 0;
|
|
5210
5637
|
const vitestConfig = await resolveVitestConfig(cwd);
|
|
5211
5638
|
const captureOutput = Boolean(opts.report);
|
|
5212
5639
|
const evidenceRoot = opts.evidence !== false ? join(reportDirAbs, EVIDENCE_SUBDIR) : null;
|
|
5640
|
+
const concurrency = Math.max(1, opts.concurrency ?? 1);
|
|
5641
|
+
const ctx = {
|
|
5642
|
+
cwd,
|
|
5643
|
+
tmpDir,
|
|
5644
|
+
vitestConfig,
|
|
5645
|
+
captureOutput,
|
|
5646
|
+
evidenceRoot
|
|
5647
|
+
};
|
|
5213
5648
|
try {
|
|
5214
|
-
|
|
5215
|
-
const { featureName, specName } = specs[i];
|
|
5216
|
-
const scriptFile = await getTestScript(featureName, specName, cwd);
|
|
5217
|
-
if (!scriptFile) {
|
|
5218
|
-
warn(`${featureName}/${specName}: no test.spec.ts found`);
|
|
5219
|
-
hint("run 'ccqa record <feature>/<spec>' to record it, or set 'mode: live' in spec.yaml");
|
|
5220
|
-
continue;
|
|
5221
|
-
}
|
|
5222
|
-
run(`${featureName}/${specName}`);
|
|
5223
|
-
meta("test", scriptFile);
|
|
5224
|
-
blank();
|
|
5225
|
-
const reportFile = join(tmpDir, `report-${i}.json`);
|
|
5226
|
-
const evidenceDir = evidenceRoot ? join(evidenceRoot, featureName, specName) : null;
|
|
5227
|
-
if (evidenceDir) {
|
|
5228
|
-
await rm(evidenceDir, {
|
|
5229
|
-
recursive: true,
|
|
5230
|
-
force: true
|
|
5231
|
-
});
|
|
5232
|
-
await mkdir(evidenceDir, { recursive: true });
|
|
5233
|
-
}
|
|
5234
|
-
const proc = spawnVitestStreaming([
|
|
5235
|
-
"run",
|
|
5236
|
-
"--config",
|
|
5237
|
-
vitestConfig,
|
|
5238
|
-
scriptFile,
|
|
5239
|
-
"--reporter=json",
|
|
5240
|
-
`--outputFile.json=${reportFile}`
|
|
5241
|
-
], {
|
|
5242
|
-
cwd,
|
|
5243
|
-
env: evidenceDir ? {
|
|
5244
|
-
...process.env,
|
|
5245
|
-
CCQA_EVIDENCE_DIR: evidenceDir
|
|
5246
|
-
} : process.env
|
|
5247
|
-
});
|
|
5248
|
-
const tail = captureOutput ? new TailBuffer(OUTPUT_TAIL_CAP) : null;
|
|
5249
|
-
await Promise.all([streamFiltered(proc.stdout, process.stdout, tail), streamFiltered(proc.stderr, process.stderr, tail)]);
|
|
5250
|
-
const specExitCode = await proc.exited;
|
|
5251
|
-
if (specExitCode !== 0) exitCode = specExitCode;
|
|
5252
|
-
const report = await readReport(reportFile);
|
|
5253
|
-
summaries.push({
|
|
5254
|
-
featureName,
|
|
5255
|
-
specName,
|
|
5256
|
-
scriptFile,
|
|
5257
|
-
report,
|
|
5258
|
-
exitCode: specExitCode,
|
|
5259
|
-
outputTail: tail ? tail.toString() : null,
|
|
5260
|
-
evidenceDir
|
|
5261
|
-
});
|
|
5262
|
-
blank();
|
|
5263
|
-
}
|
|
5649
|
+
const summaries = (await runPool(specs, concurrency, (spec, i) => withBuffer(`${spec.featureName}/${spec.specName}`, concurrency > 1, () => runOneDeterministicSpec(spec, i, ctx)))).filter((s) => s !== null);
|
|
5264
5650
|
printSummary(summaries);
|
|
5651
|
+
return {
|
|
5652
|
+
summaries,
|
|
5653
|
+
exitCode: summaries.reduce((acc, s) => s.exitCode !== 0 ? s.exitCode : acc, 0)
|
|
5654
|
+
};
|
|
5265
5655
|
} finally {
|
|
5266
5656
|
await rm(tmpDir, {
|
|
5267
5657
|
recursive: true,
|
|
5268
5658
|
force: true
|
|
5269
5659
|
});
|
|
5270
5660
|
}
|
|
5661
|
+
}
|
|
5662
|
+
/**
|
|
5663
|
+
* Run one spec under vitest. Returns null when the spec has no recorded
|
|
5664
|
+
* test.spec.ts (skipped). All output goes through the logger, so under a
|
|
5665
|
+
* `log.withBuffer` scope it's captured and flushed as one labelled block.
|
|
5666
|
+
*/
|
|
5667
|
+
async function runOneDeterministicSpec(spec, index, ctx) {
|
|
5668
|
+
const { featureName, specName } = spec;
|
|
5669
|
+
const scriptFile = await getTestScript(featureName, specName, ctx.cwd);
|
|
5670
|
+
if (!scriptFile) {
|
|
5671
|
+
warn(`${featureName}/${specName}: no test.spec.ts found`);
|
|
5672
|
+
hint("run 'ccqa record <feature>/<spec>' to record it, or set 'mode: live' in spec.yaml");
|
|
5673
|
+
return null;
|
|
5674
|
+
}
|
|
5675
|
+
run(`${featureName}/${specName}`);
|
|
5676
|
+
meta("test", scriptFile);
|
|
5677
|
+
blank();
|
|
5678
|
+
const reportFile = join(ctx.tmpDir, `report-${index}.json`);
|
|
5679
|
+
const evidenceDir = ctx.evidenceRoot ? join(ctx.evidenceRoot, featureName, specName) : null;
|
|
5680
|
+
if (evidenceDir) {
|
|
5681
|
+
await rm(evidenceDir, {
|
|
5682
|
+
recursive: true,
|
|
5683
|
+
force: true
|
|
5684
|
+
});
|
|
5685
|
+
await mkdir(evidenceDir, { recursive: true });
|
|
5686
|
+
}
|
|
5687
|
+
const proc = spawnVitestStreaming([
|
|
5688
|
+
"run",
|
|
5689
|
+
"--config",
|
|
5690
|
+
ctx.vitestConfig,
|
|
5691
|
+
scriptFile,
|
|
5692
|
+
"--reporter=json",
|
|
5693
|
+
`--outputFile.json=${reportFile}`
|
|
5694
|
+
], {
|
|
5695
|
+
cwd: ctx.cwd,
|
|
5696
|
+
env: evidenceDir ? {
|
|
5697
|
+
...process.env,
|
|
5698
|
+
CCQA_EVIDENCE_DIR: evidenceDir
|
|
5699
|
+
} : process.env
|
|
5700
|
+
});
|
|
5701
|
+
const sink = { write: emitRaw };
|
|
5702
|
+
const tail = ctx.captureOutput ? new TailBuffer(OUTPUT_TAIL_CAP) : null;
|
|
5703
|
+
await Promise.all([streamFiltered(proc.stdout, sink, tail), streamFiltered(proc.stderr, sink, tail)]);
|
|
5704
|
+
const specExitCode = await proc.exited;
|
|
5705
|
+
blank();
|
|
5271
5706
|
return {
|
|
5272
|
-
|
|
5273
|
-
|
|
5707
|
+
featureName,
|
|
5708
|
+
specName,
|
|
5709
|
+
scriptFile,
|
|
5710
|
+
report: await readReport(reportFile),
|
|
5711
|
+
exitCode: specExitCode,
|
|
5712
|
+
outputTail: tail ? tail.toString() : null,
|
|
5713
|
+
evidenceDir
|
|
5274
5714
|
};
|
|
5275
5715
|
}
|
|
5276
5716
|
function failedSpec(s) {
|
|
@@ -5358,7 +5798,7 @@ async function analyzeDeterministicSummaries(summaries, opts, cwd, reportDir) {
|
|
|
5358
5798
|
failureLogExcerpt: null,
|
|
5359
5799
|
diffExcerpt: null,
|
|
5360
5800
|
specYaml: null,
|
|
5361
|
-
|
|
5801
|
+
liveRun: null
|
|
5362
5802
|
});
|
|
5363
5803
|
continue;
|
|
5364
5804
|
}
|
|
@@ -5408,7 +5848,7 @@ async function analyzeDeterministicSummaries(summaries, opts, cwd, reportDir) {
|
|
|
5408
5848
|
failureLogExcerpt: failureLog.length > 0 ? failureLog : null,
|
|
5409
5849
|
diffExcerpt,
|
|
5410
5850
|
specYaml,
|
|
5411
|
-
|
|
5851
|
+
liveRun: null
|
|
5412
5852
|
});
|
|
5413
5853
|
}
|
|
5414
5854
|
return {
|
|
@@ -5719,6 +6159,7 @@ agent-browser --session SESSION wait --load networkidle
|
|
|
5719
6159
|
agent-browser --session SESSION get count "<selector>" # element-existence check (returns a number, fast)
|
|
5720
6160
|
agent-browser --session SESSION cookies clear
|
|
5721
6161
|
agent-browser --session SESSION find <locator> <value> <action> [<input>] [--name "<n>"] [--exact]
|
|
6162
|
+
agent-browser --session SESSION upload "<input[type=file] selector>" <file> [<file> ...]
|
|
5722
6163
|
# See "Selector Rules" for the full \`find\` subset.
|
|
5723
6164
|
# IMPORTANT: do NOT use \`wait "<css-selector>"\`. agent-browser ignores --timeout on a
|
|
5724
6165
|
# CSS-selector wait and blocks for ~150s when the selector never matches, killing the run.
|
|
@@ -5794,6 +6235,8 @@ find nth <index> "<ALLOWED-css>" <action>
|
|
|
5794
6235
|
|
|
5795
6236
|
**Verifying cleanup / deletion**: assert the *absence* of the deleted thing, not the surrounding listing screen's text. Use \`wait --fn "!document.body.innerText.includes('<unique-label>')"\` (text disappearance) — never \`wait "<css-selector>" --state hidden\` (blocks the daemon) and never \`wait --text "<navbar label>"\` (passes regardless of the deletion).
|
|
5796
6237
|
|
|
6238
|
+
**File inputs (\`<input type="file">\`) / OS file-picker dialogs**: do NOT \`click\` the input — that opens the OS picker, which agent-browser cannot drive. Use \`upload "<selector>" <path>\` instead. agent-browser sets the input's files directly via the underlying browser API, no native dialog ever opens. Use an ALLOWED selector to identify the input (\`[aria-label='…']\`, \`[data-testid='…']\`, \`[type='file']\` only when it's unique on the page). File paths must be plain shell args — wrap each in \`"\` for safety. Reference fixtures via \`\${CCQA_FIXTURES_DIR}/<name>\` so the same spec works locally and in CI; conventionally fixtures live under \`.ccqa/fixtures/\` and the env var resolves there. Multi-file inputs accept several positionals: \`upload "[aria-label='Attach']" "\${CCQA_FIXTURES_DIR}/a.pdf" "\${CCQA_FIXTURES_DIR}/b.pdf"\`.
|
|
6239
|
+
|
|
5797
6240
|
## Test Specification
|
|
5798
6241
|
|
|
5799
6242
|
Title: ${input.title}
|
|
@@ -5876,6 +6319,7 @@ AB_ACTION|select|<selector>|<value>|<aria label>
|
|
|
5876
6319
|
AB_ACTION|hover|<selector>|<visible label>
|
|
5877
6320
|
AB_ACTION|scroll|<direction>|<pixels>
|
|
5878
6321
|
AB_ACTION|drag|<source selector>|<target selector>|<source label>
|
|
6322
|
+
AB_ACTION|upload|<file-input selector>|<file1>[|<file2>...]
|
|
5879
6323
|
AB_ACTION|wait|<selector or text>|<label>
|
|
5880
6324
|
AB_ACTION|snapshot|<key observation, max 100 chars>
|
|
5881
6325
|
AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
|
|
@@ -6192,6 +6636,17 @@ function actionToAbArgs(action, sessionName) {
|
|
|
6192
6636
|
sub(action.selector),
|
|
6193
6637
|
sub(action.target)
|
|
6194
6638
|
];
|
|
6639
|
+
case "upload": {
|
|
6640
|
+
const sel = sub(action.selector);
|
|
6641
|
+
const files = (action.files ?? []).map((f) => sub(f));
|
|
6642
|
+
if (!sel || files.length === 0) return null;
|
|
6643
|
+
return [
|
|
6644
|
+
...base,
|
|
6645
|
+
"upload",
|
|
6646
|
+
sel,
|
|
6647
|
+
...files
|
|
6648
|
+
];
|
|
6649
|
+
}
|
|
6195
6650
|
case "wait": {
|
|
6196
6651
|
const raw = sub(action.selector);
|
|
6197
6652
|
if (!raw) return null;
|
|
@@ -6683,9 +7138,9 @@ async function runTrace(featureName, specName, model, validationMode = "lenient"
|
|
|
6683
7138
|
steps: expanded,
|
|
6684
7139
|
sessionName
|
|
6685
7140
|
});
|
|
6686
|
-
const
|
|
6687
|
-
if (
|
|
6688
|
-
const systemPrompt = (
|
|
7141
|
+
const promptBundle = await loadRecordPromptBundle();
|
|
7142
|
+
if (promptBundle !== null) meta("prompt", promptBundle.loaded.join(" + "));
|
|
7143
|
+
const systemPrompt = (promptBundle === null ? baseSystemPrompt : `${baseSystemPrompt}\n## Project-specific guidance\n\n${promptBundle.text}\n`) + languageDirective(language);
|
|
6689
7144
|
const prompt = buildTracePrompt(spec.title);
|
|
6690
7145
|
info("Running agent-browser session...");
|
|
6691
7146
|
blank();
|
|
@@ -6767,6 +7222,11 @@ async function runTrace(featureName, specName, model, validationMode = "lenient"
|
|
|
6767
7222
|
if (written) meta("relatedPaths", `${relatedPaths.length} path(s) written to ${written}`);
|
|
6768
7223
|
} else warn("trace did not emit a RELATED_PATHS block; drift --changed cannot scope this spec");
|
|
6769
7224
|
hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
|
|
7225
|
+
return {
|
|
7226
|
+
route,
|
|
7227
|
+
actionsKept: validatedActions.length,
|
|
7228
|
+
actionsRecorded: traceActions.length
|
|
7229
|
+
};
|
|
6770
7230
|
}
|
|
6771
7231
|
/**
|
|
6772
7232
|
* Strip actions whose recorded fields contain "unstable literal" values
|
|
@@ -6825,7 +7285,7 @@ function dedupAndReport(actions) {
|
|
|
6825
7285
|
/**
 * Decide whether two consecutively recorded actions are the same action
 * repeated.
 *
 * Two actions are duplicates only when they share the same command and step
 * and agree on every recorded field. A field that is missing (null/undefined)
 * compares equal to its empty default: "" for string fields, -1 for
 * `findIndex`, false for `findExact` and [] for `files`.
 *
 * @param {object} a - The earlier action.
 * @param {object} b - The action recorded right after `a`.
 * @returns {boolean} true when `b` repeats `a`.
 */
function isAdjacentDuplicate(a, b) {
	if (a.command !== b.command) return false;
	if ((a.stepId ?? "") !== (b.stepId ?? "")) return false;
	// [field, default used when the field is absent]
	const scalarFields = [
		["selector", ""],
		["value", ""],
		["target", ""],
		["label", ""],
		["assertType", ""],
		["findLocator", ""],
		["findValue", ""],
		["findName", ""],
		["findIndex", -1],
		["findExact", false]
	];
	for (const [field, fallback] of scalarFields) {
		if ((a[field] ?? fallback) !== (b[field] ?? fallback)) return false;
	}
	// Compare upload file lists element by element. Joining with a separator
	// would make ["a|b"] and ["a", "b"] look identical and drop a distinct upload.
	const filesA = a.files ?? [];
	const filesB = b.files ?? [];
	return filesA.length === filesB.length && filesA.every((file, i) => file === filesB[i]);
}
|
|
6830
7290
|
/**
|
|
6831
7291
|
* Run the post-trace replay validation and emit user-visible drop reports.
|
|
@@ -7047,6 +7507,16 @@ function parseAbAction(line) {
|
|
|
7047
7507
|
target: parts[3],
|
|
7048
7508
|
label: parts[4]
|
|
7049
7509
|
};
|
|
7510
|
+
case "upload": {
|
|
7511
|
+
const selector = parts[2];
|
|
7512
|
+
const files = parts.slice(3).filter((f) => f !== "");
|
|
7513
|
+
if (!selector || files.length === 0) return null;
|
|
7514
|
+
return {
|
|
7515
|
+
command,
|
|
7516
|
+
selector,
|
|
7517
|
+
files
|
|
7518
|
+
};
|
|
7519
|
+
}
|
|
7050
7520
|
case "find_click":
|
|
7051
7521
|
case "find_dblclick":
|
|
7052
7522
|
case "find_hover":
|
|
@@ -7097,6 +7567,7 @@ function actionsToScript(input) {
|
|
|
7097
7567
|
`import { ${[
|
|
7098
7568
|
"ab",
|
|
7099
7569
|
"abWait",
|
|
7570
|
+
"abUpload",
|
|
7100
7571
|
"abAssertTextVisible",
|
|
7101
7572
|
"abAssertVisible",
|
|
7102
7573
|
"abAssertNotVisible",
|
|
@@ -7130,6 +7601,7 @@ const ELEMENT_COMMANDS = new Set([
|
|
|
7130
7601
|
"select",
|
|
7131
7602
|
"hover",
|
|
7132
7603
|
"drag",
|
|
7604
|
+
"upload",
|
|
7133
7605
|
"find_click",
|
|
7134
7606
|
"find_dblclick",
|
|
7135
7607
|
"find_fill",
|
|
@@ -7261,6 +7733,11 @@ function actionToLine(action) {
|
|
|
7261
7733
|
case "hover": return `ab("hover", ${j(action.selector)});`;
|
|
7262
7734
|
case "scroll": return `ab("scroll", ${[action.direction ?? "down", ...action.pixels ? [action.pixels] : []].map(j).join(", ")});`;
|
|
7263
7735
|
case "drag": return `ab("drag", ${j(action.selector)}, ${j(action.target)});`;
|
|
7736
|
+
case "upload": {
|
|
7737
|
+
const files = action.files ?? [];
|
|
7738
|
+
if (!action.selector || files.length === 0) return null;
|
|
7739
|
+
return `abUpload(${[j(action.selector), ...files.map(jExpr)].join(", ")});`;
|
|
7740
|
+
}
|
|
7264
7741
|
case "wait": {
|
|
7265
7742
|
const sel = action.selector;
|
|
7266
7743
|
if (/^\d+$/.test(sel)) return `spawnSync("sleep", [${j(sel)}], { stdio: "inherit" });`;
|
|
@@ -8336,21 +8813,23 @@ function toFixMode(autoFix) {
|
|
|
8336
8813
|
case "interactive": return "interactive";
|
|
8337
8814
|
}
|
|
8338
8815
|
}
|
|
8339
|
-
const recordCommand = addLanguageOption(new Command("record").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Record a deterministic test from a spec: run agent-browser to collect actions (trace), then generate test.spec.ts with auto-fix retries (generate). After recording, `ccqa run <feature/spec>` replays it under vitest (deterministic specs only — live specs do not need recording).").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions; 'strict' drops them.", (raw) => {
|
|
8816
|
+
const recordCommand = addProfileOption(addLanguageOption(new Command("record").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Record a deterministic test from a spec: run agent-browser to collect actions (trace), then generate test.spec.ts with auto-fix retries (generate). After recording, `ccqa run <feature/spec>` replays it under vitest (deterministic specs only — live specs do not need recording).").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions; 'strict' drops them.", (raw) => {
|
|
8340
8817
|
if (VALIDATION_MODES.includes(raw)) return raw;
|
|
8341
8818
|
throw new Error(`--validation-mode must be one of ${VALIDATION_MODES.join(" | ")}`);
|
|
8342
8819
|
}, "lenient").option("--auto-fix <mode>", "Auto-fix behaviour during script generation: 'interactive' (default, prompt y/N), 'auto' (apply without prompt, for CI), 'skip' (never prompt, only apply high-confidence fixes).", (raw) => {
|
|
8343
8820
|
if (AUTO_FIX_MODES.includes(raw)) return raw;
|
|
8344
8821
|
throw new Error(`--auto-fix must be one of ${AUTO_FIX_MODES.join(" | ")}`);
|
|
8345
|
-
}, "interactive").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--force", "Overwrite an existing test.spec.ts without warning").option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)").option("--skip-trace", "Skip the trace step and run codegen against an existing actions.json").option("--skip-codegen", "Run only the trace step (do not generate test.spec.ts)")).action(async (specPath, opts) => {
|
|
8822
|
+
}, "interactive").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--force", "Overwrite an existing test.spec.ts without warning").option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)").option("--skip-trace", "Skip the trace step and run codegen against an existing actions.json").option("--skip-codegen", "Run only the trace step (do not generate test.spec.ts)").option("--update-agent-prompt", "After the trace finishes, ask Claude to refresh .ccqa/prompts/record.agent.md from a summary of the run.").option("--cwd <path>", "Working directory containing the .ccqa/ tree (monorepo support). Defaults to the current directory."))).action(async (specPath, opts) => {
|
|
8346
8823
|
const { featureName, specName } = parseSpecPath(specPath);
|
|
8347
8824
|
const language = opts.language ?? "auto";
|
|
8348
8825
|
if (opts.skipTrace && opts.skipCodegen) {
|
|
8349
8826
|
error("--skip-trace and --skip-codegen cannot be combined; nothing would run");
|
|
8350
8827
|
process.exit(2);
|
|
8351
8828
|
}
|
|
8829
|
+
await applyProfileFromOption(opts.profile, resolveCwd(opts.cwd));
|
|
8830
|
+
let traceResult = null;
|
|
8352
8831
|
if (!opts.skipTrace) {
|
|
8353
|
-
await runTrace(featureName, specName, opts.model, opts.validationMode ?? "lenient", language);
|
|
8832
|
+
traceResult = await runTrace(featureName, specName, opts.model, opts.validationMode ?? "lenient", language);
|
|
8354
8833
|
blank();
|
|
8355
8834
|
}
|
|
8356
8835
|
if (!opts.skipCodegen) {
|
|
@@ -8358,7 +8837,37 @@ const recordCommand = addLanguageOption(new Command("record").argument("<feature
|
|
|
8358
8837
|
const useSnapshot = opts.snapshot !== false;
|
|
8359
8838
|
await runGenerate(featureName, specName, parseInt(opts.maxRetries ?? "3", 10), fixMode, opts.force ?? false, useSnapshot, language, opts.model);
|
|
8360
8839
|
}
|
|
8840
|
+
if (opts.updateAgentPrompt) if (traceResult === null) warn("--update-agent-prompt is ignored when --skip-trace is set (no run summary available)");
|
|
8841
|
+
else {
|
|
8842
|
+
const cwd = resolveCwd(opts.cwd);
|
|
8843
|
+
blank();
|
|
8844
|
+
await updateAgentPrompt({
|
|
8845
|
+
mode: "record",
|
|
8846
|
+
runSummary: buildRecordRunSummary(featureName, specName, traceResult),
|
|
8847
|
+
cwd,
|
|
8848
|
+
...opts.model ? { model: opts.model } : {},
|
|
8849
|
+
...language ? { language } : {}
|
|
8850
|
+
});
|
|
8851
|
+
}
|
|
8361
8852
|
});
|
|
8853
|
+
/**
 * Render the trace pass as a short Markdown digest for the record
 * agent-prompt refresh.
 *
 * Output layout: a heading with the spec id and route status, a line with
 * the kept/recorded action counts, a blank line, then one section per route
 * step (title, status, action, observation and, when present, reason).
 * When the route has no steps a fixed placeholder line is emitted instead.
 *
 * @param {string} featureName - Feature part of the spec id.
 * @param {string} specName - Spec part of the spec id.
 * @param {object} t - Trace result with `route`, `actionsKept` and `actionsRecorded`.
 * @returns {string} The Markdown summary.
 */
function buildRecordRunSummary(featureName, specName, t) {
	const heading = `## ${featureName}/${specName} — ${t.route.status}\nActions: ${t.actionsKept} kept / ${t.actionsRecorded} recorded`;
	const renderStep = (step) => {
		const bullets = [
			`### ${step.title} (${step.status})`,
			`- action: ${oneLineSummary(step.action)}`,
			`- observation: ${oneLineSummary(step.observation)}`
		];
		// The reason line is optional; steps without one get no bullet.
		if (step.reason) bullets.push(`- reason: ${oneLineSummary(step.reason)}`);
		return bullets.join("\n");
	};
	let body;
	if (t.route.steps.length === 0) {
		body = "(no route steps recorded)";
	} else {
		body = t.route.steps.map(renderStep).join("\n\n");
	}
	return `${heading}\n\n${body}`;
}
|
|
8867
|
+
/**
 * Flatten free-form text onto a single line for the run summary.
 *
 * Every whitespace run (including newlines) collapses to one space and the
 * result is trimmed. Text longer than 240 UTF-16 code units is cut at 240
 * and suffixed with "…". Empty, whitespace-only, null or undefined input
 * yields the placeholder "(none)".
 *
 * @param {string | null | undefined} s - Text to summarise.
 * @returns {string} Single-line summary, never empty.
 */
function oneLineSummary(s) {
	const MAX_LENGTH = 240;
	// Route-step fields may be absent; treat them as empty rather than throwing.
	const flat = String(s ?? "").replace(/\s+/g, " ").trim();
	if (flat.length === 0) return "(none)";
	if (flat.length <= MAX_LENGTH) return flat;
	let head = flat.slice(0, MAX_LENGTH);
	// Don't end on the first half of a surrogate pair: that would leave a
	// lone surrogate (a broken character) right before the ellipsis.
	const lastUnit = head.charCodeAt(head.length - 1);
	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head = head.slice(0, -1);
	return `${head}…`;
}
|
|
8362
8871
|
//#endregion
|
|
8363
8872
|
//#region src/cli/draft.ts
|
|
8364
8873
|
const CATEGORY_LABEL = DRAFT_CATEGORY_LABEL;
|
|
@@ -9128,6 +9637,64 @@ function parseConcurrency(raw) {
|
|
|
9128
9637
|
return n;
|
|
9129
9638
|
}
|
|
9130
9639
|
//#endregion
|
|
9640
|
+
//#region src/cli/init.ts
|
|
9641
|
+
/**
 * Starter prompt files written by `ccqa init`. Each entry pairs a path
 * relative to the working directory with the initial file content
 * (heading, blank line, one explanatory paragraph, trailing newline).
 */
const TEMPLATES = [
	{
		relPath: ".ccqa/prompts/live.user.md",
		content: [
			`# Project guidance for live specs`,
			``,
			`Write stable, hand-maintained context here: staging URLs, naming conventions, known "this is fine" warnings. Lines you add will be appended verbatim to the system prompt of every step in 'mode: live' specs.`,
			``
		].join("\n")
	},
	{
		relPath: ".ccqa/prompts/live.agent.md",
		content: [
			`# Agent learnings for live specs`,
			``,
			`This file is updated by 'ccqa run --update-agent-prompt'. You can edit it by hand, but the next --update-agent-prompt run may rewrite the whole file. Keep stable rules in live.user.md instead.`,
			``
		].join("\n")
	},
	{
		relPath: ".ccqa/prompts/record.user.md",
		content: [
			`# Project guidance for ccqa record (deterministic trace)`,
			``,
			`Write stable, hand-maintained context here for the trace phase of 'ccqa record'. Lines you add will be appended verbatim to the trace system prompt.`,
			``
		].join("\n")
	},
	{
		relPath: ".ccqa/prompts/record.agent.md",
		content: [
			`# Agent learnings for ccqa record`,
			``,
			`This file is updated by 'ccqa record --update-agent-prompt'. Same convention as live.agent.md — stable rules go in record.user.md.`,
			``
		].join("\n")
	}
];
|
|
9671
|
+
/**
 * `ccqa init` — scaffold the prompt template files listed in TEMPLATES.
 *
 * Ensures <cwd>/.ccqa/prompts exists, writes each template in order, then
 * reports every created file, every skipped file, and the two totals.
 * Existing files are left alone unless --force is passed.
 */
const initCommand = new Command("init")
	.description("Create .ccqa/prompts/{live,record}.{user,agent}.md template files (skips existing files unless --force).")
	.option("--cwd <path>", "Working directory (default: cwd)")
	.option("--force", "Overwrite existing files")
	.action(async (opts) => {
		const cwd = resolveCwd(opts.cwd);
		header("init", cwd);
		await mkdir(join(cwd, ".ccqa", "prompts"), { recursive: true });
		const created = [];
		const skipped = [];
		// Templates are written one at a time, in declaration order.
		for (const template of TEMPLATES) {
			const wasWritten = await writeTemplate(join(cwd, template.relPath), template.content, opts.force ?? false);
			if (wasWritten) {
				created.push(template.relPath);
			} else {
				skipped.push(template.relPath);
			}
		}
		// Report all created files first, then all skipped ones.
		created.forEach((relPath) => {
			info(`created ${relPath}`);
		});
		skipped.forEach((relPath) => {
			info(`skipped ${relPath} (already exists; pass --force to overwrite)`);
		});
		blank();
		meta("created", created.length);
		meta("skipped", skipped.length);
	});
|
|
9685
|
+
/**
 * Write one template file, optionally refusing to overwrite.
 *
 * @param {string} absPath - Destination path of the file.
 * @param {string} content - Text to write (UTF-8).
 * @param {boolean} force - When true an existing file is overwritten.
 * @returns {Promise<boolean>} true if the file was written, false if it
 *   already existed and `force` was off. Any other write error is rethrown.
 */
async function writeTemplate(absPath, content, force) {
	const writeOptions = { encoding: "utf-8" };
	// "wx" makes the write fail with EEXIST instead of replacing an existing file.
	if (!force) writeOptions.flag = "wx";
	try {
		await writeFile(absPath, content, writeOptions);
	} catch (err) {
		if (err?.code === "EEXIST") return false;
		throw err;
	}
	return true;
}
|
|
9697
|
+
//#endregion
|
|
9131
9698
|
//#region src/prompts/perspectives.ts
|
|
9132
9699
|
/**
|
|
9133
9700
|
* Build the system prompt. By default the descriptive fields follow the
|
|
@@ -9595,8 +10162,6 @@ function renderSpecMarkdown(spec, labels = LABELS_JA) {
|
|
|
9595
10162
|
lines.push("");
|
|
9596
10163
|
lines.push(`| ${labels.itemCol} | ${labels.valueCol} |`);
|
|
9597
10164
|
lines.push("| --- | --- |");
|
|
9598
|
-
lines.push(`| ${labels.modeLabel} | ${mdCell(modeLabel(spec.status, labels))} |`);
|
|
9599
|
-
lines.push(`| ${labels.statusCol} | ${mdCell(statusLabel(spec.status, labels))} |`);
|
|
9600
10165
|
if (spec.summary) lines.push(`| ${labels.summary} | ${mdCell(spec.summary)} |`);
|
|
9601
10166
|
if (spec.preconditions && spec.preconditions.length > 0) lines.push(`| ${labels.preconditions} | ${spec.preconditions.map(mdCell).join("<br>")} |`);
|
|
9602
10167
|
if (spec.startScreen) lines.push(`| ${labels.startScreen} | ${mdCell(spec.startScreen)} |`);
|
|
@@ -9628,6 +10193,7 @@ function resolvePackageJson() {
|
|
|
9628
10193
|
const { version } = JSON.parse(readFileSync(resolvePackageJson(), "utf8"));
|
|
9629
10194
|
const program = new Command();
|
|
9630
10195
|
program.name("ccqa").description("E2E test CLI using Claude Code + agent-browser").version(version);
|
|
10196
|
+
program.addCommand(initCommand);
|
|
9631
10197
|
program.addCommand(draftCommand);
|
|
9632
10198
|
program.addCommand(perspectivesCommand);
|
|
9633
10199
|
program.addCommand(recordCommand);
|