ccqa 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/ccqa.mjs +11 -7
- package/dist/package.json +1 -1
- package/package.json +1 -1
package/dist/bin/ccqa.mjs
CHANGED
|
@@ -4419,7 +4419,8 @@ ${stepsText}
|
|
|
4419
4419
|
1. Take a fresh \`snapshot\` to see the current page.
|
|
4420
4420
|
2. Carry out the instruction. Use whichever agent-browser subcommand and selector style works. If the first attempt fails, take another snapshot and try a different approach — you are not being recorded.
|
|
4421
4421
|
3. After the instruction is performed, take another \`snapshot\` (and optionally a \`get count\` / \`wait --text\` probe) to verify the expected outcome.
|
|
4422
|
-
4.
|
|
4422
|
+
4. **Before emitting STEP_RESULT, make the judgement target visible in the page** so the auto-captured "after" screenshot proves your verdict. Use \`agent-browser eval "<elementRef>.scrollIntoView({block:'center'})"\` or similar to bring the asserted row / banner / URL bar / bot reply into view. A correct verdict with no on-screen evidence is still a weak artifact.
|
|
4423
|
+
5. Decide: did the **Expected** condition hold? Be honest. If the page is in an unexpected state, that is a fail, not something to work around.
|
|
4423
4424
|
|
|
4424
4425
|
### Judgement rules
|
|
4425
4426
|
|
|
@@ -4428,6 +4429,7 @@ ${stepsText}
|
|
|
4428
4429
|
- If the expected outcome is partially satisfied (e.g. the page loaded but the asserted element is missing) — fail, and say which part is missing.
|
|
4429
4430
|
- Pass only when you have *positive* evidence (a successful snapshot, a verified URL, a wait that resolved). "No error shown" is not enough on its own.
|
|
4430
4431
|
- Do not invent success when blocked: fail honestly with a short reason.
|
|
4432
|
+
- **Evidence discipline**: when the assertion target is a specific row / message / banner / URL, scroll it into view (or focus the relevant pane) before letting the step end. The "after" screenshot is captured for you automatically — your job is to make sure that screenshot shows the thing your STEP_RESULT line is talking about.
|
|
4431
4433
|
|
|
4432
4434
|
### Output contract (STRICT)
|
|
4433
4435
|
|
|
@@ -4508,13 +4510,15 @@ function findLastStepResult(text) {
|
|
|
4508
4510
|
* and continues. We never throw, because a missing screenshot is a degraded
|
|
4509
4511
|
* artifact, not a reason to abort the test step.
|
|
4510
4512
|
*/
|
|
4511
|
-
function takeScreenshot(sessionName, outPath) {
|
|
4512
|
-
const
|
|
4513
|
+
function takeScreenshot(sessionName, outPath, options) {
|
|
4514
|
+
const args = [
|
|
4513
4515
|
"--session",
|
|
4514
4516
|
sessionName,
|
|
4515
|
-
"screenshot"
|
|
4516
|
-
|
|
4517
|
-
|
|
4517
|
+
"screenshot"
|
|
4518
|
+
];
|
|
4519
|
+
if (options?.fullPage) args.push("--full");
|
|
4520
|
+
args.push(outPath);
|
|
4521
|
+
const res = spawnAB(args);
|
|
4518
4522
|
if (res.status === 0) return {
|
|
4519
4523
|
ok: true,
|
|
4520
4524
|
path: outPath
|
|
@@ -4630,7 +4634,7 @@ async function runNdExecutor(input) {
|
|
|
4630
4634
|
transcriptParts.push(`[ccqa] invokeClaudeStreaming threw: ${err instanceof Error ? err.message : String(err)}`);
|
|
4631
4635
|
}
|
|
4632
4636
|
const transcript = transcriptParts.join("\n");
|
|
4633
|
-
const after = takeScreenshot(input.sessionName, paths.afterPng);
|
|
4637
|
+
const after = takeScreenshot(input.sessionName, paths.afterPng, { fullPage: true });
|
|
4634
4638
|
if (!after.ok) warn(`screenshot (after, ${step.id}) failed: ${after.error}`);
|
|
4635
4639
|
await writeFile(paths.logTxt, transcript || "(no assistant text captured)", "utf-8");
|
|
4636
4640
|
const { status, reasoning } = judgeStepOutcome({
|
package/dist/package.json
CHANGED