ccqa 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +278 -0
- package/bin/ccqa.ts +2 -0
- package/package.json +38 -0
- package/src/claude/invoke.test.ts +167 -0
- package/src/claude/invoke.ts +238 -0
- package/src/cli/generate-setup.ts +215 -0
- package/src/cli/generate.ts +224 -0
- package/src/cli/index.ts +21 -0
- package/src/cli/logger.ts +45 -0
- package/src/cli/run.ts +65 -0
- package/src/cli/trace-setup.ts +124 -0
- package/src/cli/trace.test.ts +233 -0
- package/src/cli/trace.ts +244 -0
- package/src/codegen/actions-to-script.ts +188 -0
- package/src/prompts/codegen.ts +73 -0
- package/src/prompts/trace.ts +278 -0
- package/src/runtime/test-helpers.ts +77 -0
- package/src/spec/parser.test.ts +135 -0
- package/src/spec/parser.ts +96 -0
- package/src/store/index.test.ts +107 -0
- package/src/store/index.ts +193 -0
- package/src/types.test.ts +96 -0
- package/src/types.ts +91 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
2
|
+
import type { SDKMessage, Options, HookInput } from "@anthropic-ai/claude-agent-sdk";
|
|
3
|
+
import * as log from "../cli/logger.ts";
|
|
4
|
+
|
|
5
|
+
/** Options accepted by `invokeClaudeStreaming`. */
export interface ClaudeInvokeOptions {
  /** Prompt text passed to the SDK `query` call. */
  prompt: string;
  /** Optional system prompt forwarded unchanged to the SDK options. */
  systemPrompt?: string;
  /** Tool allow-list; `invokeClaudeStreaming` falls back to `["Bash(*)"]` when omitted. */
  allowedTools?: string[];
  /** When true, the SDK is handed an empty `tools` list. */
  disableBuiltinTools?: boolean;
  /** NOTE(review): not read by `invokeClaudeStreaming` in this file — confirm it is still needed. */
  mcpConfigPath?: string;
  /** Turn limit forwarded unchanged to the SDK options. */
  maxTurns?: number;
  /** Extra environment variables; merged over `process.env` when provided. */
  env?: Record<string, string>;
  /** Called when an agent-browser command is intercepted; receives the AB_ACTION line. */
  onAbAction?: (abAction: string) => void;
  /** Called when an agent-browser command fails, so the caller can roll back the last AB_ACTION. */
  onAbActionFailed?: () => void;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Runs a Claude Agent SDK `query` and forwards every streamed message to `onEvent`.
 *
 * When `onAbAction` or `onAbActionFailed` is supplied, Bash tool calls are intercepted:
 * blocked agent-browser subcommands and `@ref` selectors are rejected, recordable
 * agent-browser commands are reported through `onAbAction`, and a failure of the most
 * recently recorded command triggers `onAbActionFailed`.
 *
 * @param options Prompt, SDK settings and optional agent-browser callbacks.
 * @param onEvent Invoked once per SDK message, before any local handling.
 * @returns The final result text (empty unless the result subtype is "success") and the error flag.
 */
export async function invokeClaudeStreaming(
  options: ClaudeInvokeOptions,
  onEvent: (msg: SDKMessage) => void,
): Promise<{ result: string; isError: boolean }> {
  const { onAbAction, onAbActionFailed } = options;

  // tool_use_id of the latest Bash call that emitted an AB_ACTION, kept so that a
  // PostToolUseFailure for that same call can roll the action back.
  let pendingAbToolUseId: string | null = null;

  const hooks: Options["hooks"] =
    onAbAction || onAbActionFailed
      ? {
          PreToolUse: [
            {
              hooks: [
                async (input: HookInput) => {
                  if (input.hook_event_name !== "PreToolUse") return {};
                  if (input.tool_name !== "Bash") return {};
                  const cmd = (input.tool_input as Record<string, unknown>)?.["command"];
                  if (typeof cmd !== "string") return {};

                  // eval/js/find/etc bypass structured action recording, so refuse them.
                  if (isBlockedAbSubcommand(cmd)) {
                    return {
                      decision: "block",
                      reason: "This agent-browser subcommand is not allowed because it cannot be recorded as a structured test action. Use only the standard commands: click, check, fill, select, hover, press, wait. Take a fresh snapshot to find the correct selector.",
                    };
                  }

                  // @ref selectors are session-specific and cannot be replayed.
                  if (hasRefSelector(cmd)) {
                    return {
                      decision: "block",
                      reason: "@ref selectors (like @e14) are session-specific and change every run. They cannot be used in generated tests. Use one of the allowed selector formats instead: [aria-label='...'], text=..., [placeholder='...'], or [type='password']. Take a fresh snapshot and find the element's aria-label or visible text.",
                    };
                  }

                  const abAction = extractAbActionFromBashCommand(cmd);
                  if (!abAction || !onAbAction) {
                    // Nothing was recorded for this call, so there is nothing to roll back.
                    pendingAbToolUseId = null;
                    return {};
                  }
                  pendingAbToolUseId = input.tool_use_id;
                  onAbAction(abAction);
                  return {};
                },
              ],
            },
          ],
          PostToolUseFailure: [
            {
              hooks: [
                async (input: HookInput) => {
                  if (input.hook_event_name !== "PostToolUseFailure") return {};
                  if (input.tool_name !== "Bash") return {};
                  // Roll back only when the failed call is the one that emitted the AB_ACTION.
                  const failedRecordedCall = input.tool_use_id === pendingAbToolUseId;
                  if (failedRecordedCall && onAbActionFailed) {
                    onAbActionFailed();
                    pendingAbToolUseId = null;
                  }
                  return {};
                },
              ],
            },
          ],
        }
      : undefined;

  const sdkOptions: Options = {
    systemPrompt: options.systemPrompt,
    maxTurns: options.maxTurns,
    allowedTools: options.allowedTools ?? ["Bash(*)"],
    permissionMode: "bypassPermissions",
    allowDangerouslySkipPermissions: true,
    hooks,
  };
  if (options.env) {
    sdkOptions.env = { ...process.env, ...options.env } as Record<string, string | undefined>;
  }
  if (options.disableBuiltinTools) {
    sdkOptions.tools = [];
  }

  let result = "";
  let isError = false;

  for await (const msg of query({ prompt: options.prompt, options: sdkOptions })) {
    onEvent(msg);

    if (msg.type === "assistant") {
      // Echo every Bash command the assistant requests to the CLI log.
      for (const block of msg.message.content ?? []) {
        if (block.type !== "tool_use" || block.name !== "Bash") continue;
        const cmd = (block.input as Record<string, unknown>)?.["command"];
        if (typeof cmd === "string") log.bash(cmd);
      }
    } else if (msg.type === "result") {
      result = msg.subtype === "success" ? msg.result : "";
      isError = msg.is_error ?? false;
    }
  }

  return { result, isError };
}
|
|
130
|
+
|
|
131
|
+
/** agent-browser subcommands that `isBlockedAbSubcommand` rejects. */
const BLOCKED_AB_SUBCOMMANDS: ReadonlySet<string> = new Set<string>([
  "eval",
  "js",
  "find",
  "label",
  "textbox",
]);
|
|
132
|
+
|
|
133
|
+
/**
 * Shell-aware tokenizer: splits a command string into tokens respecting single/double quotes.
 * e.g. `click "[role='dialog'] button:last-child"` → ["click", "[role='dialog'] button:last-child"]
 *
 * Rules applied:
 * - Spaces, tabs, CR and LF separate tokens outside quotes.
 * - Single quotes keep everything literal up to the closing quote.
 * - Inside double quotes a backslash escapes `"`, `\`, `$` and a backtick; other backslashes stay literal.
 * - Outside quotes a backslash makes the next character literal; backslash-newline is dropped.
 * - An empty quoted string (`""` or `''`) yields an empty token instead of disappearing,
 *   so positional arguments keep their positions.
 *
 * @param s Raw command text (no variable expansion or globbing is performed).
 * @returns The tokens in order; an empty or all-whitespace input yields `[]`.
 */
export function shellTokenize(s: string): string[] {
  const tokens: string[] = [];
  let cur = "";
  // True once a token has started, even if it is still empty (e.g. right after `""`).
  let inToken = false;
  let quote: '"' | "'" | null = null;

  for (let i = 0; i < s.length; i++) {
    const ch = s[i]!;
    const next = i + 1 < s.length ? s[i + 1]! : null;

    if (quote === "'") {
      if (ch === "'") quote = null;
      else cur += ch;
    } else if (quote === '"') {
      if (ch === '"') {
        quote = null;
      } else if (ch === "\\" && next !== null && '"\\$`'.includes(next)) {
        cur += next;
        i++;
      } else {
        cur += ch;
      }
    } else if (ch === '"' || ch === "'") {
      quote = ch;
      inToken = true;
    } else if (ch === "\\" && next !== null) {
      // Backslash-newline is a line continuation; anything else is taken literally.
      if (next !== "\n") {
        cur += next;
        inToken = true;
      }
      i++;
    } else if (ch === " " || ch === "\t" || ch === "\n" || ch === "\r") {
      if (inToken) {
        tokens.push(cur);
        cur = "";
        inToken = false;
      }
    } else {
      cur += ch;
      inToken = true;
    }
  }

  if (inToken) tokens.push(cur);
  return tokens;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Returns the subcommand of an `agent-browser [flags] <subcommand> [args...]` command string.
 *
 * Only the text after the first "agent-browser" occurrence is inspected. Leading tokens that
 * start with "-" are skipped two at a time, i.e. every flag is assumed to carry one value token.
 *
 * @returns The subcommand token, or null when the command does not mention agent-browser
 *          or no token remains after the flags.
 */
export function extractAbSubcommand(cmd: string): string | null {
  const marker = "agent-browser";
  const markerAt = cmd.indexOf(marker);
  if (markerAt < 0) return null;

  const tokens = shellTokenize(cmd.slice(markerAt + marker.length).trim());

  let pos = 0;
  while (pos < tokens.length && tokens[pos]!.startsWith("-")) {
    pos += 2;
  }
  return tokens[pos] ?? null;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Returns true if the command invokes a blocked agent-browser subcommand (eval/js/find/etc).
 *
 * Every occurrence of "agent-browser" in the command is checked, not just the first, so a
 * chained command such as `agent-browser snapshot && agent-browser eval "..."` is rejected
 * as well.
 *
 * @param cmd Full Bash command line.
 */
export function isBlockedAbSubcommand(cmd: string): boolean {
  const marker = "agent-browser";
  let at = cmd.indexOf(marker);
  while (at !== -1) {
    // extractAbSubcommand inspects the first occurrence in its input, so feed it the tail.
    const sub = extractAbSubcommand(cmd.slice(at));
    if (sub !== null && BLOCKED_AB_SUBCOMMANDS.has(sub)) return true;
    at = cmd.indexOf(marker, at + marker.length);
  }
  return false;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Returns true if any argument of an agent-browser command starts with "@" (a @ref
 * selector such as @e14).
 *
 * Flags (skipped in flag/value pairs) and the subcommand itself are not inspected.
 */
export function hasRefSelector(cmd: string): boolean {
  const marker = "agent-browser";
  const markerAt = cmd.indexOf(marker);
  if (markerAt === -1) return false;

  const tokens = shellTokenize(cmd.slice(markerAt + marker.length).trim());

  let subcommandIdx = 0;
  while (subcommandIdx < tokens.length && tokens[subcommandIdx]!.startsWith("-")) {
    subcommandIdx += 2;
  }

  // Everything after the subcommand is an argument.
  return tokens.slice(subcommandIdx + 1).some((arg) => arg.startsWith("@"));
}
|
|
190
|
+
|
|
191
|
+
/**
 * Converts an `agent-browser --session <name> <cmd> [args...]` Bash command into its
 * AB_ACTION line.
 *
 * @returns The pipe-separated AB_ACTION line, or null when the command is not an
 *          agent-browser call or its subcommand is not recorded here (e.g. `snapshot`,
 *          or `cookies` with anything other than `clear`).
 */
export function extractAbActionFromBashCommand(cmd: string): string | null {
  const subCmd = extractAbSubcommand(cmd);
  if (!subCmd) return null;

  // Re-tokenize the text after "agent-browser", dropping shell redirects/pipes
  // (2>&1, >&1, |, >file) that are not agent-browser arguments.
  const marker = "agent-browser";
  const afterMarker = cmd.slice(cmd.indexOf(marker) + marker.length).trim();
  const tokens = shellTokenize(afterMarker).filter((tok) => !/^(2?>|[|&>])/.test(tok));

  let subcommandIdx = 0;
  while (subcommandIdx < tokens.length && tokens[subcommandIdx]!.startsWith("-")) {
    subcommandIdx += 2;
  }
  const args = tokens.slice(subcommandIdx + 1);

  if (subCmd === "cookies") {
    return args[0] === "clear" ? "AB_ACTION|cookies_clear" : null;
  }
  if (subCmd === "scroll") {
    return `AB_ACTION|scroll|${args.join("|")}`;
  }

  // Number of positional fields emitted per subcommand; missing arguments become "".
  const fieldCount = new Map<string, number>([
    ["open", 1],
    ["press", 1],
    ["click", 2],
    ["dblclick", 2],
    ["check", 2],
    ["uncheck", 2],
    ["hover", 2],
    ["wait", 2],
    ["fill", 3],
    ["type", 3],
    ["select", 3],
    ["drag", 3],
  ]);

  const count = fieldCount.get(subCmd);
  if (count === undefined) return null; // snapshot and unknown subcommands are not recorded here

  const fields = Array.from({ length: count }, (_, k) => args[k] ?? "").join("|");
  return `AB_ACTION|${subCmd}|${fields}`;
}
|
|
238
|
+
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
import { readFile, writeFile, stat, unlink } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { Command } from "commander";
|
|
4
|
+
import {
|
|
5
|
+
ensureCcqaDir,
|
|
6
|
+
readSetupSpecFile,
|
|
7
|
+
getSetupActions,
|
|
8
|
+
getSetupDir,
|
|
9
|
+
saveSetupTestScript,
|
|
10
|
+
} from "../store/index.ts";
|
|
11
|
+
import { actionsToScript } from "../codegen/actions-to-script.ts";
|
|
12
|
+
import { buildCleanupPrompt, buildAutoFixPrompt } from "../prompts/codegen.ts";
|
|
13
|
+
import { invokeClaudeStreaming } from "../claude/invoke.ts";
|
|
14
|
+
import { parseSetupSpec } from "../spec/parser.ts";
|
|
15
|
+
import type { TraceAction } from "../types.ts";
|
|
16
|
+
import * as log from "./logger.ts";
|
|
17
|
+
|
|
18
|
+
/**
 * `ccqa generate-setup <name>` command definition.
 *
 * `--max-retries` is validated before use: a non-numeric value would otherwise become NaN,
 * silently disable the auto-fix loop and end with a misleading "auto-fix exhausted" message.
 */
export const generateSetupCommand = new Command("generate-setup")
  .argument("<name>", "Setup name to generate (e.g. login)")
  .description("Clean up, validate, and templatize setup actions")
  .option("--max-retries <n>", "Maximum number of auto-fix retries", "3")
  .option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)")
  .action(async (name: string, opts: { maxRetries: string; fromDummy?: boolean }) => {
    const maxRetries = Number.parseInt(opts.maxRetries, 10);
    if (!Number.isInteger(maxRetries) || maxRetries < 0) {
      log.warn(`--max-retries must be a non-negative integer (got "${opts.maxRetries}")`);
      process.exit(1);
    }
    await runGenerateSetup(name, maxRetries, opts.fromDummy ?? false);
  });
|
|
26
|
+
|
|
27
|
+
/**
 * Generates, validates and templatizes the test script for one setup.
 *
 * Phase 1 writes test.dummy.spec.ts from the recorded actions (or reuses the existing file
 * when `fromDummy` is set). Phase 2 runs vitest on it, applying up to `maxRetries` LLM
 * auto-fixes while it fails. Phase 3 swaps dummy values for {{placeholders}}, saves
 * test.spec.ts and removes the dummy file. The process exits with code 1 when the dummy
 * file is missing in `fromDummy` mode or the test still fails after auto-fixing.
 */
async function runGenerateSetup(name: string, maxRetries: number, fromDummy: boolean): Promise<void> {
  log.header("generate-setup", name);

  await ensureCcqaDir();

  const spec = parseSetupSpec(await readSetupSpecFile(name));
  const dummyPath = join(getSetupDir(name), "test.dummy.spec.ts");
  const finalPath = join(getSetupDir(name), "test.spec.ts");

  // Phase 1: obtain test.dummy.spec.ts
  if (fromDummy) {
    let dummyExists = true;
    try {
      await stat(dummyPath);
    } catch {
      dummyExists = false;
    }
    if (!dummyExists) {
      log.warn(`test.dummy.spec.ts not found. Run without --from-dummy first.`);
      process.exit(1);
    }
    log.info("Resuming from existing test.dummy.spec.ts");
  } else {
    const { actions } = await getSetupActions(name);
    log.meta("setup", spec.title);
    log.meta("actions", actions.length);
    log.blank();

    const cleanedActions = await cleanupActions(actions);
    if (cleanedActions.length !== actions.length) {
      log.meta("cleaned", cleanedActions.length);
    }

    await writeFile(dummyPath, actionsToScript(cleanedActions, spec.title), "utf-8");
    log.meta("saved", dummyPath);
  }
  log.blank();

  // Phase 2: run vitest, auto-fixing while it fails and retries remain
  let run = await runVitest(dummyPath);
  for (let attempt = 1; run.exitCode !== 0 && attempt <= maxRetries; attempt++) {
    log.info(`auto-fix attempt ${attempt}/${maxRetries}...`);
    log.blank();

    const fixed = await autoFixWithLLM(run.currentScript, run.output);
    if (!fixed) {
      log.warn("could not determine fix from failure log");
      break;
    }

    await writeFile(dummyPath, fixed, "utf-8");
    log.meta("saved", dummyPath);
    log.blank();

    run = await runVitest(dummyPath);
  }

  if (run.exitCode !== 0) {
    log.warn("auto-fix exhausted — setup test still failing");
    log.hint(`edit ${dummyPath} manually, then run: ccqa generate-setup ${name} --from-dummy`);
    process.exit(1);
  }

  // Phase 3: dummy values → {{placeholders}}, saved as test.spec.ts
  const placeholders = spec.placeholders as
    | Record<string, { dummy: string; description?: string }>
    | undefined;
  const templatizedScript = reversePlaceholdersInScript(run.currentScript, placeholders);

  await writeFile(finalPath, templatizedScript, "utf-8");
  // Best effort: a dummy file that is already gone is not an error.
  await unlink(dummyPath).catch(() => {});

  log.blank();
  log.meta("saved", finalPath);
  log.hint(`setup '${name}' is ready — reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
}
|
|
106
|
+
|
|
107
|
+
/**
 * Replace dummy values with {{placeholder}} directly in the test script text.
 * Longer dummy values are replaced first to avoid partial matches.
 *
 * Entries whose dummy is empty (or not a string) are skipped: replacing the empty string
 * would insert the placeholder between every character of the script. Replacement is done
 * with split/join so `$`-sequences in a placeholder key are never interpreted as
 * replacement patterns.
 *
 * @param script Source text of the validated dummy test script.
 * @param placeholders Placeholder definitions keyed by placeholder name; when omitted the
 *        script is returned unchanged.
 * @returns The script with every dummy occurrence replaced by `{{key}}`.
 */
function reversePlaceholdersInScript(
  script: string,
  placeholders?: Record<string, { dummy: string; description?: string }>,
): string {
  if (!placeholders) return script;

  const entries = Object.entries(placeholders)
    .filter(([, def]) => typeof def?.dummy === "string" && def.dummy.length > 0)
    .sort((a, b) => b[1].dummy.length - a[1].dummy.length);

  let result = script;
  for (const [key, def] of entries) {
    result = result.split(def.dummy).join(`{{${key}}}`);
  }
  return result;
}
|
|
127
|
+
|
|
128
|
+
// --- Shared utilities ---
|
|
129
|
+
|
|
130
|
+
/** Fix that inserts a new sleep of `seconds` at the given 1-based script line. */
interface SleepInsert {
  line: number;
  seconds: number;
  reason: string;
}

/** Fix that raises the sleep on the given 1-based script line to `increase_to`. */
interface SleepIncrease {
  line: number;
  increase_to: number;
  reason: string;
}

type AutoFixAction = SleepInsert | SleepIncrease;

/**
 * Asks the LLM for sleep adjustments based on a failing run and applies them to the script.
 *
 * @param script Current test script source.
 * @param failureLog Combined output of the failing test run.
 * @returns The patched script, or null when the call failed, the reply was empty or not a
 *          non-empty JSON array, or anything threw along the way.
 */
async function autoFixWithLLM(script: string, failureLog: string): Promise<string | null> {
  try {
    const reply = await invokeClaudeStreaming(
      { prompt: buildAutoFixPrompt(script, failureLog), disableBuiltinTools: true, maxTurns: 1 },
      () => {},
    );
    if (reply.isError || !reply.result) return null;

    // The reply may be wrapped in a ``` / ```json fence; unwrap it before parsing.
    const fencePattern = /^```(?:json)?\n?([\s\S]*?)\n?```$/;
    const payload = reply.result.trim().replace(fencePattern, "$1").trim();
    const fixes = JSON.parse(payload) as AutoFixAction[];

    return Array.isArray(fixes) && fixes.length > 0 ? applySleepFixes(script, fixes) : null;
  } catch {
    return null;
  }
}
|
|
152
|
+
|
|
153
|
+
/**
 * Applies LLM-proposed sleep fixes to a test script.
 *
 * Increases rewrite the existing `spawnSync("sleep", ["N"]` call on the addressed line;
 * inserts add a new sleep statement at the addressed line. Inserts are applied bottom-up
 * so earlier line numbers stay valid.
 *
 * The fixes come from unvalidated JSON, so each entry is checked before its values are
 * interpolated into source code: entries that are not objects, whose `line` is not an
 * integer, or whose duration is not a finite non-negative number are ignored.
 *
 * @param script Test script source.
 * @param fixes Fix entries; `line` is 1-based.
 * @returns The patched script (unchanged when no entry is applicable).
 */
function applySleepFixes(script: string, fixes: AutoFixAction[]): string {
  const lines = script.split("\n");

  const isDuration = (v: unknown): v is number =>
    typeof v === "number" && Number.isFinite(v) && v >= 0;

  const inserts: Array<{ line: number; seconds: number }> = [];

  for (const raw of fixes as unknown[]) {
    if (typeof raw !== "object" || raw === null) continue;
    const fix = raw as Record<string, unknown>;

    const line = fix["line"];
    if (typeof line !== "number" || !Number.isInteger(line)) continue;

    if ("increase_to" in fix) {
      const increaseTo = fix["increase_to"];
      const idx = line - 1;
      if (isDuration(increaseTo) && idx >= 0 && idx < lines.length) {
        lines[idx] = lines[idx]!.replace(
          /spawnSync\("sleep",\s*\["\d+"\]/,
          `spawnSync("sleep", ["${increaseTo}"]`,
        );
      }
      continue;
    }

    const seconds = fix["seconds"];
    if (isDuration(seconds)) inserts.push({ line, seconds });
  }

  // Highest line first, so an insert never shifts a position that is still pending.
  inserts.sort((a, b) => b.line - a.line);
  for (const { line, seconds } of inserts) {
    const idx = line - 1;
    if (idx >= 0 && idx <= lines.length) {
      lines.splice(idx, 0, ` spawnSync("sleep", ["${seconds}"], { stdio: "inherit" });`);
    }
  }

  return lines.join("\n");
}
|
|
181
|
+
|
|
182
|
+
/**
 * Runs `bunx vitest run <scriptPath>` and collects its outcome.
 *
 * The child's stdout and stderr are captured in full and echoed to this process only after
 * the child has exited. The script file is re-read afterwards and returned as `currentScript`.
 *
 * @returns Exit code, stdout followed by stderr as one string, and the script's current source.
 */
async function runVitest(scriptPath: string): Promise<{ exitCode: number; output: string; currentScript: string }> {
  const child = Bun.spawn(["bunx", "vitest", "run", scriptPath], {
    stdout: "pipe",
    stderr: "pipe",
  });

  const stdoutPending = new Response(child.stdout).text();
  const stderrPending = new Response(child.stderr).text();
  const [stdoutText, stderrText, exitCode] = await Promise.all([
    stdoutPending,
    stderrPending,
    child.exited,
  ]);
  const currentScript = await Bun.file(scriptPath).text();

  process.stdout.write(stdoutText);
  if (stderrText) {
    process.stderr.write(stderrText);
  }

  return { exitCode, output: stdoutText + stderrText, currentScript };
}
|
|
199
|
+
|
|
200
|
+
/**
 * Asks the LLM to clean up the recorded actions.
 *
 * Best effort: the original `actions` are returned whenever the call reports an error,
 * the reply is empty, it does not parse to a non-empty JSON array, or anything throws.
 */
async function cleanupActions(actions: TraceAction[]): Promise<TraceAction[]> {
  try {
    const { result, isError } = await invokeClaudeStreaming(
      { prompt: buildCleanupPrompt(actions), disableBuiltinTools: true, maxTurns: 1 },
      () => {},
    );
    if (!isError && result) {
      // The reply may be wrapped in a ``` / ```json fence; unwrap it before parsing.
      const payload = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
      const cleaned = JSON.parse(payload) as TraceAction[];
      if (Array.isArray(cleaned) && cleaned.length > 0) return cleaned;
    }
  } catch {
    // Deliberately ignored: fall back to the unmodified actions below.
  }
  return actions;
}
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import { writeFile } from "node:fs/promises";
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import { readFile } from "node:fs/promises";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import {
|
|
6
|
+
ensureCcqaDir,
|
|
7
|
+
parseSpecPath,
|
|
8
|
+
getTraceActions,
|
|
9
|
+
getSetupDir,
|
|
10
|
+
readSpecFile,
|
|
11
|
+
saveTestScript,
|
|
12
|
+
} from "../store/index.ts";
|
|
13
|
+
import { actionsToScript } from "../codegen/actions-to-script.ts";
|
|
14
|
+
import type { SetupScript } from "../codegen/actions-to-script.ts";
|
|
15
|
+
import { buildCleanupPrompt, buildAutoFixPrompt } from "../prompts/codegen.ts";
|
|
16
|
+
import { invokeClaudeStreaming } from "../claude/invoke.ts";
|
|
17
|
+
import { parseTestSpec } from "../spec/parser.ts";
|
|
18
|
+
import type { TraceAction } from "../types.ts";
|
|
19
|
+
import * as log from "./logger.ts";
|
|
20
|
+
|
|
21
|
+
/**
 * `ccqa generate <feature/spec>` command definition.
 *
 * `--max-retries` is validated before use: a non-numeric value would otherwise become NaN,
 * silently disable the auto-fix loop and end with a misleading "auto-fix exhausted" message.
 */
export const generateCommand = new Command("generate")
  .argument("<feature/spec>", "Spec to generate test for (e.g. tasks/create-and-complete)")
  .description("Generate agent-browser test script from recorded trace actions")
  .option("--max-retries <n>", "Maximum number of auto-fix retries", "3")
  .action(async (specPath: string, opts: { maxRetries: string }) => {
    const { featureName, specName } = parseSpecPath(specPath);
    const maxRetries = Number.parseInt(opts.maxRetries, 10);
    if (!Number.isInteger(maxRetries) || maxRetries < 0) {
      log.warn(`--max-retries must be a non-negative integer (got "${opts.maxRetries}")`);
      process.exit(1);
    }
    await runGenerate(featureName, specName, maxRetries);
  });
|
|
29
|
+
|
|
30
|
+
/**
 * Generates the test script for one spec from its recorded trace actions and validates it.
 *
 * The script is built from the LLM-cleaned actions (plus any setup scripts the spec
 * references), saved, and run with vitest. While the run fails, up to `maxRetries` LLM
 * auto-fixes are applied. The process exits with code 1 if the test still fails.
 */
async function runGenerate(featureName: string, specName: string, maxRetries: number): Promise<void> {
  log.header("generate", `${featureName}/${specName}`);

  await ensureCcqaDir();

  const trace = await getTraceActions(featureName, specName);
  log.meta("trace", trace.path);
  log.meta("actions", trace.actions.length);

  // Resolve the setup scripts referenced by the test spec, if any.
  const spec = parseTestSpec(await readSpecFile(featureName, specName));
  const setupScripts = await loadSetupScripts(
    spec.setups as Array<{ name: string; params?: Record<string, string> }> | undefined,
  );
  const hasSetups = setupScripts.length > 0;
  if (hasSetups) {
    log.meta("setups", setupScripts.map((s) => s.name).join(", "));
  }
  log.blank();

  const cleanedActions = await cleanupActions(trace.actions);
  if (cleanedActions.length !== trace.actions.length) {
    log.meta("cleaned", cleanedActions.length);
  }

  const script = actionsToScript(cleanedActions, spec.title, hasSetups ? setupScripts : undefined);
  const scriptPath = await saveTestScript(featureName, specName, script);
  log.meta("saved", scriptPath);
  log.blank();

  let run = await runVitest(scriptPath);
  for (let attempt = 1; run.exitCode !== 0 && attempt <= maxRetries; attempt++) {
    log.info(`auto-fix attempt ${attempt}/${maxRetries}...`);
    log.blank();

    const fixed = await autoFixWithLLM(run.currentScript, run.output);
    if (!fixed) {
      log.warn("could not determine fix from failure log");
      break;
    }

    await writeFile(scriptPath, fixed, "utf-8");
    log.meta("saved", scriptPath);
    log.blank();

    run = await runVitest(scriptPath);
  }

  if (run.exitCode === 0) {
    log.hint(`run 'ccqa run ${featureName}/${specName}' to execute the test`);
    return;
  }

  log.warn("auto-fix exhausted — test still failing");
  process.exit(1);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Loads the generated test.spec.ts of every referenced setup, extracts its test body and
 * substitutes the {{placeholders}} with the reference's `params`.
 *
 * @param setups Setup references from the test spec; missing or empty yields `[]`.
 * @returns One `{ name, body }` entry per reference, in the given order.
 * @throws Error when a setup's test.spec.ts cannot be read.
 */
async function loadSetupScripts(
  setups?: Array<{ name: string; params?: Record<string, string> }>,
): Promise<SetupScript[]> {
  const loaded: SetupScript[] = [];

  for (const { name, params } of setups ?? []) {
    const scriptPath = join(getSetupDir(name), "test.spec.ts");

    let source: string;
    try {
      source = await readFile(scriptPath, "utf-8");
    } catch {
      throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${name}\` first.`);
    }

    loaded.push({ name, body: replacePlaceholders(extractTestBody(source), params ?? {}) });
  }

  return loaded;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Returns the lines between the first line that starts a `test(` call and the next line
 * that begins with `}` followed by whitespace, `,`, `)` or `;` (e.g. "}, N * 60 * 1000);").
 *
 * @returns The body lines joined with "\n"; "" when no `test(` line exists. If no closing
 *          line is found, everything after the `test(` line is returned.
 */
function extractTestBody(script: string): string {
  const lines = script.split("\n");

  const openAt = lines.findIndex((line) => /^\s*test\(/.test(line));
  if (openAt < 0) return "";

  const tail = lines.slice(openAt + 1);
  const closeAt = tail.findIndex((line) => /^\s*\}[\s,);]/.test(line));
  const body = closeAt < 0 ? tail : tail.slice(0, closeAt);

  return body.join("\n");
}
|
|
128
|
+
|
|
129
|
+
/**
 * Substitutes every `{{key}}` in `body` with the matching value from `params`.
 *
 * Values are inserted literally. The substitution uses split/join rather than a string
 * replacement so that `$`-sequences in a value (e.g. a password containing `$$` or `$&`)
 * are not interpreted as replacement patterns.
 *
 * @param body Test body text containing `{{key}}` placeholders.
 * @param params Placeholder values keyed by placeholder name.
 * @returns The body with all known placeholders substituted; unknown ones are left as-is.
 */
function replacePlaceholders(body: string, params: Record<string, string>): string {
  let result = body;
  for (const [key, value] of Object.entries(params)) {
    result = result.split(`{{${key}}}`).join(value);
  }
  return result;
}
|
|
136
|
+
|
|
137
|
+
// --- Auto-fix ---
|
|
138
|
+
|
|
139
|
+
/** Fix that inserts a new sleep of `seconds` at the given 1-based script line. */
interface SleepInsert {
  line: number;
  seconds: number;
  reason: string;
}

/** Fix that raises the sleep on the given 1-based script line to `increase_to`. */
interface SleepIncrease {
  line: number;
  increase_to: number;
  reason: string;
}

type AutoFixAction = SleepInsert | SleepIncrease;

/**
 * Asks the LLM for sleep adjustments based on a failing run and applies them to the script.
 *
 * @param script Current test script source.
 * @param failureLog Combined output of the failing test run.
 * @returns The patched script, or null when the call failed, the reply was empty or not a
 *          non-empty JSON array, or anything threw along the way.
 */
async function autoFixWithLLM(script: string, failureLog: string): Promise<string | null> {
  try {
    const reply = await invokeClaudeStreaming(
      { prompt: buildAutoFixPrompt(script, failureLog), disableBuiltinTools: true, maxTurns: 1 },
      () => {},
    );
    if (reply.isError || !reply.result) return null;

    // The reply may be wrapped in a ``` / ```json fence; unwrap it before parsing.
    const fencePattern = /^```(?:json)?\n?([\s\S]*?)\n?```$/;
    const payload = reply.result.trim().replace(fencePattern, "$1").trim();
    const fixes = JSON.parse(payload) as AutoFixAction[];

    return Array.isArray(fixes) && fixes.length > 0 ? applySleepFixes(script, fixes) : null;
  } catch {
    return null;
  }
}
|
|
161
|
+
|
|
162
|
+
/**
 * Applies LLM-proposed sleep fixes to a test script.
 *
 * Increases rewrite the existing `spawnSync("sleep", ["N"]` call on the addressed line;
 * inserts add a new sleep statement at the addressed line. Inserts are applied bottom-up
 * so earlier line numbers stay valid.
 *
 * The fixes come from unvalidated JSON, so each entry is checked before its values are
 * interpolated into source code: entries that are not objects, whose `line` is not an
 * integer, or whose duration is not a finite non-negative number are ignored.
 *
 * @param script Test script source.
 * @param fixes Fix entries; `line` is 1-based.
 * @returns The patched script (unchanged when no entry is applicable).
 */
function applySleepFixes(script: string, fixes: AutoFixAction[]): string {
  const lines = script.split("\n");

  const isDuration = (v: unknown): v is number =>
    typeof v === "number" && Number.isFinite(v) && v >= 0;

  const inserts: Array<{ line: number; seconds: number }> = [];

  for (const raw of fixes as unknown[]) {
    if (typeof raw !== "object" || raw === null) continue;
    const fix = raw as Record<string, unknown>;

    const line = fix["line"];
    if (typeof line !== "number" || !Number.isInteger(line)) continue;

    if ("increase_to" in fix) {
      const increaseTo = fix["increase_to"];
      const idx = line - 1;
      if (isDuration(increaseTo) && idx >= 0 && idx < lines.length) {
        lines[idx] = lines[idx]!.replace(
          /spawnSync\("sleep",\s*\["\d+"\]/,
          `spawnSync("sleep", ["${increaseTo}"]`,
        );
      }
      continue;
    }

    const seconds = fix["seconds"];
    if (isDuration(seconds)) inserts.push({ line, seconds });
  }

  // Highest line first, so an insert never shifts a position that is still pending.
  inserts.sort((a, b) => b.line - a.line);
  for (const { line, seconds } of inserts) {
    const idx = line - 1;
    if (idx >= 0 && idx <= lines.length) {
      lines.splice(idx, 0, ` spawnSync("sleep", ["${seconds}"], { stdio: "inherit" });`);
    }
  }

  return lines.join("\n");
}
|
|
190
|
+
|
|
191
|
+
/**
 * Runs `bunx vitest run <scriptPath>` and collects its outcome.
 *
 * The child's stdout and stderr are captured in full and echoed to this process only after
 * the child has exited. The script file is re-read afterwards and returned as `currentScript`.
 *
 * @returns Exit code, stdout followed by stderr as one string, and the script's current source.
 */
async function runVitest(scriptPath: string): Promise<{ exitCode: number; output: string; currentScript: string }> {
  const child = Bun.spawn(["bunx", "vitest", "run", scriptPath], {
    stdout: "pipe",
    stderr: "pipe",
  });

  const stdoutPending = new Response(child.stdout).text();
  const stderrPending = new Response(child.stderr).text();
  const [stdoutText, stderrText, exitCode] = await Promise.all([
    stdoutPending,
    stderrPending,
    child.exited,
  ]);
  const currentScript = await Bun.file(scriptPath).text();

  process.stdout.write(stdoutText);
  if (stderrText) {
    process.stderr.write(stderrText);
  }

  return { exitCode, output: stdoutText + stderrText, currentScript };
}
|
|
208
|
+
|
|
209
|
+
/**
 * Asks the LLM to clean up the recorded actions.
 *
 * Best effort: the original `actions` are returned whenever the call reports an error,
 * the reply is empty, it does not parse to a non-empty JSON array, or anything throws.
 */
async function cleanupActions(actions: TraceAction[]): Promise<TraceAction[]> {
  try {
    const { result, isError } = await invokeClaudeStreaming(
      { prompt: buildCleanupPrompt(actions), disableBuiltinTools: true, maxTurns: 1 },
      () => {},
    );
    if (!isError && result) {
      // The reply may be wrapped in a ``` / ```json fence; unwrap it before parsing.
      const payload = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
      const cleaned = JSON.parse(payload) as TraceAction[];
      if (Array.isArray(cleaned) && cleaned.length > 0) return cleaned;
    }
  } catch {
    // Deliberately ignored: fall back to the unmodified actions below.
  }
  return actions;
}
|
package/src/cli/index.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
import { traceCommand } from "./trace.ts";
|
|
3
|
+
import { generateCommand } from "./generate.ts";
|
|
4
|
+
import { runCommand } from "./run.ts";
|
|
5
|
+
import { traceSetupCommand } from "./trace-setup.ts";
|
|
6
|
+
import { generateSetupCommand } from "./generate-setup.ts";
|
|
7
|
+
|
|
8
|
+
// Root `ccqa` CLI: registers every subcommand, then parses the process arguments.
const program = new Command();

program.name("ccqa");
program.description("E2E test CLI using Claude Code + agent-browser");
program.version("0.1.0");

const subcommands = [
  traceCommand,
  generateCommand,
  runCommand,
  traceSetupCommand,
  generateSetupCommand,
];
for (const subcommand of subcommands) {
  program.addCommand(subcommand);
}

program.parse();
|