comfy-qa 2.3.0 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/browser-agent.ts +130 -55
- package/src/agent/orchestrator.ts +111 -70
- package/src/agent/research.ts +1 -45
- package/src/recorder/narration.ts +9 -2
- package/src/recorder/post-mix.ts +77 -57
- package/src/utils/llm.ts +41 -0
package/package.json
CHANGED
package/src/agent/browser-agent.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { Page } from "playwright";
|
|
2
2
|
import type { RecorderSession } from "../browser/recorder";
|
|
3
|
-
import type { TestScenario
|
|
3
|
+
import type { TestScenario } from "./research";
|
|
4
|
+
import { callLLM } from "../utils/llm";
|
|
4
5
|
|
|
5
6
|
/** An action the AI agent decides to take */
|
|
6
7
|
interface AgentAction {
|
|
@@ -61,70 +62,30 @@ async function capturePageState(page: Page): Promise<{
|
|
|
61
62
|
return { screenshot, a11yTree, url, title, consoleErrors: [] };
|
|
62
63
|
}
|
|
63
64
|
|
|
64
|
-
/** Ask Claude to decide the next action based on the current page state */
|
|
65
|
+
/** Ask Claude to decide the next action based on the current page state (live fallback) */
|
|
65
66
|
async function askAgentForAction(
|
|
66
67
|
scenario: TestScenario,
|
|
67
68
|
stepIndex: number,
|
|
68
69
|
pageState: { screenshot: string; a11yTree: string; url: string; title: string },
|
|
69
70
|
history: string[]
|
|
70
71
|
): Promise<AgentAction[]> {
|
|
71
|
-
const prompt = `You are a QA automation agent controlling a browser via Playwright
|
|
72
|
+
const prompt = `You are a QA automation agent controlling a browser via Playwright.
|
|
72
73
|
|
|
73
|
-
##
|
|
74
|
-
${scenario.
|
|
75
|
-
|
|
76
|
-
##
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
##
|
|
80
|
-
"${scenario.steps[stepIndex]}"
|
|
81
|
-
|
|
82
|
-
## Expected Outcome
|
|
83
|
-
${scenario.expectedOutcome}
|
|
84
|
-
|
|
85
|
-
## Playwright Hint
|
|
86
|
-
${scenario.playwrightHint}
|
|
87
|
-
|
|
88
|
-
## Page State
|
|
89
|
-
- URL: ${pageState.url}
|
|
90
|
-
- Title: ${pageState.title}
|
|
74
|
+
## Scenario: ${scenario.name}
|
|
75
|
+
## Step ${stepIndex + 1}/${scenario.steps.length}: "${scenario.steps[stepIndex]}"
|
|
76
|
+
## Playwright Hint: ${scenario.playwrightHint}
|
|
77
|
+
## URL: ${pageState.url}
|
|
78
|
+
## A11y Tree (truncated):
|
|
79
|
+
${pageState.a11yTree.slice(0, 2000)}
|
|
80
|
+
## History: ${history.slice(-5).join("; ") || "(start)"}
|
|
91
81
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
## Action History
|
|
96
|
-
${history.slice(-10).join("\n") || "(start)"}
|
|
97
|
-
|
|
98
|
-
---
|
|
99
|
-
|
|
100
|
-
Return a JSON array of 1-5 actions to execute for this step. Each action:
|
|
101
|
-
{
|
|
102
|
-
"type": "click" | "type" | "scroll" | "hover" | "wait" | "key" | "done",
|
|
103
|
-
"selector": "CSS selector or text content to target",
|
|
104
|
-
"text": "text to type (for type action)",
|
|
105
|
-
"x": number, "y": number (for coordinate-based click),
|
|
106
|
-
"key": "key name (for key action, e.g. Enter, Tab)",
|
|
107
|
-
"ms": milliseconds (for wait action),
|
|
108
|
-
"observation": "what you expect to see / what you observed"
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
Use "done" when the current step is complete and we should move to the next step.
|
|
112
|
-
If ComfyUI is not loaded or the page shows something unexpected, include an observation explaining what you see.
|
|
113
|
-
Return ONLY the JSON array.`;
|
|
114
|
-
|
|
115
|
-
const proc = Bun.spawn(["claude", "--print", "--model", "claude-sonnet-4-6"], {
|
|
116
|
-
stdin: new TextEncoder().encode(prompt),
|
|
117
|
-
stdout: "pipe",
|
|
118
|
-
stderr: "pipe",
|
|
119
|
-
});
|
|
120
|
-
const output = await new Response(proc.stdout).text();
|
|
121
|
-
await proc.exited;
|
|
82
|
+
Return a JSON array of 1-5 actions:
|
|
83
|
+
{"type":"click"|"type"|"scroll"|"hover"|"wait"|"key"|"done","selector":"...","text":"...","key":"...","ms":N,"observation":"..."}
|
|
84
|
+
Use "done" when the step is complete. Return ONLY the JSON array.`;
|
|
122
85
|
|
|
86
|
+
const output = await callLLM(prompt);
|
|
123
87
|
const jsonMatch = output.match(/\[[\s\S]*\]/);
|
|
124
|
-
if (!jsonMatch) {
|
|
125
|
-
return [{ type: "done", observation: `Agent could not parse response: ${output.slice(0, 200)}` }];
|
|
126
|
-
}
|
|
127
|
-
|
|
88
|
+
if (!jsonMatch) return [{ type: "done", observation: `No JSON in response` }];
|
|
128
89
|
try {
|
|
129
90
|
return JSON.parse(jsonMatch[0]) as AgentAction[];
|
|
130
91
|
} catch {
|
|
@@ -196,6 +157,120 @@ async function executeAction(page: Page, action: AgentAction): Promise<string> {
|
|
|
196
157
|
}
|
|
197
158
|
}
|
|
198
159
|
|
|
160
|
+
// ─── Pre-planning ──────────────────────────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
export interface PlannedStep {
|
|
163
|
+
stepText: string;
|
|
164
|
+
actions: AgentAction[];
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
export interface PlannedScenario {
|
|
168
|
+
scenarioIndex: number;
|
|
169
|
+
scenario: TestScenario;
|
|
170
|
+
steps: PlannedStep[];
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/** Pre-plan all actions for a scenario before recording starts (one LLM call, no browser) */
|
|
174
|
+
export async function prePlanScenario(
|
|
175
|
+
scenario: TestScenario,
|
|
176
|
+
scenarioIndex: number,
|
|
177
|
+
targetUrl: string,
|
|
178
|
+
): Promise<PlannedScenario> {
|
|
179
|
+
console.log(` [plan] Scenario ${scenarioIndex + 1}: ${scenario.name}`);
|
|
180
|
+
|
|
181
|
+
const prompt = `You are a Playwright automation expert. Plan concrete browser actions for this QA scenario.
|
|
182
|
+
Site URL: ${targetUrl}
|
|
183
|
+
|
|
184
|
+
## Scenario: ${scenario.name}
|
|
185
|
+
${scenario.description}
|
|
186
|
+
|
|
187
|
+
## Playwright Hint
|
|
188
|
+
${scenario.playwrightHint}
|
|
189
|
+
|
|
190
|
+
## Preconditions
|
|
191
|
+
${scenario.preconditions.join("\n")}
|
|
192
|
+
|
|
193
|
+
## Steps to automate
|
|
194
|
+
${scenario.steps.map((s, i) => `${i + 1}. ${s}`).join("\n")}
|
|
195
|
+
|
|
196
|
+
For each step, return concrete Playwright actions. Use real CSS selectors or accessible names from the site.
|
|
197
|
+
Prefer: getByRole, getByLabel, getByText, getByPlaceholder over brittle CSS selectors.
|
|
198
|
+
Express selectors as Playwright locator strings (e.g. "button:has-text('Submit')" or "[data-testid='search']").
|
|
199
|
+
|
|
200
|
+
Return a JSON array — one object per step:
|
|
201
|
+
[
|
|
202
|
+
{
|
|
203
|
+
"stepText": "exact step text",
|
|
204
|
+
"actions": [
|
|
205
|
+
{"type": "click"|"type"|"scroll"|"hover"|"wait"|"key", "selector": "...", "text": "...", "key": "...", "ms": N, "observation": "..."}
|
|
206
|
+
]
|
|
207
|
+
}
|
|
208
|
+
]
|
|
209
|
+
|
|
210
|
+
Return ONLY the JSON array. Be specific and actionable.`;
|
|
211
|
+
|
|
212
|
+
const output = await callLLM(prompt);
|
|
213
|
+
const jsonMatch = output.match(/\[[\s\S]*\]/);
|
|
214
|
+
if (jsonMatch) {
|
|
215
|
+
try {
|
|
216
|
+
const steps = JSON.parse(jsonMatch[0]) as PlannedStep[];
|
|
217
|
+
return { scenarioIndex, scenario, steps };
|
|
218
|
+
} catch {}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
// Fallback: one wait action per step so recording at least proceeds
|
|
222
|
+
return {
|
|
223
|
+
scenarioIndex,
|
|
224
|
+
scenario,
|
|
225
|
+
steps: scenario.steps.map((stepText) => ({
|
|
226
|
+
stepText,
|
|
227
|
+
actions: [{ type: "wait" as const, ms: 1500, observation: stepText }],
|
|
228
|
+
})),
|
|
229
|
+
};
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
/** Execute pre-planned actions and return actual elapsed ms per step */
|
|
233
|
+
export async function runScenarioWithPlan(
|
|
234
|
+
session: RecorderSession,
|
|
235
|
+
plan: PlannedScenario,
|
|
236
|
+
): Promise<{ success: boolean; log: string[]; stepTimingsMs: number[] }> {
|
|
237
|
+
const log: string[] = [];
|
|
238
|
+
const stepTimingsMs: number[] = [];
|
|
239
|
+
|
|
240
|
+
await session.step(`Scenario ${plan.scenarioIndex + 1}: ${plan.scenario.name}`);
|
|
241
|
+
await session.plan(plan.scenario.description);
|
|
242
|
+
log.push(`=== Scenario: ${plan.scenario.name} ===`);
|
|
243
|
+
|
|
244
|
+
for (let stepIdx = 0; stepIdx < plan.steps.length; stepIdx++) {
|
|
245
|
+
const planned = plan.steps[stepIdx];
|
|
246
|
+
if (!planned) continue;
|
|
247
|
+
const stepText = planned.stepText;
|
|
248
|
+
await session.status(`Step ${stepIdx + 1}/${plan.steps.length}: ${stepText}`);
|
|
249
|
+
log.push(`--- Step ${stepIdx + 1}: ${stepText} ---`);
|
|
250
|
+
|
|
251
|
+
const stepStart = Date.now();
|
|
252
|
+
|
|
253
|
+
for (const action of planned.actions) {
|
|
254
|
+
if (action.observation) {
|
|
255
|
+
log.push(` [observe] ${action.observation}`);
|
|
256
|
+
await session.annotate(200, 300, action.observation, 1500);
|
|
257
|
+
}
|
|
258
|
+
const result = await executeAction(session.page, action);
|
|
259
|
+
log.push(` [action] ${result}`);
|
|
260
|
+
await session.page.waitForTimeout(150); // minimal visual pause
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
stepTimingsMs.push(Date.now() - stepStart);
|
|
264
|
+
|
|
265
|
+
await session.screenshot(
|
|
266
|
+
`scenario-${String(plan.scenarioIndex + 1).padStart(2, "0")}-step-${String(stepIdx + 1).padStart(2, "0")}`
|
|
267
|
+
);
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
log.push(`=== Scenario complete ===`);
|
|
271
|
+
return { success: true, log, stepTimingsMs };
|
|
272
|
+
}
|
|
273
|
+
|
|
199
274
|
/** Run a full test scenario with AI-driven browser automation */
|
|
200
275
|
export async function runScenarioWithAgent(
|
|
201
276
|
session: RecorderSession,
|
|
package/src/agent/orchestrator.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { fetchPR, fetchIssue, parseRef, fetchDeploymentPreviewUrl } from "../uti
|
|
|
4
4
|
import { detectRunningInstance, bootstrapWorkspace, type ComfyUIInstance, COMFYUI_REPOS, REPO_PROD_URLS } from "../utils/comfyui";
|
|
5
5
|
import { researchPR, researchIssue } from "./research";
|
|
6
6
|
import { startRecorder, navigateWithHUD } from "../browser/recorder";
|
|
7
|
-
import { runScenarioWithAgent, runScenarioResearchOnly } from "./browser-agent";
|
|
7
|
+
import { runScenarioWithAgent, runScenarioResearchOnly, prePlanScenario, runScenarioWithPlan, type PlannedScenario } from "./browser-agent";
|
|
8
8
|
import { saveReport } from "../report/generate";
|
|
9
9
|
import { generateE2ETest } from "../report/e2e-test";
|
|
10
10
|
import { ensureQASkill } from "../utils/qa-skill";
|
|
@@ -137,111 +137,152 @@ export async function runQA(opts: QAOptions): Promise<void> {
|
|
|
137
137
|
}
|
|
138
138
|
}
|
|
139
139
|
|
|
140
|
-
// Pre-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
text: `Step ${j + 1}: ${step.slice(0, 150)}`,
|
|
150
|
-
})),
|
|
151
|
-
]),
|
|
152
|
-
{ id: "outro", text: `QA session complete. Report and video evidence saved.` },
|
|
153
|
-
];
|
|
154
|
-
|
|
155
|
-
const narration = await generateNarration(narrationSegments, outputDir);
|
|
140
|
+
// [3a] Pre-plan all scenario actions before recording (parallel LLM calls, no browser yet)
|
|
141
|
+
let plans: PlannedScenario[] = [];
|
|
142
|
+
if (comfyUrl) {
|
|
143
|
+
console.log(`\n[3a/5] Pre-planning ${research.testScenarios.length} scenarios…`);
|
|
144
|
+
plans = await Promise.all(
|
|
145
|
+
research.testScenarios.map((s, i) => prePlanScenario(s, i, comfyUrl!))
|
|
146
|
+
);
|
|
147
|
+
console.log(` ✓ Plans ready`);
|
|
148
|
+
}
|
|
156
149
|
|
|
150
|
+
// [3b] Record — execute pre-planned actions (no LLM calls on the hot path)
|
|
151
|
+
// Narration is generated AFTER recording using real step timings for perfect sync.
|
|
157
152
|
const session = await startRecorder(outputDir, `qa-${parsed.number}`);
|
|
158
|
-
|
|
159
|
-
|
|
153
|
+
|
|
154
|
+
// Timing markers collected during recording, used for narration after
|
|
155
|
+
const introStartMs = Date.now();
|
|
156
|
+
let githubDoneMs = 0;
|
|
157
|
+
let analysisDoneMs = 0;
|
|
158
|
+
const scenarioStartMs: number[] = [];
|
|
159
|
+
const stepTimings: number[][] = []; // [scenarioIdx][stepIdx] = elapsed ms
|
|
160
160
|
|
|
161
161
|
try {
|
|
162
|
-
|
|
163
|
-
if (narration) await session.narrate("intro", `Opening ${target.url}`);
|
|
164
|
-
else await session.step(`Opening ${target.url}`);
|
|
162
|
+
await session.step(`Opening ${target.url}`);
|
|
165
163
|
await navigateWithHUD(session, target.url, `QA: ${targetType.toUpperCase()} #${parsed.number}`);
|
|
166
164
|
await session.plan(`Analyzing: ${target.title}`);
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
if (narration) await session.narrate("analysis", `${research.severity} severity`);
|
|
165
|
+
await session.page.waitForTimeout(2000);
|
|
166
|
+
githubDoneMs = Date.now();
|
|
170
167
|
await session.screenshot("01-github-page");
|
|
168
|
+
analysisDoneMs = Date.now();
|
|
171
169
|
|
|
172
|
-
if (comfyUrl) {
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
await session
|
|
176
|
-
await navigateWithHUD(session, comfyUrl, `ComfyUI QA — ${targetType.toUpperCase()} #${parsed.number}`);
|
|
170
|
+
if (comfyUrl && plans.length > 0) {
|
|
171
|
+
console.log(` [mode] Pre-planned QA against ${comfyUrl}`);
|
|
172
|
+
await session.step(`Navigating to ${comfyUrl}`);
|
|
173
|
+
await navigateWithHUD(session, comfyUrl, `QA — ${targetType.toUpperCase()} #${parsed.number}`);
|
|
177
174
|
await session.page.waitForTimeout(2000);
|
|
178
|
-
await session.screenshot("02-
|
|
179
|
-
|
|
180
|
-
for (let i = 0; i < research.testScenarios.length; i++) {
|
|
181
|
-
const scenario = research.testScenarios[i];
|
|
182
|
-
console.log(` [scenario ${i + 1}/${research.testScenarios.length}] ${scenario.name}`);
|
|
175
|
+
await session.screenshot("02-target-loaded");
|
|
183
176
|
|
|
184
|
-
|
|
177
|
+
for (const plan of plans) {
|
|
178
|
+
scenarioStartMs.push(Date.now());
|
|
179
|
+
const result = await runScenarioWithPlan(session, plan);
|
|
180
|
+
stepTimings.push(result.stepTimingsMs);
|
|
185
181
|
allLogs.push(...result.log);
|
|
186
182
|
|
|
183
|
+
if (plan.scenarioIndex < plans.length - 1) {
|
|
184
|
+
await session.page.goto(comfyUrl, { waitUntil: "domcontentloaded", timeout: 15000 });
|
|
185
|
+
await session.page.waitForTimeout(500);
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
} else if (comfyUrl) {
|
|
189
|
+
// Fallback to live agent if pre-planning produced no plans
|
|
190
|
+
console.log(` [mode] Live agent QA against ${comfyUrl}`);
|
|
191
|
+
await navigateWithHUD(session, comfyUrl, `QA — ${targetType.toUpperCase()} #${parsed.number}`);
|
|
192
|
+
await session.page.waitForTimeout(2000);
|
|
193
|
+
await session.screenshot("02-target-loaded");
|
|
194
|
+
for (let i = 0; i < research.testScenarios.length; i++) {
|
|
195
|
+
scenarioStartMs.push(Date.now());
|
|
196
|
+
const result = await runScenarioWithAgent(session, research.testScenarios[i]!, i);
|
|
197
|
+
stepTimings.push([]);
|
|
198
|
+
allLogs.push(...result.log);
|
|
187
199
|
if (i < research.testScenarios.length - 1) {
|
|
188
200
|
await session.page.goto(comfyUrl, { waitUntil: "domcontentloaded", timeout: 15000 });
|
|
189
|
-
await session.page.waitForTimeout(
|
|
201
|
+
await session.page.waitForTimeout(500);
|
|
190
202
|
}
|
|
191
203
|
}
|
|
192
204
|
} else {
|
|
193
|
-
|
|
194
|
-
console.log(` [mode] Research-only (no ComfyUI instance available)`);
|
|
195
|
-
|
|
205
|
+
console.log(` [mode] Research-only (no target URL)`);
|
|
196
206
|
await session.step("Scrolling through issue details");
|
|
197
207
|
for (let scroll = 0; scroll < 3; scroll++) {
|
|
198
208
|
await session.page.mouse.wheel(0, 400);
|
|
199
209
|
await session.page.waitForTimeout(1000);
|
|
200
210
|
}
|
|
201
211
|
await session.screenshot("02-github-details");
|
|
202
|
-
|
|
203
212
|
for (let i = 0; i < research.testScenarios.length; i++) {
|
|
204
|
-
const
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
if (narration) {
|
|
208
|
-
await session.narrate(`scenario-${i + 1}-intro`, `Scenario ${i + 1}: ${scenario.name}`);
|
|
209
|
-
for (let j = 0; j < Math.min(scenario.steps.length, 5); j++) {
|
|
210
|
-
await session.narrate(`scenario-${i + 1}-step-${j + 1}`, `Step ${j + 1}: ${scenario.steps[j].slice(0, 80)}`);
|
|
211
|
-
}
|
|
212
|
-
await session.screenshot(`scenario-${String(i + 1).padStart(2, "0")}-plan`);
|
|
213
|
-
} else {
|
|
214
|
-
const result = await runScenarioResearchOnly(session, scenario, i);
|
|
215
|
-
allLogs.push(...result.log);
|
|
216
|
-
}
|
|
213
|
+
const result = await runScenarioResearchOnly(session, research.testScenarios[i]!, i);
|
|
214
|
+
allLogs.push(...result.log);
|
|
217
215
|
}
|
|
218
216
|
}
|
|
219
217
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
await session.
|
|
223
|
-
await session.page.waitForTimeout(1500);
|
|
218
|
+
await session.step("QA Session complete");
|
|
219
|
+
await session.status("QA finished");
|
|
220
|
+
await session.page.waitForTimeout(1000);
|
|
224
221
|
await session.screenshot("99-final");
|
|
225
222
|
screenshots = session.screenshots;
|
|
226
223
|
} finally {
|
|
227
|
-
const demoStartMs = session.getDemoStartMs();
|
|
228
224
|
await session.stop();
|
|
229
|
-
if (bootstrappedInstance)
|
|
230
|
-
await bootstrappedInstance.stop();
|
|
231
|
-
}
|
|
225
|
+
if (bootstrappedInstance) await bootstrappedInstance.stop();
|
|
232
226
|
const webm = path.join(outputDir, `qa-${parsed.number}.webm`);
|
|
233
227
|
if (fs.existsSync(webm)) videoPath = webm;
|
|
228
|
+
}
|
|
234
229
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
230
|
+
// [3c] Generate narration using REAL step timings measured during recording
|
|
231
|
+
if (videoPath) {
|
|
232
|
+
const recordingStart = introStartMs;
|
|
233
|
+
const toVideoMs = (absMs: number) => Math.max(0, absMs - recordingStart);
|
|
234
|
+
|
|
235
|
+
const narrationSegments: NarrationSegment[] = [
|
|
236
|
+
{
|
|
237
|
+
id: "intro",
|
|
238
|
+
text: `Welcome to comfy QA. Reviewing ${targetType} ${parsed.number}: ${target.title.slice(0, 100)}`,
|
|
239
|
+
startMs: 0,
|
|
240
|
+
},
|
|
241
|
+
{
|
|
242
|
+
id: "github",
|
|
243
|
+
text: `First, the GitHub ${targetType} page for context.`,
|
|
244
|
+
startMs: toVideoMs(githubDoneMs),
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
id: "analysis",
|
|
248
|
+
text: `Severity ${research.severity}. Affected area: ${research.affectedArea}.`,
|
|
249
|
+
startMs: toVideoMs(analysisDoneMs),
|
|
250
|
+
},
|
|
251
|
+
...research.testScenarios.flatMap((s, i): NarrationSegment[] => {
|
|
252
|
+
const scenStepTimings = stepTimings[i] ?? [];
|
|
253
|
+
const scenStart = scenarioStartMs[i] ?? analysisDoneMs;
|
|
254
|
+
// Accumulate step start times within the scenario
|
|
255
|
+
let stepCursor = toVideoMs(scenStart);
|
|
256
|
+
return [
|
|
257
|
+
{
|
|
258
|
+
id: `scenario-${i + 1}-intro`,
|
|
259
|
+
text: `Scenario ${i + 1}: ${s.name}. ${s.description.slice(0, 120)}`,
|
|
260
|
+
startMs: stepCursor,
|
|
261
|
+
},
|
|
262
|
+
...s.steps.slice(0, 5).map((step, j) => {
|
|
263
|
+
const start = stepCursor;
|
|
264
|
+
stepCursor += scenStepTimings[j] ?? 2000;
|
|
265
|
+
return {
|
|
266
|
+
id: `scenario-${i + 1}-step-${j + 1}`,
|
|
267
|
+
text: `Step ${j + 1}: ${step.slice(0, 150)}`,
|
|
268
|
+
startMs: start,
|
|
269
|
+
};
|
|
270
|
+
}),
|
|
271
|
+
];
|
|
272
|
+
}),
|
|
273
|
+
{ id: "outro", text: `QA session complete. Report and video evidence saved.`, startMs: toVideoMs(Date.now()) },
|
|
274
|
+
];
|
|
275
|
+
|
|
276
|
+
try {
|
|
277
|
+
console.log(`\n[3d/5] Generating narration from real timings…`);
|
|
278
|
+
const narration = await generateNarration(narrationSegments, outputDir);
|
|
279
|
+
if (narration) {
|
|
280
|
+
const finalPath = path.join(outputDir, `qa-${parsed.number}.mp4`);
|
|
281
|
+
await postMix(videoPath, narration.trackPath, narration.metaPath, 0, finalPath);
|
|
241
282
|
videoPath = finalPath;
|
|
242
|
-
} catch (err) {
|
|
243
|
-
console.log(` [post-mix] Failed: ${String(err).slice(0, 200)}`);
|
|
244
283
|
}
|
|
284
|
+
} catch (err) {
|
|
285
|
+
console.log(` [narration] Failed: ${String(err).slice(0, 200)}`);
|
|
245
286
|
}
|
|
246
287
|
}
|
|
247
288
|
|
package/src/agent/research.ts
CHANGED
|
@@ -1,49 +1,5 @@
|
|
|
1
|
-
import { $ } from "bun";
|
|
2
1
|
import type { PRInfo, IssueInfo } from "../utils/github";
|
|
3
|
-
|
|
4
|
-
const OPENROUTER_KEY = process.env.OPENROUTER_API_KEY ?? "";
|
|
5
|
-
const OPENROUTER_MODEL = process.env.OPENROUTER_MODEL ?? "openai/gpt-4.5";
|
|
6
|
-
|
|
7
|
-
async function callClaude(prompt: string): Promise<string> {
|
|
8
|
-
// Prefer OpenRouter
|
|
9
|
-
if (OPENROUTER_KEY) {
|
|
10
|
-
const res = await fetch("https://openrouter.ai/api/v1/chat/completions", {
|
|
11
|
-
method: "POST",
|
|
12
|
-
headers: { Authorization: `Bearer ${OPENROUTER_KEY}`, "content-type": "application/json" },
|
|
13
|
-
body: JSON.stringify({
|
|
14
|
-
model: OPENROUTER_MODEL,
|
|
15
|
-
messages: [{ role: "user", content: prompt }],
|
|
16
|
-
max_tokens: 4096,
|
|
17
|
-
}),
|
|
18
|
-
});
|
|
19
|
-
const json = (await res.json()) as any;
|
|
20
|
-
if (json.choices?.[0]?.message?.content) return json.choices[0].message.content;
|
|
21
|
-
console.log(` âš OpenRouter: ${json.error?.message?.slice(0, 80) ?? "empty response"}`);
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
// Fallback: Anthropic SDK
|
|
25
|
-
const apiKey = process.env.ANTHROPIC_API_KEY_QA ?? process.env.ANTHROPIC_API_KEY;
|
|
26
|
-
if (apiKey) {
|
|
27
|
-
const Anthropic = (await import("@anthropic-ai/sdk")).default;
|
|
28
|
-
const client = new Anthropic({ apiKey });
|
|
29
|
-
const response = await client.messages.create({
|
|
30
|
-
model: "claude-opus-4-6",
|
|
31
|
-
max_tokens: 4096,
|
|
32
|
-
messages: [{ role: "user", content: prompt }],
|
|
33
|
-
});
|
|
34
|
-
return response.content[0].type === "text" ? response.content[0].text : "";
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
// Last resort: claude CLI
|
|
38
|
-
const proc = Bun.spawn(["claude", "--print", "--model", "claude-opus-4-6"], {
|
|
39
|
-
stdin: new TextEncoder().encode(prompt),
|
|
40
|
-
stdout: "pipe",
|
|
41
|
-
stderr: "pipe",
|
|
42
|
-
});
|
|
43
|
-
const output = await new Response(proc.stdout).text();
|
|
44
|
-
await proc.exited;
|
|
45
|
-
return output;
|
|
46
|
-
}
|
|
2
|
+
import { callLLM as callClaude } from "../utils/llm";
|
|
47
3
|
|
|
48
4
|
/** Extract JSON from Claude response, handling code blocks and markdown wrapping */
|
|
49
5
|
function parseResearchJSON(text: string): ResearchResult {
|
|
package/src/recorder/narration.ts
CHANGED
|
@@ -15,6 +15,9 @@ const GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
|
|
|
15
15
|
export interface NarrationSegment {
|
|
16
16
|
id: string;
|
|
17
17
|
text: string;
|
|
18
|
+
/** When this segment should start in the video (ms from recording start).
|
|
19
|
+
* If omitted, segments are concatenated sequentially. */
|
|
20
|
+
startMs?: number;
|
|
18
21
|
}
|
|
19
22
|
|
|
20
23
|
export interface NarrationResult {
|
|
@@ -166,9 +169,13 @@ export async function generateNarration(
|
|
|
166
169
|
|
|
167
170
|
const totalDurationMs = meta.reduce((sum, m) => sum + m.durationMs, 0);
|
|
168
171
|
|
|
169
|
-
// Save meta for subtitle generation
|
|
172
|
+
// Save meta for subtitle generation — include startMs for timed post-mix
|
|
170
173
|
const metaPath = path.join(narrationDir, "meta.json");
|
|
171
|
-
|
|
174
|
+
const segmentsWithStart = meta.map((m, i) => ({
|
|
175
|
+
...m,
|
|
176
|
+
startMs: segments[i]?.startMs ?? undefined,
|
|
177
|
+
}));
|
|
178
|
+
fs.writeFileSync(metaPath, JSON.stringify({ segments: segmentsWithStart, totalDurationMs }, null, 2));
|
|
172
179
|
|
|
173
180
|
console.log(` [narration] Track: ${trackPath} (${(totalDurationMs / 1000).toFixed(1)}s)`);
|
|
174
181
|
|
package/src/recorder/post-mix.ts
CHANGED
|
@@ -1,72 +1,61 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Post-mix narration audio onto the recorded video
|
|
3
|
-
* Uses ffmpeg adelay filter for sync — single offset = (demo_start - ffmpeg_start) ms.
|
|
2
|
+
* Post-mix narration audio onto the recorded video.
|
|
4
3
|
*
|
|
5
|
-
*
|
|
4
|
+
* Two modes:
|
|
5
|
+
* - Timed: each narration segment placed at its measured video timestamp (startMs).
|
|
6
|
+
* - Sequential (fallback): concatenated track delayed by offsetMs.
|
|
7
|
+
*
|
|
8
|
+
* Subtitles are embedded as mov_text soft stream (visible in VLC, browsers, Gemini)
|
|
9
|
+
* AND written as a VTT sidecar for the web player <track> element.
|
|
6
10
|
*/
|
|
7
11
|
import { $ } from "bun";
|
|
8
12
|
import * as fs from "fs";
|
|
9
13
|
import * as path from "path";
|
|
10
14
|
|
|
15
|
+
interface MetaSegment {
|
|
16
|
+
id: string;
|
|
17
|
+
text: string;
|
|
18
|
+
durationMs: number;
|
|
19
|
+
startMs?: number;
|
|
20
|
+
}
|
|
21
|
+
|
|
11
22
|
interface Meta {
|
|
12
|
-
segments:
|
|
23
|
+
segments: MetaSegment[];
|
|
13
24
|
totalDurationMs: number;
|
|
14
25
|
}
|
|
15
26
|
|
|
16
|
-
/** Format ms as SRT timestamp HH:MM:SS,mmm */
|
|
17
27
|
function srtTime(ms: number): string {
|
|
18
28
|
const h = Math.floor(ms / 3600000);
|
|
19
29
|
const m = Math.floor((ms % 3600000) / 60000);
|
|
20
30
|
const s = Math.floor((ms % 60000) / 1000);
|
|
21
|
-
const
|
|
22
|
-
return `${String(h).padStart(2,
|
|
31
|
+
const r = ms % 1000;
|
|
32
|
+
return `${String(h).padStart(2,"0")}:${String(m).padStart(2,"0")}:${String(s).padStart(2,"0")},${String(r).padStart(3,"0")}`;
|
|
23
33
|
}
|
|
24
34
|
|
|
25
|
-
/** Format ms as WebVTT timestamp HH:MM:SS.mmm */
|
|
26
35
|
function vttTime(ms: number): string {
|
|
27
36
|
return srtTime(ms).replace(",", ".");
|
|
28
37
|
}
|
|
29
38
|
|
|
30
|
-
/**
|
|
31
|
-
function
|
|
32
|
-
const
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
const start = cursor;
|
|
36
|
-
const end = cursor + seg.durationMs;
|
|
37
|
-
lines.push(String(i + 1));
|
|
38
|
-
lines.push(`${srtTime(start)} --> ${srtTime(end)}`);
|
|
39
|
-
lines.push(seg.text);
|
|
40
|
-
lines.push("");
|
|
41
|
-
cursor = end;
|
|
42
|
-
});
|
|
43
|
-
fs.writeFileSync(outPath, lines.join("\n"));
|
|
44
|
-
}
|
|
39
|
+
/** Write narration.srt + narration.vtt to outDir. Returns srtPath. */
|
|
40
|
+
function writeSubtitleFiles(meta: Meta, fallbackOffsetMs: number, outDir: string): string {
|
|
41
|
+
const srtLines: string[] = [];
|
|
42
|
+
const vttLines: string[] = ["WEBVTT", ""];
|
|
43
|
+
let cursor = fallbackOffsetMs;
|
|
45
44
|
|
|
46
|
-
/** Generate WebVTT subtitle file from meta + initial offset (browser-native, no libass) */
|
|
47
|
-
export function generateVtt(meta: Meta, offsetMs: number, outPath: string): void {
|
|
48
|
-
const lines: string[] = ["WEBVTT", ""];
|
|
49
|
-
let cursor = offsetMs;
|
|
50
45
|
meta.segments.forEach((seg, i) => {
|
|
51
|
-
const start = cursor;
|
|
52
|
-
const end =
|
|
53
|
-
lines.push(String(i + 1));
|
|
54
|
-
lines.push(`${vttTime(start)} --> ${vttTime(end)}`);
|
|
55
|
-
lines.push(seg.text);
|
|
56
|
-
lines.push("");
|
|
46
|
+
const start = seg.startMs ?? cursor;
|
|
47
|
+
const end = start + seg.durationMs;
|
|
57
48
|
cursor = end;
|
|
49
|
+
srtLines.push(String(i + 1), `${srtTime(start)} --> ${srtTime(end)}`, seg.text, "");
|
|
50
|
+
vttLines.push(String(i + 1), `${vttTime(start)} --> ${vttTime(end)}`, seg.text, "");
|
|
58
51
|
});
|
|
59
|
-
|
|
52
|
+
|
|
53
|
+
const srtPath = path.join(outDir, "narration.srt");
|
|
54
|
+
fs.writeFileSync(srtPath, srtLines.join("\n"));
|
|
55
|
+
fs.writeFileSync(path.join(outDir, "narration.vtt"), vttLines.join("\n"));
|
|
56
|
+
return srtPath;
|
|
60
57
|
}
|
|
61
58
|
|
|
62
|
-
/**
|
|
63
|
-
* Mix audio + subtitles onto video.
|
|
64
|
-
* @param videoPath path to silent recorded video (webm/mp4)
|
|
65
|
-
* @param trackPath narration_track.wav from generateNarration
|
|
66
|
-
* @param metaPath meta.json from generateNarration
|
|
67
|
-
* @param offsetMs delay to apply to audio (when narration starts in video timeline)
|
|
68
|
-
* @param outPath output video path
|
|
69
|
-
*/
|
|
70
59
|
export async function postMix(
|
|
71
60
|
videoPath: string,
|
|
72
61
|
trackPath: string,
|
|
@@ -75,24 +64,55 @@ export async function postMix(
|
|
|
75
64
|
outPath: string
|
|
76
65
|
): Promise<void> {
|
|
77
66
|
const meta: Meta = JSON.parse(fs.readFileSync(metaPath, "utf-8"));
|
|
67
|
+
const outDir = path.dirname(outPath);
|
|
78
68
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
69
|
+
const timedMode = meta.segments.length > 0 && meta.segments.every((s) => s.startMs != null);
|
|
70
|
+
console.log(` [post-mix] Mode: ${timedMode ? "timed" : "sequential"} | ${meta.segments.length} segments`);
|
|
71
|
+
|
|
72
|
+
// Write subtitle files first (SRT embedded into mp4, VTT served as sidecar)
|
|
73
|
+
const srtPath = writeSubtitleFiles(meta, offsetMs, outDir);
|
|
83
74
|
|
|
84
|
-
|
|
85
|
-
|
|
75
|
+
if (timedMode) {
|
|
76
|
+
const narrationDir = path.dirname(trackPath);
|
|
77
|
+
const segWavs = meta.segments.map((s) => path.join(narrationDir, `${s.id}.wav`));
|
|
78
|
+
const missing = segWavs.filter((p) => !fs.existsSync(p));
|
|
86
79
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
80
|
+
if (missing.length === 0) {
|
|
81
|
+
// Build per-segment adelay filter
|
|
82
|
+
const audioInputs = segWavs.flatMap((p) => ["-i", p]);
|
|
83
|
+
const n = meta.segments.length;
|
|
84
|
+
const delays = meta.segments.map((s, i) =>
|
|
85
|
+
`[${i + 1}:a]adelay=${s.startMs}|${s.startMs}[a${i}]`
|
|
86
|
+
).join(";");
|
|
87
|
+
const mixIn = meta.segments.map((_, i) => `[a${i}]`).join("");
|
|
88
|
+
const audioFilter = `${delays};${mixIn}amix=inputs=${n}:normalize=0[aout]`;
|
|
89
|
+
const srtInputIdx = n + 1;
|
|
93
90
|
|
|
94
|
-
|
|
95
|
-
|
|
91
|
+
console.log(` [post-mix] Timed mix + subtitle embed…`);
|
|
92
|
+
await $`ffmpeg -y -i ${videoPath} ${audioInputs} -i ${srtPath} \
|
|
93
|
+
-filter_complex ${audioFilter} \
|
|
94
|
+
-map 0:v -map [aout] -map ${String(srtInputIdx)}:s \
|
|
95
|
+
-c:v libx264 -preset fast -pix_fmt yuv420p \
|
|
96
|
+
-c:a aac -b:a 128k \
|
|
97
|
+
-c:s mov_text -metadata:s:s:0 language=eng \
|
|
98
|
+
-shortest ${outPath}`.quiet();
|
|
99
|
+
|
|
100
|
+
console.log(` [post-mix] ✓ ${outPath}`);
|
|
101
|
+
return;
|
|
102
|
+
}
|
|
103
|
+
console.log(` [post-mix] Missing ${missing.length} WAVs — falling back to sequential`);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Sequential mode
|
|
107
|
+
const adelay = `${offsetMs}|${offsetMs}`;
|
|
108
|
+
console.log(` [post-mix] Sequential mix + subtitle embed…`);
|
|
109
|
+
await $`ffmpeg -y -i ${videoPath} -i ${trackPath} -i ${srtPath} \
|
|
110
|
+
-filter_complex ${`[1:a]adelay=${adelay}[aout]`} \
|
|
111
|
+
-map 0:v -map [aout] -map 2:s \
|
|
112
|
+
-c:v libx264 -preset fast -pix_fmt yuv420p \
|
|
113
|
+
-c:a aac -b:a 128k \
|
|
114
|
+
-c:s mov_text -metadata:s:s:0 language=eng \
|
|
115
|
+
-shortest ${outPath}`.quiet();
|
|
96
116
|
|
|
97
|
-
console.log(` [post-mix]
|
|
117
|
+
console.log(` [post-mix] ✓ ${outPath}`);
|
|
98
118
|
}
|
package/src/utils/llm.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
const OPENROUTER_KEY = process.env.OPENROUTER_API_KEY ?? "";
|
|
2
|
+
const OPENROUTER_MODEL = process.env.OPENROUTER_MODEL ?? "openai/gpt-4.5";
|
|
3
|
+
|
|
4
|
+
export async function callLLM(prompt: string): Promise<string> {
|
|
5
|
+
if (OPENROUTER_KEY) {
|
|
6
|
+
const res = await fetch("https://openrouter.ai/api/v1/chat/completions", {
|
|
7
|
+
method: "POST",
|
|
8
|
+
headers: { Authorization: `Bearer ${OPENROUTER_KEY}`, "content-type": "application/json" },
|
|
9
|
+
body: JSON.stringify({
|
|
10
|
+
model: OPENROUTER_MODEL,
|
|
11
|
+
messages: [{ role: "user", content: prompt }],
|
|
12
|
+
max_tokens: 4096,
|
|
13
|
+
}),
|
|
14
|
+
});
|
|
15
|
+
const json = (await res.json()) as any;
|
|
16
|
+
if (json.choices?.[0]?.message?.content) return json.choices[0].message.content;
|
|
17
|
+
console.log(` âš OpenRouter: ${json.error?.message?.slice(0, 80) ?? "empty response"}`);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
const apiKey = process.env.ANTHROPIC_API_KEY_QA ?? process.env.ANTHROPIC_API_KEY;
|
|
21
|
+
if (apiKey) {
|
|
22
|
+
const Anthropic = (await import("@anthropic-ai/sdk")).default;
|
|
23
|
+
const client = new Anthropic({ apiKey });
|
|
24
|
+
const response = await client.messages.create({
|
|
25
|
+
model: "claude-opus-4-6",
|
|
26
|
+
max_tokens: 4096,
|
|
27
|
+
messages: [{ role: "user", content: prompt }],
|
|
28
|
+
});
|
|
29
|
+
const block = response.content[0];
|
|
30
|
+
return block?.type === "text" ? block.text : "";
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
const proc = Bun.spawn(["claude", "--print", "--model", "claude-opus-4-6"], {
|
|
34
|
+
stdin: new TextEncoder().encode(prompt),
|
|
35
|
+
stdout: "pipe",
|
|
36
|
+
stderr: "pipe",
|
|
37
|
+
});
|
|
38
|
+
const output = await new Response(proc.stdout).text();
|
|
39
|
+
await proc.exited;
|
|
40
|
+
return output;
|
|
41
|
+
}
|