comfy-qa 1.5.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "comfy-qa",
3
- "version": "1.5.0",
3
+ "version": "1.5.2",
4
4
  "description": "ComfyUI QA automation CLI",
5
5
  "repository": {
6
6
  "type": "git",
@@ -80,46 +80,48 @@ interface ResearchResults {
80
80
  const ANTHROPIC_KEY = process.env.ANTHROPIC_API_KEY_QA ?? process.env.ANTHROPIC_API_KEY ?? "";
81
81
  const OPENROUTER_KEY = process.env.OPENROUTER_API_KEY ?? "";
82
82
 
83
+ import Anthropic from "@anthropic-ai/sdk";
84
+
85
+ const anthropicClient = ANTHROPIC_KEY ? new Anthropic({ apiKey: ANTHROPIC_KEY, timeout: 60_000 }) : null;
86
+
83
87
  async function callLLM(system: string, messages: any[]): Promise<string> {
84
- if (ANTHROPIC_KEY) {
88
+ if (anthropicClient) {
85
89
  try {
86
- const res = await fetch("https://api.anthropic.com/v1/messages", {
90
+ const res = await anthropicClient.messages.create({
91
+ model: "claude-sonnet-4-20250514",
92
+ max_tokens: 8192,
93
+ system,
94
+ messages,
95
+ });
96
+ return res.content?.[0]?.type === "text" ? res.content[0].text : "";
97
+ } catch (err: any) {
98
+ console.log(` ⚠ Anthropic SDK: ${err.message?.slice(0, 80)}`);
99
+ }
100
+ }
101
+
102
+ if (OPENROUTER_KEY) {
103
+ try {
104
+ const controller = new AbortController();
105
+ const timer = setTimeout(() => controller.abort(), 60_000);
106
+ const res = await fetch("https://openrouter.ai/api/v1/chat/completions", {
87
107
  method: "POST",
88
- headers: {
89
- "x-api-key": ANTHROPIC_KEY,
90
- "anthropic-version": "2023-06-01",
91
- "content-type": "application/json",
92
- },
108
+ signal: controller.signal,
109
+ headers: { Authorization: `Bearer ${OPENROUTER_KEY}`, "content-type": "application/json" },
93
110
  body: JSON.stringify({
94
- model: "claude-sonnet-4-20250514",
111
+ model: "anthropic/claude-sonnet-4-20250514",
112
+ messages: [{ role: "system", content: system }, ...messages],
95
113
  max_tokens: 8192,
96
- system,
97
- messages,
98
114
  }),
99
115
  });
116
+ clearTimeout(timer);
100
117
  const json = (await res.json()) as any;
101
- return json.content?.[0]?.text ?? "";
102
- } catch {}
103
- }
104
-
105
- if (OPENROUTER_KEY) {
106
- const res = await fetch("https://openrouter.ai/api/v1/chat/completions", {
107
- method: "POST",
108
- headers: {
109
- Authorization: `Bearer ${OPENROUTER_KEY}`,
110
- "content-type": "application/json",
111
- },
112
- body: JSON.stringify({
113
- model: "anthropic/claude-sonnet-4-20250514",
114
- messages: [{ role: "system", content: system }, ...messages],
115
- max_tokens: 2048,
116
- }),
117
- });
118
- const json = (await res.json()) as any;
119
- return json.choices?.[0]?.message?.content ?? "";
118
+ return json.choices?.[0]?.message?.content ?? "";
119
+ } catch (err: any) {
120
+ console.log(` ⚠ OpenRouter: ${err.message?.slice(0, 60)}`);
121
+ }
120
122
  }
121
123
 
122
- throw new Error("No API key (ANTHROPIC_API_KEY_QA or OPENROUTER_API_KEY)");
124
+ return "";
123
125
  }
124
126
 
125
127
  // ---------------------------------------------------------------------------
@@ -229,20 +231,33 @@ async function testOperation(
229
231
  try {
230
232
  const state = await captureState(page);
231
233
 
232
- const systemPrompt = `You are a QA tester. Test a specific operation on a website.
234
+ const systemPrompt = `You are a QA tester recording a video demo of a website.
233
235
 
234
236
  Product: ${checklist.product}
235
237
 
236
238
  RULES:
237
239
  - Headless browser, NO URL bar. Use {"type": "goto", "text": "url"} to navigate.
238
240
  - Use simple CSS selectors. Maximum 5 actions.
239
- - Set "success": true ONLY if success criteria is met in the current state.
240
- - If content is already visible, set "success": true with empty actions.
241
- - On retry, try a different approach.
241
+ - Set "success": true if the success criteria is met in the current state.
242
+ - ALWAYS include at least 1 visual action (safeMove, hover, scroll) so the video shows
243
+ something happening. Even if content is already visible, move the cursor to it so the
244
+ viewer's eye is drawn to the relevant element.
245
+ - For "read" operations: use safeMove or hover to highlight the relevant element.
246
+ - For "create"/"update"/"delete" operations: perform the actual action (click, type).
247
+ - On retry, try a different selector approach.
248
+
249
+ Action types:
250
+ - {"type": "goto", "text": "url"} — navigate (use absolute URL)
251
+ - {"type": "safeMove", "selector": "..."} — move cursor to element (visual)
252
+ - {"type": "hover", "selector": "..."} — hover over element (visual)
253
+ - {"type": "scroll", "value": 300} — scroll down N pixels (visual)
254
+ - {"type": "click", "selector": "..."} — click element
255
+ - {"type": "type", "selector": "...", "text": "..."} — type text
256
+ - {"type": "wait", "value": 1000} — wait N ms
242
257
 
243
258
  Respond with ONLY JSON:
244
259
  {
245
- "actions": [{"type": "click", "selector": "..."}],
260
+ "actions": [{"type": "safeMove", "selector": "h1"}],
246
261
  "success": true/false,
247
262
  "observation": "what I see"
248
263
  }`;
@@ -464,6 +479,34 @@ function generateScorecardHtml(results: ResearchResults, checklist: Checklist):
464
479
  </body></html>`;
465
480
  }
466
481
 
482
/**
 * Build a multi-sentence scorecard narration that takes ~12-15s to read
 * so the scorecard stays on screen long enough to be readable.
 *
 * The narration contains, in order: an intro naming the product, the overall
 * totals, the features whose operations all passed, the features with at least
 * one failing operation (with the ids of the failing operations when any are
 * recorded), and a closing sentence.
 *
 * @param results - Aggregated results; reads `totalOperations`, `totalPassed`,
 *   `scorePercent` and, per feature, `name`, `passed`, `total`, `score`,
 *   `operations[].id` and `operations[].success`.
 * @param checklist - Checklist; only `product` is read.
 * @returns The sentences joined by single spaces.
 */
function buildScorecardNarration(results: ResearchResults, checklist: Checklist): string {
  const parts: string[] = [];
  parts.push(`Here are the final QA results for ${checklist.product}.`);
  parts.push(`Out of ${results.totalOperations} operations tested, ${results.totalPassed} passed, giving an overall score of ${results.scorePercent} percent.`);

  // A feature with zero tested operations has nothing to vouch for it, so it
  // is not announced as working (0 === 0 would otherwise count as a pass).
  const passed = results.features.filter(f => f.total > 0 && f.passed === f.total);
  const partial = results.features.filter(f => f.passed < f.total);

  if (passed.length > 0) {
    const names = passed.map(f => f.name).join(", ");
    parts.push(`The following features work as expected: ${names}.`);
  }
  if (partial.length > 0) {
    const details = partial.map(f => {
      const failedOps = f.operations.filter(o => !o.success).map(o => o.id).join(", ");
      const summary = `${f.name} scored ${f.score}`;
      // Skip the clause when no failing operation is recorded, rather than
      // emitting a dangling "failing operations include ".
      return failedOps.length > 0
        ? `${summary} — failing operations include ${failedOps}`
        : summary;
    }).join("; ");
    parts.push(`Partial coverage in: ${details}.`);
  }

  parts.push(`This video serves as evidence of the current product state. Failing operations are demonstrated as bugs to be fixed.`);
  return parts.join(" ");
}
509
+
467
510
  function escapeHtml(s: string): string {
468
511
  return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
469
512
  }
@@ -631,7 +674,7 @@ test("${slug} QA evidence", async ({ page }) => {
631
674
  ${segments.join("\n")}
632
675
 
633
676
  // Render the full scorecard as the last segment (visible for ~8s)
634
- .segment("Here are the final QA results for this product.", {
677
+ .segment(${JSON.stringify(buildScorecardNarration(results, checklist))}, {
635
678
  setup: async () => {
636
679
  await page.setContent(SCORECARD_HTML, { waitUntil: "domcontentloaded" });
637
680
  await page.waitForTimeout(500);
@@ -670,7 +713,10 @@ async function runSpec(specPath: string, label: string): Promise<{ ok: boolean;
670
713
  console.log(`\n${label}\n Running: bunx playwright test ${specPath}\n`);
671
714
  try {
672
715
  const result = await $`bunx playwright test ${specPath} --reporter=list 2>&1`.text();
673
- const ok = !result.includes("failed");
716
+ // Match Playwright's summary line: "N passed" or "N failed"
717
+ const passMatch = result.match(/(\d+) passed/);
718
+ const failMatch = result.match(/(\d+) failed/);
719
+ const ok = passMatch !== null && (failMatch === null || parseInt(failMatch[1]) === 0);
674
720
  console.log(result.slice(-1000));
675
721
  return { ok, output: result };
676
722
  } catch (err: any) {