@m8i-51/shoal 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +121 -0
- package/bin/shoal.js +56 -0
- package/framework/__tests__/coverage.test.ts +232 -0
- package/framework/__tests__/report.test.ts +154 -0
- package/framework/account-manager.ts +414 -0
- package/framework/agent-loop.ts +103 -0
- package/framework/agent-store.ts +47 -0
- package/framework/cost.ts +91 -0
- package/framework/coverage.ts +157 -0
- package/framework/findings.ts +53 -0
- package/framework/github.ts +64 -0
- package/framework/llm-client.ts +507 -0
- package/framework/observation.ts +182 -0
- package/framework/org-designer.ts +85 -0
- package/framework/product-discovery.ts +327 -0
- package/framework/report.ts +276 -0
- package/framework/scenario-designer.ts +141 -0
- package/framework/triage.ts +208 -0
- package/framework/types.ts +80 -0
- package/package.json +55 -0
- package/run.ts +1213 -0
- package/server/index.ts +227 -0
- package/server/runner.ts +125 -0
- package/server/runs.ts +103 -0
- package/targets/example.ts +55 -0
- package/targets/index.ts +17 -0
- package/targets/noop.ts +6 -0
- package/targets/types.ts +19 -0
- package/triage-only.ts +57 -0
- package/web/dist/assets/index-CD6EJ_1O.js +68 -0
- package/web/dist/assets/index-DPLuVm2n.css +1 -0
- package/web/dist/index.html +13 -0
package/run.ts
ADDED
|
@@ -0,0 +1,1213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* run.ts — Multi-agent runner
|
|
3
|
+
* hr → product discovery → api agents + browser agents → triage
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* ANTHROPIC_API_KEY=xxx GITHUB_TOKEN=xxx GITHUB_REPO=owner/repo npx tsx run.ts
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { config as loadEnv } from "dotenv";
|
|
10
|
+
loadEnv({ override: true }); // .env を常に優先(継承した環境変数を上書き)
|
|
11
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
12
|
+
import { chromium, type Page } from "playwright";
|
|
13
|
+
import * as fs from "fs";
|
|
14
|
+
import * as path from "path";
|
|
15
|
+
import { createLLMClient } from "./framework/llm-client";
|
|
16
|
+
import type { Tool } from "./framework/llm-client";
|
|
17
|
+
import { createMessageWithRetry, runAgentLoop, sleep, rateLimitRetries } from "./framework/agent-loop";
|
|
18
|
+
import { collectedFindings, initRunLog, saveRunLog, saveFinding, runLog } from "./framework/findings";
|
|
19
|
+
import { loadAgents, addAgent, retireAgent } from "./framework/agent-store";
|
|
20
|
+
import { updateCoverage, computeWeightedSummary } from "./framework/coverage";
|
|
21
|
+
import { postGitHubIssue, fetchClosedIssues, fetchOpenIssues } from "./framework/github";
|
|
22
|
+
import {
|
|
23
|
+
setupObservation,
|
|
24
|
+
getRecentConsoleLogs,
|
|
25
|
+
getRecentNetworkErrors,
|
|
26
|
+
buildObservationWarning,
|
|
27
|
+
readPageText,
|
|
28
|
+
readAccessibilityTree,
|
|
29
|
+
saveSnapshotBeforeAction,
|
|
30
|
+
getDiffFromSnapshot,
|
|
31
|
+
type ObservationState,
|
|
32
|
+
} from "./framework/observation";
|
|
33
|
+
import { discoverProduct, loadCachedSpec, type ProductSpec } from "./framework/product-discovery";
|
|
34
|
+
import { designOrg, UNIVERSAL_LENSES } from "./framework/org-designer";
|
|
35
|
+
import { designScenarios, type Scenario, type ScenarioOutcome } from "./framework/scenario-designer";
|
|
36
|
+
import { runTriageAgent } from "./framework/triage";
|
|
37
|
+
import { generateReport } from "./framework/report";
|
|
38
|
+
import type { AgentLog, Finding, RegressionCheck } from "./framework/types";
|
|
39
|
+
import { loadTarget } from "./targets";
|
|
40
|
+
import { runAccountManager, loadTestAccounts, type TestAccount } from "./framework/account-manager";
|
|
41
|
+
import { estimateCost, formatCostUSD } from "./framework/cost";
|
|
42
|
+
|
|
43
|
+
// ---- Runtime configuration (read after dotenv has been applied above) ----

/** Origin of the app under test; browser navigation prefixes tool-supplied paths with it. */
const BASE_URL = process.env.BASE_URL ?? "http://localhost:3000";

/** GitHub token and `owner/repo` slug; both fall back to empty strings when unset. */
const GITHUB_TOKEN = process.env.GITHUB_TOKEN ?? "";
const GITHUB_REPO = process.env.GITHUB_REPO ?? "";
const githubOptions = { token: GITHUB_TOKEN, repo: GITHUB_REPO };

/** Name passed to loadTarget(); "none" when TARGET is unset. */
const TARGET = process.env.TARGET ?? "none";
let targetConfig = loadTarget(TARGET);

// Load shoal.config.ts / .js / .mjs from the working directory if present.
// Only the first file that exists is considered, even if it turns out to be invalid.
for (const name of ["shoal.config.ts", "shoal.config.js", "shoal.config.mjs"]) {
  const cfgPath = path.join(process.cwd(), name);
  if (fs.existsSync(cfgPath)) {
    try {
      // ESM import() expects a URL-like specifier; a raw absolute path such as
      // "C:\\project\\shoal.config.ts" is rejected on Windows, so convert it to file://.
      const { pathToFileURL } = await import("url");
      const mod = await import(pathToFileURL(cfgPath).href);
      // Accept both `export const target` and `export default { target }`.
      const t = mod.target ?? mod.default?.target;
      if (t?.appTools && typeof t?.execute === "function") {
        targetConfig = t;
        console.log(`[config] loaded: ${name}`);
      } else {
        console.warn(`[config] ${name} found but does not export a valid target`);
      }
    } catch (e) {
      // Best effort: a broken config file must not abort the run; keep the TARGET-based config.
      console.warn(`[config] failed to load ${name}:`, e);
    }
    break;
  }
}
|
|
70
|
+
|
|
71
|
+
/**
 * Reads a non-negative integer agent count from an environment variable.
 *
 * @param name     Environment variable to read.
 * @param fallback Value used when the variable is unset or invalid.
 * @returns The parsed count, or `fallback` (with a warning) when the value is
 *          empty, non-numeric, or negative. Without this check `parseInt`
 *          would yield NaN, which compares false against every bound.
 */
function readAgentCount(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined) return fallback;
  const parsed = parseInt(raw, 10);
  if (Number.isNaN(parsed) || parsed < 0) {
    console.warn(`[config] ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}`);
    return fallback;
  }
  return parsed;
}

// Skip API exploration entirely when the target exposes no API tools.
const MAX_EXPLORERS = targetConfig.appTools.length > 0
  ? readAgentCount("MAX_EXPLORERS", 4)
  : 0;
const MAX_BROWSERS = readAgentCount("MAX_BROWSERS", 2);

const { client, defaultModel, provider: llmProvider } = createLLMClient();
|
|
78
|
+
|
|
79
|
+
// ================================================================
|
|
80
|
+
// Screenshots
|
|
81
|
+
// ================================================================
|
|
82
|
+
|
|
83
|
+
/** Directory receiving this run's screenshots; assigned by initDirs(). */
let screenshotDir: string;

/**
 * Creates the per-run screenshot directory `<cwd>/logs/screenshots/<runId>`
 * (including missing parents) and remembers it in `screenshotDir`.
 *
 * @returns The generated run id, of the form `run_<epoch milliseconds>`.
 */
function initDirs(): string {
  const id = "run_" + Date.now();
  const segments = [process.cwd(), "logs", "screenshots", id];
  screenshotDir = path.join(...segments);
  fs.mkdirSync(screenshotDir, { recursive: true });
  return id;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Captures the current viewport (not the full page) as a PNG, writes it into
 * `screenshotDir`, and returns the image both as base64 and as a file path.
 *
 * @param page  Playwright page to capture.
 * @param label Free-form label embedded in the file name. Every character
 *              outside [a-zA-Z0-9] is replaced with "_", and the result is
 *              capped in length: callers build labels from agent-supplied
 *              navigation paths, and an unbounded label can push the file name
 *              past the filesystem's name limit and make the write fail.
 * @returns The base64-encoded PNG and the path it was written to.
 */
async function takeScreenshot(page: Page, label: string): Promise<{ base64: string; filePath: string }> {
  // Leaves ample room for the timestamp prefix and ".png" within common 255-byte name limits.
  const maxLabelLength = 100;
  const safeLabel = label.replace(/[^a-zA-Z0-9]/g, "_").slice(0, maxLabelLength);
  const fileName = `${Date.now()}_${safeLabel}.png`;
  const filePath = path.join(screenshotDir, fileName);
  const buffer = await page.screenshot({ type: "png", fullPage: false });
  fs.writeFileSync(filePath, buffer);
  return { base64: buffer.toString("base64"), filePath };
}
|
|
99
|
+
|
|
100
|
+
// ================================================================
|
|
101
|
+
// API agent tools
|
|
102
|
+
// ================================================================
|
|
103
|
+
|
|
104
|
+
/** Feedback categories accepted by post_feedback; anything else is coerced to "ux" by the executor. */
const VALID_CATEGORIES = ["ux", "feature-request", "bug", "goal-gap"];

/**
 * Tool schema for recording a finding about the app.
 * The category enum is derived from VALID_CATEGORIES so the schema advertised
 * to the model and the executor's validation cannot drift apart.
 */
const POST_FEEDBACK_TOOL: Tool = {
  name: "post_feedback",
  description: "Record a finding about the app — usability issues, feature requests, or bug-like behavior. / アプリへのフィードバックを記録する",
  input_schema: {
    type: "object",
    properties: {
      title: { type: "string" },
      body: { type: "string" },
      // Copy so that consumers of the schema cannot mutate the validator list.
      category: { type: "string", enum: [...VALID_CATEGORIES] },
    },
    required: ["title", "body", "category"],
  },
};
|
|
119
|
+
|
|
120
|
+
/**
 * Tool schema used by regression agents to report that a previously closed
 * issue has reappeared. Every declared field is required.
 */
const REPORT_REGRESSION_TOOL: Tool = ((): Tool => {
  const fields = {
    original_issue_number: { type: "number" },
    original_issue_title: { type: "string" },
    title: { type: "string" },
    body: { type: "string" },
  };
  return {
    name: "report_regression",
    description: "Report a regression when a previously fixed bug has reappeared as a GitHub Issue. / 修正済みバグの再発をGitHub Issueとして報告する",
    input_schema: {
      type: "object",
      properties: fields,
      // All fields are mandatory, in declaration order.
      required: Object.keys(fields),
    },
  };
})();
|
|
134
|
+
|
|
135
|
+
/**
 * Tool schema used by regression agents to record that a closed issue was
 * re-checked and is still fixed. Every declared field is required.
 */
const MARK_VERIFIED_TOOL: Tool = ((): Tool => {
  const fields = {
    original_issue_number: { type: "number" },
    original_issue_title: { type: "string" },
    note: { type: "string" },
  };
  return {
    name: "mark_verified",
    description: "Record that a closed Issue has been verified as still fixed. / 修正済みIssueが問題なく修正されていることを確認した場合に呼ぶ",
    input_schema: {
      type: "object",
      properties: fields,
      // All fields are mandatory, in declaration order.
      required: Object.keys(fields),
    },
  };
})();
|
|
148
|
+
|
|
149
|
+
/**
 * Tool schema through which an agent reports whether it reached its scenario
 * goal. Both fields are required.
 */
const POST_OUTCOME_TOOL: Tool = ((): Tool => {
  const fields = {
    achieved: {
      type: "boolean",
      description: "true if you successfully completed the goal, false if you could not",
    },
    reason: {
      type: "string",
      description: "Brief explanation (1-2 sentences) of why the goal was or was not achieved",
    },
  };
  return {
    name: "post_outcome",
    description: "Record whether you achieved your scenario goal. Call this at the end of your run if you were given a [Your Task for This Run] section. / [Your Task for This Run] セクションがある場合のみ、run の最後にゴール達成可否を記録する",
    input_schema: {
      type: "object",
      properties: fields,
      // All fields are mandatory, in declaration order.
      required: Object.keys(fields),
    },
  };
})();
|
|
167
|
+
|
|
168
|
+
/** Tools offered to exploration agents: the target's app tools followed by the feedback and outcome tools. */
const EXPLORER_TOOLS: Tool[] = [];
EXPLORER_TOOLS.push(...targetConfig.appTools, POST_FEEDBACK_TOOL, POST_OUTCOME_TOOL);
|
|
169
|
+
|
|
170
|
+
/**
 * Renders the "[App Goals]" prompt section for a product spec.
 *
 * @param spec Product spec; only `appGoals` is read.
 * @returns An empty string when the spec has no goals, otherwise a block that
 *          starts and ends with a newline and lists one goal per "- " bullet.
 */
function goalsSection(spec: ProductSpec): string {
  const goals = spec.appGoals;
  if (!goals || !goals.length) {
    return "";
  }
  const bullets = goals.map((goal) => `- ${goal}`).join("\n");
  const intro =
    'This app is designed to achieve the following goals. If you find anything that prevents these goals from being met, use category "goal-gap" when posting feedback.';
  return "\n[App Goals]\n" + intro + "\n" + bullets + "\n";
}
|
|
174
|
+
/** Tools offered to regression agents: the target's app tools followed by the regression-report and verify tools. */
const REGRESSION_TOOLS: Tool[] = [];
REGRESSION_TOOLS.push(...targetConfig.appTools, REPORT_REGRESSION_TOOL, MARK_VERIFIED_TOOL);
|
|
175
|
+
|
|
176
|
+
/**
 * Builds the tool executor handed to the agent loop for an API agent.
 *
 * The returned function handles the four built-in tools (post_outcome,
 * post_feedback, report_regression, mark_verified) itself and forwards every
 * other tool name to `targetConfig.execute`. Each call is appended to
 * `agentLog.actions` and counted in `runLog.summary.totalActions`, whether it
 * succeeded or threw.
 *
 * @param agentLog         Log of the agent the executor belongs to; mutated on every call.
 * @param scenarioOutcomes Shared list that receives an entry when post_outcome
 *                         is called while a scenario is assigned.
 * @param scenario         Scenario assigned to the agent, if any.
 * @returns An async executor resolving to the JSON-encoded tool result. Errors
 *          thrown by a tool are caught and returned as `{ error: string }`
 *          rather than rejecting.
 */
function makeExecutor(agentLog: AgentLog, scenarioOutcomes: ScenarioOutcome[], scenario?: Scenario) {
  return async (toolName: string, input: Record<string, unknown>): Promise<string> => {
    const startedAt = Date.now();
    let result: unknown;
    try {
      switch (toolName) {
        case "post_outcome": {
          const { achieved, reason } = input as { achieved: boolean; reason: string };
          // Outcomes are only stored for agents that were actually given a scenario.
          if (scenario) {
            const outcome: ScenarioOutcome = {
              scenarioId: scenario.id,
              scenarioTitle: scenario.title,
              agentId: agentLog.agentId,
              agentName: agentLog.agentName,
              achieved: Boolean(achieved),
              reason: String(reason),
            };
            scenarioOutcomes.push(outcome);
            console.log(` ${achieved ? "✓" : "✗"} [outcome] "${scenario.title}": ${achieved ? "achieved" : "NOT achieved"} — ${reason}`);
          }
          result = { recorded: true };
          break;
        }
        case "post_feedback": {
          const { title, body, category } = input as { title: string; body: string; category: string };
          // Model output is untrusted: unknown categories fall back to "ux".
          const safeCategory = VALID_CATEGORIES.includes(String(category)) ? String(category) : "ux";
          const finding: Finding = {
            id: `${agentLog.agentId}_${Date.now()}`,
            runId: runLog.runId,
            agentId: agentLog.agentId,
            agentName: agentLog.agentName,
            role: agentLog.role,
            title: String(title),
            body: String(body),
            category: safeCategory,
            timestamp: new Date().toISOString(),
          };
          saveFinding(finding);
          agentLog.issuesPosted.push({ title: String(title), category: safeCategory, url: null });
          console.log(` → [findings] saved: "${title}" (${safeCategory})`);
          result = { saved: true, findingId: finding.id };
          break;
        }
        case "report_regression": {
          const { original_issue_number, original_issue_title, title, body } = input as {
            original_issue_number: number; original_issue_title: string; title: string; body: string;
          };
          const url = await postGitHubIssue(
            `[regression] ${title}`,
            `**Regression:** #${original_issue_number} "${original_issue_title}" has reappeared.\n\n${body}\n\n---\n*This Issue was auto-generated by an AI regression agent*`,
            ["regression", "feedback-agent"],
            githubOptions
          );
          const check: RegressionCheck = {
            issueNumber: Number(original_issue_number),
            issueTitle: String(original_issue_title),
            status: "regressed",
            note: String(body),
            regressionUrl: url,
          };
          agentLog.regressionChecks.push(check);
          runLog.summary.regressionChecked++;
          runLog.summary.regressionFailed++;
          result = { reported: true, url };
          break;
        }
        case "mark_verified": {
          const { original_issue_number, original_issue_title, note } = input as {
            original_issue_number: number; original_issue_title: string; note: string;
          };
          agentLog.regressionChecks.push({
            issueNumber: Number(original_issue_number),
            issueTitle: String(original_issue_title),
            status: "fixed",
            note: String(note),
            regressionUrl: null,
          });
          runLog.summary.regressionChecked++;
          console.log(` ✓ verified: #${original_issue_number} "${original_issue_title}"`);
          result = { verified: true };
          break;
        }
        default:
          // Everything else is an app tool owned by the configured target.
          result = await targetConfig.execute(toolName, input, agentLog.agentId);
      }
    } catch (e) {
      // Surface the failure to the model as a tool result instead of aborting the agent.
      result = { error: String(e) };
    }
    agentLog.actions.push({
      timestamp: new Date().toISOString(),
      tool: toolName,
      input,
      result,
      durationMs: Date.now() - startedAt,
    });
    runLog.summary.totalActions++;
    // JSON.stringify returns undefined (not a string) for an undefined result,
    // e.g. when a target tool returns nothing; fall back to JSON null so the
    // declared Promise<string> contract always holds.
    return JSON.stringify(result) ?? "null";
  };
}
|
|
275
|
+
|
|
276
|
+
// ================================================================
|
|
277
|
+
// API agents (exploration / regression)
|
|
278
|
+
// ================================================================
|
|
279
|
+
|
|
280
|
+
/**
 * Runs one API exploration agent: registers its log in `runLog`, builds the
 * persona/system prompt, and drives the agent loop with EXPLORER_TOOLS.
 *
 * @param agent            Persona to impersonate.
 * @param productSpec      Product description injected into the prompt.
 * @param assignment       Optional scenario or lens for this run; a scenario
 *                         takes precedence over a lens, and with neither the
 *                         agent explores freely.
 * @param scenarioOutcomes List that receives the agent's post_outcome result.
 */
async function runExplorer(
  agent: { id: string; name: string; persona: string; role: string },
  productSpec: ProductSpec,
  assignment: { scenario?: Scenario; lens?: string } = {},
  scenarioOutcomes: ScenarioOutcome[] = [],
) {
  const { scenario, lens } = assignment;

  // Short tag for the console only; titles and lenses are truncated.
  let assignmentLabel = "[free exploration]";
  if (scenario) {
    assignmentLabel = `[scenario: ${scenario.title.slice(0, 35)}]`;
  } else if (lens) {
    assignmentLabel = `[lens: ${lens.slice(0, 30)}...]`;
  }
  console.log(`\n[explorer] ${agent.name} start ${assignmentLabel}`);

  const agentLog: AgentLog = {
    agentType: "explorer",
    agentId: agent.id,
    agentName: agent.name,
    role: agent.role,
    startedAt: new Date().toISOString(),
    completedAt: null,
    status: "completed",
    iterations: 0,
    actions: [],
    issuesPosted: [],
    regressionChecks: [],
    error: null,
  };
  runLog.agents.push(agentLog);

  // Optional prompt sections; each is either empty or a block framed by newlines.
  const uiSection = productSpec.uiFeatures
    ? `\n[UI-Only Features]\nThese features exist in the UI but may not be reflected in API responses. Keep them in mind when interpreting API results.\n${productSpec.uiFeatures}\n`
    : "";
  const designSection = productSpec.designContext
    ? `\n[Design Context]\n${productSpec.designContext}\n`
    : "";
  let focusSection = "";
  if (scenario) {
    focusSection = `\n[Your Task for This Run]\nTitle: ${scenario.title}\nYou are: ${scenario.context}\nGoal: ${scenario.goal}\nConstraints: ${scenario.constraints}\n\nFocus on completing this task naturally. Report any issues you encounter along the way.\nWhen done (or if you cannot complete the goal), call post_outcome with achieved=true/false and a brief reason.\n`;
  } else if (lens) {
    focusSection = `\n[Focus Area for This Run]\n${lens}\nKeep this perspective in mind and prioritize reporting related issues.\n`;
  }

  const systemPrompt = [
    `You are "${agent.name}".`,
    `Role: ${agent.role}`,
    `Persona: ${agent.persona}`,
    "",
    `You are an employee using "${productSpec.appName}".`,
    "Use the tools to interact with the app.",
    "",
    `${productSpec.appDescription}`,
    "",
    "If you notice anything inconvenient, a missing feature, or bug-like behavior,",
    "report it with the post_feedback tool.",
    "",
    "[Implemented Features]",
    `${productSpec.features}`,
    `${uiSection}${designSection}${goalsSection(productSpec)}${focusSection}`,
    "Take 3–5 actions, then finish.",
  ].join("\n");

  const execute = makeExecutor(agentLog, scenarioOutcomes, scenario);
  await runAgentLoop(agentLog, systemPrompt, EXPLORER_TOOLS, client, defaultModel, execute);
  console.log(`[explorer] ${agent.name} done`);
}
|
|
332
|
+
|
|
333
|
+
/**
 * Runs one regression agent: asks it to re-check the given closed issues with
 * REGRESSION_TOOLS and logs how many were checked and how many regressed.
 *
 * @param agent        Persona whose id, name, and role label the run.
 * @param closedIssues Issues closed as fixed; only the first 200 characters of
 *                     each body (newlines flattened) are shown to the agent.
 * @param productSpec  Product description injected into the prompt as reference.
 */
async function runRegressionAgent(
  agent: { id: string; name: string; persona: string; role: string },
  closedIssues: { number: number; title: string; body: string; labels: string[] }[],
  productSpec: ProductSpec
) {
  console.log(`\n[regression] ${agent.name} start (${closedIssues.length} issues to check)`);

  const agentLog: AgentLog = {
    agentType: "regression",
    agentId: agent.id,
    agentName: agent.name,
    role: agent.role,
    startedAt: new Date().toISOString(),
    completedAt: null,
    status: "completed",
    iterations: 0,
    actions: [],
    issuesPosted: [],
    regressionChecks: [],
    error: null,
  };
  runLog.agents.push(agentLog);

  // One bullet per issue, followed by a single-line excerpt of its body.
  const issueLines: string[] = [];
  for (const issue of closedIssues) {
    const excerpt = issue.body.slice(0, 200).replace(/\n/g, " ");
    issueLines.push(`- Issue #${issue.number}: ${issue.title}\n ${excerpt}`);
  }
  const issueList = issueLines.join("\n");

  // Optional prompt sections; each is either empty or a block framed by newlines.
  const uiSection = productSpec.uiFeatures
    ? `\n[UI-Only Features]\nThese features exist in the UI but may not be reflected in API responses.\n${productSpec.uiFeatures}\n`
    : "";
  const designSection = productSpec.designContext
    ? `\n[Design Context]\n${productSpec.designContext}\n`
    : "";

  const systemPrompt = [
    `You are "${agent.name}". Act as a QA engineer.`,
    "",
    "The following Issues have been closed as fixed. Verify they are actually fixed.",
    "",
    "[Issues to Verify]",
    `${issueList}`,
    "",
    "[Steps]",
    "1. Read each Issue and perform actions that could reproduce it",
    "2. If the problem reoccurs, report it with report_regression",
    "3. If the problem is gone, record it with mark_verified",
    "4. Finish after checking all items",
    "",
    "[Reference: Implemented Features]",
    `${productSpec.features}`,
    `${uiSection}${designSection}${goalsSection(productSpec)}`,
  ].join("\n");

  // Regression agents have no scenario, so outcomes go to a throwaway list.
  const execute = makeExecutor(agentLog, []);
  await runAgentLoop(agentLog, systemPrompt, REGRESSION_TOOLS, client, defaultModel, execute);

  const checked = agentLog.regressionChecks.length;
  let failed = 0;
  for (const check of agentLog.regressionChecks) {
    if (check.status === "regressed") failed++;
  }
  console.log(`[regression] ${agent.name} done (checked: ${checked} / regressed: ${failed})`);
}
|
|
381
|
+
|
|
382
|
+
// ================================================================
|
|
383
|
+
// HR agent
|
|
384
|
+
// ================================================================
|
|
385
|
+
|
|
386
|
+
/**
 * Tools offered to the HR agent: four argument-less read tools plus add_agent
 * and retire_agent.
 */
const HR_TOOLS: Anthropic.Tool[] = ((): Anthropic.Tool[] => {
  // Fresh schema object per tool for tools that take no arguments.
  const noInput = (): Anthropic.Tool["input_schema"] => ({ type: "object", properties: {}, required: [] });

  return [
    {
      name: "get_agents",
      description: "Get the current list of registered agents. / 現在登録されているエージェント一覧を取得する",
      input_schema: noInput(),
    },
    {
      name: "get_coverage",
      description: "Get a weighted summary of what has been explored across past runs. Use this to identify underrepresented lenses and perspectives before deciding whom to hire. / 過去のrunで何がどれだけ探索されたかの重み付きサマリーを取得する。採用方針の決定前に確認すること",
      input_schema: noInput(),
    },
    {
      name: "get_open_issues",
      description: "Get the titles and labels of currently open GitHub Issues (known problems). Use this to understand what is already known and recruit agents who are likely to explore DIFFERENT areas. / 現在オープンなGitHub Issueのタイトルとラベルを取得する。既知の問題を把握し、未探索領域を掘れるペルソナを採用するために使う",
      input_schema: noInput(),
    },
    {
      name: "get_scenarios",
      description: "Get the user test scenarios generated for this run. About 70% of agents will be assigned one of these scenarios — recruit personas whose background and role naturally fit the scenario contexts. / 今回のrunで生成されたユーザーシナリオ一覧を取得する。エージェントの約70%にシナリオが割り当てられるため、シナリオの文脈に自然にフィットするペルソナを採用すること",
      input_schema: noInput(),
    },
    {
      name: "add_agent",
      description: "Register a new agent (user persona). / 新しいエージェントを登録する",
      input_schema: {
        type: "object",
        properties: { name: { type: "string" }, role: { type: "string" }, persona: { type: "string" } },
        required: ["name", "role", "persona"],
      },
    },
    {
      name: "retire_agent",
      description: "Retire an agent (e.g. due to long tenure). / エージェントを退職させる",
      input_schema: {
        type: "object",
        properties: { agentId: { type: "string" }, reason: { type: "string" } },
        required: ["agentId", "reason"],
      },
    },
  ];
})();
|
|
433
|
+
|
|
434
|
+
/**
 * Runs the HR agent, which reviews coverage, open issues, scenarios, and the
 * current roster, then hires and retires agents through HR_TOOLS.
 *
 * The conversation is capped at 8 model turns. Any error is logged and
 * swallowed, so a failing HR step does not reject the returned promise.
 *
 * @param productSpec  Product spec; only the app name is used in the prompt.
 * @param orgGuidance  Organization design guidelines inserted into the prompt.
 * @param openIssues   Open issues returned by the get_open_issues tool.
 * @param scenarios    Scenarios returned by the get_scenarios tool.
 * @param testAccounts Optional test accounts; when present, the prompt asks the
 *                     model to match each persona's role to an account.
 */
async function runHRAgent(
  productSpec: ProductSpec,
  orgGuidance: string,
  openIssues: { number: number; title: string; labels: string[] }[],
  scenarios: Scenario[],
  testAccounts: TestAccount[] = [],
): Promise<void> {
  console.log("\n[hr] starting...");
  const messages: Anthropic.MessageParam[] = [
    { role: "user", content: "Manage agent hiring and retirement." },
  ];

  const hasAccounts = testAccounts.length > 0;
  let accountContext = "";
  if (hasAccounts) {
    const accountLines = testAccounts.map((a) => `- ${a.role}: ${a.email}`).join("\n");
    accountContext = `\n[Available Test Accounts (one per role)]\n${accountLines}\nWhen recruiting agents, match each persona's role to one of these accounts so they can operate with appropriate permissions.`;
  }
  const accountStepHint = hasAccounts
    ? "\n — assign each agent a role that matches one of the available test accounts"
    : "";

  const systemPrompt = [
    `You are the test agent manager for "${productSpec.appName}".`,
    "You recruit and manage agents that simulate real users of the app.",
    "",
    "[Organization Design Guidelines]",
    `${orgGuidance}${accountContext}`,
    "",
    "[Steps]",
    "1. Call get_coverage to review which lenses and categories are underrepresented in past runs",
    "2. Call get_open_issues to understand what problems are already known — recruit agents likely to find DIFFERENT issues in unexplored areas",
    "3. Call get_scenarios to see the user test scenarios generated for this run — about 70% of agents will be assigned a scenario, so recruit personas whose background fits those scenarios",
    "4. Call get_agents to check the current agent roster",
    `5. Add 2–3 agents with add_agent — balance between scenario-fit personas (step 3), underrepresented lenses (step 1), and unexplored areas (step 2)${accountStepHint}`,
    "6. If there are agents with old createdAt dates (oldest 1–2), retire them with retire_agent",
  ].join("\n");

  // Resolves a single tool call to the value that is JSON-encoded back to the model.
  const answerToolCall = (toolUse: Anthropic.ToolUseBlock): unknown => {
    switch (toolUse.name) {
      case "get_coverage": {
        const summary = computeWeightedSummary().formatted;
        console.log(" [hr] coverage summary fetched");
        return summary;
      }
      case "get_open_issues": {
        const listing = openIssues.length === 0
          ? "(no open issues — either GitHub is not configured or there are no known issues yet)"
          : openIssues.map((i) => `- #${i.number}: ${i.title} [${i.labels.join(", ")}]`).join("\n");
        console.log(` [hr] open issues fetched (${openIssues.length})`);
        return listing;
      }
      case "get_scenarios": {
        const listing = scenarios.length === 0
          ? "(no scenarios generated — all agents will use free-exploration mode)"
          : scenarios
              .map((s) => `[${s.id}] ${s.title}\n Context: ${s.context}\n Goal: ${s.goal}\n Constraints: ${s.constraints}`)
              .join("\n\n");
        console.log(` [hr] scenarios fetched (${scenarios.length})`);
        return listing;
      }
      case "get_agents": {
        const agents = loadAgents();
        // Expose only roster fields; the persona text is not sent back to the model.
        const roster = agents.map((a) => ({ id: a.id, name: a.name, role: a.role, createdAt: a.createdAt }));
        console.log(` [hr] current agents: ${agents.length}`);
        return roster;
      }
      case "add_agent": {
        const { name, role, persona } = toolUse.input as { name: string; role: string; persona: string };
        const added = addAgent({ name, role, persona });
        console.log(` [hr] hired: ${name} (${role})`);
        return added;
      }
      case "retire_agent": {
        const { agentId, reason } = toolUse.input as { agentId: string; reason: string };
        const outcome = { success: retireAgent(agentId) };
        console.log(` [hr] retired: ${agentId} — ${reason}`);
        return outcome;
      }
      default:
        return { error: "unknown tool" };
    }
  };

  try {
    for (let turn = 0; turn < 8; turn++) {
      const response = await createMessageWithRetry(client, {
        model: defaultModel,
        max_tokens: 1024,
        system: systemPrompt,
        tools: HR_TOOLS,
        messages,
      });
      messages.push({ role: "assistant", content: response.content });

      const toolUses = response.content.filter(
        (b): b is Anthropic.ToolUseBlock => b.type === "tool_use"
      );
      // Stop once the model no longer requests tools or signals the end of its turn.
      if (toolUses.length === 0 || response.stop_reason === "end_turn") break;

      const toolResults: Anthropic.ToolResultBlockParam[] = [];
      for (const toolUse of toolUses) {
        const result = answerToolCall(toolUse);
        toolResults.push({ type: "tool_result", tool_use_id: toolUse.id, content: JSON.stringify(result) });
      }
      messages.push({ role: "user", content: toolResults });
    }
    console.log("[hr] done");
  } catch (e) {
    console.error("[hr] error:", e);
  }
}
|
|
526
|
+
|
|
527
|
+
// ================================================================
|
|
528
|
+
// Browser agent tools
|
|
529
|
+
// ================================================================
|
|
530
|
+
|
|
531
|
+
/** One recorded tool invocation in a browser agent's log. */
interface BrowserAction {
  /** Time of the invocation as a string (presumably ISO-8601; confirm against the writer). */
  timestamp: string;
  /** Name of the tool that was invoked. */
  tool: string;
  /** Arguments the tool was invoked with. */
  input: Record<string, unknown>;
  /** File path of an associated screenshot, or null when there is none. */
  screenshotPath: string | null;
  /** Duration of the invocation in milliseconds. */
  durationMs: number;
}
|
|
538
|
+
|
|
539
|
+
/** Per-agent log for a browser agent run. */
interface BrowserAgentLog {
  /** Display name of the agent. */
  agentName: string;
  /** Persona text the agent was given. */
  persona: string;
  /** Start time as a string. */
  startedAt: string;
  /** Completion time as a string, or null while unset. */
  completedAt: string | null;
  /** Final state of the run. */
  status: "completed" | "error" | "iteration_limit";
  /** Number of loop iterations recorded for the run. */
  iterations: number;
  /** Tool invocations recorded for the run. */
  actions: BrowserAction[];
  /** Feedback entries saved by the agent, each with the id of the stored finding. */
  feedbacksSaved: { title: string; category: string; findingId: string }[];
  /** Error message, or null when there is none. */
  error: string | null;
}
|
|
550
|
+
|
|
551
|
+
/** Names of the browser tools listed as sending a screenshot. */
const TOOLS_THAT_SEND_SCREENSHOT = new Set<string>([
  "navigate",
  "post_feedback",
  "view_screen",
]);
|
|
552
|
+
|
|
553
|
+
/**
 * Tools offered to browser agents. When API exploration is enabled
 * (MAX_EXPLORERS > 0) the target's app tools come first, each with its
 * description prefixed by "[API check] "; the browser interaction,
 * observation, feedback, and outcome tools follow.
 */
const BROWSER_TOOLS: Anthropic.Tool[] = ((): Anthropic.Tool[] => {
  type Schema = Anthropic.Tool["input_schema"];

  // Schema for tools that take no arguments (fresh object per tool).
  const noInput = (): Schema => ({ type: "object", properties: {}, required: [] });
  // Schema with exactly one required string argument.
  const oneString = (key: string): Schema => ({
    type: "object",
    properties: { [key]: { type: "string" } },
    required: [key],
  });
  // Schema shared by the form tools: a field label plus the value to apply.
  const labelAndValue = (): Schema => ({
    type: "object",
    properties: {
      label: { type: "string" },
      value: { type: "string" },
    },
    required: ["label", "value"],
  });

  const apiTools = MAX_EXPLORERS > 0
    ? targetConfig.appTools.map((t) => ({ ...t, description: `[API check] ${t.description}` }))
    : [];

  return [
    ...apiTools,
    {
      name: "view_screen",
      description: "Capture the current screen. / 現在の画面を確認する",
      input_schema: noInput(),
    },
    {
      name: "navigate",
      description: "Navigate to a path. / 指定したパスに移動する",
      input_schema: oneString("path"),
    },
    {
      name: "click",
      description: "Click a button, link, or tab on screen. / 画面上の要素をクリックする",
      input_schema: oneString("description"),
    },
    {
      name: "fill",
      description: "Type text into an input field. / 入力フィールドにテキストを入力する",
      input_schema: labelAndValue(),
    },
    {
      name: "select",
      description: "Select an option from a dropdown. / ドロップダウンで選択する",
      input_schema: labelAndValue(),
    },
    {
      name: "diff_since_last_action",
      description: "Check what changed on the page since the last action. / 直前のアクションでページに何が変わったかを確認する",
      input_schema: noInput(),
    },
    {
      name: "read_page_text",
      description: "Get all visible text on the page. / ページ上の表示テキストをすべて取得する",
      input_schema: noInput(),
    },
    {
      name: "read_accessibility_tree",
      description: "Get the page's accessibility tree. / ページのアクセシビリティツリーを取得する",
      input_schema: noInput(),
    },
    {
      name: "read_console_logs",
      description: "Check browser console logs (errors and warnings). / ブラウザのコンソールログを確認する",
      input_schema: noInput(),
    },
    {
      name: "read_network_errors",
      description: "Check failed API requests. / 失敗したAPIリクエストの一覧を確認する",
      input_schema: noInput(),
    },
    {
      name: "post_feedback",
      description: "Record an issue or improvement as feedback. Becomes a GitHub Issue after triage. / 問題・改善点をフィードバックとして記録する",
      input_schema: {
        type: "object",
        properties: {
          title: { type: "string" },
          body: { type: "string" },
          category: { type: "string", enum: ["ux", "feature-request", "bug", "goal-gap"] },
        },
        required: ["title", "body", "category"],
      },
    },
    {
      name: "post_outcome",
      description: "Record whether you achieved your scenario goal. Call this at the end of your run if you were given a [Your Task for This Run] section. / [Your Task for This Run] セクションがある場合のみ、run の最後にゴール達成可否を記録する",
      input_schema: {
        type: "object",
        properties: {
          achieved: { type: "boolean", description: "true if you successfully completed the goal, false if you could not" },
          reason: { type: "string", description: "Brief explanation (1-2 sentences)" },
        },
        required: ["achieved", "reason"],
      },
    },
  ];
})();
|
|
653
|
+
|
|
654
|
+
/**
 * Executes one tool call requested by a browser agent and records it in the agent log.
 *
 * Built-in tools (view_screen, navigate, click, fill, select, the read_* / diff
 * observation tools, post_outcome and post_feedback) are handled here; any other
 * tool name is forwarded to `targetConfig.execute` and its result is JSON-stringified.
 *
 * Failures never propagate: any exception is converted into an `error: ...` result
 * text, and a best-effort screenshot of the failing state is attempted.
 *
 * @param toolName         Name of the tool chosen by the model.
 * @param input            Raw tool input produced by the model (not validated beyond the casts below).
 * @param page             Playwright page the agent is driving.
 * @param agentLog         Per-agent log; one entry is appended to `actions` for every call.
 * @param observation      Observation state used for snapshots, console logs and network errors.
 * @param agentId          Id used for finding ids and forwarded to `targetConfig.execute`.
 * @param scenarioOutcomes Shared list that `post_outcome` appends to when a scenario is assigned.
 * @param scenario         Scenario assigned to this agent, if any.
 * @returns The result text, the screenshot taken (if any), and whether that screenshot
 *          should be sent back to the model (always on error, otherwise only for tools
 *          listed in `TOOLS_THAT_SEND_SCREENSHOT`).
 */
async function executeBrowserTool(
  toolName: string,
  input: Record<string, unknown>,
  page: Page,
  agentLog: BrowserAgentLog,
  observation: ObservationState,
  agentId: string,
  scenarioOutcomes: ScenarioOutcome[],
  scenario?: Scenario,
): Promise<{ text: string; screenshot: { base64: string; filePath: string } | null; sendToClaude: boolean }> {
  const startedAt = Date.now();
  let resultText = "";
  let screenshot: { base64: string; filePath: string } | null = null;
  let isError = false;

  // Screenshot labels are built from model-supplied text. Replace path separators,
  // characters that are invalid in file names on common platforms, and control
  // characters, so that a label such as "Save/Cancel" cannot make the screenshot
  // step fail after the action itself has already succeeded.
  // NOTE(review): assumes takeScreenshot uses the label as part of a file name — confirm.
  const screenshotLabel = (prefix: string, raw: string): string =>
    `${prefix}_${raw.replace(/[\\/:*?"<>|\x00-\x1f]/g, "_")}`;

  try {
    switch (toolName) {
      case "view_screen": {
        screenshot = await takeScreenshot(page, "view_screen");
        resultText = "Current screen.";
        break;
      }
      case "navigate": {
        const { path: navPath } = input as { path: string };
        await saveSnapshotBeforeAction(page, observation);
        await page.goto(`${BASE_URL}${navPath}`, { waitUntil: "networkidle" });
        await page.waitForTimeout(500);
        screenshot = await takeScreenshot(page, screenshotLabel("navigate", navPath));
        resultText = `Navigated to ${navPath}`;
        break;
      }
      case "click": {
        const { description } = input as { description: string };
        await saveSnapshotBeforeAction(page, observation);
        // Escape regex metacharacters so the description is matched literally.
        const escapedDesc = description.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        const buttonLocator = page.getByRole("button", { name: new RegExp(escapedDesc, "i") });
        const linkLocator = page.getByRole("link", { name: new RegExp(escapedDesc, "i") });
        const textLocator = page.getByText(description, { exact: false });
        // Try button, then link, then any text match; the first successful click wins.
        let clicked = false;
        for (const loc of [buttonLocator, linkLocator, textLocator]) {
          try {
            await loc.first().click({ timeout: 5000 });
            clicked = true;
            break;
          } catch { /* try next */ }
        }
        if (!clicked) throw new Error(`No element matching: ${description}`);
        await page.waitForTimeout(500);
        screenshot = await takeScreenshot(page, screenshotLabel("click", description.slice(0, 20)));
        resultText = `Clicked: ${description}`;
        break;
      }
      case "fill": {
        const { label, value } = input as { label: string; value: string };
        await saveSnapshotBeforeAction(page, observation);
        // Candidate 1: an input/textarea inside a <div> that also contains a matching <label>.
        const byContainer = page
          .locator("div")
          .filter({ has: page.locator("label", { hasText: label }) })
          .locator("input, textarea")
          .first();
        const byPlaceholder = page.getByPlaceholder(label, { exact: false });
        const byAriaLabel = page.getByLabel(label, { exact: false });
        let filled = false;
        for (const el of [byContainer, byPlaceholder, byAriaLabel]) {
          try {
            await el.fill(value, { timeout: 5000 });
            filled = true;
            break;
          } catch { /* try next */ }
        }
        if (!filled) throw new Error(`No input field matching: ${label}`);
        await page.waitForTimeout(300);
        screenshot = await takeScreenshot(page, screenshotLabel("fill", label.slice(0, 20)));
        resultText = `Filled "${label}" with "${value}"`;
        break;
      }
      case "select": {
        const { label, value } = input as { label: string; value: string };
        await saveSnapshotBeforeAction(page, observation);
        const byAriaLabel = page.getByLabel(label, { exact: false });
        const byContainer = page
          .locator("div")
          .filter({ has: page.locator("label", { hasText: label }) })
          .locator("select")
          .first();
        let selected = false;
        for (const el of [byAriaLabel, byContainer]) {
          try {
            await el.selectOption({ label: value }, { timeout: 5000 });
            selected = true;
            break;
          } catch { /* try next */ }
        }
        if (!selected) throw new Error(`Could not select "${value}" in "${label}"`);
        await page.waitForTimeout(300);
        screenshot = await takeScreenshot(page, screenshotLabel("select", label.slice(0, 20)));
        resultText = `Selected "${value}" in "${label}"`;
        break;
      }
      case "diff_since_last_action": {
        resultText = await getDiffFromSnapshot(page, observation);
        break;
      }
      case "read_page_text": {
        resultText = await readPageText(page);
        break;
      }
      case "read_accessibility_tree": {
        resultText = await readAccessibilityTree(page);
        break;
      }
      case "read_console_logs": {
        const logs = getRecentConsoleLogs(observation);
        resultText = logs.length > 0 ? JSON.stringify(logs) : "(no console logs)";
        break;
      }
      case "read_network_errors": {
        const errors = getRecentNetworkErrors(observation);
        resultText = errors.length > 0 ? JSON.stringify(errors) : "(no network errors)";
        break;
      }
      case "post_outcome": {
        const { achieved, reason } = input as { achieved: boolean; reason: string };
        // Outcomes are only recorded when the agent was actually given a scenario;
        // the tool still answers "Outcome recorded." either way.
        if (scenario) {
          const outcome: ScenarioOutcome = {
            scenarioId: scenario.id,
            scenarioTitle: scenario.title,
            agentId,
            agentName: agentLog.agentName,
            achieved: Boolean(achieved),
            reason: String(reason),
          };
          scenarioOutcomes.push(outcome);
          console.log(`  ${achieved ? "✓" : "✗"} [outcome] "${scenario.title}": ${achieved ? "achieved" : "NOT achieved"} — ${reason}`);
        }
        resultText = "Outcome recorded.";
        break;
      }
      case "post_feedback": {
        const { title, body, category } = input as { title: string; body: string; category: string };
        // Unknown categories fall back to "ux".
        const safeCategory = VALID_CATEGORIES.includes(String(category)) ? String(category) : "ux";
        screenshot = await takeScreenshot(page, screenshotLabel("feedback", String(title).slice(0, 20)));
        const finding: Finding = {
          id: `${agentId}_${Date.now()}`,
          runId: runLog.runId,
          agentId,
          agentName: agentLog.agentName,
          role: agentLog.persona,
          title: String(title),
          body: String(body),
          category: safeCategory,
          timestamp: new Date().toISOString(),
          screenshotPath: screenshot.filePath,
        };
        saveFinding(finding);
        agentLog.feedbacksSaved.push({ title: String(title), category: safeCategory, findingId: finding.id });
        console.log(`  → [findings] saved: "${title}" (${safeCategory})`);
        resultText = `Feedback recorded: "${title}" (will become an Issue after triage)`;
        break;
      }
      default: {
        // Not a built-in browser tool: hand it to the target-specific executor.
        const apiResult = await targetConfig.execute(toolName, input, agentId);
        resultText = JSON.stringify(apiResult);
        break;
      }
    }
  } catch (e) {
    isError = true;
    resultText = `error: ${String(e)}`;
    // Best effort: capture the failing state, but never let this mask the original error.
    try {
      screenshot = await takeScreenshot(page, `error_${toolName}`);
    } catch { /* ignore */ }
  }

  agentLog.actions.push({
    timestamp: new Date().toISOString(),
    tool: toolName,
    input,
    screenshotPath: screenshot?.filePath ?? null,
    durationMs: Date.now() - startedAt,
  });

  const sendToClaude = isError || TOOLS_THAT_SEND_SCREENSHOT.has(toolName);
  return { text: resultText, screenshot, sendToClaude };
}
|
|
839
|
+
|
|
840
|
+
/**
 * Runs one browser agent: opens the app, then lets the model drive the page through
 * the browser tools for a bounded number of turns.
 *
 * The function never rejects once the agent log exists: any failure, including a
 * failure of the very first page load or screenshot, is recorded on the returned log
 * (`status: "error"`, `error` set) so that one broken agent cannot abort sibling
 * agents that are running in parallel.
 *
 * @param agent            Identity, persona and role used to build the system prompt.
 * @param page             Playwright page dedicated to this agent.
 * @param productSpec      Discovered product description inserted into the prompt.
 * @param assignment       Optional scenario or lens that focuses this run.
 * @param scenarioOutcomes Shared list that receives the agent's `post_outcome` result.
 * @returns The agent's log, with `completedAt` always populated.
 */
async function runBrowserAgent(
  agent: { id: string; name: string; persona: string; role: string },
  page: Page,
  productSpec: ProductSpec,
  assignment: { scenario?: Scenario; lens?: string } = {},
  scenarioOutcomes: ScenarioOutcome[] = [],
): Promise<BrowserAgentLog> {
  // Upper bound on model turns for a single agent.
  const MAX_ITERATIONS = 12;
  // Tools that count as progress; five consecutive actions without any of them triggers a "stuck" hint.
  const PROGRESS_TOOLS = new Set(["navigate", "fill", "post_feedback"]);

  const assignmentLabel = assignment.scenario
    ? `[scenario: ${assignment.scenario.title.slice(0, 35)}]`
    : assignment.lens
      ? `[lens: ${assignment.lens.slice(0, 30)}...]`
      : "[free exploration]";
  console.log(`\n[browser] ${agent.name} start ${assignmentLabel}`);

  const agentLog: BrowserAgentLog = {
    agentName: agent.name,
    persona: agent.persona,
    startedAt: new Date().toISOString(),
    completedAt: null,
    status: "completed",
    iterations: 0,
    actions: [],
    feedbacksSaved: [],
    error: null,
  };

  const observation = setupObservation(page);

  const systemPrompt = `You are "${agent.name}".
Role: ${agent.role}
Persona: ${agent.persona}

You are a real user of "${productSpec.appName}".
Use the browser tools to navigate the app and carry out everyday tasks.

[App Overview]
${productSpec.appDescription}

[How to Proceed]
1. Navigate to a page with navigate
2. Perform actual tasks on that page
3. If you find any issues, record them with post_feedback (they become Issues after triage)
4. Move to another page and repeat
5. Finish after 8–10 actions

[Using Observation Tools]
- To verify an action was actually applied, call diff_since_last_action
- If data isn't reflected or errors appear, call read_network_errors
- For unexpected behavior, call read_console_logs to check JS errors
- If problems are found, record them with post_feedback

[Using API Check Tools (tools prefixed with [API check])]
- After a browser action, verify the actual saved state via API
- Data visible in the browser but missing in the API (or vice versa) is an inconsistency bug — report with post_feedback

[Using view_screen]
- Call it once right after navigate
- Do not call it repeatedly on the same page

[Reference: Implemented Features]
${productSpec.features}
${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\n` : ""}${goalsSection(productSpec)}${assignment.scenario
    ? `\n[Your Task for This Run]\nTitle: ${assignment.scenario.title}\nYou are: ${assignment.scenario.context}\nGoal: ${assignment.scenario.goal}\nConstraints: ${assignment.scenario.constraints}\n\nFocus on completing this task naturally as this user. Report any issues you encounter along the way.\nWhen done (or if you cannot complete the goal), call post_outcome with achieved=true/false and a brief reason.`
    : assignment.lens
      ? `\n[Focus Area for This Run]\n${assignment.lens}\nKeep this perspective in mind and prioritize reporting related issues.`
      : ""}`;

  try {
    // The initial load is inside the try so that an unreachable app is logged as this
    // agent's error instead of rejecting the whole parallel batch.
    await page.goto(BASE_URL, { waitUntil: "networkidle" });
    await page.waitForTimeout(1000);
    const initialScreenshot = await takeScreenshot(page, "initial");

    const messages: Anthropic.MessageParam[] = [
      {
        role: "user",
        content: [
          { type: "image", source: { type: "base64", media_type: "image/png", data: initialScreenshot.base64 } },
          { type: "text", text: "The app is open. Start using it." },
        ],
      },
    ];

    while (agentLog.iterations < MAX_ITERATIONS) {
      agentLog.iterations++;

      const response = await createMessageWithRetry(client, {
        model: defaultModel,
        max_tokens: 1024,
        system: systemPrompt,
        tools: BROWSER_TOOLS,
        messages,
      });

      const assistantContent = response.content;
      messages.push({ role: "assistant", content: assistantContent });

      const toolUses = assistantContent.filter(
        (b): b is Anthropic.ToolUseBlock => b.type === "tool_use"
      );

      // The model finished on its own: no tool requested, or it explicitly ended its turn.
      if (toolUses.length === 0 || response.stop_reason === "end_turn") {
        agentLog.status = "completed";
        break;
      }

      // Still requesting tools on the final allowed turn: the run ends because of the cap.
      if (agentLog.iterations >= MAX_ITERATIONS) agentLog.status = "iteration_limit";

      const toolResults: Anthropic.ToolResultBlockParam[] = [];
      for (const toolUse of toolUses) {
        console.log(`  → ${toolUse.name}(${JSON.stringify(toolUse.input).slice(0, 60)})`);

        const { text, screenshot, sendToClaude } = await executeBrowserTool(
          toolUse.name,
          toolUse.input as Record<string, unknown>,
          page,
          agentLog,
          observation,
          agent.id,
          scenarioOutcomes,
          assignment.scenario,
        );

        // Attach the screenshot only when the tool asked for it and one was taken.
        const content: Anthropic.ToolResultBlockParam["content"] =
          sendToClaude && screenshot
            ? [
                { type: "text", text },
                { type: "image", source: { type: "base64", media_type: "image/png", data: screenshot.base64 } },
              ]
            : text;

        toolResults.push({ type: "tool_result", tool_use_id: toolUse.id, content });
      }

      // Tell the model how many turns are left and nudge it to wrap up near the end.
      const remaining = MAX_ITERATIONS - agentLog.iterations;
      let budgetHint = `[${remaining} turns remaining]`;
      if (remaining <= 2) {
        budgetHint += " Last turns. Post any remaining findings with post_feedback, then finish.";
      } else if (remaining <= 4) {
        budgetHint += " Start wrapping up.";
      }

      const recent = agentLog.actions.slice(-5).map((a) => a.tool);
      if (recent.length >= 5 && !recent.some((t) => PROGRESS_TOOLS.has(t))) {
        budgetHint += " You seem stuck on the same page. Navigate to a different page.";
      }

      const observationWarning = buildObservationWarning(observation);
      if (observationWarning) {
        budgetHint += `\n\n${observationWarning}\nUse read_console_logs or read_network_errors for details.`;
      }

      // Append the hint to the last tool result (toolUses is non-empty here, so one exists).
      const last = toolResults[toolResults.length - 1];
      const lastContent = last.content;
      toolResults[toolResults.length - 1] = {
        ...last,
        content:
          typeof lastContent === "string"
            ? `${lastContent}\n\n${budgetHint}`
            : ([...(lastContent as unknown[]), { type: "text" as const, text: budgetHint }] as Anthropic.ToolResultBlockParam["content"]),
      };

      messages.push({ role: "user", content: toolResults });
    }
  } catch (e) {
    agentLog.status = "error";
    agentLog.error = String(e);
    console.error(`[${agent.name}] error:`, e);
  } finally {
    agentLog.completedAt = new Date().toISOString();
  }

  console.log(`[browser] ${agent.name} done (feedback: ${agentLog.feedbacksSaved.length})`);
  return agentLog;
}
|
|
1016
|
+
|
|
1017
|
+
// ================================================================
|
|
1018
|
+
// Main
|
|
1019
|
+
// ================================================================
|
|
1020
|
+
|
|
1021
|
+
/**
 * Picks up to `count` agents at random without mutating the input array.
 *
 * Uses a Fisher–Yates shuffle so every ordering is equally likely. Sorting with a
 * random comparator gives the sort an inconsistent ordering and skews the selection
 * in an engine-dependent way.
 *
 * @param agents Candidate agents; left untouched.
 * @param count  Maximum number of agents to return.
 * @returns A new array holding at most `count` distinct entries of `agents`.
 */
function pickAgents<T>(agents: T[], count: number): T[] {
  const shuffled = [...agents];
  for (let i = shuffled.length - 1; i > 0; i--) {
    // Swap position i with a uniformly chosen position in [0, i].
    const j = Math.floor(Math.random() * (i + 1));
    const tmp = shuffled[i] as T;
    shuffled[i] = shuffled[j] as T;
    shuffled[j] = tmp;
  }
  return shuffled.slice(0, count);
}
|
|
1024
|
+
|
|
1025
|
+
// Splits work 7:3 by dispatch index: within every run of ten indices the first seven
// receive a scenario and the last three receive a lens. When no scenarios exist,
// every index receives a lens. Both lists are cycled through by index.
function pickAssignment(idx: number, scenarios: Scenario[]): { scenario?: Scenario; lens?: string } {
  const slotInCycle = idx % 10;
  const useScenario = slotInCycle < 7 && scenarios.length > 0;

  if (!useScenario) {
    const lens = UNIVERSAL_LENSES[idx % UNIVERSAL_LENSES.length];
    return { lens };
  }

  const scenario = scenarios[idx % scenarios.length];
  return { scenario };
}
|
|
1032
|
+
|
|
1033
|
+
/**
 * Runs the whole pipeline: product discovery → org design → scenario design →
 * (optional) account manager → HR agent → API explorer/regression agents →
 * browser agents → triage → HTML report → coverage update.
 *
 * Whatever happens after the browser is launched, the `finally` block closes the
 * browser and saves the run log. Sets `process.exitCode = 1` when no agents are
 * available or when at least one scenario was reported as not achieved.
 */
async function main() {
  initDirs();
  // Initialise the run log first so that the finally block below can always save it,
  // no matter which stage fails.
  initRunLog(0, GITHUB_REPO);

  // 1. product discovery (cache or live)
  const browser = await chromium.launch({ headless: true });
  let productSpec: ProductSpec;
  const scenarioOutcomes: ScenarioOutcome[] = [];
  try {
    const cached = loadCachedSpec(BASE_URL);
    if (cached) {
      console.log(`\n[product-discovery] using cache (date: ${cached.discoveredAt?.slice(0, 10) ?? "unknown"}, confidence: ${cached.confidence})`);
      productSpec = cached;
    } else {
      const discoveryContext = await browser.newContext({ viewport: { width: 1024, height: 640 } });
      const discoveryPage = await discoveryContext.newPage();
      productSpec = await discoverProduct(BASE_URL, discoveryPage, client, defaultModel, targetConfig.projectPath);
      await discoveryContext.close();
    }

    // 2. org design (coverage-aware)
    const coverageSummary = computeWeightedSummary();
    console.log(`\n[coverage] ${coverageSummary.formatted.split("\n")[0]}`);
    const orgDesign = await designOrg(productSpec, client, defaultModel, coverageSummary.formatted);

    // 3. open issues + scenario design (both feed into HR)
    const openIssues = await fetchOpenIssues(githubOptions);
    const scenarios = await designScenarios(productSpec, openIssues, client, defaultModel, 5, coverageSummary.formatted);

    // 3.5. Account Manager (only when credentials are configured)
    let testAccounts: TestAccount[] = [];
    if (targetConfig.credentials) {
      const accountContext = await browser.newContext({ viewport: { width: 1024, height: 640 } });
      try {
        testAccounts = await runAccountManager(
          BASE_URL,
          targetConfig.credentials,
          productSpec,
          accountContext,
          client,
          defaultModel,
          runLog.runId,
        );
      } finally {
        await accountContext.close();
      }
    }

    // 4. HR agent
    await runHRAgent(productSpec, orgDesign.hrGuidance, openIssues, scenarios, testAccounts);

    // 5. load agents + closed issues
    const allAgents = loadAgents();
    if (allAgents.length === 0) {
      console.error("No agents found. Check agents.json.");
      // Do not call process.exit() here: it would terminate immediately and skip the
      // finally block, leaving the browser open and the run log unsaved.
      process.exitCode = 1;
      return;
    }
    const closedIssues = await fetchClosedIssues(githubOptions);

    // The agent count is now known, so record it in the run summary.
    runLog.summary.totalAgents = allAgents.length;

    // 6. API agents (exploration + regression)
    // The last agent is reserved for regression; all others are explorer candidates.
    const allExplorers = allAgents.slice(0, -1);
    const explorerAgents = pickAgents(allExplorers, Math.min(MAX_EXPLORERS, allExplorers.length));
    const regressionAgent = allAgents[allAgents.length - 1];
    console.log(`\nexplorers: ${explorerAgents.length} (max: ${MAX_EXPLORERS}) / regression: 1`);

    // agentId → assignment (used for coverage calculation and report generation)
    const agentAssignments = new Map<string, { scenario?: Scenario; lens?: string }>();

    // Global counter for scenario/lens assignment (7:3 ratio)
    let dispatchIdx = 0;

    // Explorers run in batches of CONCURRENCY with a pause between batches.
    const CONCURRENCY = 2;
    for (let i = 0; i < explorerAgents.length; i += CONCURRENCY) {
      const batch = explorerAgents.slice(i, i + CONCURRENCY);
      await Promise.all(batch.map((agent) => {
        const assignment = pickAssignment(dispatchIdx++, scenarios);
        agentAssignments.set(agent.id, assignment);
        return runExplorer(agent, productSpec, assignment, scenarioOutcomes);
      }));
      if (i + CONCURRENCY < explorerAgents.length) {
        console.log("\n[batch done] waiting 5s before next batch...");
        await sleep(5000);
      }
    }

    if (MAX_EXPLORERS === 0) {
      console.log("\n[regression] skipped (MAX_EXPLORERS=0)");
    } else if (closedIssues.length > 0) {
      await sleep(3000);
      await runRegressionAgent(regressionAgent, closedIssues, productSpec);
    } else {
      // Nothing to regress against, so use the reserved agent as one more explorer.
      console.log("\n[regression] no closed issues — running as explorer");
      const assignment = pickAssignment(dispatchIdx++, scenarios);
      agentAssignments.set(regressionAgent.id, assignment);
      await runExplorer(regressionAgent, productSpec, assignment, scenarioOutcomes);
    }

    // 7. browser agents
    const browserAgents = pickAgents(allAgents, Math.min(MAX_BROWSERS, allAgents.length));
    console.log(`\nlaunching ${browserAgents.length} browser agents in parallel (max: ${MAX_BROWSERS})`);
    browserAgents.forEach((a) => console.log(`  - ${a.name} (${a.role})`));

    await sleep(2000);
    await Promise.all(
      browserAgents.map(async (agent) => {
        const assignment = pickAssignment(dispatchIdx++, scenarios);
        agentAssignments.set(agent.id, assignment);

        // Reuse a saved storageState when a test account with a matching role has one.
        const matchedAccount = testAccounts.find((a) => a.role === agent.role && a.storageStatePath);
        const contextOptions: Parameters<typeof browser.newContext>[0] = {
          viewport: { width: 1024, height: 640 },
        };
        if (matchedAccount?.storageStatePath) {
          contextOptions.storageState = matchedAccount.storageStatePath;
        }

        const context = await browser.newContext(contextOptions);
        const page = await context.newPage();
        try {
          return await runBrowserAgent(agent, page, productSpec, assignment, scenarioOutcomes);
        } finally {
          await context.close();
        }
      })
    );

    // 8. triage (API + browser findings)
    await sleep(2000);
    console.log(`\n[triage] collected findings: ${collectedFindings.length}`);
    let triageResult = { issued: [] as string[], skipped: [] as string[], unprocessed: [] as string[], issuesCreated: 0 };
    try {
      triageResult = await runTriageAgent(collectedFindings, client, defaultModel, githubOptions);
      runLog.summary.totalIssuesPosted += triageResult.issuesCreated;
    } catch (e) {
      // A triage failure is logged but does not stop report generation.
      console.error("[triage] error:", e);
    }

    // 9. generate HTML report
    const reportPath = generateReport(runLog, collectedFindings, triageResult, productSpec, scenarios, agentAssignments, scenarioOutcomes);
    console.log(`\n[report] ${reportPath}`);

    // 10. update coverage
    updateCoverage(runLog.runId, collectedFindings, agentAssignments);

  } finally {
    // Always save the log, even on failure. The nested try/finally blocks keep
    // saveRunLog() reachable if closing the browser or estimating the cost throws,
    // while still letting that error propagate to the caller.
    try {
      await browser.close();
    } finally {
      runLog.completedAt = new Date().toISOString();
      runLog.summary.rateLimitRetries = rateLimitRetries;
      try {
        runLog.summary.cost.estimatedUSD = await estimateCost(
          defaultModel, llmProvider,
          runLog.summary.cost.inputTokens,
          runLog.summary.cost.outputTokens,
        );
      } finally {
        saveRunLog();
      }
    }
  }

  console.log("\nAll agents done.");
  console.log(`  findings collected: ${collectedFindings.length}`);
  console.log(`  tokens: ${runLog.summary.cost.inputTokens} in / ${runLog.summary.cost.outputTokens} out — estimated cost: ${formatCostUSD(runLog.summary.cost.estimatedUSD)}`);
  console.log(`  GitHub issues created: ${runLog.summary.totalIssuesPosted}`);
  console.log(`  regression checks: ${runLog.summary.regressionChecked} (regressed: ${runLog.summary.regressionFailed})`);
  console.log(`  screenshots: ${screenshotDir}`);

  if (scenarioOutcomes.length > 0) {
    const failed = scenarioOutcomes.filter((o) => !o.achieved);
    console.log(`  scenarios: ${scenarioOutcomes.length - failed.length}/${scenarioOutcomes.length} achieved`);
    if (failed.length > 0) {
      console.log(`  ⚠ failed scenarios:`);
      failed.forEach((o) => console.log(`    ✗ ${o.scenarioTitle} — ${o.reason}`));
      // Signal failure to the calling process without cutting off pending output.
      process.exitCode = 1;
    }
  }
}
|
|
1212
|
+
|
|
1213
|
+
// Entry point. An unhandled failure is logged and reported through a non-zero exit
// code; without this, a crashed run would still exit with status 0.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|