@m8i-51/shoal 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/run.ts ADDED
@@ -0,0 +1,1213 @@
1
+ /**
2
+ * run.ts — Multi-agent runner
3
+ * hr → product discovery → api agents + browser agents → triage
4
+ *
5
+ * Usage:
6
+ * ANTHROPIC_API_KEY=xxx GITHUB_TOKEN=xxx GITHUB_REPO=owner/repo npx tsx run.ts
7
+ */
8
+
9
+ import { config as loadEnv } from "dotenv";
10
+ loadEnv({ override: true }); // .env を常に優先(継承した環境変数を上書き)
11
+ import Anthropic from "@anthropic-ai/sdk";
12
+ import { chromium, type Page } from "playwright";
13
+ import * as fs from "fs";
14
+ import * as path from "path";
15
+ import { createLLMClient } from "./framework/llm-client";
16
+ import type { Tool } from "./framework/llm-client";
17
+ import { createMessageWithRetry, runAgentLoop, sleep, rateLimitRetries } from "./framework/agent-loop";
18
+ import { collectedFindings, initRunLog, saveRunLog, saveFinding, runLog } from "./framework/findings";
19
+ import { loadAgents, addAgent, retireAgent } from "./framework/agent-store";
20
+ import { updateCoverage, computeWeightedSummary } from "./framework/coverage";
21
+ import { postGitHubIssue, fetchClosedIssues, fetchOpenIssues } from "./framework/github";
22
+ import {
23
+ setupObservation,
24
+ getRecentConsoleLogs,
25
+ getRecentNetworkErrors,
26
+ buildObservationWarning,
27
+ readPageText,
28
+ readAccessibilityTree,
29
+ saveSnapshotBeforeAction,
30
+ getDiffFromSnapshot,
31
+ type ObservationState,
32
+ } from "./framework/observation";
33
+ import { discoverProduct, loadCachedSpec, type ProductSpec } from "./framework/product-discovery";
34
+ import { designOrg, UNIVERSAL_LENSES } from "./framework/org-designer";
35
+ import { designScenarios, type Scenario, type ScenarioOutcome } from "./framework/scenario-designer";
36
+ import { runTriageAgent } from "./framework/triage";
37
+ import { generateReport } from "./framework/report";
38
+ import type { AgentLog, Finding, RegressionCheck } from "./framework/types";
39
+ import { loadTarget } from "./targets";
40
+ import { runAccountManager, loadTestAccounts, type TestAccount } from "./framework/account-manager";
41
+ import { estimateCost, formatCostUSD } from "./framework/cost";
42
+
43
/** Origin of the app under test; browser navigation paths are appended to it. */
const BASE_URL = process.env.BASE_URL ?? "http://localhost:3000";
const GITHUB_TOKEN = process.env.GITHUB_TOKEN ?? "";
const GITHUB_REPO = process.env.GITHUB_REPO ?? "";
/** Token/repo pair handed to the GitHub helper functions. */
const githubOptions = { token: GITHUB_TOKEN, repo: GITHUB_REPO };

const TARGET = process.env.TARGET ?? "none";
let targetConfig = loadTarget(TARGET);

// Load shoal.config.ts / .js / .mjs from the working directory if present.
// Only the first file that exists is tried; if it fails to load or is invalid,
// the target returned by loadTarget() stays in effect.
for (const name of ["shoal.config.ts", "shoal.config.js", "shoal.config.mjs"]) {
  const cfgPath = path.join(process.cwd(), name);
  if (!fs.existsSync(cfgPath)) continue;
  try {
    // import() takes a URL-style specifier. A raw absolute path such as
    // "C:\proj\shoal.config.mjs" is rejected by Node's ESM loader on Windows,
    // so convert the path to a file:// URL first.
    const { pathToFileURL } = await import("url");
    const mod = await import(pathToFileURL(cfgPath).href);
    const t = mod.target ?? mod.default?.target;
    // appTools is spread and mapped further down, so it must be a real array.
    if (Array.isArray(t?.appTools) && typeof t?.execute === "function") {
      targetConfig = t;
      console.log(`[config] loaded: ${name}`);
    } else {
      console.warn(`[config] ${name} found but does not export a valid target`);
    }
  } catch (e) {
    console.warn(`[config] failed to load ${name}:`, e);
  }
  break;
}

/**
 * Reads a non-negative integer from an environment variable.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable is unset or does not parse
 *   to a non-negative integer (previously such input produced NaN).
 * @returns The parsed count, or `fallback`.
 */
function parseCountEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined) return fallback;
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed) || parsed < 0) {
    console.warn(`[config] ignoring invalid ${name}="${raw}"; using ${fallback}`);
    return fallback;
  }
  return parsed;
}

// skip exploration when no API tools are configured
const MAX_EXPLORERS = targetConfig.appTools.length > 0
  ? parseCountEnv("MAX_EXPLORERS", 4)
  : 0;
const MAX_BROWSERS = parseCountEnv("MAX_BROWSERS", 2);

const { client, defaultModel, provider: llmProvider } = createLLMClient();
78
+
79
+ // ================================================================
80
+ // Screenshots
81
+ // ================================================================
82
+
83
/** Directory that receives this run's screenshots; assigned by initDirs(). */
let screenshotDir: string;

/**
 * Creates `logs/screenshots/<runId>` under the current working directory and
 * points `screenshotDir` at it.
 *
 * @returns The generated run id (`run_` followed by the current epoch millis).
 */
function initDirs(): string {
  const runId = "run_" + Date.now();
  const segments = [process.cwd(), "logs", "screenshots", runId];
  screenshotDir = path.join(...segments);
  fs.mkdirSync(screenshotDir, { recursive: true });
  return runId;
}
91
+
92
/**
 * Captures the current viewport as a PNG, writes it into `screenshotDir`, and
 * returns both the encoded image and the path it was written to.
 *
 * @param page - Playwright page to capture (viewport only, not the full page).
 * @param label - Free-form tag for the file name. Every character outside
 *   [a-zA-Z0-9] becomes "_", and the result is capped at 100 characters.
 * @returns The PNG as base64 plus the file path on disk.
 */
async function takeScreenshot(page: Page, label: string): Promise<{ base64: string; filePath: string }> {
  // Labels come from LLM-supplied text (e.g. the full path passed to
  // `navigate`), so they can be arbitrarily long. Cap the length so the file
  // name stays within common filesystem limits.
  const safeLabel = label.replace(/[^a-zA-Z0-9]/g, "_").slice(0, 100);
  const fileName = `${Date.now()}_${safeLabel}.png`;
  const filePath = path.join(screenshotDir, fileName);
  const buffer = await page.screenshot({ type: "png", fullPage: false });
  fs.writeFileSync(filePath, buffer);
  return { base64: buffer.toString("base64"), filePath };
}
99
+
100
+ // ================================================================
101
+ // API agent tools
102
+ // ================================================================
103
+
104
/** Finding categories accepted by post_feedback; anything else is coerced to "ux" by the handlers. */
const VALID_CATEGORIES = ["ux", "feature-request", "bug", "goal-gap"];

/**
 * Tool schema that lets an agent record a finding.
 * The category enum is derived from VALID_CATEGORIES so the schema advertised
 * to the model and the runtime validation cannot drift apart.
 */
const POST_FEEDBACK_TOOL: Tool = {
  name: "post_feedback",
  description: "Record a finding about the app — usability issues, feature requests, or bug-like behavior. / アプリへのフィードバックを記録する",
  input_schema: {
    type: "object",
    properties: {
      title: { type: "string" },
      body: { type: "string" },
      // Copy so later mutation of the schema cannot alter VALID_CATEGORIES.
      category: { type: "string", enum: [...VALID_CATEGORIES] },
    },
    required: ["title", "body", "category"],
  },
};
119
+
120
/**
 * Tool schema for reporting that a previously closed issue has reappeared.
 * Every declared property is required.
 */
const REPORT_REGRESSION_TOOL: Tool = ((): Tool => {
  const properties = {
    original_issue_number: { type: "number" },
    original_issue_title: { type: "string" },
    title: { type: "string" },
    body: { type: "string" },
  };
  return {
    name: "report_regression",
    description: "Report a regression when a previously fixed bug has reappeared as a GitHub Issue. / 修正済みバグの再発をGitHub Issueとして報告する",
    input_schema: {
      type: "object",
      properties,
      // Same order as the property declarations above.
      required: Object.keys(properties),
    },
  };
})();
134
+
135
/**
 * Tool schema for recording that a closed issue was re-checked and is still fixed.
 * Every declared property is required.
 */
const MARK_VERIFIED_TOOL: Tool = ((): Tool => {
  const properties = {
    original_issue_number: { type: "number" },
    original_issue_title: { type: "string" },
    note: { type: "string" },
  };
  return {
    name: "mark_verified",
    description: "Record that a closed Issue has been verified as still fixed. / 修正済みIssueが問題なく修正されていることを確認した場合に呼ぶ",
    input_schema: {
      type: "object",
      properties,
      // Same order as the property declarations above.
      required: Object.keys(properties),
    },
  };
})();
148
+
149
/**
 * Tool schema with which a scenario-driven agent states whether it reached
 * its goal and why.
 */
const POST_OUTCOME_TOOL: Tool = ((): Tool => {
  const achieved = {
    type: "boolean",
    description: "true if you successfully completed the goal, false if you could not",
  };
  const reason = {
    type: "string",
    description: "Brief explanation (1-2 sentences) of why the goal was or was not achieved",
  };
  return {
    name: "post_outcome",
    description: "Record whether you achieved your scenario goal. Call this at the end of your run if you were given a [Your Task for This Run] section. / [Your Task for This Run] セクションがある場合のみ、run の最後にゴール達成可否を記録する",
    input_schema: {
      type: "object",
      properties: { achieved, reason },
      required: ["achieved", "reason"],
    },
  };
})();
167
+
168
/** Tools offered to explorer agents: the target's app tools, then feedback and outcome reporting. */
const EXPLORER_TOOLS: Tool[] = [
  ...targetConfig.appTools,
  POST_FEEDBACK_TOOL,
  POST_OUTCOME_TOOL,
];
169
+
170
/**
 * Builds the optional "[App Goals]" prompt section.
 *
 * @param spec - Product spec; only `appGoals` is read.
 * @returns An empty string when there are no goals, otherwise the section
 *   text with one "- goal" bullet per line, wrapped in leading and trailing
 *   newlines.
 */
function goalsSection(spec: ProductSpec): string {
  const goals = spec.appGoals;
  if (!goals?.length) {
    return "";
  }
  const bullets = goals.map((goal) => `- ${goal}`).join("\n");
  return `\n[App Goals]\nThis app is designed to achieve the following goals. If you find anything that prevents these goals from being met, use category "goal-gap" when posting feedback.\n${bullets}\n`;
}
174
/** Tools offered to regression agents: the target's app tools, then the two verdict-reporting tools. */
const REGRESSION_TOOLS: Tool[] = [
  ...targetConfig.appTools,
  REPORT_REGRESSION_TOOL,
  MARK_VERIFIED_TOOL,
];
175
+
176
/**
 * Builds the tool executor handed to the agent loop for one API agent.
 *
 * The returned function handles the four built-in reporting tools itself and
 * forwards every other tool name to `targetConfig.execute`. Whatever happens,
 * the call is appended to `agentLog.actions`, the run-wide action counter is
 * incremented, and the result comes back as a JSON string. Exceptions are
 * converted into `{ error: "<message>" }` rather than propagated.
 *
 * @param agentLog - Log of the agent issuing the calls; mutated in place.
 * @param scenarioOutcomes - Shared list that post_outcome appends to.
 * @param scenario - Scenario assigned to the agent, if any. Without one,
 *   post_outcome is acknowledged but nothing is stored.
 */
function makeExecutor(agentLog: AgentLog, scenarioOutcomes: ScenarioOutcome[], scenario?: Scenario) {
  // post_outcome: remember whether the assigned scenario goal was reached.
  const recordOutcome = (input: Record<string, unknown>): unknown => {
    const { achieved, reason } = input as { achieved: boolean; reason: string };
    if (scenario) {
      const outcome: ScenarioOutcome = {
        scenarioId: scenario.id,
        scenarioTitle: scenario.title,
        agentId: agentLog.agentId,
        agentName: agentLog.agentName,
        achieved: Boolean(achieved),
        reason: String(reason),
      };
      scenarioOutcomes.push(outcome);
      console.log(` ${achieved ? "✓" : "✗"} [outcome] "${scenario.title}": ${achieved ? "achieved" : "NOT achieved"} — ${reason}`);
    }
    return { recorded: true };
  };

  // post_feedback: persist a finding; unknown categories fall back to "ux".
  const saveFeedback = (input: Record<string, unknown>): unknown => {
    const { title, body, category } = input as { title: string; body: string; category: string };
    const safeCategory = VALID_CATEGORIES.includes(String(category)) ? String(category) : "ux";
    const finding: Finding = {
      id: `${agentLog.agentId}_${Date.now()}`,
      runId: runLog.runId,
      agentId: agentLog.agentId,
      agentName: agentLog.agentName,
      role: agentLog.role,
      title: String(title),
      body: String(body),
      category: safeCategory,
      timestamp: new Date().toISOString(),
    };
    saveFinding(finding);
    agentLog.issuesPosted.push({ title: String(title), category: safeCategory, url: null });
    console.log(` → [findings] saved: "${title}" (${safeCategory})`);
    return { saved: true, findingId: finding.id };
  };

  // report_regression: open a GitHub issue and count the check as failed.
  const reportRegression = async (input: Record<string, unknown>): Promise<unknown> => {
    const { original_issue_number, original_issue_title, title, body } = input as {
      original_issue_number: number; original_issue_title: string; title: string; body: string;
    };
    const url = await postGitHubIssue(
      `[regression] ${title}`,
      `**Regression:** #${original_issue_number} "${original_issue_title}" has reappeared.\n\n${body}\n\n---\n*This Issue was auto-generated by an AI regression agent*`,
      ["regression", "feedback-agent"],
      githubOptions
    );
    const check: RegressionCheck = {
      issueNumber: Number(original_issue_number),
      issueTitle: String(original_issue_title),
      status: "regressed",
      note: String(body),
      regressionUrl: url,
    };
    agentLog.regressionChecks.push(check);
    runLog.summary.regressionChecked++;
    runLog.summary.regressionFailed++;
    return { reported: true, url };
  };

  // mark_verified: note that a closed issue is still fixed.
  const markVerified = (input: Record<string, unknown>): unknown => {
    const { original_issue_number, original_issue_title, note } = input as {
      original_issue_number: number; original_issue_title: string; note: string;
    };
    agentLog.regressionChecks.push({
      issueNumber: Number(original_issue_number),
      issueTitle: String(original_issue_title),
      status: "fixed",
      note: String(note),
      regressionUrl: null,
    });
    runLog.summary.regressionChecked++;
    console.log(` ✓ verified: #${original_issue_number} "${original_issue_title}"`);
    return { verified: true };
  };

  return async (toolName: string, input: Record<string, unknown>): Promise<string> => {
    const startedAt = Date.now();
    let result: unknown;
    try {
      if (toolName === "post_outcome") {
        result = recordOutcome(input);
      } else if (toolName === "post_feedback") {
        result = saveFeedback(input);
      } else if (toolName === "report_regression") {
        result = await reportRegression(input);
      } else if (toolName === "mark_verified") {
        result = markVerified(input);
      } else {
        // Not a built-in tool: let the configured target handle it.
        result = await targetConfig.execute(toolName, input, agentLog.agentId);
      }
    } catch (e) {
      result = { error: String(e) };
    }

    const finishedAt = new Date();
    agentLog.actions.push({
      timestamp: finishedAt.toISOString(),
      tool: toolName,
      input,
      result,
      durationMs: Date.now() - startedAt,
    });
    runLog.summary.totalActions++;
    return JSON.stringify(result);
  };
}
275
+
276
+ // ================================================================
277
+ // API agents (exploration / regression)
278
+ // ================================================================
279
+
280
/**
 * Runs one API explorer agent: registers its log in the run log, assembles a
 * persona-specific system prompt and hands control to the agent loop.
 *
 * @param agent - Identity and persona of the simulated user.
 * @param productSpec - Product description injected into the prompt.
 * @param assignment - Optional scenario (takes precedence) or focus lens;
 *   with neither the agent explores freely.
 * @param scenarioOutcomes - Shared list that collects post_outcome results.
 */
async function runExplorer(
  agent: { id: string; name: string; persona: string; role: string },
  productSpec: ProductSpec,
  assignment: { scenario?: Scenario; lens?: string } = {},
  scenarioOutcomes: ScenarioOutcome[] = [],
) {
  const { scenario, lens } = assignment;

  let assignmentLabel = "[free exploration]";
  if (scenario) {
    assignmentLabel = `[scenario: ${scenario.title.slice(0, 35)}]`;
  } else if (lens) {
    assignmentLabel = `[lens: ${lens.slice(0, 30)}...]`;
  }
  console.log(`\n[explorer] ${agent.name} start ${assignmentLabel}`);

  const agentLog: AgentLog = {
    agentType: "explorer",
    agentId: agent.id,
    agentName: agent.name,
    role: agent.role,
    startedAt: new Date().toISOString(),
    completedAt: null,
    status: "completed",
    iterations: 0,
    actions: [],
    issuesPosted: [],
    regressionChecks: [],
    error: null,
  };
  runLog.agents.push(agentLog);

  // Optional prompt sections; each is either empty or framed by newlines.
  const uiFeaturesSection = productSpec.uiFeatures
    ? `\n[UI-Only Features]\nThese features exist in the UI but may not be reflected in API responses. Keep them in mind when interpreting API results.\n${productSpec.uiFeatures}\n`
    : "";
  const designSection = productSpec.designContext
    ? `\n[Design Context]\n${productSpec.designContext}\n`
    : "";
  let taskSection = "";
  if (scenario) {
    taskSection = `\n[Your Task for This Run]\nTitle: ${scenario.title}\nYou are: ${scenario.context}\nGoal: ${scenario.goal}\nConstraints: ${scenario.constraints}\n\nFocus on completing this task naturally. Report any issues you encounter along the way.\nWhen done (or if you cannot complete the goal), call post_outcome with achieved=true/false and a brief reason.\n`;
  } else if (lens) {
    taskSection = `\n[Focus Area for This Run]\n${lens}\nKeep this perspective in mind and prioritize reporting related issues.\n`;
  }

  const systemPrompt = `You are "${agent.name}".
Role: ${agent.role}
Persona: ${agent.persona}

You are an employee using "${productSpec.appName}".
Use the tools to interact with the app.

${productSpec.appDescription}

If you notice anything inconvenient, a missing feature, or bug-like behavior,
report it with the post_feedback tool.

[Implemented Features]
${productSpec.features}
${uiFeaturesSection}${designSection}${goalsSection(productSpec)}${taskSection}
Take 3–5 actions, then finish.`;

  const executor = makeExecutor(agentLog, scenarioOutcomes, scenario);
  await runAgentLoop(agentLog, systemPrompt, EXPLORER_TOOLS, client, defaultModel, executor);
  console.log(`[explorer] ${agent.name} done`);
}
332
+
333
/**
 * Runs one regression agent that re-checks closed issues and records, per
 * issue, either a regression (report_regression) or a confirmation
 * (mark_verified).
 *
 * @param agent - Identity of the agent; only `id`, `name` and `role` are read here.
 * @param closedIssues - Issues to verify. Only the first 200 characters of
 *   each body are put into the prompt, flattened onto one line.
 * @param productSpec - Product description appended as reference material.
 */
async function runRegressionAgent(
  agent: { id: string; name: string; persona: string; role: string },
  closedIssues: { number: number; title: string; body: string; labels: string[] }[],
  productSpec: ProductSpec
) {
  console.log(`\n[regression] ${agent.name} start (${closedIssues.length} issues to check)`);
  const agentLog: AgentLog = {
    agentType: "regression",
    agentId: agent.id,
    agentName: agent.name,
    role: agent.role,
    startedAt: new Date().toISOString(),
    completedAt: null,
    status: "completed",
    iterations: 0,
    actions: [],
    issuesPosted: [],
    regressionChecks: [],
    error: null,
  };
  runLog.agents.push(agentLog);

  const issueList = closedIssues
    .map((i) => {
      // GitHub returns a null body for issues without a description, and
      // bodies often use CRLF line endings. Tolerate both so the excerpt
      // stays on one clean line. NOTE(review): the fetch helper may already
      // normalise null bodies; confirm.
      const excerpt = (i.body ?? "").slice(0, 200).replace(/\r?\n/g, " ");
      return `- Issue #${i.number}: ${i.title}\n ${excerpt}`;
    })
    .join("\n");

  const uiFeaturesSection = productSpec.uiFeatures
    ? `\n[UI-Only Features]\nThese features exist in the UI but may not be reflected in API responses.\n${productSpec.uiFeatures}\n`
    : "";
  const designSection = productSpec.designContext
    ? `\n[Design Context]\n${productSpec.designContext}\n`
    : "";

  const systemPrompt = `You are "${agent.name}". Act as a QA engineer.

The following Issues have been closed as fixed. Verify they are actually fixed.

[Issues to Verify]
${issueList}

[Steps]
1. Read each Issue and perform actions that could reproduce it
2. If the problem reoccurs, report it with report_regression
3. If the problem is gone, record it with mark_verified
4. Finish after checking all items

[Reference: Implemented Features]
${productSpec.features}
${uiFeaturesSection}${designSection}${goalsSection(productSpec)}`;

  // Regression agents have no scenario, so the outcome list is a throwaway.
  await runAgentLoop(agentLog, systemPrompt, REGRESSION_TOOLS, client, defaultModel, makeExecutor(agentLog, []));
  const checked = agentLog.regressionChecks.length;
  const failed = agentLog.regressionChecks.filter((c) => c.status === "regressed").length;
  console.log(`[regression] ${agent.name} done (checked: ${checked} / regressed: ${failed})`);
}
381
+
382
+ // ================================================================
383
+ // HR agent
384
+ // ================================================================
385
+
386
/**
 * Tool schemas available to the HR agent: four read-only lookups that take no
 * arguments, plus add_agent and retire_agent.
 */
const HR_TOOLS: Anthropic.Tool[] = ((): Anthropic.Tool[] => {
  // Fresh schema object per tool so no two tools share a reference.
  const noInput = () => ({ type: "object" as const, properties: {}, required: [] as string[] });
  const text = () => ({ type: "string" });

  return [
    {
      name: "get_agents",
      description: "Get the current list of registered agents. / 現在登録されているエージェント一覧を取得する",
      input_schema: noInput(),
    },
    {
      name: "get_coverage",
      description: "Get a weighted summary of what has been explored across past runs. Use this to identify underrepresented lenses and perspectives before deciding whom to hire. / 過去のrunで何がどれだけ探索されたかの重み付きサマリーを取得する。採用方針の決定前に確認すること",
      input_schema: noInput(),
    },
    {
      name: "get_open_issues",
      description: "Get the titles and labels of currently open GitHub Issues (known problems). Use this to understand what is already known and recruit agents who are likely to explore DIFFERENT areas. / 現在オープンなGitHub Issueのタイトルとラベルを取得する。既知の問題を把握し、未探索領域を掘れるペルソナを採用するために使う",
      input_schema: noInput(),
    },
    {
      name: "get_scenarios",
      description: "Get the user test scenarios generated for this run. About 70% of agents will be assigned one of these scenarios — recruit personas whose background and role naturally fit the scenario contexts. / 今回のrunで生成されたユーザーシナリオ一覧を取得する。エージェントの約70%にシナリオが割り当てられるため、シナリオの文脈に自然にフィットするペルソナを採用すること",
      input_schema: noInput(),
    },
    {
      name: "add_agent",
      description: "Register a new agent (user persona). / 新しいエージェントを登録する",
      input_schema: {
        type: "object",
        properties: { name: text(), role: text(), persona: text() },
        required: ["name", "role", "persona"],
      },
    },
    {
      name: "retire_agent",
      description: "Retire an agent (e.g. due to long tenure). / エージェントを退職させる",
      input_schema: {
        type: "object",
        properties: { agentId: text(), reason: text() },
        required: ["agentId", "reason"],
      },
    },
  ];
})();
433
+
434
/**
 * Executes a single HR tool call and returns its raw result.
 * String results are already human-readable text; everything else is a
 * JSON-serialisable value. May throw if an underlying store helper throws.
 */
function executeHRTool(
  toolUse: Anthropic.ToolUseBlock,
  openIssues: { number: number; title: string; labels: string[] }[],
  scenarios: Scenario[],
): unknown {
  switch (toolUse.name) {
    case "get_coverage": {
      const summary = computeWeightedSummary().formatted;
      console.log(" [hr] coverage summary fetched");
      return summary;
    }
    case "get_open_issues": {
      const listing = openIssues.length === 0
        ? "(no open issues — either GitHub is not configured or there are no known issues yet)"
        : openIssues.map((i) => `- #${i.number}: ${i.title} [${i.labels.join(", ")}]`).join("\n");
      console.log(` [hr] open issues fetched (${openIssues.length})`);
      return listing;
    }
    case "get_scenarios": {
      const listing = scenarios.length === 0
        ? "(no scenarios generated — all agents will use free-exploration mode)"
        : scenarios.map((s) =>
            `[${s.id}] ${s.title}\n Context: ${s.context}\n Goal: ${s.goal}\n Constraints: ${s.constraints}`
          ).join("\n\n");
      console.log(` [hr] scenarios fetched (${scenarios.length})`);
      return listing;
    }
    case "get_agents": {
      const agents = loadAgents();
      console.log(` [hr] current agents: ${agents.length}`);
      return agents.map((a) => ({ id: a.id, name: a.name, role: a.role, createdAt: a.createdAt }));
    }
    case "add_agent": {
      const { name, role, persona } = toolUse.input as { name: string; role: string; persona: string };
      const added = addAgent({ name, role, persona });
      console.log(` [hr] hired: ${name} (${role})`);
      return added;
    }
    case "retire_agent": {
      const { agentId, reason } = toolUse.input as { agentId: string; reason: string };
      const success = retireAgent(agentId);
      // Only claim a retirement when the store reports one (the value is
      // treated as truthy/falsy, matching how it is returned to the model).
      console.log(success
        ? ` [hr] retired: ${agentId} — ${reason}`
        : ` [hr] retire failed (no change): ${agentId} — ${reason}`);
      return { success };
    }
    default:
      return { error: "unknown tool" };
  }
}

/**
 * Runs the HR agent, which reviews coverage, open issues, scenarios and the
 * current roster, then hires and retires simulated-user agents via tool calls.
 *
 * The conversation is capped at 8 model turns. Failures of the model call are
 * logged and swallowed so the overall run can continue. A failing tool call is
 * reported back to the model as `{ error }` instead of ending the session.
 *
 * @param productSpec - Only `appName` is used, for the system prompt.
 * @param orgGuidance - Organisation-design guidance inserted into the prompt.
 * @param openIssues - Known open issues served by get_open_issues.
 * @param scenarios - Scenarios for this run served by get_scenarios.
 * @param testAccounts - Optional role-to-account list shown to the model.
 */
async function runHRAgent(
  productSpec: ProductSpec,
  orgGuidance: string,
  openIssues: { number: number; title: string; labels: string[] }[],
  scenarios: Scenario[],
  testAccounts: TestAccount[] = [],
): Promise<void> {
  console.log("\n[hr] starting...");
  const messages: Anthropic.MessageParam[] = [
    { role: "user", content: "Manage agent hiring and retirement." },
  ];

  const accountContext = testAccounts.length > 0
    ? `\n[Available Test Accounts (one per role)]\n${testAccounts.map((a) => `- ${a.role}: ${a.email}`).join("\n")}\nWhen recruiting agents, match each persona's role to one of these accounts so they can operate with appropriate permissions.`
    : "";

  const systemPrompt = `You are the test agent manager for "${productSpec.appName}".
You recruit and manage agents that simulate real users of the app.

[Organization Design Guidelines]
${orgGuidance}${accountContext}

[Steps]
1. Call get_coverage to review which lenses and categories are underrepresented in past runs
2. Call get_open_issues to understand what problems are already known — recruit agents likely to find DIFFERENT issues in unexplored areas
3. Call get_scenarios to see the user test scenarios generated for this run — about 70% of agents will be assigned a scenario, so recruit personas whose background fits those scenarios
4. Call get_agents to check the current agent roster
5. Add 2–3 agents with add_agent — balance between scenario-fit personas (step 3), underrepresented lenses (step 1), and unexplored areas (step 2)${testAccounts.length > 0 ? "\n — assign each agent a role that matches one of the available test accounts" : ""}
6. If there are agents with old createdAt dates (oldest 1–2), retire them with retire_agent`;

  try {
    let iterations = 0;
    while (iterations < 8) {
      iterations++;
      const response = await createMessageWithRetry(client, {
        model: defaultModel,
        max_tokens: 1024,
        system: systemPrompt,
        tools: HR_TOOLS,
        messages,
      });
      messages.push({ role: "assistant", content: response.content });
      const toolUses = response.content.filter(
        (b): b is Anthropic.ToolUseBlock => b.type === "tool_use"
      );
      if (toolUses.length === 0 || response.stop_reason === "end_turn") break;

      const toolResults: Anthropic.ToolResultBlockParam[] = [];
      for (const toolUse of toolUses) {
        let result: unknown;
        try {
          result = executeHRTool(toolUse, openIssues, scenarios);
        } catch (e) {
          // Every tool_use needs a matching tool_result, and one bad call
          // should not cancel the remaining hiring steps. Use the same
          // { error } shape as the API-agent executor.
          console.warn(` [hr] tool "${toolUse.name}" failed:`, e);
          result = { error: String(e) };
        }
        // Pass plain-text results through unchanged. JSON-encoding a string
        // wraps it in quotes and escapes its newlines, which turns the
        // multi-line listings into a single hard-to-read line for the model.
        const content = typeof result === "string" ? result : JSON.stringify(result);
        toolResults.push({ type: "tool_result", tool_use_id: toolUse.id, content });
      }
      messages.push({ role: "user", content: toolResults });
    }
    console.log("[hr] done");
  } catch (e) {
    console.error("[hr] error:", e);
  }
}
526
+
527
+ // ================================================================
528
+ // Browser agent tools
529
+ // ================================================================
530
+
531
/** One tool invocation performed by a browser agent. */
interface BrowserAction {
  /** Time of the entry as a string (presumably ISO-8601, like the API agent log — confirm at the write site). */
  timestamp: string;
  /** Name of the tool that was invoked. */
  tool: string;
  /** Raw tool arguments. */
  input: Record<string, unknown>;
  /** Path of the screenshot taken for this action, or null when none was taken. */
  screenshotPath: string | null;
  /** Duration of the action in milliseconds. */
  durationMs: number;
}
538
+
539
/** Per-agent record of a browser session. */
interface BrowserAgentLog {
  agentName: string;
  /** Persona text; also stored as the `role` of findings this agent saves. */
  persona: string;
  startedAt: string;
  /** Null until a completion time is recorded. */
  completedAt: string | null;
  status: "completed" | "error" | "iteration_limit";
  iterations: number;
  /** Actions performed by the agent. */
  actions: BrowserAction[];
  /** One entry per finding saved through post_feedback. */
  feedbacksSaved: { title: string; category: string; findingId: string }[];
  /** Error text, or null when none was recorded. */
  error: string | null;
}
550
+
551
/**
 * Names of browser tools singled out for screenshot forwarding.
 * NOTE(review): the consumer is outside this view — presumably only these
 * tools' screenshots are attached to the model's tool result; confirm.
 */
const TOOLS_THAT_SEND_SCREENSHOT = new Set<string>([
  "navigate",
  "post_feedback",
  "view_screen",
]);
552
+
553
/**
 * Tool schemas offered to browser agents.
 *
 * When API exploration is enabled (MAX_EXPLORERS > 0) the target's app tools
 * come first, each with its description prefixed by "[API check] ". The
 * browser interaction, inspection and reporting tools follow.
 *
 * The post_feedback category enum is derived from VALID_CATEGORIES so the
 * schema advertised to the model cannot drift from the runtime validation.
 */
const BROWSER_TOOLS: Anthropic.Tool[] = ((): Anthropic.Tool[] => {
  // Fresh schema objects per tool so no two tools share a reference.
  const noInput = () => ({ type: "object" as const, properties: {}, required: [] as string[] });
  const labelAndValue = () => ({
    type: "object" as const,
    properties: {
      label: { type: "string" },
      value: { type: "string" },
    },
    required: ["label", "value"],
  });

  const apiCheckTools = MAX_EXPLORERS > 0
    ? targetConfig.appTools.map((t) => ({ ...t, description: `[API check] ${t.description}` }))
    : [];

  return [
    ...apiCheckTools,
    {
      name: "view_screen",
      description: "Capture the current screen. / 現在の画面を確認する",
      input_schema: noInput(),
    },
    {
      name: "navigate",
      description: "Navigate to a path. / 指定したパスに移動する",
      input_schema: {
        type: "object",
        properties: { path: { type: "string" } },
        required: ["path"],
      },
    },
    {
      name: "click",
      description: "Click a button, link, or tab on screen. / 画面上の要素をクリックする",
      input_schema: {
        type: "object",
        properties: { description: { type: "string" } },
        required: ["description"],
      },
    },
    {
      name: "fill",
      description: "Type text into an input field. / 入力フィールドにテキストを入力する",
      input_schema: labelAndValue(),
    },
    {
      name: "select",
      description: "Select an option from a dropdown. / ドロップダウンで選択する",
      input_schema: labelAndValue(),
    },
    {
      name: "diff_since_last_action",
      description: "Check what changed on the page since the last action. / 直前のアクションでページに何が変わったかを確認する",
      input_schema: noInput(),
    },
    {
      name: "read_page_text",
      description: "Get all visible text on the page. / ページ上の表示テキストをすべて取得する",
      input_schema: noInput(),
    },
    {
      name: "read_accessibility_tree",
      description: "Get the page's accessibility tree. / ページのアクセシビリティツリーを取得する",
      input_schema: noInput(),
    },
    {
      name: "read_console_logs",
      description: "Check browser console logs (errors and warnings). / ブラウザのコンソールログを確認する",
      input_schema: noInput(),
    },
    {
      name: "read_network_errors",
      description: "Check failed API requests. / 失敗したAPIリクエストの一覧を確認する",
      input_schema: noInput(),
    },
    {
      name: "post_feedback",
      description: "Record an issue or improvement as feedback. Becomes a GitHub Issue after triage. / 問題・改善点をフィードバックとして記録する",
      input_schema: {
        type: "object",
        properties: {
          title: { type: "string" },
          body: { type: "string" },
          category: { type: "string", enum: [...VALID_CATEGORIES] },
        },
        required: ["title", "body", "category"],
      },
    },
    {
      name: "post_outcome",
      description: "Record whether you achieved your scenario goal. Call this at the end of your run if you were given a [Your Task for This Run] section. / [Your Task for This Run] セクションがある場合のみ、run の最後にゴール達成可否を記録する",
      input_schema: {
        type: "object",
        properties: {
          achieved: { type: "boolean", description: "true if you successfully completed the goal, false if you could not" },
          reason: { type: "string", description: "Brief explanation (1-2 sentences)" },
        },
        required: ["achieved", "reason"],
      },
    },
  ];
})();
653
+
654
+ async function executeBrowserTool(
655
+ toolName: string,
656
+ input: Record<string, unknown>,
657
+ page: Page,
658
+ agentLog: BrowserAgentLog,
659
+ observation: ObservationState,
660
+ agentId: string,
661
+ scenarioOutcomes: ScenarioOutcome[],
662
+ scenario?: Scenario,
663
+ ): Promise<{ text: string; screenshot: { base64: string; filePath: string } | null; sendToClaude: boolean }> {
664
+ const startedAt = Date.now();
665
+ let resultText = "";
666
+ let screenshot: { base64: string; filePath: string } | null = null;
667
+ let isError = false;
668
+
669
+ try {
670
+ switch (toolName) {
671
+ case "view_screen": {
672
+ screenshot = await takeScreenshot(page, "view_screen");
673
+ resultText = "Current screen.";
674
+ break;
675
+ }
676
+ case "navigate": {
677
+ const { path: navPath } = input as { path: string };
678
+ await saveSnapshotBeforeAction(page, observation);
679
+ await page.goto(`${BASE_URL}${navPath}`, { waitUntil: "networkidle" });
680
+ await page.waitForTimeout(500);
681
+ screenshot = await takeScreenshot(page, `navigate_${navPath.replace(/\//g, "_")}`);
682
+ resultText = `Navigated to ${navPath}`;
683
+ break;
684
+ }
685
+ case "click": {
686
+ const { description } = input as { description: string };
687
+ await saveSnapshotBeforeAction(page, observation);
688
+ const escapedDesc = description.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
689
+ const buttonLocator = page.getByRole("button", { name: new RegExp(escapedDesc, "i") });
690
+ const linkLocator = page.getByRole("link", { name: new RegExp(escapedDesc, "i") });
691
+ const textLocator = page.getByText(description, { exact: false });
692
+ let clicked = false;
693
+ for (const loc of [buttonLocator, linkLocator, textLocator]) {
694
+ try {
695
+ await loc.first().click({ timeout: 5000 });
696
+ clicked = true;
697
+ break;
698
+ } catch { /* try next */ }
699
+ }
700
+ if (!clicked) throw new Error(`No element matching: ${description}`);
701
+ await page.waitForTimeout(500);
702
+ screenshot = await takeScreenshot(page, `click_${description.slice(0, 20)}`);
703
+ resultText = `Clicked: ${description}`;
704
+ break;
705
+ }
706
+ case "fill": {
707
+ const { label, value } = input as { label: string; value: string };
708
+ await saveSnapshotBeforeAction(page, observation);
709
+ const byContainer = page
710
+ .locator("div")
711
+ .filter({ has: page.locator("label", { hasText: label }) })
712
+ .locator("input, textarea")
713
+ .first();
714
+ const byPlaceholder = page.getByPlaceholder(label, { exact: false });
715
+ const byAriaLabel = page.getByLabel(label, { exact: false });
716
+ let filled = false;
717
+ for (const el of [byContainer, byPlaceholder, byAriaLabel]) {
718
+ try {
719
+ await el.fill(value, { timeout: 5000 });
720
+ filled = true;
721
+ break;
722
+ } catch { /* try next */ }
723
+ }
724
+ if (!filled) throw new Error(`No input field matching: ${label}`);
725
+ await page.waitForTimeout(300);
726
+ screenshot = await takeScreenshot(page, `fill_${label.slice(0, 20)}`);
727
+ resultText = `Filled "${label}" with "${value}"`;
728
+ break;
729
+ }
730
+ case "select": {
731
+ const { label, value } = input as { label: string; value: string };
732
+ await saveSnapshotBeforeAction(page, observation);
733
+ const byAriaLabel = page.getByLabel(label, { exact: false });
734
+ const byContainer = page
735
+ .locator("div")
736
+ .filter({ has: page.locator("label", { hasText: label }) })
737
+ .locator("select")
738
+ .first();
739
+ let selected = false;
740
+ for (const el of [byAriaLabel, byContainer]) {
741
+ try {
742
+ await el.selectOption({ label: value }, { timeout: 5000 });
743
+ selected = true;
744
+ break;
745
+ } catch { /* try next */ }
746
+ }
747
+ if (!selected) throw new Error(`Could not select "${value}" in "${label}"`);
748
+ await page.waitForTimeout(300);
749
+ screenshot = await takeScreenshot(page, `select_${label.slice(0, 20)}`);
750
+ resultText = `Selected "${value}" in "${label}"`;
751
+ break;
752
+ }
753
+ case "diff_since_last_action": {
754
+ resultText = await getDiffFromSnapshot(page, observation);
755
+ break;
756
+ }
757
+ case "read_page_text": {
758
+ resultText = await readPageText(page);
759
+ break;
760
+ }
761
+ case "read_accessibility_tree": {
762
+ resultText = await readAccessibilityTree(page);
763
+ break;
764
+ }
765
+ case "read_console_logs": {
766
+ const logs = getRecentConsoleLogs(observation);
767
+ resultText = logs.length > 0 ? JSON.stringify(logs) : "(no console logs)";
768
+ break;
769
+ }
770
+ case "read_network_errors": {
771
+ const errors = getRecentNetworkErrors(observation);
772
+ resultText = errors.length > 0 ? JSON.stringify(errors) : "(no network errors)";
773
+ break;
774
+ }
775
+ case "post_outcome": {
776
+ const { achieved, reason } = input as { achieved: boolean; reason: string };
777
+ if (scenario) {
778
+ const outcome: ScenarioOutcome = {
779
+ scenarioId: scenario.id,
780
+ scenarioTitle: scenario.title,
781
+ agentId,
782
+ agentName: agentLog.agentName,
783
+ achieved: Boolean(achieved),
784
+ reason: String(reason),
785
+ };
786
+ scenarioOutcomes.push(outcome);
787
+ console.log(` ${achieved ? "✓" : "✗"} [outcome] "${scenario.title}": ${achieved ? "achieved" : "NOT achieved"} — ${reason}`);
788
+ }
789
+ resultText = "Outcome recorded.";
790
+ break;
791
+ }
792
+ case "post_feedback": {
793
+ const { title, body, category } = input as { title: string; body: string; category: string };
794
+ const safeCategory = VALID_CATEGORIES.includes(String(category)) ? String(category) : "ux";
795
+ screenshot = await takeScreenshot(page, `feedback_${String(title).slice(0, 20)}`);
796
+ const finding: Finding = {
797
+ id: `${agentId}_${Date.now()}`,
798
+ runId: runLog.runId,
799
+ agentId,
800
+ agentName: agentLog.agentName,
801
+ role: agentLog.persona,
802
+ title: String(title),
803
+ body: String(body),
804
+ category: safeCategory,
805
+ timestamp: new Date().toISOString(),
806
+ screenshotPath: screenshot.filePath,
807
+ };
808
+ saveFinding(finding);
809
+ agentLog.feedbacksSaved.push({ title: String(title), category: safeCategory, findingId: finding.id });
810
+ console.log(` → [findings] saved: "${title}" (${safeCategory})`);
811
+ resultText = `Feedback recorded: "${title}" (will become an Issue after triage)`;
812
+ break;
813
+ }
814
+ default: {
815
+ const apiResult = await targetConfig.execute(toolName, input, agentId);
816
+ resultText = JSON.stringify(apiResult);
817
+ break;
818
+ }
819
+ }
820
+ } catch (e) {
821
+ isError = true;
822
+ resultText = `error: ${String(e)}`;
823
+ try {
824
+ screenshot = await takeScreenshot(page, `error_${toolName}`);
825
+ } catch { /* ignore */ }
826
+ }
827
+
828
+ agentLog.actions.push({
829
+ timestamp: new Date().toISOString(),
830
+ tool: toolName,
831
+ input,
832
+ screenshotPath: screenshot?.filePath ?? null,
833
+ durationMs: Date.now() - startedAt,
834
+ });
835
+
836
+ const sendToClaude = isError || TOOLS_THAT_SEND_SCREENSHOT.has(toolName);
837
+ return { text: resultText, screenshot, sendToClaude };
838
+ }
839
+
840
/**
 * Drives a single browser agent: opens the app, hands the model a first
 * screenshot, then alternates model turns and browser tool executions until
 * the model stops calling tools or the iteration budget is used up.
 *
 * Any failure — including the initial navigation/screenshot — is captured in
 * the returned log (`status: "error"`, `error` set) instead of being thrown,
 * so one failing agent does not reject the caller's `Promise.all`.
 *
 * @param agent            Identity of the agent (id, display name, persona, role).
 * @param page             Playwright page the agent operates on.
 * @param productSpec      Product description used to build the system prompt.
 * @param assignment       Optional scenario or lens for this run; empty means free exploration.
 * @param scenarioOutcomes Shared array passed through to the tool executor (where post_outcome results are recorded).
 * @returns The agent's log, with `completedAt` always filled in.
 */
async function runBrowserAgent(
  agent: { id: string; name: string; persona: string; role: string },
  page: Page,
  productSpec: ProductSpec,
  assignment: { scenario?: Scenario; lens?: string } = {},
  scenarioOutcomes: ScenarioOutcome[] = [],
): Promise<BrowserAgentLog> {
  // Single source of truth for the turn budget (used by the loop condition,
  // the status check and the "turns remaining" hint).
  const MAX_ITERATIONS = 12;
  // Tools that count as making progress; if none of the last five actions is
  // one of these, the agent is nudged to move on.
  const PROGRESS_TOOLS = new Set(["navigate", "fill", "post_feedback"]);

  const assignmentLabel = assignment.scenario
    ? `[scenario: ${assignment.scenario.title.slice(0, 35)}]`
    : assignment.lens
      ? `[lens: ${assignment.lens.slice(0, 30)}...]`
      : "[free exploration]";
  console.log(`\n[browser] ${agent.name} start ${assignmentLabel}`);

  const agentLog: BrowserAgentLog = {
    agentName: agent.name,
    persona: agent.persona,
    startedAt: new Date().toISOString(),
    completedAt: null,
    status: "completed",
    iterations: 0,
    actions: [],
    feedbacksSaved: [],
    error: null,
  };

  const observation = setupObservation(page);

  const systemPrompt = `You are "${agent.name}".
Role: ${agent.role}
Persona: ${agent.persona}

You are a real user of "${productSpec.appName}".
Use the browser tools to navigate the app and carry out everyday tasks.

[App Overview]
${productSpec.appDescription}

[How to Proceed]
1. Navigate to a page with navigate
2. Perform actual tasks on that page
3. If you find any issues, record them with post_feedback (they become Issues after triage)
4. Move to another page and repeat
5. Finish after 8–10 actions

[Using Observation Tools]
- To verify an action was actually applied, call diff_since_last_action
- If data isn't reflected or errors appear, call read_network_errors
- For unexpected behavior, call read_console_logs to check JS errors
- If problems are found, record them with post_feedback

[Using API Check Tools (tools prefixed with [API check])]
- After a browser action, verify the actual saved state via API
- Data visible in the browser but missing in the API (or vice versa) is an inconsistency bug — report with post_feedback

[Using view_screen]
- Call it once right after navigate
- Do not call it repeatedly on the same page

[Reference: Implemented Features]
${productSpec.features}
${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\n` : ""}${goalsSection(productSpec)}${assignment.scenario
    ? `\n[Your Task for This Run]\nTitle: ${assignment.scenario.title}\nYou are: ${assignment.scenario.context}\nGoal: ${assignment.scenario.goal}\nConstraints: ${assignment.scenario.constraints}\n\nFocus on completing this task naturally as this user. Report any issues you encounter along the way.\nWhen done (or if you cannot complete the goal), call post_outcome with achieved=true/false and a brief reason.`
    : assignment.lens
      ? `\n[Focus Area for This Run]\n${assignment.lens}\nKeep this perspective in mind and prioritize reporting related issues.`
      : ""}`;

  try {
    // The first navigation and screenshot live inside the try so that an
    // unreachable app is logged as this agent's error rather than thrown.
    await page.goto(BASE_URL, { waitUntil: "networkidle" });
    await page.waitForTimeout(1000);
    const initialScreenshot = await takeScreenshot(page, "initial");

    const messages: Anthropic.MessageParam[] = [
      {
        role: "user",
        content: [
          { type: "image", source: { type: "base64", media_type: "image/png", data: initialScreenshot.base64 } },
          { type: "text", text: "The app is open. Start using it." },
        ],
      },
    ];

    while (agentLog.iterations < MAX_ITERATIONS) {
      agentLog.iterations++;

      const response = await createMessageWithRetry(client, {
        model: defaultModel,
        max_tokens: 1024,
        system: systemPrompt,
        tools: BROWSER_TOOLS,
        messages,
      });

      const assistantContent = response.content;
      messages.push({ role: "assistant", content: assistantContent });

      const toolUses = assistantContent.filter(
        (b): b is Anthropic.ToolUseBlock => b.type === "tool_use"
      );

      // No tool calls (or an explicit end of turn) means the agent is finished.
      if (toolUses.length === 0 || response.stop_reason === "end_turn") {
        agentLog.status = "completed";
        break;
      }

      // The agent still wanted to act on its final allowed turn.
      if (agentLog.iterations >= MAX_ITERATIONS) agentLog.status = "iteration_limit";

      const toolResults: Anthropic.ToolResultBlockParam[] = [];
      for (const toolUse of toolUses) {
        console.log(` → ${toolUse.name}(${JSON.stringify(toolUse.input).slice(0, 60)})`);

        const { text, screenshot, sendToClaude } = await executeBrowserTool(
          toolUse.name,
          toolUse.input as Record<string, unknown>,
          page,
          agentLog,
          observation,
          agent.id,
          scenarioOutcomes,
          assignment.scenario,
        );

        // Attach the screenshot only when the executor flagged it for sending.
        const content: Anthropic.ToolResultBlockParam["content"] =
          sendToClaude && screenshot
            ? [
                { type: "text", text },
                { type: "image", source: { type: "base64", media_type: "image/png", data: screenshot.base64 } },
              ]
            : text;

        toolResults.push({ type: "tool_result", tool_use_id: toolUse.id, content });
      }

      const remaining = MAX_ITERATIONS - agentLog.iterations;
      let budgetHint = `[${remaining} turns remaining]`;
      if (remaining <= 2) {
        budgetHint += " Last turns. Post any remaining findings with post_feedback, then finish.";
      } else if (remaining <= 4) {
        budgetHint += " Start wrapping up.";
      }

      const recent = agentLog.actions.slice(-5).map((a) => a.tool);
      if (recent.length >= 5 && !recent.some((t) => PROGRESS_TOOLS.has(t))) {
        budgetHint += " You seem stuck on the same page. Navigate to a different page.";
      }

      const observationWarning = buildObservationWarning(observation);
      if (observationWarning) {
        budgetHint += `\n\n${observationWarning}\nUse read_console_logs or read_network_errors for details.`;
      }

      // Piggyback the hint on the last tool result: appended to the string
      // form, or added as an extra text part when the content is an array.
      const last = toolResults[toolResults.length - 1];
      const lastContent = last.content;
      toolResults[toolResults.length - 1] = {
        ...last,
        content:
          typeof lastContent === "string"
            ? `${lastContent}\n\n${budgetHint}`
            : ([...(lastContent as unknown[]), { type: "text" as const, text: budgetHint }] as Anthropic.ToolResultBlockParam["content"]),
      };

      messages.push({ role: "user", content: toolResults });
    }
  } catch (e) {
    agentLog.status = "error";
    agentLog.error = String(e);
    console.error(`[${agent.name}] error:`, e);
  } finally {
    agentLog.completedAt = new Date().toISOString();
  }

  console.log(`[browser] ${agent.name} done (feedback: ${agentLog.feedbacksSaved.length})`);
  return agentLog;
}
1016
+
1017
+ // ================================================================
1018
+ // Main
1019
+ // ================================================================
1020
+
1021
/**
 * Picks up to `count` distinct elements from `agents` uniformly at random.
 *
 * Elements are drawn one at a time from a private copy, so the input array is
 * never mutated and every subset/order is equally likely (unlike sorting with
 * a random comparator, which is biased and engine-dependent).
 *
 * @param agents Candidates to choose from.
 * @param count  Maximum number of elements to return; values below 1 yield an empty array.
 * @returns A new array with `min(count, agents.length)` randomly chosen elements.
 */
function pickAgents<T>(agents: T[], count: number): T[] {
  const pool = [...agents];
  const limit = Math.min(Math.max(0, Math.floor(count)), pool.length);
  const picked: T[] = [];
  while (picked.length < limit) {
    const at = Math.floor(Math.random() * pool.length);
    // splice removes the drawn element so it cannot be picked twice.
    picked.push(...pool.splice(at, 1));
  }
  return picked;
}
1024
+
1025
/**
 * Decides what the agent at dispatch position `idx` works on.
 *
 * Positions with `idx % 10 < 7` receive a scenario (cycling through
 * `scenarios`) as long as at least one scenario exists; every other position
 * receives a lens (cycling through UNIVERSAL_LENSES). That gives a 7:3 split.
 */
function pickAssignment(idx: number, scenarios: Scenario[]): { scenario?: Scenario; lens?: string } {
  const inScenarioSlot = idx % 10 < 7;
  if (!inScenarioSlot || scenarios.length === 0) {
    const lensIndex = idx % UNIVERSAL_LENSES.length;
    return { lens: UNIVERSAL_LENSES[lensIndex] };
  }
  const scenarioIndex = idx % scenarios.length;
  return { scenario: scenarios[scenarioIndex] };
}
1032
+
1033
/**
 * Entry point for a full run: product discovery → org/scenario design →
 * optional account setup → HR agent → API explorers and regression agent →
 * browser agents → triage → HTML report → coverage update.
 *
 * The browser is closed and the run log is saved in a `finally` block, and
 * every early exit goes through it. When failed scenario outcomes exist, or
 * no agents are available, `process.exitCode` is set to 1.
 */
async function main() {
  initDirs();
  // Initialize the run log up front so that saveRunLog() in the finally block
  // works no matter which stage fails.
  initRunLog(0, GITHUB_REPO);

  // 1. product discovery (cache or live)
  const browser = await chromium.launch({ headless: true });
  let productSpec: ProductSpec;
  const scenarioOutcomes: ScenarioOutcome[] = [];
  try {
    const cached = loadCachedSpec(BASE_URL);
    if (cached) {
      console.log(`\n[product-discovery] using cache (date: ${cached.discoveredAt?.slice(0, 10) ?? "unknown"}, confidence: ${cached.confidence})`);
      productSpec = cached;
    } else {
      const discoveryContext = await browser.newContext({ viewport: { width: 1024, height: 640 } });
      const discoveryPage = await discoveryContext.newPage();
      productSpec = await discoverProduct(BASE_URL, discoveryPage, client, defaultModel, targetConfig.projectPath);
      await discoveryContext.close();
    }

    // 2. org design (coverage-aware)
    const coverageSummary = computeWeightedSummary();
    console.log(`\n[coverage] ${coverageSummary.formatted.split("\n")[0]}`);
    const orgDesign = await designOrg(productSpec, client, defaultModel, coverageSummary.formatted);

    // 3. open issues + scenario design (both feed into HR)
    const openIssues = await fetchOpenIssues(githubOptions);
    const scenarios = await designScenarios(productSpec, openIssues, client, defaultModel, 5, coverageSummary.formatted);

    // 3.5. Account Manager (only when credentials are configured)
    let testAccounts: TestAccount[] = [];
    if (targetConfig.credentials) {
      const accountContext = await browser.newContext({ viewport: { width: 1024, height: 640 } });
      try {
        testAccounts = await runAccountManager(
          BASE_URL,
          targetConfig.credentials,
          productSpec,
          accountContext,
          client,
          defaultModel,
          runLog.runId,
        );
      } finally {
        await accountContext.close();
      }
    }

    // 4. HR agent
    await runHRAgent(productSpec, orgDesign.hrGuidance, openIssues, scenarios, testAccounts);

    // 5. load agents + closed issues
    const allAgents = loadAgents();
    if (allAgents.length === 0) {
      console.error("No agents found. Check agents.json.");
      // Do not call process.exit() here: it would skip the finally block,
      // leaving the browser open and the run log unsaved.
      process.exitCode = 1;
      return;
    }
    const closedIssues = await fetchClosedIssues(githubOptions);

    // The agent count is now known, so record it in the run summary.
    runLog.summary.totalAgents = allAgents.length;

    // 6. API agents (exploration + regression)
    // The last agent is reserved for regression; all others are explorer candidates.
    const allExplorers = allAgents.slice(0, -1);
    const explorerAgents = pickAgents(allExplorers, Math.min(MAX_EXPLORERS, allExplorers.length));
    const regressionAgent = allAgents[allAgents.length - 1];
    console.log(`\nexplorers: ${explorerAgents.length} (max: ${MAX_EXPLORERS}) / regression: 1`);

    // agentId → assignment (used for coverage calculation and report generation)
    const agentAssignments = new Map<string, { scenario?: Scenario; lens?: string }>();

    // Global counter for scenario/lens assignment (7:3 ratio)
    let dispatchIdx = 0;

    // Explorers run in batches of CONCURRENCY with a pause between batches.
    const CONCURRENCY = 2;
    for (let i = 0; i < explorerAgents.length; i += CONCURRENCY) {
      const batch = explorerAgents.slice(i, i + CONCURRENCY);
      await Promise.all(batch.map((agent) => {
        const assignment = pickAssignment(dispatchIdx++, scenarios);
        agentAssignments.set(agent.id, assignment);
        return runExplorer(agent, productSpec, assignment, scenarioOutcomes);
      }));
      if (i + CONCURRENCY < explorerAgents.length) {
        console.log("\n[batch done] waiting 5s before next batch...");
        await sleep(5000);
      }
    }

    if (MAX_EXPLORERS === 0) {
      console.log("\n[regression] skipped (MAX_EXPLORERS=0)");
    } else if (closedIssues.length > 0) {
      await sleep(3000);
      await runRegressionAgent(regressionAgent, closedIssues, productSpec);
    } else {
      // Nothing to regress against, so the reserved agent explores instead.
      console.log("\n[regression] no closed issues — running as explorer");
      const assignment = pickAssignment(dispatchIdx++, scenarios);
      agentAssignments.set(regressionAgent.id, assignment);
      await runExplorer(regressionAgent, productSpec, assignment, scenarioOutcomes);
    }

    // 7. browser agents
    const browserAgents = pickAgents(allAgents, Math.min(MAX_BROWSERS, allAgents.length));
    console.log(`\nlaunching ${browserAgents.length} browser agents in parallel (max: ${MAX_BROWSERS})`);
    browserAgents.forEach((a) => console.log(` - ${a.name} (${a.role})`));

    await sleep(2000);
    await Promise.all(
      browserAgents.map(async (agent) => {
        const assignment = pickAssignment(dispatchIdx++, scenarios);
        agentAssignments.set(agent.id, assignment);

        // Reuse a saved storageState when a test account matches the agent's role.
        const matchedAccount = testAccounts.find((a) => a.role === agent.role && a.storageStatePath);
        const contextOptions: Parameters<typeof browser.newContext>[0] = {
          viewport: { width: 1024, height: 640 },
        };
        if (matchedAccount?.storageStatePath) {
          contextOptions.storageState = matchedAccount.storageStatePath;
        }

        // Each browser agent gets its own context, closed when the agent finishes.
        const context = await browser.newContext(contextOptions);
        const page = await context.newPage();
        try {
          return await runBrowserAgent(agent, page, productSpec, assignment, scenarioOutcomes);
        } finally {
          await context.close();
        }
      })
    );

    // 8. triage (API + browser findings)
    await sleep(2000);
    console.log(`\n[triage] collected findings: ${collectedFindings.length}`);
    let triageResult = { issued: [] as string[], skipped: [] as string[], unprocessed: [] as string[], issuesCreated: 0 };
    try {
      triageResult = await runTriageAgent(collectedFindings, client, defaultModel, githubOptions);
      runLog.summary.totalIssuesPosted += triageResult.issuesCreated;
    } catch (e) {
      // A triage failure is logged; the report is still generated from the empty default result.
      console.error("[triage] error:", e);
    }

    // 9. generate HTML report
    const reportPath = generateReport(runLog, collectedFindings, triageResult, productSpec, scenarios, agentAssignments, scenarioOutcomes);
    console.log(`\n[report] ${reportPath}`);

    // 10. update coverage
    updateCoverage(runLog.runId, collectedFindings, agentAssignments);

  } finally {
    // Always save the log, even on error exit. The nested try/finally blocks
    // guarantee saveRunLog() runs even if closing the browser or estimating
    // the cost throws; any such error still propagates to the caller.
    try {
      await browser.close();
    } finally {
      runLog.completedAt = new Date().toISOString();
      runLog.summary.rateLimitRetries = rateLimitRetries;
      try {
        runLog.summary.cost.estimatedUSD = await estimateCost(
          defaultModel, llmProvider,
          runLog.summary.cost.inputTokens,
          runLog.summary.cost.outputTokens,
        );
      } finally {
        saveRunLog();
      }
    }
  }

  console.log("\nAll agents done.");
  console.log(` findings collected: ${collectedFindings.length}`);
  console.log(` tokens: ${runLog.summary.cost.inputTokens} in / ${runLog.summary.cost.outputTokens} out — estimated cost: ${formatCostUSD(runLog.summary.cost.estimatedUSD)}`);
  console.log(` GitHub issues created: ${runLog.summary.totalIssuesPosted}`);
  console.log(` regression checks: ${runLog.summary.regressionChecked} (regressed: ${runLog.summary.regressionFailed})`);
  console.log(` screenshots: ${screenshotDir}`);

  if (scenarioOutcomes.length > 0) {
    const failed = scenarioOutcomes.filter((o) => !o.achieved);
    console.log(` scenarios: ${scenarioOutcomes.length - failed.length}/${scenarioOutcomes.length} achieved`);
    if (failed.length > 0) {
      console.log(` ⚠ failed scenarios:`);
      failed.forEach((o) => console.log(` ✗ ${o.scenarioTitle} — ${o.reason}`));
      // Signal failure through the exit code without cutting the process short.
      process.exitCode = 1;
    }
  }
}
1212
+
1213
// Start the run. A fatal error is logged and reflected in the exit code so
// callers (CI, shell scripts) do not mistake a crashed run for a successful one.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});