@m8i-51/shoal 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +121 -0
- package/bin/shoal.js +56 -0
- package/framework/__tests__/coverage.test.ts +232 -0
- package/framework/__tests__/report.test.ts +154 -0
- package/framework/account-manager.ts +414 -0
- package/framework/agent-loop.ts +103 -0
- package/framework/agent-store.ts +47 -0
- package/framework/cost.ts +91 -0
- package/framework/coverage.ts +157 -0
- package/framework/findings.ts +53 -0
- package/framework/github.ts +64 -0
- package/framework/llm-client.ts +507 -0
- package/framework/observation.ts +182 -0
- package/framework/org-designer.ts +85 -0
- package/framework/product-discovery.ts +327 -0
- package/framework/report.ts +276 -0
- package/framework/scenario-designer.ts +141 -0
- package/framework/triage.ts +208 -0
- package/framework/types.ts +80 -0
- package/package.json +55 -0
- package/run.ts +1213 -0
- package/server/index.ts +227 -0
- package/server/runner.ts +125 -0
- package/server/runs.ts +103 -0
- package/targets/example.ts +55 -0
- package/targets/index.ts +17 -0
- package/targets/noop.ts +6 -0
- package/targets/types.ts +19 -0
- package/triage-only.ts +57 -0
- package/web/dist/assets/index-CD6EJ_1O.js +68 -0
- package/web/dist/assets/index-DPLuVm2n.css +1 -0
- package/web/dist/index.html +13 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
import * as fs from "fs";
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import type { Finding, RunLog } from "./types";
|
|
4
|
+
import type { ProductSpec } from "./product-discovery";
|
|
5
|
+
import type { TriageResult } from "./triage";
|
|
6
|
+
import type { Scenario, ScenarioOutcome } from "./scenario-designer";
|
|
7
|
+
|
|
8
|
+
/**
 * Escapes a string for safe interpolation into HTML text and double-quoted
 * attribute values.
 *
 * `&` is replaced first so that the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param s - Raw text that may contain HTML metacharacters.
 * @returns `s` with `&`, `<`, `>` and `"` replaced by their HTML entities.
 */
function esc(s: string): string {
  return s
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
|
|
15
|
+
|
|
16
|
+
/**
 * Reads an image file and returns it as a base64 `data:` URI.
 *
 * The MIME type is always reported as `image/png`, whatever the file contains.
 *
 * @param filePath - Path of the image to embed; may be undefined or empty.
 * @returns The data URI, or `null` when no path is given, the file does not
 *   exist, or reading it fails for any reason.
 */
function embedImage(filePath: string | undefined): string | null {
  if (filePath == null || filePath === "") {
    return null;
  }

  let encoded: string;
  try {
    if (fs.existsSync(filePath) === false) {
      return null;
    }
    encoded = fs.readFileSync(filePath).toString("base64");
  } catch {
    // Best effort: an unreadable screenshot just means no image in the report.
    return null;
  }

  return "data:image/png;base64," + encoded;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Formats the elapsed time between two timestamps as `"<s>s"` (under one
 * minute) or `"<m>m <s>s"`.
 *
 * The difference is rounded to the nearest whole second before formatting.
 *
 * @param startedAt - Start timestamp, in any format `Date` can parse.
 * @param completedAt - End timestamp, or `null` if the run has not finished.
 * @returns The formatted duration, or `"—"` when `completedAt` is missing or
 *   either timestamp cannot be parsed.
 */
function formatDuration(startedAt: string, completedAt: string | null): string {
  if (!completedAt) return "—";

  const elapsedMs = new Date(completedAt).getTime() - new Date(startedAt).getTime();
  // An unparseable timestamp yields NaN, which would otherwise render as "NaNm NaNs".
  if (!Number.isFinite(elapsedMs)) return "—";

  const totalSeconds = Math.round(elapsedMs / 1000);
  return totalSeconds < 60
    ? `${totalSeconds}s`
    : `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
}
|
|
33
|
+
|
|
34
|
+
/** Badge/bar colours for the finding categories that have a dedicated colour. */
const CATEGORY_COLORS: ReadonlyMap<string, string> = new Map([
  ["bug", "#ef4444"],
  ["ux", "#f97316"],
  ["feature-request", "#3b82f6"],
]);

/** Colour used for any category without a dedicated entry. */
const DEFAULT_CATEGORY_COLOR = "#6b7280";

/**
 * Maps a finding category to its CSS hex colour.
 *
 * @param cat - Category name.
 * @returns The colour for `bug`, `ux` or `feature-request`; a neutral grey for
 *   every other value.
 */
function categoryColor(cat: string): string {
  return CATEGORY_COLORS.get(cat) ?? DEFAULT_CATEGORY_COLOR;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Renders a self-contained HTML report for one run and writes it to
 * `<cwd>/logs/report_<runId>.html`, creating the `logs` directory if needed.
 *
 * The report contains a summary (counts plus a category bar), one card per
 * finding, an optional scenario-outcomes table, optional scenario cards, and a
 * table of agents. Screenshots are inlined as data URIs via `embedImage`.
 *
 * @param runLog - Run metadata; `runId`, `startedAt`, `completedAt` and
 *   `agents` are read here.
 * @param findings - Findings to list. The input array is not mutated; a sorted
 *   copy is rendered.
 * @param triageResult - Triage outcome; `issued` / `skipped` decide each
 *   finding's status badge and the summary counts.
 * @param productSpec - Only `appName` is used, for the title and header.
 * @param scenarios - Scenarios to show as cards; the section is omitted when empty.
 * @param agentAssignments - Lookup keyed by agent ID giving the scenario or
 *   lens assigned to that agent.
 * @param scenarioOutcomes - Per-scenario results; the section is omitted when empty.
 * @returns The absolute path of the written report file.
 */
export function generateReport(
  runLog: RunLog,
  findings: Finding[],
  triageResult: TriageResult,
  productSpec: ProductSpec,
  scenarios: Scenario[],
  agentAssignments: Map<string, { scenario?: Scenario; lens?: string }>,
  scenarioOutcomes: ScenarioOutcome[] = [],
): string {
  const reportPath = path.join(process.cwd(), "logs", `report_${runLog.runId}.html`);

  const issuedSet = new Set(triageResult.issued);
  const skippedSet = new Set(triageResult.skipped);

  // Order findings as issued → unprocessed → skipped.
  const sortedFindings = [...findings].sort((a, b) => {
    const rank = (f: Finding) => (issuedSet.has(f.id) ? 0 : !skippedSet.has(f.id) ? 1 : 2);
    return rank(a) - rank(b);
  });

  // Tally findings per category for the summary bar.
  const categoryCounts: Record<string, number> = {};
  for (const f of findings) {
    categoryCounts[f.category] = (categoryCounts[f.category] ?? 0) + 1;
  }

  const duration = formatDuration(runLog.startedAt, runLog.completedAt);
  // The start time is always shown in the ja-JP locale, Asia/Tokyo time zone.
  const date = new Date(runLog.startedAt).toLocaleString("ja-JP", { timeZone: "Asia/Tokyo" });

  // ----------------------------------------------------------------
  // Finding cards
  // ----------------------------------------------------------------

  const findingCards = sortedFindings.map((f) => {
    // A finding in neither set is treated as still pending ("unprocessed").
    const status = issuedSet.has(f.id) ? "issued" : skippedSet.has(f.id) ? "skipped" : "unprocessed";
    const statusLabel = { issued: "→ Issue", skipped: "skipped", unprocessed: "pending" }[status];
    const statusColor = { issued: "#22c55e", skipped: "#9ca3af", unprocessed: "#f59e0b" }[status];
    const imgData = embedImage(f.screenshotPath);
    const assignment = agentAssignments.get(f.agentId);
    // A scenario assignment takes precedence over a lens; for a lens only the
    // text before the first ":" is shown.
    const assignmentTag = assignment?.scenario
      ? `<span class="assignment-tag scenario">scenario: ${esc(assignment.scenario.title)}</span>`
      : assignment?.lens
        ? `<span class="assignment-tag lens">lens: ${esc(assignment.lens.split(":")[0].trim())}</span>`
        : "";

    // The <summary> label below is Japanese for "screenshot".
    return `
<div class="finding ${esc(status)}">
<div class="finding-header">
<span class="badge" style="background:${categoryColor(f.category)}">${esc(f.category)}</span>
<span class="badge" style="background:${statusColor}">${statusLabel}</span>
${assignmentTag}
<span class="agent-name">${esc(f.agentName)}</span>
</div>
<h3 class="finding-title">${esc(f.title)}</h3>
<p class="finding-body">${esc(f.body).replace(/\n/g, "<br>")}</p>
${imgData ? `<details class="screenshot-toggle"><summary>スクリーンショット</summary><img src="${imgData}" alt="screenshot" class="screenshot"></details>` : ""}
</div>`;
  }).join("\n");

  // ----------------------------------------------------------------
  // Agent table rows
  // ----------------------------------------------------------------

  const agentRows = runLog.agents.map((a) => {
    const assignment = agentAssignments.get(a.agentId);
    // Agents without an assignment show "regression" when that is their type, otherwise a dash.
    const assignmentCell = assignment?.scenario
      ? `<span class="badge" style="background:#8b5cf6">scenario</span> ${esc(assignment.scenario.title)}`
      : assignment?.lens
        ? `<span class="badge" style="background:#0ea5e9">lens</span> ${esc(assignment.lens.split(":")[0].trim())}`
        : `<span class="badge" style="background:#9ca3af">${a.agentType === "regression" ? "regression" : "—"}</span>`;
    // Any status other than "completed" is rendered in red.
    const statusColor = a.status === "completed" ? "#22c55e" : "#ef4444";
    return `<tr>
<td>${esc(a.agentName)}</td>
<td><span class="badge" style="background:#475569">${esc(a.agentType)}</span></td>
<td style="font-size:0.8rem">${assignmentCell}</td>
<td style="text-align:center">${a.iterations}</td>
<td><span class="badge" style="background:${statusColor}">${esc(a.status)}</span></td>
</tr>`;
  }).join("\n");

  // ----------------------------------------------------------------
  // Scenario outcomes
  // ----------------------------------------------------------------

  const outcomesSection = scenarioOutcomes.length > 0 ? (() => {
    const achieved = scenarioOutcomes.filter((o) => o.achieved).length;
    const total = scenarioOutcomes.length;
    const allPassed = achieved === total;
    // Green when everything was achieved, red when nothing was, amber otherwise.
    const headerColor = allPassed ? "#22c55e" : achieved === 0 ? "#ef4444" : "#f59e0b";
    const rows = scenarioOutcomes.map((o) => `<tr>
<td>${esc(o.scenarioTitle)}</td>
<td>${esc(o.agentName)}</td>
<td><span class="badge" style="background:${o.achieved ? "#22c55e" : "#ef4444"}">${o.achieved ? "achieved" : "failed"}</span></td>
<td style="font-size:.8rem;color:#475569">${esc(o.reason)}</td>
</tr>`).join("\n");
    return `
<section>
<h2>Scenario Outcomes <span style="font-size:.85rem;color:${headerColor};text-transform:none;letter-spacing:0;font-weight:600">${achieved}/${total} achieved</span></h2>
<table>
<thead><tr><th>Scenario</th><th>Agent</th><th>Result</th><th>Reason</th></tr></thead>
<tbody>${rows}</tbody>
</table>
</section>`;
  })() : "";

  // ----------------------------------------------------------------
  // Scenario cards
  // ----------------------------------------------------------------

  const scenarioSection = scenarios.length > 0 ? `
<section>
<h2>Scenarios (${scenarios.length})</h2>
<div class="scenarios">
${scenarios.map((s) => `
<div class="scenario-card">
<div class="scenario-id">${esc(s.id)}</div>
<h3>${esc(s.title)}</h3>
<p><strong>Context:</strong> ${esc(s.context)}</p>
<p><strong>Goal:</strong> ${esc(s.goal)}</p>
<p><strong>Constraints:</strong> ${esc(s.constraints)}</p>
</div>`).join("")}
</div>
</section>` : "";

  // ----------------------------------------------------------------
  // Category bar
  // ----------------------------------------------------------------

  const totalFindings = findings.length;
  // Only these three categories get a bar segment; findings in any other
  // category are counted in the totals but not drawn here.
  const categoryBar = ["bug", "ux", "feature-request"]
    .filter((cat) => categoryCounts[cat])
    .map((cat) => {
      const count = categoryCounts[cat] ?? 0;
      // Each segment is at least 8% wide so that its label stays readable.
      const pct = totalFindings > 0 ? Math.max(Math.round((count / totalFindings) * 100), 8) : 0;
      return `<div class="bar-segment" style="width:${pct}%;background:${categoryColor(cat)}" title="${cat}: ${count}">
<span>${esc(cat)} ${count}</span>
</div>`;
    }).join("");

  // ----------------------------------------------------------------
  // Full HTML
  // ----------------------------------------------------------------

  const html = `<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>shoal — ${esc(productSpec.appName)}</title>
<style>
*{box-sizing:border-box;margin:0;padding:0}
body{font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;background:#f8fafc;color:#1e293b;line-height:1.5}
header{background:#1e293b;color:#f8fafc;padding:1.5rem 2rem}
header h1{font-size:1.25rem;font-weight:700;margin-bottom:.25rem}
header .meta{font-size:.875rem;color:#94a3b8}
main{max-width:960px;margin:0 auto;padding:2rem}
section{margin-bottom:2.5rem}
h2{font-size:.75rem;font-weight:700;text-transform:uppercase;letter-spacing:.08em;color:#64748b;margin-bottom:1rem;padding-bottom:.5rem;border-bottom:1px solid #e2e8f0}
.summary-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(110px,1fr));gap:1rem;margin-bottom:1.25rem}
.stat-card{background:#fff;border:1px solid #e2e8f0;border-radius:8px;padding:1rem;text-align:center}
.stat-card .number{font-size:2rem;font-weight:700;color:#1e293b}
.stat-card .label{font-size:.7rem;color:#64748b;margin-top:.25rem;text-transform:uppercase;letter-spacing:.05em}
.category-bar{display:flex;height:28px;border-radius:6px;overflow:hidden;background:#e2e8f0;margin-bottom:0}
.bar-segment{display:flex;align-items:center;padding:0 .5rem;min-width:60px}
.bar-segment span{font-size:.7rem;color:#fff;font-weight:700;white-space:nowrap}
.badge{display:inline-block;padding:.15rem .5rem;border-radius:9999px;font-size:.65rem;font-weight:700;color:#fff;white-space:nowrap}
.finding{background:#fff;border:1px solid #e2e8f0;border-radius:8px;padding:1rem 1.25rem;margin-bottom:.75rem}
.finding.skipped{opacity:.55}
.finding-header{display:flex;align-items:center;gap:.4rem;margin-bottom:.5rem;flex-wrap:wrap}
.agent-name{font-size:.75rem;color:#94a3b8;margin-left:auto}
.assignment-tag{font-size:.7rem;padding:.1rem .45rem;border-radius:4px;white-space:nowrap}
.assignment-tag.scenario{color:#7c3aed;background:#ede9fe}
.assignment-tag.lens{color:#0369a1;background:#e0f2fe}
.finding-title{font-size:.95rem;font-weight:600;margin-bottom:.35rem}
.finding-body{font-size:.85rem;color:#475569}
.screenshot-toggle{margin-top:.75rem}
.screenshot-toggle summary{font-size:.8rem;color:#64748b;cursor:pointer;user-select:none}
.screenshot{max-width:100%;max-height:400px;object-fit:contain;border:1px solid #e2e8f0;border-radius:4px;margin-top:.5rem;display:block}
table{width:100%;border-collapse:collapse;background:#fff;border:1px solid #e2e8f0;border-radius:8px;overflow:hidden;font-size:.85rem}
th{background:#f1f5f9;padding:.6rem 1rem;text-align:left;font-weight:700;color:#64748b;font-size:.7rem;text-transform:uppercase;letter-spacing:.05em}
td{padding:.6rem 1rem;border-top:1px solid #e2e8f0;vertical-align:middle}
.scenarios{display:grid;grid-template-columns:repeat(auto-fit,minmax(260px,1fr));gap:1rem}
.scenario-card{background:#fff;border:1px solid #e2e8f0;border-radius:8px;padding:1rem}
.scenario-id{font-size:.65rem;font-weight:700;color:#8b5cf6;text-transform:uppercase;letter-spacing:.08em;margin-bottom:.25rem}
.scenario-card h3{font-size:.875rem;font-weight:600;margin-bottom:.5rem}
.scenario-card p{font-size:.8rem;color:#475569;margin-top:.2rem}
</style>
</head>
<body>
<header>
<h1>shoal — ${esc(productSpec.appName)}</h1>
<p class="meta">${esc(date)} · ${esc(duration)} · ${esc(runLog.runId)}</p>
</header>
<main>
<section>
<h2>Summary</h2>
<div class="summary-grid">
<div class="stat-card"><div class="number">${findings.length}</div><div class="label">findings</div></div>
<div class="stat-card"><div class="number">${triageResult.issued.length}</div><div class="label">→ Issues</div></div>
<div class="stat-card"><div class="number">${triageResult.skipped.length}</div><div class="label">skipped</div></div>
<div class="stat-card"><div class="number">${triageResult.unprocessed.length}</div><div class="label">pending</div></div>
<div class="stat-card"><div class="number">${runLog.agents.length}</div><div class="label">agents</div></div>
</div>
<div class="category-bar">${categoryBar || '<div style="width:100%;display:flex;align-items:center;padding:0 .75rem;font-size:.75rem;color:#94a3b8">no findings</div>'}</div>
</section>

<section>
<h2>Findings (${findings.length})</h2>
${sortedFindings.length > 0 ? findingCards : "<p style='color:#94a3b8;font-size:.875rem'>No findings collected.</p>"}
</section>

${outcomesSection}

${scenarioSection}

<section>
<h2>Agents (${runLog.agents.length})</h2>
<table>
<thead>
<tr><th>Name</th><th>Type</th><th>Assignment</th><th style="text-align:center">Iter.</th><th>Status</th></tr>
</thead>
<tbody>
${agentRows}
</tbody>
</table>
</section>
</main>
</body>
</html>`;

  // Make sure the logs directory exists before writing the report.
  fs.mkdirSync(path.dirname(reportPath), { recursive: true });
  fs.writeFileSync(reportPath, html, "utf-8");
  return reportPath;
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
2
|
+
import type { LLMClient } from "./llm-client";
|
|
3
|
+
import { createMessageWithRetry } from "./agent-loop";
|
|
4
|
+
import type { ProductSpec } from "./product-discovery";
|
|
5
|
+
|
|
6
|
+
/** A user test scenario produced by `designScenarios`. */
export interface Scenario {
  /** Sequential identifier of the form `scenario_<n>`, assigned by `designScenarios`. */
  id: string;
  /** Short scenario title. */
  title: string;
  context: string; // Who the user is and their situation
  goal: string; // What they want to accomplish
  constraints: string; // Special conditions (first-time user, under pressure, etc.)
}
|
|
13
|
+
|
|
14
|
+
/**
 * Outcome record pairing a scenario with an agent.
 *
 * NOTE(review): nothing in this file produces these records; the field
 * meanings below are read from the names — confirm against the producer.
 */
export interface ScenarioOutcome {
  /** ID of the scenario (presumably a `Scenario.id`). */
  scenarioId: string;
  /** Title of the scenario. */
  scenarioTitle: string;
  /** ID of the agent. */
  agentId: string;
  /** Display name of the agent. */
  agentName: string;
  /** Whether the scenario was achieved. */
  achieved: boolean;
  /** Free-text explanation accompanying the result. */
  reason: string;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Tool definition through which the model returns its generated scenarios.
 *
 * The schema asks for a `scenarios` array whose items each carry the four
 * string fields `title`, `context`, `goal` and `constraints`, all required.
 */
const OUTPUT_SCENARIOS_TOOL: Anthropic.Tool = {
  name: "output_scenarios",
  description: "Output the generated test scenarios / 生成したテストシナリオを出力する",
  input_schema: {
    type: "object",
    properties: {
      scenarios: {
        type: "array",
        description: "List of user test scenarios",
        items: {
          type: "object",
          properties: {
            title: {
              type: "string",
              description: "Short scenario title (e.g. 'New employee submitting first purchase request')",
            },
            context: {
              type: "string",
              description: "Who is this user and what is their situation? (1-2 sentences)",
            },
            goal: {
              type: "string",
              description: "What does the user want to accomplish? (specific and actionable)",
            },
            constraints: {
              type: "string",
              description: "Special conditions: e.g. first time using this feature, in a hurry, unfamiliar with the approval flow, etc.",
            },
          },
          required: ["title", "context", "goal", "constraints"],
        },
      },
    },
    required: ["scenarios"],
  },
};
|
|
59
|
+
|
|
60
|
+
/**
 * Asks the LLM to design user test scenarios for the described product.
 *
 * The model is instructed to answer through the `output_scenarios` tool. Its
 * reply is untrusted, so each returned entry is validated: entries that are not
 * objects or have no title are dropped, missing text fields become empty
 * strings, and at most `count` scenarios are kept. IDs (`scenario_1`, …) are
 * assigned to the kept scenarios in order.
 *
 * @param spec - Product description; `appDescription`, `targetUsers`,
 *   `features` and the optional `uiFeatures` are put into the prompt.
 * @param openIssues - Known open issues; the first 15 are listed as hints.
 * @param client - LLM client passed through to `createMessageWithRetry`.
 * @param model - Model identifier.
 * @param count - Number of scenarios to request, and the maximum returned.
 *   Values below 1 (or NaN) disable the cap.
 * @param coverageSummary - Optional coverage-history text appended to the prompt.
 * @returns The validated scenarios, or an empty array when the model did not
 *   call the tool or returned nothing usable.
 */
export async function designScenarios(
  spec: ProductSpec,
  openIssues: { number: number; title: string; labels: string[] }[],
  client: LLMClient,
  model: string,
  count: number = 5,
  coverageSummary?: string,
): Promise<Scenario[]> {
  console.log("\n[scenario-designer] generating scenarios...");

  const issueHints = openIssues.length > 0
    ? `\n[Known Open Issues — risky areas to naturally route scenarios through]\n${openIssues.slice(0, 15).map((i) => `- ${i.title} [${i.labels.join(", ")}]`).join("\n")}`
    : "";

  const coverageHints = coverageSummary
    ? `\n[Coverage History — adjust scenarios to explore underrepresented areas]\n${coverageSummary}`
    : "";

  const response = await createMessageWithRetry(client, {
    model,
    max_tokens: 2048,
    system: `You are a QA scenario designer. Generate realistic user test scenarios for a web app.
Each scenario represents a believable task a real user would attempt — not a bug hunt, but a natural user journey.
Scenarios should collectively cover different user types, app areas, and workflows.`,
    tools: [OUTPUT_SCENARIOS_TOOL],
    messages: [
      {
        role: "user",
        content: `Generate exactly ${count} test scenarios for this app.

[App Overview]
${spec.appDescription}

[Target Users]
${spec.targetUsers}

[Implemented Features]
${spec.features}${spec.uiFeatures ? `\n\n[UI-Only Features]\n${spec.uiFeatures}` : ""}${issueHints}${coverageHints}

Guidelines:
- Each scenario should be a realistic user task (not "find the bug")
- Cover different user types: power user, new user, occasional user, manager, etc.
- Cover different app areas and user journeys
- Make goals specific and actionable (not vague like "use the app")
- If open issues hint at risky areas, design natural scenarios that pass through those areas
- If coverage history shows underrepresented areas or lenses, bias scenarios toward those gaps
- Constraints should reflect realistic user states (first time, in a hurry, confused, etc.)

Call output_scenarios with exactly ${count} scenarios.`,
      },
    ],
  });

  const toolUse = response.content.find(
    (b): b is Anthropic.ToolUseBlock => b.type === "tool_use" && b.name === "output_scenarios"
  );

  if (!toolUse) {
    console.warn("[scenario-designer] LLM did not call output_scenarios — falling back to lens-only mode");
    return [];
  }

  // The tool input is model-generated, so nothing about its shape is assumed.
  const raw = toolUse.input as { scenarios?: unknown } | null | undefined;
  const entries: unknown = raw?.scenarios;

  if (!Array.isArray(entries) || entries.length === 0) {
    console.warn("[scenario-designer] empty scenarios array returned");
    return [];
  }

  // Missing fields become "" rather than the literal text "undefined".
  const toText = (value: unknown): string =>
    value === undefined || value === null ? "" : String(value);

  // Honour the requested count even if the model returns more.
  const limit = count >= 1 ? Math.floor(count) : entries.length;

  const scenarios: Scenario[] = [];
  for (const entry of entries as unknown[]) {
    if (scenarios.length >= limit) break;
    if (typeof entry !== "object" || entry === null) continue;

    const fields = entry as Record<string, unknown>;
    const title = toText(fields.title);
    if (title.trim() === "") continue;

    scenarios.push({
      id: `scenario_${scenarios.length + 1}`,
      title,
      context: toText(fields.context),
      goal: toText(fields.goal),
      constraints: toText(fields.constraints),
    });
  }

  if (scenarios.length === 0) {
    console.warn("[scenario-designer] no usable scenarios returned");
    return [];
  }
  if (scenarios.length < entries.length) {
    console.warn(
      `[scenario-designer] kept ${scenarios.length} of ${entries.length} returned scenarios (malformed or beyond requested count)`
    );
  }

  console.log(`[scenario-designer] generated ${scenarios.length} scenarios:`);
  scenarios.forEach((s) => console.log(` - [${s.id}] ${s.title}`));

  return scenarios;
}
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
2
|
+
import * as fs from "fs";
|
|
3
|
+
import * as path from "path";
|
|
4
|
+
import type { LLMClient } from "./llm-client";
|
|
5
|
+
import type { Finding } from "./types";
|
|
6
|
+
import { createMessageWithRetry } from "./agent-loop";
|
|
7
|
+
import { postGitHubIssue, fetchOpenIssues } from "./github";
|
|
8
|
+
|
|
9
|
+
/**
 * Tools offered to the triage model:
 * - `get_all_findings`: takes no input; lists the collected findings.
 * - `create_issue`: posts one GitHub Issue covering one or more findings.
 * - `skip_finding`: marks a single finding as skipped, with a reason.
 */
const TRIAGE_TOOLS: Anthropic.Tool[] = [
  {
    name: "get_all_findings",
    description: "Get all feedback collected by agents / 全エージェントが収集したフィードバック一覧を取得する",
    input_schema: { type: "object", properties: {}, required: [] },
  },
  {
    name: "create_issue",
    description: "Post feedback as a GitHub Issue; multiple related findings can be merged into one / フィードバックをGitHub Issueとして投稿する。類似フィードバックをまとめて1件にできる",
    input_schema: {
      type: "object",
      properties: {
        title: { type: "string", description: "Issue title (concise)" },
        body: { type: "string", description: "Issue body with details from multiple perspectives" },
        category: { type: "string", enum: ["ux", "feature-request", "bug", "goal-gap"] },
        merged_finding_ids: {
          type: "array",
          items: { type: "string" },
          description: "IDs of the findings merged into this Issue",
        },
      },
      required: ["title", "body", "category", "merged_finding_ids"],
    },
  },
  {
    name: "skip_finding",
    description: "Skip a finding that duplicates an existing open GitHub Issue / 既存のOpenなGitHub Issueと重複するためスキップする",
    input_schema: {
      type: "object",
      properties: {
        finding_id: { type: "string", description: "ID of the finding to skip" },
        reason: { type: "string", description: "Reason for skipping" },
      },
      required: ["finding_id", "reason"],
    },
  },
];
|
|
46
|
+
|
|
47
|
+
/** Summary of one triage run, as returned by `runTriageAgent`. */
export interface TriageResult {
  /** IDs of findings that were merged into a created GitHub Issue. */
  issued: string[];
  /** IDs of findings the triage model chose to skip. */
  skipped: string[];
  /** IDs of findings that were neither issued nor skipped when triage ended. */
  unprocessed: string[];
  /** Number of GitHub Issues created (one Issue may cover several findings). */
  issuesCreated: number;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Runs the LLM triage loop: the model reviews the collected findings, merges
 * related ones into GitHub Issues and skips duplicates of existing open Issues.
 *
 * The loop runs for at most 15 model turns and stops early when the model
 * makes no tool call or ends its turn. Finding IDs supplied by the model are
 * untrusted: only IDs that belong to `findings` and are still pending are
 * accepted, so each finding ends up in at most one of `issued` / `skipped` and
 * no Issue is posted for IDs that match nothing.
 *
 * Side effects: fetches open Issues, posts new Issues, and writes
 * `<cwd>/findings/<runId>/triage_result.json` (creating the directory if
 * needed), where `runId` is taken from the first finding.
 *
 * @param findings - Findings to triage. When empty, nothing is called or written.
 * @param client - LLM client passed through to `createMessageWithRetry`.
 * @param model - Model identifier.
 * @param githubOptions - Token and repository used for the GitHub calls.
 * @returns Which finding IDs were issued, skipped or left unprocessed, and how
 *   many Issues were created.
 */
export async function runTriageAgent(
  findings: Finding[],
  client: LLMClient,
  model: string,
  githubOptions: { token: string; repo: string }
): Promise<TriageResult> {
  if (findings.length === 0) {
    console.log("\n[triage] no findings, skipping");
    return { issued: [], skipped: [], unprocessed: [], issuesCreated: 0 };
  }

  console.log(`\n[triage] starting (findings: ${findings.length})`);

  const openIssues = await fetchOpenIssues(githubOptions);
  // Every finding starts as pending and leaves this set exactly once.
  const pendingIds = new Set(findings.map((f) => f.id));
  const issuedIds: string[] = [];
  const skippedIds: string[] = [];
  let issuesCreated = 0;

  const openIssueList = openIssues.length > 0
    ? `\n\n[Existing open Issues (for deduplication)]\n${openIssues.map((i) => `- #${i.number}: ${i.title}`).join("\n")}`
    : "";

  const systemPrompt = `You are a feedback triage AI.
Organize feedback collected by multiple agents and post it as GitHub Issues.

[Steps]
1. Call get_all_findings to review collected feedback
2. Merge similar/duplicate feedback into a single Issue
3. Skip feedback that duplicates an existing open Issue using skip_finding
4. Post the rest with create_issue (no duplicates, only valuable findings)
5. Finish after processing all items${openIssueList}

[Category Guide]
- bug: incorrect or broken behavior
- ux: usability, interaction, or visual design issue
- feature-request: missing capability users would expect
- goal-gap: the app fails to meet one of its stated goals — use only when a finding directly undermines a specific app goal

[Merging Guidelines]
- Multiple reports about the same screen/feature can be merged into one Issue
- Merge into one Issue even across categories if it's the same underlying problem
- Include multiple perspectives in the body when merging
- Only post clearly valuable findings (skip operation errors or misunderstandings)

[Important Constraints]
- merged_finding_ids must contain at least one ID
- If a finding cannot be linked to any feedback, use skip_finding instead of create_issue`;

  const messages: Anthropic.MessageParam[] = [
    { role: "user", content: "Triage the feedback and create GitHub Issues." },
  ];

  let iterations = 0;
  while (iterations < 15) {
    iterations++;

    const response = await createMessageWithRetry(client, {
      model,
      max_tokens: 2048,
      system: systemPrompt,
      tools: TRIAGE_TOOLS,
      messages,
    });

    messages.push({ role: "assistant", content: response.content });

    const toolUses = response.content.filter(
      (b): b is Anthropic.ToolUseBlock => b.type === "tool_use"
    );
    if (toolUses.length === 0 || response.stop_reason === "end_turn") break;

    const toolResults: Anthropic.ToolResultBlockParam[] = [];
    for (const toolUse of toolUses) {
      let result: unknown;

      if (toolUse.name === "get_all_findings") {
        result = findings.map((f) => ({
          id: f.id,
          agentName: f.agentName,
          role: f.role,
          title: f.title,
          body: f.body,
          category: f.category,
          timestamp: f.timestamp,
          pending: pendingIds.has(f.id),
        }));
        console.log(` [triage] fetched findings (${findings.length})`);

      } else if (toolUse.name === "create_issue") {
        const { title, body, category, merged_finding_ids } = toolUse.input as {
          title: string;
          body: string;
          category: string;
          merged_finding_ids: unknown;
        };

        // Deduplicate the requested IDs and tolerate a missing or non-array value.
        const requestedIds = Array.isArray(merged_finding_ids)
          ? [...new Set(merged_finding_ids.filter((id): id is string => typeof id === "string"))]
          : [];
        // Unknown IDs and findings already issued or skipped are not counted again.
        const acceptedIds = requestedIds.filter((id) => pendingIds.has(id));
        const ignoredIds = requestedIds.filter((id) => !pendingIds.has(id));

        if (requestedIds.length === 0) {
          result = { error: "merged_finding_ids must contain at least one ID" };
        } else if (acceptedIds.length === 0) {
          // Report back instead of posting an Issue that matches no pending finding.
          result = { error: "merged_finding_ids contains no pending finding IDs", ignoredIds };
        } else {
          const mergedFindings = findings.filter((f) => acceptedIds.includes(f.id));
          const mergedAgents = mergedFindings.map((f) => `${f.agentName} (${f.role})`);
          const screenshots = mergedFindings
            .filter((f) => f.screenshotPath)
            .map((f) => `- ${f.agentName}: ${f.screenshotPath}`);
          const screenshotSection = screenshots.length > 0
            ? `\n\n**Screenshots:**\n${screenshots.join("\n")}`
            : "";
          const fullBody = `**Category:** ${category}\n\n${body}${screenshotSection}\n\n---\n**Reported by:** ${mergedAgents.join(", ")}\n*This Issue was auto-generated by an AI triage agent*`;
          const url = await postGitHubIssue(`[${category}] ${title}`, fullBody, [category, "feedback-agent"], githubOptions);

          for (const id of acceptedIds) {
            pendingIds.delete(id);
            issuedIds.push(id);
          }
          issuesCreated++;
          result = ignoredIds.length > 0
            ? { created: true, url, mergedCount: acceptedIds.length, ignoredIds }
            : { created: true, url, mergedCount: acceptedIds.length };
          console.log(` [triage] issue created: "${title}" (merged ${acceptedIds.length})`);
        }

      } else if (toolUse.name === "skip_finding") {
        const { finding_id, reason } = toolUse.input as { finding_id: string; reason: string };
        if (pendingIds.has(finding_id)) {
          pendingIds.delete(finding_id);
          skippedIds.push(finding_id);
          result = { skipped: true };
          console.log(` [triage] skipped: ${finding_id} — ${reason}`);
        } else {
          // Unknown ID, or a finding that was already issued or skipped.
          result = { error: `unknown or already processed finding_id: ${finding_id}` };
        }

      } else {
        result = { error: "unknown tool" };
      }

      // Every tool_use gets exactly one tool_result, in the order received.
      toolResults.push({ type: "tool_result", tool_use_id: toolUse.id, content: JSON.stringify(result) });
    }

    messages.push({ role: "user", content: toolResults });
  }

  // findings is non-empty here (see the early return above).
  const runId = findings[0].runId;
  const findingsDir = path.join(process.cwd(), "findings", runId);
  // Make sure the directory exists so a missing folder cannot lose the result
  // after Issues have already been posted.
  fs.mkdirSync(findingsDir, { recursive: true });
  fs.writeFileSync(
    path.join(findingsDir, "triage_result.json"),
    JSON.stringify({
      runId,
      completedAt: new Date().toISOString(),
      issued: issuedIds,
      skipped: skippedIds,
      unprocessed: Array.from(pendingIds),
    }, null, 2),
    "utf-8"
  );

  console.log(`[triage] done (issues created: ${issuesCreated} / skipped: ${skippedIds.length})`);
  return { issued: issuedIds, skipped: skippedIds, unprocessed: Array.from(pendingIds), issuesCreated };
}
|