@m8i-51/shoal 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +121 -0
- package/bin/shoal.js +56 -0
- package/framework/__tests__/coverage.test.ts +232 -0
- package/framework/__tests__/report.test.ts +154 -0
- package/framework/account-manager.ts +414 -0
- package/framework/agent-loop.ts +103 -0
- package/framework/agent-store.ts +47 -0
- package/framework/cost.ts +91 -0
- package/framework/coverage.ts +157 -0
- package/framework/findings.ts +53 -0
- package/framework/github.ts +64 -0
- package/framework/llm-client.ts +507 -0
- package/framework/observation.ts +182 -0
- package/framework/org-designer.ts +85 -0
- package/framework/product-discovery.ts +327 -0
- package/framework/report.ts +276 -0
- package/framework/scenario-designer.ts +141 -0
- package/framework/triage.ts +208 -0
- package/framework/types.ts +80 -0
- package/package.json +55 -0
- package/run.ts +1213 -0
- package/server/index.ts +227 -0
- package/server/runner.ts +125 -0
- package/server/runs.ts +103 -0
- package/targets/example.ts +55 -0
- package/targets/index.ts +17 -0
- package/targets/noop.ts +6 -0
- package/targets/types.ts +19 -0
- package/triage-only.ts +57 -0
- package/web/dist/assets/index-CD6EJ_1O.js +68 -0
- package/web/dist/assets/index-DPLuVm2n.css +1 -0
- package/web/dist/index.html +13 -0
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import * as fs from "fs";
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import type { Finding } from "./types";
|
|
4
|
+
import type { Scenario } from "./scenario-designer";
|
|
5
|
+
|
|
6
|
+
/** Tally of a single run's findings, as stored in the coverage history. */
export interface RunCoverage {
  /** Identifier of the run the tally belongs to. */
  runId: string;
  /** ISO-8601 string stamped when the entry was recorded. */
  timestamp: string;
  /** Total number of findings the run produced. */
  findingsCount: number;
  /** Finding count keyed by finding category. */
  byCategory: Record<string, number>;
  /** Finding count keyed by lens label. */
  byLens: Record<string, number>;
  /** Finding count keyed by scenario title. */
  byScenario: Record<string, number>;
}
|
|
14
|
+
|
|
15
|
+
/** Shape of the persisted coverage history file. */
export interface Coverage {
  /** One entry per recorded run; new runs are appended at the end. */
  entries: RunCoverage[];
}
|
|
18
|
+
|
|
19
|
+
/** Age-weighted aggregate of the coverage history. */
export interface WeightedSummary {
  /** Sum of every run's finding count multiplied by that run's weight. */
  totalWeighted: number;
  /** Weighted finding count keyed by finding category. */
  byCategory: Record<string, number>;
  /** Weighted finding count keyed by lens label. */
  byLens: Record<string, number>;
  /** Weighted finding count keyed by scenario title. */
  byScenario: Record<string, number>;
  /** Multi-line, human-readable rendering of the values above. */
  formatted: string;
}
|
|
26
|
+
|
|
27
|
+
/** Coverage history file: `coverage/coverage.json` under the current working directory. */
const COVERAGE_PATH = path.resolve("coverage", "coverage.json");

/** Upper bound on the number of run entries kept in the history. */
const MAX_ENTRIES = 30;

/** Number of days after which a run's weight in the summary has halved. */
const HALF_LIFE_DAYS = 7;
|
|
30
|
+
|
|
31
|
+
/**
 * Reads the coverage history from `COVERAGE_PATH`.
 *
 * Loading is best-effort: a missing file, an unreadable file, invalid JSON, or
 * JSON that is not an object carrying an `entries` array all yield an empty
 * history, so callers can always rely on `entries` being an array.
 *
 * @returns The stored history, or `{ entries: [] }` when none can be used.
 */
export function loadCoverage(): Coverage {
  try {
    if (fs.existsSync(COVERAGE_PATH)) {
      const parsed: unknown = JSON.parse(fs.readFileSync(COVERAGE_PATH, "utf-8"));
      if (
        typeof parsed === "object" &&
        parsed !== null &&
        Array.isArray((parsed as { entries?: unknown }).entries)
      ) {
        return parsed as Coverage;
      }
      // A structurally wrong file would otherwise crash later on `entries.push` / `entries.length`.
      console.warn(`[coverage] ignoring ${COVERAGE_PATH}: expected an object with an "entries" array`);
    }
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`[coverage] could not read ${COVERAGE_PATH}: ${reason}`);
  }
  return { entries: [] };
}
|
|
39
|
+
|
|
40
|
+
/**
 * Persists the coverage history to `COVERAGE_PATH` as 2-space indented JSON,
 * creating the containing directory first when it does not exist yet.
 */
function saveCoverage(coverage: Coverage): void {
  const parentDir = path.dirname(COVERAGE_PATH);
  fs.mkdirSync(parentDir, { recursive: true });

  const serialized = JSON.stringify(coverage, null, 2);
  fs.writeFileSync(COVERAGE_PATH, serialized, "utf-8");
}
|
|
44
|
+
|
|
45
|
+
/**
 * Appends one run's finding tallies to the persisted coverage history.
 *
 * Every finding is counted under its category. A finding whose agent has a
 * lens is additionally counted under that lens (the text before the first
 * ":" of the lens string, trimmed); otherwise, when the agent has a scenario,
 * it is counted under the scenario title. The history is capped at
 * `MAX_ENTRIES` entries, dropping the oldest ones.
 *
 * @param runId Identifier stored with the new history entry.
 * @param findings Findings produced by the run.
 * @param agentAssignments Lens/scenario assignment per agent id.
 */
export function updateCoverage(
  runId: string,
  findings: Finding[],
  agentAssignments: Map<string, { scenario?: Scenario; lens?: string }>,
): void {
  const coverage = loadCoverage();

  // Maps instead of `{}`: keys such as "constructor" or "__proto__" would hit
  // inherited Object.prototype members on a plain object and corrupt the count.
  const categoryCounts = new Map<string, number>();
  const lensCounts = new Map<string, number>();
  const scenarioCounts = new Map<string, number>();

  for (const finding of findings) {
    bumpCount(categoryCounts, finding.category);

    const assignment = agentAssignments.get(finding.agentId);
    if (assignment?.lens) {
      const [label = ""] = assignment.lens.split(":");
      bumpCount(lensCounts, label.trim());
    } else if (assignment?.scenario) {
      bumpCount(scenarioCounts, assignment.scenario.title);
    }
  }

  coverage.entries.push({
    runId,
    timestamp: new Date().toISOString(),
    findingsCount: findings.length,
    // Object.fromEntries defines own data properties, so every key survives serialization.
    byCategory: Object.fromEntries(categoryCounts),
    byLens: Object.fromEntries(lensCounts),
    byScenario: Object.fromEntries(scenarioCounts),
  });

  if (coverage.entries.length > MAX_ENTRIES) {
    coverage.entries = coverage.entries.slice(-MAX_ENTRIES);
  }

  saveCoverage(coverage);
  console.log(`[coverage] updated (${coverage.entries.length} run(s) tracked)`);
}

/** Adds one to the counter stored under `key`, starting from zero. */
function bumpCount(counts: Map<string, number>, key: string): void {
  counts.set(key, (counts.get(key) ?? 0) + 1);
}
|
|
85
|
+
|
|
86
|
+
/**
 * Aggregates the stored coverage history into age-weighted totals.
 *
 * Each run contributes with weight `0.5 ^ (age / half-life)`, where the
 * half-life is `HALF_LIFE_DAYS` days. Runs stamped in the future are treated
 * as age zero (weight 1) and runs whose timestamp cannot be parsed are
 * skipped, so a single bad entry cannot turn every total into `NaN`.
 * All reported numbers are rounded to one decimal place.
 *
 * A lens is reported as underrepresented when its weighted count is below
 * half of the average weighted count per lens.
 *
 * @returns Weighted totals plus a multi-line text rendering of them.
 */
export function computeWeightedSummary(): WeightedSummary {
  const coverage = loadCoverage();

  if (coverage.entries.length === 0) {
    return {
      totalWeighted: 0,
      byCategory: {},
      byLens: {},
      byScenario: {},
      formatted: "(no coverage data yet — this is the first run)",
    };
  }

  const now = Date.now();
  const halfLifeMs = HALF_LIFE_DAYS * 24 * 60 * 60 * 1000;

  // Maps instead of `{}` so keys like "constructor" cannot hit inherited members.
  const categoryTotals = new Map<string, number>();
  const lensTotals = new Map<string, number>();
  const scenarioTotals = new Map<string, number>();
  let totalWeighted = 0;

  for (const entry of coverage.entries) {
    const recordedAt = new Date(entry.timestamp).getTime();
    if (Number.isNaN(recordedAt)) continue; // unparseable timestamp: no meaningful weight

    // Clamp so clock skew (future timestamps) cannot produce a weight above 1.
    const age = Math.max(0, now - recordedAt);
    const weight = Math.pow(0.5, age / halfLifeMs);

    // Entries may lack some maps (the original already guarded `byScenario`).
    addWeightedCounts(categoryTotals, entry.byCategory, weight);
    addWeightedCounts(lensTotals, entry.byLens, weight);
    addWeightedCounts(scenarioTotals, entry.byScenario, weight);

    if (Number.isFinite(entry.findingsCount)) {
      totalWeighted += entry.findingsCount * weight;
    }
  }

  // Round to one decimal place.
  const round1 = (n: number): number => Math.round(n * 10) / 10;
  const roundAll = (totals: Map<string, number>): Record<string, number> =>
    Object.fromEntries(
      [...totals].map(([key, value]): [string, number] => [key, round1(value)]),
    );

  const byCategory = roundAll(categoryTotals);
  const byLens = roundAll(lensTotals);
  const byScenario = roundAll(scenarioTotals);
  totalWeighted = round1(totalWeighted);

  const byCountDesc = (a: [string, number], b: [string, number]): number => b[1] - a[1];
  const sortedLens = Object.entries(byLens).sort(byCountDesc);
  const sortedCategory = Object.entries(byCategory).sort(byCountDesc);
  const sortedScenario = Object.entries(byScenario).sort(byCountDesc);

  // Average over lens-attributed findings only: scenario-attributed or
  // unassigned findings must not inflate the bar every lens is measured against.
  let lensWeighted = 0;
  for (const value of lensTotals.values()) lensWeighted += value;
  const avgLens = sortedLens.length > 0 ? round1(lensWeighted) / sortedLens.length : 0;
  const underrepresented = sortedLens
    .filter(([, count]) => count < avgLens * 0.5)
    .map(([lens]) => lens);

  const scenarioLine = sortedScenario.length > 0
    ? `By scenario: ${sortedScenario.map(([t, c]) => `"${t}" (${c})`).join(", ")}`
    : null;

  const lines: (string | null)[] = [
    `Coverage summary (half-life: ${HALF_LIFE_DAYS} days, ${coverage.entries.length} run(s) tracked):`,
    `Total weighted findings: ${totalWeighted}`,
    `By lens: ${sortedLens.map(([l, c]) => `${l} (${c})`).join(" > ") || "(none)"}`,
    scenarioLine,
    `By category: ${sortedCategory.map(([c, n]) => `${c} (${n})`).join(" > ") || "(none)"}`,
    underrepresented.length > 0
      ? `Underrepresented lenses: ${underrepresented.join(", ")} — consider recruiting agents with these perspectives`
      : "All lenses have comparable coverage",
  ];
  const formatted = lines.filter((line): line is string => line !== null).join("\n");

  return { totalWeighted, byCategory, byLens, byScenario, formatted };
}

/** Adds `count * weight` for every finite count in `counts` to `totals`. */
function addWeightedCounts(
  totals: Map<string, number>,
  counts: Record<string, number> | undefined,
  weight: number,
): void {
  for (const [key, count] of Object.entries(counts ?? {})) {
    if (typeof count !== "number" || !Number.isFinite(count)) continue;
    totals.set(key, (totals.get(key) ?? 0) + count * weight);
  }
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import * as fs from "fs";
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import type { Finding, RunLog } from "./types";
|
|
4
|
+
|
|
5
|
+
/** Findings saved so far by this process, in the order they were saved. */
export const collectedFindings: Finding[] = [];

/** Log of the current run; stays unassigned until `initRunLog` has been called. */
export let runLog: RunLog;
|
|
7
|
+
|
|
8
|
+
/**
 * Records a finding in memory and writes it to
 * `findings/<runId>/<id>.json` under the current working directory.
 *
 * The finding is appended to `collectedFindings` before the file is written,
 * so it is kept in memory even when the write throws.
 *
 * @param finding Finding to record; its `runId` and `id` determine the file path.
 */
export function saveFinding(finding: Finding): void {
  collectedFindings.push(finding);

  const findingsDir = path.join(process.cwd(), "findings", finding.runId);
  // With `recursive: true` an existing directory is not an error, so no prior
  // existence check is needed (and none can race with another writer).
  fs.mkdirSync(findingsDir, { recursive: true });

  fs.writeFileSync(
    path.join(findingsDir, `${finding.id}.json`),
    JSON.stringify(finding, null, 2),
    "utf-8",
  );
}
|
|
20
|
+
|
|
21
|
+
/**
 * Starts a fresh run log with all counters at zero.
 *
 * The run id comes from the `SHOAL_RUN_ID` environment variable; when that is
 * unset or blank, `run_<current epoch milliseconds>` is used instead so the
 * id is never empty.
 *
 * @param agentCount Stored as `summary.totalAgents`.
 * @param repo Stored as the log's `repo` field.
 */
export function initRunLog(agentCount: number, repo: string): void {
  const envRunId = process.env.SHOAL_RUN_ID;
  // `??` alone would accept an empty string and produce an empty run id.
  const runId = envRunId !== undefined && envRunId.trim() !== ""
    ? envRunId
    : `run_${Date.now()}`;

  runLog = {
    runId,
    startedAt: new Date().toISOString(),
    completedAt: null,
    repo,
    agents: [],
    summary: {
      totalAgents: agentCount,
      completed: 0,
      errors: 0,
      iterationLimitReached: 0,
      totalActions: 0,
      totalIssuesPosted: 0,
      regressionChecked: 0,
      regressionFailed: 0,
      rateLimitRetries: 0,
      cost: { inputTokens: 0, outputTokens: 0, estimatedUSD: null },
    },
  };
}
|
|
42
|
+
|
|
43
|
+
/**
 * Writes the current run log to `logs/<timestamp>_<runId>.json` under the
 * current working directory and prints the resulting path.
 *
 * Does nothing when no run log has been initialised.
 */
export function saveRunLog(): void {
  if (!runLog) return; // skip when an error occurred before initRunLog was called

  const logsDir = path.join(process.cwd(), "logs");
  // With `recursive: true` an existing directory is not an error, so no prior
  // existence check is needed.
  fs.mkdirSync(logsDir, { recursive: true });

  // ISO timestamp cut to seconds, with ":" and "." replaced so it is file-name safe.
  const ts = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const filePath = path.join(logsDir, `${ts}_${runLog.runId}.json`);
  fs.writeFileSync(filePath, JSON.stringify(runLog, null, 2), "utf-8");
  console.log(`\n[log] saved: ${filePath}`);
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import type { ClosedIssue } from "./types";
|
|
2
|
+
|
|
3
|
+
/** Credentials and target repository shared by the GitHub helpers in this file. */
interface GitHubOptions {
  /** Token sent in the `Authorization` header; an empty value disables the call. */
  token: string;
  /** Repository in `owner/name` form; an empty value disables the call. */
  repo: string;
}
|
|
7
|
+
|
|
8
|
+
/**
 * Creates an issue in the given GitHub repository.
 *
 * Failure is reported through the return value rather than by throwing:
 * missing credentials, a malformed `repo`, a network error, a non-2xx
 * response, or an unreadable response body all log a message and yield `null`.
 *
 * @param title Issue title.
 * @param body Issue body.
 * @param labels Labels to attach to the issue.
 * @param options Token and `owner/name` repository.
 * @returns The created issue's `html_url`, or `null` when no issue URL is available.
 */
export async function postGitHubIssue(
  title: string,
  body: string,
  labels: string[],
  { token, repo }: GitHubOptions
): Promise<string | null> {
  if (!token || !repo) {
    console.log("[github] skip (GITHUB_TOKEN or GITHUB_REPO not set)");
    return null;
  }

  const [owner, repoName, ...extra] = repo.split("/");
  if (!owner || !repoName || extra.length > 0) {
    // Without this check the request would target ".../repos/<owner>/undefined/issues".
    console.error(`[github] invalid repo "${repo}" (expected "owner/name")`);
    return null;
  }

  try {
    const res = await fetch(
      `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repoName)}/issues`,
      {
        method: "POST",
        headers: { Authorization: `token ${token}`, "Content-Type": "application/json" },
        body: JSON.stringify({ title, body, labels }),
      },
    );

    if (!res.ok) {
      // Read as text: error responses (e.g. from a proxy) are not guaranteed to be JSON.
      const detail = await res.text().catch(() => "");
      console.error(`[github] failed to create issue (${res.status}): ${detail}`);
      return null;
    }

    const data = (await res.json()) as { html_url?: unknown };
    const issueUrl = typeof data?.html_url === "string" ? data.html_url : null;
    console.log(`[github] issue created: ${issueUrl}`);
    return issueUrl;
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[github] failed to create issue: ${reason}`);
    return null;
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Lists up to 20 closed issues labelled `feedback-agent` in the repository.
 *
 * The lookup is best-effort: missing credentials, a malformed `repo`, a
 * network error, a non-2xx response, or a body that is not a JSON array all
 * yield an empty list (failures are logged).
 *
 * @param options Token and `owner/name` repository.
 * @returns Number, title, body (`""` when absent) and label names per issue.
 */
export async function fetchClosedIssues({ token, repo }: GitHubOptions): Promise<ClosedIssue[]> {
  if (!token || !repo) return [];

  const [owner, repoName, ...extra] = repo.split("/");
  if (!owner || !repoName || extra.length > 0) {
    console.error(`[github] invalid repo "${repo}" (expected "owner/name")`);
    return [];
  }

  try {
    const res = await fetch(
      `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repoName)}/issues?state=closed&labels=feedback-agent&per_page=20`,
      { headers: { Authorization: `token ${token}` } }
    );
    if (!res.ok) {
      console.error(`[github] failed to fetch closed issues (${res.status})`);
      return [];
    }

    const data: unknown = await res.json();
    if (!Array.isArray(data)) return [];

    return data.map((issue: {
      number: number;
      title: string;
      body?: string | null;
      labels?: (string | { name: string })[];
    }) => ({
      number: issue.number,
      title: issue.title,
      body: issue.body ?? "",
      // The API schema allows a label to be either a plain string or an object with a name.
      labels: (issue.labels ?? []).map((l) => (typeof l === "string" ? l : l.name)),
    }));
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[github] failed to fetch closed issues: ${reason}`);
    return [];
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Lists up to 50 open issues labelled `feedback-agent` in the repository.
 *
 * The lookup is best-effort: missing credentials, a malformed `repo`, a
 * network error, a non-2xx response, or a body that is not a JSON array all
 * yield an empty list (failures are logged).
 *
 * @param options Token and `owner/name` repository.
 * @returns Number, title and label names per issue.
 */
export async function fetchOpenIssues({ token, repo }: GitHubOptions): Promise<{ number: number; title: string; labels: string[] }[]> {
  if (!token || !repo) return [];

  const [owner, repoName, ...extra] = repo.split("/");
  if (!owner || !repoName || extra.length > 0) {
    console.error(`[github] invalid repo "${repo}" (expected "owner/name")`);
    return [];
  }

  try {
    const res = await fetch(
      `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repoName)}/issues?state=open&labels=feedback-agent&per_page=50`,
      { headers: { Authorization: `token ${token}` } }
    );
    if (!res.ok) {
      console.error(`[github] failed to fetch open issues (${res.status})`);
      return [];
    }

    const data: unknown = await res.json();
    if (!Array.isArray(data)) return [];

    return data.map((i: {
      number: number;
      title: string;
      labels?: (string | { name: string })[];
    }) => ({
      number: i.number,
      title: i.title,
      // The API schema allows a label to be either a plain string or an object with a name.
      labels: (i.labels ?? []).map((l) => (typeof l === "string" ? l : l.name)),
    }));
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[github] failed to fetch open issues: ${reason}`);
    return [];
  }
}
|