@m8i-51/shoal 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/framework/__tests__/cost.test.ts +118 -0
- package/framework/__tests__/coverage.test.ts +80 -0
- package/framework/__tests__/report.test.ts +88 -0
- package/framework/coverage.ts +41 -6
- package/framework/org-designer.ts +8 -2
- package/package.json +2 -1
- package/server/__tests__/scheduler.test.ts +134 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
+
import { estimateCost, formatCostUSD } from "../cost";
|
|
3
|
+
|
|
4
|
+
// OpenRouter のフェッチをモック(ネットワーク不要)
|
|
5
|
+
vi.stubGlobal("fetch", vi.fn());
|
|
6
|
+
|
|
7
|
+
beforeEach(() => {
|
|
8
|
+
vi.mocked(fetch).mockResolvedValue({
|
|
9
|
+
ok: false,
|
|
10
|
+
status: 503,
|
|
11
|
+
json: async () => ({}),
|
|
12
|
+
text: async () => "",
|
|
13
|
+
} as Response);
|
|
14
|
+
});
|
|
15
|
+
|
|
16
|
+
describe("formatCostUSD", () => {
|
|
17
|
+
it("null / undefined は —", () => {
|
|
18
|
+
expect(formatCostUSD(null)).toBe("—");
|
|
19
|
+
expect(formatCostUSD(undefined)).toBe("—");
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
it("0.00005 未満は < $0.0001", () => {
|
|
23
|
+
expect(formatCostUSD(0.000005)).toBe("< $0.0001");
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
it("0.0001 以上 0.01 未満は 4 桁小数", () => {
|
|
27
|
+
expect(formatCostUSD(0.0023)).toBe("$0.0023");
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("0.01 以上 1 未満は 3 桁小数", () => {
|
|
31
|
+
expect(formatCostUSD(0.123)).toBe("$0.123");
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it("1 以上は 2 桁小数", () => {
|
|
35
|
+
expect(formatCostUSD(2.5)).toBe("$2.50");
|
|
36
|
+
});
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
describe("estimateCost — free providers", () => {
|
|
40
|
+
it.each(["ollama", "lm-studio", "codex", "local"])("%s は null を返す", async (provider) => {
|
|
41
|
+
expect(await estimateCost("any-model", provider, 1000, 500)).toBeNull();
|
|
42
|
+
});
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
describe("estimateCost — Anthropic", () => {
|
|
46
|
+
it("claude-haiku-4-5-20251001 の料金を計算する", async () => {
|
|
47
|
+
const cost = await estimateCost("claude-haiku-4-5-20251001", "anthropic", 1_000_000, 500_000);
|
|
48
|
+
// input: 0.8/1M × 1M = 0.8, output: 4/1M × 500k = 2.0 → 2.8
|
|
49
|
+
expect(cost).toBeCloseTo(2.8, 5);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it("prefix match — claude-haiku-4-5-xxx はキーに一致する", async () => {
|
|
53
|
+
const cost = await estimateCost("claude-haiku-4-5-some-suffix", "anthropic", 1_000_000, 0);
|
|
54
|
+
expect(cost).toBeCloseTo(0.8, 5);
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
it("不明モデルは null", async () => {
|
|
58
|
+
expect(await estimateCost("claude-unknown-9999", "anthropic", 1000, 500)).toBeNull();
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
it("claude-sonnet-4-6 の料金を計算する", async () => {
|
|
62
|
+
const cost = await estimateCost("claude-sonnet-4-6", "anthropic", 1_000_000, 1_000_000);
|
|
63
|
+
// input: 3/1M + output: 15/1M = 18
|
|
64
|
+
expect(cost).toBeCloseTo(18, 5);
|
|
65
|
+
});
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
describe("estimateCost — Bedrock", () => {
|
|
69
|
+
it("anthropic.claude-3-5-haiku-20241022-v1:0 の料金を計算する", async () => {
|
|
70
|
+
const cost = await estimateCost("anthropic.claude-3-5-haiku-20241022-v1:0", "bedrock", 1_000_000, 1_000_000);
|
|
71
|
+
// input: 0.8/1M + output: 4/1M = 4.8
|
|
72
|
+
expect(cost).toBeCloseTo(4.8, 5);
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
it("クロスリージョンプレフィックス us. を除去してマッチする", async () => {
|
|
76
|
+
const direct = await estimateCost("anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", 1_000_000, 0);
|
|
77
|
+
const crossRegion = await estimateCost("us.anthropic.claude-3-5-sonnet-20241022-v2:0", "bedrock", 1_000_000, 0);
|
|
78
|
+
expect(crossRegion).toBeCloseTo(direct!, 8);
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
it("不明モデルは null", async () => {
|
|
82
|
+
expect(await estimateCost("anthropic.claude-unknown-v99:0", "bedrock", 1000, 500)).toBeNull();
|
|
83
|
+
});
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
describe("estimateCost — OpenAI", () => {
|
|
87
|
+
it("gpt-4o の料金を計算する", async () => {
|
|
88
|
+
const cost = await estimateCost("gpt-4o", "openai", 1_000_000, 1_000_000);
|
|
89
|
+
// input: 5/1M + output: 15/1M = 20
|
|
90
|
+
expect(cost).toBeCloseTo(20, 5);
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
it("gpt-4o-mini の料金を計算する", async () => {
|
|
94
|
+
const cost = await estimateCost("gpt-4o-mini", "openai", 1_000_000, 1_000_000);
|
|
95
|
+
expect(cost).toBeCloseTo(0.75, 5);
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
it("不明モデルは null", async () => {
|
|
99
|
+
expect(await estimateCost("gpt-unknown", "openai", 1000, 500)).toBeNull();
|
|
100
|
+
});
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
describe("estimateCost — OpenRouter", () => {
|
|
104
|
+
it("fetch 失敗時は null を返す", async () => {
|
|
105
|
+
expect(await estimateCost("some/model", "openrouter", 1000, 500)).toBeNull();
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
it("fetch 成功時はレスポンスの料金を使う", async () => {
|
|
109
|
+
vi.mocked(fetch).mockResolvedValueOnce({
|
|
110
|
+
ok: true,
|
|
111
|
+
json: async () => ({
|
|
112
|
+
data: [{ id: "some/model", pricing: { prompt: "0.000003", completion: "0.000015" } }],
|
|
113
|
+
}),
|
|
114
|
+
} as Response);
|
|
115
|
+
const cost = await estimateCost("some/model", "openrouter", 1_000_000, 1_000_000);
|
|
116
|
+
expect(cost).toBeCloseTo(18, 5);
|
|
117
|
+
});
|
|
118
|
+
});
|
|
@@ -167,6 +167,86 @@ describe("computeWeightedSummary", () => {
|
|
|
167
167
|
expect(result.formatted).toContain("By scenario");
|
|
168
168
|
});
|
|
169
169
|
|
|
170
|
+
it("14日以内に同じレンズが複数 run に登場するとボーナスが乗る", () => {
|
|
171
|
+
const now = Date.now();
|
|
172
|
+
// The same Accessibility lens appears in 2 runs → bonus = 1 + (2-1)^REPETITION_EXPONENT * REPETITION_BONUS = 1 + 1^3 * 0.005 = 1.005
|
|
173
|
+
setupMockCoverage({
|
|
174
|
+
entries: [
|
|
175
|
+
makeEntry({
|
|
176
|
+
runId: "run_1",
|
|
177
|
+
timestamp: new Date(now - 1000).toISOString(),
|
|
178
|
+
findingsCount: 2,
|
|
179
|
+
byLens: { Accessibility: 2 },
|
|
180
|
+
}),
|
|
181
|
+
makeEntry({
|
|
182
|
+
runId: "run_2",
|
|
183
|
+
timestamp: new Date(now).toISOString(),
|
|
184
|
+
findingsCount: 2,
|
|
185
|
+
byLens: { Accessibility: 2 },
|
|
186
|
+
}),
|
|
187
|
+
],
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
const resultWithRepeat = computeWeightedSummary();
|
|
191
|
+
|
|
192
|
+
// 1回しか登場しない場合と比較
|
|
193
|
+
setupMockCoverage({
|
|
194
|
+
entries: [
|
|
195
|
+
makeEntry({
|
|
196
|
+
runId: "run_1",
|
|
197
|
+
timestamp: new Date(now).toISOString(),
|
|
198
|
+
findingsCount: 2,
|
|
199
|
+
byLens: { Accessibility: 2 },
|
|
200
|
+
}),
|
|
201
|
+
],
|
|
202
|
+
});
|
|
203
|
+
const resultSingle = computeWeightedSummary();
|
|
204
|
+
|
|
205
|
+
// 繰り返しありのほうが lens の重みが高いはず
|
|
206
|
+
expect(resultWithRepeat.byLens["Accessibility"]).toBeGreaterThan(resultSingle.byLens["Accessibility"]);
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
it("14日より古いエントリーは繰り返しカウントに含まれない", () => {
|
|
210
|
+
const now = Date.now();
|
|
211
|
+
const oldMs = 15 * 24 * 60 * 60 * 1000; // 15日前
|
|
212
|
+
setupMockCoverage({
|
|
213
|
+
entries: [
|
|
214
|
+
makeEntry({
|
|
215
|
+
runId: "run_old",
|
|
216
|
+
timestamp: new Date(now - oldMs).toISOString(),
|
|
217
|
+
findingsCount: 2,
|
|
218
|
+
byLens: { Security: 2 },
|
|
219
|
+
}),
|
|
220
|
+
makeEntry({
|
|
221
|
+
runId: "run_new",
|
|
222
|
+
timestamp: new Date(now).toISOString(),
|
|
223
|
+
findingsCount: 2,
|
|
224
|
+
byLens: { Security: 2 },
|
|
225
|
+
}),
|
|
226
|
+
],
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
const result = computeWeightedSummary();
|
|
230
|
+
// 古いエントリーはウィンドウ外なのでボーナスなし(繰り返し回数=1 → bonus=1.0)
|
|
231
|
+
// Without a bonus: weight ≈ 1.0*2 + (15-day decay = 0.5^(15/7) ≈ 0.23)*2 ≈ 2.45
|
|
232
|
+
expect(result.formatted).not.toContain("Repeated lenses");
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
it("繰り返しレンズが formatted に含まれる", () => {
|
|
236
|
+
const now = Date.now();
|
|
237
|
+
setupMockCoverage({
|
|
238
|
+
entries: [
|
|
239
|
+
makeEntry({ runId: "r1", timestamp: new Date(now - 1000).toISOString(), findingsCount: 1, byLens: { "UI design": 1 } }),
|
|
240
|
+
makeEntry({ runId: "r2", timestamp: new Date(now).toISOString(), findingsCount: 1, byLens: { "UI design": 1 } }),
|
|
241
|
+
],
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
const result = computeWeightedSummary();
|
|
245
|
+
expect(result.formatted).toContain("Repeated lenses");
|
|
246
|
+
expect(result.formatted).toContain("UI design");
|
|
247
|
+
expect(result.formatted).toContain("×2");
|
|
248
|
+
});
|
|
249
|
+
|
|
170
250
|
it("MAX_ENTRIES を超えると最新30件に切り捨てる", () => {
|
|
171
251
|
const entries = Array.from({ length: 35 }, (_, i) =>
|
|
172
252
|
makeEntry({
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
2
|
import * as fs from "fs";
|
|
3
3
|
import * as path from "path";
|
|
4
|
+
import type { AgentLog, RegressionCheck } from "../types";
|
|
5
|
+
import type { ScenarioOutcome } from "../scenario-designer";
|
|
4
6
|
|
|
5
7
|
vi.mock("fs");
|
|
6
8
|
vi.mock("path", async (importOriginal) => {
|
|
@@ -41,6 +43,22 @@ function makeRunLog(overrides: Partial<RunLog> = {}): RunLog {
|
|
|
41
43
|
};
|
|
42
44
|
}
|
|
43
45
|
|
|
46
|
+
function makeAgentLog(overrides: Partial<AgentLog> = {}): AgentLog {
|
|
47
|
+
return {
|
|
48
|
+
agentId: "a1",
|
|
49
|
+
agentName: "Alice",
|
|
50
|
+
agentType: "browser",
|
|
51
|
+
role: "tester",
|
|
52
|
+
status: "completed",
|
|
53
|
+
iterations: 3,
|
|
54
|
+
issuesPosted: [],
|
|
55
|
+
regressionChecks: [],
|
|
56
|
+
startedAt: "2026-04-27T00:01:00.000Z",
|
|
57
|
+
completedAt: "2026-04-27T00:03:00.000Z",
|
|
58
|
+
...overrides,
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
|
|
44
62
|
function makeProductSpec(overrides: Partial<ProductSpec> = {}): ProductSpec {
|
|
45
63
|
return {
|
|
46
64
|
appName: "Test App",
|
|
@@ -138,6 +156,76 @@ describe("generateReport", () => {
|
|
|
138
156
|
expect(html).toContain("lens");
|
|
139
157
|
});
|
|
140
158
|
|
|
159
|
+
it("エージェントテーブルにエージェント名と status が含まれる", () => {
|
|
160
|
+
const agent = makeAgentLog({ agentName: "Bob", status: "completed" });
|
|
161
|
+
generateReport(makeRunLog({ agents: [agent] }), [], emptyTriage, makeProductSpec(), [], new Map());
|
|
162
|
+
const html = getSavedHtml();
|
|
163
|
+
expect(html).toContain("Bob");
|
|
164
|
+
expect(html).toContain("completed");
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
it("regression エージェントに regression バッジが付く", () => {
|
|
168
|
+
const agent = makeAgentLog({ agentType: "regression" });
|
|
169
|
+
generateReport(makeRunLog({ agents: [agent] }), [], emptyTriage, makeProductSpec(), [], new Map());
|
|
170
|
+
expect(getSavedHtml()).toContain("regression");
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
it("regression checks がある場合 Progress セクションが表示される", () => {
|
|
174
|
+
const checks: RegressionCheck[] = [
|
|
175
|
+
{ issueNumber: 42, issueTitle: "Login button broken", status: "fixed" },
|
|
176
|
+
];
|
|
177
|
+
const agent = makeAgentLog({ agentType: "regression", regressionChecks: checks });
|
|
178
|
+
generateReport(makeRunLog({ agents: [agent] }), [], emptyTriage, makeProductSpec(), [], new Map());
|
|
179
|
+
const html = getSavedHtml();
|
|
180
|
+
expect(html).toContain("Progress");
|
|
181
|
+
expect(html).toContain("#42");
|
|
182
|
+
expect(html).toContain("Login button broken");
|
|
183
|
+
expect(html).toContain("✓ fixed");
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
it("regression が再発した場合 regressed バッジが表示される", () => {
|
|
187
|
+
const checks: RegressionCheck[] = [
|
|
188
|
+
{ issueNumber: 7, issueTitle: "Crash on submit", status: "regressed" },
|
|
189
|
+
];
|
|
190
|
+
const agent = makeAgentLog({ agentType: "regression", regressionChecks: checks });
|
|
191
|
+
generateReport(makeRunLog({ agents: [agent] }), [], emptyTriage, makeProductSpec(), [], new Map());
|
|
192
|
+
expect(getSavedHtml()).toContain("⚠ regressed");
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
it("regression checks がない場合 Progress セクションは表示されない", () => {
|
|
196
|
+
generateReport(makeRunLog(), [], emptyTriage, makeProductSpec(), [], new Map());
|
|
197
|
+
expect(getSavedHtml()).not.toContain("Progress (");
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
it("ScenarioOutcomes が achieved の場合 achieved バッジが表示される", () => {
|
|
201
|
+
const outcomes: ScenarioOutcome[] = [{
|
|
202
|
+
scenarioId: "s1",
|
|
203
|
+
scenarioTitle: "New employee task",
|
|
204
|
+
agentId: "a1",
|
|
205
|
+
agentName: "Alice",
|
|
206
|
+
achieved: true,
|
|
207
|
+
reason: "Completed successfully",
|
|
208
|
+
}];
|
|
209
|
+
generateReport(makeRunLog(), [], emptyTriage, makeProductSpec(), [], new Map(), outcomes);
|
|
210
|
+
const html = getSavedHtml();
|
|
211
|
+
expect(html).toContain("Scenario Outcomes");
|
|
212
|
+
expect(html).toContain("achieved");
|
|
213
|
+
expect(html).toContain("New employee task");
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
it("ScenarioOutcomes が failed の場合 failed バッジが表示される", () => {
|
|
217
|
+
const outcomes: ScenarioOutcome[] = [{
|
|
218
|
+
scenarioId: "s1",
|
|
219
|
+
scenarioTitle: "Purchase flow",
|
|
220
|
+
agentId: "a1",
|
|
221
|
+
agentName: "Bob",
|
|
222
|
+
achieved: false,
|
|
223
|
+
reason: "Could not find the button",
|
|
224
|
+
}];
|
|
225
|
+
generateReport(makeRunLog(), [], emptyTriage, makeProductSpec(), [], new Map(), outcomes);
|
|
226
|
+
expect(getSavedHtml()).toContain("failed");
|
|
227
|
+
});
|
|
228
|
+
|
|
141
229
|
it("finding が issued → unprocessed → skipped の順に並ぶ", () => {
|
|
142
230
|
const f1 = makeFinding({ id: "f1", title: "Issued Finding" });
|
|
143
231
|
const f2 = makeFinding({ id: "f2", title: "Skipped Finding" });
|
package/framework/coverage.ts
CHANGED
|
@@ -27,6 +27,9 @@ export interface WeightedSummary {
|
|
|
27
27
|
const COVERAGE_PATH = path.join(process.cwd(), "coverage", "coverage.json");
|
|
28
28
|
const MAX_ENTRIES = 30;
|
|
29
29
|
const HALF_LIFE_DAYS = 7;
|
|
30
|
+
const REPETITION_WINDOW_DAYS = 14;
|
|
31
|
+
const REPETITION_BONUS = 0.005;
|
|
32
|
+
const REPETITION_EXPONENT = 3;
|
|
30
33
|
|
|
31
34
|
export function loadCoverage(): Coverage {
|
|
32
35
|
try {
|
|
@@ -98,6 +101,20 @@ export function computeWeightedSummary(): WeightedSummary {
|
|
|
98
101
|
|
|
99
102
|
const now = Date.now();
|
|
100
103
|
const halfLifeMs = HALF_LIFE_DAYS * 24 * 60 * 60 * 1000;
|
|
104
|
+
const windowMs = REPETITION_WINDOW_DAYS * 24 * 60 * 60 * 1000;
|
|
105
|
+
|
|
106
|
+
// 14日以内の run で各 lens/scenario が何回登場したかを数える
|
|
107
|
+
const lensRepeat: Record<string, number> = {};
|
|
108
|
+
const scenarioRepeat: Record<string, number> = {};
|
|
109
|
+
for (const entry of coverage.entries) {
|
|
110
|
+
if (now - new Date(entry.timestamp).getTime() > windowMs) continue;
|
|
111
|
+
for (const lens of Object.keys(entry.byLens)) {
|
|
112
|
+
lensRepeat[lens] = (lensRepeat[lens] ?? 0) + 1;
|
|
113
|
+
}
|
|
114
|
+
for (const title of Object.keys(entry.byScenario ?? {})) {
|
|
115
|
+
scenarioRepeat[title] = (scenarioRepeat[title] ?? 0) + 1;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
101
118
|
|
|
102
119
|
const byCategory: Record<string, number> = {};
|
|
103
120
|
const byLens: Record<string, number> = {};
|
|
@@ -106,18 +123,21 @@ export function computeWeightedSummary(): WeightedSummary {
|
|
|
106
123
|
|
|
107
124
|
for (const entry of coverage.entries) {
|
|
108
125
|
const age = now - new Date(entry.timestamp).getTime();
|
|
109
|
-
const
|
|
126
|
+
const decay = Math.pow(0.5, age / halfLifeMs);
|
|
110
127
|
|
|
111
128
|
for (const [cat, count] of Object.entries(entry.byCategory)) {
|
|
112
|
-
byCategory[cat] = (byCategory[cat] ?? 0) + count *
|
|
129
|
+
byCategory[cat] = (byCategory[cat] ?? 0) + count * decay;
|
|
113
130
|
}
|
|
114
131
|
for (const [lens, count] of Object.entries(entry.byLens)) {
|
|
115
|
-
|
|
132
|
+
// 繰り返し呼ばれるほど「必要」とみなしてボーナスを加算
|
|
133
|
+
const bonus = 1 + Math.pow((lensRepeat[lens] ?? 1) - 1, REPETITION_EXPONENT) * REPETITION_BONUS;
|
|
134
|
+
byLens[lens] = (byLens[lens] ?? 0) + count * decay * bonus;
|
|
116
135
|
}
|
|
117
136
|
for (const [title, count] of Object.entries(entry.byScenario ?? {})) {
|
|
118
|
-
|
|
137
|
+
const bonus = 1 + Math.pow((scenarioRepeat[title] ?? 1) - 1, REPETITION_EXPONENT) * REPETITION_BONUS;
|
|
138
|
+
byScenario[title] = (byScenario[title] ?? 0) + count * decay * bonus;
|
|
119
139
|
}
|
|
120
|
-
totalWeighted += entry.findingsCount *
|
|
140
|
+
totalWeighted += entry.findingsCount * decay;
|
|
121
141
|
}
|
|
122
142
|
|
|
123
143
|
// 小数点1桁に丸める
|
|
@@ -142,12 +162,27 @@ export function computeWeightedSummary(): WeightedSummary {
|
|
|
142
162
|
? `By scenario: ${sortedScenario.map(([t, c]) => `"${t}" (${c})`).join(", ")}`
|
|
143
163
|
: null;
|
|
144
164
|
|
|
165
|
+
const repeatedLenses = Object.entries(lensRepeat)
|
|
166
|
+
.filter(([, n]) => n > 1)
|
|
167
|
+
.sort((a, b) => b[1] - a[1])
|
|
168
|
+
.map(([l, n]) => `${l} (×${n})`);
|
|
169
|
+
const repeatedScenarios = Object.entries(scenarioRepeat)
|
|
170
|
+
.filter(([, n]) => n > 1)
|
|
171
|
+
.sort((a, b) => b[1] - a[1])
|
|
172
|
+
.map(([t, n]) => `"${t}" (×${n})`);
|
|
173
|
+
|
|
145
174
|
const formatted = [
|
|
146
|
-
`Coverage summary (half-life: ${HALF_LIFE_DAYS} days, ${coverage.entries.length} run(s) tracked):`,
|
|
175
|
+
`Coverage summary (half-life: ${HALF_LIFE_DAYS} days, repetition window: ${REPETITION_WINDOW_DAYS} days, ${coverage.entries.length} run(s) tracked):`,
|
|
147
176
|
`Total weighted findings: ${totalWeighted}`,
|
|
148
177
|
`By lens: ${sortedLens.map(([l, c]) => `${l} (${c})`).join(" > ") || "(none)"}`,
|
|
149
178
|
scenarioLine,
|
|
150
179
|
`By category: ${sortedCategory.map(([c, n]) => `${c} (${n})`).join(" > ") || "(none)"}`,
|
|
180
|
+
repeatedLenses.length > 0
|
|
181
|
+
? `Repeated lenses (bonus applied): ${repeatedLenses.join(", ")}`
|
|
182
|
+
: null,
|
|
183
|
+
repeatedScenarios.length > 0
|
|
184
|
+
? `Repeated scenarios (bonus applied): ${repeatedScenarios.join(", ")}`
|
|
185
|
+
: null,
|
|
151
186
|
underrepresented.length > 0
|
|
152
187
|
? `Underrepresented lenses: ${underrepresented.join(", ")} — consider recruiting agents with these perspectives`
|
|
153
188
|
: "All lenses have comparable coverage",
|
|
@@ -14,7 +14,7 @@ export const UNIVERSAL_LENSES = [
|
|
|
14
14
|
"Business logic: calculation accuracy, status transitions, approval flow correctness, edge case handling in forms / ビジネスロジック観点",
|
|
15
15
|
"Data integrity: UI reflects actual state after actions, silent save failures, optimistic update inconsistencies / データ整合性観点",
|
|
16
16
|
"New user: first-time usability, onboarding clarity, instruction completeness, error recovery, empty state messaging / 新規ユーザー観点",
|
|
17
|
-
"UX design: interaction feedback (loading states, success/error messages), form usability, modal and dialog behavior, navigation consistency, micro-interactions — evaluate against established
|
|
17
|
+
"UX design: interaction feedback (loading states, success/error messages), form usability, modal and dialog behavior, navigation consistency, micro-interactions — evaluate against established HCI principles: Fitts's Law (are touch/click targets large and close enough?), Hick's Law (are choices overwhelming?), Miller's Law (is the amount of information shown at once within cognitive limits?), Jakob's Law (does the app behave like similar apps users already know?), Nielsen's heuristics (visibility of system status, error prevention, recognition over recall) / UXデザイン観点",
|
|
18
18
|
"Visual design: spacing and alignment consistency, typography hierarchy, color usage and contrast, component coherence across screens, mobile responsiveness — flag anything that looks broken, cramped, or visually inconsistent / ビジュアルデザイン観点",
|
|
19
19
|
"Product/PM: feature discoverability, user journey clarity, obvious next actions, drop-off risk points, call-to-action prominence, whether the app communicates its value clearly, missing features that users of this type would expect / プロダクト・PM観点",
|
|
20
20
|
"Power user: keyboard shortcuts availability, bulk operations, filtering/sorting depth, export options, API access, customization options / パワーユーザー観点",
|
|
@@ -78,7 +78,13 @@ ${UNIVERSAL_LENSES.map((l) => `- ${l}`).join("\n")}
|
|
|
78
78
|
When recruiting UX/design-oriented agents, give them awareness of these standards:
|
|
79
79
|
- Apple HIG: clear visual hierarchy, immediate feedback, forgiveness (undo/cancel), consistent navigation, minimal cognitive load
|
|
80
80
|
- Material Design: meaningful motion, bold clear typography, responsive layout, accessible color contrast (WCAG AA minimum)
|
|
81
|
-
- General web conventions: F-pattern reading, above-the-fold CTAs, error prevention over error recovery, progressive disclosure for complex forms
|
|
81
|
+
- General web conventions: F-pattern reading, above-the-fold CTAs, error prevention over error recovery, progressive disclosure for complex forms
|
|
82
|
+
- HCI principles to apply when exploring:
|
|
83
|
+
- Fitts's Law: notice when important buttons are small, far from natural cursor/thumb position, or hard to tap on mobile
|
|
84
|
+
- Hick's Law: flag screens with too many choices that slow down decision-making
|
|
85
|
+
- Miller's Law: flag when more than ~7 items are shown without grouping or progressive disclosure
|
|
86
|
+
- Jakob's Law: flag interactions that contradict conventions users expect from similar apps (e.g., swipe to delete, pull to refresh, hamburger menus)
|
|
87
|
+
- Nielsen's heuristics: check for missing system status feedback, unclear error messages, lack of undo, and forcing users to recall rather than recognize`;
|
|
82
88
|
|
|
83
89
|
console.log("[org-design] done");
|
|
84
90
|
return { hrGuidance };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@m8i-51/shoal",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.8",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Multi-agent web exploration framework — finds bugs, UX issues, and missing features by running AI agents against your app",
|
|
6
6
|
"repository": {
|
|
@@ -50,6 +50,7 @@
|
|
|
50
50
|
"@types/react": "^19.2.14",
|
|
51
51
|
"@types/react-dom": "^19.2.3",
|
|
52
52
|
"@vitejs/plugin-react": "^4.7.0",
|
|
53
|
+
"@vitest/coverage-v8": "^4.1.6",
|
|
53
54
|
"concurrently": "^9.2.1",
|
|
54
55
|
"i18next": "^26.0.6",
|
|
55
56
|
"react": "^19.2.5",
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
|
2
|
+
import * as fs from "fs";
|
|
3
|
+
|
|
4
|
+
vi.mock("fs");
|
|
5
|
+
vi.mock("path", async (importOriginal) => {
|
|
6
|
+
const actual = await importOriginal<typeof import("path")>();
|
|
7
|
+
return { ...actual, join: (...args: string[]) => args.join("/") };
|
|
8
|
+
});
|
|
9
|
+
vi.mock("../runner.js", () => ({ spawnRun: vi.fn() }));
|
|
10
|
+
|
|
11
|
+
import { loadSchedule, saveSchedule, type ScheduleConfig } from "../scheduler";
|
|
12
|
+
import { spawnRun } from "../runner.js";
|
|
13
|
+
|
|
14
|
+
const DEFAULT: ScheduleConfig = {
|
|
15
|
+
enabled: false,
|
|
16
|
+
dayOfWeek: 1,
|
|
17
|
+
hour: 9,
|
|
18
|
+
minute: 0,
|
|
19
|
+
lastRunDate: null,
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
beforeEach(() => {
|
|
23
|
+
vi.mocked(fs.existsSync).mockReturnValue(false);
|
|
24
|
+
vi.mocked(fs.readFileSync).mockReturnValue("{}");
|
|
25
|
+
vi.mocked(fs.writeFileSync).mockReturnValue(undefined);
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
afterEach(() => {
|
|
29
|
+
vi.clearAllMocks();
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
describe("loadSchedule", () => {
|
|
33
|
+
it("ファイルがない場合はデフォルト設定を返す", () => {
|
|
34
|
+
vi.mocked(fs.existsSync).mockReturnValue(false);
|
|
35
|
+
expect(loadSchedule()).toEqual(DEFAULT);
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
it("ファイルがある場合は設定を読み込む", () => {
|
|
39
|
+
const saved: ScheduleConfig = { enabled: true, dayOfWeek: 3, hour: 14, minute: 30, lastRunDate: "2026-05-12" };
|
|
40
|
+
vi.mocked(fs.existsSync).mockReturnValue(true);
|
|
41
|
+
vi.mocked(fs.readFileSync).mockReturnValue(JSON.stringify(saved));
|
|
42
|
+
expect(loadSchedule()).toEqual(saved);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it("ファイルが壊れている場合はデフォルトを返す", () => {
|
|
46
|
+
vi.mocked(fs.existsSync).mockReturnValue(true);
|
|
47
|
+
vi.mocked(fs.readFileSync).mockReturnValue("invalid json{{{");
|
|
48
|
+
expect(loadSchedule()).toEqual(DEFAULT);
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
it("部分的な設定はデフォルトとマージされる", () => {
|
|
52
|
+
vi.mocked(fs.existsSync).mockReturnValue(true);
|
|
53
|
+
vi.mocked(fs.readFileSync).mockReturnValue(JSON.stringify({ enabled: true }));
|
|
54
|
+
expect(loadSchedule()).toEqual({ ...DEFAULT, enabled: true });
|
|
55
|
+
});
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
describe("saveSchedule", () => {
|
|
59
|
+
it("設定を JSON ファイルに書き出す", () => {
|
|
60
|
+
const config: ScheduleConfig = { enabled: true, dayOfWeek: 1, hour: 9, minute: 0, lastRunDate: null };
|
|
61
|
+
saveSchedule(config);
|
|
62
|
+
const [, content] = vi.mocked(fs.writeFileSync).mock.calls[0];
|
|
63
|
+
expect(JSON.parse(content as string)).toEqual(config);
|
|
64
|
+
});
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
describe("scheduler — 時刻判定ロジック", () => {
|
|
68
|
+
it("enabled=false のときは spawnRun を呼ばない", async () => {
|
|
69
|
+
vi.useFakeTimers();
|
|
70
|
+
vi.mocked(fs.existsSync).mockReturnValue(true);
|
|
71
|
+
vi.mocked(fs.readFileSync).mockReturnValue(JSON.stringify({ ...DEFAULT, enabled: false }));
|
|
72
|
+
|
|
73
|
+
vi.resetModules();
|
|
74
|
+
const { startScheduler } = await import("../scheduler");
|
|
75
|
+
startScheduler();
|
|
76
|
+
|
|
77
|
+
// 最初の setTimeout(次の分の頭)+ check が走る分だけ進める
|
|
78
|
+
await vi.advanceTimersByTimeAsync(61_000);
|
|
79
|
+
expect(spawnRun).not.toHaveBeenCalled();
|
|
80
|
+
vi.useRealTimers();
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
it("スケジュール時刻に一致したとき spawnRun を呼ぶ", async () => {
|
|
84
|
+
vi.useFakeTimers();
|
|
85
|
+
|
|
86
|
+
// 月曜 09:00 に固定(UTC = ローカルとして扱う)
|
|
87
|
+
const monday9am = new Date("2026-05-11T09:00:00.000Z");
|
|
88
|
+
vi.setSystemTime(monday9am);
|
|
89
|
+
|
|
90
|
+
const config: ScheduleConfig = {
|
|
91
|
+
enabled: true,
|
|
92
|
+
dayOfWeek: monday9am.getDay(),
|
|
93
|
+
hour: monday9am.getHours(),
|
|
94
|
+
minute: monday9am.getMinutes(),
|
|
95
|
+
lastRunDate: null,
|
|
96
|
+
};
|
|
97
|
+
vi.mocked(fs.existsSync).mockReturnValue(true);
|
|
98
|
+
vi.mocked(fs.readFileSync).mockReturnValue(JSON.stringify(config));
|
|
99
|
+
|
|
100
|
+
vi.resetModules();
|
|
101
|
+
const { startScheduler: start } = await import("../scheduler");
|
|
102
|
+
start();
|
|
103
|
+
|
|
104
|
+
await vi.advanceTimersByTimeAsync(61_000);
|
|
105
|
+
expect(spawnRun).toHaveBeenCalledOnce();
|
|
106
|
+
vi.useRealTimers();
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
it("同日に既に実行済みなら spawnRun を呼ばない", async () => {
|
|
110
|
+
vi.useFakeTimers();
|
|
111
|
+
|
|
112
|
+
const monday9am = new Date("2026-05-11T09:00:00.000Z");
|
|
113
|
+
vi.setSystemTime(monday9am);
|
|
114
|
+
const today = monday9am.toISOString().slice(0, 10);
|
|
115
|
+
|
|
116
|
+
const config: ScheduleConfig = {
|
|
117
|
+
enabled: true,
|
|
118
|
+
dayOfWeek: monday9am.getDay(),
|
|
119
|
+
hour: monday9am.getHours(),
|
|
120
|
+
minute: monday9am.getMinutes(),
|
|
121
|
+
lastRunDate: today,
|
|
122
|
+
};
|
|
123
|
+
vi.mocked(fs.existsSync).mockReturnValue(true);
|
|
124
|
+
vi.mocked(fs.readFileSync).mockReturnValue(JSON.stringify(config));
|
|
125
|
+
|
|
126
|
+
vi.resetModules();
|
|
127
|
+
const { startScheduler: start } = await import("../scheduler");
|
|
128
|
+
start();
|
|
129
|
+
|
|
130
|
+
await vi.advanceTimersByTimeAsync(61_000);
|
|
131
|
+
expect(spawnRun).not.toHaveBeenCalled();
|
|
132
|
+
vi.useRealTimers();
|
|
133
|
+
});
|
|
134
|
+
});
|