@m8i-51/shoal 0.1.17 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/framework/coverage.ts +48 -0
- package/framework/page-cache.ts +30 -0
- package/package.json +1 -1
- package/run.ts +44 -9
package/framework/coverage.ts
CHANGED
|
@@ -201,3 +201,51 @@ export function computeWeightedSummary(): WeightedSummary {
|
|
|
201
201
|
|
|
202
202
|
return { totalWeighted, byCategory, byLens, byScenario, formatted };
|
|
203
203
|
}
|
|
204
|
+
|
|
205
|
+
// ================================================================
|
|
206
|
+
// Finding hotspots — 集合知(過去 run の findings をパス別に集計)
|
|
207
|
+
// ================================================================
|
|
208
|
+
|
|
209
|
+
/**
 * Aggregated finding counts for one URL area, as produced by
 * `getFindingHotspots`.
 */
export interface FindingHotspot {
  /** Path prefix the findings were grouped under, e.g. `/settings`; `/` when no path was detected. */
  pathPrefix: string;
  /** Number of findings attributed to this prefix. */
  totalFindings: number;
  /** Finding counts keyed by the finding's `category` value. */
  categories: Record<string, number>;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Derives the top-level URL area a finding refers to.
 *
 * Scans the finding's title and body for the first URL-like path and returns
 * its first segment with a leading slash (`/settings/profile` → `/settings`).
 *
 * A path is recognised at the start of the text, or after whitespace, a quote,
 * an opening bracket, or a word character (the last covers
 * `example.com/settings`). A `\b` anchor in front of the slash would only
 * match after a word character, which sends every bare mention such as
 * "broken on /settings" to the `/` bucket.
 *
 * @param finding - Finding whose `title` and `body` are searched.
 * @returns The first path segment prefixed with `/`, or `/` when no path is found.
 */
function extractPath(finding: Finding): string {
  const text = `${finding.title} ${finding.body}`;
  // The character right after the leading "/" must not be another "/", so the
  // "//" of a URL scheme ("https://host/...") is never taken as a path start.
  const match = /(?:^|[\w\s"'`(\[])(\/[a-zA-Z0-9_-][a-zA-Z0-9_/-]+)/.exec(text);
  if (!match) return "/";
  const [firstSegment] = match[1].split("/").filter(Boolean);
  return firstSegment ? `/${firstSegment}` : "/";
}
|
|
222
|
+
|
|
223
|
+
/**
 * Aggregates the findings stored by past runs into per-area hotspots.
 *
 * Reads every `*.json` file inside `findings/run_<digits>/` under the current
 * working directory, groups each finding by the prefix returned by
 * `extractPath`, and counts findings per group and per category.
 *
 * Unreadable run directories, unparsable files, and JSON values that are not
 * objects are skipped, so one bad file never aborts the aggregation. A finding
 * whose `category` is not a string is tallied under `"uncategorized"`.
 *
 * @param topN - Maximum number of hotspots to return (default 12).
 * @returns Hotspots sorted by descending finding count; an empty array when
 *          the `findings` directory does not exist.
 */
export function getFindingHotspots(topN = 12): FindingHotspot[] {
  const base = path.join(process.cwd(), "findings");
  if (!fs.existsSync(base)) return [];

  // Maps keep arbitrary category strings (e.g. "constructor") from colliding
  // with members inherited from Object.prototype while counting.
  const tallies = new Map<string, { total: number; categories: Map<string, number> }>();

  for (const runDir of fs.readdirSync(base)) {
    if (!/^run_\d+$/.test(runDir)) continue;
    const dir = path.join(base, runDir);

    let files: string[];
    try {
      files = fs.readdirSync(dir);
    } catch {
      continue; // not a directory, or unreadable
    }

    for (const file of files) {
      if (!file.endsWith(".json")) continue;
      try {
        const parsed: unknown = JSON.parse(fs.readFileSync(path.join(dir, file), "utf-8"));
        // Only JSON objects can be findings; numbers, strings, arrays and null are ignored.
        if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) continue;

        const finding = parsed as Finding;
        const prefix = extractPath(finding);
        const category = typeof finding.category === "string" ? finding.category : "uncategorized";

        let tally = tallies.get(prefix);
        if (!tally) {
          tally = { total: 0, categories: new Map<string, number>() };
          tallies.set(prefix, tally);
        }
        tally.total++;
        tally.categories.set(category, (tally.categories.get(category) ?? 0) + 1);
      } catch { /* skip unreadable or unparsable file */ }
    }
  }

  return Array.from(tallies, ([pathPrefix, { total, categories }]) => ({
    pathPrefix,
    totalFindings: total,
    categories: Object.fromEntries(categories),
  }))
    .sort((a, b) => b.totalFindings - a.totalFindings)
    .slice(0, topN);
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import * as fs from "fs";
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import * as crypto from "crypto";
|
|
4
|
+
|
|
5
|
+
/** Directory under the current working directory that holds one page-hash JSON file per host. */
const CACHE_DIR = path.join(process.cwd(), "cache", "page-hashes");

/**
 * Builds the cache file path for a host.
 *
 * Every character other than ASCII letters, digits, `.` and `-` is replaced
 * with `-`, so a value such as `localhost:3000` becomes a single file name.
 *
 * @param host - Host identifier, e.g. `app.example.com` or `localhost:3000`.
 * @returns Path of `<sanitised host>.json` inside the cache directory.
 */
function cacheFilePath(host: string): string {
  const fileName = `${host.replace(/[^a-zA-Z0-9.-]/g, "-")}.json`;
  return path.join(CACHE_DIR, fileName);
}
|
|
11
|
+
|
|
12
|
+
/**
 * Loads the stored page-content hashes for a host.
 *
 * The cache is best-effort: a missing, unreadable or malformed cache file
 * yields an empty record instead of an error. Only string-valued entries of a
 * JSON object are returned, so callers can rely on the declared
 * `Record<string, string>` shape even if the file was edited or corrupted.
 *
 * @param host - Host identifier used to locate the cache file.
 * @returns Map of page path to content hash; empty when nothing usable is stored.
 */
export function loadPageHashes(host: string): Record<string, string> {
  try {
    const file = cacheFilePath(host);
    if (!fs.existsSync(file)) return {};

    const parsed: unknown = JSON.parse(fs.readFileSync(file, "utf-8"));
    // `null`, arrays and primitives are valid JSON but not a hash map.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return {};

    return Object.fromEntries(
      Object.entries(parsed).filter(
        (entry): entry is [string, string] => typeof entry[1] === "string",
      ),
    );
  } catch { /* ignore: treat an unreadable cache as empty */ }
  return {};
}
|
|
19
|
+
|
|
20
|
+
/**
 * Merges new page hashes into the stored cache for a host and writes it back.
 *
 * Existing entries are kept unless `updates` overrides them. Nothing is
 * written when `updates` is empty.
 *
 * Persisting is best-effort, like loading: a filesystem failure is logged as
 * a warning instead of thrown, so a caller that flushes the cache during
 * cleanup is not aborted by a cache that cannot be written.
 *
 * @param host - Host identifier used to locate the cache file.
 * @param updates - Page path to content hash entries to add or overwrite.
 */
export function updatePageHashes(host: string, updates: Record<string, string>): void {
  if (Object.keys(updates).length === 0) return;
  try {
    const merged = { ...loadPageHashes(host), ...updates };
    fs.mkdirSync(CACHE_DIR, { recursive: true });
    fs.writeFileSync(cacheFilePath(host), JSON.stringify(merged, null, 2), "utf-8");
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    console.warn(`[page-cache] could not persist page hashes for ${host}: ${reason}`);
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * Computes a short fingerprint of page content.
 *
 * @param content - Text to fingerprint.
 * @returns The first 16 hexadecimal characters of the SHA-256 digest of `content`.
 */
export function hashContent(content: string): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(content);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 16);
}
|
package/package.json
CHANGED
package/run.ts
CHANGED
|
@@ -17,7 +17,8 @@ import type { Tool } from "./framework/llm-client";
|
|
|
17
17
|
import { createMessageWithRetry, runAgentLoop, sleep, rateLimitRetries } from "./framework/agent-loop";
|
|
18
18
|
import { collectedFindings, initRunLog, saveRunLog, saveFinding, runLog } from "./framework/findings";
|
|
19
19
|
import { loadAgents, addAgent, retireAgent } from "./framework/agent-store";
|
|
20
|
-
import { updateCoverage, computeWeightedSummary, getLastRunPaths } from "./framework/coverage";
|
|
20
|
+
import { updateCoverage, computeWeightedSummary, getLastRunPaths, getFindingHotspots } from "./framework/coverage";
|
|
21
|
+
import { loadPageHashes, updatePageHashes, hashContent } from "./framework/page-cache";
|
|
21
22
|
import { loadPersonaPack, formatPackForPrompt, type PersonaPack } from "./framework/persona-pack";
|
|
22
23
|
import { buildTrackers } from "./framework/trackers/index";
|
|
23
24
|
import {
|
|
@@ -421,6 +422,11 @@ const PERSONA_DESIGNER_TOOLS: Anthropic.Tool[] = [
|
|
|
421
422
|
description: "Get the list of URL paths visited in the previous run. Use this to identify unexplored areas of the app and recruit agents likely to visit NEW paths. / 前回のrunで訪れたURLパス一覧を取得する。未探索エリアを特定し、新しいパスを訪れる可能性の高いペルソナを採用するために使う",
|
|
422
423
|
input_schema: { type: "object", properties: {}, required: [] },
|
|
423
424
|
},
|
|
425
|
+
{
|
|
426
|
+
name: "get_finding_hotspots",
|
|
427
|
+
description: "Get URL areas where findings have clustered across all past runs. Use this to understand which parts of the app have been thoroughly investigated vs. overlooked — recruit agents to explore under-investigated areas, or specialists to deep-dive problem hotspots. / 過去のrun全体でfindingsが集中しているURLエリアを取得する。十分に調査済みのエリアと見落とされているエリアを把握し、未探索エリアへの新エージェント採用や問題多発エリアへのスペシャリスト派遣に活かす",
|
|
428
|
+
input_schema: { type: "object", properties: {}, required: [] },
|
|
429
|
+
},
|
|
424
430
|
{
|
|
425
431
|
name: "get_persona_templates",
|
|
426
432
|
description: "Get the persona template pack defined for this project. Prefer these archetypes when adding agents — adapt names/details to fit the app context but keep the role intact. / このプロジェクト用に定義されたペルソナテンプレート一覧を取得する。エージェントを追加する際はまずこのテンプレートから選ぶこと",
|
|
@@ -482,8 +488,8 @@ async function runPersonaDesigner(
|
|
|
482
488
|
: "";
|
|
483
489
|
|
|
484
490
|
const pathCoverageStep = lastRunPaths
|
|
485
|
-
? "3. Call get_path_coverage to see which URL paths were visited last run — recruit agents whose role would naturally take them to DIFFERENT or unexplored paths"
|
|
486
|
-
: "3. (No previous run data yet — skip get_path_coverage)";
|
|
491
|
+
? "3. Call get_path_coverage to see which URL paths were visited last run — recruit agents whose role would naturally take them to DIFFERENT or unexplored paths\n4. Call get_finding_hotspots to see where problems have clustered across all past runs — recruit agents to under-investigated areas, or specialists to problem hotspots"
|
|
492
|
+
: "3. (No previous run data yet — skip get_path_coverage)\n4. Call get_finding_hotspots to see if any areas have already accumulated findings";
|
|
487
493
|
|
|
488
494
|
const personaTemplateStep = personaPack
|
|
489
495
|
? "2. Call get_persona_templates to get project-specific persona archetypes — prefer these over inventing new personas from scratch"
|
|
@@ -499,11 +505,11 @@ ${orgGuidance}${accountContext}
|
|
|
499
505
|
1. Call get_coverage to review which lenses and categories are underrepresented in past runs
|
|
500
506
|
${personaTemplateStep}
|
|
501
507
|
${pathCoverageStep}
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
508
|
+
5. Call get_open_issues to understand what problems are already known — recruit agents likely to find DIFFERENT issues in unexplored areas
|
|
509
|
+
6. Call get_scenarios to see the user test scenarios generated for this run — about 70% of agents will be assigned a scenario, so recruit personas whose background fits those scenarios
|
|
510
|
+
7. Call get_agents to check the current agent roster
|
|
511
|
+
8. Add 2–3 agents with add_agent — balance between scenario-fit personas (step 6), underrepresented lenses (step 1), unexplored paths (step 3), finding hotspots (step 4), and unexplored areas (step 5)${testAccounts.length > 0 ? "\n — assign each agent a role that matches one of the available test accounts" : ""}
|
|
512
|
+
9. If there are agents with old createdAt dates (oldest 1–2), retire them with retire_agent`;
|
|
507
513
|
|
|
508
514
|
try {
|
|
509
515
|
let iterations = 0;
|
|
@@ -541,6 +547,16 @@ ${pathCoverageStep}
|
|
|
541
547
|
result = `Paths visited in last run (${lastRunPaths.runId}):\n${lastRunPaths.visitedPaths.map((p) => `- ${p}`).join("\n")}\n\nRecruit agents whose role naturally takes them to paths NOT in this list.`;
|
|
542
548
|
}
|
|
543
549
|
console.log(` [persona-designer] path coverage fetched (${lastRunPaths?.visitedPaths.length ?? 0} paths)`);
|
|
550
|
+
} else if (toolUse.name === "get_finding_hotspots") {
|
|
551
|
+
const hotspots = getFindingHotspots();
|
|
552
|
+
if (hotspots.length === 0) {
|
|
553
|
+
result = "(no past findings data yet — this appears to be the first run)";
|
|
554
|
+
} else {
|
|
555
|
+
result = hotspots.map((h) =>
|
|
556
|
+
`${h.pathPrefix}: ${h.totalFindings} findings — ${Object.entries(h.categories).map(([c, n]) => `${c}:${n}`).join(", ")}`
|
|
557
|
+
).join("\n");
|
|
558
|
+
}
|
|
559
|
+
console.log(` [persona-designer] finding hotspots fetched (${hotspots.length} areas)`);
|
|
544
560
|
} else if (toolUse.name === "get_open_issues") {
|
|
545
561
|
if (openIssues.length === 0) {
|
|
546
562
|
result = "(no open issues — either GitHub is not configured or there are no known issues yet)";
|
|
@@ -718,6 +734,8 @@ async function executeBrowserTool(
|
|
|
718
734
|
observation: ObservationState,
|
|
719
735
|
agentId: string,
|
|
720
736
|
scenarioOutcomes: ScenarioOutcome[],
|
|
737
|
+
cachedHashes: Record<string, string>,
|
|
738
|
+
pageHashUpdates: Record<string, string>,
|
|
721
739
|
scenario?: Scenario,
|
|
722
740
|
): Promise<{ text: string; screenshot: { base64: string; filePath: string } | null; sendToClaude: boolean }> {
|
|
723
741
|
const startedAt = Date.now();
|
|
@@ -739,7 +757,18 @@ async function executeBrowserTool(
|
|
|
739
757
|
await page.waitForTimeout(3000);
|
|
740
758
|
screenshot = await takeScreenshot(page, `navigate_${navPath.replace(/\//g, "_")}`);
|
|
741
759
|
agentLog.visitedPaths.push(navPath);
|
|
742
|
-
|
|
760
|
+
// ページコンテンツハッシュで差分検出
|
|
761
|
+
try {
|
|
762
|
+
const content = await page.innerText("body", { timeout: 2000 });
|
|
763
|
+
const h = hashContent(content);
|
|
764
|
+
const unchanged = cachedHashes[navPath] && cachedHashes[navPath] === h;
|
|
765
|
+
pageHashUpdates[navPath] = h;
|
|
766
|
+
resultText = unchanged
|
|
767
|
+
? `Navigated to ${navPath} (page content unchanged since last run — consider exploring a different area)`
|
|
768
|
+
: `Navigated to ${navPath}`;
|
|
769
|
+
} catch {
|
|
770
|
+
resultText = `Navigated to ${navPath}`;
|
|
771
|
+
}
|
|
743
772
|
break;
|
|
744
773
|
}
|
|
745
774
|
case "click": {
|
|
@@ -925,6 +954,9 @@ async function runBrowserAgent(
|
|
|
925
954
|
};
|
|
926
955
|
|
|
927
956
|
const observation = setupObservation(page);
|
|
957
|
+
const host = new URL(BASE_URL).host;
|
|
958
|
+
const cachedHashes = loadPageHashes(host);
|
|
959
|
+
const pageHashUpdates: Record<string, string> = {};
|
|
928
960
|
|
|
929
961
|
const systemPrompt = `You are "${agent.name}".
|
|
930
962
|
Role: ${agent.role}
|
|
@@ -1023,6 +1055,8 @@ ${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\
|
|
|
1023
1055
|
observation,
|
|
1024
1056
|
agent.id,
|
|
1025
1057
|
scenarioOutcomes,
|
|
1058
|
+
cachedHashes,
|
|
1059
|
+
pageHashUpdates,
|
|
1026
1060
|
assignment.scenario,
|
|
1027
1061
|
);
|
|
1028
1062
|
|
|
@@ -1075,6 +1109,7 @@ ${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\
|
|
|
1075
1109
|
console.error(`[${agent.name}] error:`, e);
|
|
1076
1110
|
} finally {
|
|
1077
1111
|
agentLog.completedAt = new Date().toISOString();
|
|
1112
|
+
updatePageHashes(host, pageHashUpdates);
|
|
1078
1113
|
}
|
|
1079
1114
|
|
|
1080
1115
|
console.log(`[browser] ${agent.name} done (feedback: ${agentLog.feedbacksSaved.length})`);
|