@m8i-51/shoal 0.1.17 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -201,3 +201,51 @@ export function computeWeightedSummary(): WeightedSummary {
201
201
 
202
202
  return { totalWeighted, byCategory, byLens, byScenario, formatted };
203
203
  }
204
+
205
+ // ================================================================
206
+ // Finding hotspots — collective knowledge (findings from past runs, aggregated by path)
207
+ // ================================================================
208
+
209
/**
 * Aggregated finding counts for one top-level URL area, as produced by
 * `getFindingHotspots`.
 */
export interface FindingHotspot {
  /** Top-level path the findings were attributed to, e.g. "/settings", or "/" when no path was detected. */
  pathPrefix: string;
  /** Number of findings attributed to this path prefix. */
  totalFindings: number;
  /** Finding count per category name. */
  categories: { [category: string]: number };
}
214
+
215
/**
 * Derives the top-level URL area (e.g. "/settings") a finding refers to.
 *
 * Searches the finding's title and body for the first path-like token — a "/"
 * followed by at least two characters from `[a-zA-Z0-9_/-]` — and keeps only
 * its first segment.
 *
 * @param finding - Finding whose `title` and `body` are searched.
 * @returns `/<first-segment>` of the first path found, or "/" when the text
 *          contains no path-like token.
 */
function extractPath(finding: Finding): string {
  const text = `${finding.title} ${finding.body}`;
  // A path may open the text, follow whitespace / a quote / an opening bracket
  // ("see /settings"), or follow a word character ("example.com/settings").
  // A bare `\b` anchor only covers the last case: " " and "/" are both
  // non-word characters, so there is no word boundary between them.
  const m = text.match(/(?:^|[\s"'`(\[]|\b)(\/[a-zA-Z0-9_/-]{2,})/);
  if (!m) return "/";
  const segments = m[1].split("/").filter(Boolean);
  return segments.length > 0 ? `/${segments[0]}` : "/";
}
222
+
223
/**
 * Aggregates the findings saved by past runs into per-area "hotspots".
 *
 * Reads every `*.json` file inside `<cwd>/findings/run_<digits>/`, maps each
 * finding to a top-level path with `extractPath`, and counts findings per path
 * and per category. Unreadable directories and unreadable or malformed files
 * are skipped (best effort).
 *
 * @param topN - Maximum number of hotspots to return (default 12). Values
 *               below zero are treated as zero.
 * @returns Hotspots sorted by descending finding count (ties ordered by path
 *          prefix), at most `topN` entries. Empty when the findings directory
 *          does not exist.
 */
export function getFindingHotspots(topN = 12): FindingHotspot[] {
  const base = path.join(process.cwd(), "findings");
  if (!fs.existsSync(base)) return [];

  const counts = new Map<string, { total: number; categories: Map<string, number> }>();

  for (const runDir of fs.readdirSync(base)) {
    if (!/^run_\d+$/.test(runDir)) continue;
    const dir = path.join(base, runDir);
    try {
      for (const file of fs.readdirSync(dir)) {
        if (!file.endsWith(".json")) continue;
        try {
          const parsed: unknown = JSON.parse(fs.readFileSync(path.join(dir, file), "utf-8"));
          // Only JSON objects can be findings; arrays, primitives and null
          // must not be counted as a finding under "/".
          if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) continue;

          // A missing or non-string category gets an explicit label instead
          // of the accidental "undefined" key.
          const rawCategory = (parsed as { category?: unknown }).category;
          const category =
            typeof rawCategory === "string" && rawCategory.length > 0 ? rawCategory : "uncategorized";

          const p = extractPath(parsed as Finding);
          let entry = counts.get(p);
          if (!entry) {
            entry = { total: 0, categories: new Map<string, number>() };
            counts.set(p, entry);
          }
          entry.total++;
          entry.categories.set(category, (entry.categories.get(category) ?? 0) + 1);
        } catch { /* skip unreadable or malformed finding file */ }
      }
    } catch { /* skip run entries that cannot be listed as a directory */ }
  }

  // slice(0, negative) would drop entries from the tail instead of returning none.
  const limit = Math.max(0, Math.floor(topN));

  return Array.from(counts.entries())
    .map(([pathPrefix, { total, categories }]) => ({
      pathPrefix,
      totalFindings: total,
      categories: Object.fromEntries(categories),
    }))
    // Tie-break on the prefix so the result does not depend on directory listing order.
    .sort((a, b) => b.totalFindings - a.totalFindings || (a.pathPrefix < b.pathPrefix ? -1 : 1))
    .slice(0, limit);
}
@@ -0,0 +1,30 @@
1
+ import * as fs from "fs";
2
+ import * as path from "path";
3
+ import * as crypto from "crypto";
4
+
5
/** Directory that holds one page-hash cache file per host, resolved against the working directory at module load. */
const CACHE_DIR = path.join(process.cwd(), "cache", "page-hashes");

/**
 * Builds the cache file location for a host.
 *
 * Every character other than ASCII letters, digits, "." and "-" is replaced
 * with "-" so the host (including a ":port" suffix) is usable as a file name.
 *
 * @param host - Host string to derive the file name from.
 * @returns Path of `<sanitized host>.json` inside `CACHE_DIR`.
 */
function cacheFilePath(host: string): string {
  const fileName = host.replace(/[^a-zA-Z0-9.-]/g, "-") + ".json";
  return path.join(CACHE_DIR, fileName);
}
11
+
12
/**
 * Loads the cached page-content hashes for a host.
 *
 * Best effort: a missing, unreadable or malformed cache file yields an empty
 * record rather than an error.
 *
 * @param host - Host whose cache file (see `cacheFilePath`) is read.
 * @returns Map from cache key to hash string; only string-valued entries of
 *          the stored JSON object are returned.
 */
export function loadPageHashes(host: string): Record<string, string> {
  try {
    const p = cacheFilePath(host);
    if (!fs.existsSync(p)) return {};

    const parsed: unknown = JSON.parse(fs.readFileSync(p, "utf-8"));
    // The file is external input: accept only a JSON object, and only its
    // string values, so the declared return type actually holds at runtime.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return {};

    const hashes: Record<string, string> = {};
    for (const [key, value] of Object.entries(parsed)) {
      if (typeof value === "string") hashes[key] = value;
    }
    return hashes;
  } catch { /* ignore — treat an unreadable cache as empty */ }
  return {};
}
19
+
20
/**
 * Merges new page hashes into the host's cache file.
 *
 * Existing entries are loaded with `loadPageHashes`, overridden by `updates`,
 * and the result is written back as pretty-printed JSON. Does nothing when
 * `updates` is empty.
 *
 * The cache is best effort: a failure to create the directory or write the
 * file is reported with a warning instead of being thrown, matching the
 * read path, so a cache problem cannot fail the caller's cleanup.
 *
 * @param host - Host whose cache file is updated.
 * @param updates - Hashes to add or overwrite, keyed the same way as the cache.
 */
export function updatePageHashes(host: string, updates: Record<string, string>): void {
  if (Object.keys(updates).length === 0) return;
  try {
    const merged = { ...loadPageHashes(host), ...updates };
    fs.mkdirSync(CACHE_DIR, { recursive: true });
    fs.writeFileSync(cacheFilePath(host), JSON.stringify(merged, null, 2), "utf-8");
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    console.warn(`[page-cache] could not persist page hashes for ${host}: ${reason}`);
  }
}
27
+
28
/**
 * Computes a short fingerprint of a piece of page content.
 *
 * @param content - Text to fingerprint.
 * @returns The first 16 hexadecimal characters of the content's SHA-256 digest.
 */
export function hashContent(content: string): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(content);
  const fullDigest = hasher.digest("hex");
  return fullDigest.slice(0, 16);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@m8i-51/shoal",
3
- "version": "0.1.17",
3
+ "version": "0.1.18",
4
4
  "type": "module",
5
5
  "description": "Multi-agent web exploration framework — finds bugs, UX issues, and missing features by running AI agents against your app",
6
6
  "repository": {
package/run.ts CHANGED
@@ -17,7 +17,8 @@ import type { Tool } from "./framework/llm-client";
17
17
  import { createMessageWithRetry, runAgentLoop, sleep, rateLimitRetries } from "./framework/agent-loop";
18
18
  import { collectedFindings, initRunLog, saveRunLog, saveFinding, runLog } from "./framework/findings";
19
19
  import { loadAgents, addAgent, retireAgent } from "./framework/agent-store";
20
- import { updateCoverage, computeWeightedSummary, getLastRunPaths } from "./framework/coverage";
20
+ import { updateCoverage, computeWeightedSummary, getLastRunPaths, getFindingHotspots } from "./framework/coverage";
21
+ import { loadPageHashes, updatePageHashes, hashContent } from "./framework/page-cache";
21
22
  import { loadPersonaPack, formatPackForPrompt, type PersonaPack } from "./framework/persona-pack";
22
23
  import { buildTrackers } from "./framework/trackers/index";
23
24
  import {
@@ -421,6 +422,11 @@ const PERSONA_DESIGNER_TOOLS: Anthropic.Tool[] = [
421
422
  description: "Get the list of URL paths visited in the previous run. Use this to identify unexplored areas of the app and recruit agents likely to visit NEW paths. / 前回のrunで訪れたURLパス一覧を取得する。未探索エリアを特定し、新しいパスを訪れる可能性の高いペルソナを採用するために使う",
422
423
  input_schema: { type: "object", properties: {}, required: [] },
423
424
  },
425
+ {
426
+ name: "get_finding_hotspots",
427
+ description: "Get URL areas where findings have clustered across all past runs. Use this to understand which parts of the app have been thoroughly investigated vs. overlooked — recruit agents to explore under-investigated areas, or specialists to deep-dive problem hotspots. / 過去のrun全体でfindingsが集中しているURLエリアを取得する。十分に調査済みのエリアと見落とされているエリアを把握し、未探索エリアへの新エージェント採用や問題多発エリアへのスペシャリスト派遣に活かす",
428
+ input_schema: { type: "object", properties: {}, required: [] },
429
+ },
424
430
  {
425
431
  name: "get_persona_templates",
426
432
  description: "Get the persona template pack defined for this project. Prefer these archetypes when adding agents — adapt names/details to fit the app context but keep the role intact. / このプロジェクト用に定義されたペルソナテンプレート一覧を取得する。エージェントを追加する際はまずこのテンプレートから選ぶこと",
@@ -482,8 +488,8 @@ async function runPersonaDesigner(
482
488
  : "";
483
489
 
484
490
  const pathCoverageStep = lastRunPaths
485
- ? "3. Call get_path_coverage to see which URL paths were visited last run — recruit agents whose role would naturally take them to DIFFERENT or unexplored paths"
486
- : "3. (No previous run data yet — skip get_path_coverage)";
491
+ ? "3. Call get_path_coverage to see which URL paths were visited last run — recruit agents whose role would naturally take them to DIFFERENT or unexplored paths\n4. Call get_finding_hotspots to see where problems have clustered across all past runs — recruit agents to under-investigated areas, or specialists to problem hotspots"
492
+ : "3. (No previous run data yet — skip get_path_coverage)\n4. Call get_finding_hotspots to see if any areas have already accumulated findings";
487
493
 
488
494
  const personaTemplateStep = personaPack
489
495
  ? "2. Call get_persona_templates to get project-specific persona archetypes — prefer these over inventing new personas from scratch"
@@ -499,11 +505,11 @@ ${orgGuidance}${accountContext}
499
505
  1. Call get_coverage to review which lenses and categories are underrepresented in past runs
500
506
  ${personaTemplateStep}
501
507
  ${pathCoverageStep}
502
- 4. Call get_open_issues to understand what problems are already known — recruit agents likely to find DIFFERENT issues in unexplored areas
503
- 5. Call get_scenarios to see the user test scenarios generated for this run — about 70% of agents will be assigned a scenario, so recruit personas whose background fits those scenarios
504
- 6. Call get_agents to check the current agent roster
505
- 7. Add 2–3 agents with add_agent — balance between scenario-fit personas (step 5), underrepresented lenses (step 1), unexplored paths (step 3), and unexplored areas (step 4)${testAccounts.length > 0 ? "\n — assign each agent a role that matches one of the available test accounts" : ""}
506
- 8. If there are agents with old createdAt dates (oldest 1–2), retire them with retire_agent`;
508
+ 5. Call get_open_issues to understand what problems are already known — recruit agents likely to find DIFFERENT issues in unexplored areas
509
+ 6. Call get_scenarios to see the user test scenarios generated for this run — about 70% of agents will be assigned a scenario, so recruit personas whose background fits those scenarios
510
+ 7. Call get_agents to check the current agent roster
511
+ 8. Add 2–3 agents with add_agent — balance between scenario-fit personas (step 6), underrepresented lenses (step 1), unexplored paths (step 3), finding hotspots (step 4), and unexplored areas (step 5)${testAccounts.length > 0 ? "\n — assign each agent a role that matches one of the available test accounts" : ""}
512
+ 9. If there are agents with old createdAt dates (oldest 1–2), retire them with retire_agent`;
507
513
 
508
514
  try {
509
515
  let iterations = 0;
@@ -541,6 +547,16 @@ ${pathCoverageStep}
541
547
  result = `Paths visited in last run (${lastRunPaths.runId}):\n${lastRunPaths.visitedPaths.map((p) => `- ${p}`).join("\n")}\n\nRecruit agents whose role naturally takes them to paths NOT in this list.`;
542
548
  }
543
549
  console.log(` [persona-designer] path coverage fetched (${lastRunPaths?.visitedPaths.length ?? 0} paths)`);
550
+ } else if (toolUse.name === "get_finding_hotspots") {
551
+ const hotspots = getFindingHotspots();
552
+ if (hotspots.length === 0) {
553
+ result = "(no past findings data yet — this appears to be the first run)";
554
+ } else {
555
+ result = hotspots.map((h) =>
556
+ `${h.pathPrefix}: ${h.totalFindings} findings — ${Object.entries(h.categories).map(([c, n]) => `${c}:${n}`).join(", ")}`
557
+ ).join("\n");
558
+ }
559
+ console.log(` [persona-designer] finding hotspots fetched (${hotspots.length} areas)`);
544
560
  } else if (toolUse.name === "get_open_issues") {
545
561
  if (openIssues.length === 0) {
546
562
  result = "(no open issues — either GitHub is not configured or there are no known issues yet)";
@@ -718,6 +734,8 @@ async function executeBrowserTool(
718
734
  observation: ObservationState,
719
735
  agentId: string,
720
736
  scenarioOutcomes: ScenarioOutcome[],
737
+ cachedHashes: Record<string, string>,
738
+ pageHashUpdates: Record<string, string>,
721
739
  scenario?: Scenario,
722
740
  ): Promise<{ text: string; screenshot: { base64: string; filePath: string } | null; sendToClaude: boolean }> {
723
741
  const startedAt = Date.now();
@@ -739,7 +757,18 @@ async function executeBrowserTool(
739
757
  await page.waitForTimeout(3000);
740
758
  screenshot = await takeScreenshot(page, `navigate_${navPath.replace(/\//g, "_")}`);
741
759
  agentLog.visitedPaths.push(navPath);
742
- resultText = `Navigated to ${navPath}`;
760
+ // ページコンテンツハッシュで差分検出
761
+ try {
762
+ const content = await page.innerText("body", { timeout: 2000 });
763
+ const h = hashContent(content);
764
+ const unchanged = cachedHashes[navPath] && cachedHashes[navPath] === h;
765
+ pageHashUpdates[navPath] = h;
766
+ resultText = unchanged
767
+ ? `Navigated to ${navPath} (page content unchanged since last run — consider exploring a different area)`
768
+ : `Navigated to ${navPath}`;
769
+ } catch {
770
+ resultText = `Navigated to ${navPath}`;
771
+ }
743
772
  break;
744
773
  }
745
774
  case "click": {
@@ -925,6 +954,9 @@ async function runBrowserAgent(
925
954
  };
926
955
 
927
956
  const observation = setupObservation(page);
957
+ const host = new URL(BASE_URL).host;
958
+ const cachedHashes = loadPageHashes(host);
959
+ const pageHashUpdates: Record<string, string> = {};
928
960
 
929
961
  const systemPrompt = `You are "${agent.name}".
930
962
  Role: ${agent.role}
@@ -1023,6 +1055,8 @@ ${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\
1023
1055
  observation,
1024
1056
  agent.id,
1025
1057
  scenarioOutcomes,
1058
+ cachedHashes,
1059
+ pageHashUpdates,
1026
1060
  assignment.scenario,
1027
1061
  );
1028
1062
 
@@ -1075,6 +1109,7 @@ ${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\
1075
1109
  console.error(`[${agent.name}] error:`, e);
1076
1110
  } finally {
1077
1111
  agentLog.completedAt = new Date().toISOString();
1112
+ updatePageHashes(host, pageHashUpdates);
1078
1113
  }
1079
1114
 
1080
1115
  console.log(`[browser] ${agent.name} done (feedback: ${agentLog.feedbacksSaved.length})`);