@m8i-51/shoal 0.1.17 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -114,12 +114,26 @@ npm run serve # from cloned repo
114
114
  Opens at `http://localhost:4000`. From there you can:
115
115
 
116
116
  - **Start a run** — configure agent count, target URL, and custom instructions
117
- - **Monitor live progress** — watch agents explore and file findings in real time
117
+ - **Watch agents swim live** — the Swarm tab shows an animated real-time view of agents as they explore. When a finding is discovered, the agent's chip flashes with the finding title.
118
118
  - **Review past runs** — findings by category, agent count, duration, and estimated cost
119
+ - **Generate an Agent Diary** — after a run completes, one LLM call turns the raw log into a story-style narrative of the exploration, readable by anyone on the team
120
+ - **Hall of Issues** — browse all findings across every run with full-text search and category filter. Export as JSON to share, or paste a community findings URL to import findings from other projects.
119
121
  - **Edit app goals** — guide the goal-gap detector by defining what the app should achieve
120
122
 
121
123
  ---
122
124
 
125
+ ## Cross-run intelligence
126
+
127
+ shoal gets smarter with each run.
128
+
129
+ **Diff exploration** — after every browser navigation, shoal hashes the page content (a truncated SHA-256 of the page body's `innerText`). On the next run, agents that land on an unchanged page are nudged to move on: *"page content unchanged since last run — consider exploring a different area."* The hashes accumulate in `cache/page-hashes/` (one JSON file per host) and steer future agents toward parts of the app that have actually changed.
130
+
131
+ **Finding hotspots** — the persona designer has access to a `get_finding_hotspots` tool that aggregates findings by URL area (the first path segment mentioned in each finding's title or body) across all past runs. It uses this to recruit agents toward under-investigated parts of the app, or to send specialists into zones where problems keep clustering.
132
+
133
+ Both signals work passively — no configuration needed. They improve automatically as runs accumulate.
134
+
135
+ ---
136
+
123
137
  ## Configuration
124
138
 
125
139
  | Variable | Default | Description |
@@ -201,3 +201,51 @@ export function computeWeightedSummary(): WeightedSummary {
201
201
 
202
202
  return { totalWeighted, byCategory, byLens, byScenario, formatted };
203
203
  }
204
+
205
+ // ================================================================
206
+ // Finding hotspots — collective knowledge (findings from past runs aggregated by path)
207
+ // ================================================================
208
+
209
/**
 * Aggregated finding counts for one URL area, as produced by
 * `getFindingHotspots`.
 */
export interface FindingHotspot {
  /** Top-level URL area the findings were attributed to, e.g. `/settings`, or `/` when no path was detected. */
  pathPrefix: string;
  /** Number of findings attributed to this area across all scanned runs. */
  totalFindings: number;
  /** Per-category breakdown of `totalFindings`, keyed by the finding's category. */
  categories: Record<string, number>;
}
214
+
215
/**
 * Derives the top-level URL area (e.g. `/settings`) that a finding refers to.
 *
 * The finding's title and body are scanned for the first path-like token
 * (a slash followed by at least two path characters); the first segment of
 * that token is returned with a leading slash.
 *
 * @param finding - Finding whose `title` and `body` are searched.
 * @returns The first path segment prefixed with `/`, or `"/"` when no
 *          path-like token is present.
 */
function extractPath(finding: Finding): string {
  const text = `${finding.title} ${finding.body}`;
  // A `\b` anchor is deliberately not used in front of the slash: a word
  // boundary there only exists when a word character precedes "/", so a path
  // at the start of the text or after whitespace ("on /settings/profile")
  // would be skipped and a later segment ("/profile") picked up instead.
  // The lookbehind merely rejects the "//" of a URL scheme ("https://host"),
  // so the path following the host is still what gets captured.
  const match = text.match(/(?<![\/:])(\/[a-zA-Z0-9_/-]{2,})/);
  if (!match) return "/";
  const [firstSegment] = match[1].split("/").filter(Boolean);
  return firstSegment !== undefined ? `/${firstSegment}` : "/";
}
222
+
223
/**
 * Aggregates saved findings from every past run by top-level URL area.
 *
 * Scans `<cwd>/findings/run_<digits>/*.json`, attributes each finding to the
 * area returned by `extractPath`, and counts findings per area and per
 * category. Unreadable directories, unparsable files, and JSON documents that
 * are not finding-shaped (not an object, or without a string `category`) are
 * skipped so that one stray file cannot distort or abort the aggregation.
 *
 * @param topN - Maximum number of areas to return (default 12). Negative
 *               values are treated as 0.
 * @returns Areas sorted by descending finding count, at most `topN` entries;
 *          an empty array when the `findings` directory does not exist.
 */
export function getFindingHotspots(topN = 12): FindingHotspot[] {
  const base = path.join(process.cwd(), "findings");
  if (!fs.existsSync(base)) return [];

  // Maps are used for the tallies because category names come from data:
  // a plain object would miscount keys such as "constructor".
  const counts = new Map<string, { total: number; categories: Map<string, number> }>();

  for (const runDir of fs.readdirSync(base)) {
    if (!/^run_\d+$/.test(runDir)) continue;
    const dir = path.join(base, runDir);

    let files: string[];
    try {
      files = fs.readdirSync(dir);
    } catch {
      continue; // not a directory or unreadable: skip this run
    }

    for (const file of files) {
      if (!file.endsWith(".json")) continue;
      try {
        const parsed: unknown = JSON.parse(fs.readFileSync(path.join(dir, file), "utf-8"));
        if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) continue;
        const f = parsed as Finding;
        // Without this check a non-finding JSON file would be tallied under
        // the literal category key "undefined".
        if (typeof f.category !== "string") continue;

        const area = extractPath(f);
        let entry = counts.get(area);
        if (!entry) {
          entry = { total: 0, categories: new Map() };
          counts.set(area, entry);
        }
        entry.total++;
        entry.categories.set(f.category, (entry.categories.get(f.category) ?? 0) + 1);
      } catch {
        /* unreadable or malformed file: skip */
      }
    }
  }

  return Array.from(counts.entries())
    .map(([pathPrefix, { total, categories }]) => ({
      pathPrefix,
      totalFindings: total,
      categories: Object.fromEntries(categories),
    }))
    .sort((a, b) => b.totalFindings - a.totalFindings)
    // Clamp so a negative topN yields nothing instead of dropping tail entries.
    .slice(0, Math.max(0, topN));
}
@@ -0,0 +1,30 @@
1
+ import * as fs from "fs";
2
+ import * as path from "path";
3
+ import * as crypto from "crypto";
4
+
5
/** Directory, relative to the current working directory, where per-host page-hash files live. */
const CACHE_DIR = path.join(process.cwd(), "cache", "page-hashes");

/**
 * Builds the cache file location for a host.
 *
 * Every character other than ASCII letters, digits, `.` and `-` is replaced
 * with `-`, so a host such as `localhost:4000` maps to `localhost-4000.json`.
 *
 * @param host - Host (optionally with port) the hashes belong to.
 * @returns Path of the JSON cache file inside `CACHE_DIR`.
 */
function cacheFilePath(host: string): string {
  const fileStem = host.replace(/[^a-zA-Z0-9.-]/g, "-");
  const fileName = fileStem + ".json";
  return path.join(CACHE_DIR, fileName);
}
11
+
12
/**
 * Loads the stored page-content hashes for a host.
 *
 * Best-effort: a missing, unreadable, or malformed cache file yields an empty
 * record rather than an error. The parsed JSON is validated before use, so a
 * file containing `null`, an array, or a primitive cannot leak out typed as a
 * record, and entries whose value is not a string are dropped.
 *
 * @param host - Host whose cache file should be read.
 * @returns Mapping of the stored keys to their hash strings; `{}` when
 *          nothing usable is stored.
 */
export function loadPageHashes(host: string): Record<string, string> {
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(cacheFilePath(host), "utf-8"));
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return {};

    const hashes: Record<string, string> = {};
    for (const [key, value] of Object.entries(parsed)) {
      if (typeof value === "string") hashes[key] = value;
    }
    return hashes;
  } catch {
    // Missing file or invalid JSON: treat as "no cache yet".
    return {};
  }
}
19
+
20
/**
 * Merges new page hashes into the stored cache for a host.
 *
 * Existing entries are kept and entries in `updates` win on key conflicts.
 * Nothing is written when `updates` is empty.
 *
 * The write is best-effort, matching `loadPageHashes`: a failure is logged as
 * a warning instead of thrown, so a cache problem cannot abort the caller.
 * The file is written to a temporary sibling and then renamed over the
 * target, so an interrupted write cannot leave a truncated cache file (which
 * would read back as empty and cause accumulated hashes to be overwritten).
 *
 * @param host - Host whose cache file should be updated.
 * @param updates - Hashes recorded during the current run, keyed like the stored entries.
 */
export function updatePageHashes(host: string, updates: Record<string, string>): void {
  if (Object.keys(updates).length === 0) return;
  try {
    const merged = { ...loadPageHashes(host), ...updates };
    fs.mkdirSync(CACHE_DIR, { recursive: true });

    const target = cacheFilePath(host);
    const tmp = `${target}.${process.pid}.tmp`;
    fs.writeFileSync(tmp, JSON.stringify(merged, null, 2), "utf-8");
    fs.renameSync(tmp, target);
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    console.warn(`[page-cache] failed to persist page hashes for ${host}: ${reason}`);
  }
}
27
+
28
/**
 * Produces a short fingerprint of page content.
 *
 * @param content - Text to fingerprint.
 * @returns The first 16 hexadecimal characters of the SHA-256 digest of `content`.
 */
export function hashContent(content: string): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(content);
  const fullDigest = hasher.digest("hex");
  return fullDigest.slice(0, 16);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@m8i-51/shoal",
3
- "version": "0.1.17",
3
+ "version": "0.1.19",
4
4
  "type": "module",
5
5
  "description": "Multi-agent web exploration framework — finds bugs, UX issues, and missing features by running AI agents against your app",
6
6
  "repository": {
package/run.ts CHANGED
@@ -17,7 +17,8 @@ import type { Tool } from "./framework/llm-client";
17
17
  import { createMessageWithRetry, runAgentLoop, sleep, rateLimitRetries } from "./framework/agent-loop";
18
18
  import { collectedFindings, initRunLog, saveRunLog, saveFinding, runLog } from "./framework/findings";
19
19
  import { loadAgents, addAgent, retireAgent } from "./framework/agent-store";
20
- import { updateCoverage, computeWeightedSummary, getLastRunPaths } from "./framework/coverage";
20
+ import { updateCoverage, computeWeightedSummary, getLastRunPaths, getFindingHotspots } from "./framework/coverage";
21
+ import { loadPageHashes, updatePageHashes, hashContent } from "./framework/page-cache";
21
22
  import { loadPersonaPack, formatPackForPrompt, type PersonaPack } from "./framework/persona-pack";
22
23
  import { buildTrackers } from "./framework/trackers/index";
23
24
  import {
@@ -421,6 +422,11 @@ const PERSONA_DESIGNER_TOOLS: Anthropic.Tool[] = [
421
422
  description: "Get the list of URL paths visited in the previous run. Use this to identify unexplored areas of the app and recruit agents likely to visit NEW paths. / 前回のrunで訪れたURLパス一覧を取得する。未探索エリアを特定し、新しいパスを訪れる可能性の高いペルソナを採用するために使う",
422
423
  input_schema: { type: "object", properties: {}, required: [] },
423
424
  },
425
+ {
426
+ name: "get_finding_hotspots",
427
+ description: "Get URL areas where findings have clustered across all past runs. Use this to understand which parts of the app have been thoroughly investigated vs. overlooked — recruit agents to explore under-investigated areas, or specialists to deep-dive problem hotspots. / 過去のrun全体でfindingsが集中しているURLエリアを取得する。十分に調査済みのエリアと見落とされているエリアを把握し、未探索エリアへの新エージェント採用や問題多発エリアへのスペシャリスト派遣に活かす",
428
+ input_schema: { type: "object", properties: {}, required: [] },
429
+ },
424
430
  {
425
431
  name: "get_persona_templates",
426
432
  description: "Get the persona template pack defined for this project. Prefer these archetypes when adding agents — adapt names/details to fit the app context but keep the role intact. / このプロジェクト用に定義されたペルソナテンプレート一覧を取得する。エージェントを追加する際はまずこのテンプレートから選ぶこと",
@@ -482,8 +488,8 @@ async function runPersonaDesigner(
482
488
  : "";
483
489
 
484
490
  const pathCoverageStep = lastRunPaths
485
- ? "3. Call get_path_coverage to see which URL paths were visited last run — recruit agents whose role would naturally take them to DIFFERENT or unexplored paths"
486
- : "3. (No previous run data yet — skip get_path_coverage)";
491
+ ? "3. Call get_path_coverage to see which URL paths were visited last run — recruit agents whose role would naturally take them to DIFFERENT or unexplored paths\n4. Call get_finding_hotspots to see where problems have clustered across all past runs — recruit agents to under-investigated areas, or specialists to problem hotspots"
492
+ : "3. (No previous run data yet — skip get_path_coverage)\n4. Call get_finding_hotspots to see if any areas have already accumulated findings";
487
493
 
488
494
  const personaTemplateStep = personaPack
489
495
  ? "2. Call get_persona_templates to get project-specific persona archetypes — prefer these over inventing new personas from scratch"
@@ -499,11 +505,11 @@ ${orgGuidance}${accountContext}
499
505
  1. Call get_coverage to review which lenses and categories are underrepresented in past runs
500
506
  ${personaTemplateStep}
501
507
  ${pathCoverageStep}
502
- 4. Call get_open_issues to understand what problems are already known — recruit agents likely to find DIFFERENT issues in unexplored areas
503
- 5. Call get_scenarios to see the user test scenarios generated for this run — about 70% of agents will be assigned a scenario, so recruit personas whose background fits those scenarios
504
- 6. Call get_agents to check the current agent roster
505
- 7. Add 2–3 agents with add_agent — balance between scenario-fit personas (step 5), underrepresented lenses (step 1), unexplored paths (step 3), and unexplored areas (step 4)${testAccounts.length > 0 ? "\n — assign each agent a role that matches one of the available test accounts" : ""}
506
- 8. If there are agents with old createdAt dates (oldest 1–2), retire them with retire_agent`;
508
+ 5. Call get_open_issues to understand what problems are already known — recruit agents likely to find DIFFERENT issues in unexplored areas
509
+ 6. Call get_scenarios to see the user test scenarios generated for this run — about 70% of agents will be assigned a scenario, so recruit personas whose background fits those scenarios
510
+ 7. Call get_agents to check the current agent roster
511
+ 8. Add 2–3 agents with add_agent — balance between scenario-fit personas (step 6), underrepresented lenses (step 1), unexplored paths (step 3), finding hotspots (step 4), and unexplored areas (step 5)${testAccounts.length > 0 ? "\n — assign each agent a role that matches one of the available test accounts" : ""}
512
+ 9. If there are agents with old createdAt dates (oldest 1–2), retire them with retire_agent`;
507
513
 
508
514
  try {
509
515
  let iterations = 0;
@@ -541,6 +547,16 @@ ${pathCoverageStep}
541
547
  result = `Paths visited in last run (${lastRunPaths.runId}):\n${lastRunPaths.visitedPaths.map((p) => `- ${p}`).join("\n")}\n\nRecruit agents whose role naturally takes them to paths NOT in this list.`;
542
548
  }
543
549
  console.log(` [persona-designer] path coverage fetched (${lastRunPaths?.visitedPaths.length ?? 0} paths)`);
550
+ } else if (toolUse.name === "get_finding_hotspots") {
551
+ const hotspots = getFindingHotspots();
552
+ if (hotspots.length === 0) {
553
+ result = "(no past findings data yet — this appears to be the first run)";
554
+ } else {
555
+ result = hotspots.map((h) =>
556
+ `${h.pathPrefix}: ${h.totalFindings} findings — ${Object.entries(h.categories).map(([c, n]) => `${c}:${n}`).join(", ")}`
557
+ ).join("\n");
558
+ }
559
+ console.log(` [persona-designer] finding hotspots fetched (${hotspots.length} areas)`);
544
560
  } else if (toolUse.name === "get_open_issues") {
545
561
  if (openIssues.length === 0) {
546
562
  result = "(no open issues — either GitHub is not configured or there are no known issues yet)";
@@ -718,6 +734,8 @@ async function executeBrowserTool(
718
734
  observation: ObservationState,
719
735
  agentId: string,
720
736
  scenarioOutcomes: ScenarioOutcome[],
737
+ cachedHashes: Record<string, string>,
738
+ pageHashUpdates: Record<string, string>,
721
739
  scenario?: Scenario,
722
740
  ): Promise<{ text: string; screenshot: { base64: string; filePath: string } | null; sendToClaude: boolean }> {
723
741
  const startedAt = Date.now();
@@ -739,7 +757,18 @@ async function executeBrowserTool(
739
757
  await page.waitForTimeout(3000);
740
758
  screenshot = await takeScreenshot(page, `navigate_${navPath.replace(/\//g, "_")}`);
741
759
  agentLog.visitedPaths.push(navPath);
742
- resultText = `Navigated to ${navPath}`;
760
+ // Detect changes since the last run via a hash of the page content
761
+ try {
762
+ const content = await page.innerText("body", { timeout: 2000 });
763
+ const h = hashContent(content);
764
+ const unchanged = cachedHashes[navPath] && cachedHashes[navPath] === h;
765
+ pageHashUpdates[navPath] = h;
766
+ resultText = unchanged
767
+ ? `Navigated to ${navPath} (page content unchanged since last run — consider exploring a different area)`
768
+ : `Navigated to ${navPath}`;
769
+ } catch {
770
+ resultText = `Navigated to ${navPath}`;
771
+ }
743
772
  break;
744
773
  }
745
774
  case "click": {
@@ -925,6 +954,9 @@ async function runBrowserAgent(
925
954
  };
926
955
 
927
956
  const observation = setupObservation(page);
957
+ const host = new URL(BASE_URL).host;
958
+ const cachedHashes = loadPageHashes(host);
959
+ const pageHashUpdates: Record<string, string> = {};
928
960
 
929
961
  const systemPrompt = `You are "${agent.name}".
930
962
  Role: ${agent.role}
@@ -1023,6 +1055,8 @@ ${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\
1023
1055
  observation,
1024
1056
  agent.id,
1025
1057
  scenarioOutcomes,
1058
+ cachedHashes,
1059
+ pageHashUpdates,
1026
1060
  assignment.scenario,
1027
1061
  );
1028
1062
 
@@ -1075,6 +1109,7 @@ ${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\
1075
1109
  console.error(`[${agent.name}] error:`, e);
1076
1110
  } finally {
1077
1111
  agentLog.completedAt = new Date().toISOString();
1112
+ updatePageHashes(host, pageHashUpdates);
1078
1113
  }
1079
1114
 
1080
1115
  console.log(`[browser] ${agent.name} done (feedback: ${agentLog.feedbacksSaved.length})`);