npm - @m8i-51/shoal - Versions diffs - 0.1.17 → 0.1.19 - Mend

@m8i-51/shoal 0.1.17 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -114,12 +114,26 @@ npm run serve      # from cloned repo
 Opens at `http://localhost:4000`. From there you can:
 - **Start a run** — configure agent count, target URL, and custom instructions
-- **Monitor live progress** — watch agents explore and file findings in real time
+- **Watch agents swim live** — the Swarm tab shows an animated real-time view of agents as they explore. When a finding is discovered, the agent's chip flashes with the finding title.
 - **Review past runs** — findings by category, agent count, duration, and estimated cost
+- **Generate an Agent Diary** — after a run completes, one LLM call turns the raw log into a story-style narrative of the exploration, readable by anyone on the team
+- **Hall of Issues** — browse all findings across every run with full-text search and category filter. Export as JSON to share, or paste a community findings URL to import findings from other projects.
 - **Edit app goals** — guide the goal-gap detector by defining what the app should achieve
 ---
+## Cross-run intelligence
+shoal gets smarter with each run.
+**Diff exploration** — after every browser navigation, shoal hashes the page content (a SHA-256 digest of the page body's `innerText`, truncated to 16 hex characters). On the next run, agents that land on an unchanged page are nudged to move on: *"page content unchanged since last run — consider exploring a different area."* The hashes accumulate in `cache/page-hashes/` and steer future agents toward parts of the app that have actually changed.
+**Finding hotspots** — the persona designer has access to a `get_finding_hotspots` tool that aggregates findings by URL area across all past runs. It uses this to recruit agents toward under-investigated parts of the app, or to send specialists into zones where problems keep clustering.
+Both signals work passively — no configuration needed. They improve automatically as runs accumulate.
+---
 ## Configuration
 | Variable | Default | Description |

package/framework/coverage.ts CHANGED Viewed

@@ -201,3 +201,51 @@ export function computeWeightedSummary(): WeightedSummary {
   return { totalWeighted, byCategory, byLens, byScenario, formatted };
 }
+// ================================================================
+// Finding hotspots — collective knowledge (findings from past runs, aggregated by path)
+// ================================================================
+export interface FindingHotspot { // One aggregated row returned by getFindingHotspots.
+  pathPrefix: string; // Bucket key: "/" plus the first path segment extracted from a finding, or "/" when none was found.
+  totalFindings: number; // Number of findings that fell into this bucket.
+  categories: Record<string, number>; // Per-category finding counts within this bucket, keyed by the finding's `category`.
+}
+/**
+ * Derives the hotspot bucket for a finding: the first segment of the first URL
+ * path mentioned in its title or body (e.g. "/settings/profile" → "/settings").
+ *
+ * A slash starts a path only in one of three positions:
+ *  - right after an http(s) URL authority ("http://localhost:4000/admin"),
+ *  - right after a scheme-less host ("example.com/pricing", "localhost:4000/admin"),
+ *  - at the start of a token ("see /settings", "(/cart)", or the start of the text).
+ *
+ * @param finding - Finding whose `title` and `body` are scanned.
+ * @returns "/<first-segment>", or "/" when the text mentions no path.
+ */
+function extractPath(finding: Finding): string {
+  const text = `${finding.title} ${finding.body}`;
+  // `\b` must not be used as the anchor: there is no word boundary between
+  // whitespace (or the start of the text) and "/", so bare paths would never
+  // match, while slash-joined words such as "UI/UX" or "and/or" would.
+  // The lookbehind instead rejects a slash that follows a word character,
+  // another slash, "." or "~" (relative and home-directory file paths).
+  // As before, a path needs at least two characters after its leading slash.
+  const m = text.match(
+    /(?:https?:\/\/[^\s\/]+|\b(?:localhost|(?:[\w-]+\.)+[a-z]{2,})(?::\d+)?|(?<![\w\/.~]))(\/[a-z0-9_-][a-z0-9_\/-]+)/i,
+  );
+  if (!m) return "/";
+  const [firstSegment] = m[1].split("/").filter(Boolean);
+  return firstSegment ? `/${firstSegment}` : "/";
+}
+/**
+ * Aggregates every stored finding by the URL area it mentions.
+ *
+ * Scans `<cwd>/findings/run_<digits>/*.json`, buckets each finding with
+ * `extractPath`, and counts findings per bucket and per category. Run
+ * directories that cannot be listed and files that cannot be read or parsed
+ * are skipped.
+ *
+ * @param topN - Maximum number of hotspots to return (default 12).
+ * @returns Hotspots sorted by `totalFindings`, highest first; an empty array
+ *          when the findings directory does not exist.
+ */
+export function getFindingHotspots(topN = 12): FindingHotspot[] {
+  const findingsRoot = path.join(process.cwd(), "findings");
+  if (!fs.existsSync(findingsRoot)) return [];
+  const tally = new Map<string, { total: number; categories: Record<string, number> }>();
+  const runDirs = fs.readdirSync(findingsRoot).filter((name) => /^run_\d+$/.test(name));
+  for (const runDir of runDirs) {
+    const runPath = path.join(findingsRoot, runDir);
+    let fileNames: string[];
+    try {
+      fileNames = fs.readdirSync(runPath);
+    } catch {
+      continue; // entry could not be listed — skip this run
+    }
+    for (const fileName of fileNames.filter((name) => name.endsWith(".json"))) {
+      try {
+        const finding: Finding = JSON.parse(fs.readFileSync(path.join(runPath, fileName), "utf-8"));
+        const prefix = extractPath(finding);
+        const bucket = tally.get(prefix) ?? { total: 0, categories: {} };
+        bucket.total += 1;
+        bucket.categories[finding.category] = (bucket.categories[finding.category] ?? 0) + 1;
+        tally.set(prefix, bucket);
+      } catch {
+        // unreadable or malformed finding file — skip it
+      }
+    }
+  }
+  const hotspots: FindingHotspot[] = [];
+  for (const [pathPrefix, { total, categories }] of tally) {
+    hotspots.push({ pathPrefix, totalFindings: total, categories });
+  }
+  hotspots.sort((a, b) => b.totalFindings - a.totalFindings);
+  return hotspots.slice(0, topN);
+}

package/framework/page-cache.ts ADDED Viewed

@@ -0,0 +1,30 @@
+import * as fs from "fs";
+import * as path from "path";
+import * as crypto from "crypto";
+/** Directory holding one page-hash JSON file per host; resolved against the working directory at module load. */
+const CACHE_DIR = path.join(process.cwd(), "cache", "page-hashes");
+/**
+ * Maps a host to its cache file inside `CACHE_DIR`.
+ * Every character other than ASCII letters, digits, "." and "-" is replaced
+ * with "-", so a host such as "localhost:4000" becomes "localhost-4000.json".
+ */
+function cacheFilePath(host: string): string {
+  const fileName = host.replace(/[^a-zA-Z0-9.-]/g, "-") + ".json";
+  return path.join(CACHE_DIR, fileName);
+}
+/**
+ * Loads the cached page hashes for `host`.
+ *
+ * The cache is best-effort: a missing, unreadable or malformed file yields an
+ * empty map instead of an error. The parsed JSON is validated before it is
+ * returned, so a file containing `null`, an array, a scalar, or non-string
+ * values cannot leak through the `Record<string, string>` return type.
+ *
+ * @param host - Host whose cache file should be read.
+ * @returns The string-valued entries of the cache file, or `{}`.
+ */
+export function loadPageHashes(host: string): Record<string, string> {
+  try {
+    const file = cacheFilePath(host);
+    if (!fs.existsSync(file)) return {};
+    const parsed: unknown = JSON.parse(fs.readFileSync(file, "utf-8"));
+    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return {};
+    const hashes: Record<string, string> = {};
+    for (const [pagePath, hash] of Object.entries(parsed)) {
+      if (typeof hash === "string") hashes[pagePath] = hash;
+    }
+    return hashes;
+  } catch {
+    // Unreadable file or invalid JSON: treat it as "no cache yet".
+    return {};
+  }
+}
+/**
+ * Merges `updates` into the cached page hashes for `host` and writes the
+ * result back to the host's cache file, creating `CACHE_DIR` if needed.
+ *
+ * Does nothing when `updates` is empty. Like `loadPageHashes`, persistence is
+ * best-effort: a filesystem error is logged as a warning and not rethrown, so
+ * a failed cache write cannot fail the caller.
+ *
+ * @param host - Host whose cache file should be updated.
+ * @param updates - Page path → content hash entries; they override existing entries.
+ */
+export function updatePageHashes(host: string, updates: Record<string, string>): void {
+  if (Object.keys(updates).length === 0) return;
+  try {
+    const merged = { ...loadPageHashes(host), ...updates };
+    fs.mkdirSync(CACHE_DIR, { recursive: true });
+    fs.writeFileSync(cacheFilePath(host), JSON.stringify(merged, null, 2), "utf-8");
+  } catch (e: unknown) {
+    // Losing the cache only costs the "unchanged page" hint on the next run.
+    console.warn(`[page-cache] could not persist page hashes for ${host}:`, e instanceof Error ? e.message : e);
+  }
+}
+/**
+ * Fingerprints page content for change detection.
+ *
+ * @param content - Text to hash.
+ * @returns The first 16 hex characters of the SHA-256 digest of `content`.
+ */
+export function hashContent(content: string): string {
+  const sha256 = crypto.createHash("sha256");
+  sha256.update(content);
+  const hexDigest = sha256.digest("hex");
+  return hexDigest.substring(0, 16);
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@m8i-51/shoal",
-  "version": "0.1.17",
+  "version": "0.1.19",
   "type": "module",
   "description": "Multi-agent web exploration framework — finds bugs, UX issues, and missing features by running AI agents against your app",
   "repository": {

package/run.ts CHANGED Viewed

@@ -17,7 +17,8 @@ import type { Tool } from "./framework/llm-client";
 import { createMessageWithRetry, runAgentLoop, sleep, rateLimitRetries } from "./framework/agent-loop";
 import { collectedFindings, initRunLog, saveRunLog, saveFinding, runLog } from "./framework/findings";
 import { loadAgents, addAgent, retireAgent } from "./framework/agent-store";
-import { updateCoverage, computeWeightedSummary, getLastRunPaths } from "./framework/coverage";
+import { updateCoverage, computeWeightedSummary, getLastRunPaths, getFindingHotspots } from "./framework/coverage";
+import { loadPageHashes, updatePageHashes, hashContent } from "./framework/page-cache";
 import { loadPersonaPack, formatPackForPrompt, type PersonaPack } from "./framework/persona-pack";
 import { buildTrackers } from "./framework/trackers/index";
 import {
@@ -421,6 +422,11 @@ const PERSONA_DESIGNER_TOOLS: Anthropic.Tool[] = [
     description: "Get the list of URL paths visited in the previous run. Use this to identify unexplored areas of the app and recruit agents likely to visit NEW paths. / 前回のrunで訪れたURLパス一覧を取得する。未探索エリアを特定し、新しいパスを訪れる可能性の高いペルソナを採用するために使う",
     input_schema: { type: "object", properties: {}, required: [] },
   },
+  {
+    name: "get_finding_hotspots",
+    description: "Get URL areas where findings have clustered across all past runs. Use this to understand which parts of the app have been thoroughly investigated vs. overlooked — recruit agents to explore under-investigated areas, or specialists to deep-dive problem hotspots. / 過去のrun全体でfindingsが集中しているURLエリアを取得する。十分に調査済みのエリアと見落とされているエリアを把握し、未探索エリアへの新エージェント採用や問題多発エリアへのスペシャリスト派遣に活かす",
+    input_schema: { type: "object", properties: {}, required: [] },
+  },
   {
     name: "get_persona_templates",
     description: "Get the persona template pack defined for this project. Prefer these archetypes when adding agents — adapt names/details to fit the app context but keep the role intact. / このプロジェクト用に定義されたペルソナテンプレート一覧を取得する。エージェントを追加する際はまずこのテンプレートから選ぶこと",
@@ -482,8 +488,8 @@ async function runPersonaDesigner(
     : "";
   const pathCoverageStep = lastRunPaths
-    ? "3. Call get_path_coverage to see which URL paths were visited last run — recruit agents whose role would naturally take them to DIFFERENT or unexplored paths"
-    : "3. (No previous run data yet — skip get_path_coverage)";
+    ? "3. Call get_path_coverage to see which URL paths were visited last run — recruit agents whose role would naturally take them to DIFFERENT or unexplored paths\n4. Call get_finding_hotspots to see where problems have clustered across all past runs — recruit agents to under-investigated areas, or specialists to problem hotspots"
+    : "3. (No previous run data yet — skip get_path_coverage)\n4. Call get_finding_hotspots to see if any areas have already accumulated findings";
   const personaTemplateStep = personaPack
     ? "2. Call get_persona_templates to get project-specific persona archetypes — prefer these over inventing new personas from scratch"
@@ -499,11 +505,11 @@ ${orgGuidance}${accountContext}
 1. Call get_coverage to review which lenses and categories are underrepresented in past runs
 ${personaTemplateStep}
 ${pathCoverageStep}
-4. Call get_open_issues to understand what problems are already known — recruit agents likely to find DIFFERENT issues in unexplored areas
-5. Call get_scenarios to see the user test scenarios generated for this run — about 70% of agents will be assigned a scenario, so recruit personas whose background fits those scenarios
-6. Call get_agents to check the current agent roster
-7. Add 2–3 agents with add_agent — balance between scenario-fit personas (step 5), underrepresented lenses (step 1), unexplored paths (step 3), and unexplored areas (step 4)${testAccounts.length > 0 ? "\n   — assign each agent a role that matches one of the available test accounts" : ""}
-8. If there are agents with old createdAt dates (oldest 1–2), retire them with retire_agent`;
+5. Call get_open_issues to understand what problems are already known — recruit agents likely to find DIFFERENT issues in unexplored areas
+6. Call get_scenarios to see the user test scenarios generated for this run — about 70% of agents will be assigned a scenario, so recruit personas whose background fits those scenarios
+7. Call get_agents to check the current agent roster
+8. Add 2–3 agents with add_agent — balance between scenario-fit personas (step 6), underrepresented lenses (step 1), unexplored paths (step 3), finding hotspots (step 4), and unexplored areas (step 5)${testAccounts.length > 0 ? "\n   — assign each agent a role that matches one of the available test accounts" : ""}
+9. If there are agents with old createdAt dates (oldest 1–2), retire them with retire_agent`;
   try {
     let iterations = 0;
@@ -541,6 +547,16 @@ ${pathCoverageStep}
             result = `Paths visited in last run (${lastRunPaths.runId}):\n${lastRunPaths.visitedPaths.map((p) => `- ${p}`).join("\n")}\n\nRecruit agents whose role naturally takes them to paths NOT in this list.`;
           }
           console.log(`  [persona-designer] path coverage fetched (${lastRunPaths?.visitedPaths.length ?? 0} paths)`);
+        } else if (toolUse.name === "get_finding_hotspots") {
+          const hotspots = getFindingHotspots();
+          if (hotspots.length === 0) {
+            result = "(no past findings data yet — this appears to be the first run)";
+          } else {
+            result = hotspots.map((h) =>
+              `${h.pathPrefix}: ${h.totalFindings} findings — ${Object.entries(h.categories).map(([c, n]) => `${c}:${n}`).join(", ")}`
+            ).join("\n");
+          }
+          console.log(`  [persona-designer] finding hotspots fetched (${hotspots.length} areas)`);
         } else if (toolUse.name === "get_open_issues") {
           if (openIssues.length === 0) {
             result = "(no open issues — either GitHub is not configured or there are no known issues yet)";
@@ -718,6 +734,8 @@ async function executeBrowserTool(
   observation: ObservationState,
   agentId: string,
   scenarioOutcomes: ScenarioOutcome[],
+  cachedHashes: Record<string, string>,
+  pageHashUpdates: Record<string, string>,
   scenario?: Scenario,
 ): Promise<{ text: string; screenshot: { base64: string; filePath: string } | null; sendToClaude: boolean }> {
   const startedAt = Date.now();
@@ -739,7 +757,18 @@ async function executeBrowserTool(
         await page.waitForTimeout(3000);
         screenshot = await takeScreenshot(page, `navigate_${navPath.replace(/\//g, "_")}`);
         agentLog.visitedPaths.push(navPath);
-        resultText = `Navigated to ${navPath}`;
+        // ページコンテンツハッシュで差分検出
+        try {
+          const content = await page.innerText("body", { timeout: 2000 });
+          const h = hashContent(content);
+          const unchanged = cachedHashes[navPath] && cachedHashes[navPath] === h;
+          pageHashUpdates[navPath] = h;
+          resultText = unchanged
+            ? `Navigated to ${navPath} (page content unchanged since last run — consider exploring a different area)`
+            : `Navigated to ${navPath}`;
+        } catch {
+          resultText = `Navigated to ${navPath}`;
+        }
         break;
       }
       case "click": {
@@ -925,6 +954,9 @@ async function runBrowserAgent(
   };
   const observation = setupObservation(page);
+  const host = new URL(BASE_URL).host;
+  const cachedHashes = loadPageHashes(host);
+  const pageHashUpdates: Record<string, string> = {};
   const systemPrompt = `You are "${agent.name}".
 Role: ${agent.role}
@@ -1023,6 +1055,8 @@ ${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\
           observation,
           agent.id,
           scenarioOutcomes,
+          cachedHashes,
+          pageHashUpdates,
           assignment.scenario,
         );
@@ -1075,6 +1109,7 @@ ${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\
     console.error(`[${agent.name}] error:`, e);
   } finally {
     agentLog.completedAt = new Date().toISOString();
+    updatePageHashes(host, pageHashUpdates);
   }
   console.log(`[browser] ${agent.name} done (feedback: ${agentLog.feedbacksSaved.length})`);