npm - @m8i-51/shoal - Versions diffs - 0.1.7 → 0.1.9 - Mend

@m8i-51/shoal 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/framework/__tests__/coverage.test.ts +80 -0
package/framework/coverage.ts +41 -6
package/framework/org-designer.ts +32 -14
package/package.json +1 -1
package/run.ts +17 -17

package/framework/__tests__/coverage.test.ts CHANGED Viewed

@@ -167,6 +167,86 @@ describe("computeWeightedSummary", () => {
     expect(result.formatted).toContain("By scenario");
   });
+  it("14日以内に同じレンズが複数 run に登場するとボーナスが乗る", () => {
+    const now = Date.now();
+    // The same Accessibility lens appears in 2 runs → per-entry bonus = 1 + (2-1)^REPETITION_EXPONENT * REPETITION_BONUS = 1 + 1^3 * 0.005 = 1.005
+    setupMockCoverage({
+      entries: [
+        makeEntry({
+          runId: "run_1",
+          timestamp: new Date(now - 1000).toISOString(),
+          findingsCount: 2,
+          byLens: { Accessibility: 2 },
+        }),
+        makeEntry({
+          runId: "run_2",
+          timestamp: new Date(now).toISOString(),
+          findingsCount: 2,
+          byLens: { Accessibility: 2 },
+        }),
+      ],
+    });
+    const resultWithRepeat = computeWeightedSummary();
+    // 1回しか登場しない場合と比較
+    setupMockCoverage({
+      entries: [
+        makeEntry({
+          runId: "run_1",
+          timestamp: new Date(now).toISOString(),
+          findingsCount: 2,
+          byLens: { Accessibility: 2 },
+        }),
+      ],
+    });
+    const resultSingle = computeWeightedSummary();
+    // 繰り返しありのほうが lens の重みが高いはず
+    expect(resultWithRepeat.byLens["Accessibility"]).toBeGreaterThan(resultSingle.byLens["Accessibility"]);
+  });
+  it("14日より古いエントリーは繰り返しカウントに含まれない", () => {
+    const now = Date.now();
+    const oldMs = 15 * 24 * 60 * 60 * 1000; // 15日前
+    setupMockCoverage({
+      entries: [
+        makeEntry({
+          runId: "run_old",
+          timestamp: new Date(now - oldMs).toISOString(),
+          findingsCount: 2,
+          byLens: { Security: 2 },
+        }),
+        makeEntry({
+          runId: "run_new",
+          timestamp: new Date(now).toISOString(),
+          findingsCount: 2,
+          byLens: { Security: 2 },
+        }),
+      ],
+    });
+    const result = computeWeightedSummary();
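+    // The old entry falls outside the 14-day window, so no bonus applies (repeat count = 1 → bonus = 1.0)
+    // Without a bonus: weight ≈ 1.0*2 + decay(15 days)*2 = 2 + 0.5^(15/7)*2 ≈ 2.45 (7-day half-life)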
+    expect(result.formatted).not.toContain("Repeated lenses");
+  });
+  it("繰り返しレンズが formatted に含まれる", () => {
+    const now = Date.now();
+    setupMockCoverage({
+      entries: [
+        makeEntry({ runId: "r1", timestamp: new Date(now - 1000).toISOString(), findingsCount: 1, byLens: { "UI design": 1 } }),
+        makeEntry({ runId: "r2", timestamp: new Date(now).toISOString(), findingsCount: 1, byLens: { "UI design": 1 } }),
+      ],
+    });
+    const result = computeWeightedSummary();
+    expect(result.formatted).toContain("Repeated lenses");
+    expect(result.formatted).toContain("UI design");
+    expect(result.formatted).toContain("×2");
+  });
   it("MAX_ENTRIES を超えると最新30件に切り捨てる", () => {
     const entries = Array.from({ length: 35 }, (_, i) =>
       makeEntry({

package/framework/coverage.ts CHANGED Viewed

@@ -27,6 +27,9 @@ export interface WeightedSummary {
 const COVERAGE_PATH = path.join(process.cwd(), "coverage", "coverage.json");
 const MAX_ENTRIES = 30;
 const HALF_LIFE_DAYS = 7;
+const REPETITION_WINDOW_DAYS = 14;
+const REPETITION_BONUS = 0.005;
+const REPETITION_EXPONENT = 3;
 export function loadCoverage(): Coverage {
   try {
@@ -98,6 +101,20 @@ export function computeWeightedSummary(): WeightedSummary {
   const now = Date.now();
   const halfLifeMs = HALF_LIFE_DAYS * 24 * 60 * 60 * 1000;
+  const windowMs = REPETITION_WINDOW_DAYS * 24 * 60 * 60 * 1000;
+  // 14日以内の run で各 lens/scenario が何回登場したかを数える
+  const lensRepeat: Record<string, number> = {};
+  const scenarioRepeat: Record<string, number> = {};
+  for (const entry of coverage.entries) {
+    if (now - new Date(entry.timestamp).getTime() > windowMs) continue;
+    for (const lens of Object.keys(entry.byLens)) {
+      lensRepeat[lens] = (lensRepeat[lens] ?? 0) + 1;
+    }
+    for (const title of Object.keys(entry.byScenario ?? {})) {
+      scenarioRepeat[title] = (scenarioRepeat[title] ?? 0) + 1;
+    }
+  }
   const byCategory: Record<string, number> = {};
   const byLens: Record<string, number> = {};
@@ -106,18 +123,21 @@ export function computeWeightedSummary(): WeightedSummary {
   for (const entry of coverage.entries) {
     const age = now - new Date(entry.timestamp).getTime();
-    const weight = Math.pow(0.5, age / halfLifeMs);
+    const decay = Math.pow(0.5, age / halfLifeMs);
     for (const [cat, count] of Object.entries(entry.byCategory)) {
-      byCategory[cat] = (byCategory[cat] ?? 0) + count * weight;
+      byCategory[cat] = (byCategory[cat] ?? 0) + count * decay;
     }
     for (const [lens, count] of Object.entries(entry.byLens)) {
-      byLens[lens] = (byLens[lens] ?? 0) + count * weight;
+      // 繰り返し呼ばれるほど「必要」とみなしてボーナスを加算
+      const bonus = 1 + Math.pow((lensRepeat[lens] ?? 1) - 1, REPETITION_EXPONENT) * REPETITION_BONUS;
+      byLens[lens] = (byLens[lens] ?? 0) + count * decay * bonus;
     }
     for (const [title, count] of Object.entries(entry.byScenario ?? {})) {
-      byScenario[title] = (byScenario[title] ?? 0) + count * weight;
+      const bonus = 1 + Math.pow((scenarioRepeat[title] ?? 1) - 1, REPETITION_EXPONENT) * REPETITION_BONUS;
+      byScenario[title] = (byScenario[title] ?? 0) + count * decay * bonus;
     }
-    totalWeighted += entry.findingsCount * weight;
+    totalWeighted += entry.findingsCount * decay;
   }
   // 小数点1桁に丸める
@@ -142,12 +162,27 @@ export function computeWeightedSummary(): WeightedSummary {
     ? `By scenario: ${sortedScenario.map(([t, c]) => `"${t}" (${c})`).join(", ")}`
     : null;
+  const repeatedLenses = Object.entries(lensRepeat)
+    .filter(([, n]) => n > 1)
+    .sort((a, b) => b[1] - a[1])
+    .map(([l, n]) => `${l} (×${n})`);
+  const repeatedScenarios = Object.entries(scenarioRepeat)
+    .filter(([, n]) => n > 1)
+    .sort((a, b) => b[1] - a[1])
+    .map(([t, n]) => `"${t}" (×${n})`);
   const formatted = [
-    `Coverage summary (half-life: ${HALF_LIFE_DAYS} days, ${coverage.entries.length} run(s) tracked):`,
+    `Coverage summary (half-life: ${HALF_LIFE_DAYS} days, repetition window: ${REPETITION_WINDOW_DAYS} days, ${coverage.entries.length} run(s) tracked):`,
     `Total weighted findings: ${totalWeighted}`,
     `By lens: ${sortedLens.map(([l, c]) => `${l} (${c})`).join(" > ") || "(none)"}`,
     scenarioLine,
     `By category: ${sortedCategory.map(([c, n]) => `${c} (${n})`).join(" > ") || "(none)"}`,
+    repeatedLenses.length > 0
+      ? `Repeated lenses (bonus applied): ${repeatedLenses.join(", ")}`
+      : null,
+    repeatedScenarios.length > 0
+      ? `Repeated scenarios (bonus applied): ${repeatedScenarios.join(", ")}`
+      : null,
     underrepresented.length > 0
       ? `Underrepresented lenses: ${underrepresented.join(", ")} — consider recruiting agents with these perspectives`
       : "All lenses have comparable coverage",

package/framework/org-designer.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import { createMessageWithRetry } from "./agent-loop";
 import type { ProductSpec } from "./product-discovery";
 export interface OrgDesign {
-  hrGuidance: string;
+  personaGuidance: string;
 }
 // Evaluation lenses always included regardless of app type / アプリ種別に関わらず常に含める観点
@@ -22,7 +22,7 @@ export const UNIVERSAL_LENSES = [
 ];
 export async function designOrg(spec: ProductSpec, client: LLMClient, model: string, coverageSummary?: string): Promise<OrgDesign> {
-  console.log("\n[org-design] starting...");
+  console.log("\n[persona-policy] starting...");
   const response = await createMessageWithRetry(client, {
     model,
@@ -47,18 +47,36 @@ ${spec.features}
 ${spec.designContext ? `\n[Design Context]\n${spec.designContext}\n` : ""}${coverageSummary ? `\n[Coverage History]\n${coverageSummary}\nUse this to identify underrepresented perspectives and adjust the recruitment policy accordingly.\n` : ""}
 Please output the following:
+## App type classification
+Classify this app as one of:
+- "business": used in work contexts by employees with specific job roles (CRM, project management, HR tools, etc.)
+- "consumer": used by individuals in personal contexts (personal finance, entertainment, health, productivity, etc.)
+- "mixed": significant use in both contexts
 ## User types for this app
-(What kinds of users exist — roles, skill levels, usage scenarios)
+(What kinds of users exist — described appropriately for the app type)
 ## Agent types to recruit (5–8 types)
-By job function, role, and technical literacy. Always include:
-- At least one UX/product designer persona (evaluates visual consistency, interaction patterns, HIG/Material compliance)
-- At least one product manager or business analyst persona (evaluates feature completeness, user journey clarity)
-- At least one target end-user with low technical literacy (first-time or reluctant user)
-- Domain-specific roles relevant to this app type
-## Recruitment instructions for the HR agent
-(Concrete hiring/retirement guidelines based on the above — emphasize persona diversity across technical skill levels, job functions, and design sensitivity)`,
+**If business app:**
+Recruit primarily by job role and function (e.g., sales rep, manager, admin).
+Include personas with varying technical literacy within those roles.
+**If consumer app:**
+Recruit primarily as real end-users — define by lifestyle, demographics, and usage context.
+Focus on who actually uses this app in daily life, not job titles.
+Examples for a subscription tracker: "budget-conscious student juggling streaming costs", "freelancer tracking SaaS tool expenses", "household manager reviewing family subscriptions".
+Avoid professional/specialist titles (QA engineer, PM, auditor) as primary personas — these are not real users of this app.
+**If mixed:**
+Balance job-role personas and lifestyle-based end-user personas.
+**Always include as supplement (1–2 personas regardless of app type):**
+- 1 UX evaluator: focuses on visual consistency, interaction patterns, HIG/Material compliance
+- 1 edge-case/accessibility evaluator: focuses on error handling, accessibility, stress scenarios
+## Recruitment instructions for the persona designer agent
+(Concrete guidelines based on the above — emphasize that the majority of personas should reflect real users of this specific app, with expert evaluators as a minority supplement)`,
       },
     ],
   });
@@ -68,7 +86,7 @@ By job function, role, and technical literacy. Always include:
     .map((b) => b.text)
     .join("");
-  const hrGuidance = `${text}
+  const personaGuidance = `${text}
 [Universal Evaluation Lenses]
 Include one of the following perspectives in each agent's persona to ensure diverse findings:
@@ -86,6 +104,6 @@ When recruiting UX/design-oriented agents, give them awareness of these standard
   - Jakob's Law: flag interactions that contradict conventions users expect from similar apps (e.g., swipe to delete, pull to refresh, hamburger menus)
   - Nielsen's heuristics: check for missing system status feedback, unclear error messages, lack of undo, and forcing users to recall rather than recognize`;
-  console.log("[org-design] done");
-  return { hrGuidance };
+  console.log("[persona-policy] done");
+  return { personaGuidance };
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@m8i-51/shoal",
-  "version": "0.1.7",
+  "version": "0.1.9",
   "type": "module",
   "description": "Multi-agent web exploration framework — finds bugs, UX issues, and missing features by running AI agents against your app",
   "repository": {

package/run.ts CHANGED Viewed

@@ -394,10 +394,10 @@ ${productSpec.uiFeatures ? `\n[UI-Only Features]\nThese features exist in the UI
 }
 // ================================================================
-// HR agent
+// Persona designer agent
 // ================================================================
-const HR_TOOLS: Anthropic.Tool[] = [
+const PERSONA_DESIGNER_TOOLS: Anthropic.Tool[] = [
   {
     name: "get_agents",
     description: "Get the current list of registered agents. / 現在登録されているエージェント一覧を取得する",
@@ -445,24 +445,24 @@ const HR_TOOLS: Anthropic.Tool[] = [
   },
 ];
-async function runHRAgent(
+async function runPersonaDesigner(
   productSpec: ProductSpec,
   orgGuidance: string,
   openIssues: { number: number; title: string; labels: string[] }[],
   scenarios: Scenario[],
   testAccounts: TestAccount[] = [],
 ): Promise<void> {
-  console.log("\n[hr] starting...");
+  console.log("\n[persona-designer] starting...");
   const messages: Anthropic.MessageParam[] = [
-    { role: "user", content: "Manage agent hiring and retirement." },
+    { role: "user", content: "Design and manage user personas for this run." },
   ];
   const accountContext = testAccounts.length > 0
     ? `\n[Available Test Accounts (one per role)]\n${testAccounts.map((a) => `- ${a.role}: ${a.email}`).join("\n")}\nWhen recruiting agents, match each persona's role to one of these accounts so they can operate with appropriate permissions.`
     : "";
-  const systemPrompt = `You are the test agent manager for "${productSpec.appName}".
-You recruit and manage agents that simulate real users of the app.
+  const systemPrompt = `You are the persona designer for "${productSpec.appName}".
+You create and manage test agents that simulate real users of the app.
 [Organization Design Guidelines]
 ${orgGuidance}${accountContext}
@@ -483,7 +483,7 @@ ${orgGuidance}${accountContext}
         model: defaultModel,
         max_tokens: 1024,
         system: systemPrompt,
-        tools: HR_TOOLS,
+        tools: PERSONA_DESIGNER_TOOLS,
         messages,
       });
       messages.push({ role: "assistant", content: response.content });
@@ -496,14 +496,14 @@ ${orgGuidance}${accountContext}
         let result: unknown;
         if (toolUse.name === "get_coverage") {
           result = computeWeightedSummary().formatted;
-          console.log("  [hr] coverage summary fetched");
+          console.log("  [persona-designer] coverage summary fetched");
         } else if (toolUse.name === "get_open_issues") {
           if (openIssues.length === 0) {
             result = "(no open issues — either GitHub is not configured or there are no known issues yet)";
           } else {
             result = openIssues.map((i) => `- #${i.number}: ${i.title} [${i.labels.join(", ")}]`).join("\n");
           }
-          console.log(`  [hr] open issues fetched (${openIssues.length})`);
+          console.log(`  [persona-designer] open issues fetched (${openIssues.length})`);
         } else if (toolUse.name === "get_scenarios") {
           if (scenarios.length === 0) {
             result = "(no scenarios generated — all agents will use free-exploration mode)";
@@ -512,19 +512,19 @@ ${orgGuidance}${accountContext}
               `[${s.id}] ${s.title}\n  Context: ${s.context}\n  Goal: ${s.goal}\n  Constraints: ${s.constraints}`
             ).join("\n\n");
           }
-          console.log(`  [hr] scenarios fetched (${scenarios.length})`);
+          console.log(`  [persona-designer] scenarios fetched (${scenarios.length})`);
         } else if (toolUse.name === "get_agents") {
           const agents = loadAgents();
           result = agents.map((a) => ({ id: a.id, name: a.name, role: a.role, createdAt: a.createdAt }));
-          console.log(`  [hr] current agents: ${agents.length}`);
+          console.log(`  [persona-designer] current agents: ${agents.length}`);
         } else if (toolUse.name === "add_agent") {
           const { name, role, persona } = toolUse.input as { name: string; role: string; persona: string };
           result = addAgent({ name, role, persona });
-          console.log(`  [hr] hired: ${name} (${role})`);
+          console.log(`  [persona-designer] created: ${name} (${role})`);
         } else if (toolUse.name === "retire_agent") {
           const { agentId, reason } = toolUse.input as { agentId: string; reason: string };
           result = { success: retireAgent(agentId) };
-          console.log(`  [hr] retired: ${agentId} — ${reason}`);
+          console.log(`  [persona-designer] retired: ${agentId} — ${reason}`);
         } else {
           result = { error: "unknown tool" };
         }
@@ -532,9 +532,9 @@ ${orgGuidance}${accountContext}
       }
       messages.push({ role: "user", content: toolResults });
     }
-    console.log("[hr] done");
+    console.log("[persona-designer] done");
   } catch (e) {
-    console.error("[hr] error:", e);
+    console.error("[persona-designer] error:", e);
   }
 }
@@ -1105,7 +1105,7 @@ async function main() {
     }
    // 4. Persona designer agent
-    await runHRAgent(productSpec, orgDesign.hrGuidance, openIssues, scenarios, testAccounts);
+    await runPersonaDesigner(productSpec, orgDesign.personaGuidance, openIssues, scenarios, testAccounts);
     // 5. load agents + closed issues
     const allAgents = loadAgents();