@m8i-51/shoal 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -167,6 +167,86 @@ describe("computeWeightedSummary", () => {
167
167
  expect(result.formatted).toContain("By scenario");
168
168
  });
169
169
 
170
+ it("14日以内に同じレンズが複数 run に登場するとボーナスが乗る", () => {
171
+ const now = Date.now();
172
+ // The same Accessibility lens appears in 2 runs → bonus = 1 + (2-1)^REPETITION_EXPONENT * REPETITION_BONUS = 1 + 1^3 * 0.005 = 1.005
173
+ setupMockCoverage({
174
+ entries: [
175
+ makeEntry({
176
+ runId: "run_1",
177
+ timestamp: new Date(now - 1000).toISOString(),
178
+ findingsCount: 2,
179
+ byLens: { Accessibility: 2 },
180
+ }),
181
+ makeEntry({
182
+ runId: "run_2",
183
+ timestamp: new Date(now).toISOString(),
184
+ findingsCount: 2,
185
+ byLens: { Accessibility: 2 },
186
+ }),
187
+ ],
188
+ });
189
+
190
+ const resultWithRepeat = computeWeightedSummary();
191
+
192
+ // 1回しか登場しない場合と比較
193
+ setupMockCoverage({
194
+ entries: [
195
+ makeEntry({
196
+ runId: "run_1",
197
+ timestamp: new Date(now).toISOString(),
198
+ findingsCount: 2,
199
+ byLens: { Accessibility: 2 },
200
+ }),
201
+ ],
202
+ });
203
+ const resultSingle = computeWeightedSummary();
204
+
205
+ // 繰り返しありのほうが lens の重みが高いはず
206
+ expect(resultWithRepeat.byLens["Accessibility"]).toBeGreaterThan(resultSingle.byLens["Accessibility"]);
207
+ });
208
+
209
+ it("14日より古いエントリーは繰り返しカウントに含まれない", () => {
210
+ const now = Date.now();
211
+ const oldMs = 15 * 24 * 60 * 60 * 1000; // 15日前
212
+ setupMockCoverage({
213
+ entries: [
214
+ makeEntry({
215
+ runId: "run_old",
216
+ timestamp: new Date(now - oldMs).toISOString(),
217
+ findingsCount: 2,
218
+ byLens: { Security: 2 },
219
+ }),
220
+ makeEntry({
221
+ runId: "run_new",
222
+ timestamp: new Date(now).toISOString(),
223
+ findingsCount: 2,
224
+ byLens: { Security: 2 },
225
+ }),
226
+ ],
227
+ });
228
+
229
+ const result = computeWeightedSummary();
230
+ // 古いエントリーはウィンドウ外なのでボーナスなし(繰り返し回数=1 → bonus=1.0)
231
+ // Without the bonus: weight ≈ 1.0*2 + (15-day-old decay = 0.5^(15/7) ≈ 0.23)*2 ≈ 2.45
232
+ expect(result.formatted).not.toContain("Repeated lenses");
233
+ });
234
+
235
+ it("繰り返しレンズが formatted に含まれる", () => {
236
+ const now = Date.now();
237
+ setupMockCoverage({
238
+ entries: [
239
+ makeEntry({ runId: "r1", timestamp: new Date(now - 1000).toISOString(), findingsCount: 1, byLens: { "UI design": 1 } }),
240
+ makeEntry({ runId: "r2", timestamp: new Date(now).toISOString(), findingsCount: 1, byLens: { "UI design": 1 } }),
241
+ ],
242
+ });
243
+
244
+ const result = computeWeightedSummary();
245
+ expect(result.formatted).toContain("Repeated lenses");
246
+ expect(result.formatted).toContain("UI design");
247
+ expect(result.formatted).toContain("×2");
248
+ });
249
+
170
250
  it("MAX_ENTRIES を超えると最新30件に切り捨てる", () => {
171
251
  const entries = Array.from({ length: 35 }, (_, i) =>
172
252
  makeEntry({
@@ -27,6 +27,9 @@ export interface WeightedSummary {
27
27
  const COVERAGE_PATH = path.join(process.cwd(), "coverage", "coverage.json");
28
28
  const MAX_ENTRIES = 30;
29
29
  const HALF_LIFE_DAYS = 7;
30
+ const REPETITION_WINDOW_DAYS = 14;
31
+ const REPETITION_BONUS = 0.005;
32
+ const REPETITION_EXPONENT = 3;
30
33
 
31
34
  export function loadCoverage(): Coverage {
32
35
  try {
@@ -98,6 +101,20 @@ export function computeWeightedSummary(): WeightedSummary {
98
101
 
99
102
  const now = Date.now();
100
103
  const halfLifeMs = HALF_LIFE_DAYS * 24 * 60 * 60 * 1000;
104
+ const windowMs = REPETITION_WINDOW_DAYS * 24 * 60 * 60 * 1000;
105
+
106
+ // 14日以内の run で各 lens/scenario が何回登場したかを数える
107
+ const lensRepeat: Record<string, number> = {};
108
+ const scenarioRepeat: Record<string, number> = {};
109
+ for (const entry of coverage.entries) {
110
+ if (now - new Date(entry.timestamp).getTime() > windowMs) continue;
111
+ for (const lens of Object.keys(entry.byLens)) {
112
+ lensRepeat[lens] = (lensRepeat[lens] ?? 0) + 1;
113
+ }
114
+ for (const title of Object.keys(entry.byScenario ?? {})) {
115
+ scenarioRepeat[title] = (scenarioRepeat[title] ?? 0) + 1;
116
+ }
117
+ }
101
118
 
102
119
  const byCategory: Record<string, number> = {};
103
120
  const byLens: Record<string, number> = {};
@@ -106,18 +123,21 @@ export function computeWeightedSummary(): WeightedSummary {
106
123
 
107
124
  for (const entry of coverage.entries) {
108
125
  const age = now - new Date(entry.timestamp).getTime();
109
- const weight = Math.pow(0.5, age / halfLifeMs);
126
+ const decay = Math.pow(0.5, age / halfLifeMs);
110
127
 
111
128
  for (const [cat, count] of Object.entries(entry.byCategory)) {
112
- byCategory[cat] = (byCategory[cat] ?? 0) + count * weight;
129
+ byCategory[cat] = (byCategory[cat] ?? 0) + count * decay;
113
130
  }
114
131
  for (const [lens, count] of Object.entries(entry.byLens)) {
115
- byLens[lens] = (byLens[lens] ?? 0) + count * weight;
132
+ // 繰り返し呼ばれるほど「必要」とみなしてボーナスを加算
133
+ const bonus = 1 + Math.pow((lensRepeat[lens] ?? 1) - 1, REPETITION_EXPONENT) * REPETITION_BONUS;
134
+ byLens[lens] = (byLens[lens] ?? 0) + count * decay * bonus;
116
135
  }
117
136
  for (const [title, count] of Object.entries(entry.byScenario ?? {})) {
118
- byScenario[title] = (byScenario[title] ?? 0) + count * weight;
137
+ const bonus = 1 + Math.pow((scenarioRepeat[title] ?? 1) - 1, REPETITION_EXPONENT) * REPETITION_BONUS;
138
+ byScenario[title] = (byScenario[title] ?? 0) + count * decay * bonus;
119
139
  }
120
- totalWeighted += entry.findingsCount * weight;
140
+ totalWeighted += entry.findingsCount * decay;
121
141
  }
122
142
 
123
143
  // 小数点1桁に丸める
@@ -142,12 +162,27 @@ export function computeWeightedSummary(): WeightedSummary {
142
162
  ? `By scenario: ${sortedScenario.map(([t, c]) => `"${t}" (${c})`).join(", ")}`
143
163
  : null;
144
164
 
165
+ const repeatedLenses = Object.entries(lensRepeat)
166
+ .filter(([, n]) => n > 1)
167
+ .sort((a, b) => b[1] - a[1])
168
+ .map(([l, n]) => `${l} (×${n})`);
169
+ const repeatedScenarios = Object.entries(scenarioRepeat)
170
+ .filter(([, n]) => n > 1)
171
+ .sort((a, b) => b[1] - a[1])
172
+ .map(([t, n]) => `"${t}" (×${n})`);
173
+
145
174
  const formatted = [
146
- `Coverage summary (half-life: ${HALF_LIFE_DAYS} days, ${coverage.entries.length} run(s) tracked):`,
175
+ `Coverage summary (half-life: ${HALF_LIFE_DAYS} days, repetition window: ${REPETITION_WINDOW_DAYS} days, ${coverage.entries.length} run(s) tracked):`,
147
176
  `Total weighted findings: ${totalWeighted}`,
148
177
  `By lens: ${sortedLens.map(([l, c]) => `${l} (${c})`).join(" > ") || "(none)"}`,
149
178
  scenarioLine,
150
179
  `By category: ${sortedCategory.map(([c, n]) => `${c} (${n})`).join(" > ") || "(none)"}`,
180
+ repeatedLenses.length > 0
181
+ ? `Repeated lenses (bonus applied): ${repeatedLenses.join(", ")}`
182
+ : null,
183
+ repeatedScenarios.length > 0
184
+ ? `Repeated scenarios (bonus applied): ${repeatedScenarios.join(", ")}`
185
+ : null,
151
186
  underrepresented.length > 0
152
187
  ? `Underrepresented lenses: ${underrepresented.join(", ")} — consider recruiting agents with these perspectives`
153
188
  : "All lenses have comparable coverage",
@@ -4,7 +4,7 @@ import { createMessageWithRetry } from "./agent-loop";
4
4
  import type { ProductSpec } from "./product-discovery";
5
5
 
6
6
  export interface OrgDesign {
7
- hrGuidance: string;
7
+ personaGuidance: string;
8
8
  }
9
9
 
10
10
  // Evaluation lenses always included regardless of app type / アプリ種別に関わらず常に含める観点
@@ -22,7 +22,7 @@ export const UNIVERSAL_LENSES = [
22
22
  ];
23
23
 
24
24
  export async function designOrg(spec: ProductSpec, client: LLMClient, model: string, coverageSummary?: string): Promise<OrgDesign> {
25
- console.log("\n[org-design] starting...");
25
+ console.log("\n[persona-policy] starting...");
26
26
 
27
27
  const response = await createMessageWithRetry(client, {
28
28
  model,
@@ -47,18 +47,36 @@ ${spec.features}
47
47
  ${spec.designContext ? `\n[Design Context]\n${spec.designContext}\n` : ""}${coverageSummary ? `\n[Coverage History]\n${coverageSummary}\nUse this to identify underrepresented perspectives and adjust the recruitment policy accordingly.\n` : ""}
48
48
  Please output the following:
49
49
 
50
+ ## App type classification
51
+ Classify this app as one of:
52
+ - "business": used in work contexts by employees with specific job roles (CRM, project management, HR tools, etc.)
53
+ - "consumer": used by individuals in personal contexts (personal finance, entertainment, health, productivity, etc.)
54
+ - "mixed": significant use in both contexts
55
+
50
56
  ## User types for this app
51
- (What kinds of users exist — roles, skill levels, usage scenarios)
57
+ (What kinds of users exist — described appropriately for the app type)
52
58
 
53
59
  ## Agent types to recruit (5–8 types)
54
- By job function, role, and technical literacy. Always include:
55
- - At least one UX/product designer persona (evaluates visual consistency, interaction patterns, HIG/Material compliance)
56
- - At least one product manager or business analyst persona (evaluates feature completeness, user journey clarity)
57
- - At least one target end-user with low technical literacy (first-time or reluctant user)
58
- - Domain-specific roles relevant to this app type
59
-
60
- ## Recruitment instructions for the HR agent
61
- (Concrete hiring/retirement guidelines based on the above emphasize persona diversity across technical skill levels, job functions, and design sensitivity)`,
60
+
61
+ **If business app:**
62
+ Recruit primarily by job role and function (e.g., sales rep, manager, admin).
63
+ Include personas with varying technical literacy within those roles.
64
+
65
+ **If consumer app:**
66
+ Recruit primarily as real end-users defined by lifestyle, demographics, and usage context.
67
+ Focus on who actually uses this app in daily life, not job titles.
68
+ Examples for a subscription tracker: "budget-conscious student juggling streaming costs", "freelancer tracking SaaS tool expenses", "household manager reviewing family subscriptions".
69
+ Avoid professional/specialist titles (QA engineer, PM, auditor) as primary personas — these are not real users of this app.
70
+
71
+ **If mixed:**
72
+ Balance job-role personas and lifestyle-based end-user personas.
73
+
74
+ **Always include as supplement (1–2 personas regardless of app type):**
75
+ - 1 UX evaluator: focuses on visual consistency, interaction patterns, HIG/Material compliance
76
+ - 1 edge-case/accessibility evaluator: focuses on error handling, accessibility, stress scenarios
77
+
78
+ ## Recruitment instructions for the persona designer agent
79
+ (Concrete guidelines based on the above — emphasize that the majority of personas should reflect real users of this specific app, with expert evaluators as a minority supplement)`,
62
80
  },
63
81
  ],
64
82
  });
@@ -68,7 +86,7 @@ By job function, role, and technical literacy. Always include:
68
86
  .map((b) => b.text)
69
87
  .join("");
70
88
 
71
- const hrGuidance = `${text}
89
+ const personaGuidance = `${text}
72
90
 
73
91
  [Universal Evaluation Lenses]
74
92
  Include one of the following perspectives in each agent's persona to ensure diverse findings:
@@ -86,6 +104,6 @@ When recruiting UX/design-oriented agents, give them awareness of these standard
86
104
  - Jakob's Law: flag interactions that contradict conventions users expect from similar apps (e.g., swipe to delete, pull to refresh, hamburger menus)
87
105
  - Nielsen's heuristics: check for missing system status feedback, unclear error messages, lack of undo, and forcing users to recall rather than recognize`;
88
106
 
89
- console.log("[org-design] done");
90
- return { hrGuidance };
107
+ console.log("[persona-policy] done");
108
+ return { personaGuidance };
91
109
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@m8i-51/shoal",
3
- "version": "0.1.7",
3
+ "version": "0.1.9",
4
4
  "type": "module",
5
5
  "description": "Multi-agent web exploration framework — finds bugs, UX issues, and missing features by running AI agents against your app",
6
6
  "repository": {
package/run.ts CHANGED
@@ -394,10 +394,10 @@ ${productSpec.uiFeatures ? `\n[UI-Only Features]\nThese features exist in the UI
394
394
  }
395
395
 
396
396
  // ================================================================
397
- // HR agent
397
+ // Persona designer agent
398
398
  // ================================================================
399
399
 
400
- const HR_TOOLS: Anthropic.Tool[] = [
400
+ const PERSONA_DESIGNER_TOOLS: Anthropic.Tool[] = [
401
401
  {
402
402
  name: "get_agents",
403
403
  description: "Get the current list of registered agents. / 現在登録されているエージェント一覧を取得する",
@@ -445,24 +445,24 @@ const HR_TOOLS: Anthropic.Tool[] = [
445
445
  },
446
446
  ];
447
447
 
448
- async function runHRAgent(
448
+ async function runPersonaDesigner(
449
449
  productSpec: ProductSpec,
450
450
  orgGuidance: string,
451
451
  openIssues: { number: number; title: string; labels: string[] }[],
452
452
  scenarios: Scenario[],
453
453
  testAccounts: TestAccount[] = [],
454
454
  ): Promise<void> {
455
- console.log("\n[hr] starting...");
455
+ console.log("\n[persona-designer] starting...");
456
456
  const messages: Anthropic.MessageParam[] = [
457
- { role: "user", content: "Manage agent hiring and retirement." },
457
+ { role: "user", content: "Design and manage user personas for this run." },
458
458
  ];
459
459
 
460
460
  const accountContext = testAccounts.length > 0
461
461
  ? `\n[Available Test Accounts (one per role)]\n${testAccounts.map((a) => `- ${a.role}: ${a.email}`).join("\n")}\nWhen recruiting agents, match each persona's role to one of these accounts so they can operate with appropriate permissions.`
462
462
  : "";
463
463
 
464
- const systemPrompt = `You are the test agent manager for "${productSpec.appName}".
465
- You recruit and manage agents that simulate real users of the app.
464
+ const systemPrompt = `You are the persona designer for "${productSpec.appName}".
465
+ You create and manage test agents that simulate real users of the app.
466
466
 
467
467
  [Organization Design Guidelines]
468
468
  ${orgGuidance}${accountContext}
@@ -483,7 +483,7 @@ ${orgGuidance}${accountContext}
483
483
  model: defaultModel,
484
484
  max_tokens: 1024,
485
485
  system: systemPrompt,
486
- tools: HR_TOOLS,
486
+ tools: PERSONA_DESIGNER_TOOLS,
487
487
  messages,
488
488
  });
489
489
  messages.push({ role: "assistant", content: response.content });
@@ -496,14 +496,14 @@ ${orgGuidance}${accountContext}
496
496
  let result: unknown;
497
497
  if (toolUse.name === "get_coverage") {
498
498
  result = computeWeightedSummary().formatted;
499
- console.log(" [hr] coverage summary fetched");
499
+ console.log(" [persona-designer] coverage summary fetched");
500
500
  } else if (toolUse.name === "get_open_issues") {
501
501
  if (openIssues.length === 0) {
502
502
  result = "(no open issues — either GitHub is not configured or there are no known issues yet)";
503
503
  } else {
504
504
  result = openIssues.map((i) => `- #${i.number}: ${i.title} [${i.labels.join(", ")}]`).join("\n");
505
505
  }
506
- console.log(` [hr] open issues fetched (${openIssues.length})`);
506
+ console.log(` [persona-designer] open issues fetched (${openIssues.length})`);
507
507
  } else if (toolUse.name === "get_scenarios") {
508
508
  if (scenarios.length === 0) {
509
509
  result = "(no scenarios generated — all agents will use free-exploration mode)";
@@ -512,19 +512,19 @@ ${orgGuidance}${accountContext}
512
512
  `[${s.id}] ${s.title}\n Context: ${s.context}\n Goal: ${s.goal}\n Constraints: ${s.constraints}`
513
513
  ).join("\n\n");
514
514
  }
515
- console.log(` [hr] scenarios fetched (${scenarios.length})`);
515
+ console.log(` [persona-designer] scenarios fetched (${scenarios.length})`);
516
516
  } else if (toolUse.name === "get_agents") {
517
517
  const agents = loadAgents();
518
518
  result = agents.map((a) => ({ id: a.id, name: a.name, role: a.role, createdAt: a.createdAt }));
519
- console.log(` [hr] current agents: ${agents.length}`);
519
+ console.log(` [persona-designer] current agents: ${agents.length}`);
520
520
  } else if (toolUse.name === "add_agent") {
521
521
  const { name, role, persona } = toolUse.input as { name: string; role: string; persona: string };
522
522
  result = addAgent({ name, role, persona });
523
- console.log(` [hr] hired: ${name} (${role})`);
523
+ console.log(` [persona-designer] created: ${name} (${role})`);
524
524
  } else if (toolUse.name === "retire_agent") {
525
525
  const { agentId, reason } = toolUse.input as { agentId: string; reason: string };
526
526
  result = { success: retireAgent(agentId) };
527
- console.log(` [hr] retired: ${agentId} — ${reason}`);
527
+ console.log(` [persona-designer] retired: ${agentId} — ${reason}`);
528
528
  } else {
529
529
  result = { error: "unknown tool" };
530
530
  }
@@ -532,9 +532,9 @@ ${orgGuidance}${accountContext}
532
532
  }
533
533
  messages.push({ role: "user", content: toolResults });
534
534
  }
535
- console.log("[hr] done");
535
+ console.log("[persona-designer] done");
536
536
  } catch (e) {
537
- console.error("[hr] error:", e);
537
+ console.error("[persona-designer] error:", e);
538
538
  }
539
539
  }
540
540
 
@@ -1105,7 +1105,7 @@ async function main() {
1105
1105
  }
1106
1106
 
1107
1107
  // 4. Persona designer agent
1108
- await runHRAgent(productSpec, orgDesign.hrGuidance, openIssues, scenarios, testAccounts);
1108
+ await runPersonaDesigner(productSpec, orgDesign.personaGuidance, openIssues, scenarios, testAccounts);
1109
1109
 
1110
1110
  // 5. load agents + closed issues
1111
1111
  const allAgents = loadAgents();