@m8i-51/shoal 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/framework/__tests__/coverage.test.ts +80 -0
- package/framework/coverage.ts +41 -6
- package/framework/org-designer.ts +32 -14
- package/package.json +1 -1
- package/run.ts +17 -17
|
@@ -167,6 +167,86 @@ describe("computeWeightedSummary", () => {
|
|
|
167
167
|
expect(result.formatted).toContain("By scenario");
|
|
168
168
|
});
|
|
169
169
|
|
|
170
|
+
it("14日以内に同じレンズが複数 run に登場するとボーナスが乗る", () => {
|
|
171
|
+
const now = Date.now();
|
|
172
|
+
// The same Accessibility lens appears in 2 runs → bonus = 1 + (2-1)^REPETITION_EXPONENT * REPETITION_BONUS = 1 + 1^3 * 0.005 = 1.005
|
|
173
|
+
setupMockCoverage({
|
|
174
|
+
entries: [
|
|
175
|
+
makeEntry({
|
|
176
|
+
runId: "run_1",
|
|
177
|
+
timestamp: new Date(now - 1000).toISOString(),
|
|
178
|
+
findingsCount: 2,
|
|
179
|
+
byLens: { Accessibility: 2 },
|
|
180
|
+
}),
|
|
181
|
+
makeEntry({
|
|
182
|
+
runId: "run_2",
|
|
183
|
+
timestamp: new Date(now).toISOString(),
|
|
184
|
+
findingsCount: 2,
|
|
185
|
+
byLens: { Accessibility: 2 },
|
|
186
|
+
}),
|
|
187
|
+
],
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
const resultWithRepeat = computeWeightedSummary();
|
|
191
|
+
|
|
192
|
+
// 1回しか登場しない場合と比較
|
|
193
|
+
setupMockCoverage({
|
|
194
|
+
entries: [
|
|
195
|
+
makeEntry({
|
|
196
|
+
runId: "run_1",
|
|
197
|
+
timestamp: new Date(now).toISOString(),
|
|
198
|
+
findingsCount: 2,
|
|
199
|
+
byLens: { Accessibility: 2 },
|
|
200
|
+
}),
|
|
201
|
+
],
|
|
202
|
+
});
|
|
203
|
+
const resultSingle = computeWeightedSummary();
|
|
204
|
+
|
|
205
|
+
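// The repeated case should carry a higher lens weight.
// NOTE(review): the repeated case sums two entries (~4) while the single case has one (~2), so this assertion passes even if no repetition bonus is applied — compare against the un-bonused sum to actually pin the bonus.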
|
|
206
|
+
expect(resultWithRepeat.byLens["Accessibility"]).toBeGreaterThan(resultSingle.byLens["Accessibility"]);
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
it("14日より古いエントリーは繰り返しカウントに含まれない", () => {
|
|
210
|
+
const now = Date.now();
|
|
211
|
+
const oldMs = 15 * 24 * 60 * 60 * 1000; // 15日前
|
|
212
|
+
setupMockCoverage({
|
|
213
|
+
entries: [
|
|
214
|
+
makeEntry({
|
|
215
|
+
runId: "run_old",
|
|
216
|
+
timestamp: new Date(now - oldMs).toISOString(),
|
|
217
|
+
findingsCount: 2,
|
|
218
|
+
byLens: { Security: 2 },
|
|
219
|
+
}),
|
|
220
|
+
makeEntry({
|
|
221
|
+
runId: "run_new",
|
|
222
|
+
timestamp: new Date(now).toISOString(),
|
|
223
|
+
findingsCount: 2,
|
|
224
|
+
byLens: { Security: 2 },
|
|
225
|
+
}),
|
|
226
|
+
],
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
const result = computeWeightedSummary();
|
|
230
|
+
// 古いエントリーはウィンドウ外なのでボーナスなし(繰り返し回数=1 → bonus=1.0)
|
|
231
|
+
// Without the bonus: weight ≈ 1.0*2 + (decay at 15 days)*2 = 2 + 0.5^(15/7)*2 ≈ 2 + 0.226*2 ≈ 2.45 (half-life is 7 days)
|
|
232
|
+
expect(result.formatted).not.toContain("Repeated lenses");
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
it("繰り返しレンズが formatted に含まれる", () => {
|
|
236
|
+
const now = Date.now();
|
|
237
|
+
setupMockCoverage({
|
|
238
|
+
entries: [
|
|
239
|
+
makeEntry({ runId: "r1", timestamp: new Date(now - 1000).toISOString(), findingsCount: 1, byLens: { "UI design": 1 } }),
|
|
240
|
+
makeEntry({ runId: "r2", timestamp: new Date(now).toISOString(), findingsCount: 1, byLens: { "UI design": 1 } }),
|
|
241
|
+
],
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
const result = computeWeightedSummary();
|
|
245
|
+
expect(result.formatted).toContain("Repeated lenses");
|
|
246
|
+
expect(result.formatted).toContain("UI design");
|
|
247
|
+
expect(result.formatted).toContain("×2");
|
|
248
|
+
});
|
|
249
|
+
|
|
170
250
|
it("MAX_ENTRIES を超えると最新30件に切り捨てる", () => {
|
|
171
251
|
const entries = Array.from({ length: 35 }, (_, i) =>
|
|
172
252
|
makeEntry({
|
package/framework/coverage.ts
CHANGED
|
@@ -27,6 +27,9 @@ export interface WeightedSummary {
|
|
|
27
27
|
const COVERAGE_PATH = path.join(process.cwd(), "coverage", "coverage.json");
|
|
28
28
|
const MAX_ENTRIES = 30;
|
|
29
29
|
const HALF_LIFE_DAYS = 7;
|
|
30
|
+
const REPETITION_WINDOW_DAYS = 14;
|
|
31
|
+
const REPETITION_BONUS = 0.005;
|
|
32
|
+
const REPETITION_EXPONENT = 3;
|
|
30
33
|
|
|
31
34
|
export function loadCoverage(): Coverage {
|
|
32
35
|
try {
|
|
@@ -98,6 +101,20 @@ export function computeWeightedSummary(): WeightedSummary {
|
|
|
98
101
|
|
|
99
102
|
const now = Date.now();
|
|
100
103
|
const halfLifeMs = HALF_LIFE_DAYS * 24 * 60 * 60 * 1000;
|
|
104
|
+
const windowMs = REPETITION_WINDOW_DAYS * 24 * 60 * 60 * 1000;
|
|
105
|
+
|
|
106
|
+
// 14日以内の run で各 lens/scenario が何回登場したかを数える
|
|
107
|
+
const lensRepeat: Record<string, number> = {};
|
|
108
|
+
const scenarioRepeat: Record<string, number> = {};
|
|
109
|
+
for (const entry of coverage.entries) {
|
|
110
|
+
if (now - new Date(entry.timestamp).getTime() > windowMs) continue;
|
|
111
|
+
for (const lens of Object.keys(entry.byLens)) {
|
|
112
|
+
lensRepeat[lens] = (lensRepeat[lens] ?? 0) + 1;
|
|
113
|
+
}
|
|
114
|
+
for (const title of Object.keys(entry.byScenario ?? {})) {
|
|
115
|
+
scenarioRepeat[title] = (scenarioRepeat[title] ?? 0) + 1;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
101
118
|
|
|
102
119
|
const byCategory: Record<string, number> = {};
|
|
103
120
|
const byLens: Record<string, number> = {};
|
|
@@ -106,18 +123,21 @@ export function computeWeightedSummary(): WeightedSummary {
|
|
|
106
123
|
|
|
107
124
|
for (const entry of coverage.entries) {
|
|
108
125
|
const age = now - new Date(entry.timestamp).getTime();
|
|
109
|
-
const
|
|
126
|
+
const decay = Math.pow(0.5, age / halfLifeMs);
|
|
110
127
|
|
|
111
128
|
for (const [cat, count] of Object.entries(entry.byCategory)) {
|
|
112
|
-
byCategory[cat] = (byCategory[cat] ?? 0) + count *
|
|
129
|
+
byCategory[cat] = (byCategory[cat] ?? 0) + count * decay;
|
|
113
130
|
}
|
|
114
131
|
for (const [lens, count] of Object.entries(entry.byLens)) {
|
|
115
|
-
|
|
132
|
+
// 繰り返し呼ばれるほど「必要」とみなしてボーナスを加算
|
|
133
|
+
const bonus = 1 + Math.pow((lensRepeat[lens] ?? 1) - 1, REPETITION_EXPONENT) * REPETITION_BONUS;
|
|
134
|
+
byLens[lens] = (byLens[lens] ?? 0) + count * decay * bonus;
|
|
116
135
|
}
|
|
117
136
|
for (const [title, count] of Object.entries(entry.byScenario ?? {})) {
|
|
118
|
-
|
|
137
|
+
const bonus = 1 + Math.pow((scenarioRepeat[title] ?? 1) - 1, REPETITION_EXPONENT) * REPETITION_BONUS;
|
|
138
|
+
byScenario[title] = (byScenario[title] ?? 0) + count * decay * bonus;
|
|
119
139
|
}
|
|
120
|
-
totalWeighted += entry.findingsCount *
|
|
140
|
+
totalWeighted += entry.findingsCount * decay;
|
|
121
141
|
}
|
|
122
142
|
|
|
123
143
|
// 小数点1桁に丸める
|
|
@@ -142,12 +162,27 @@ export function computeWeightedSummary(): WeightedSummary {
|
|
|
142
162
|
? `By scenario: ${sortedScenario.map(([t, c]) => `"${t}" (${c})`).join(", ")}`
|
|
143
163
|
: null;
|
|
144
164
|
|
|
165
|
+
const repeatedLenses = Object.entries(lensRepeat)
|
|
166
|
+
.filter(([, n]) => n > 1)
|
|
167
|
+
.sort((a, b) => b[1] - a[1])
|
|
168
|
+
.map(([l, n]) => `${l} (×${n})`);
|
|
169
|
+
const repeatedScenarios = Object.entries(scenarioRepeat)
|
|
170
|
+
.filter(([, n]) => n > 1)
|
|
171
|
+
.sort((a, b) => b[1] - a[1])
|
|
172
|
+
.map(([t, n]) => `"${t}" (×${n})`);
|
|
173
|
+
|
|
145
174
|
const formatted = [
|
|
146
|
-
`Coverage summary (half-life: ${HALF_LIFE_DAYS} days, ${coverage.entries.length} run(s) tracked):`,
|
|
175
|
+
`Coverage summary (half-life: ${HALF_LIFE_DAYS} days, repetition window: ${REPETITION_WINDOW_DAYS} days, ${coverage.entries.length} run(s) tracked):`,
|
|
147
176
|
`Total weighted findings: ${totalWeighted}`,
|
|
148
177
|
`By lens: ${sortedLens.map(([l, c]) => `${l} (${c})`).join(" > ") || "(none)"}`,
|
|
149
178
|
scenarioLine,
|
|
150
179
|
`By category: ${sortedCategory.map(([c, n]) => `${c} (${n})`).join(" > ") || "(none)"}`,
|
|
180
|
+
repeatedLenses.length > 0
|
|
181
|
+
? `Repeated lenses (bonus applied): ${repeatedLenses.join(", ")}`
|
|
182
|
+
: null,
|
|
183
|
+
repeatedScenarios.length > 0
|
|
184
|
+
? `Repeated scenarios (bonus applied): ${repeatedScenarios.join(", ")}`
|
|
185
|
+
: null,
|
|
151
186
|
underrepresented.length > 0
|
|
152
187
|
? `Underrepresented lenses: ${underrepresented.join(", ")} — consider recruiting agents with these perspectives`
|
|
153
188
|
: "All lenses have comparable coverage",
|
|
@@ -4,7 +4,7 @@ import { createMessageWithRetry } from "./agent-loop";
|
|
|
4
4
|
import type { ProductSpec } from "./product-discovery";
|
|
5
5
|
|
|
6
6
|
export interface OrgDesign {
|
|
7
|
-
|
|
7
|
+
personaGuidance: string;
|
|
8
8
|
}
|
|
9
9
|
|
|
10
10
|
// Evaluation lenses always included regardless of app type / アプリ種別に関わらず常に含める観点
|
|
@@ -22,7 +22,7 @@ export const UNIVERSAL_LENSES = [
|
|
|
22
22
|
];
|
|
23
23
|
|
|
24
24
|
export async function designOrg(spec: ProductSpec, client: LLMClient, model: string, coverageSummary?: string): Promise<OrgDesign> {
|
|
25
|
-
console.log("\n[
|
|
25
|
+
console.log("\n[persona-policy] starting...");
|
|
26
26
|
|
|
27
27
|
const response = await createMessageWithRetry(client, {
|
|
28
28
|
model,
|
|
@@ -47,18 +47,36 @@ ${spec.features}
|
|
|
47
47
|
${spec.designContext ? `\n[Design Context]\n${spec.designContext}\n` : ""}${coverageSummary ? `\n[Coverage History]\n${coverageSummary}\nUse this to identify underrepresented perspectives and adjust the recruitment policy accordingly.\n` : ""}
|
|
48
48
|
Please output the following:
|
|
49
49
|
|
|
50
|
+
## App type classification
|
|
51
|
+
Classify this app as one of:
|
|
52
|
+
- "business": used in work contexts by employees with specific job roles (CRM, project management, HR tools, etc.)
|
|
53
|
+
- "consumer": used by individuals in personal contexts (personal finance, entertainment, health, productivity, etc.)
|
|
54
|
+
- "mixed": significant use in both contexts
|
|
55
|
+
|
|
50
56
|
## User types for this app
|
|
51
|
-
(What kinds of users exist —
|
|
57
|
+
(What kinds of users exist — described appropriately for the app type)
|
|
52
58
|
|
|
53
59
|
## Agent types to recruit (5–8 types)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
|
|
61
|
+
**If business app:**
|
|
62
|
+
Recruit primarily by job role and function (e.g., sales rep, manager, admin).
|
|
63
|
+
Include personas with varying technical literacy within those roles.
|
|
64
|
+
|
|
65
|
+
**If consumer app:**
|
|
66
|
+
Recruit primarily as real end-users — define by lifestyle, demographics, and usage context.
|
|
67
|
+
Focus on who actually uses this app in daily life, not job titles.
|
|
68
|
+
Examples for a subscription tracker: "budget-conscious student juggling streaming costs", "freelancer tracking SaaS tool expenses", "household manager reviewing family subscriptions".
|
|
69
|
+
Avoid professional/specialist titles (QA engineer, PM, auditor) as primary personas — these are not real users of this app.
|
|
70
|
+
|
|
71
|
+
**If mixed:**
|
|
72
|
+
Balance job-role personas and lifestyle-based end-user personas.
|
|
73
|
+
|
|
74
|
+
**Always include as supplement (1–2 personas regardless of app type):**
|
|
75
|
+
- 1 UX evaluator: focuses on visual consistency, interaction patterns, HIG/Material compliance
|
|
76
|
+
- 1 edge-case/accessibility evaluator: focuses on error handling, accessibility, stress scenarios
|
|
77
|
+
|
|
78
|
+
## Recruitment instructions for the persona designer agent
|
|
79
|
+
(Concrete guidelines based on the above — emphasize that the majority of personas should reflect real users of this specific app, with expert evaluators as a minority supplement)`,
|
|
62
80
|
},
|
|
63
81
|
],
|
|
64
82
|
});
|
|
@@ -68,7 +86,7 @@ By job function, role, and technical literacy. Always include:
|
|
|
68
86
|
.map((b) => b.text)
|
|
69
87
|
.join("");
|
|
70
88
|
|
|
71
|
-
const
|
|
89
|
+
const personaGuidance = `${text}
|
|
72
90
|
|
|
73
91
|
[Universal Evaluation Lenses]
|
|
74
92
|
Include one of the following perspectives in each agent's persona to ensure diverse findings:
|
|
@@ -86,6 +104,6 @@ When recruiting UX/design-oriented agents, give them awareness of these standard
|
|
|
86
104
|
- Jakob's Law: flag interactions that contradict conventions users expect from similar apps (e.g., swipe to delete, pull to refresh, hamburger menus)
|
|
87
105
|
- Nielsen's heuristics: check for missing system status feedback, unclear error messages, lack of undo, and forcing users to recall rather than recognize`;
|
|
88
106
|
|
|
89
|
-
console.log("[
|
|
90
|
-
return {
|
|
107
|
+
console.log("[persona-policy] done");
|
|
108
|
+
return { personaGuidance };
|
|
91
109
|
}
|
package/package.json
CHANGED
package/run.ts
CHANGED
|
@@ -394,10 +394,10 @@ ${productSpec.uiFeatures ? `\n[UI-Only Features]\nThese features exist in the UI
|
|
|
394
394
|
}
|
|
395
395
|
|
|
396
396
|
// ================================================================
|
|
397
|
-
//
|
|
397
|
+
// Persona designer agent
|
|
398
398
|
// ================================================================
|
|
399
399
|
|
|
400
|
-
const
|
|
400
|
+
const PERSONA_DESIGNER_TOOLS: Anthropic.Tool[] = [
|
|
401
401
|
{
|
|
402
402
|
name: "get_agents",
|
|
403
403
|
description: "Get the current list of registered agents. / 現在登録されているエージェント一覧を取得する",
|
|
@@ -445,24 +445,24 @@ const HR_TOOLS: Anthropic.Tool[] = [
|
|
|
445
445
|
},
|
|
446
446
|
];
|
|
447
447
|
|
|
448
|
-
async function
|
|
448
|
+
async function runPersonaDesigner(
|
|
449
449
|
productSpec: ProductSpec,
|
|
450
450
|
orgGuidance: string,
|
|
451
451
|
openIssues: { number: number; title: string; labels: string[] }[],
|
|
452
452
|
scenarios: Scenario[],
|
|
453
453
|
testAccounts: TestAccount[] = [],
|
|
454
454
|
): Promise<void> {
|
|
455
|
-
console.log("\n[
|
|
455
|
+
console.log("\n[persona-designer] starting...");
|
|
456
456
|
const messages: Anthropic.MessageParam[] = [
|
|
457
|
-
{ role: "user", content: "
|
|
457
|
+
{ role: "user", content: "Design and manage user personas for this run." },
|
|
458
458
|
];
|
|
459
459
|
|
|
460
460
|
const accountContext = testAccounts.length > 0
|
|
461
461
|
? `\n[Available Test Accounts (one per role)]\n${testAccounts.map((a) => `- ${a.role}: ${a.email}`).join("\n")}\nWhen recruiting agents, match each persona's role to one of these accounts so they can operate with appropriate permissions.`
|
|
462
462
|
: "";
|
|
463
463
|
|
|
464
|
-
const systemPrompt = `You are the
|
|
465
|
-
You
|
|
464
|
+
const systemPrompt = `You are the persona designer for "${productSpec.appName}".
|
|
465
|
+
You create and manage test agents that simulate real users of the app.
|
|
466
466
|
|
|
467
467
|
[Organization Design Guidelines]
|
|
468
468
|
${orgGuidance}${accountContext}
|
|
@@ -483,7 +483,7 @@ ${orgGuidance}${accountContext}
|
|
|
483
483
|
model: defaultModel,
|
|
484
484
|
max_tokens: 1024,
|
|
485
485
|
system: systemPrompt,
|
|
486
|
-
tools:
|
|
486
|
+
tools: PERSONA_DESIGNER_TOOLS,
|
|
487
487
|
messages,
|
|
488
488
|
});
|
|
489
489
|
messages.push({ role: "assistant", content: response.content });
|
|
@@ -496,14 +496,14 @@ ${orgGuidance}${accountContext}
|
|
|
496
496
|
let result: unknown;
|
|
497
497
|
if (toolUse.name === "get_coverage") {
|
|
498
498
|
result = computeWeightedSummary().formatted;
|
|
499
|
-
console.log(" [
|
|
499
|
+
console.log(" [persona-designer] coverage summary fetched");
|
|
500
500
|
} else if (toolUse.name === "get_open_issues") {
|
|
501
501
|
if (openIssues.length === 0) {
|
|
502
502
|
result = "(no open issues — either GitHub is not configured or there are no known issues yet)";
|
|
503
503
|
} else {
|
|
504
504
|
result = openIssues.map((i) => `- #${i.number}: ${i.title} [${i.labels.join(", ")}]`).join("\n");
|
|
505
505
|
}
|
|
506
|
-
console.log(` [
|
|
506
|
+
console.log(` [persona-designer] open issues fetched (${openIssues.length})`);
|
|
507
507
|
} else if (toolUse.name === "get_scenarios") {
|
|
508
508
|
if (scenarios.length === 0) {
|
|
509
509
|
result = "(no scenarios generated — all agents will use free-exploration mode)";
|
|
@@ -512,19 +512,19 @@ ${orgGuidance}${accountContext}
|
|
|
512
512
|
`[${s.id}] ${s.title}\n Context: ${s.context}\n Goal: ${s.goal}\n Constraints: ${s.constraints}`
|
|
513
513
|
).join("\n\n");
|
|
514
514
|
}
|
|
515
|
-
console.log(` [
|
|
515
|
+
console.log(` [persona-designer] scenarios fetched (${scenarios.length})`);
|
|
516
516
|
} else if (toolUse.name === "get_agents") {
|
|
517
517
|
const agents = loadAgents();
|
|
518
518
|
result = agents.map((a) => ({ id: a.id, name: a.name, role: a.role, createdAt: a.createdAt }));
|
|
519
|
-
console.log(` [
|
|
519
|
+
console.log(` [persona-designer] current agents: ${agents.length}`);
|
|
520
520
|
} else if (toolUse.name === "add_agent") {
|
|
521
521
|
const { name, role, persona } = toolUse.input as { name: string; role: string; persona: string };
|
|
522
522
|
result = addAgent({ name, role, persona });
|
|
523
|
-
console.log(` [
|
|
523
|
+
console.log(` [persona-designer] created: ${name} (${role})`);
|
|
524
524
|
} else if (toolUse.name === "retire_agent") {
|
|
525
525
|
const { agentId, reason } = toolUse.input as { agentId: string; reason: string };
|
|
526
526
|
result = { success: retireAgent(agentId) };
|
|
527
|
-
console.log(` [
|
|
527
|
+
console.log(` [persona-designer] retired: ${agentId} — ${reason}`);
|
|
528
528
|
} else {
|
|
529
529
|
result = { error: "unknown tool" };
|
|
530
530
|
}
|
|
@@ -532,9 +532,9 @@ ${orgGuidance}${accountContext}
|
|
|
532
532
|
}
|
|
533
533
|
messages.push({ role: "user", content: toolResults });
|
|
534
534
|
}
|
|
535
|
-
console.log("[
|
|
535
|
+
console.log("[persona-designer] done");
|
|
536
536
|
} catch (e) {
|
|
537
|
-
console.error("[
|
|
537
|
+
console.error("[persona-designer] error:", e);
|
|
538
538
|
}
|
|
539
539
|
}
|
|
540
540
|
|
|
@@ -1105,7 +1105,7 @@ async function main() {
|
|
|
1105
1105
|
}
|
|
1106
1106
|
|
|
1107
1107
|
// 4. Persona designer agent
|
|
1108
|
-
await
|
|
1108
|
+
await runPersonaDesigner(productSpec, orgDesign.personaGuidance, openIssues, scenarios, testAccounts);
|
|
1109
1109
|
|
|
1110
1110
|
// 5. load agents + closed issues
|
|
1111
1111
|
const allAgents = loadAgents();
|