@m8i-51/shoal 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/framework/org-designer.ts +32 -14
- package/framework/triage.ts +3 -2
- package/package.json +1 -1
- package/run.ts +19 -19
|
@@ -4,7 +4,7 @@ import { createMessageWithRetry } from "./agent-loop";
|
|
|
4
4
|
import type { ProductSpec } from "./product-discovery";
|
|
5
5
|
|
|
6
6
|
export interface OrgDesign {
|
|
7
|
-
|
|
7
|
+
personaGuidance: string;
|
|
8
8
|
}
|
|
9
9
|
|
|
10
10
|
// Evaluation lenses always included regardless of app type / アプリ種別に関わらず常に含める観点
|
|
@@ -22,7 +22,7 @@ export const UNIVERSAL_LENSES = [
|
|
|
22
22
|
];
|
|
23
23
|
|
|
24
24
|
export async function designOrg(spec: ProductSpec, client: LLMClient, model: string, coverageSummary?: string): Promise<OrgDesign> {
|
|
25
|
-
console.log("\n[
|
|
25
|
+
console.log("\n[persona-policy] starting...");
|
|
26
26
|
|
|
27
27
|
const response = await createMessageWithRetry(client, {
|
|
28
28
|
model,
|
|
@@ -47,18 +47,36 @@ ${spec.features}
|
|
|
47
47
|
${spec.designContext ? `\n[Design Context]\n${spec.designContext}\n` : ""}${coverageSummary ? `\n[Coverage History]\n${coverageSummary}\nUse this to identify underrepresented perspectives and adjust the recruitment policy accordingly.\n` : ""}
|
|
48
48
|
Please output the following:
|
|
49
49
|
|
|
50
|
+
## App type classification
|
|
51
|
+
Classify this app as one of:
|
|
52
|
+
- "business": used in work contexts by employees with specific job roles (CRM, project management, HR tools, etc.)
|
|
53
|
+
- "consumer": used by individuals in personal contexts (personal finance, entertainment, health, productivity, etc.)
|
|
54
|
+
- "mixed": significant use in both contexts
|
|
55
|
+
|
|
50
56
|
## User types for this app
|
|
51
|
-
(What kinds of users exist —
|
|
57
|
+
(What kinds of users exist — described appropriately for the app type)
|
|
52
58
|
|
|
53
59
|
## Agent types to recruit (5–8 types)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
|
|
61
|
+
**If business app:**
|
|
62
|
+
Recruit primarily by job role and function (e.g., sales rep, manager, admin).
|
|
63
|
+
Include personas with varying technical literacy within those roles.
|
|
64
|
+
|
|
65
|
+
**If consumer app:**
|
|
66
|
+
Recruit primarily as real end-users — define by lifestyle, demographics, and usage context.
|
|
67
|
+
Focus on who actually uses this app in daily life, not job titles.
|
|
68
|
+
Examples for a subscription tracker: "budget-conscious student juggling streaming costs", "freelancer tracking SaaS tool expenses", "household manager reviewing family subscriptions".
|
|
69
|
+
Avoid professional/specialist titles (QA engineer, PM, auditor) as primary personas — these are not real users of this app.
|
|
70
|
+
|
|
71
|
+
**If mixed:**
|
|
72
|
+
Balance job-role personas and lifestyle-based end-user personas.
|
|
73
|
+
|
|
74
|
+
**Always include as supplement (1–2 personas regardless of app type):**
|
|
75
|
+
- 1 UX evaluator: focuses on visual consistency, interaction patterns, HIG/Material compliance
|
|
76
|
+
- 1 edge-case/accessibility evaluator: focuses on error handling, accessibility, stress scenarios
|
|
77
|
+
|
|
78
|
+
## Recruitment instructions for the persona designer agent
|
|
79
|
+
(Concrete guidelines based on the above — emphasize that the majority of personas should reflect real users of this specific app, with expert evaluators as a minority supplement)`,
|
|
62
80
|
},
|
|
63
81
|
],
|
|
64
82
|
});
|
|
@@ -68,7 +86,7 @@ By job function, role, and technical literacy. Always include:
|
|
|
68
86
|
.map((b) => b.text)
|
|
69
87
|
.join("");
|
|
70
88
|
|
|
71
|
-
const
|
|
89
|
+
const personaGuidance = `${text}
|
|
72
90
|
|
|
73
91
|
[Universal Evaluation Lenses]
|
|
74
92
|
Include one of the following perspectives in each agent's persona to ensure diverse findings:
|
|
@@ -86,6 +104,6 @@ When recruiting UX/design-oriented agents, give them awareness of these standard
|
|
|
86
104
|
- Jakob's Law: flag interactions that contradict conventions users expect from similar apps (e.g., swipe to delete, pull to refresh, hamburger menus)
|
|
87
105
|
- Nielsen's heuristics: check for missing system status feedback, unclear error messages, lack of undo, and forcing users to recall rather than recognize`;
|
|
88
106
|
|
|
89
|
-
console.log("[
|
|
90
|
-
return {
|
|
107
|
+
console.log("[persona-policy] done");
|
|
108
|
+
return { personaGuidance };
|
|
91
109
|
}
|
package/framework/triage.ts
CHANGED
|
@@ -163,11 +163,12 @@ Organize feedback collected by multiple agents and post it as GitHub Issues.
|
|
|
163
163
|
? `\n\n**Screenshots:**\n${screenshots.join("\n")}`
|
|
164
164
|
: "";
|
|
165
165
|
const fullBody = `**Category:** ${category}\n\n${body}${screenshotSection}\n\n---\n**Reported by:** ${mergedAgents.join(", ")}\n*This Issue was auto-generated by an AI triage agent*`;
|
|
166
|
-
const
|
|
166
|
+
const cleanTitle = title.replace(/^\[[^\]]+\]\s*/i, "");
|
|
167
|
+
const url = await postGitHubIssue(`[${category}] ${cleanTitle}`, fullBody, [category, "feedback-agent"], githubOptions);
|
|
167
168
|
mergedIds.forEach((id) => { pendingIds.delete(id); issuedIds.push(id); });
|
|
168
169
|
issuesCreated++;
|
|
169
170
|
result = { created: true, url, mergedCount: mergedIds.length };
|
|
170
|
-
console.log(` [triage] issue created: "${
|
|
171
|
+
console.log(` [triage] issue created: "[${category}] ${cleanTitle}" (merged ${mergedIds.length})`);
|
|
171
172
|
|
|
172
173
|
} else if (toolUse.name === "skip_finding") {
|
|
173
174
|
const { finding_id, reason } = toolUse.input as { finding_id: string; reason: string };
|
package/package.json
CHANGED
package/run.ts
CHANGED
|
@@ -394,10 +394,10 @@ ${productSpec.uiFeatures ? `\n[UI-Only Features]\nThese features exist in the UI
|
|
|
394
394
|
}
|
|
395
395
|
|
|
396
396
|
// ================================================================
|
|
397
|
-
//
|
|
397
|
+
// Persona designer agent
|
|
398
398
|
// ================================================================
|
|
399
399
|
|
|
400
|
-
const HR_TOOLS: Anthropic.Tool[] = [
|
|
400
|
+
const PERSONA_DESIGNER_TOOLS: Anthropic.Tool[] = [
|
|
401
401
|
{
|
|
402
402
|
name: "get_agents",
|
|
403
403
|
description: "Get the current list of registered agents. / 現在登録されているエージェント一覧を取得する",
|
|
@@ -445,24 +445,24 @@ const HR_TOOLS: Anthropic.Tool[] = [
|
|
|
445
445
|
},
|
|
446
446
|
];
|
|
447
447
|
|
|
448
|
-
async function
|
|
448
|
+
async function runPersonaDesigner(
|
|
449
449
|
productSpec: ProductSpec,
|
|
450
450
|
orgGuidance: string,
|
|
451
451
|
openIssues: { number: number; title: string; labels: string[] }[],
|
|
452
452
|
scenarios: Scenario[],
|
|
453
453
|
testAccounts: TestAccount[] = [],
|
|
454
454
|
): Promise<void> {
|
|
455
|
-
console.log("\n[
|
|
455
|
+
console.log("\n[persona-designer] starting...");
|
|
456
456
|
const messages: Anthropic.MessageParam[] = [
|
|
457
|
-
{ role: "user", content: "
|
|
457
|
+
{ role: "user", content: "Design and manage user personas for this run." },
|
|
458
458
|
];
|
|
459
459
|
|
|
460
460
|
const accountContext = testAccounts.length > 0
|
|
461
461
|
? `\n[Available Test Accounts (one per role)]\n${testAccounts.map((a) => `- ${a.role}: ${a.email}`).join("\n")}\nWhen recruiting agents, match each persona's role to one of these accounts so they can operate with appropriate permissions.`
|
|
462
462
|
: "";
|
|
463
463
|
|
|
464
|
-
const systemPrompt = `You are the
|
|
465
|
-
You
|
|
464
|
+
const systemPrompt = `You are the persona designer for "${productSpec.appName}".
|
|
465
|
+
You create and manage test agents that simulate real users of the app.
|
|
466
466
|
|
|
467
467
|
[Organization Design Guidelines]
|
|
468
468
|
${orgGuidance}${accountContext}
|
|
@@ -483,7 +483,7 @@ ${orgGuidance}${accountContext}
|
|
|
483
483
|
model: defaultModel,
|
|
484
484
|
max_tokens: 1024,
|
|
485
485
|
system: systemPrompt,
|
|
486
|
-
tools:
|
|
486
|
+
tools: PERSONA_DESIGNER_TOOLS,
|
|
487
487
|
messages,
|
|
488
488
|
});
|
|
489
489
|
messages.push({ role: "assistant", content: response.content });
|
|
@@ -496,14 +496,14 @@ ${orgGuidance}${accountContext}
|
|
|
496
496
|
let result: unknown;
|
|
497
497
|
if (toolUse.name === "get_coverage") {
|
|
498
498
|
result = computeWeightedSummary().formatted;
|
|
499
|
-
console.log(" [
|
|
499
|
+
console.log(" [persona-designer] coverage summary fetched");
|
|
500
500
|
} else if (toolUse.name === "get_open_issues") {
|
|
501
501
|
if (openIssues.length === 0) {
|
|
502
502
|
result = "(no open issues — either GitHub is not configured or there are no known issues yet)";
|
|
503
503
|
} else {
|
|
504
504
|
result = openIssues.map((i) => `- #${i.number}: ${i.title} [${i.labels.join(", ")}]`).join("\n");
|
|
505
505
|
}
|
|
506
|
-
console.log(` [
|
|
506
|
+
console.log(` [persona-designer] open issues fetched (${openIssues.length})`);
|
|
507
507
|
} else if (toolUse.name === "get_scenarios") {
|
|
508
508
|
if (scenarios.length === 0) {
|
|
509
509
|
result = "(no scenarios generated — all agents will use free-exploration mode)";
|
|
@@ -512,19 +512,19 @@ ${orgGuidance}${accountContext}
|
|
|
512
512
|
`[${s.id}] ${s.title}\n Context: ${s.context}\n Goal: ${s.goal}\n Constraints: ${s.constraints}`
|
|
513
513
|
).join("\n\n");
|
|
514
514
|
}
|
|
515
|
-
console.log(` [
|
|
515
|
+
console.log(` [persona-designer] scenarios fetched (${scenarios.length})`);
|
|
516
516
|
} else if (toolUse.name === "get_agents") {
|
|
517
517
|
const agents = loadAgents();
|
|
518
518
|
result = agents.map((a) => ({ id: a.id, name: a.name, role: a.role, createdAt: a.createdAt }));
|
|
519
|
-
console.log(` [
|
|
519
|
+
console.log(` [persona-designer] current agents: ${agents.length}`);
|
|
520
520
|
} else if (toolUse.name === "add_agent") {
|
|
521
521
|
const { name, role, persona } = toolUse.input as { name: string; role: string; persona: string };
|
|
522
522
|
result = addAgent({ name, role, persona });
|
|
523
|
-
console.log(` [
|
|
523
|
+
console.log(` [persona-designer] created: ${name} (${role})`);
|
|
524
524
|
} else if (toolUse.name === "retire_agent") {
|
|
525
525
|
const { agentId, reason } = toolUse.input as { agentId: string; reason: string };
|
|
526
526
|
result = { success: retireAgent(agentId) };
|
|
527
|
-
console.log(` [
|
|
527
|
+
console.log(` [persona-designer] retired: ${agentId} — ${reason}`);
|
|
528
528
|
} else {
|
|
529
529
|
result = { error: "unknown tool" };
|
|
530
530
|
}
|
|
@@ -532,9 +532,9 @@ ${orgGuidance}${accountContext}
|
|
|
532
532
|
}
|
|
533
533
|
messages.push({ role: "user", content: toolResults });
|
|
534
534
|
}
|
|
535
|
-
console.log("[
|
|
535
|
+
console.log("[persona-designer] done");
|
|
536
536
|
} catch (e) {
|
|
537
|
-
console.error("[
|
|
537
|
+
console.error("[persona-designer] error:", e);
|
|
538
538
|
}
|
|
539
539
|
}
|
|
540
540
|
|
|
@@ -691,7 +691,7 @@ async function executeBrowserTool(
|
|
|
691
691
|
const { path: navPath } = input as { path: string };
|
|
692
692
|
await saveSnapshotBeforeAction(page, observation);
|
|
693
693
|
await page.goto(`${BASE_URL}${navPath}`, { waitUntil: "networkidle" });
|
|
694
|
-
await page.waitForTimeout(
|
|
694
|
+
await page.waitForTimeout(3000);
|
|
695
695
|
screenshot = await takeScreenshot(page, `navigate_${navPath.replace(/\//g, "_")}`);
|
|
696
696
|
resultText = `Navigated to ${navPath}`;
|
|
697
697
|
break;
|
|
@@ -925,7 +925,7 @@ ${productSpec.designContext ? `\n[Design Context]\n${productSpec.designContext}\
|
|
|
925
925
|
: ""}`;
|
|
926
926
|
|
|
927
927
|
await page.goto(BASE_URL, { waitUntil: "networkidle" });
|
|
928
|
-
await page.waitForTimeout(
|
|
928
|
+
await page.waitForTimeout(5000);
|
|
929
929
|
const initialScreenshot = await takeScreenshot(page, "initial");
|
|
930
930
|
|
|
931
931
|
const messages: Anthropic.MessageParam[] = [
|
|
@@ -1105,7 +1105,7 @@ async function main() {
|
|
|
1105
1105
|
}
|
|
1106
1106
|
|
|
1107
1107
|
// 4. HR agent
|
|
1108
|
-
await
|
|
1108
|
+
await runPersonaDesigner(productSpec, orgDesign.personaGuidance, openIssues, scenarios, testAccounts);
|
|
1109
1109
|
|
|
1110
1110
|
// 5. load agents + closed issues
|
|
1111
1111
|
const allAgents = loadAgents();
|