@oberion/wildo 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-JXNI22FR.js +267 -0
- package/dist/chunk-TMJX67JD.js +2 -0
- package/dist/chunk-TOAEOZEP.js +61 -0
- package/dist/cli.js +10 -0
- package/dist/index.d.ts +232 -0
- package/dist/index.js +1 -0
- package/dist/server.js +1 -0
- package/dist/setup-ONOARWXI.js +14 -0
- package/dist/tui-MIW7YP2V.js +1 -0
- package/package.json +45 -0
- package/personas/architect/playbook.md +74 -0
- package/personas/developer/playbook.md +19 -0
- package/personas/reporter/playbook.md +38 -0
- package/personas/reviewer/playbook.md +109 -0
- package/personas/tester/playbook.md +19 -0
- package/static/index.html +739 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/** Pipeline mode accepted by the Orchestrator constructor and recorded in RunManifest / PreflightSummary. */
type PipelineMode = "greenfield" | "feature" | "bugfix";
|
|
2
|
+
/** Verdict carried by a ReviewResult (its `verdict` field). */
type ReviewVerdict = "PASS" | "FAIL";
|
|
3
|
+
/** Category tag of an EscalationReport (its `escalationType` field). */
type EscalationType = "spec_issue" | "blocked" | "user";
|
|
4
|
+
interface ReviewResult {
|
|
5
|
+
verdict: ReviewVerdict;
|
|
6
|
+
rawOutput: string;
|
|
7
|
+
blockingIssues: string[];
|
|
8
|
+
roundNumber: number;
|
|
9
|
+
}
|
|
10
|
+
interface EscalationReport {
|
|
11
|
+
escalationType: EscalationType;
|
|
12
|
+
phase: string;
|
|
13
|
+
roundsCompleted: number;
|
|
14
|
+
history: ReviewResult[];
|
|
15
|
+
diagnosis: string;
|
|
16
|
+
}
|
|
17
|
+
interface SessionUsage {
|
|
18
|
+
agentName: string;
|
|
19
|
+
contextWindow: number;
|
|
20
|
+
totalInputTokens: number;
|
|
21
|
+
totalOutputTokens: number;
|
|
22
|
+
seenMessageIds: Set<string>;
|
|
23
|
+
}
|
|
24
|
+
/** Names of the agent roles; the package ships one playbook per role under `personas/<role>/playbook.md`. */
type AgentRole = "architect" | "reviewer" | "reporter" | "developer" | "tester";
|
|
25
|
+
interface AgentConfig {
|
|
26
|
+
systemPrompt: string;
|
|
27
|
+
tools: string[];
|
|
28
|
+
model: string;
|
|
29
|
+
}
|
|
30
|
+
/** Status value recorded in a RunManifest (its `status` field). */
type RunStatus = "running" | "completed" | "stopped" | "failed" | "crashed";
|
|
31
|
+
interface RunManifest {
|
|
32
|
+
runId: string;
|
|
33
|
+
mode: PipelineMode;
|
|
34
|
+
branch: string;
|
|
35
|
+
language: string;
|
|
36
|
+
status: RunStatus;
|
|
37
|
+
workingDir: string;
|
|
38
|
+
requirement: string;
|
|
39
|
+
acknowledged?: boolean;
|
|
40
|
+
}
|
|
41
|
+
interface CleanupDecision {
|
|
42
|
+
runId: string;
|
|
43
|
+
action: "keep" | "discard";
|
|
44
|
+
branch?: string;
|
|
45
|
+
}
|
|
46
|
+
/** Authentication mode selected per provider in ProviderAuth (its `authMode` field). */
type AuthMode = "subscription" | "api_key";
|
|
47
|
+
interface ProviderAuth {
|
|
48
|
+
authMode: AuthMode;
|
|
49
|
+
apiKey?: string;
|
|
50
|
+
}
|
|
51
|
+
interface AuthConfig {
|
|
52
|
+
claude: ProviderAuth;
|
|
53
|
+
codex: ProviderAuth;
|
|
54
|
+
}
|
|
55
|
+
interface ModelConfig {
|
|
56
|
+
architect: string;
|
|
57
|
+
reviewer: string;
|
|
58
|
+
reporter: string;
|
|
59
|
+
developer: string;
|
|
60
|
+
tester: string;
|
|
61
|
+
orchestrator: string;
|
|
62
|
+
}
|
|
63
|
+
interface PipelineConfig {
|
|
64
|
+
auth: AuthConfig;
|
|
65
|
+
models: ModelConfig;
|
|
66
|
+
maxAgentTurns: number;
|
|
67
|
+
maxReviewRounds: number;
|
|
68
|
+
contextHandoffThreshold: number;
|
|
69
|
+
codexIdleTimeout: number;
|
|
70
|
+
permissionMode: string;
|
|
71
|
+
}
|
|
72
|
+
/** Discriminator values for PipelineEvent (its `type` field). */
type PipelineEventType = "agent_start" | "agent_done" | "content_block" | "timeline" | "confirm_request" | "cleanup_request" | "pipeline_done" | "pipeline_error";
|
|
73
|
+
interface PipelineEvent {
|
|
74
|
+
type: PipelineEventType;
|
|
75
|
+
agent?: string;
|
|
76
|
+
phase?: string;
|
|
77
|
+
data?: unknown;
|
|
78
|
+
}
|
|
79
|
+
interface PreflightSummary {
|
|
80
|
+
workingDir: string;
|
|
81
|
+
mode: PipelineMode;
|
|
82
|
+
language: string;
|
|
83
|
+
requirement: string;
|
|
84
|
+
hasExistingFiles: boolean;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* Orchestrator: TypeScript control flow driving the multi-agent pipeline.
|
|
89
|
+
*
|
|
90
|
+
* Agents are workers invoked by deterministic code. State transitions depend on
|
|
91
|
+
* agent outputs (reviewer PASS/FAIL, escalation diagnosis), not on LLM decisions.
|
|
92
|
+
*
|
|
93
|
+
* Claude agents use the @anthropic-ai/claude-agent-sdk package (direct query sessions).
|
|
94
|
+
* Codex agents use @openai/codex-sdk (direct API, no MCP dispatcher).
|
|
95
|
+
*/
|
|
96
|
+
|
|
97
|
+
declare class PipelineStopped extends Error {
|
|
98
|
+
constructor();
|
|
99
|
+
}
|
|
100
|
+
declare class UserInterventionRequired extends Error {
|
|
101
|
+
phase: string;
|
|
102
|
+
diagnosis: string;
|
|
103
|
+
constructor(phase: string, diagnosis: string, reportsDir?: string);
|
|
104
|
+
}
|
|
105
|
+
declare class Orchestrator {
|
|
106
|
+
readonly requirement: string;
|
|
107
|
+
readonly mode: PipelineMode;
|
|
108
|
+
readonly lang: string;
|
|
109
|
+
readonly workingDir: string;
|
|
110
|
+
private config;
|
|
111
|
+
private sessions;
|
|
112
|
+
private reviewHistories;
|
|
113
|
+
private lastQueryTime;
|
|
114
|
+
private originalBranch;
|
|
115
|
+
private workBranch;
|
|
116
|
+
private onEvent;
|
|
117
|
+
private stopped;
|
|
118
|
+
private workflowRoot;
|
|
119
|
+
private knowledgeDir;
|
|
120
|
+
private modulesDir;
|
|
121
|
+
private runsDir;
|
|
122
|
+
private runId;
|
|
123
|
+
private runDir;
|
|
124
|
+
private specsDir;
|
|
125
|
+
private plansDir;
|
|
126
|
+
private reportsDir;
|
|
127
|
+
private handoffsDir;
|
|
128
|
+
private roundsDir;
|
|
129
|
+
private confirmCallback;
|
|
130
|
+
private cleanupCallback;
|
|
131
|
+
constructor(requirement: string, workingDir: string, mode?: PipelineMode, lang?: string);
|
|
132
|
+
setEventHandler(handler: (event: PipelineEvent) => void): void;
|
|
133
|
+
setConfirmCallback(cb: (summary: string, warning: string) => Promise<boolean>): void;
|
|
134
|
+
setCleanupCallback(cb: (crashed: RunInfo[]) => Promise<Record<string, "keep" | "discard">>): void;
|
|
135
|
+
stop(): void;
|
|
136
|
+
abort(): void;
|
|
137
|
+
/** Apply API keys from config to environment so SDKs pick them up. */
|
|
138
|
+
private applyAuth;
|
|
139
|
+
run(): Promise<string>;
|
|
140
|
+
private emit;
|
|
141
|
+
private checkStopped;
|
|
142
|
+
private phase;
|
|
143
|
+
private describeTool;
|
|
144
|
+
private emitContentBlocks;
|
|
145
|
+
private timeline;
|
|
146
|
+
private get langInstruction();
|
|
147
|
+
private cooldown;
|
|
148
|
+
private markQueryDone;
|
|
149
|
+
private initRunDir;
|
|
150
|
+
private writeManifest;
|
|
151
|
+
private updateRunsIndex;
|
|
152
|
+
private finalizeRun;
|
|
153
|
+
private scanCrashedRuns;
|
|
154
|
+
private collectCleanupDecisions;
|
|
155
|
+
private applyCleanupDecisions;
|
|
156
|
+
private askCleanup;
|
|
157
|
+
private discardRun;
|
|
158
|
+
private acknowledgeRun;
|
|
159
|
+
private removeFromIndex;
|
|
160
|
+
private updateIndexStatus;
|
|
161
|
+
private preflightConfirm;
|
|
162
|
+
private callClaude;
|
|
163
|
+
private handoffClaude;
|
|
164
|
+
/**
|
|
165
|
+
* Call a producer agent (developer/tester) using the SDK matching its
|
|
166
|
+
* configured model. Claude models → Claude Agent SDK, OpenAI models → Codex SDK.
|
|
167
|
+
*/
|
|
168
|
+
private callAgent;
|
|
169
|
+
private callCodexAgent;
|
|
170
|
+
private getOrCreateSession;
|
|
171
|
+
private archiveRound;
|
|
172
|
+
private reviewLoop;
|
|
173
|
+
private rollbackToArchitect;
|
|
174
|
+
private formatReviewHistory;
|
|
175
|
+
private report;
|
|
176
|
+
private extractExperience;
|
|
177
|
+
private hasKnowledge;
|
|
178
|
+
private initKnowledge;
|
|
179
|
+
private loadKnowledgeContext;
|
|
180
|
+
private updateKnowledge;
|
|
181
|
+
private runImplementation;
|
|
182
|
+
private finalize;
|
|
183
|
+
private runGreenfield;
|
|
184
|
+
private runFeature;
|
|
185
|
+
private runBugfix;
|
|
186
|
+
}
|
|
187
|
+
interface RunInfo {
|
|
188
|
+
runId: string;
|
|
189
|
+
mode: string;
|
|
190
|
+
branch: string;
|
|
191
|
+
requirement: string;
|
|
192
|
+
status: string;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
interface UserConfigFile {
|
|
196
|
+
lang?: string;
|
|
197
|
+
theme?: "dark" | "light" | "auto";
|
|
198
|
+
auth?: {
|
|
199
|
+
claude?: Partial<ProviderAuth>;
|
|
200
|
+
codex?: Partial<ProviderAuth>;
|
|
201
|
+
};
|
|
202
|
+
models?: Partial<ModelConfig>;
|
|
203
|
+
maxAgentTurns?: number;
|
|
204
|
+
maxReviewRounds?: number;
|
|
205
|
+
contextHandoffThreshold?: number;
|
|
206
|
+
codexIdleTimeout?: number;
|
|
207
|
+
permissionMode?: string;
|
|
208
|
+
}
|
|
209
|
+
/** Detect available auth sources for each provider. */
|
|
210
|
+
declare function detectAuthSources(): {
|
|
211
|
+
claude: {
|
|
212
|
+
hasEnvKey: boolean;
|
|
213
|
+
hasCli: boolean;
|
|
214
|
+
envKey?: string;
|
|
215
|
+
};
|
|
216
|
+
codex: {
|
|
217
|
+
hasEnvKey: boolean;
|
|
218
|
+
hasCli: boolean;
|
|
219
|
+
envKey?: string;
|
|
220
|
+
};
|
|
221
|
+
};
|
|
222
|
+
/** Check if first-run setup has been completed. */
|
|
223
|
+
declare function isConfigured(): boolean;
|
|
224
|
+
/** Save config to ~/.wildo/config.json. */
|
|
225
|
+
declare function saveConfig(config: UserConfigFile): void;
|
|
226
|
+
/**
|
|
227
|
+
* Load pipeline config: defaults ← ~/.wildo/config.json ← env vars.
|
|
228
|
+
* Later sources override earlier ones.
|
|
229
|
+
*/
|
|
230
|
+
declare function loadConfig(): Promise<PipelineConfig>;
|
|
231
|
+
|
|
232
|
+
// RunInfo is exported because it appears in the public Orchestrator.setCleanupCallback signature; without it consumers cannot name the callback's parameter type.
export { type AgentConfig, type AgentRole, type AuthConfig, type AuthMode, type CleanupDecision, type EscalationReport, type EscalationType, type ModelConfig, Orchestrator, type PipelineConfig, type PipelineEvent, type PipelineEventType, type PipelineMode, PipelineStopped, type PreflightSummary, type ReviewResult, type ReviewVerdict, type RunInfo, type RunManifest, type RunStatus, type SessionUsage, UserInterventionRequired, detectAuthSources, isConfigured, loadConfig, saveConfig };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import{a as o,b as r,c as p}from"./chunk-JXNI22FR.js";import{b as e,c as i,h as t,i as n}from"./chunk-TMJX67JD.js";import"./chunk-TOAEOZEP.js";export{p as Orchestrator,o as PipelineStopped,r as UserInterventionRequired,e as detectAuthSources,i as isConfigured,n as loadConfig,t as saveConfig};
|
package/dist/server.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import{a as w,b as S,c as v}from"./chunk-JXNI22FR.js";import{b as f,c as g,h as y,k as h}from"./chunk-TMJX67JD.js";import"./chunk-TOAEOZEP.js";import{createServer as x}from"http";import{execFile as R}from"child_process";import{readFileSync as j,existsSync as k}from"fs";import{join as P,extname as I}from"path";import{WebSocketServer as N,WebSocket as T}from"ws";var c=8420,J=P(import.meta.dirname,"..","static"),r=null,d=!1,u=new Set,a=null,l=null,b=null;function i(t){let e=JSON.stringify(t);for(let n of u)n.readyState===T.OPEN&&n.send(e)}var H={".html":"text/html; charset=utf-8",".css":"text/css",".js":"application/javascript",".json":"application/json",".png":"image/png",".svg":"image/svg+xml",".ico":"image/x-icon"},m=x((t,e)=>{if(t.url==="/api/setup/status"){e.writeHead(200,{"Content-Type":"application/json"}),e.end(JSON.stringify({configured:g(),sources:f()}));return}if(t.url==="/api/setup/save"&&t.method==="POST"){let p="";t.on("data",s=>{p+=s}),t.on("end",()=>{try{let s=JSON.parse(p);y({auth:s.auth,lang:s.lang}),e.writeHead(200,{"Content-Type":"application/json"}),e.end(JSON.stringify({ok:!0}))}catch{e.writeHead(400,{"Content-Type":"application/json"}),e.end(JSON.stringify({error:"Invalid JSON"}))}});return}if(t.url?.startsWith("/api/i18n")){let s=new URL(t.url,"http://localhost").searchParams.get("lang")??"en";e.writeHead(200,{"Content-Type":"application/json"}),e.end(JSON.stringify(h(s)));return}if(t.url==="/api/config"){e.writeHead(200,{"Content-Type":"application/json"}),e.end(JSON.stringify(b??{}));return}let n=t.url==="/"?"/index.html":t.url??"/index.html";if(n=n.split("?")[0],n=P(J,n),!k(n)){e.writeHead(404),e.end("Not Found");return}let o=I(n),C=H[o]??"application/octet-stream",O=j(n);e.writeHead(200,{"Content-Type":C}),e.end(O)}),_=new N({server:m});_.on("connection",t=>{u.add(t),t.on("close",()=>u.delete(t)),t.on("message",e=>{let n;try{n=JSON.parse(e.toString())}catch{return}let 
o=n.action;o==="start"?W(n):o==="confirm"?a&&(a(n.confirmed??!1),a=null):o==="cleanup"?l&&(l(n.decisions??{}),l=null):o==="stop"?r?.stop():o==="abort"&&r?.abort()})});async function M(t,e){return i({type:"confirm_request",summary:t,warning:e}),new Promise(n=>{a=n})}async function U(t){return i({type:"cleanup_request",crashed:t.map(e=>({run_id:e.runId,mode:e.mode,branch:e.branch,requirement:e.requirement,status:e.status}))}),new Promise(e=>{l=e})}async function W(t){if(d){i({type:"error",message:"Pipeline already running"});return}let e=t.mode??"greenfield";r=new v(t.requirement??"",t.working_dir??".",e,t.lang??"en"),r.setEventHandler(n=>i(n)),r.setConfirmCallback(M),r.setCleanupCallback(U),d=!0;try{let n=await r.run();i({type:"pipeline_done",report:n})}catch(n){n instanceof w?i({type:"pipeline_stopped"}):n instanceof S?i({type:"pipeline_paused",phase:n.phase,diagnosis:n.diagnosis}):i({type:"error",message:String(n)})}finally{d=!1}}function D(t){let e=process.platform==="darwin"?"open":process.platform==="win32"?"cmd":"xdg-open",n=process.platform==="win32"?["/c","start","",t]:[t];R(e,n,()=>{})}function K(t){b=t,m.listen(c,()=>{let e=`http://localhost:${c}`;console.log(`wildo pipeline UI: ${e}`),D(e)})}import.meta.url===`file://${process.argv[1]}`&&m.listen(c,()=>{console.log(`wildo pipeline UI: http://localhost:${c}`)});export{m as httpServer,K as startWebUI};
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import{b as x,e as L,h as y,j as t}from"./chunk-TMJX67JD.js";import{a as r,d as k,e as A,f,g as l,h as $,i as d,j as M,k as w,m as D}from"./chunk-TOAEOZEP.js";async function P(e,s,o){f(s),process.stdout.write(`
|
|
2
|
+
`),o.hasCli&&d(`${r.green}${t(e,"setup_cli_found")}${r.reset}`),o.hasEnvKey&&d(`${r.green}${t(e,"setup_env_found")}${r.reset} ${r.darkGray}(${o.envKey})${r.reset}`),!o.hasCli&&!o.hasEnvKey&&w(t(e,"setup_no_auth")),process.stdout.write(`
|
|
3
|
+
`);let n=[];o.hasCli&&n.push({value:"subscription",label:t(e,"setup_opt_sub"),hint:t(e,"setup_opt_sub_hint")}),o.hasEnvKey&&n.push({value:"env_key",label:t(e,"setup_opt_env"),hint:o.envKey}),n.push({value:"new_key",label:t(e,"setup_opt_new")});let u=await l({message:`${s} auth`,options:n});if(u==="subscription")return{authMode:"subscription"};if(u==="env_key")return{authMode:"api_key"};let i=await $({message:t(e,"setup_enter_key"),placeholder:"sk-...",password:!0});return i?{authMode:"api_key",apiKey:i}:(w(t(e,"setup_no_key")),{authMode:"api_key"})}var O=["opus","sonnet","haiku"],S=["gpt-5.4"],z=[...O,...S],H={architect:"opus",reviewer:"sonnet",reporter:"haiku",developer:"gpt-5.4",tester:"gpt-5.4"},I={architect:"opus",reviewer:"sonnet",reporter:"haiku",developer:"opus",tester:"opus"},N={architect:"gpt-5.4",reviewer:"gpt-5.4",reporter:"gpt-5.4",developer:"gpt-5.4",tester:"gpt-5.4"},R={en:{architect:"Architect",reviewer:"Reviewer",reporter:"Reporter",developer:"Developer",tester:"Tester"},zh:{architect:"\u67B6\u6784\u5E08",reviewer:"\u8BC4\u5BA1\u5458",reporter:"\u62A5\u544A\u5458",developer:"\u5F00\u53D1\u8005",tester:"\u6D4B\u8BD5\u5458"}};async function X(e="en",s){let o=L(),n=s==="claude_only"?I:s==="codex_only"?N:H,u=s==="claude_only"?O:s==="codex_only"?S:z;process.stdout.write(`
|
|
4
|
+
`),f(t(e,"setup_model_title")),process.stdout.write(`
|
|
5
|
+
`);let i=["architect","reviewer","reporter","developer","tester"],p={},_=t(e,"setup_model_default"),g=t(e,"setup_model_current");for(let a of i){let K=(R[e]??R.en)[a]??a,b=n[a],h=o[a],U=h??b,v=[];for(let c of u){let C=h&&c===h,E=c===b,T=C&&E?`${_} \xB7 ${g}`:C?g:E?_:void 0;v.push({value:c,label:c,hint:T})}let m=v.findIndex(c=>c.value===U),F=await l({message:t(e,"setup_model_msg",{role:K}),options:v,initial:m>=0?m:0});p[a]=F}y({models:p}),process.stdout.write(`
|
|
6
|
+
`),M(t(e,"setup_model_done"))}async function G(){process.stdout.write(`
|
|
7
|
+
`),k();let e=await l({message:"Language / \u8BED\u8A00",options:[{value:"en",label:"English"},{value:"zh",label:"\u4E2D\u6587"}]});e==="zh"&&(process.stdout.write(`
|
|
8
|
+
`),A()),process.stdout.write(`
|
|
9
|
+
`),d(t(e,"setup_intro")),process.stdout.write(`
|
|
10
|
+
`);let s=await l({message:t(e,"setup_provider_msg"),options:[{value:"both",label:t(e,"setup_provider_both"),hint:t(e,"setup_provider_both_hint")},{value:"claude_only",label:t(e,"setup_provider_claude_only"),hint:t(e,"setup_provider_claude_only_hint")},{value:"codex_only",label:t(e,"setup_provider_codex_only"),hint:t(e,"setup_provider_codex_only_hint")}]});process.stdout.write(`
|
|
11
|
+
`);let o=x(),n=s!=="codex_only",u=s!=="claude_only",i={authMode:"subscription"},p={authMode:"subscription"};n&&(i=await P(e,"Claude",o.claude),process.stdout.write(`
|
|
12
|
+
`)),u&&(p=await P(e,"Codex",o.codex),process.stdout.write(`
|
|
13
|
+
`)),y({lang:e,auth:{claude:i,codex:p}}),await X(e,s==="both"?void 0:s),process.stdout.write(`
|
|
14
|
+
`),D(t(e,"setup_saved"),`${r.gray}${t(e,"setup_reconfig")}${r.reset}`)}export{G as runSetup,X as setupModels};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import{a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u}from"./chunk-TOAEOZEP.js";export{b as ROLE_COLORS,p as agentCard,o as agentStatus,d as banner,e as bannerZh,a as c,t as configSummary,f as divider,l as error,i as info,m as note,n as phaseHeader,s as pipelineDone,q as reviewRound,c as roleColor,g as select,u as spinner,r as statusLine,j as success,h as text,k as warn};
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@oberion/wildo",
|
|
3
|
+
"version": "0.6.4",
|
|
4
|
+
"description": "Will do! Multi-agent pipeline — AI agents at your service",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"wildo": "./dist/cli.js"
|
|
8
|
+
},
|
|
9
|
+
"main": "./dist/index.js",
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"files": [
|
|
12
|
+
"dist",
|
|
13
|
+
"personas",
|
|
14
|
+
"static"
|
|
15
|
+
],
|
|
16
|
+
"scripts": {
|
|
17
|
+
"build": "tsup",
|
|
18
|
+
"dev": "tsup --watch",
|
|
19
|
+
"start": "node dist/cli.js"
|
|
20
|
+
},
|
|
21
|
+
"keywords": [
|
|
22
|
+
"wildo",
|
|
23
|
+
"ai",
|
|
24
|
+
"agents",
|
|
25
|
+
"claude",
|
|
26
|
+
"codex",
|
|
27
|
+
"pipeline"
|
|
28
|
+
],
|
|
29
|
+
"license": "MIT",
|
|
30
|
+
"engines": {
|
|
31
|
+
"node": ">=18.0.0"
|
|
32
|
+
},
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"@anthropic-ai/claude-agent-sdk": "^0.2.0",
|
|
35
|
+
"@openai/codex-sdk": "^0.120.0",
|
|
36
|
+
"commander": "^13.0.0",
|
|
37
|
+
"ws": "^8.18.0"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@types/node": "^22.0.0",
|
|
41
|
+
"@types/ws": "^8.5.0",
|
|
42
|
+
"tsup": "^8.0.0",
|
|
43
|
+
"typescript": "^5.7.0"
|
|
44
|
+
}
|
|
45
|
+
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Architect Playbook
|
|
2
|
+
|
|
3
|
+
You are a senior software architect. Your job is to turn user requirements
|
|
4
|
+
into clear, actionable specification documents.
|
|
5
|
+
|
|
6
|
+
## Spec Structure
|
|
7
|
+
|
|
8
|
+
Your spec must define:
|
|
9
|
+
- Functional requirements (what the system does)
|
|
10
|
+
- Non-functional requirements (performance, security, scalability)
|
|
11
|
+
- System components and their interfaces
|
|
12
|
+
- Data models and API contracts
|
|
13
|
+
- Acceptance criteria for each feature
|
|
14
|
+
|
|
15
|
+
Output spec to: `workflow/specs/spec.md`
|
|
16
|
+
|
|
17
|
+
## Design Process
|
|
18
|
+
|
|
19
|
+
### 1. Requirements Analysis
|
|
20
|
+
- Decompose the requirement into functional and non-functional aspects
|
|
21
|
+
- Identify implicit requirements the user didn't state but will expect
|
|
22
|
+
- Define acceptance criteria that are testable and unambiguous
|
|
23
|
+
|
|
24
|
+
### 2. Component Design
|
|
25
|
+
- Define clear component boundaries and responsibilities
|
|
26
|
+
- Specify interfaces and contracts precisely — ambiguity causes cascading errors downstream
|
|
27
|
+
- Document data flow between components
|
|
28
|
+
|
|
29
|
+
### 3. Trade-Off Analysis
|
|
30
|
+
For each significant design decision, document:
|
|
31
|
+
- **Decision**: What was chosen
|
|
32
|
+
- **Rationale**: Why this option
|
|
33
|
+
- **Alternatives considered**: What else was evaluated
|
|
34
|
+
- **Trade-offs**: What we gain vs. what we give up
|
|
35
|
+
|
|
36
|
+
## System Design Checklist
|
|
37
|
+
|
|
38
|
+
### Functional
|
|
39
|
+
- [ ] All user stories documented
|
|
40
|
+
- [ ] API contracts defined (endpoints, request/response shapes)
|
|
41
|
+
- [ ] Data models specified (entities, relationships, constraints)
|
|
42
|
+
- [ ] Error cases and edge conditions covered
|
|
43
|
+
|
|
44
|
+
### Non-Functional
|
|
45
|
+
- [ ] Performance targets defined (latency, throughput)
|
|
46
|
+
- [ ] Security requirements identified (auth, data protection)
|
|
47
|
+
- [ ] Scalability considerations noted (if relevant)
|
|
48
|
+
|
|
49
|
+
### Technical
|
|
50
|
+
- [ ] Component responsibilities defined
|
|
51
|
+
- [ ] Data flow documented
|
|
52
|
+
- [ ] Integration points identified
|
|
53
|
+
- [ ] Testing strategy outlined (what to test, how)
|
|
54
|
+
|
|
55
|
+
### Operations
|
|
56
|
+
- [ ] Deployment considerations noted
|
|
57
|
+
- [ ] Monitoring requirements identified (if applicable)
|
|
58
|
+
|
|
59
|
+
## Rules
|
|
60
|
+
|
|
61
|
+
- Define WHAT to build, not HOW to implement it. Leave implementation decisions to the developer.
|
|
62
|
+
- Be precise on interfaces and contracts — ambiguity causes cascading errors downstream.
|
|
63
|
+
- Every requirement must have a testable acceptance criterion.
|
|
64
|
+
- When you receive review feedback, address each point specifically. If you disagree, explain your reasoning rather than silently ignoring it.
|
|
65
|
+
|
|
66
|
+
## Red Flags — Avoid These
|
|
67
|
+
|
|
68
|
+
- **Over-specification**: Prescribing frameworks, algorithms, or implementation details
|
|
69
|
+
- **Vague criteria**: "should be fast", "user-friendly", "scalable" without measurable targets
|
|
70
|
+
- **Missing error cases**: Only describing happy paths
|
|
71
|
+
- **Assumed context**: Relying on knowledge the developer won't have
|
|
72
|
+
- **God component**: One component doing everything
|
|
73
|
+
- **Tight coupling**: Components with circular or deep dependencies
|
|
74
|
+
- **Premature optimization**: Adding complexity for hypothetical scale
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Developer Playbook
|
|
2
|
+
|
|
3
|
+
You are a senior software developer. Your job is to implement production-quality
|
|
4
|
+
code based on specifications and architecture plans.
|
|
5
|
+
|
|
6
|
+
## Approach
|
|
7
|
+
|
|
8
|
+
- Read the spec and plan thoroughly before writing code
|
|
9
|
+
- Follow the project's existing conventions (language, style, structure)
|
|
10
|
+
- Write clean, well-structured code — no dead code, no TODO placeholders
|
|
11
|
+
- Handle errors at system boundaries; trust internal invariants
|
|
12
|
+
- Use existing dependencies; only add new ones when justified by the spec
|
|
13
|
+
|
|
14
|
+
## Output
|
|
15
|
+
|
|
16
|
+
- Create/modify files as specified in the plan
|
|
17
|
+
- Ensure the code compiles/runs without errors
|
|
18
|
+
- Run any build commands to verify your work
|
|
19
|
+
- If a test suite exists, make sure existing tests still pass
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Reporter Playbook
|
|
2
|
+
|
|
3
|
+
You are a technical report writer. After each development phase completes,
|
|
4
|
+
you produce a concise phase report.
|
|
5
|
+
|
|
6
|
+
## Report Structure
|
|
7
|
+
|
|
8
|
+
```
|
|
9
|
+
# Phase Report: [Phase Name]
|
|
10
|
+
Date: [current date]
|
|
11
|
+
|
|
12
|
+
## Summary
|
|
13
|
+
[2-3 sentences: what was accomplished]
|
|
14
|
+
|
|
15
|
+
## Key Decisions
|
|
16
|
+
- [Decision]: [Rationale]
|
|
17
|
+
|
|
18
|
+
## Artifacts Produced
|
|
19
|
+
- [file path]: [description]
|
|
20
|
+
|
|
21
|
+
## Review Rounds
|
|
22
|
+
- Round N: [PASS/FAIL] - [key feedback points]
|
|
23
|
+
|
|
24
|
+
## Issues & Risks
|
|
25
|
+
- [Any unresolved concerns]
|
|
26
|
+
|
|
27
|
+
## Next Phase
|
|
28
|
+
- [What comes next]
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Output reports to: `workflow/reports/`
|
|
32
|
+
|
|
33
|
+
## Rules
|
|
34
|
+
|
|
35
|
+
- Be factual, not promotional. Report what happened, not what should have happened.
|
|
36
|
+
- Include review round history — how many rounds, what changed.
|
|
37
|
+
- Keep it under 100 lines.
|
|
38
|
+
- Use the actual review history data provided. Do not fabricate round details.
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# Reviewer Playbook
|
|
2
|
+
|
|
3
|
+
You are a rigorous technical reviewer. You review artifacts produced by other
|
|
4
|
+
agents at each phase of the development lifecycle.
|
|
5
|
+
|
|
6
|
+
## Review Process
|
|
7
|
+
|
|
8
|
+
1. Read the artifact being reviewed
|
|
9
|
+
2. Read the relevant spec/plan for context: `workflow/specs/`, `workflow/plans/`
|
|
10
|
+
3. Evaluate against the criteria provided in each review request
|
|
11
|
+
4. Output a structured verdict
|
|
12
|
+
|
|
13
|
+
## Severity Levels
|
|
14
|
+
|
|
15
|
+
| Level | Meaning | Blocking? |
|
|
16
|
+
|-------|---------|-----------|
|
|
17
|
+
| CRITICAL | System failure, data loss, security breach, or spec violation | Yes |
|
|
18
|
+
| HIGH | Incorrect behavior, missing requirement, contract mismatch | Yes |
|
|
19
|
+
| MEDIUM | Quality issue, will cause problems later | No |
|
|
20
|
+
| LOW | Style, convention, minor improvement | No |
|
|
21
|
+
|
|
22
|
+
## Confidence-Based Filtering
|
|
23
|
+
|
|
24
|
+
- Report a finding only when you are >80% confident it is a real issue
|
|
25
|
+
- Do NOT flood with noise or stylistic preferences
|
|
26
|
+
- Consolidate similar issues (e.g., "5 endpoints missing validation" not 5 separate findings)
|
|
27
|
+
- Prioritize issues that could cause bugs, security vulnerabilities, or data loss
|
|
28
|
+
|
|
29
|
+
## Review Dimensions
|
|
30
|
+
|
|
31
|
+
### Spec Review (Architecture Phase)
|
|
32
|
+
- Requirements completeness: all user needs covered?
|
|
33
|
+
- Acceptance criteria: testable and unambiguous?
|
|
34
|
+
- Interface precision: contracts specific enough for implementation?
|
|
35
|
+
- No implementation details leaking into spec
|
|
36
|
+
|
|
37
|
+
### Code Review (Implementation Phase)
|
|
38
|
+
|
|
39
|
+
#### Security — flag immediately
|
|
40
|
+
| Pattern | Severity |
|
|
41
|
+
|---------|----------|
|
|
42
|
+
| Hardcoded credentials (API keys, passwords, tokens) | CRITICAL |
|
|
43
|
+
| SQL injection (string concatenation in queries) | CRITICAL |
|
|
44
|
+
| Command injection (user input in shell commands) | CRITICAL |
|
|
45
|
+
| Unvalidated user input at system boundaries | HIGH |
|
|
46
|
+
| Missing auth checks on protected routes | CRITICAL |
|
|
47
|
+
| Sensitive data in logs (tokens, PII) | HIGH |
|
|
48
|
+
|
|
49
|
+
#### Silent Failures — often missed
|
|
50
|
+
| Pattern | Severity |
|
|
51
|
+
|---------|----------|
|
|
52
|
+
| Empty catch blocks (`catch {}` or `catch { }`) | HIGH |
|
|
53
|
+
| Catch returning empty default (`.catch(() => [])`) | MEDIUM |
|
|
54
|
+
| Error logged but never handled | MEDIUM |
|
|
55
|
+
| Missing timeout on external calls | MEDIUM |
|
|
56
|
+
| Transactions without rollback on error | HIGH |
|
|
57
|
+
|
|
58
|
+
#### Performance
|
|
59
|
+
| Pattern | Severity |
|
|
60
|
+
|---------|----------|
|
|
61
|
+
| N+1 queries (fetch in loop) | HIGH |
|
|
62
|
+
| Unbounded queries (no LIMIT on user-facing endpoints) | MEDIUM |
|
|
63
|
+
| O(n^2) when O(n) is possible | MEDIUM |
|
|
64
|
+
| Missing cleanup (event listeners, timers, subscriptions) | MEDIUM |
|
|
65
|
+
|
|
66
|
+
### Test Review (Testing Phase)
|
|
67
|
+
- Coverage: every acceptance criterion has tests?
|
|
68
|
+
- Edge cases: null, empty, invalid, boundary values?
|
|
69
|
+
- Tests verify behavior, not implementation details?
|
|
70
|
+
- Tests are independent (no shared mutable state)?
|
|
71
|
+
|
|
72
|
+
## Common False Positives — Do NOT Flag
|
|
73
|
+
|
|
74
|
+
- Environment variables in `.env.example` (not actual secrets)
|
|
75
|
+
- Test credentials in test files (if clearly marked)
|
|
76
|
+
- SHA256/MD5 used for checksums (not passwords)
|
|
77
|
+
- Simplified error handling in prototype/MVP scope
|
|
78
|
+
|
|
79
|
+
Always verify context before flagging.
|
|
80
|
+
|
|
81
|
+
## Output Format (strictly follow this)
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
## Review: [artifact name]
|
|
85
|
+
|
|
86
|
+
### Verdict: PASS | FAIL
|
|
87
|
+
|
|
88
|
+
### Criteria Results:
|
|
89
|
+
- [Criterion 1]: PASS | FAIL
|
|
90
|
+
- [If FAIL: severity, specific issue, exact location, what's wrong, what's expected]
|
|
91
|
+
- [Criterion 2]: PASS | FAIL
|
|
92
|
+
...
|
|
93
|
+
|
|
94
|
+
### Blocking Issues (if FAIL):
|
|
95
|
+
1. [CRITICAL/HIGH] [File/section]: [Exact problem description]
|
|
96
|
+
2. ...
|
|
97
|
+
|
|
98
|
+
### Suggestions (non-blocking):
|
|
99
|
+
- [MEDIUM/LOW] ...
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Rules
|
|
103
|
+
|
|
104
|
+
- Never give a PASS when there are blocking issues. Be honest, not lenient.
|
|
105
|
+
- Every FAIL must include: what's wrong, where exactly, and what the correct state should be.
|
|
106
|
+
- "Looks good overall" is not acceptable feedback. Be specific.
|
|
107
|
+
- You are NOT the author. Do not rationalize problems away.
|
|
108
|
+
- If the same issue persists across rounds, escalate its severity.
|
|
109
|
+
- Verify that the artifact actually fulfills the criteria; don't just check its format.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Tester Playbook
|
|
2
|
+
|
|
3
|
+
You are a senior QA engineer. Your job is to write and execute comprehensive
|
|
4
|
+
tests that verify the implementation meets the specification.
|
|
5
|
+
|
|
6
|
+
## Approach
|
|
7
|
+
|
|
8
|
+
- Read the spec to understand acceptance criteria and edge cases
|
|
9
|
+
- Read the implementation to understand what to test
|
|
10
|
+
- Write tests that cover: happy path, edge cases, error scenarios
|
|
11
|
+
- Use the project's existing test framework and conventions
|
|
12
|
+
- If no test framework exists, choose one appropriate for the language/stack
|
|
13
|
+
|
|
14
|
+
## Output
|
|
15
|
+
|
|
16
|
+
- Create test files following project conventions
|
|
17
|
+
- Execute all tests and report results
|
|
18
|
+
- If tests fail, report the failures clearly — do not silently skip them
|
|
19
|
+
- Ensure both new and existing tests pass
|